changeset 46588:27a438928e38

Merge
author coleenp
date Wed, 28 Jun 2017 20:21:04 +0000
parents 6c97f34cb194 5c2a3a2e86ea
children f1c04490ded1
files
diffstat 32 files changed, 520 insertions(+), 598 deletions(-) [+]
line wrap: on
line diff
--- a/hotspot/src/jdk.aot/share/classes/jdk.tools.jaotc/src/jdk/tools/jaotc/collect/FileSupport.java	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/jdk.aot/share/classes/jdk.tools.jaotc/src/jdk/tools/jaotc/collect/FileSupport.java	Wed Jun 28 20:21:04 2017 +0000
@@ -46,7 +46,9 @@
 
     private URI makeJarFileURI(Path path) {
         try {
-            return new URI("jar:file:" + path.toAbsolutePath() + "!/");
+            String name = path.toAbsolutePath().toString();
+            name = name.replace('\\','/');
+            return new URI("jar:file:///" + name + "!/");
         } catch (URISyntaxException e) {
             throw new InternalError(e);
         }
--- a/hotspot/src/os/aix/vm/os_aix.cpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/os/aix/vm/os_aix.cpp	Wed Jun 28 20:21:04 2017 +0000
@@ -130,8 +130,6 @@
 #define ERROR_MP_VMGETINFO_FAILED                    102
 #define ERROR_MP_VMGETINFO_CLAIMS_NO_SUPPORT_FOR_64K 103
 
-// Query dimensions of the stack of the calling thread.
-static bool query_stack_dimensions(address* p_stack_base, size_t* p_stack_size);
 static address resolve_function_descriptor_to_code_pointer(address p);
 
 static void vmembk_print_on(outputStream* os);
@@ -764,11 +762,8 @@
   // find out my own stack dimensions
   {
     // actually, this should do exactly the same as thread->record_stack_base_and_size...
-    address base = 0;
-    size_t size = 0;
-    query_stack_dimensions(&base, &size);
-    thread->set_stack_base(base);
-    thread->set_stack_size(size);
+    thread->set_stack_base(os::current_stack_base());
+    thread->set_stack_size(os::current_stack_size());
   }
 
   const pthread_t pthread_id = ::pthread_self();
@@ -4297,91 +4292,28 @@
 /////////////////////////////////////////////////////////////////////////////
 // thread stack
 
-// Function to query the current stack size using pthread_getthrds_np.
-static bool query_stack_dimensions(address* p_stack_base, size_t* p_stack_size) {
-
-  // Information about this api can be found (a) in the pthread.h header and
-  // (b) in http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.basetechref/doc/basetrf1/pthread_getthrds_np.htm
-  //
-  // The use of this API to find out the current stack is kind of undefined.
-  // But after a lot of tries and asking IBM about it, I concluded that it is safe
-  // enough for cases where I let the pthread library create its stacks. For cases
-  // where I create an own stack and pass this to pthread_create, it seems not to
-  // work (the returned stack size in that case is 0).
-
-  pthread_t tid = pthread_self();
-  struct __pthrdsinfo pinfo;
-  char dummy[1]; // Just needed to satisfy pthread_getthrds_np.
-  int dummy_size = sizeof(dummy);
-
-  memset(&pinfo, 0, sizeof(pinfo));
-
-  const int rc = pthread_getthrds_np(&tid, PTHRDSINFO_QUERY_ALL, &pinfo,
-                                     sizeof(pinfo), dummy, &dummy_size);
-
-  if (rc != 0) {
-    trcVerbose("pthread_getthrds_np failed (%d)", rc);
-    return false;
-  }
-  guarantee0(pinfo.__pi_stackend);
-
-  // The following may happen when invoking pthread_getthrds_np on a pthread
-  // running on a user provided stack (when handing down a stack to pthread
-  // create, see pthread_attr_setstackaddr).
-  // Not sure what to do then.
-
-  guarantee0(pinfo.__pi_stacksize);
-
-  // Note: we get three values from pthread_getthrds_np:
-  //       __pi_stackaddr, __pi_stacksize, __pi_stackend
-  //
-  // high addr    ---------------------
-  //
-  //    |         pthread internal data, like ~2K
-  //    |
-  //    |         ---------------------   __pi_stackend   (usually not page aligned, (xxxxF890))
-  //    |
-  //    |
-  //    |
-  //    |
-  //    |
-  //    |
-  //    |          ---------------------   (__pi_stackend - __pi_stacksize)
-  //    |
-  //    |          padding to align the following AIX guard pages, if enabled.
-  //    |
-  //    V          ---------------------   __pi_stackaddr
-  //
-  // low addr      AIX guard pages, if enabled (AIXTHREAD_GUARDPAGES > 0)
-  //
-
-  address stack_base = (address)(pinfo.__pi_stackend);
-  address stack_low_addr = (address)align_ptr_up(pinfo.__pi_stackaddr,
-    os::vm_page_size());
-  size_t stack_size = stack_base - stack_low_addr;
-
-  if (p_stack_base) {
-    *p_stack_base = stack_base;
-  }
-
-  if (p_stack_size) {
-    *p_stack_size = stack_size;
-  }
-
-  return true;
+// Returns the stack base (high address) of the calling thread as reported by
+// the pthread library. Note: usually not page aligned.
+address os::current_stack_base() {
+  AixMisc::stackbounds_t stack_bounds;
+  const bool ok = AixMisc::query_stack_bounds_for_current_thread(&stack_bounds);
+  guarantee(ok, "Unable to retrieve stack bounds.");
+  return stack_bounds.base;
 }
 
-// Get the current stack base from the OS (actually, the pthread library).
-address os::current_stack_base() {
-  address p;
-  query_stack_dimensions(&p, 0);
-  return p;
-}
-
 // Get the current stack size from the OS (actually, the pthread library).
+// The size is adjusted so that (base - size) lands on a page boundary.
 size_t os::current_stack_size() {
-  size_t s;
-  query_stack_dimensions(0, &s);
+  AixMisc::stackbounds_t stack_bounds;
+  const bool ok = AixMisc::query_stack_bounds_for_current_thread(&stack_bounds);
+  guarantee(ok, "Unable to retrieve stack bounds.");
+  // Callers assume the stack low address is page aligned and place guard
+  // pages there without checking, so round the low address up to the next
+  // page boundary and report the size relative to it. The base is usually
+  // not page aligned either, but that does not matter here.
+  address raw_low = stack_bounds.base - stack_bounds.size;
+  address aligned_low = (address)align_ptr_up(raw_low, os::vm_page_size());
+  size_t s = stack_bounds.base - aligned_low;
   return s;
 }
 
--- a/hotspot/src/os/aix/vm/porting_aix.cpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/os/aix/vm/porting_aix.cpp	Wed Jun 28 20:21:04 2017 +0000
@@ -35,6 +35,7 @@
 
 #include <demangle.h>
 #include <sys/debug.h>
+#include <pthread.h>
 #include <ucontext.h>
 
 //////////////////////////////////
@@ -680,13 +681,14 @@
   // retrieve it from the OS.
   stackptr_t stack_base = NULL;
   size_t stack_size = NULL;
-  Thread* const thread = Thread::current_or_null_safe();
-  if (thread) {
-    stack_base = (stackptr_t) thread->stack_base();
-    stack_size = thread->stack_size();
-  } else {
-    stack_base = (stackptr_t) os::current_stack_base();
-    stack_size = os::current_stack_size();
+  {
+    AixMisc::stackbounds_t stackbounds;
+    if (!AixMisc::query_stack_bounds_for_current_thread(&stackbounds)) {
+      st->print_cr("Cannot retrieve stack bounds.");
+      return;
+    }
+    stack_base = (stackptr_t)stackbounds.base;
+    stack_size = stackbounds.size;
   }
 
   st->print_cr("------ current frame:");
@@ -809,5 +811,73 @@
 }
 
 
+bool AixMisc::query_stack_bounds_for_current_thread(stackbounds_t* out) {
 
+  // Fills *out with the calling thread's stack base and size as reported by
+  // pthread_getthrds_np(). Returns false (after printing to stderr) on failure.
+  // API documentation: the pthread.h header and http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.basetechref/doc/basetrf1/pthread_getthrds_np.htm
+  //
+  // The use of this API to find out the current stack is kind of undefined.
+  // After a lot of tries and asking IBM about it, it appears safe enough when
+  // the pthread library creates the stack. For a user-provided stack handed to
+  // pthread_create it seems not to work (the returned stack size is then 0).
 
+  pthread_t tid = pthread_self();
+  struct __pthrdsinfo pinfo;
+  char dummy[1]; // Just needed to satisfy pthread_getthrds_np.
+  int dummy_size = sizeof(dummy);
+
+  memset(&pinfo, 0, sizeof(pinfo));
+
+  const int rc = pthread_getthrds_np(&tid, PTHRDSINFO_QUERY_ALL, &pinfo,
+                                     sizeof(pinfo), dummy, &dummy_size);
+
+  if (rc != 0) {
+    fprintf(stderr, "pthread_getthrds_np failed (%d)\n", rc);
+    fflush(stderr); // flush the stream the message was written to
+    return false;
+  }
+
+  // The following may happen when invoking pthread_getthrds_np on a pthread
+  // running on a user provided stack (when handing down a stack to pthread
+  // create, see pthread_attr_setstackaddr).
+  // Not sure what to do then.
+  if (pinfo.__pi_stackend == NULL || pinfo.__pi_stackaddr == NULL) {
+    fprintf(stderr, "pthread_getthrds_np - invalid values\n");
+    fflush(stderr); // flush the stream the message was written to
+    return false;
+  }
+
+  // Note: we get three values from pthread_getthrds_np:
+  //       __pi_stackaddr, __pi_stacksize, __pi_stackend
+  //
+  // high addr    ---------------------
+  //
+  //    |         pthread internal data, like ~2K
+  //    |
+  //    |         ---------------------   __pi_stackend   (usually not page aligned, (xxxxF890))   <- out->base
+  //    |
+  //    |
+  //    |
+  //    |
+  //    |
+  //    |
+  //    |          ---------------------   (__pi_stackend - __pi_stacksize)
+  //    |
+  //    |          padding to align the following AIX guard pages, if enabled.
+  //    |
+  //    V          ---------------------   __pi_stackaddr                                        <- low, out->base - out->size
+  //
+  // low addr      AIX guard pages, if enabled (AIXTHREAD_GUARDPAGES > 0)
+  //
+
+  out->base = (address)pinfo.__pi_stackend;
+  address low = (address)pinfo.__pi_stackaddr;
+  out->size = out->base - low;
+  return true;
+
+}
+
+
+
+
--- a/hotspot/src/os/aix/vm/porting_aix.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/os/aix/vm/porting_aix.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -87,11 +87,25 @@
 
 class AixNativeCallstack {
  public:
+  // This function can be used independently from os::init();
   static void print_callstack_for_context(outputStream* st, const ucontext_t* uc,
                                           bool demangle,
                                           char* buf, size_t buf_size);
 };
 
+class AixMisc {
+ public:
+  struct stackbounds_t {
+    address base; // high address (stack grows down)
+    size_t size;
+  };
+
+  // Invokes pthread_getthrds_np() and returns its values. Note: values are
+  // not aligned to stack page sizes.
+  // This function can be used independently from os::init();
+  static bool query_stack_bounds_for_current_thread(stackbounds_t* out);
+
+};
 
 #endif // OS_AIX_VM_PORTING_AIX_HPP
 
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.cpp	Wed Jun 28 20:21:04 2017 +0000
@@ -1108,7 +1108,7 @@
 
 class RebuildRSOutOfRegionClosure: public HeapRegionClosure {
   G1CollectedHeap*   _g1h;
-  UpdateRSOopClosure _cl;
+  RebuildRSOopClosure _cl;
 public:
   RebuildRSOutOfRegionClosure(G1CollectedHeap* g1, uint worker_i = 0) :
     _cl(g1->g1_rem_set(), worker_i),
@@ -2360,20 +2360,6 @@
 
 // Iteration functions.
 
-// Applies an ExtendedOopClosure onto all references of objects within a HeapRegion.
-
-class IterateOopClosureRegionClosure: public HeapRegionClosure {
-  ExtendedOopClosure* _cl;
-public:
-  IterateOopClosureRegionClosure(ExtendedOopClosure* cl) : _cl(cl) {}
-  bool doHeapRegion(HeapRegion* r) {
-    if (!r->is_continues_humongous()) {
-      r->oop_iterate(_cl);
-    }
-    return false;
-  }
-};
-
 // Iterates an ObjectClosure over all objects within a HeapRegion.
 
 class IterateObjectClosureRegionClosure: public HeapRegionClosure {
@@ -2397,12 +2383,10 @@
   _hrm.iterate(cl);
 }
 
-void
-G1CollectedHeap::heap_region_par_iterate(HeapRegionClosure* cl,
-                                         uint worker_id,
-                                         HeapRegionClaimer *hrclaimer,
-                                         bool concurrent) const {
-  _hrm.par_iterate(cl, worker_id, hrclaimer, concurrent);
+void G1CollectedHeap::heap_region_par_iterate(HeapRegionClosure* cl,
+                                              uint worker_id,
+                                              HeapRegionClaimer *hrclaimer) const {
+  _hrm.par_iterate(cl, worker_id, hrclaimer);
 }
 
 void G1CollectedHeap::collection_set_iterate(HeapRegionClosure* cl) {
@@ -3274,7 +3258,7 @@
         // investigate this in CR 7178365.
         double sample_end_time_sec = os::elapsedTime();
         double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS;
-        size_t total_cards_scanned = per_thread_states.total_cards_scanned();
+        size_t total_cards_scanned = g1_policy()->phase_times()->sum_thread_work_items(G1GCPhaseTimes::ScanRS, G1GCPhaseTimes::ScannedCards);
         g1_policy()->record_collection_pause_end(pause_time_ms, total_cards_scanned, heap_used_bytes_before_gc);
 
         evacuation_info.set_collectionset_used_before(collection_set()->bytes_used_before());
@@ -3458,17 +3442,13 @@
 
       _root_processor->evacuate_roots(pss->closures(), worker_id);
 
-      G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, pss);
-
       // We pass a weak code blobs closure to the remembered set scanning because we want to avoid
       // treating the nmethods visited to act as roots for concurrent marking.
       // We only want to make sure that the oops in the nmethods are adjusted with regard to the
       // objects copied by the current evacuation.
-      size_t cards_scanned = _g1h->g1_rem_set()->oops_into_collection_set_do(&push_heap_rs_cl,
-                                                                             pss->closures()->weak_codeblobs(),
-                                                                             worker_id);
-
-      _pss->add_cards_scanned(worker_id, cards_scanned);
+      _g1h->g1_rem_set()->oops_into_collection_set_do(pss,
+                                                      pss->closures()->weak_codeblobs(),
+                                                      worker_id);
 
       double strong_roots_sec = os::elapsedTime() - start_strong_roots_sec;
 
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -1136,6 +1136,7 @@
   // set. Assumes that the reference points into the heap.
   inline bool is_in_cset(const HeapRegion *hr);
   inline bool is_in_cset(oop obj);
+  inline bool is_in_cset(HeapWord* addr);
 
   inline bool is_in_cset_or_humongous(const oop obj);
 
@@ -1194,17 +1195,14 @@
   inline HeapWord* bottom_addr_for_region(uint index) const;
 
   // Iterate over the heap regions in parallel. Assumes that this will be called
-  // in parallel by ParallelGCThreads worker threads with distinct worker ids
-  // in the range [0..max(ParallelGCThreads-1, 1)]. Applies "blk->doHeapRegion"
+  // in parallel by a number of worker threads with distinct worker ids
+  // in the range passed to the HeapRegionClaimer. Applies "blk->doHeapRegion"
   // to each of the regions, by attempting to claim the region using the
   // HeapRegionClaimer and, if successful, applying the closure to the claimed
-  // region. The concurrent argument should be set to true if iteration is
-  // performed concurrently, during which no assumptions are made for consistent
-  // attributes of the heap regions (as they might be modified while iterating).
+  // region.
   void heap_region_par_iterate(HeapRegionClosure* cl,
                                uint worker_id,
-                               HeapRegionClaimer* hrclaimer,
-                               bool concurrent = false) const;
+                               HeapRegionClaimer* hrclaimer) const;
 
   // Iterate over the regions (if any) in the current collection set.
   void collection_set_iterate(HeapRegionClosure* blk);
--- a/hotspot/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1CollectedHeap.inline.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -139,7 +139,11 @@
 }
 
 inline bool G1CollectedHeap::is_in_cset(oop obj) {
-  return _in_cset_fast_test.is_in_cset((HeapWord*)obj);
+  return is_in_cset((HeapWord*)obj);
+}
+
+inline bool G1CollectedHeap::is_in_cset(HeapWord* addr) {
+  return _in_cset_fast_test.is_in_cset(addr);
 }
 
 bool G1CollectedHeap::is_in_cset(const HeapRegion* hr) {
--- a/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Wed Jun 28 20:21:04 2017 +0000
@@ -703,7 +703,7 @@
 
   void work(uint worker_id) {
     SuspendibleThreadSetJoiner sts_join(_suspendible);
-    G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer, true);
+    G1CollectedHeap::heap()->heap_region_par_iterate(&_cl, worker_id, &_hr_claimer);
   }
 
   bool is_complete() {
--- a/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.cpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.cpp	Wed Jun 28 20:21:04 2017 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -78,6 +78,13 @@
   _gc_par_phases[GCWorkerEnd] = new WorkerDataArray<double>(max_gc_threads, "GC Worker End (ms):");
   _gc_par_phases[Other] = new WorkerDataArray<double>(max_gc_threads, "GC Worker Other (ms):");
 
+  _scan_rs_scanned_cards = new WorkerDataArray<size_t>(max_gc_threads, "Scanned Cards:");
+  _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_scanned_cards, ScannedCards);
+  _scan_rs_claimed_cards = new WorkerDataArray<size_t>(max_gc_threads, "Claimed Cards:");
+  _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_claimed_cards, ClaimedCards);
+  _scan_rs_skipped_cards = new WorkerDataArray<size_t>(max_gc_threads, "Skipped Cards:");
+  _gc_par_phases[ScanRS]->link_thread_work_items(_scan_rs_skipped_cards, SkippedCards);
+
   _update_rs_processed_buffers = new WorkerDataArray<size_t>(max_gc_threads, "Processed Buffers:");
   _gc_par_phases[UpdateRS]->link_thread_work_items(_update_rs_processed_buffers);
 
@@ -210,8 +217,8 @@
   _gc_par_phases[phase]->add(worker_i, secs);
 }
 
-void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count) {
-  _gc_par_phases[phase]->set_thread_work_item(worker_i, count);
+void G1GCPhaseTimes::record_thread_work_item(GCParPhases phase, uint worker_i, size_t count, uint index) {
+  _gc_par_phases[phase]->set_thread_work_item(worker_i, count, index);
 }
 
 // return the average time for a phase in milliseconds
@@ -219,9 +226,9 @@
   return _gc_par_phases[phase]->average() * 1000.0;
 }
 
-size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase) {
-  assert(_gc_par_phases[phase]->thread_work_items() != NULL, "No sub count");
-  return _gc_par_phases[phase]->thread_work_items()->sum();
+size_t G1GCPhaseTimes::sum_thread_work_items(GCParPhases phase, uint index) {
+  assert(_gc_par_phases[phase]->thread_work_items(index) != NULL, "No sub count");
+  return _gc_par_phases[phase]->thread_work_items(index)->sum();
 }
 
 template <class T>
@@ -239,11 +246,13 @@
   phase->print_summary_on(out, print_sum);
   details(phase, Indents[indent]);
 
-  WorkerDataArray<size_t>* work_items = phase->thread_work_items();
-  if (work_items != NULL) {
-    out->print("%s", Indents[indent + 1]);
-    work_items->print_summary_on(out, true);
-    details(work_items, Indents[indent + 1]);
+  for (uint i = 0; i < phase->MaxThreadWorkItems; i++) {
+    WorkerDataArray<size_t>* work_items = phase->thread_work_items(i);
+    if (work_items != NULL) {
+      out->print("%s", Indents[indent + 1]);
+      work_items->print_summary_on(out, true);
+      details(work_items, Indents[indent + 1]);
+    }
   }
 }
 
--- a/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1GCPhaseTimes.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -76,6 +76,12 @@
     GCParPhasesSentinel
   };
 
+  enum GCScanRSWorkItems {
+    ScannedCards,
+    ClaimedCards,
+    SkippedCards
+  };
+
  private:
   // Markers for grouping the phases in the GCPhases enum above
   static const int GCMainParPhasesLast = GCWorkerEnd;
@@ -83,8 +89,15 @@
   static const int StringDedupPhasesLast = StringDedupTableFixup;
 
   WorkerDataArray<double>* _gc_par_phases[GCParPhasesSentinel];
+
   WorkerDataArray<size_t>* _update_rs_processed_buffers;
+
+  WorkerDataArray<size_t>* _scan_rs_scanned_cards;
+  WorkerDataArray<size_t>* _scan_rs_claimed_cards;
+  WorkerDataArray<size_t>* _scan_rs_skipped_cards;
+
   WorkerDataArray<size_t>* _termination_attempts;
+
   WorkerDataArray<size_t>* _redirtied_cards;
 
   double _cur_collection_par_time_ms;
@@ -170,12 +183,12 @@
   // add a number of seconds to a phase
   void add_time_secs(GCParPhases phase, uint worker_i, double secs);
 
-  void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count);
+  void record_thread_work_item(GCParPhases phase, uint worker_i, size_t count, uint index = 0);
 
   // return the average time for a phase in milliseconds
   double average_time_ms(GCParPhases phase);
 
-  size_t sum_thread_work_items(GCParPhases phase);
+  size_t sum_thread_work_items(GCParPhases phase, uint index = 0);
 
  public:
 
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.cpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.cpp	Wed Jun 28 20:21:04 2017 +0000
@@ -38,7 +38,7 @@
   _cm(_g1->concurrent_mark())
 { }
 
-G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+G1ScanClosureBase::G1ScanClosureBase(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
   _g1(g1), _par_scan_state(par_scan_state)
 { }
 
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_GC_G1_G1OOPCLOSURES_HPP
 #define SHARE_VM_GC_G1_G1OOPCLOSURES_HPP
 
+#include "gc/g1/g1InCSetState.hpp"
 #include "memory/iterator.hpp"
 #include "oops/markOop.hpp"
 
@@ -47,34 +48,60 @@
   void set_region(HeapRegion* from) { _from = from; }
 };
 
-class G1ParClosureSuper : public OopsInHeapRegionClosure {
+class G1ScanClosureBase : public OopsInHeapRegionClosure {
 protected:
   G1CollectedHeap* _g1;
   G1ParScanThreadState* _par_scan_state;
 
-  G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state);
-  ~G1ParClosureSuper() { }
+  G1ScanClosureBase(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state);
+  ~G1ScanClosureBase() { }
 
+  template <class T>
+  inline void prefetch_and_push(T* p, oop const obj);
+
+  template <class T>
+  inline void handle_non_cset_obj_common(InCSetState const state, T* p, oop const obj);
 public:
   // This closure needs special handling for InstanceRefKlass.
   virtual ReferenceIterationMode reference_iteration_mode() { return DO_DISCOVERED_AND_DISCOVERY; }
 };
 
-class G1ParPushHeapRSClosure : public G1ParClosureSuper {
+// Used during the Update RS phase to refine remaining cards in the DCQ during garbage collection.
+class G1ScanObjsDuringUpdateRSClosure: public G1ScanClosureBase {
+  uint _worker_i;
+  bool _has_refs_into_cset;
+
 public:
-  G1ParPushHeapRSClosure(G1CollectedHeap* g1,
-                         G1ParScanThreadState* par_scan_state):
-    G1ParClosureSuper(g1, par_scan_state) { }
+  // Initializers follow member declaration order (_worker_i, _has_refs_into_cset).
+  G1ScanObjsDuringUpdateRSClosure(G1CollectedHeap* g1h, G1ParScanThreadState* pss,
+                                  uint worker_i) :
+    G1ScanClosureBase(g1h, pss), _worker_i(worker_i), _has_refs_into_cset(false) { }
+
+  void reset_has_refs_into_cset() { _has_refs_into_cset = false; }
+  bool has_refs_into_cset() const { return _has_refs_into_cset; }
+
+  template <class T> void do_oop_nv(T* p);
+  virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+  virtual void do_oop(oop* p) { do_oop_nv(p); }
+};
+
+// Used during the Scan RS phase to scan cards from the remembered set during garbage collection.
+class G1ScanObjsDuringScanRSClosure : public G1ScanClosureBase {
+public:
+  G1ScanObjsDuringScanRSClosure(G1CollectedHeap* g1,
+                                G1ParScanThreadState* par_scan_state):
+    G1ScanClosureBase(g1, par_scan_state) { }
 
   template <class T> void do_oop_nv(T* p);
   virtual void do_oop(oop* p)          { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p)    { do_oop_nv(p); }
 };
 
-class G1ParScanClosure : public G1ParClosureSuper {
+// This closure is applied to the fields of the objects that have just been copied during evacuation.
+class G1ScanEvacuatedObjClosure : public G1ScanClosureBase {
 public:
-  G1ParScanClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
-    G1ParClosureSuper(g1, par_scan_state) { }
+  G1ScanEvacuatedObjClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+    G1ScanClosureBase(g1, par_scan_state) { }
 
   template <class T> void do_oop_nv(T* p);
   virtual void do_oop(oop* p)          { do_oop_nv(p); }
@@ -186,42 +213,7 @@
 
   template <class T> void do_oop_nv(T* p);
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
-  virtual void do_oop(oop* p) { do_oop_nv(p); }
-};
-
-class G1UpdateRSOrPushRefOopClosure: public ExtendedOopClosure {
-  G1CollectedHeap* _g1;
-  HeapRegion* _from;
-  G1ParPushHeapRSClosure* _push_ref_cl;
-  bool _record_refs_into_cset;
-  uint _worker_i;
-  bool _has_refs_into_cset;
-
-public:
-  G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
-                                G1ParPushHeapRSClosure* push_ref_cl,
-                                bool record_refs_into_cset,
-                                uint worker_i = 0);
-
-  void set_from(HeapRegion* from) {
-    assert(from != NULL, "from region must be non-NULL");
-    _from = from;
-  }
-
-  bool self_forwarded(oop obj) {
-    markOop m = obj->mark();
-    bool result = (m->is_marked() && ((oop)m->decode_pointer() == obj));
-    return result;
-  }
-
-  bool has_refs_into_cset() const { return _has_refs_into_cset; }
-
-  template <class T> inline void do_oop_nv(T* p);
-  virtual inline void do_oop(narrowOop* p);
-  virtual inline void do_oop(oop* p);
-
-  // This closure needs special handling for InstanceRefKlass.
-  virtual ReferenceIterationMode reference_iteration_mode() { return DO_DISCOVERED_AND_DISCOVERY; }
+  virtual void do_oop(oop* p)       { do_oop_nv(p); }
 };
 
 #endif // SHARE_VM_GC_G1_G1OOPCLOSURES_HPP
--- a/hotspot/src/share/vm/gc/g1/g1OopClosures.inline.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1OopClosures.inline.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -36,61 +36,51 @@
 #include "memory/iterator.inline.hpp"
 #include "runtime/prefetch.inline.hpp"
 
-// This closure is applied to the fields of the objects that have just been copied.
 template <class T>
-inline void G1ParScanClosure::do_oop_nv(T* p) {
-  T heap_oop = oopDesc::load_heap_oop(p);
+inline void G1ScanClosureBase::prefetch_and_push(T* p, const oop obj) {
+  // We're not going to even bother checking whether the object is
+  // already forwarded or not, as this usually causes an immediate
+  // stall. We'll try to prefetch the object (for write, given that
+  // we might need to install the forwarding reference) and we'll
+  // get back to it when we pop it from the queue
+  Prefetch::write(obj->mark_addr(), 0);
+  Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
 
-  if (!oopDesc::is_null(heap_oop)) {
-    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    const InCSetState state = _g1->in_cset_state(obj);
-    if (state.is_in_cset()) {
-      // We're not going to even bother checking whether the object is
-      // already forwarded or not, as this usually causes an immediate
-      // stall. We'll try to prefetch the object (for write, given that
-      // we might need to install the forwarding reference) and we'll
-      // get back to it when pop it from the queue
-      Prefetch::write(obj->mark_addr(), 0);
-      Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+  // slightly paranoid test; I'm trying to catch potential
+  // problems before we go into push_on_queue to know where the
+  // problem is coming from
+  assert((obj == oopDesc::load_decode_heap_oop(p)) ||
+         (obj->is_forwarded() &&
+         obj->forwardee() == oopDesc::load_decode_heap_oop(p)),
+         "p should still be pointing to obj or to its forwardee");
 
-      // slightly paranoid test; I'm trying to catch potential
-      // problems before we go into push_on_queue to know where the
-      // problem is coming from
-      assert((obj == oopDesc::load_decode_heap_oop(p)) ||
-             (obj->is_forwarded() &&
-                 obj->forwardee() == oopDesc::load_decode_heap_oop(p)),
-             "p should still be pointing to obj or to its forwardee");
+  _par_scan_state->push_on_queue(p);
+}
 
-      _par_scan_state->push_on_queue(p);
-    } else {
-      if (state.is_humongous()) {
-        _g1->set_humongous_is_live(obj);
-      } else if (state.is_ext()) {
-        _par_scan_state->do_oop_ext(p);
-      }
-      _par_scan_state->update_rs(_from, p, obj);
-    }
+template <class T>
+inline void G1ScanClosureBase::handle_non_cset_obj_common(InCSetState const state, T* p, oop const obj) {
+  if (state.is_humongous()) {
+    _g1->set_humongous_is_live(obj);
+  } else if (state.is_ext()) {
+    _par_scan_state->do_oop_ext(p);
   }
 }
 
 template <class T>
-inline void G1ParPushHeapRSClosure::do_oop_nv(T* p) {
+inline void G1ScanEvacuatedObjClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
 
-  if (!oopDesc::is_null(heap_oop)) {
-    oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    const InCSetState state = _g1->in_cset_state(obj);
-    if (state.is_in_cset_or_humongous()) {
-      Prefetch::write(obj->mark_addr(), 0);
-      Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
+  if (oopDesc::is_null(heap_oop)) {
+    return;
+  }
+  oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+  const InCSetState state = _g1->in_cset_state(obj);
+  if (state.is_in_cset()) {
+    prefetch_and_push(p, obj);
+  } else {
+    handle_non_cset_obj_common(state, p, obj);
 
-      // Place on the references queue
-      _par_scan_state->push_on_queue(p);
-    } else if (state.is_ext()) {
-      _par_scan_state->do_oop_ext(p);
-    } else {
-      assert(!_g1->is_in_cset(obj), "checking");
-    }
+    _par_scan_state->update_rs(_from, p, obj);
   }
 }
 
@@ -145,10 +135,10 @@
     // Normally this closure should only be called with cross-region references.
     // But since Java threads are manipulating the references concurrently and we
     // reload the values things may have changed.
-    // This check lets slip through references from a humongous continues region
+    // Also this check lets slip through references from a humongous continues region
     // to its humongous start region, as they are in different regions, and adds a
-    // remembered set entry. This is benign (apart from memory usage), as this
-    // closure is never called during evacuation.
+    // remembered set entry. This is benign (apart from memory usage), as we never
+    // try to either evacuate or eager reclaim humongous arrays of j.l.O.
     return;
   }
 
@@ -159,79 +149,50 @@
 }
 
 template <class T>
-inline void G1UpdateRSOrPushRefOopClosure::do_oop_nv(T* p) {
-  oop obj = oopDesc::load_decode_heap_oop(p);
-  if (obj == NULL) {
+inline void G1ScanObjsDuringUpdateRSClosure::do_oop_nv(T* p) {
+  T o = oopDesc::load_heap_oop(p);
+  if (oopDesc::is_null(o)) {
     return;
   }
+  oop obj = oopDesc::decode_heap_oop_not_null(o);
 
-#ifdef ASSERT
-  // can't do because of races
-  // assert(obj == NULL || obj->is_oop(), "expected an oop");
-  assert(check_obj_alignment(obj), "not oop aligned");
-  assert(_g1->is_in_reserved(obj), "must be in heap");
-#endif // ASSERT
+  check_obj_during_refinement(p, obj);
 
-  assert(_from != NULL, "from region must be non-NULL");
-  assert(_from->is_in_reserved(p) ||
-         (_from->is_humongous() &&
-          _g1->heap_region_containing(p)->is_humongous() &&
-          _from->humongous_start_region() == _g1->heap_region_containing(p)->humongous_start_region()),
-         "p " PTR_FORMAT " is not in the same region %u or part of the correct humongous object starting at region %u.",
-         p2i(p), _from->hrm_index(), _from->humongous_start_region()->hrm_index());
+  assert(!_g1->is_in_cset((HeapWord*)p), "Oop originates from " PTR_FORMAT " (region: %u) which is in the collection set.", p2i(p), _g1->addr_to_region((HeapWord*)p));
+  const InCSetState state = _g1->in_cset_state(obj);
+  if (state.is_in_cset()) {
+    // Since the source is always from outside the collection set, here we implicitly know
+    // that this is a cross-region reference too.
+    prefetch_and_push(p, obj);
 
-  HeapRegion* to = _g1->heap_region_containing(obj);
-  if (_from == to) {
-    // Normally this closure should only be called with cross-region references.
-    // But since Java threads are manipulating the references concurrently and we
-    // reload the values things may have changed.
-    // Also this check lets slip through references from a humongous continues region
-    // to its humongous start region, as they are in different regions, and adds a
-    // remembered set entry. This is benign (apart from memory usage), as we never
-    // try to either evacuate or eager reclaim these kind of regions.
-    return;
-  }
+    _has_refs_into_cset = true;
+  } else {
+    HeapRegion* to = _g1->heap_region_containing(obj);
+    if (_from == to) {
+      return;
+    }
 
-  // The _record_refs_into_cset flag is true during the RSet
-  // updating part of an evacuation pause. It is false at all
-  // other times:
-  //  * rebuilding the remembered sets after a full GC
-  //  * during concurrent refinement.
-  //  * updating the remembered sets of regions in the collection
-  //    set in the event of an evacuation failure (when deferred
-  //    updates are enabled).
+    handle_non_cset_obj_common(state, p, obj);
 
-  if (_record_refs_into_cset && to->in_collection_set()) {
-    // We are recording references that point into the collection
-    // set and this particular reference does exactly that...
-    // If the referenced object has already been forwarded
-    // to itself, we are handling an evacuation failure and
-    // we have already visited/tried to copy this object
-    // there is no need to retry.
-    if (!self_forwarded(obj)) {
-    assert(_push_ref_cl != NULL, "should not be null");
-    // Push the reference in the refs queue of the G1ParScanThreadState
-    // instance for this worker thread.
-      _push_ref_cl->do_oop(p);
-    }
-    _has_refs_into_cset = true;
-
-    // Deferred updates to the CSet are either discarded (in the normal case),
-    // or processed (if an evacuation failure occurs) at the end
-    // of the collection.
-    // See G1RemSet::cleanup_after_oops_into_collection_set_do().
-  } else {
-    // We either don't care about pushing references that point into the
-    // collection set (i.e. we're not during an evacuation pause) _or_
-    // the reference doesn't point into the collection set. Either way
-    // we add the reference directly to the RSet of the region containing
-    // the referenced object.
-    assert(to->rem_set() != NULL, "Need per-region 'into' remsets.");
     to->rem_set()->add_reference(p, _worker_i);
   }
 }
-void G1UpdateRSOrPushRefOopClosure::do_oop(oop* p)       { do_oop_nv(p); }
-void G1UpdateRSOrPushRefOopClosure::do_oop(narrowOop* p) { do_oop_nv(p); }
+
+template <class T>
+inline void G1ScanObjsDuringScanRSClosure::do_oop_nv(T* p) {
+  T heap_oop = oopDesc::load_heap_oop(p);
+  if (oopDesc::is_null(heap_oop)) {
+    return;
+  }
+  oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+
+  const InCSetState state = _g1->in_cset_state(obj);
+  if (state.is_in_cset()) {
+    prefetch_and_push(p, obj);
+  } else {
+    handle_non_cset_obj_common(state, p, obj);
+  }
+}
 
 template <class T>
 void G1ParCopyHelper::do_klass_barrier(T* p, oop new_obj) {
--- a/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.cpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.cpp	Wed Jun 28 20:21:04 2017 +0000
@@ -337,16 +337,6 @@
   return _states[worker_id];
 }
 
-void G1ParScanThreadStateSet::add_cards_scanned(uint worker_id, size_t cards_scanned) {
-  assert(worker_id < _n_workers, "out of bounds access");
-  _cards_scanned[worker_id] += cards_scanned;
-}
-
-size_t G1ParScanThreadStateSet::total_cards_scanned() const {
-  assert(_flushed, "thread local state from the per thread states should have been flushed");
-  return _total_cards_scanned;
-}
-
 const size_t* G1ParScanThreadStateSet::surviving_young_words() const {
   assert(_flushed, "thread local state from the per thread states should have been flushed");
   return _surviving_young_words_total;
@@ -354,7 +344,6 @@
 
 void G1ParScanThreadStateSet::flush() {
   assert(!_flushed, "thread local state from the per thread states should be flushed once");
-  assert(_total_cards_scanned == 0, "should have been cleared");
 
   for (uint worker_index = 0; worker_index < _n_workers; ++worker_index) {
     G1ParScanThreadState* pss = _states[worker_index];
@@ -363,8 +352,6 @@
       continue;
     }
 
-    _total_cards_scanned += _cards_scanned[worker_index];
-
     pss->flush(_surviving_young_words_total);
     delete pss;
     _states[worker_index] = NULL;
--- a/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1ParScanThreadState.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -54,7 +54,7 @@
   InCSetState       _dest[InCSetState::Num];
   // Local tenuring threshold.
   uint              _tenuring_threshold;
-  G1ParScanClosure  _scanner;
+  G1ScanEvacuatedObjClosure  _scanner;
 
   int  _hash_seed;
   uint _worker_id;
@@ -198,8 +198,6 @@
   G1CollectedHeap* _g1h;
   G1ParScanThreadState** _states;
   size_t* _surviving_young_words_total;
-  size_t* _cards_scanned;
-  size_t _total_cards_scanned;
   size_t _young_cset_length;
   uint _n_workers;
   bool _flushed;
@@ -209,8 +207,6 @@
       _g1h(g1h),
       _states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, n_workers, mtGC)),
       _surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, young_cset_length, mtGC)),
-      _cards_scanned(NEW_C_HEAP_ARRAY(size_t, n_workers, mtGC)),
-      _total_cards_scanned(0),
       _young_cset_length(young_cset_length),
       _n_workers(n_workers),
       _flushed(false) {
@@ -218,22 +214,18 @@
       _states[i] = NULL;
     }
     memset(_surviving_young_words_total, 0, young_cset_length * sizeof(size_t));
-    memset(_cards_scanned, 0, n_workers * sizeof(size_t));
   }
 
   ~G1ParScanThreadStateSet() {
     assert(_flushed, "thread local state from the per thread states should have been flushed");
     FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states);
     FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_total);
-    FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
   }
 
   void flush();
 
   G1ParScanThreadState* state_for_worker(uint worker_id);
 
-  void add_cards_scanned(uint worker_id, size_t cards_scanned);
-  size_t total_cards_scanned() const;
   const size_t* surviving_young_words() const;
 
  private:
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.cpp	Wed Jun 28 20:21:04 2017 +0000
@@ -326,24 +326,24 @@
   }
 }
 
-G1ScanRSClosure::G1ScanRSClosure(G1RemSetScanState* scan_state,
-                                 G1ParPushHeapRSClosure* push_heap_cl,
-                                 CodeBlobClosure* code_root_cl,
-                                 uint worker_i) :
+G1ScanRSForRegionClosure::G1ScanRSForRegionClosure(G1RemSetScanState* scan_state,
+                                                   G1ScanObjsDuringScanRSClosure* scan_obj_on_card,
+                                                   CodeBlobClosure* code_root_cl,
+                                                   uint worker_i) :
   _scan_state(scan_state),
-  _push_heap_cl(push_heap_cl),
+  _scan_objs_on_card_cl(scan_obj_on_card),
   _code_root_cl(code_root_cl),
   _strong_code_root_scan_time_sec(0.0),
-  _cards(0),
-  _cards_done(0),
+  _cards_claimed(0),
+  _cards_scanned(0),
+  _cards_skipped(0),
   _worker_i(worker_i) {
   _g1h = G1CollectedHeap::heap();
   _bot = _g1h->bot();
   _ct_bs = _g1h->g1_barrier_set();
-  _block_size = MAX2<size_t>(G1RSetScanBlockSize, 1);
 }
 
-void G1ScanRSClosure::scan_card(size_t index, HeapWord* card_start, HeapRegion *r) {
+void G1ScanRSForRegionClosure::scan_card(size_t index, HeapWord* card_start, HeapRegion *r) {
   MemRegion card_region(card_start, BOTConstants::N_words);
   MemRegion pre_gc_allocated(r->bottom(), _scan_state->scan_top(r->hrm_index()));
   MemRegion mr = pre_gc_allocated.intersection(card_region);
@@ -352,19 +352,19 @@
     // but they're benign), which reduces the number of duplicate
     // scans (the rsets of the regions in the cset can intersect).
     _ct_bs->set_card_claimed(index);
-    _push_heap_cl->set_region(r);
-    r->oops_on_card_seq_iterate_careful<true>(mr, _push_heap_cl);
-    _cards_done++;
+    _scan_objs_on_card_cl->set_region(r);
+    r->oops_on_card_seq_iterate_careful<true>(mr, _scan_objs_on_card_cl);
+    _cards_scanned++;
   }
 }
 
-void G1ScanRSClosure::scan_strong_code_roots(HeapRegion* r) {
+void G1ScanRSForRegionClosure::scan_strong_code_roots(HeapRegion* r) {
   double scan_start = os::elapsedTime();
   r->strong_code_roots_do(_code_root_cl);
   _strong_code_root_scan_time_sec += (os::elapsedTime() - scan_start);
 }
 
-bool G1ScanRSClosure::doHeapRegion(HeapRegion* r) {
+bool G1ScanRSForRegionClosure::doHeapRegion(HeapRegion* r) {
   assert(r->in_collection_set(), "should only be called on elements of CS.");
   uint region_idx = r->hrm_index();
 
@@ -378,23 +378,25 @@
     _scan_state->add_dirty_region(region_idx);
   }
 
+  // We claim cards in blocks so as to reduce the contention.
+  size_t const block_size = G1RSetScanBlockSize;
+
   HeapRegionRemSetIterator iter(r->rem_set());
   size_t card_index;
 
-  // We claim cards in block so as to reduce the contention. The block size is determined by
-  // the G1RSetScanBlockSize parameter.
-  size_t claimed_card_block = _scan_state->iter_claimed_next(region_idx, _block_size);
+  size_t claimed_card_block = _scan_state->iter_claimed_next(region_idx, block_size);
   for (size_t current_card = 0; iter.has_next(card_index); current_card++) {
-    if (current_card >= claimed_card_block + _block_size) {
-      claimed_card_block = _scan_state->iter_claimed_next(region_idx, _block_size);
+    if (current_card >= claimed_card_block + block_size) {
+      claimed_card_block = _scan_state->iter_claimed_next(region_idx, block_size);
     }
     if (current_card < claimed_card_block) {
+      _cards_skipped++;
       continue;
     }
     HeapWord* card_start = _g1h->bot()->address_for_index(card_index);
 
     HeapRegion* card_region = _g1h->heap_region_containing(card_start);
-    _cards++;
+    _cards_claimed++;
 
     _scan_state->add_dirty_region(card_region->hrm_index());
 
@@ -411,36 +413,40 @@
   return false;
 }
 
-size_t G1RemSet::scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure,
-                              CodeBlobClosure* heap_region_codeblobs,
-                              uint worker_i) {
+void G1RemSet::scan_rem_set(G1ParScanThreadState* pss,
+                            CodeBlobClosure* heap_region_codeblobs,
+                            uint worker_i) {
   double rs_time_start = os::elapsedTime();
 
-  G1ScanRSClosure cl(_scan_state, oops_in_heap_closure, heap_region_codeblobs, worker_i);
+  G1ScanObjsDuringScanRSClosure scan_cl(_g1, pss);
+  G1ScanRSForRegionClosure cl(_scan_state, &scan_cl, heap_region_codeblobs, worker_i);
   _g1->collection_set_iterate_from(&cl, worker_i);
 
-   double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
-                              cl.strong_code_root_scan_time_sec();
+  double scan_rs_time_sec = (os::elapsedTime() - rs_time_start) -
+                             cl.strong_code_root_scan_time_sec();
 
-  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
-  _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, cl.strong_code_root_scan_time_sec());
+  G1GCPhaseTimes* p = _g1p->phase_times();
 
-  return cl.cards_done();
+  p->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
+  p->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_scanned(), G1GCPhaseTimes::ScannedCards);
+  p->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_claimed(), G1GCPhaseTimes::ClaimedCards);
+  p->record_thread_work_item(G1GCPhaseTimes::ScanRS, worker_i, cl.cards_skipped(), G1GCPhaseTimes::SkippedCards);
+
+  p->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, cl.strong_code_root_scan_time_sec());
 }
 
 // Closure used for updating RSets and recording references that
 // point into the collection set. Only called during an
 // evacuation pause.
-
-class RefineRecordRefsIntoCSCardTableEntryClosure: public CardTableEntryClosure {
+class G1RefineCardClosure: public CardTableEntryClosure {
   G1RemSet* _g1rs;
   DirtyCardQueue* _into_cset_dcq;
-  G1ParPushHeapRSClosure* _cl;
+  G1ScanObjsDuringUpdateRSClosure* _update_rs_cl;
 public:
-  RefineRecordRefsIntoCSCardTableEntryClosure(G1CollectedHeap* g1h,
-                                              DirtyCardQueue* into_cset_dcq,
-                                              G1ParPushHeapRSClosure* cl) :
-    _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq), _cl(cl)
+  G1RefineCardClosure(G1CollectedHeap* g1h,
+                      DirtyCardQueue* into_cset_dcq,
+                      G1ScanObjsDuringUpdateRSClosure* update_rs_cl) :
+    _g1rs(g1h->g1_rem_set()), _into_cset_dcq(into_cset_dcq), _update_rs_cl(update_rs_cl)
   {}
 
   bool do_card_ptr(jbyte* card_ptr, uint worker_i) {
@@ -449,9 +455,8 @@
     // is during RSet updating within an evacuation pause.
     // In this case worker_i should be the id of a GC worker thread.
     assert(SafepointSynchronize::is_at_safepoint(), "not during an evacuation pause");
-    assert(worker_i < ParallelGCThreads, "should be a GC worker");
 
-    if (_g1rs->refine_card_during_gc(card_ptr, worker_i, _cl)) {
+    if (_g1rs->refine_card_during_gc(card_ptr, _update_rs_cl)) {
       // 'card_ptr' contains references that point into the collection
       // set. We need to record the card in the DCQS
       // (_into_cset_dirty_card_queue_set)
@@ -465,27 +470,28 @@
 };
 
 void G1RemSet::update_rem_set(DirtyCardQueue* into_cset_dcq,
-                              G1ParPushHeapRSClosure* oops_in_heap_closure,
+                              G1ParScanThreadState* pss,
                               uint worker_i) {
-  RefineRecordRefsIntoCSCardTableEntryClosure into_cset_update_rs_cl(_g1, into_cset_dcq, oops_in_heap_closure);
+  G1ScanObjsDuringUpdateRSClosure update_rs_cl(_g1, pss, worker_i);
+  G1RefineCardClosure refine_card_cl(_g1, into_cset_dcq, &update_rs_cl);
 
   G1GCParPhaseTimesTracker x(_g1p->phase_times(), G1GCPhaseTimes::UpdateRS, worker_i);
   if (G1HotCardCache::default_use_cache()) {
     // Apply the closure to the entries of the hot card cache.
     G1GCParPhaseTimesTracker y(_g1p->phase_times(), G1GCPhaseTimes::ScanHCC, worker_i);
-    _g1->iterate_hcc_closure(&into_cset_update_rs_cl, worker_i);
+    _g1->iterate_hcc_closure(&refine_card_cl, worker_i);
   }
   // Apply the closure to all remaining log entries.
-  _g1->iterate_dirty_card_closure(&into_cset_update_rs_cl, worker_i);
+  _g1->iterate_dirty_card_closure(&refine_card_cl, worker_i);
 }
 
 void G1RemSet::cleanupHRRS() {
   HeapRegionRemSet::cleanup();
 }
 
-size_t G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* cl,
-                                             CodeBlobClosure* heap_region_codeblobs,
-                                             uint worker_i) {
+void G1RemSet::oops_into_collection_set_do(G1ParScanThreadState* pss,
+                                           CodeBlobClosure* heap_region_codeblobs,
+                                           uint worker_i) {
   // A DirtyCardQueue that is used to hold cards containing references
   // that point into the collection set. This DCQ is associated with a
   // special DirtyCardQueueSet (see g1CollectedHeap.hpp).  Under normal
@@ -497,8 +503,8 @@
   // DirtyCardQueueSet that is used to manage RSet updates
   DirtyCardQueue into_cset_dcq(&_into_cset_dirty_card_queue_set);
 
-  update_rem_set(&into_cset_dcq, cl, worker_i);
-  return scan_rem_set(cl, heap_region_codeblobs, worker_i);;
+  update_rem_set(&into_cset_dcq, pss, worker_i);
+  scan_rem_set(pss, heap_region_codeblobs, worker_i);;
 }
 
 void G1RemSet::prepare_for_oops_into_collection_set_do() {
@@ -573,17 +579,6 @@
 #endif
 }
 
-G1UpdateRSOrPushRefOopClosure::G1UpdateRSOrPushRefOopClosure(G1CollectedHeap* g1h,
-                                                             G1ParPushHeapRSClosure* push_ref_cl,
-                                                             bool record_refs_into_cset,
-                                                             uint worker_i) :
-  _g1(g1h),
-  _from(NULL),
-  _record_refs_into_cset(record_refs_into_cset),
-  _has_refs_into_cset(false),
-  _push_ref_cl(push_ref_cl),
-  _worker_i(worker_i) { }
-
 void G1RemSet::refine_card_concurrently(jbyte* card_ptr,
                                         uint worker_i) {
   assert(!_g1->is_gc_active(), "Only call concurrently");
@@ -734,8 +729,7 @@
 }
 
 bool G1RemSet::refine_card_during_gc(jbyte* card_ptr,
-                                     uint worker_i,
-                                     G1ParPushHeapRSClosure*  oops_in_heap_closure) {
+                                     G1ScanObjsDuringUpdateRSClosure* update_rs_cl) {
   assert(_g1->is_gc_active(), "Only call during GC");
 
   check_card_ptr(card_ptr, _ct_bs);
@@ -769,19 +763,14 @@
   MemRegion dirty_region(card_start, MIN2(scan_limit, card_end));
   assert(!dirty_region.is_empty(), "sanity");
 
-  G1UpdateRSOrPushRefOopClosure update_rs_oop_cl(_g1,
-                                                 oops_in_heap_closure,
-                                                 true,
-                                                 worker_i);
-  update_rs_oop_cl.set_from(r);
+  update_rs_cl->set_region(r);
+  update_rs_cl->reset_has_refs_into_cset();
 
-  bool card_processed =
-    r->oops_on_card_seq_iterate_careful<true>(dirty_region,
-                                              &update_rs_oop_cl);
+  bool card_processed = r->oops_on_card_seq_iterate_careful<true>(dirty_region, update_rs_cl);
   assert(card_processed, "must be");
   _conc_refine_cards++;
 
-  return update_rs_oop_cl.has_refs_into_cset();
+  return update_rs_cl->has_refs_into_cset();
 }
 
 void G1RemSet::print_periodic_summary_info(const char* header, uint period_count) {
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -41,10 +41,12 @@
 class CodeBlobClosure;
 class G1CollectedHeap;
 class G1HotCardCache;
-class G1ParPushHeapRSClosure;
 class G1RemSetScanState;
+class G1ParScanThreadState;
 class G1Policy;
 class G1SATBCardTableModRefBS;
+class G1ScanObjsDuringScanRSClosure;
+class G1ScanObjsDuringUpdateRSClosure;
 class HeapRegionClaimer;
 
 // A G1RemSet in which each heap region has a rem set that records the
@@ -63,6 +65,16 @@
   // set in the event of an evacuation failure.
   DirtyCardQueueSet _into_cset_dirty_card_queue_set;
 
+  // Scan all remembered sets of the collection set for references into the collection
+  // set.
+  void scan_rem_set(G1ParScanThreadState* pss,
+                    CodeBlobClosure* heap_region_codeblobs,
+                    uint worker_i);
+
+  // Flush remaining refinement buffers for cross-region references to either evacuate references
+  // into the collection set or update the remembered set.
+  void update_rem_set(DirtyCardQueue* into_cset_dcq, G1ParScanThreadState* pss, uint worker_i);
+
 protected:
   G1CollectedHeap* _g1;
   size_t _conc_refine_cards;
@@ -94,66 +106,41 @@
            G1HotCardCache* hot_card_cache);
   ~G1RemSet();
 
-  // Invoke "cl->do_oop" on all pointers into the collection set
-  // from objects in regions outside the collection set (having
-  // invoked "cl->set_region" to set the "from" region correctly
-  // beforehand.)
+  // Process all oops in the collection set from the cards in the refinement buffers and
+  // remembered sets using pss.
   //
-  // Apply non_heap_roots on the oops of the unmarked nmethods
-  // on the strong code roots list for each region in the
-  // collection set.
-  //
-  // The "worker_i" param is for the parallel case where the id
-  // of the worker thread calling this function can be helpful in
-  // partitioning the work to be done. It should be the same as
-  // the "i" passed to the calling thread's work(i) function.
-  // In the sequential case this param will be ignored.
-  //
-  // Returns the number of cards scanned while looking for pointers
-  // into the collection set.
-  size_t oops_into_collection_set_do(G1ParPushHeapRSClosure* cl,
-                                     CodeBlobClosure* heap_region_codeblobs,
-                                     uint worker_i);
+  // Further applies heap_region_codeblobs on the oops of the unmarked nmethods on the strong code
+  // roots list for each region in the collection set.
+  void oops_into_collection_set_do(G1ParScanThreadState* pss,
+                                   CodeBlobClosure* heap_region_codeblobs,
+                                   uint worker_i);
 
   // Prepare for and cleanup after an oops_into_collection_set_do
   // call.  Must call each of these once before and after (in sequential
-  // code) any threads call oops_into_collection_set_do.  (This offers an
-  // opportunity to sequential setup and teardown of structures needed by a
-  // parallel iteration over the CS's RS.)
+  // code) any thread calls oops_into_collection_set_do.
   void prepare_for_oops_into_collection_set_do();
   void cleanup_after_oops_into_collection_set_do();
 
-  size_t scan_rem_set(G1ParPushHeapRSClosure* oops_in_heap_closure,
-                      CodeBlobClosure* heap_region_codeblobs,
-                      uint worker_i);
-
   G1RemSetScanState* scan_state() const { return _scan_state; }
 
-  // Flush remaining refinement buffers into the remembered set,
-  // applying oops_in_heap_closure on the references found.
-  void update_rem_set(DirtyCardQueue* into_cset_dcq, G1ParPushHeapRSClosure* oops_in_heap_closure, uint worker_i);
-
   // Record, if necessary, the fact that *p (where "p" is in region "from",
   // which is required to be non-NULL) has changed to a new non-NULL value.
   template <class T> void par_write_ref(HeapRegion* from, T* p, uint tid);
 
-  // Requires "region_bm" and "card_bm" to be bitmaps with 1 bit per region
-  // or card, respectively, such that a region or card with a corresponding
-  // 0 bit contains no part of any live object.  Eliminates any remembered
-  // set entries that correspond to dead heap ranges. "worker_num" is the
-  // parallel thread id of the current thread, and "hrclaimer" is the
-  // HeapRegionClaimer that should be used.
+  // Eliminates any remembered set entries that correspond to dead heap ranges.
   void scrub(uint worker_num, HeapRegionClaimer* hrclaimer);
 
-  // Refine the card corresponding to "card_ptr".
+  // Refine the card corresponding to "card_ptr". Safe to be called concurrently
+  // with the mutator.
   void refine_card_concurrently(jbyte* card_ptr,
                                 uint worker_i);
 
-  // Refine the card corresponding to "card_ptr". Returns "true" if the given card contains
-  // oops that have references into the current collection set.
+  // Refine the card corresponding to "card_ptr", applying the given closure to
+  // all references found. Returns "true" if the given card contains
+  // oops that have references into the current collection set. Must only be
+  // called during gc.
   bool refine_card_during_gc(jbyte* card_ptr,
-                             uint worker_i,
-                             G1ParPushHeapRSClosure* oops_in_heap_closure);
+                             G1ScanObjsDuringUpdateRSClosure* update_rs_cl);
 
   // Print accumulated summary info from the start of the VM.
   void print_summary_info();
@@ -164,9 +151,7 @@
   // Prepare remembered set for verification.
   void prepare_for_verify();
 
-  size_t conc_refine_cards() const {
-    return _conc_refine_cards;
-  }
+  size_t conc_refine_cards() const { return _conc_refine_cards; }
 
   void create_card_live_data(WorkGang* workers, G1CMBitMap* mark_bitmap);
   void finalize_card_live_data(WorkGang* workers, G1CMBitMap* mark_bitmap);
@@ -182,14 +167,16 @@
 #endif
 };
 
-class G1ScanRSClosure : public HeapRegionClosure {
+class G1ScanRSForRegionClosure : public HeapRegionClosure {
   G1RemSetScanState* _scan_state;
 
-  size_t _cards_done;
-  size_t _cards;
+  size_t _cards_scanned;
+  size_t _cards_claimed;
+  size_t _cards_skipped;
+
   G1CollectedHeap* _g1h;
 
-  G1ParPushHeapRSClosure* _push_heap_cl;
+  G1ScanObjsDuringScanRSClosure* _scan_objs_on_card_cl;
   CodeBlobClosure* _code_root_cl;
 
   G1BlockOffsetTable* _bot;
@@ -197,15 +184,14 @@
 
   double _strong_code_root_scan_time_sec;
   uint   _worker_i;
-  size_t _block_size;
 
   void scan_card(size_t index, HeapWord* card_start, HeapRegion *r);
   void scan_strong_code_roots(HeapRegion* r);
 public:
-  G1ScanRSClosure(G1RemSetScanState* scan_state,
-                  G1ParPushHeapRSClosure* push_heap_cl,
-                  CodeBlobClosure* code_root_cl,
-                  uint worker_i);
+  G1ScanRSForRegionClosure(G1RemSetScanState* scan_state,
+                           G1ScanObjsDuringScanRSClosure* scan_obj_on_card,
+                           CodeBlobClosure* code_root_cl,
+                           uint worker_i);
 
   bool doHeapRegion(HeapRegion* r);
 
@@ -213,11 +199,12 @@
     return _strong_code_root_scan_time_sec;
   }
 
-  size_t cards_done() { return _cards_done;}
-  size_t cards_looked_up() { return _cards;}
+  size_t cards_scanned() const { return _cards_scanned; }
+  size_t cards_claimed() const { return _cards_claimed; }
+  size_t cards_skipped() const { return _cards_skipped; }
 };
 
-class UpdateRSOopClosure: public ExtendedOopClosure {
+class RebuildRSOopClosure: public ExtendedOopClosure {
   HeapRegion* _from;
   G1RemSet* _rs;
   uint _worker_i;
@@ -225,7 +212,7 @@
   template <class T> void do_oop_work(T* p);
 
 public:
-  UpdateRSOopClosure(G1RemSet* rs, uint worker_i = 0) :
+  RebuildRSOopClosure(G1RemSet* rs, uint worker_i = 0) :
     _from(NULL), _rs(rs), _worker_i(worker_i)
   {}
 
--- a/hotspot/src/share/vm/gc/g1/g1RemSet.inline.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1RemSet.inline.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -58,7 +58,7 @@
 }
 
 template <class T>
-inline void UpdateRSOopClosure::do_oop_work(T* p) {
+inline void RebuildRSOopClosure::do_oop_work(T* p) {
   assert(_from != NULL, "from region must be non-NULL");
   _rs->par_write_ref(_from, p, _worker_i);
 }
--- a/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1RootProcessor.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,6 @@
 class G1CollectedHeap;
 class G1EvacuationRootClosures;
 class G1GCPhaseTimes;
-class G1ParPushHeapRSClosure;
 class G1RootClosures;
 class Monitor;
 class OopClosure;
--- a/hotspot/src/share/vm/gc/g1/g1_specialized_oop_closures.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/g1_specialized_oop_closures.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -32,19 +32,19 @@
 
 // Forward declarations.
 
-class G1ParScanClosure;
-class G1ParPushHeapRSClosure;
+class G1ScanEvacuatedObjClosure;
 
-class G1UpdateRSOrPushRefOopClosure;
+class G1ScanObjsDuringUpdateRSClosure;
+class G1ScanObjsDuringScanRSClosure;
 class G1ConcurrentRefineOopClosure;
 
 class G1CMOopClosure;
 class G1RootRegionScanClosure;
 
 #define SPECIALIZED_OOP_OOP_ITERATE_CLOSURES_G1(f) \
-      f(G1ParScanClosure,_nv)                      \
-      f(G1ParPushHeapRSClosure,_nv)                \
-      f(G1UpdateRSOrPushRefOopClosure,_nv)         \
+      f(G1ScanEvacuatedObjClosure,_nv)             \
+      f(G1ScanObjsDuringUpdateRSClosure,_nv)       \
+      f(G1ScanObjsDuringScanRSClosure,_nv)         \
       f(G1ConcurrentRefineOopClosure,_nv)          \
       f(G1CMOopClosure,_nv)                        \
       f(G1RootRegionScanClosure,_nv)
--- a/hotspot/src/share/vm/gc/g1/heapRegionManager.cpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/heapRegionManager.cpp	Wed Jun 28 20:21:04 2017 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -327,7 +327,7 @@
   return true;
 }
 
-void HeapRegionManager::par_iterate(HeapRegionClosure* blk, uint worker_id, HeapRegionClaimer* hrclaimer, bool concurrent) const {
+void HeapRegionManager::par_iterate(HeapRegionClosure* blk, uint worker_id, HeapRegionClaimer* hrclaimer) const {
   const uint start_index = hrclaimer->start_region_for_worker(worker_id);
 
   // Every worker will actually look at all regions, skipping over regions that
--- a/hotspot/src/share/vm/gc/g1/heapRegionManager.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/heapRegionManager.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -240,7 +240,7 @@
   // terminating the iteration early if doHeapRegion() returns true.
   void iterate(HeapRegionClosure* blk) const;
 
-  void par_iterate(HeapRegionClosure* blk, uint worker_id, HeapRegionClaimer* hrclaimer, bool concurrent) const;
+  void par_iterate(HeapRegionClosure* blk, uint worker_id, HeapRegionClaimer* hrclaimer) const;
 
   // Uncommit up to num_regions_to_remove regions that are completely free.
   // Return the actual number of uncommitted regions.
--- a/hotspot/src/share/vm/gc/g1/workerDataArray.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/workerDataArray.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -33,20 +33,25 @@
 template <class T>
 class WorkerDataArray  : public CHeapObj<mtGC> {
   friend class WDAPrinter;
+public:
+  static const uint MaxThreadWorkItems = 3;
+private:
   T*          _data;
   uint        _length;
   const char* _title;
 
-  WorkerDataArray<size_t>* _thread_work_items;
+  WorkerDataArray<size_t>* _thread_work_items[MaxThreadWorkItems];
 
  public:
   WorkerDataArray(uint length, const char* title);
   ~WorkerDataArray();
 
-  void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items);
-  void set_thread_work_item(uint worker_i, size_t value);
-  WorkerDataArray<size_t>* thread_work_items() const {
-    return _thread_work_items;
+  void link_thread_work_items(WorkerDataArray<size_t>* thread_work_items, uint index = 0);
+  void set_thread_work_item(uint worker_i, size_t value, uint index = 0);
+  void add_thread_work_item(uint worker_i, size_t value, uint index = 0);
+  WorkerDataArray<size_t>* thread_work_items(uint index = 0) const {  // sub-count array linked at slot 'index'; NULL if none was linked
+    assert(index < MaxThreadWorkItems, "Tried to access thread work item %u (max %u)", index, MaxThreadWorkItems);
+    return _thread_work_items[index];
   }
 
   static T uninitialized();
--- a/hotspot/src/share/vm/gc/g1/workerDataArray.inline.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/gc/g1/workerDataArray.inline.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -32,11 +32,13 @@
 template <typename T>
 WorkerDataArray<T>::WorkerDataArray(uint length, const char* title) :
  _title(title),
- _length(0),
- _thread_work_items(NULL) {
+ _length(0) {  // placeholder; the real length is assigned once the assert below has checked it
   assert(length > 0, "Must have some workers to store data for");
   _length = length;
   _data = NEW_C_HEAP_ARRAY(T, _length, mtGC);
+  for (uint i = 0; i < MaxThreadWorkItems; i++) {  // clear every slot before reset() below, which calls reset() on each non-NULL slot
+    _thread_work_items[i] = NULL;
+  }
   reset();
 }
 
@@ -59,14 +61,23 @@
 }
 
 template <typename T>
-void WorkerDataArray<T>::link_thread_work_items(WorkerDataArray<size_t>* thread_work_items) {
-  _thread_work_items = thread_work_items;
+void WorkerDataArray<T>::link_thread_work_items(WorkerDataArray<size_t>* thread_work_items, uint index) {  // attach a sub-count array at slot 'index'
+  assert(index < MaxThreadWorkItems, "Tried to access thread work item %u (max %u)", index, MaxThreadWorkItems);
+  _thread_work_items[index] = thread_work_items;  // only the pointer is stored; whatever was linked at this slot before is overwritten
 }
 
 template <typename T>
-void WorkerDataArray<T>::set_thread_work_item(uint worker_i, size_t value) {
-  assert(_thread_work_items != NULL, "No sub count");
-  _thread_work_items->set(worker_i, value);
+void WorkerDataArray<T>::set_thread_work_item(uint worker_i, size_t value, uint index) {  // forwards to set() on the sub-count array at slot 'index'
+  assert(index < MaxThreadWorkItems, "Tried to access thread work item %u (max %u)", index, MaxThreadWorkItems);
+  assert(_thread_work_items[index] != NULL, "No sub count");  // the slot must have been linked via link_thread_work_items() first
+  _thread_work_items[index]->set(worker_i, value);
+}
+
+template <typename T>
+void WorkerDataArray<T>::add_thread_work_item(uint worker_i, size_t value, uint index) {  // forwards to add() on the sub-count array at slot 'index'
+  assert(index < MaxThreadWorkItems, "Tried to access thread work item %u (max %u)", index, MaxThreadWorkItems);
+  assert(_thread_work_items[index] != NULL, "No sub count");  // the slot must have been linked via link_thread_work_items() first
+  _thread_work_items[index]->add(worker_i, value);
 }
 
 template <typename T>
@@ -148,8 +159,10 @@
 template <typename T>
 void WorkerDataArray<T>::reset() {
   set_all(uninitialized());
-  if (_thread_work_items != NULL) {
-    _thread_work_items->reset();
+  for (uint i = 0; i < MaxThreadWorkItems; i++) {  // propagate the reset to every linked sub-count array
+    if (_thread_work_items[i] != NULL) {  // slots that were never linked are skipped
+      _thread_work_items[i]->reset();
+    }
   }
 }
 
--- a/hotspot/src/share/vm/runtime/globals.hpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/src/share/vm/runtime/globals.hpp	Wed Jun 28 20:21:04 2017 +0000
@@ -1304,7 +1304,7 @@
   product(bool, UseBiasedLocking, true,                                     \
           "Enable biased locking in JVM")                                   \
                                                                             \
-  product(intx, BiasedLockingStartupDelay, 4000,                            \
+  product(intx, BiasedLockingStartupDelay, 0,                               \
           "Number of milliseconds to wait before enabling biased locking")  \
           range(0, (intx)(max_jint-(max_jint%PeriodicTask::interval_gran))) \
           constraint(BiasedLockingStartupDelayFunc,AfterErgo)               \
--- a/hotspot/test/Makefile	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/test/Makefile	Wed Jun 28 20:21:04 2017 +0000
@@ -95,26 +95,8 @@
 ALT_MAKE ?= closed
 -include $(ALT_MAKE)/Makefile
 
-# flags used to execute java in test targets
-TEST_FLAGS += -version -Xinternalversion -X -help
-
-sanitytest: prep $(PRODUCT_HOME)
-	@for flag in $(TEST_FLAGS);                                             \
-	do                                                                      \
-	    echo Executing java $(JAVA_OPTIONS) $$flag;                         \
-	    $(PRODUCT_HOME)/bin/java $(JAVA_OPTIONS) $$flag;                    \
-	    res=$$?;                                                            \
-	    if [ $$res -ne 0 ]; then                                            \
-	        exit $$res;                                                     \
-	    fi;                                                                 \
-	done
-
-PHONY_LIST += sanitytest
-
 ################################################################
 
-# basicvmtest (make sure various basic java options work)
-
 # Set up the directory in which the jvm directories live (client/, server/, etc.)
 ifeq ($(PLATFORM),windows)
 JVMS_DIR := $(PRODUCT_HOME)/bin
@@ -126,45 +108,6 @@
 CANDIDATE_JVM_VARIANTS := client minimal server
 JVM_VARIANTS := $(strip $(foreach x,$(CANDIDATE_JVM_VARIANTS),$(if $(wildcard $(JVMS_DIR)/$(x)),$(x))))
 
-hotspot_basicvmtest:
-	for variant in $(JVM_VARIANTS);                                           \
-	do                                                                        \
-	    $(MAKE) JAVA_ARGS="$(JAVA_ARGS) -$$variant" hotspot_$${variant}test;  \
-	    res=$$?;                                                              \
-	    if [ $$res -ne 0 ]; then                                              \
-	        exit $$res;                                                       \
-	    fi;                                                                   \
-	done
-
-PHONY_LIST += hotspot_basicvmtest
-
-################################################################
-
-# clienttest (make sure various basic java client options work)
-
-hotspot_clienttest clienttest: sanitytest
-	$(RM) $(PRODUCT_HOME)/jre/lib/*/client/classes.jsa
-	$(RM) $(PRODUCT_HOME)/jre/bin/client/classes.jsa
-	$(PRODUCT_HOME)/bin/java $(JAVA_OPTIONS) -Xshare:dump
-
-PHONY_LIST += hotspot_clienttest clienttest
-
-################################################################
-
-# minimaltest (make sure various basic java minimal options work)
-
-hotspot_minimaltest minimaltest: sanitytest
-
-PHONY_LIST += hotspot_minimaltest minimaltest
-
-################################################################
-
-# servertest (make sure various basic java server options work)
-
-hotspot_servertest servertest: sanitytest
-
-PHONY_LIST += hotspot_servertest servertest
-
 ################################################################
 
 # Run the native gtest tests from the test image
--- a/hotspot/test/ProblemList.txt	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/test/ProblemList.txt	Wed Jun 28 20:21:04 2017 +0000
@@ -50,61 +50,7 @@
 compiler/types/correctness/CorrectnessTest.java 8066173 generic-all
 compiler/types/correctness/OffTest.java 8066173 generic-all
 
-# aot tests intermittently failing in jprt 8175791
-
-compiler/aot/RecompilationTest.java 8175791 windows-all
-compiler/aot/SharedUsageTest.java 8175791 windows-all
-compiler/aot/cli/MultipleAOTLibraryTest.java 8175791 windows-all
-compiler/aot/cli/DisabledAOTWithLibraryTest.java 8175791 windows-all
-compiler/aot/cli/SingleAOTLibraryTest.java 8175791 windows-all
-compiler/aot/cli/NonExistingAOTLibraryTest.java 8175791 windows-all
-compiler/aot/cli/SingleAOTOptionTest.java 8175791 windows-all
-compiler/aot/cli/jaotc/ClasspathOptionTest.java 8175791 windows-all
-compiler/aot/cli/jaotc/ListOptionTest.java 8175791 windows-all
-compiler/aot/cli/jaotc/CompileModuleTest.java 8175791 windows-all
-compiler/aot/cli/jaotc/CompileClassTest.java 8175791 windows-all
-compiler/aot/cli/jaotc/CompileJarTest.java 8175791 windows-all
-compiler/aot/cli/jaotc/ListOptionWrongFileTest.java 8175791 windows-all
-compiler/aot/cli/jaotc/CompileDirectoryTest.java 8175791 windows-all
-compiler/aot/cli/jaotc/ClasspathOptionUnknownClassTest.java 8175791 windows-all
-compiler/aot/cli/jaotc/ListOptionNotExistingTest.java 8175791 windows-all
-compiler/aot/cli/IncorrectAOTLibraryTest.java 8175791 windows-all
-compiler/aot/verification/vmflags/TrackedFlagTest.java 8175791 windows-all
-compiler/aot/verification/vmflags/NotTrackedFlagTest.java 8175791 windows-all
-compiler/aot/verification/ClassAndLibraryNotMatchTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeInterface2InterpretedTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeDynamic2CompiledTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeVirtual2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeVirtual2NativeTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeStatic2CompiledTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeStatic2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeSpecial2NativeTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeDynamic2InterpretedTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeVirtual2CompiledTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeSpecial2InterpretedTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeSpecial2CompiledTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeVirtual2InterpretedTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeStatic2InterpretedTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeInterface2CompiledTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeStatic2NativeTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeInterface2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeSpecial2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeDynamic2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeInterface2NativeTest.java 8175791 windows-all
-compiler/aot/calls/fromAot/AotInvokeDynamic2NativeTest.java 8175791 windows-all
-compiler/aot/calls/fromNative/NativeInvokeVirtual2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromNative/NativeInvokeStatic2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromNative/NativeInvokeSpecial2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromInterpreted/InterpretedInvokeDynamic2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromInterpreted/InterpretedInvokeSpecial2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromInterpreted/InterpretedInvokeStatic2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromInterpreted/InterpretedInvokeInterface2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromInterpreted/InterpretedInvokeVirtual2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromCompiled/CompiledInvokeStatic2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromCompiled/CompiledInvokeInterface2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromCompiled/CompiledInvokeSpecial2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromCompiled/CompiledInvokeVirtual2AotTest.java 8175791 windows-all
-compiler/aot/calls/fromCompiled/CompiledInvokeDynamic2AotTest.java 8175791 windows-all
+# aot test intermittently failing in jprt 8175791
 compiler/aot/DeoptimizationTest.java 8175791 windows-all
 
 #############################################################################
--- a/hotspot/test/TEST.groups	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/test/TEST.groups	Wed Jun 28 20:21:04 2017 +0000
@@ -47,6 +47,10 @@
 hotspot_native_sanity = \
   native_sanity
 
+hotspot_tier1_common = \
+  sanity/BasicVMTest.java \
+  native/GTestWrapper.java
+
 hotspot_tier1_compiler_1 = \
   compiler/aot/ \
   compiler/arraycopy/ \
@@ -196,6 +200,7 @@
   serviceability/logging
 
 hotspot_tier1 = \
+  :hotspot_tier1_common \
   :hotspot_tier1_compiler_1 \
   :hotspot_tier1_compiler_2 \
   :hotspot_tier1_compiler_3 \
--- a/hotspot/test/compiler/aot/verification/ClassAndLibraryNotMatchTest.java	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/test/compiler/aot/verification/ClassAndLibraryNotMatchTest.java	Wed Jun 28 20:21:04 2017 +0000
@@ -86,7 +86,7 @@
 
     private void compileAotLibrary() {
         AotCompiler.launchCompiler(LIB_NAME, HELLO_WORLD_CLASS_NAME,
-                Arrays.asList("-classpath", Utils.TEST_CLASS_PATH + ":."), null);
+                Arrays.asList("-classpath", Utils.TEST_CLASS_PATH + File.pathSeparator + "."), null); // platform path separator rather than a hard-coded ':'
     }
 
     private void runAndCheckHelloWorld(String checkString) {
--- a/hotspot/test/gc/g1/TestGCLogMessages.java	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/test/gc/g1/TestGCLogMessages.java	Wed Jun 28 20:21:04 2017 +0000
@@ -94,7 +94,14 @@
         new LogMessageWithLevel("Other", Level.INFO),
 
         // Update RS
+        new LogMessageWithLevel("Update RS", Level.DEBUG),
+        new LogMessageWithLevel("Processed Buffers", Level.DEBUG),
         new LogMessageWithLevel("Scan HCC", Level.TRACE),
+        // Scan RS
+        new LogMessageWithLevel("Scan RS", Level.DEBUG),
+        new LogMessageWithLevel("Scanned Cards", Level.DEBUG),
+        new LogMessageWithLevel("Claimed Cards", Level.DEBUG),
+        new LogMessageWithLevel("Skipped Cards", Level.DEBUG),
         // Ext Root Scan
         new LogMessageWithLevel("Thread Roots", Level.TRACE),
         new LogMessageWithLevel("StringTable Roots", Level.TRACE),
--- a/hotspot/test/native/gc/g1/test_workerDataArray.cpp	Wed Jun 28 16:14:20 2017 -0400
+++ b/hotspot/test/native/gc/g1/test_workerDataArray.cpp	Wed Jun 28 20:21:04 2017 +0000
@@ -34,7 +34,11 @@
  protected:
   WorkerDataArrayTest() :
     title("Test array"),
-    array(3, title) {
+    array(3, title),
+    sub_item_title("Sub item array"),
+    sub_item(3, sub_item_title) {  // same length (3) as 'array'
+
+    array.link_thread_work_items(&sub_item);  // index defaults to 0, the slot the tests read back via thread_work_items(0)
   }
 
   const char* print_summary() {
@@ -65,6 +69,9 @@
   const char* title;
   WorkerDataArray<T> array;
 
+  const char* sub_item_title;
+  WorkerDataArray<size_t> sub_item;
+
  private:
   virtual const char* expected_summary() = 0;
   virtual const char* expected_details() = 0;
@@ -111,6 +118,10 @@
     array.set(0, 5);
     array.set(1, 3);
     array.set(2, 7);
+
+    array.set_thread_work_item(0, 1);
+    array.set_thread_work_item(1, 2);
+    array.set_thread_work_item(2, 3);
   }
 
  private:
@@ -125,10 +136,12 @@
 
 TEST_VM_F(BasicWorkerDataArrayTest, sum_test) {
   ASSERT_EQ(15u, array.sum());
+  ASSERT_EQ(6u, array.thread_work_items(0)->sum());  // fixture set the work items to 1, 2 and 3
 }
 
 TEST_VM_F(BasicWorkerDataArrayTest, average_test) {
   ASSERT_NEAR(5.0, array.average(), epsilon);
+  ASSERT_NEAR(2.0, array.thread_work_items(0)->average(), epsilon);  // (1 + 2 + 3) / 3
 }
 
 TEST_VM_F(BasicWorkerDataArrayTest, print_summary_on_test) {
@@ -149,6 +162,16 @@
     for (uint i = 0; i < 3; i++) {
       array.add(i, 1);
     }
+
+    WorkerDataArray<size_t>* sub_items = array.thread_work_items(0);
+
+    sub_items->set(0, 1);
+    sub_items->set(1, 2);
+    sub_items->set(2, 3);
+
+    for (uint i = 0; i < 3; i++) {
+      array.add_thread_work_item(i, 1);
+    }
   }
 
  private:
@@ -163,10 +186,12 @@
 
 TEST_VM_F(AddWorkerDataArrayTest, sum_test) {
   ASSERT_EQ(18u, array.sum());
+  ASSERT_EQ(9u, array.thread_work_items(0)->sum());  // fixture set 1, 2, 3 and then added 1 to each: 2 + 3 + 4
 }
 
 TEST_VM_F(AddWorkerDataArrayTest, average_test) {
   ASSERT_NEAR(6.0, array.average(), epsilon);
+  ASSERT_NEAR(3.0, array.thread_work_items(0)->average(), epsilon);  // (2 + 3 + 4) / 3
 }
 
 TEST_VM_F(AddWorkerDataArrayTest, print_summary_on_test) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/test/sanity/BasicVMTest.java	Wed Jun 28 20:21:04 2017 +0000
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+package sanity;
+
+import jdk.test.lib.process.ProcessTools;
+
+import java.util.List;
+
+/*
+ * @test
+ * @summary make sure various basic java options work
+ * @library /test/lib
+ *
+ * @run driver sanity.BasicVMTest
+ */
+public class BasicVMTest {
+    /** Launches the test JVM once per basic flag; every launch must exit with status 0. */
+    public static void main(String[] args) throws Exception {
+        List<String> basicFlags = List.of(
+                "-version",
+                "-Xinternalversion",
+                "-X",
+                "-help");
+        for (String vmFlag : basicFlags) {
+            ProcessTools.executeTestJvm(vmFlag).shouldHaveExitValue(0);
+        }
+    }
+}