changeset 9316:2acd6706667f

Merge
author jwilhelm
date Tue, 15 Sep 2015 15:49:33 +0200
parents bef52fbeb8ae 4f3ddb6a2b70
children 42f7bd6a45c4
files
diffstat 28 files changed, 388 insertions(+), 258 deletions(-) [+]
line wrap: on
line diff
--- a/src/os/linux/vm/os_linux.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/os/linux/vm/os_linux.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -2211,9 +2211,13 @@
   }
 }
 
-const char* search_string = IA32_ONLY("model name") AMD64_ONLY("model name")
-                            IA64_ONLY("") SPARC_ONLY("cpu")
-                            ARM32_ONLY("Processor") PPC_ONLY("Processor") AARCH64_ONLY("Processor");
+#if defined(AMD64) || defined(IA32) || defined(X32)
+const char* search_string = "model name";
+#elif defined(SPARC)
+const char* search_string = "cpu";
+#else
+const char* search_string = "Processor";
+#endif
 
 // Parses the cpuinfo file for string representing the model name.
 void os::get_summary_cpu_info(char* cpuinfo, size_t length) {
@@ -2248,9 +2252,25 @@
   }
   // cpuinfo not found or parsing failed, just print generic string.  The entire
   // /proc/cpuinfo file will be printed later in the file (or enough of it for x86)
-  strncpy(cpuinfo, IA32_ONLY("x86_32") AMD64_ONLY("x86_32")
-                   IA64_ONLY("IA64") SPARC_ONLY("sparcv9")
-                   ARM32_ONLY("ARM") PPC_ONLY("PPC64") AARCH64_ONLY("AArch64"), length);
+#if defined(AMD64)
+  strncpy(cpuinfo, "x86_64", length);
+#elif defined(IA32)
+  strncpy(cpuinfo, "x86_32", length);
+#elif defined(IA64)
+  strncpy(cpuinfo, "IA64", length);
+#elif defined(SPARC)
+  strncpy(cpuinfo, "sparcv9", length);
+#elif defined(AARCH64)
+  strncpy(cpuinfo, "AArch64", length);
+#elif defined(ARM)
+  strncpy(cpuinfo, "ARM", length);
+#elif defined(PPC)
+  strncpy(cpuinfo, "PPC64", length);
+#elif defined(ZERO_LIBARCH)
+  strncpy(cpuinfo, ZERO_LIBARCH, length);
+#else
+  strncpy(cpuinfo, "unknown", length);
+#endif
 }
 
 void os::print_siginfo(outputStream* st, void* siginfo) {
--- a/src/os/windows/vm/os_windows.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/os/windows/vm/os_windows.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -4877,6 +4877,26 @@
 // Returns true=success, otherwise false.
 
 bool os::pd_unmap_memory(char* addr, size_t bytes) {
+  MEMORY_BASIC_INFORMATION mem_info;
+  if (VirtualQuery(addr, &mem_info, sizeof(mem_info)) == 0) {
+    if (PrintMiscellaneous && Verbose) {
+      DWORD err = GetLastError();
+      tty->print_cr("VirtualQuery() failed: GetLastError->%ld.", err);
+    }
+    return false;
+  }
+
+  // Executable memory was not mapped using CreateFileMapping/MapViewOfFileEx.
+  // Instead, executable region was allocated using VirtualAlloc(). See
+  // pd_map_memory() above.
+  //
+  // The following flags should match the 'exec_access' flages used for
+  // VirtualProtect() in pd_map_memory().
+  if (mem_info.Protect == PAGE_EXECUTE_READ ||
+      mem_info.Protect == PAGE_EXECUTE_READWRITE) {
+    return pd_release_memory(addr, bytes);
+  }
+
   BOOL result = UnmapViewOfFile(addr);
   if (result == 0) {
     if (PrintMiscellaneous && Verbose) {
--- a/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -304,8 +304,7 @@
 void ConcurrentMarkSweepGeneration::initialize_performance_counters() {
 
   const char* gen_name = "old";
-  GenCollectorPolicy* gcp = (GenCollectorPolicy*) GenCollectedHeap::heap()->collector_policy();
-
+  GenCollectorPolicy* gcp = GenCollectedHeap::heap()->gen_policy();
   // Generation Counters - generation 1, 1 subspace
   _gen_counters = new GenerationCounters(gen_name, 1, 1,
       gcp->min_old_size(), gcp->max_old_size(), &_virtual_space);
--- a/src/share/vm/gc/g1/collectionSetChooser.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/g1/collectionSetChooser.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -83,7 +83,7 @@
   _regions((ResourceObj::set_allocation_type((address) &_regions,
                                              ResourceObj::C_HEAP),
                   100), true /* C_Heap */),
-    _curr_index(0), _length(0), _first_par_unreserved_idx(0),
+    _front(0), _end(0), _first_par_unreserved_idx(0),
     _region_live_threshold_bytes(0), _remaining_reclaimable_bytes(0) {
   _region_live_threshold_bytes =
     HeapRegion::GrainBytes * (size_t) G1MixedGCLiveThresholdPercent / 100;
@@ -91,19 +91,19 @@
 
 #ifndef PRODUCT
 void CollectionSetChooser::verify() {
-  guarantee(_length <= regions_length(),
-         err_msg("_length: %u regions length: %u", _length, regions_length()));
-  guarantee(_curr_index <= _length,
-            err_msg("_curr_index: %u _length: %u", _curr_index, _length));
+  guarantee(_end <= regions_length(),
+         err_msg("_end: %u regions length: %u", _end, regions_length()));
+  guarantee(_front <= _end,
+            err_msg("_front: %u _end: %u", _front, _end));
   uint index = 0;
   size_t sum_of_reclaimable_bytes = 0;
-  while (index < _curr_index) {
+  while (index < _front) {
     guarantee(regions_at(index) == NULL,
-              "all entries before _curr_index should be NULL");
+              "all entries before _front should be NULL");
     index += 1;
   }
   HeapRegion *prev = NULL;
-  while (index < _length) {
+  while (index < _end) {
     HeapRegion *curr = regions_at(index++);
     guarantee(curr != NULL, "Regions in _regions array cannot be NULL");
     guarantee(!curr->is_young(), "should not be young!");
@@ -132,15 +132,15 @@
     regions_trunc_to(_first_par_unreserved_idx);
   }
   _regions.sort(order_regions);
-  assert(_length <= regions_length(), "Requirement");
+  assert(_end <= regions_length(), "Requirement");
 #ifdef ASSERT
-  for (uint i = 0; i < _length; i++) {
+  for (uint i = 0; i < _end; i++) {
     assert(regions_at(i) != NULL, "Should be true by sorting!");
   }
 #endif // ASSERT
   if (G1PrintRegionLivenessInfo) {
     G1PrintRegionLivenessInfoClosure cl(gclog_or_tty, "Post-Sorting");
-    for (uint i = 0; i < _length; ++i) {
+    for (uint i = 0; i < _end; ++i) {
       HeapRegion* r = regions_at(i);
       cl.doHeapRegion(r);
     }
@@ -154,11 +154,19 @@
          err_msg("Pinned region shouldn't be added to the collection set (index %u)", hr->hrm_index()));
   assert(!hr->is_young(), "should not be young!");
   _regions.append(hr);
-  _length++;
+  _end++;
   _remaining_reclaimable_bytes += hr->reclaimable_bytes();
   hr->calc_gc_efficiency();
 }
 
+void CollectionSetChooser::push(HeapRegion* hr) {
+  assert(hr != NULL, "Can't put back a NULL region");
+  assert(_front >= 1, "Too many regions have been put back");
+  _front--;
+  regions_at_put(_front, hr);
+  _remaining_reclaimable_bytes += hr->reclaimable_bytes();
+}
+
 void CollectionSetChooser::prepare_for_par_region_addition(uint n_threads,
                                                            uint n_regions,
                                                            uint chunk_size) {
@@ -193,7 +201,7 @@
     // We could have just used atomics instead of taking the
     // lock. However, we currently don't have an atomic add for size_t.
     MutexLockerEx x(ParGCRareEvent_lock, Mutex::_no_safepoint_check_flag);
-    _length += region_num;
+    _end += region_num;
     _remaining_reclaimable_bytes += reclaimable_bytes;
   } else {
     assert(reclaimable_bytes == 0, "invariant");
@@ -202,7 +210,7 @@
 
 void CollectionSetChooser::clear() {
   _regions.clear();
-  _curr_index = 0;
-  _length = 0;
+  _front = 0;
+  _end = 0;
   _remaining_reclaimable_bytes = 0;
 };
--- a/src/share/vm/gc/g1/collectionSetChooser.hpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/g1/collectionSetChooser.hpp	Tue Sep 15 15:49:33 2015 +0200
@@ -48,12 +48,10 @@
 
   // The index of the next candidate old region to be considered for
   // addition to the CSet.
-  uint _curr_index;
+  uint _front;
 
-  // The number of candidate old regions added to the CSet chooser.
-  // Note: this is not updated when removing a region using
-  // remove_and_move_to_next() below.
-  uint _length;
+  // The index of the last candidate old region
+  uint _end;
 
   // Keeps track of the start of the next array chunk to be claimed by
   // parallel GC workers.
@@ -73,31 +71,33 @@
   // collection without removing it from the CSet chooser.
   HeapRegion* peek() {
     HeapRegion* res = NULL;
-    if (_curr_index < _length) {
-      res = regions_at(_curr_index);
+    if (_front < _end) {
+      res = regions_at(_front);
       assert(res != NULL,
              err_msg("Unexpected NULL hr in _regions at index %u",
-                     _curr_index));
+                     _front));
     }
     return res;
   }
 
   // Remove the given region from the CSet chooser and move to the
-  // next one. The given region should be the current candidate region
-  // in the CSet chooser.
-  void remove_and_move_to_next(HeapRegion* hr) {
+  // next one.
+  HeapRegion* pop() {
+    HeapRegion* hr = regions_at(_front);
     assert(hr != NULL, "pre-condition");
-    assert(_curr_index < _length, "pre-condition");
-    assert(regions_at(_curr_index) == hr, "pre-condition");
-    regions_at_put(_curr_index, NULL);
+    assert(_front < _end, "pre-condition");
+    regions_at_put(_front, NULL);
     assert(hr->reclaimable_bytes() <= _remaining_reclaimable_bytes,
            err_msg("remaining reclaimable bytes inconsistent "
                    "from region: " SIZE_FORMAT " remaining: " SIZE_FORMAT,
                    hr->reclaimable_bytes(), _remaining_reclaimable_bytes));
     _remaining_reclaimable_bytes -= hr->reclaimable_bytes();
-    _curr_index += 1;
+    _front += 1;
+    return hr;
   }
 
+  void push(HeapRegion* hr);
+
   CollectionSetChooser();
 
   void sort_regions();
@@ -113,7 +113,7 @@
   }
 
   // Returns the number candidate old regions added
-  uint length() { return _length; }
+  uint length() { return _end; }
 
   // Serial version.
   void add_region(HeapRegion *hr);
@@ -135,7 +135,7 @@
   void clear();
 
   // Return the number of candidate regions that remain to be collected.
-  uint remaining_regions() { return _length - _curr_index; }
+  uint remaining_regions() { return _end - _front; }
 
   // Determine whether the CSet chooser has more candidate regions or not.
   bool is_empty() { return remaining_regions() == 0; }
--- a/src/share/vm/gc/g1/concurrentG1Refine.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/g1/concurrentG1Refine.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -29,7 +29,7 @@
 #include "gc/g1/g1HotCardCache.hpp"
 #include "runtime/java.hpp"
 
-ConcurrentG1Refine::ConcurrentG1Refine(G1CollectedHeap* g1h, CardTableEntryClosure* refine_closure) :
+ConcurrentG1Refine::ConcurrentG1Refine(G1CollectedHeap* g1h) :
   _threads(NULL), _n_threads(0),
   _hot_card_cache(g1h)
 {
@@ -48,29 +48,46 @@
     FLAG_SET_DEFAULT(G1ConcRefinementRedZone, yellow_zone() * 2);
   }
   set_red_zone(MAX2<int>(G1ConcRefinementRedZone, yellow_zone()));
+}
 
-  _n_worker_threads = thread_num();
+ConcurrentG1Refine* ConcurrentG1Refine::create(G1CollectedHeap* g1h, CardTableEntryClosure* refine_closure, jint* ecode) {
+  ConcurrentG1Refine* cg1r = new ConcurrentG1Refine(g1h);
+  if (cg1r == NULL) {
+    *ecode = JNI_ENOMEM;
+    vm_shutdown_during_initialization("Could not create ConcurrentG1Refine");
+    return NULL;
+  }
+  cg1r->_n_worker_threads = thread_num();
   // We need one extra thread to do the young gen rset size sampling.
-  _n_threads = _n_worker_threads + 1;
+  cg1r->_n_threads = cg1r->_n_worker_threads + 1;
 
-  reset_threshold_step();
+  cg1r->reset_threshold_step();
 
-  _threads = NEW_C_HEAP_ARRAY(ConcurrentG1RefineThread*, _n_threads, mtGC);
+  cg1r->_threads = NEW_C_HEAP_ARRAY_RETURN_NULL(ConcurrentG1RefineThread*, cg1r->_n_threads, mtGC);
+  if (cg1r->_threads == NULL) {
+    *ecode = JNI_ENOMEM;
+    vm_shutdown_during_initialization("Could not allocate an array for ConcurrentG1RefineThread");
+    return NULL;
+  }
 
   uint worker_id_offset = DirtyCardQueueSet::num_par_ids();
 
   ConcurrentG1RefineThread *next = NULL;
-  for (uint i = _n_threads - 1; i != UINT_MAX; i--) {
-    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(this, next, refine_closure, worker_id_offset, i);
+  for (uint i = cg1r->_n_threads - 1; i != UINT_MAX; i--) {
+    ConcurrentG1RefineThread* t = new ConcurrentG1RefineThread(cg1r, next, refine_closure, worker_id_offset, i);
     assert(t != NULL, "Conc refine should have been created");
     if (t->osthread() == NULL) {
-        vm_shutdown_during_initialization("Could not create ConcurrentG1RefineThread");
+      *ecode = JNI_ENOMEM;
+      vm_shutdown_during_initialization("Could not create ConcurrentG1RefineThread");
+      return NULL;
     }
 
-    assert(t->cg1r() == this, "Conc refine thread should refer to this");
-    _threads[i] = t;
+    assert(t->cg1r() == cg1r, "Conc refine thread should refer to this");
+    cg1r->_threads[i] = t;
     next = t;
   }
+  *ecode = JNI_OK;
+  return cg1r;
 }
 
 void ConcurrentG1Refine::reset_threshold_step() {
--- a/src/share/vm/gc/g1/concurrentG1Refine.hpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/g1/concurrentG1Refine.hpp	Tue Sep 15 15:49:33 2015 +0200
@@ -71,10 +71,15 @@
   // Reset the threshold step value based of the current zone boundaries.
   void reset_threshold_step();
 
+  ConcurrentG1Refine(G1CollectedHeap* g1h);
+
  public:
-  ConcurrentG1Refine(G1CollectedHeap* g1h, CardTableEntryClosure* refine_closure);
   ~ConcurrentG1Refine();
 
+  // Returns ConcurrentG1Refine instance if succeeded to create/initialize ConcurrentG1Refine and ConcurrentG1RefineThread.
+  // Otherwise, returns NULL with error code.
+  static ConcurrentG1Refine* create(G1CollectedHeap* g1h, CardTableEntryClosure* refine_closure, jint* ecode);
+
   void init(G1RegionToSpaceMapper* card_counts_storage);
   void stop();
 
--- a/src/share/vm/gc/g1/g1CollectedHeap.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/g1/g1CollectedHeap.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -2025,7 +2025,6 @@
   _survivor_evac_stats(YoungPLABSize, PLABWeight),
   _old_evac_stats(OldPLABSize, PLABWeight),
   _expand_heap_after_alloc_failure(true),
-  _surviving_young_words(NULL),
   _old_marking_cycles_started(0),
   _old_marking_cycles_completed(0),
   _heap_summary_sent(false),
@@ -2126,7 +2125,11 @@
 
   _refine_cte_cl = new RefineCardTableEntryClosure();
 
-  _cg1r = new ConcurrentG1Refine(this, _refine_cte_cl);
+  jint ecode = JNI_OK;
+  _cg1r = ConcurrentG1Refine::create(this, _refine_cte_cl, &ecode);
+  if (_cg1r == NULL) {
+    return ecode;
+  }
 
   // Reserve the maximum.
 
@@ -2397,6 +2400,10 @@
                                 // (for efficiency/performance)
 }
 
+CollectorPolicy* G1CollectedHeap::collector_policy() const {
+  return g1_policy();
+}
+
 size_t G1CollectedHeap::capacity() const {
   return _hrm.length() * HeapRegion::GrainBytes;
 }
@@ -3694,10 +3701,6 @@
   return (buffer_size * buffer_num + extra_cards) / oopSize;
 }
 
-size_t G1CollectedHeap::cards_scanned() {
-  return g1_rem_set()->cardsScanned();
-}
-
 class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure {
  private:
   size_t _total_humongous;
@@ -3838,36 +3841,6 @@
   cl.flush_rem_set_entries();
 }
 
-void G1CollectedHeap::setup_surviving_young_words() {
-  assert(_surviving_young_words == NULL, "pre-condition");
-  uint array_length = g1_policy()->young_cset_region_length();
-  _surviving_young_words = NEW_C_HEAP_ARRAY(size_t, (size_t) array_length, mtGC);
-  if (_surviving_young_words == NULL) {
-    vm_exit_out_of_memory(sizeof(size_t) * array_length, OOM_MALLOC_ERROR,
-                          "Not enough space for young surv words summary.");
-  }
-  memset(_surviving_young_words, 0, (size_t) array_length * sizeof(size_t));
-#ifdef ASSERT
-  for (uint i = 0;  i < array_length; ++i) {
-    assert( _surviving_young_words[i] == 0, "memset above" );
-  }
-#endif // !ASSERT
-}
-
-void G1CollectedHeap::update_surviving_young_words(size_t* surv_young_words) {
-  assert_at_safepoint(true);
-  uint array_length = g1_policy()->young_cset_region_length();
-  for (uint i = 0; i < array_length; ++i) {
-    _surviving_young_words[i] += surv_young_words[i];
-  }
-}
-
-void G1CollectedHeap::cleanup_surviving_young_words() {
-  guarantee( _surviving_young_words != NULL, "pre-condition" );
-  FREE_C_HEAP_ARRAY(size_t, _surviving_young_words);
-  _surviving_young_words = NULL;
-}
-
 #ifdef ASSERT
 class VerifyCSetClosure: public HeapRegionClosure {
 public:
@@ -4129,7 +4102,8 @@
         g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty);
 #endif // YOUNG_LIST_VERBOSE
 
-        g1_policy()->finalize_cset(target_pause_time_ms);
+        double time_remaining_ms = g1_policy()->finalize_young_cset_part(target_pause_time_ms);
+        g1_policy()->finalize_old_cset_part(time_remaining_ms);
 
         evacuation_info.set_collectionset_regions(g1_policy()->cset_region_length());
 
@@ -4155,22 +4129,20 @@
         collection_set_iterate(&cl);
 #endif // ASSERT
 
-        setup_surviving_young_words();
-
         // Initialize the GC alloc regions.
         _allocator->init_gc_alloc_regions(evacuation_info);
 
+        G1ParScanThreadStateSet per_thread_states(this, workers()->active_workers(), g1_policy()->young_cset_region_length());
         // Actually do the work...
-        evacuate_collection_set(evacuation_info);
-
-        free_collection_set(g1_policy()->collection_set(), evacuation_info);
+        evacuate_collection_set(evacuation_info, &per_thread_states);
+
+        const size_t* surviving_young_words = per_thread_states.surviving_young_words();
+        free_collection_set(g1_policy()->collection_set(), evacuation_info, surviving_young_words);
 
         eagerly_reclaim_humongous_regions();
 
         g1_policy()->clear_collection_set();
 
-        cleanup_surviving_young_words();
-
         // Start a new incremental collection set for the next pause.
         g1_policy()->start_incremental_cset_building();
 
@@ -4255,7 +4227,8 @@
         // investigate this in CR 7178365.
         double sample_end_time_sec = os::elapsedTime();
         double pause_time_ms = (sample_end_time_sec - sample_start_time_sec) * MILLIUNITS;
-        g1_policy()->record_collection_pause_end(pause_time_ms);
+        size_t total_cards_scanned = per_thread_states.total_cards_scanned();
+        g1_policy()->record_collection_pause_end(pause_time_ms, total_cards_scanned);
 
         evacuation_info.set_collectionset_used_before(g1_policy()->collection_set_bytes_used_before());
         evacuation_info.set_bytes_copied(g1_policy()->bytes_copied_during_gc());
@@ -4541,15 +4514,15 @@
 
 class G1ParTask : public AbstractGangTask {
 protected:
-  G1CollectedHeap*       _g1h;
-  G1ParScanThreadState** _pss;
-  RefToScanQueueSet*     _queues;
-  G1RootProcessor*       _root_processor;
-  ParallelTaskTerminator _terminator;
-  uint _n_workers;
+  G1CollectedHeap*         _g1h;
+  G1ParScanThreadStateSet* _pss;
+  RefToScanQueueSet*       _queues;
+  G1RootProcessor*         _root_processor;
+  ParallelTaskTerminator   _terminator;
+  uint                     _n_workers;
 
 public:
-  G1ParTask(G1CollectedHeap* g1h, G1ParScanThreadState** per_thread_states, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor, uint n_workers)
+  G1ParTask(G1CollectedHeap* g1h, G1ParScanThreadStateSet* per_thread_states, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor, uint n_workers)
     : AbstractGangTask("G1 collection"),
       _g1h(g1h),
       _pss(per_thread_states),
@@ -4607,7 +4580,7 @@
 
       ReferenceProcessor*             rp = _g1h->ref_processor_stw();
 
-      G1ParScanThreadState*           pss = _pss[worker_id];
+      G1ParScanThreadState*           pss = _pss->state_for_worker(worker_id);
       pss->set_ref_processor(rp);
 
       bool only_young = _g1h->collector_state()->gcs_are_young();
@@ -4664,9 +4637,12 @@
                                       worker_id);
 
       G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, pss);
-      _g1h->g1_rem_set()->oops_into_collection_set_do(&push_heap_rs_cl,
-                                                      weak_root_cl,
-                                                      worker_id);
+      size_t cards_scanned = _g1h->g1_rem_set()->oops_into_collection_set_do(&push_heap_rs_cl,
+                                                                             weak_root_cl,
+                                                                             worker_id);
+
+      _pss->add_cards_scanned(worker_id, cards_scanned);
+
       double strong_roots_sec = os::elapsedTime() - start_strong_roots_sec;
 
       double term_sec = 0.0;
@@ -5263,15 +5239,15 @@
 
 class G1STWRefProcTaskExecutor: public AbstractRefProcTaskExecutor {
 private:
-  G1CollectedHeap*        _g1h;
-  G1ParScanThreadState**  _pss;
-  RefToScanQueueSet*      _queues;
-  WorkGang*               _workers;
-  uint                    _active_workers;
+  G1CollectedHeap*          _g1h;
+  G1ParScanThreadStateSet*  _pss;
+  RefToScanQueueSet*        _queues;
+  WorkGang*                 _workers;
+  uint                      _active_workers;
 
 public:
   G1STWRefProcTaskExecutor(G1CollectedHeap* g1h,
-                           G1ParScanThreadState** per_thread_states,
+                           G1ParScanThreadStateSet* per_thread_states,
                            WorkGang* workers,
                            RefToScanQueueSet *task_queues,
                            uint n_workers) :
@@ -5295,14 +5271,14 @@
   typedef AbstractRefProcTaskExecutor::ProcessTask ProcessTask;
   ProcessTask&     _proc_task;
   G1CollectedHeap* _g1h;
-  G1ParScanThreadState** _pss;
+  G1ParScanThreadStateSet* _pss;
   RefToScanQueueSet* _task_queues;
   ParallelTaskTerminator* _terminator;
 
 public:
   G1STWRefProcTaskProxy(ProcessTask& proc_task,
                         G1CollectedHeap* g1h,
-                        G1ParScanThreadState** per_thread_states,
+                        G1ParScanThreadStateSet* per_thread_states,
                         RefToScanQueueSet *task_queues,
                         ParallelTaskTerminator* terminator) :
     AbstractGangTask("Process reference objects in parallel"),
@@ -5320,7 +5296,7 @@
 
     G1STWIsAliveClosure is_alive(_g1h);
 
-    G1ParScanThreadState*           pss = _pss[worker_id];
+    G1ParScanThreadState*          pss = _pss->state_for_worker(worker_id);
     pss->set_ref_processor(NULL);
 
     G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, pss);
@@ -5399,14 +5375,14 @@
 
 class G1ParPreserveCMReferentsTask: public AbstractGangTask {
 protected:
-  G1CollectedHeap*       _g1h;
-  G1ParScanThreadState** _pss;
-  RefToScanQueueSet*     _queues;
-  ParallelTaskTerminator _terminator;
-  uint _n_workers;
+  G1CollectedHeap*         _g1h;
+  G1ParScanThreadStateSet* _pss;
+  RefToScanQueueSet*       _queues;
+  ParallelTaskTerminator   _terminator;
+  uint                     _n_workers;
 
 public:
-  G1ParPreserveCMReferentsTask(G1CollectedHeap* g1h, G1ParScanThreadState** per_thread_states, int workers, RefToScanQueueSet *task_queues) :
+  G1ParPreserveCMReferentsTask(G1CollectedHeap* g1h, G1ParScanThreadStateSet* per_thread_states, int workers, RefToScanQueueSet *task_queues) :
     AbstractGangTask("ParPreserveCMReferents"),
     _g1h(g1h),
     _pss(per_thread_states),
@@ -5419,7 +5395,7 @@
     ResourceMark rm;
     HandleMark   hm;
 
-    G1ParScanThreadState*          pss = _pss[worker_id];
+    G1ParScanThreadState*          pss = _pss->state_for_worker(worker_id);
     pss->set_ref_processor(NULL);
     assert(pss->queue_is_empty(), "both queue and overflow should be empty");
 
@@ -5480,7 +5456,7 @@
 };
 
 // Weak Reference processing during an evacuation pause (part 1).
-void G1CollectedHeap::process_discovered_references(G1ParScanThreadState** per_thread_states) {
+void G1CollectedHeap::process_discovered_references(G1ParScanThreadStateSet* per_thread_states) {
   double ref_proc_start = os::elapsedTime();
 
   ReferenceProcessor* rp = _ref_processor_stw;
@@ -5525,7 +5501,7 @@
   // JNI refs.
 
   // Use only a single queue for this PSS.
-  G1ParScanThreadState*           pss = per_thread_states[0];
+  G1ParScanThreadState*          pss = per_thread_states->state_for_worker(0);
   pss->set_ref_processor(NULL);
   assert(pss->queue_is_empty(), "pre-condition");
 
@@ -5586,7 +5562,7 @@
 }
 
 // Weak Reference processing during an evacuation pause (part 2).
-void G1CollectedHeap::enqueue_discovered_references(G1ParScanThreadState** per_thread_states) {
+void G1CollectedHeap::enqueue_discovered_references(G1ParScanThreadStateSet* per_thread_states) {
   double ref_enq_start = os::elapsedTime();
 
   ReferenceProcessor* rp = _ref_processor_stw;
@@ -5621,7 +5597,7 @@
   g1_policy()->phase_times()->record_ref_enq_time(ref_enq_time * 1000.0);
 }
 
-void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) {
+void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* per_thread_states) {
   _expand_heap_after_alloc_failure = true;
   _evacuation_failed = false;
 
@@ -5641,11 +5617,6 @@
   double start_par_time_sec = os::elapsedTime();
   double end_par_time_sec;
 
-  G1ParScanThreadState** per_thread_states = NEW_C_HEAP_ARRAY(G1ParScanThreadState*, n_workers, mtGC);
-  for (uint i = 0; i < n_workers; i++) {
-    per_thread_states[i] = new_par_scan_state(i);
-  }
-
   {
     G1RootProcessor root_processor(this, n_workers);
     G1ParTask g1_par_task(this, per_thread_states, _task_queues, &root_processor, n_workers);
@@ -5699,11 +5670,7 @@
   _allocator->release_gc_alloc_regions(evacuation_info);
   g1_rem_set()->cleanup_after_oops_into_collection_set_do();
 
-  for (uint i = 0; i < n_workers; i++) {
-    G1ParScanThreadState* pss = per_thread_states[i];
-    delete pss;
-  }
-  FREE_C_HEAP_ARRAY(G1ParScanThreadState*, per_thread_states);
+  per_thread_states->flush();
 
   record_obj_copy_mem_stats();
 
@@ -6054,7 +6021,7 @@
   g1_policy()->phase_times()->record_clear_ct_time(elapsed * 1000.0);
 }
 
-void G1CollectedHeap::free_collection_set(HeapRegion* cs_head, EvacuationInfo& evacuation_info) {
+void G1CollectedHeap::free_collection_set(HeapRegion* cs_head, EvacuationInfo& evacuation_info, const size_t* surviving_young_words) {
   size_t pre_used = 0;
   FreeRegionList local_free_list("Local List for CSet Freeing");
 
@@ -6108,7 +6075,7 @@
       int index = cur->young_index_in_cset();
       assert(index != -1, "invariant");
       assert((uint) index < policy->young_cset_region_length(), "invariant");
-      size_t words_survived = _surviving_young_words[index];
+      size_t words_survived = surviving_young_words[index];
       cur->record_surv_words_in_group(words_survived);
 
       // At this point the we have 'popped' cur from the collection set
--- a/src/share/vm/gc/g1/g1CollectedHeap.hpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/g1/g1CollectedHeap.hpp	Tue Sep 15 15:49:33 2015 +0200
@@ -56,6 +56,7 @@
 class GenerationSpec;
 class OopsInHeapRegionClosure;
 class G1ParScanThreadState;
+class G1ParScanThreadStateSet;
 class G1KlassScanClosure;
 class G1ParScanThreadState;
 class ObjectClosure;
@@ -192,6 +193,7 @@
 
   // Closures used in implementation.
   friend class G1ParScanThreadState;
+  friend class G1ParScanThreadStateSet;
   friend class G1ParTask;
   friend class G1PLABAllocator;
   friend class G1PrepareCompactClosure;
@@ -309,14 +311,8 @@
 
   volatile unsigned _gc_time_stamp;
 
-  size_t* _surviving_young_words;
-
   G1HRPrinter _hr_printer;
 
-  void setup_surviving_young_words();
-  void update_surviving_young_words(size_t* surv_young_words);
-  void cleanup_surviving_young_words();
-
   // It decides whether an explicit GC should start a concurrent cycle
   // instead of doing a STW GC. Currently, a concurrent cycle is
   // explicitly started if:
@@ -584,11 +580,11 @@
 
   // Process any reference objects discovered during
   // an incremental evacuation pause.
-  void process_discovered_references(G1ParScanThreadState** per_thread_states);
+  void process_discovered_references(G1ParScanThreadStateSet* per_thread_states);
 
   // Enqueue any remaining discovered references
   // after processing.
-  void enqueue_discovered_references(G1ParScanThreadState** per_thread_states);
+  void enqueue_discovered_references(G1ParScanThreadStateSet* per_thread_states);
 
 public:
   WorkGang* workers() const { return _workers; }
@@ -683,9 +679,6 @@
   // Allocates a new heap region instance.
   HeapRegion* new_heap_region(uint hrs_index, MemRegion mr);
 
-  // Allocates a new per thread par scan state for the given thread id.
-  G1ParScanThreadState* new_par_scan_state(uint worker_id);
-
   // Allocate the highest free region in the reserved heap. This will commit
   // regions as necessary.
   HeapRegion* alloc_highest_free_region();
@@ -799,7 +792,7 @@
   bool do_collection_pause_at_safepoint(double target_pause_time_ms);
 
   // Actually do the work of evacuating the collection set.
-  void evacuate_collection_set(EvacuationInfo& evacuation_info);
+  void evacuate_collection_set(EvacuationInfo& evacuation_info, G1ParScanThreadStateSet* per_thread_states);
 
   // Print the header for the per-thread termination statistics.
   static void print_termination_stats_hdr(outputStream* const st);
@@ -833,7 +826,7 @@
 
   // After a collection pause, make the regions in the CS into free
   // regions.
-  void free_collection_set(HeapRegion* cs_head, EvacuationInfo& evacuation_info);
+  void free_collection_set(HeapRegion* cs_head, EvacuationInfo& evacuation_info, const size_t* surviving_young_words);
 
   // Abandon the current collection set without recording policy
   // statistics or updating free lists.
@@ -1057,7 +1050,7 @@
   // The current policy object for the collector.
   G1CollectorPolicy* g1_policy() const { return _g1_policy; }
 
-  virtual CollectorPolicy* collector_policy() const { return (CollectorPolicy*) g1_policy(); }
+  virtual CollectorPolicy* collector_policy() const;
 
   // Adaptive size policy.  No such thing for g1.
   virtual AdaptiveSizePolicy* size_policy() { return NULL; }
@@ -1610,7 +1603,6 @@
 
 public:
   size_t pending_card_num();
-  size_t cards_scanned();
 
 protected:
   size_t _max_heap_capacity;
--- a/src/share/vm/gc/g1/g1CollectedHeap_ext.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/g1/g1CollectedHeap_ext.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -38,7 +38,3 @@
                                              MemRegion mr) {
   return new HeapRegion(hrs_index, bot_shared(), mr);
 }
-
-G1ParScanThreadState* G1CollectedHeap::new_par_scan_state(uint worker_id) {
-  return new G1ParScanThreadState(this, worker_id);
-}
--- a/src/share/vm/gc/g1/g1CollectorPolicy.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/g1/g1CollectorPolicy.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -923,7 +923,7 @@
 // Anything below that is considered to be zero
 #define MIN_TIMER_GRANULARITY 0.0000001
 
-void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms) {
+void G1CollectorPolicy::record_collection_pause_end(double pause_time_ms, size_t cards_scanned) {
   double end_time_sec = os::elapsedTime();
   assert(_cur_collection_pause_used_regions_at_start >= cset_region_length(),
          "otherwise, the subtraction below does not make sense");
@@ -1052,8 +1052,6 @@
       _cost_per_card_ms_seq->add(cost_per_card_ms);
     }
 
-    size_t cards_scanned = _g1->cards_scanned();
-
     double cost_per_entry_ms = 0.0;
     if (cards_scanned > 10) {
       cost_per_entry_ms = phase_times()->average_time_ms(G1GCPhaseTimes::ScanRS) / (double) cards_scanned;
@@ -1871,7 +1869,7 @@
 }
 
 
-void G1CollectorPolicy::finalize_cset(double target_pause_time_ms) {
+double G1CollectorPolicy::finalize_young_cset_part(double target_pause_time_ms) {
   double young_start_time_sec = os::elapsedTime();
 
   YoungList* young_list = _g1->young_list();
@@ -1883,7 +1881,6 @@
   guarantee(_collection_set == NULL, "Precondition");
 
   double base_time_ms = predict_base_elapsed_time_ms(_pending_cards);
-  double predicted_pause_time_ms = base_time_ms;
   double time_remaining_ms = MAX2(target_pause_time_ms - base_time_ms, 0.0);
 
   ergo_verbose4(ErgoCSetConstruction | ErgoHigh,
@@ -1927,15 +1924,16 @@
   _collection_set = _inc_cset_head;
   _collection_set_bytes_used_before = _inc_cset_bytes_used_before;
   time_remaining_ms = MAX2(time_remaining_ms - _inc_cset_predicted_elapsed_time_ms, 0.0);
-  predicted_pause_time_ms += _inc_cset_predicted_elapsed_time_ms;
 
-  ergo_verbose3(ErgoCSetConstruction | ErgoHigh,
+  ergo_verbose4(ErgoCSetConstruction | ErgoHigh,
                 "add young regions to CSet",
                 ergo_format_region("eden")
                 ergo_format_region("survivors")
-                ergo_format_ms("predicted young region time"),
+                ergo_format_ms("predicted young region time")
+                ergo_format_ms("target pause time"),
                 eden_region_length, survivor_region_length,
-                _inc_cset_predicted_elapsed_time_ms);
+                _inc_cset_predicted_elapsed_time_ms,
+                target_pause_time_ms);
 
   // The number of recorded young regions is the incremental
   // collection set's current size
@@ -1944,8 +1942,13 @@
   double young_end_time_sec = os::elapsedTime();
   phase_times()->record_young_cset_choice_time_ms((young_end_time_sec - young_start_time_sec) * 1000.0);
 
-  // Set the start of the non-young choice time.
-  double non_young_start_time_sec = young_end_time_sec;
+  return time_remaining_ms;
+}
+
+void G1CollectorPolicy::finalize_old_cset_part(double time_remaining_ms) {
+  double non_young_start_time_sec = os::elapsedTime();
+  double predicted_old_time_ms = 0.0;
+
 
   if (!collector_state()->gcs_are_young()) {
     CollectionSetChooser* cset_chooser = _collectionSetChooser;
@@ -2033,8 +2036,8 @@
 
       // We will add this region to the CSet.
       time_remaining_ms = MAX2(time_remaining_ms - predicted_time_ms, 0.0);
-      predicted_pause_time_ms += predicted_time_ms;
-      cset_chooser->remove_and_move_to_next(hr);
+      predicted_old_time_ms += predicted_time_ms;
+      cset_chooser->pop(); // already have region via peek()
       _g1->old_set_remove(hr);
       add_old_region_to_cset(hr);
 
@@ -2068,16 +2071,13 @@
 
   stop_incremental_cset_building();
 
-  ergo_verbose5(ErgoCSetConstruction,
+  ergo_verbose3(ErgoCSetConstruction,
                 "finish choosing CSet",
-                ergo_format_region("eden")
-                ergo_format_region("survivors")
                 ergo_format_region("old")
-                ergo_format_ms("predicted pause time")
-                ergo_format_ms("target pause time"),
-                eden_region_length, survivor_region_length,
+                ergo_format_ms("predicted old region time")
+                ergo_format_ms("time remaining"),
                 old_cset_region_length(),
-                predicted_pause_time_ms, target_pause_time_ms);
+                predicted_old_time_ms, time_remaining_ms);
 
   double non_young_end_time_sec = os::elapsedTime();
   phase_times()->record_non_young_cset_choice_time_ms((non_young_end_time_sec - non_young_start_time_sec) * 1000.0);
--- a/src/share/vm/gc/g1/g1CollectorPolicy.hpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/g1/g1CollectorPolicy.hpp	Tue Sep 15 15:49:33 2015 +0200
@@ -473,7 +473,7 @@
 
   // The number of bytes in the collection set before the pause. Set from
   // the incrementally built collection set at the start of an evacuation
-  // pause, and incremented in finalize_cset() when adding old regions
+  // pause, and incremented in finalize_old_cset_part() when adding old regions
   // (if any) to the collection set.
   size_t _collection_set_bytes_used_before;
 
@@ -634,7 +634,7 @@
 
   // Record the start and end of an evacuation pause.
   void record_collection_pause_start(double start_time_sec);
-  void record_collection_pause_end(double pause_time_ms);
+  void record_collection_pause_end(double pause_time_ms, size_t cards_scanned);
 
   // Record the start and end of a full collection.
   void record_full_collection_start();
@@ -689,7 +689,8 @@
   // Choose a new collection set.  Marks the chosen regions as being
   // "in_collection_set", and links them together.  The head and number of
   // the collection set are available via access methods.
-  void finalize_cset(double target_pause_time_ms);
+  double finalize_young_cset_part(double target_pause_time_ms);
+  virtual void finalize_old_cset_part(double time_remaining_ms);
 
   // The head of the list (via "next_in_collection_set()") representing the
   // current collection set.
@@ -865,8 +866,8 @@
     return _recorded_survivor_regions;
   }
 
-  void record_thread_age_table(ageTable* age_table) {
-    _survivors_age_table.merge_par(age_table);
+  void record_age_table(ageTable* age_table) {
+    _survivors_age_table.merge(age_table);
   }
 
   void update_max_gc_locker_expansion();
--- a/src/share/vm/gc/g1/g1EvacStats.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/g1/g1EvacStats.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -46,11 +46,11 @@
     if (_allocated == 0) {
       assert((_unused == 0),
              err_msg("Inconsistency in PLAB stats: "
-                     "_allocated: "SIZE_FORMAT", "
-                     "_wasted: "SIZE_FORMAT", "
-                     "_region_end_waste: "SIZE_FORMAT", "
-                     "_unused: "SIZE_FORMAT", "
-                     "_used  : "SIZE_FORMAT,
+                     "_allocated: " SIZE_FORMAT ", "
+                     "_wasted: " SIZE_FORMAT ", "
+                     "_region_end_waste: " SIZE_FORMAT ", "
+                     "_unused: " SIZE_FORMAT ", "
+                     "_used  : " SIZE_FORMAT,
                      _allocated, _wasted, _region_end_waste, _unused, used()));
       _allocated = 1;
     }
--- a/src/share/vm/gc/g1/g1ParScanThreadState.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/g1/g1ParScanThreadState.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -32,7 +32,7 @@
 #include "oops/oop.inline.hpp"
 #include "runtime/prefetch.inline.hpp"
 
-G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id)
+G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id, size_t young_cset_length)
   : _g1h(g1h),
     _refs(g1h->task_queue(worker_id)),
     _dcq(&g1h->dirty_card_queue_set()),
@@ -51,8 +51,8 @@
   // non-young regions (where the age is -1)
   // We also add a few elements at the beginning and at the end in
   // an attempt to eliminate cache contention
-  uint real_length = 1 + _g1h->g1_policy()->young_cset_region_length();
-  uint array_length = PADDING_ELEM_NUM +
+  size_t real_length = 1 + young_cset_length;
+  size_t array_length = PADDING_ELEM_NUM +
                       real_length +
                       PADDING_ELEM_NUM;
   _surviving_young_words_base = NEW_C_HEAP_ARRAY(size_t, array_length, mtGC);
@@ -60,7 +60,7 @@
     vm_exit_out_of_memory(array_length * sizeof(size_t), OOM_MALLOC_ERROR,
                           "Not enough space for young surv histo.");
   _surviving_young_words = _surviving_young_words_base + PADDING_ELEM_NUM;
-  memset(_surviving_young_words, 0, (size_t) real_length * sizeof(size_t));
+  memset(_surviving_young_words, 0, real_length * sizeof(size_t));
 
   _plab_allocator = G1PLABAllocator::create_allocator(_g1h->allocator());
 
@@ -71,13 +71,21 @@
   _dest[InCSetState::Old]          = InCSetState::Old;
 }
 
-G1ParScanThreadState::~G1ParScanThreadState() {
+// Pass locally gathered statistics to global state.
+void G1ParScanThreadState::flush(size_t* surviving_young_words) {
+  _dcq.flush();
   // Update allocation statistics.
   _plab_allocator->flush_and_retire_stats();
+  _g1h->g1_policy()->record_age_table(&_age_table);
+
+  uint length = _g1h->g1_policy()->young_cset_region_length();
+  for (uint region_index = 0; region_index < length; region_index++) {
+    surviving_young_words[region_index] += _surviving_young_words[region_index];
+  }
+}
+
+G1ParScanThreadState::~G1ParScanThreadState() {
   delete _plab_allocator;
-  _g1h->g1_policy()->record_thread_age_table(&_age_table);
-  // Update heap statistics.
-  _g1h->update_surviving_young_words(_surviving_young_words);
   FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base);
 }
 
@@ -314,6 +322,42 @@
   }
 }
 
+G1ParScanThreadState* G1ParScanThreadStateSet::state_for_worker(uint worker_id) {
+  assert(worker_id < _n_workers, "out of bounds access");
+  return _states[worker_id];
+}
+
+void G1ParScanThreadStateSet::add_cards_scanned(uint worker_id, size_t cards_scanned) {
+  assert(worker_id < _n_workers, "out of bounds access");
+  _cards_scanned[worker_id] += cards_scanned;
+}
+
+size_t G1ParScanThreadStateSet::total_cards_scanned() const {
+  assert(_flushed, "thread local state from the per thread states should have been flushed");
+  return _total_cards_scanned;
+}
+
+const size_t* G1ParScanThreadStateSet::surviving_young_words() const {
+  assert(_flushed, "thread local state from the per thread states should have been flushed");
+  return _surviving_young_words_total;
+}
+
+void G1ParScanThreadStateSet::flush() {
+  assert(!_flushed, "thread local state from the per thread states should be flushed once");
+  assert(_total_cards_scanned == 0, "should have been cleared");
+
+  for (uint worker_index = 0; worker_index < _n_workers; ++worker_index) {
+    G1ParScanThreadState* pss = _states[worker_index];
+
+    _total_cards_scanned += _cards_scanned[worker_index];
+
+    pss->flush(_surviving_young_words_total);
+    delete pss;
+    _states[worker_index] = NULL;
+  }
+  _flushed = true;
+}
+
 oop G1ParScanThreadState::handle_evacuation_failure_par(oop old, markOop m) {
   assert(_g1h->obj_in_cs(old),
          err_msg("Object " PTR_FORMAT " should be in the CSet", p2i(old)));
--- a/src/share/vm/gc/g1/g1ParScanThreadState.hpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/g1/g1ParScanThreadState.hpp	Tue Sep 15 15:49:33 2015 +0200
@@ -82,7 +82,7 @@
   }
 
  public:
-  G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id);
+  G1ParScanThreadState(G1CollectedHeap* g1h, uint worker_id, size_t young_cset_length);
   ~G1ParScanThreadState();
 
   void set_ref_processor(ReferenceProcessor* rp) { _scanner.set_ref_processor(rp); }
@@ -121,6 +121,8 @@
     return _surviving_young_words + 1;
   }
 
+  void flush(size_t* surviving_young_words);
+
  private:
   #define G1_PARTIAL_ARRAY_MASK 0x2
 
@@ -189,4 +191,48 @@
   oop handle_evacuation_failure_par(oop obj, markOop m);
 };
 
+class G1ParScanThreadStateSet : public StackObj {
+  G1CollectedHeap* _g1h;
+  G1ParScanThreadState** _states;
+  size_t* _surviving_young_words_total;
+  size_t* _cards_scanned;
+  size_t _total_cards_scanned;
+  uint _n_workers;
+  bool _flushed;
+
+ public:
+  G1ParScanThreadStateSet(G1CollectedHeap* g1h, uint n_workers, size_t young_cset_length) :
+      _g1h(g1h),
+      _states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, n_workers, mtGC)),
+      _surviving_young_words_total(NEW_C_HEAP_ARRAY(size_t, young_cset_length, mtGC)),
+      _cards_scanned(NEW_C_HEAP_ARRAY(size_t, n_workers, mtGC)),
+      _total_cards_scanned(0),
+      _n_workers(n_workers),
+      _flushed(false) {
+    for (uint i = 0; i < n_workers; ++i) {
+      _states[i] = new_par_scan_state(i, young_cset_length);
+    }
+    memset(_surviving_young_words_total, 0, young_cset_length * sizeof(size_t));
+    memset(_cards_scanned, 0, n_workers * sizeof(size_t));
+  }
+
+  ~G1ParScanThreadStateSet() {
+    assert(_flushed, "thread local state from the per thread states should have been flushed");
+    FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states);
+    FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_total);
+    FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
+  }
+
+  void flush();
+
+  G1ParScanThreadState* state_for_worker(uint worker_id);
+
+  void add_cards_scanned(uint worker_id, size_t cards_scanned);
+  size_t total_cards_scanned() const;
+  const size_t* surviving_young_words() const;
+
+ private:
+  G1ParScanThreadState* new_par_scan_state(uint worker_id, size_t young_cset_length);
+};
+
 #endif // SHARE_VM_GC_G1_G1PARSCANTHREADSTATE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc/g1/g1ParScanThreadState_ext.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+
+#include "gc/g1/g1ParScanThreadState.hpp"
+
+G1ParScanThreadState* G1ParScanThreadStateSet::new_par_scan_state(uint worker_id, size_t young_cset_length) {
+  return new G1ParScanThreadState(_g1h, worker_id, young_cset_length);
+}
--- a/src/share/vm/gc/g1/g1RemSet.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/g1/g1RemSet.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -76,7 +76,6 @@
     _ct_bs(ct_bs), _g1p(_g1->g1_policy()),
     _cg1r(g1->concurrent_g1_refine()),
     _cset_rs_update_cl(NULL),
-    _cards_scanned(NULL), _total_cards_scanned(0),
     _prev_period_summary()
 {
   _cset_rs_update_cl = NEW_C_HEAP_ARRAY(G1ParPushHeapRSClosure*, n_workers(), mtGC);
@@ -228,9 +227,9 @@
   size_t cards_looked_up() { return _cards;}
 };
 
-void G1RemSet::scanRS(G1ParPushHeapRSClosure* oc,
-                      OopClosure* non_heap_roots,
-                      uint worker_i) {
+size_t G1RemSet::scanRS(G1ParPushHeapRSClosure* oc,
+                        OopClosure* non_heap_roots,
+                        uint worker_i) {
   double rs_time_start = os::elapsedTime();
 
   G1CodeBlobClosure code_root_cl(non_heap_roots);
@@ -246,11 +245,10 @@
   double scan_rs_time_sec = (os::elapsedTime() - rs_time_start)
                             - scanRScl.strong_code_root_scan_time_sec();
 
-  assert(_cards_scanned != NULL, "invariant");
-  _cards_scanned[worker_i] = scanRScl.cards_done();
-
   _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::ScanRS, worker_i, scan_rs_time_sec);
   _g1p->phase_times()->record_time_secs(G1GCPhaseTimes::CodeRoots, worker_i, scanRScl.strong_code_root_scan_time_sec());
+
+  return scanRScl.cards_done();
 }
 
 // Closure used for updating RSets and recording references that
@@ -298,9 +296,9 @@
   HeapRegionRemSet::cleanup();
 }
 
-void G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* oc,
-                                           OopClosure* non_heap_roots,
-                                           uint worker_i) {
+size_t G1RemSet::oops_into_collection_set_do(G1ParPushHeapRSClosure* oc,
+                                             OopClosure* non_heap_roots,
+                                             uint worker_i) {
 #if CARD_REPEAT_HISTO
   ct_freq_update_histo_and_reset();
 #endif
@@ -322,10 +320,11 @@
   DirtyCardQueue into_cset_dcq(&_g1->into_cset_dirty_card_queue_set());
 
   updateRS(&into_cset_dcq, worker_i);
-  scanRS(oc, non_heap_roots, worker_i);
+  size_t cards_scanned = scanRS(oc, non_heap_roots, worker_i);
 
   // We now clear the cached values of _cset_rs_update_cl for this worker
   _cset_rs_update_cl[worker_i] = NULL;
+  return cards_scanned;
 }
 
 void G1RemSet::prepare_for_oops_into_collection_set_do() {
@@ -333,23 +332,9 @@
   _g1->set_refine_cte_cl_concurrency(false);
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   dcqs.concatenate_logs();
-
-  guarantee( _cards_scanned == NULL, "invariant" );
-  _cards_scanned = NEW_C_HEAP_ARRAY(size_t, n_workers(), mtGC);
-  for (uint i = 0; i < n_workers(); ++i) {
-    _cards_scanned[i] = 0;
-  }
-  _total_cards_scanned = 0;
 }
 
 void G1RemSet::cleanup_after_oops_into_collection_set_do() {
-  guarantee( _cards_scanned != NULL, "invariant" );
-  _total_cards_scanned = 0;
-  for (uint i = 0; i < n_workers(); ++i) {
-    _total_cards_scanned += _cards_scanned[i];
-  }
-  FREE_C_HEAP_ARRAY(size_t, _cards_scanned);
-  _cards_scanned = NULL;
   // Cleanup after copy
   _g1->set_refine_cte_cl_concurrency(true);
   // Set all cards back to clean.
--- a/src/share/vm/gc/g1/g1RemSet.hpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/g1/g1RemSet.hpp	Tue Sep 15 15:49:33 2015 +0200
@@ -62,9 +62,6 @@
 
   ConcurrentG1Refine*    _cg1r;
 
-  size_t*                _cards_scanned;
-  size_t                 _total_cards_scanned;
-
   // Used for caching the closure that is responsible for scanning
   // references into the collection set.
   G1ParPushHeapRSClosure** _cset_rs_update_cl;
@@ -94,9 +91,12 @@
   // partitioning the work to be done. It should be the same as
   // the "i" passed to the calling thread's work(i) function.
   // In the sequential case this param will be ignored.
-  void oops_into_collection_set_do(G1ParPushHeapRSClosure* blk,
-                                   OopClosure* non_heap_roots,
-                                   uint worker_i);
+  //
+  // Returns the number of cards scanned while looking for pointers
+  // into the collection set.
+  size_t oops_into_collection_set_do(G1ParPushHeapRSClosure* blk,
+                                     OopClosure* non_heap_roots,
+                                     uint worker_i);
 
   // Prepare for and cleanup after an oops_into_collection_set_do
   // call.  Must call each of these once before and after (in sequential
@@ -106,14 +106,13 @@
   void prepare_for_oops_into_collection_set_do();
   void cleanup_after_oops_into_collection_set_do();
 
-  void scanRS(G1ParPushHeapRSClosure* oc,
-              OopClosure* non_heap_roots,
-              uint worker_i);
+  size_t scanRS(G1ParPushHeapRSClosure* oc,
+                OopClosure* non_heap_roots,
+                uint worker_i);
 
   void updateRS(DirtyCardQueue* into_cset_dcq, uint worker_i);
 
   CardTableModRefBS* ct_bs() { return _ct_bs; }
-  size_t cardsScanned() { return _total_cards_scanned; }
 
   // Record, if necessary, the fact that *p (where "p" is in region "from",
   // which is required to be non-NULL) has changed to a new non-NULL value.
--- a/src/share/vm/gc/parallel/parallelScavengeHeap.hpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/parallel/parallelScavengeHeap.hpp	Tue Sep 15 15:49:33 2015 +0200
@@ -87,7 +87,7 @@
     return CollectedHeap::ParallelScavengeHeap;
   }
 
-  virtual CollectorPolicy* collector_policy() const { return (CollectorPolicy*) _collector_policy; }
+  virtual CollectorPolicy* collector_policy() const { return _collector_policy; }
 
   static PSYoungGen* young_gen() { return _young_gen; }
   static PSOldGen* old_gen()     { return _old_gen; }
--- a/src/share/vm/gc/serial/defNewGeneration.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/serial/defNewGeneration.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -213,7 +213,7 @@
   _max_eden_size = size - (2*_max_survivor_size);
 
   // allocate the performance counters
-  GenCollectorPolicy* gcp = (GenCollectorPolicy*)gch->collector_policy();
+  GenCollectorPolicy* gcp = gch->gen_policy();
 
   // Generation counters -- generation 0, 3 subspaces
   _gen_counters = new GenerationCounters("new", 0, 3,
--- a/src/share/vm/gc/serial/tenuredGeneration.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/serial/tenuredGeneration.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -57,8 +57,7 @@
   // initialize performance counters
 
   const char* gen_name = "old";
-  GenCollectorPolicy* gcp = (GenCollectorPolicy*) GenCollectedHeap::heap()->collector_policy();
-
+  GenCollectorPolicy* gcp = GenCollectedHeap::heap()->gen_policy();
   // Generation Counters -- generation 1, 1 subspace
   _gen_counters = new GenerationCounters(gen_name, 1, 1,
       gcp->min_old_size(), gcp->max_old_size(), &_virtual_space);
--- a/src/share/vm/gc/shared/ageTable.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/shared/ageTable.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -28,7 +28,6 @@
 #include "gc/shared/collectorPolicy.hpp"
 #include "gc/shared/gcPolicyCounters.hpp"
 #include "memory/resourceArea.hpp"
-#include "runtime/atomic.inline.hpp"
 #include "utilities/copy.hpp"
 
 /* Copyright (c) 1992, 2015, Oracle and/or its affiliates, and Stanford University.
@@ -73,12 +72,6 @@
   }
 }
 
-void ageTable::merge_par(ageTable* subTable) {
-  for (int i = 0; i < table_size; i++) {
-    Atomic::add_ptr(subTable->sizes[i], &sizes[i]);
-  }
-}
-
 uint ageTable::compute_tenuring_threshold(size_t survivor_capacity, GCPolicyCounters* gc_counters) {
   size_t desired_survivor_size = (size_t)((((double) survivor_capacity)*TargetSurvivorRatio)/100);
   uint result;
--- a/src/share/vm/gc/shared/ageTable.hpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/shared/ageTable.hpp	Tue Sep 15 15:49:33 2015 +0200
@@ -68,7 +68,6 @@
   // Merge another age table with the current one.  Used
   // for parallel young generation gc.
   void merge(ageTable* subTable);
-  void merge_par(ageTable* subTable);
 
   // calculate new tenuring threshold based on age information
   uint compute_tenuring_threshold(size_t survivor_capacity, GCPolicyCounters* gc_counters);
--- a/src/share/vm/gc/shared/blockOffsetTable.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/shared/blockOffsetTable.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -447,14 +447,16 @@
       } else {
         // Unilaterally fix the first (num_pref_cards - 1) following
         // the "offset card" in the suffix block.
+        const size_t right_most_fixed_index = suff_index + num_pref_cards - 1;
         set_remainder_to_point_to_start_incl(suff_index + 1,
-          suff_index + num_pref_cards - 1, true /* reducing */);
+          right_most_fixed_index, true /* reducing */);
         // Fix the appropriate cards in the remainder of the
         // suffix block -- these are the last num_pref_cards
         // cards in each power block of the "new" range plumbed
         // from suff_addr.
         bool more = true;
         uint i = 1;
+        // Fix the first power block with  back_by > num_pref_cards.
         while (more && (i < N_powers)) {
           size_t back_by = power_to_cards_back(i);
           size_t right_index = suff_index + back_by - 1;
@@ -463,6 +465,9 @@
             right_index = end_index - 1;
             more = false;
           }
+          if (left_index <= right_most_fixed_index) {
+                left_index = right_most_fixed_index + 1;
+          }
           if (back_by > num_pref_cards) {
             // Fill in the remainder of this "power block", if it
             // is non-null.
@@ -471,12 +476,14 @@
                                      N_words + i - 1, true /* reducing */);
             } else {
               more = false; // we are done
+              assert((end_index - 1) == right_index, "Must be at the end.");
             }
             i++;
             break;
           }
           i++;
         }
+        // Fix the rest of the power blocks.
         while (more && (i < N_powers)) {
           size_t back_by = power_to_cards_back(i);
           size_t right_index = suff_index + back_by - 1;
--- a/src/share/vm/gc/shared/genCollectedHeap.cpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/shared/genCollectedHeap.cpp	Tue Sep 15 15:49:33 2015 +0200
@@ -172,8 +172,6 @@
 void GenCollectedHeap::post_initialize() {
   CollectedHeap::post_initialize();
   ref_processing_init();
-  GenCollectorPolicy *policy = (GenCollectorPolicy *)collector_policy();
-  guarantee(policy->is_generation_policy(), "Illegal policy type");
   assert((_young_gen->kind() == Generation::DefNew) ||
          (_young_gen->kind() == Generation::ParNew),
     "Wrong youngest generation type");
@@ -183,10 +181,10 @@
          _old_gen->kind() == Generation::MarkSweepCompact,
     "Wrong generation kind");
 
-  policy->initialize_size_policy(def_new_gen->eden()->capacity(),
-                                 _old_gen->capacity(),
-                                 def_new_gen->from()->capacity());
-  policy->initialize_gc_policy_counters();
+  _gen_policy->initialize_size_policy(def_new_gen->eden()->capacity(),
+                                      _old_gen->capacity(),
+                                      def_new_gen->from()->capacity());
+  _gen_policy->initialize_gc_policy_counters();
 }
 
 void GenCollectedHeap::ref_processing_init() {
@@ -822,10 +820,11 @@
          "Unexpected generation kinds");
   // Skip two header words in the block content verification
   NOT_PRODUCT(_skip_header_HeapWords = CMSCollector::skip_header_HeapWords();)
-  CMSCollector* collector = new CMSCollector(
-    (ConcurrentMarkSweepGeneration*)_old_gen,
-    _rem_set->as_CardTableRS(),
-    (ConcurrentMarkSweepPolicy*) collector_policy());
+  assert(_gen_policy->is_concurrent_mark_sweep_policy(), "Unexpected policy type");
+  CMSCollector* collector =
+    new CMSCollector((ConcurrentMarkSweepGeneration*)_old_gen,
+                     _rem_set->as_CardTableRS(),
+                     _gen_policy->as_concurrent_mark_sweep_policy());
 
   if (collector == NULL || !collector->completed_initialization()) {
     if (collector) {
--- a/src/share/vm/gc/shared/genCollectedHeap.hpp	Thu Sep 10 14:55:19 2015 -0700
+++ b/src/share/vm/gc/shared/genCollectedHeap.hpp	Tue Sep 15 15:49:33 2015 +0200
@@ -153,7 +153,7 @@
   // The generational collector policy.
   GenCollectorPolicy* gen_policy() const { return _gen_policy; }
 
-  virtual CollectorPolicy* collector_policy() const { return (CollectorPolicy*) gen_policy(); }
+  virtual CollectorPolicy* collector_policy() const { return gen_policy(); }
 
   // Adaptive size policy
   virtual AdaptiveSizePolicy* size_policy() {
--- a/test/gc/g1/humongousObjects/TestHumongousThreshold.java	Thu Sep 10 14:55:19 2015 -0700
+++ b/test/gc/g1/humongousObjects/TestHumongousThreshold.java	Tue Sep 15 15:49:33 2015 +0200
@@ -56,11 +56,11 @@
  * gc.g1.humongousObjects.TestHumongousThreshold
  *
  * @run main/othervm -XX:+UseG1GC -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
- * -XX:G1HeapRegionSize=16M
+ * -Xms128M -XX:G1HeapRegionSize=16M
  * gc.g1.humongousObjects.TestHumongousThreshold
  *
  * @run main/othervm -XX:+UseG1GC -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI -Xbootclasspath/a:.
- * -XX:G1HeapRegionSize=32M
+ * -Xms200M -XX:G1HeapRegionSize=32M
  * gc.g1.humongousObjects.TestHumongousThreshold
  *
  */
--- a/test/serviceability/dcmd/compiler/CodelistTest.java	Thu Sep 10 14:55:19 2015 -0700
+++ b/test/serviceability/dcmd/compiler/CodelistTest.java	Tue Sep 15 15:49:33 2015 +0200
@@ -90,6 +90,9 @@
             if (methodPrintedInLogFormat.contains("MethodHandle")) {
                 continue;
             }
+            if (methodPrintedInLogFormat.contains("sun.misc.Unsafe.getUnsafe")) {
+                continue;
+            }
 
             MethodIdentifierParser mf = new MethodIdentifierParser(methodPrintedInLogFormat);
             Method m = null;