changeset 8454:4229cf175d3f

Merge
author jwilhelm
date Mon, 25 May 2015 16:59:28 +0200
parents 3581455dc18c da8d3eff08ea
children c8f0a089a41f
files
diffstat 69 files changed, 497 insertions(+), 781 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Mon May 25 16:59:28 2015 +0200
@@ -1469,7 +1469,9 @@
   } else {
     guarantee(in_bytes(PtrQueue::byte_width_of_active()) == 1,
               "Assumption");
-    flag_type = T_BYTE;
+    // Use unsigned type T_BOOLEAN here rather than signed T_BYTE since some platforms, eg. ARM,
+    // need to use unsigned instructions to use the large offset to load the satb_mark_queue.
+    flag_type = T_BOOLEAN;
   }
   LIR_Opr thrd = getThreadPointer();
   LIR_Address* mark_active_flag_addr =
--- a/src/share/vm/gc/cms/cmsOopClosures.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/cms/cmsOopClosures.hpp	Mon May 25 16:59:28 2015 +0200
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_CMS_CMSOOPCLOSURES_HPP
 
 #include "gc/shared/genOopClosures.hpp"
+#include "gc/shared/taskqueue.hpp"
 #include "memory/iterator.hpp"
 
 /////////////////////////////////////////////////////////////////
--- a/src/share/vm/gc/cms/compactibleFreeListSpace.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/cms/compactibleFreeListSpace.cpp	Mon May 25 16:59:28 2015 +0200
@@ -641,6 +641,7 @@
 class FreeListSpace_DCTOC : public Filtering_DCTOC {
   CompactibleFreeListSpace* _cfls;
   CMSCollector* _collector;
+  bool _parallel;
 protected:
   // Override.
 #define walk_mem_region_with_cl_DECL(ClosureType)                       \
@@ -661,9 +662,10 @@
                       CMSCollector* collector,
                       ExtendedOopClosure* cl,
                       CardTableModRefBS::PrecisionStyle precision,
-                      HeapWord* boundary) :
+                      HeapWord* boundary,
+                      bool parallel) :
     Filtering_DCTOC(sp, cl, precision, boundary),
-    _cfls(sp), _collector(collector) {}
+    _cfls(sp), _collector(collector), _parallel(parallel) {}
 };
 
 // We de-virtualize the block-related calls below, since we know that our
@@ -674,10 +676,7 @@
                                                  HeapWord* bottom,              \
                                                  HeapWord* top,                 \
                                                  ClosureType* cl) {             \
-   bool is_par = GenCollectedHeap::heap()->n_par_threads() > 0;                 \
-   if (is_par) {                                                                \
-     assert(GenCollectedHeap::heap()->n_par_threads() ==                        \
-            GenCollectedHeap::heap()->workers()->active_workers(), "Mismatch"); \
+   if (_parallel) {                                                             \
      walk_mem_region_with_cl_par(mr, bottom, top, cl);                          \
    } else {                                                                     \
      walk_mem_region_with_cl_nopar(mr, bottom, top, cl);                        \
@@ -747,8 +746,9 @@
 DirtyCardToOopClosure*
 CompactibleFreeListSpace::new_dcto_cl(ExtendedOopClosure* cl,
                                       CardTableModRefBS::PrecisionStyle precision,
-                                      HeapWord* boundary) {
-  return new FreeListSpace_DCTOC(this, _collector, cl, precision, boundary);
+                                      HeapWord* boundary,
+                                      bool parallel) {
+  return new FreeListSpace_DCTOC(this, _collector, cl, precision, boundary, parallel);
 }
 
 
@@ -1897,11 +1897,9 @@
   assert(chunk->is_free() && ffc->is_free(), "Error");
   _bt.split_block((HeapWord*)chunk, chunk->size(), new_size);
   if (rem_sz < SmallForDictionary) {
-    bool is_par = (GenCollectedHeap::heap()->n_par_threads() > 0);
+    // The freeList lock is held, but multiple GC task threads might be executing in parallel.
+    bool is_par = Thread::current()->is_GC_task_thread();
     if (is_par) _indexedFreeListParLocks[rem_sz]->lock();
-    assert(!is_par ||
-           (GenCollectedHeap::heap()->n_par_threads() ==
-            GenCollectedHeap::heap()->workers()->active_workers()), "Mismatch");
     returnChunkToFreeList(ffc);
     split(size, rem_sz);
     if (is_par) _indexedFreeListParLocks[rem_sz]->unlock();
@@ -1972,8 +1970,6 @@
 
 bool CompactibleFreeListSpace::no_allocs_since_save_marks() {
   assert(_promoInfo.tracking(), "No preceding save_marks?");
-  assert(GenCollectedHeap::heap()->n_par_threads() == 0,
-         "Shouldn't be called if using parallel gc.");
   return _promoInfo.noPromotions();
 }
 
@@ -1981,8 +1977,6 @@
                                                                             \
 void CompactibleFreeListSpace::                                             \
 oop_since_save_marks_iterate##nv_suffix(OopClosureType* blk) {              \
-  assert(GenCollectedHeap::heap()->n_par_threads() == 0,                    \
-         "Shouldn't be called (yet) during parallel part of gc.");          \
   _promoInfo.promoted_oops_iterate##nv_suffix(blk);                         \
   /*                                                                        \
    * This also restores any displaced headers and removes the elements from \
--- a/src/share/vm/gc/cms/compactibleFreeListSpace.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/cms/compactibleFreeListSpace.hpp	Mon May 25 16:59:28 2015 +0200
@@ -438,7 +438,8 @@
   // Override: provides a DCTO_CL specific to this kind of space.
   DirtyCardToOopClosure* new_dcto_cl(ExtendedOopClosure* cl,
                                      CardTableModRefBS::PrecisionStyle precision,
-                                     HeapWord* boundary);
+                                     HeapWord* boundary,
+                                     bool parallel);
 
   void blk_iterate(BlkClosure* cl);
   void blk_iterate_careful(BlkClosureCareful* cl);
--- a/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/cms/concurrentMarkSweepGeneration.cpp	Mon May 25 16:59:28 2015 +0200
@@ -2428,14 +2428,18 @@
   MarkRefsIntoClosure notOlder(_span, verification_mark_bm());
   gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
 
-  gch->gen_process_roots(_cmsGen->level(),
-                         true,   // younger gens are roots
-                         true,   // activate StrongRootsScope
-                         GenCollectedHeap::ScanningOption(roots_scanning_options()),
-                         should_unload_classes(),
-                         &notOlder,
-                         NULL,
-                         NULL);  // SSS: Provide correct closure
+  {
+    StrongRootsScope srs(1);
+
+    gch->gen_process_roots(&srs,
+                           _cmsGen->level(),
+                           true,   // younger gens are roots
+                           GenCollectedHeap::ScanningOption(roots_scanning_options()),
+                           should_unload_classes(),
+                           &notOlder,
+                           NULL,
+                           NULL);
+  }
 
   // Now mark from the roots
   MarkFromRootsClosure markFromRootsClosure(this, _span,
@@ -2496,14 +2500,18 @@
 
   gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
 
-  gch->gen_process_roots(_cmsGen->level(),
-                         true,   // younger gens are roots
-                         true,   // activate StrongRootsScope
-                         GenCollectedHeap::ScanningOption(roots_scanning_options()),
-                         should_unload_classes(),
-                         &notOlder,
-                         NULL,
-                         &cld_closure);
+  {
+    StrongRootsScope srs(1);
+
+    gch->gen_process_roots(&srs,
+                           _cmsGen->level(),
+                           true,   // younger gens are roots
+                           GenCollectedHeap::ScanningOption(roots_scanning_options()),
+                           should_unload_classes(),
+                           &notOlder,
+                           NULL,
+                           &cld_closure);
+  }
 
   // Now mark from the roots
   MarkFromRootsVerifyClosure markFromRootsClosure(this, _span,
@@ -2913,10 +2921,11 @@
 
 // Parallel initial mark task
 class CMSParInitialMarkTask: public CMSParMarkTask {
+  StrongRootsScope* _strong_roots_scope;
  public:
-  CMSParInitialMarkTask(CMSCollector* collector, uint n_workers) :
-      CMSParMarkTask("Scan roots and young gen for initial mark in parallel",
-                     collector, n_workers) {}
+  CMSParInitialMarkTask(CMSCollector* collector, StrongRootsScope* strong_roots_scope, uint n_workers) :
+      CMSParMarkTask("Scan roots and young gen for initial mark in parallel", collector, n_workers),
+      _strong_roots_scope(strong_roots_scope) {}
   void work(uint worker_id);
 };
 
@@ -3004,24 +3013,26 @@
       FlexibleWorkGang* workers = gch->workers();
       assert(workers != NULL, "Need parallel worker threads.");
       uint n_workers = workers->active_workers();
-      CMSParInitialMarkTask tsk(this, n_workers);
-      gch->set_par_threads(n_workers);
+
+      StrongRootsScope srs(n_workers);
+
+      CMSParInitialMarkTask tsk(this, &srs, n_workers);
       initialize_sequential_subtasks_for_young_gen_rescan(n_workers);
       if (n_workers > 1) {
-        StrongRootsScope srs;
         workers->run_task(&tsk);
       } else {
-        StrongRootsScope srs;
         tsk.work(0);
       }
-      gch->set_par_threads(0);
     } else {
       // The serial version.
       CLDToOopClosure cld_closure(&notOlder, true);
       gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
-      gch->gen_process_roots(_cmsGen->level(),
+
+      StrongRootsScope srs(1);
+
+      gch->gen_process_roots(&srs,
+                             _cmsGen->level(),
                              true,   // younger gens are roots
-                             true,   // activate StrongRootsScope
                              GenCollectedHeap::ScanningOption(roots_scanning_options()),
                              should_unload_classes(),
                              &notOlder,
@@ -4452,9 +4463,9 @@
 
   CLDToOopClosure cld_closure(&par_mri_cl, true);
 
-  gch->gen_process_roots(_collector->_cmsGen->level(),
+  gch->gen_process_roots(_strong_roots_scope,
+                         _collector->_cmsGen->level(),
                          false,     // yg was scanned above
-                         false,     // this is parallel code
                          GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
                          _collector->should_unload_classes(),
                          &par_mri_cl,
@@ -4478,6 +4489,7 @@
   // The per-thread work queues, available here for stealing.
   OopTaskQueueSet*       _task_queues;
   ParallelTaskTerminator _term;
+  StrongRootsScope*      _strong_roots_scope;
 
  public:
   // A value of 0 passed to n_workers will cause the number of
@@ -4485,12 +4497,14 @@
   CMSParRemarkTask(CMSCollector* collector,
                    CompactibleFreeListSpace* cms_space,
                    uint n_workers, FlexibleWorkGang* workers,
-                   OopTaskQueueSet* task_queues):
+                   OopTaskQueueSet* task_queues,
+                   StrongRootsScope* strong_roots_scope):
     CMSParMarkTask("Rescan roots and grey objects in parallel",
                    collector, n_workers),
     _cms_space(cms_space),
     _task_queues(task_queues),
-    _term(n_workers, task_queues) { }
+    _term(n_workers, task_queues),
+    _strong_roots_scope(strong_roots_scope) { }
 
   OopTaskQueueSet* task_queues() { return _task_queues; }
 
@@ -4588,9 +4602,9 @@
   // ---------- remaining roots --------------
   _timer.reset();
   _timer.start();
-  gch->gen_process_roots(_collector->_cmsGen->level(),
+  gch->gen_process_roots(_strong_roots_scope,
+                         _collector->_cmsGen->level(),
                          false,     // yg was scanned above
-                         false,     // this is parallel code
                          GenCollectedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
                          _collector->should_unload_classes(),
                          &par_mrias_cl,
@@ -5058,22 +5072,15 @@
   FlexibleWorkGang* workers = gch->workers();
   assert(workers != NULL, "Need parallel worker threads.");
   // Choose to use the number of GC workers most recently set
-  // into "active_workers".  If active_workers is not set, set it
-  // to ParallelGCThreads.
+  // into "active_workers".
   uint n_workers = workers->active_workers();
-  if (n_workers == 0) {
-    assert(n_workers > 0, "Should have been set during scavenge");
-    n_workers = ParallelGCThreads;
-    workers->set_active_workers(n_workers);
-  }
+
   CompactibleFreeListSpace* cms_space  = _cmsGen->cmsSpace();
 
-  CMSParRemarkTask tsk(this,
-    cms_space,
-    n_workers, workers, task_queues());
-
-  // Set up for parallel process_roots work.
-  gch->set_par_threads(n_workers);
+  StrongRootsScope srs(n_workers);
+
+  CMSParRemarkTask tsk(this, cms_space, n_workers, workers, task_queues(), &srs);
+
   // We won't be iterating over the cards in the card table updating
   // the younger_gen cards, so we shouldn't call the following else
   // the verification code as well as subsequent younger_refs_iterate
@@ -5105,15 +5112,12 @@
     // necessarily be so, since it's possible that we are doing
     // ST marking.
     ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), true);
-    StrongRootsScope srs;
     workers->run_task(&tsk);
   } else {
     ReferenceProcessorMTDiscoveryMutator mt(ref_processor(), false);
-    StrongRootsScope srs;
     tsk.work(0);
   }
 
-  gch->set_par_threads(0);  // 0 ==> non-parallel.
   // restore, single-threaded for now, any preserved marks
   // as a result of work_q overflow
   restore_preserved_marks_if_any();
@@ -5177,11 +5181,11 @@
     verify_work_stacks_empty();
 
     gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
-    StrongRootsScope srs;
-
-    gch->gen_process_roots(_cmsGen->level(),
+    StrongRootsScope srs(1);
+
+    gch->gen_process_roots(&srs,
+                           _cmsGen->level(),
                            true,  // younger gens as roots
-                           false, // use the local StrongRootsScope
                            GenCollectedHeap::ScanningOption(roots_scanning_options()),
                            should_unload_classes(),
                            &mrias_cl,
@@ -5254,18 +5258,14 @@
                       CMSBitMap*       mark_bit_map,
                       AbstractWorkGang* workers,
                       OopTaskQueueSet* task_queues):
-    // XXX Should superclass AGTWOQ also know about AWG since it knows
-    // about the task_queues used by the AWG? Then it could initialize
-    // the terminator() object. See 6984287. The set_for_termination()
-    // below is a temporary band-aid for the regression in 6984287.
     AbstractGangTaskWOopQueues("Process referents by policy in parallel",
-      task_queues),
+      task_queues,
+      workers->active_workers()),
     _task(task),
     _collector(collector), _span(span), _mark_bit_map(mark_bit_map)
   {
     assert(_collector->_span.equals(_span) && !_span.is_empty(),
            "Inconsistency in _span");
-    set_for_termination(workers->active_workers());
   }
 
   OopTaskQueueSet* task_queues() { return queues(); }
--- a/src/share/vm/gc/cms/parCardTableModRefBS.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/cms/parCardTableModRefBS.cpp	Mon May 25 16:59:28 2015 +0200
@@ -39,16 +39,11 @@
 void CardTableModRefBS::non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
                                                              OopsInGenClosure* cl,
                                                              CardTableRS* ct,
-                                                             int n_threads) {
-  assert(n_threads > 0, "Error: expected n_threads > 0");
-  assert((n_threads == 1 && ParallelGCThreads == 0) ||
-         n_threads <= (int)ParallelGCThreads,
-         "# worker threads != # requested!");
-  assert(!Thread::current()->is_VM_thread() || (n_threads == 1), "There is only 1 VM thread");
-  assert(UseDynamicNumberOfGCThreads ||
-         !FLAG_IS_DEFAULT(ParallelGCThreads) ||
-         n_threads == (int)ParallelGCThreads,
-         "# worker threads != # requested!");
+                                                             uint n_threads) {
+  assert(n_threads > 0, "expected n_threads > 0");
+  assert(n_threads <= ParallelGCThreads,
+         err_msg("n_threads: %u > ParallelGCThreads: " UINTX_FORMAT, n_threads, ParallelGCThreads));
+
   // Make sure the LNC array is valid for the space.
   jbyte**   lowest_non_clean;
   uintptr_t lowest_non_clean_base_chunk_index;
@@ -66,7 +61,8 @@
 
   uint stride = 0;
   while (!pst->is_task_claimed(/* reference */ stride)) {
-    process_stride(sp, mr, stride, n_strides, cl, ct,
+    process_stride(sp, mr, stride, n_strides,
+                   cl, ct,
                    lowest_non_clean,
                    lowest_non_clean_base_chunk_index,
                    lowest_non_clean_chunk_size);
@@ -132,9 +128,13 @@
     assert(chunk_mr.word_size() > 0, "[chunk_card_start > used_end)");
     assert(used.contains(chunk_mr), "chunk_mr should be subset of used");
 
+    // This function is used by the parallel card table iteration.
+    const bool parallel = true;
+
     DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(),
-                                                     cl->gen_boundary());
-    ClearNoncleanCardWrapper clear_cl(dcto_cl, ct);
+                                                     cl->gen_boundary(),
+                                                     parallel);
+    ClearNoncleanCardWrapper clear_cl(dcto_cl, ct, parallel);
 
 
     // Process the chunk.
--- a/src/share/vm/gc/cms/parNewGeneration.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/cms/parNewGeneration.cpp	Mon May 25 16:59:28 2015 +0200
@@ -567,23 +567,15 @@
 }
 
 ParNewGenTask::ParNewGenTask(ParNewGeneration* gen, Generation* old_gen,
-                             HeapWord* young_old_boundary, ParScanThreadStateSet* state_set) :
+                             HeapWord* young_old_boundary, ParScanThreadStateSet* state_set,
+                             StrongRootsScope* strong_roots_scope) :
     AbstractGangTask("ParNewGeneration collection"),
     _gen(gen), _old_gen(old_gen),
     _young_old_boundary(young_old_boundary),
-    _state_set(state_set)
+    _state_set(state_set),
+    _strong_roots_scope(strong_roots_scope)
   {}
 
-// Reset the terminator for the given number of
-// active threads.
-void ParNewGenTask::set_for_termination(uint active_workers) {
-  _state_set->reset(active_workers, _gen->promotion_failed());
-  // Should the heap be passed in?  There's only 1 for now so
-  // grab it instead.
-  GenCollectedHeap* gch = GenCollectedHeap::heap();
-  gch->set_n_termination(active_workers);
-}
-
 void ParNewGenTask::work(uint worker_id) {
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   // Since this is being done in a separate thread, need new resource
@@ -603,10 +595,10 @@
                                            false);
 
   par_scan_state.start_strong_roots();
-  gch->gen_process_roots(_gen->level(),
+  gch->gen_process_roots(_strong_roots_scope,
+                         _gen->level(),
                          true,  // Process younger gens, if any,
                                 // as strong roots.
-                         false, // no scope; this is parallel code
                          GenCollectedHeap::SO_ScavengeCodeCache,
                          GenCollectedHeap::StrongAndWeakRoots,
                          &par_scan_state.to_space_root_closure(),
@@ -759,9 +751,6 @@
 
 private:
   virtual void work(uint worker_id);
-  virtual void set_for_termination(uint active_workers) {
-    _state_set.terminator()->reset_for_reuse(active_workers);
-  }
 private:
   ParNewGeneration&      _gen;
   ProcessTask&           _task;
@@ -838,7 +827,6 @@
 {
   _state_set.flush();
   GenCollectedHeap* gch = GenCollectedHeap::heap();
-  gch->set_par_threads(0);  // 0 ==> non-parallel.
   gch->save_marks();
 }
 
@@ -952,20 +940,24 @@
                                          *to(), *this, *_old_gen, *task_queues(),
                                          _overflow_stacks, desired_plab_sz(), _term);
 
-  ParNewGenTask tsk(this, _old_gen, reserved().end(), &thread_state_set);
-  gch->set_par_threads(n_workers);
-  gch->rem_set()->prepare_for_younger_refs_iterate(true);
-  // It turns out that even when we're using 1 thread, doing the work in a
-  // separate thread causes wide variance in run times.  We can't help this
-  // in the multi-threaded case, but we special-case n=1 here to get
-  // repeatable measurements of the 1-thread overhead of the parallel code.
-  if (n_workers > 1) {
-    StrongRootsScope srs;
-    workers->run_task(&tsk);
-  } else {
-    StrongRootsScope srs;
-    tsk.work(0);
+  thread_state_set.reset(n_workers, promotion_failed());
+
+  {
+    StrongRootsScope srs(n_workers);
+
+    ParNewGenTask tsk(this, _old_gen, reserved().end(), &thread_state_set, &srs);
+    gch->rem_set()->prepare_for_younger_refs_iterate(true);
+    // It turns out that even when we're using 1 thread, doing the work in a
+    // separate thread causes wide variance in run times.  We can't help this
+    // in the multi-threaded case, but we special-case n=1 here to get
+    // repeatable measurements of the 1-thread overhead of the parallel code.
+    if (n_workers > 1) {
+      workers->run_task(&tsk);
+    } else {
+      tsk.work(0);
+    }
   }
+
   thread_state_set.reset(0 /* Bad value in debug if not reset */,
                          promotion_failed());
 
@@ -995,7 +987,6 @@
                                               _gc_timer, _gc_tracer.gc_id());
   } else {
     thread_state_set.flush();
-    gch->set_par_threads(0);  // 0 ==> non-parallel.
     gch->save_marks();
     stats = rp->process_discovered_references(&is_alive, &keep_alive,
                                               &evacuate_followers, NULL,
@@ -1477,9 +1468,9 @@
     _ref_processor =
       new ReferenceProcessor(_reserved,                  // span
                              ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing
-                             (int) ParallelGCThreads,    // mt processing degree
+                             (uint) ParallelGCThreads,   // mt processing degree
                              refs_discovery_is_mt(),     // mt discovery
-                             (int) ParallelGCThreads,    // mt discovery degree
+                             (uint) ParallelGCThreads,   // mt discovery degree
                              refs_discovery_is_atomic(), // atomic_discovery
                              NULL);                      // is_alive_non_header
   }
--- a/src/share/vm/gc/cms/parNewGeneration.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/cms/parNewGeneration.hpp	Mon May 25 16:59:28 2015 +0200
@@ -39,6 +39,7 @@
 class ParRootScanWithoutBarrierClosure;
 class ParRootScanWithBarrierTwoGensClosure;
 class ParEvacuateFollowersClosure;
+class StrongRootsScope;
 
 // It would be better if these types could be kept local to the .cpp file,
 // but they must be here to allow ParScanClosure::do_oop_work to be defined
@@ -237,20 +238,18 @@
   Generation*                  _old_gen;
   HeapWord*                    _young_old_boundary;
   class ParScanThreadStateSet* _state_set;
+  StrongRootsScope*            _strong_roots_scope;
 
 public:
   ParNewGenTask(ParNewGeneration*      gen,
                 Generation*            old_gen,
                 HeapWord*              young_old_boundary,
-                ParScanThreadStateSet* state_set);
+                ParScanThreadStateSet* state_set,
+                StrongRootsScope*      strong_roots_scope);
 
   HeapWord* young_old_boundary() { return _young_old_boundary; }
 
   void work(uint worker_id);
-
-  // Reset the terminator in ParScanThreadStateSet for
-  // "active_workers" threads.
-  virtual void set_for_termination(uint active_workers);
 };
 
 class KeepAliveClosure: public DefNewGeneration::KeepAliveClosure {
--- a/src/share/vm/gc/cms/parOopClosures.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/cms/parOopClosures.hpp	Mon May 25 16:59:28 2015 +0200
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_CMS_PAROOPCLOSURES_HPP
 
 #include "gc/shared/genOopClosures.hpp"
+#include "gc/shared/taskqueue.hpp"
 #include "memory/padded.hpp"
 
 // Closures for ParNewGeneration
--- a/src/share/vm/gc/cms/yieldingWorkgroup.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/cms/yieldingWorkgroup.hpp	Mon May 25 16:59:28 2015 +0200
@@ -147,6 +147,13 @@
   bool completed() const { return _status == COMPLETED; }
   bool aborted()   const { return _status == ABORTED; }
   bool active()    const { return _status == ACTIVE; }
+
+  // This method configures the task for proper termination.
+  // Some tasks do not have any requirements on termination
+  // and may inherit this method that does nothing.  Some
+  // tasks do some coordination on termination and override
+  // this method to implement that coordination.
+  virtual void set_for_termination(uint active_workers) {}
 };
 // Class YieldingWorkGang: A subclass of WorkGang.
 // In particular, a YieldingWorkGang is made up of
--- a/src/share/vm/gc/g1/collectionSetChooser.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/collectionSetChooser.cpp	Mon May 25 16:59:28 2015 +0200
@@ -158,20 +158,10 @@
   hr->calc_gc_efficiency();
 }
 
-void CollectionSetChooser::prepare_for_par_region_addition(uint n_regions,
+void CollectionSetChooser::prepare_for_par_region_addition(uint n_threads,
+                                                           uint n_regions,
                                                            uint chunk_size) {
   _first_par_unreserved_idx = 0;
-  uint n_threads = (uint) ParallelGCThreads;
-  if (UseDynamicNumberOfGCThreads) {
-    assert(G1CollectedHeap::heap()->workers()->active_workers() > 0,
-      "Should have been set earlier");
-    // This is defensive code. As the assertion above says, the number
-    // of active threads should be > 0, but in case there is some path
-    // or some improperly initialized variable with leads to no
-    // active threads, protect against that in a product build.
-    n_threads = MAX2(G1CollectedHeap::heap()->workers()->active_workers(),
-                     1U);
-  }
   uint max_waste = n_threads * chunk_size;
   // it should be aligned with respect to chunk_size
   uint aligned_n_regions = (n_regions + chunk_size - 1) / chunk_size * chunk_size;
--- a/src/share/vm/gc/g1/collectionSetChooser.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/collectionSetChooser.hpp	Mon May 25 16:59:28 2015 +0200
@@ -121,7 +121,7 @@
 
   // Must be called before calls to claim_array_chunk().
   // n_regions is the number of regions, chunk_size the chunk size.
-  void prepare_for_par_region_addition(uint n_regions, uint chunk_size);
+  void prepare_for_par_region_addition(uint n_threads, uint n_regions, uint chunk_size);
   // Returns the first index in a contiguous chunk of chunk_size indexes
   // that the calling thread has reserved.  These must be set by the
   // calling thread using set_region() (to NULL if necessary).
--- a/src/share/vm/gc/g1/concurrentG1Refine.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/concurrentG1Refine.cpp	Mon May 25 16:59:28 2015 +0200
@@ -35,7 +35,7 @@
 {
   // Ergonomically select initial concurrent refinement parameters
   if (FLAG_IS_DEFAULT(G1ConcRefinementGreenZone)) {
-    FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, MAX2<int>(ParallelGCThreads, 1));
+    FLAG_SET_DEFAULT(G1ConcRefinementGreenZone, (intx)ParallelGCThreads);
   }
   set_green_zone(G1ConcRefinementGreenZone);
 
--- a/src/share/vm/gc/g1/concurrentMark.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/concurrentMark.cpp	Mon May 25 16:59:28 2015 +0200
@@ -518,7 +518,7 @@
   _markStack(this),
   // _finger set in set_non_marking_state
 
-  _max_worker_id(MAX2((uint)ParallelGCThreads, 1U)),
+  _max_worker_id((uint)ParallelGCThreads),
   // _active_tasks set in set_non_marking_state
   // _tasks set inside the constructor
   _task_queues(new CMTaskQueueSet((int) _max_worker_id)),
@@ -1218,15 +1218,13 @@
     "Maximum number of marking threads exceeded");
 
   uint active_workers = MAX2(1U, parallel_marking_threads());
+  assert(active_workers > 0, "Should have been set");
 
   // Parallel task terminator is set in "set_concurrency_and_phase()"
   set_concurrency_and_phase(active_workers, true /* concurrent */);
 
   CMConcurrentMarkingTask markingTask(this, cmThread());
   _parallel_workers->set_active_workers(active_workers);
-  // Don't set _n_par_threads because it affects MT in process_roots()
-  // and the decisions on that MT processing is made elsewhere.
-  assert(_parallel_workers->active_workers() > 0, "Should have been set");
   _parallel_workers->run_task(&markingTask);
   print_stats();
 }
@@ -1938,18 +1936,10 @@
 
   HeapRegionRemSet::reset_for_cleanup_tasks();
 
-  uint n_workers;
-
   // Do counting once more with the world stopped for good measure.
   G1ParFinalCountTask g1_par_count_task(g1h, &_region_bm, &_card_bm);
 
-  g1h->set_par_threads();
-  n_workers = g1h->n_par_threads();
-  assert(g1h->n_par_threads() == n_workers,
-         "Should not have been reset");
   g1h->workers()->run_task(&g1_par_count_task);
-  // Done with the parallel phase so reset to 0.
-  g1h->set_par_threads(0);
 
   if (VerifyDuringGC) {
     // Verify that the counting data accumulated during marking matches
@@ -1965,10 +1955,7 @@
                                                  &expected_region_bm,
                                                  &expected_card_bm);
 
-    g1h->set_par_threads((int)n_workers);
     g1h->workers()->run_task(&g1_par_verify_task);
-    // Done with the parallel phase so reset to 0.
-    g1h->set_par_threads(0);
 
     guarantee(g1_par_verify_task.failures() == 0, "Unexpected accounting failures");
   }
@@ -1990,11 +1977,11 @@
 
   g1h->reset_gc_time_stamp();
 
+  uint n_workers = _g1h->workers()->active_workers();
+
   // Note end of marking in all heap regions.
   G1ParNoteEndTask g1_par_note_end_task(g1h, &_cleanup_list, n_workers);
-  g1h->set_par_threads((int)n_workers);
   g1h->workers()->run_task(&g1_par_note_end_task);
-  g1h->set_par_threads(0);
   g1h->check_gc_time_stamps();
 
   if (!cleanup_list_is_empty()) {
@@ -2009,9 +1996,7 @@
   if (G1ScrubRemSets) {
     double rs_scrub_start = os::elapsedTime();
     G1ParScrubRemSetTask g1_par_scrub_rs_task(g1h, &_region_bm, &_card_bm, n_workers);
-    g1h->set_par_threads((int)n_workers);
     g1h->workers()->run_task(&g1_par_scrub_rs_task);
-    g1h->set_par_threads(0);
 
     double rs_scrub_end = os::elapsedTime();
     double this_rs_scrub_time = (rs_scrub_end - rs_scrub_start);
@@ -2020,7 +2005,7 @@
 
   // this will also free any regions totally full of garbage objects,
   // and sort the regions.
-  g1h->g1_policy()->record_concurrent_mark_cleanup_end((int)n_workers);
+  g1h->g1_policy()->record_concurrent_mark_cleanup_end();
 
   // Statistics.
   double end = os::elapsedTime();
@@ -2312,9 +2297,7 @@
   // and overflow handling in CMTask::do_marking_step() knows
   // how many workers to wait for.
   _cm->set_concurrency(_active_workers);
-  _g1h->set_par_threads(_active_workers);
   _workers->run_task(&proc_task_proxy);
-  _g1h->set_par_threads(0);
 }
 
 class G1CMRefEnqueueTaskProxy: public AbstractGangTask {
@@ -2344,9 +2327,7 @@
   // and overflow handling in CMTask::do_marking_step() knows
   // how many workers to wait for.
   _cm->set_concurrency(_active_workers);
-  _g1h->set_par_threads(_active_workers);
   _workers->run_task(&enq_task_proxy);
-  _g1h->set_par_threads(0);
 }
 
 void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
@@ -2608,27 +2589,23 @@
 
   g1h->ensure_parsability(false);
 
-  StrongRootsScope srs;
   // this is remark, so we'll use up all active threads
   uint active_workers = g1h->workers()->active_workers();
-  if (active_workers == 0) {
-    assert(active_workers > 0, "Should have been set earlier");
-    active_workers = (uint) ParallelGCThreads;
-    g1h->workers()->set_active_workers(active_workers);
-  }
   set_concurrency_and_phase(active_workers, false /* concurrent */);
   // Leave _parallel_marking_threads at it's
   // value originally calculated in the ConcurrentMark
   // constructor and pass values of the active workers
   // through the gang in the task.
 
-  CMRemarkTask remarkTask(this, active_workers);
-  // We will start all available threads, even if we decide that the
-  // active_workers will be fewer. The extra ones will just bail out
-  // immediately.
-  g1h->set_par_threads(active_workers);
-  g1h->workers()->run_task(&remarkTask);
-  g1h->set_par_threads(0);
+  {
+    StrongRootsScope srs(active_workers);
+
+    CMRemarkTask remarkTask(this, active_workers);
+    // We will start all available threads, even if we decide that the
+    // active_workers will be fewer. The extra ones will just bail out
+    // immediately.
+    g1h->workers()->run_task(&remarkTask);
+  }
 
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   guarantee(has_overflown() ||
@@ -3001,9 +2978,7 @@
   G1AggregateCountDataTask g1_par_agg_task(_g1h, this, &_card_bm,
                                            _max_worker_id, n_workers);
 
-  _g1h->set_par_threads(n_workers);
   _g1h->workers()->run_task(&g1_par_agg_task);
-  _g1h->set_par_threads(0);
 }
 
 // Clear the per-worker arrays used to store the per-region counting data
--- a/src/share/vm/gc/g1/g1CollectedHeap.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/g1CollectedHeap.cpp	Mon May 25 16:59:28 2015 +0200
@@ -1326,27 +1326,10 @@
         AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
                                                 workers()->active_workers(),
                                                 Threads::number_of_non_daemon_threads());
-      assert(UseDynamicNumberOfGCThreads ||
-             n_workers == workers()->total_workers(),
-             "If not dynamic should be using all the  workers");
       workers()->set_active_workers(n_workers);
-      // Set parallel threads in the heap (_n_par_threads) only
-      // before a parallel phase and always reset it to 0 after
-      // the phase so that the number of parallel threads does
-      // no get carried forward to a serial phase where there
-      // may be code that is "possibly_parallel".
-      set_par_threads(n_workers);
 
       ParRebuildRSTask rebuild_rs_task(this);
-      assert(UseDynamicNumberOfGCThreads ||
-             workers()->active_workers() == workers()->total_workers(),
-             "Unless dynamic should use total workers");
-      // Use the most recent number of  active workers
-      assert(workers()->active_workers() > 0,
-             "Active workers not properly set");
-      set_par_threads(workers()->active_workers());
       workers()->run_task(&rebuild_rs_task);
-      set_par_threads(0);
 
       // Rebuild the strong code root lists for each region
       rebuild_strong_code_roots();
@@ -1769,7 +1752,7 @@
   _allocator = G1Allocator::create_allocator(this);
   _humongous_object_threshold_in_words = HeapRegion::GrainWords / 2;
 
-  int n_queues = MAX2((int)ParallelGCThreads, 1);
+  int n_queues = (int)ParallelGCThreads;
   _task_queues = new RefToScanQueueSet(n_queues);
 
   uint n_rem_sets = HeapRegionRemSet::num_par_rem_sets();
@@ -2081,11 +2064,11 @@
     new ReferenceProcessor(mr,    // span
                            ParallelRefProcEnabled && (ParallelGCThreads > 1),
                                 // mt processing
-                           (int) ParallelGCThreads,
+                           (uint) ParallelGCThreads,
                                 // degree of mt processing
                            (ParallelGCThreads > 1) || (ConcGCThreads > 1),
                                 // mt discovery
-                           (int) MAX2(ParallelGCThreads, ConcGCThreads),
+                           (uint) MAX2(ParallelGCThreads, ConcGCThreads),
                                 // degree of mt discovery
                            false,
                                 // Reference discovery is not atomic
@@ -2098,11 +2081,11 @@
     new ReferenceProcessor(mr,    // span
                            ParallelRefProcEnabled && (ParallelGCThreads > 1),
                                 // mt processing
-                           MAX2((int)ParallelGCThreads, 1),
+                           (uint) ParallelGCThreads,
                                 // degree of mt processing
                            (ParallelGCThreads > 1),
                                 // mt discovery
-                           MAX2((int)ParallelGCThreads, 1),
+                           (uint) ParallelGCThreads,
                                 // degree of mt discovery
                            true,
                                 // Reference discovery is atomic
@@ -2502,8 +2485,7 @@
   assert(_worker_cset_start_region != NULL, "sanity");
   assert(_worker_cset_start_region_time_stamp != NULL, "sanity");
 
-  int n_queues = MAX2((int)ParallelGCThreads, 1);
-  for (int i = 0; i < n_queues; i++) {
+  for (uint i = 0; i < ParallelGCThreads; i++) {
     _worker_cset_start_region[i] = NULL;
     _worker_cset_start_region_time_stamp[i] = 0;
   }
@@ -2541,9 +2523,6 @@
   result = g1_policy()->collection_set();
   uint cs_size = g1_policy()->cset_region_length();
   uint active_workers = workers()->active_workers();
-  assert(UseDynamicNumberOfGCThreads ||
-           active_workers == workers()->total_workers(),
-           "Unless dynamic should use total workers");
 
   uint end_ind   = (cs_size * worker_i) / active_workers;
   uint start_ind = 0;
@@ -3021,7 +3000,7 @@
     G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl);
 
     {
-      G1RootProcessor root_processor(this);
+      G1RootProcessor root_processor(this, 1);
       root_processor.process_all_roots(&rootsCl,
                                        &cldCl,
                                        &blobsCl);
@@ -3042,13 +3021,7 @@
     if (GCParallelVerificationEnabled && ParallelGCThreads > 1) {
 
       G1ParVerifyTask task(this, vo);
-      assert(UseDynamicNumberOfGCThreads ||
-        workers()->active_workers() == workers()->total_workers(),
-        "If not dynamic should be using all the workers");
-      uint n_workers = workers()->active_workers();
-      set_par_threads(n_workers);
       workers()->run_task(&task);
-      set_par_threads(0);
       if (task.failures()) {
         failures = true;
       }
@@ -3572,6 +3545,10 @@
 };
 #endif // ASSERT
 
+uint G1CollectedHeap::num_task_queues() const {
+  return _task_queues->size();
+}
+
 #if TASKQUEUE_STATS
 void G1CollectedHeap::print_taskqueue_stats_hdr(outputStream* const st) {
   st->print_raw_cr("GC Task Stats");
@@ -3583,7 +3560,7 @@
   print_taskqueue_stats_hdr(st);
 
   TaskQueueStats totals;
-  const uint n = workers()->total_workers();
+  const uint n = num_task_queues();
   for (uint i = 0; i < n; ++i) {
     st->print("%3u ", i); task_queue(i)->stats.print(st); st->cr();
     totals += task_queue(i)->stats;
@@ -3594,7 +3571,7 @@
 }
 
 void G1CollectedHeap::reset_taskqueue_stats() {
-  const uint n = workers()->total_workers();
+  const uint n = num_task_queues();
   for (uint i = 0; i < n; ++i) {
     task_queue(i)->stats.reset();
   }
@@ -3696,9 +3673,6 @@
     uint active_workers = AdaptiveSizePolicy::calc_active_workers(workers()->total_workers(),
                                                                   workers()->active_workers(),
                                                                   Threads::number_of_non_daemon_threads());
-    assert(UseDynamicNumberOfGCThreads ||
-           active_workers == workers()->total_workers(),
-           "If not dynamic should be using all the  workers");
     workers()->set_active_workers(active_workers);
 
     double pause_start_sec = os::elapsedTime();
@@ -3873,8 +3847,7 @@
 
         if (evacuation_failed()) {
           _allocator->set_used(recalculate_used());
-          uint n_queues = MAX2((int)ParallelGCThreads, 1);
-          for (uint i = 0; i < n_queues; i++) {
+          for (uint i = 0; i < ParallelGCThreads; i++) {
             if (_evacuation_failed_info_array[i].has_failed()) {
               _gc_tracer_stw->report_evacuation_failed(_evacuation_failed_info_array[i]);
             }
@@ -4041,10 +4014,8 @@
 void G1CollectedHeap::remove_self_forwarding_pointers() {
   double remove_self_forwards_start = os::elapsedTime();
 
-  set_par_threads();
   G1ParRemoveSelfForwardPtrsTask rsfp_task(this);
   workers()->run_task(&rsfp_task);
-  set_par_threads(0);
 
   // Now restore saved marks, if any.
   assert(_objs_with_preserved_marks.size() ==
@@ -4308,12 +4279,13 @@
   Mutex* stats_lock() { return &_stats_lock; }
 
 public:
-  G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor)
+  G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor, uint n_workers)
     : AbstractGangTask("G1 collection"),
       _g1h(g1h),
       _queues(task_queues),
       _root_processor(root_processor),
-      _terminator(0, _queues),
+      _terminator(n_workers, _queues),
+      _n_workers(n_workers),
       _stats_lock(Mutex::leaf, "parallel G1 stats lock", true)
   {}
 
@@ -4325,12 +4297,6 @@
 
   ParallelTaskTerminator* terminator() { return &_terminator; }
 
-  virtual void set_for_termination(uint active_workers) {
-    _root_processor->set_num_workers(active_workers);
-    terminator()->reset_for_reuse(active_workers);
-    _n_workers = active_workers;
-  }
-
   // Helps out with CLD processing.
   //
   // During InitialMark we need to:
@@ -4811,19 +4777,14 @@
 
   G1ParallelCleaningTask g1_unlink_task(is_alive, process_strings, process_symbols,
                                         n_workers, class_unloading_occurred);
-  set_par_threads(n_workers);
   workers()->run_task(&g1_unlink_task);
-  set_par_threads(0);
 }
 
 void G1CollectedHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive,
                                                      bool process_strings, bool process_symbols) {
   {
-    uint n_workers = workers()->active_workers();
     G1StringSymbolTableUnlinkTask g1_unlink_task(is_alive, process_strings, process_symbols);
-    set_par_threads(n_workers);
     workers()->run_task(&g1_unlink_task);
-    set_par_threads(0);
   }
 
   if (G1StringDedup::is_enabled()) {
@@ -4851,13 +4812,9 @@
 void G1CollectedHeap::redirty_logged_cards() {
   double redirty_logged_cards_start = os::elapsedTime();
 
-  uint n_workers = workers()->active_workers();
-
   G1RedirtyLoggedCardsTask redirty_task(&dirty_card_queue_set());
   dirty_card_queue_set().reset_for_par_iteration();
-  set_par_threads(n_workers);
   workers()->run_task(&redirty_task);
-  set_par_threads(0);
 
   DirtyCardQueueSet& dcq = JavaThread::dirty_card_queue_set();
   dcq.merge_bufferlists(&dirty_card_queue_set());
@@ -5093,9 +5050,7 @@
   ParallelTaskTerminator terminator(_active_workers, _queues);
   G1STWRefProcTaskProxy proc_task_proxy(proc_task, _g1h, _queues, &terminator);
 
-  _g1h->set_par_threads(_active_workers);
   _workers->run_task(&proc_task_proxy);
-  _g1h->set_par_threads(0);
 }
 
 // Gang task for parallel reference enqueueing.
@@ -5124,9 +5079,7 @@
 
   G1STWRefEnqueueTaskProxy enq_task_proxy(enq_task);
 
-  _g1h->set_par_threads(_active_workers);
   _workers->run_task(&enq_task_proxy);
-  _g1h->set_par_threads(0);
 }
 
 // End of weak reference support closures
@@ -5219,7 +5172,7 @@
 };
 
 // Weak Reference processing during an evacuation pause (part 1).
-void G1CollectedHeap::process_discovered_references(uint no_of_gc_workers) {
+void G1CollectedHeap::process_discovered_references() {
   double ref_proc_start = os::elapsedTime();
 
   ReferenceProcessor* rp = _ref_processor_stw;
@@ -5246,17 +5199,14 @@
   // referents points to another object which is also referenced by an
   // object discovered by the STW ref processor.
 
-  assert(no_of_gc_workers == workers()->active_workers(), "Need to reset active GC workers");
-
-  set_par_threads(no_of_gc_workers);
+  uint no_of_gc_workers = workers()->active_workers();
+
   G1ParPreserveCMReferentsTask keep_cm_referents(this,
                                                  no_of_gc_workers,
                                                  _task_queues);
 
   workers()->run_task(&keep_cm_referents);
 
-  set_par_threads(0);
-
   // Closure to test whether a referent is alive.
   G1STWIsAliveClosure is_alive(this);
 
@@ -5330,7 +5280,7 @@
 }
 
 // Weak Reference processing during an evacuation pause (part 2).
-void G1CollectedHeap::enqueue_discovered_references(uint no_of_gc_workers) {
+void G1CollectedHeap::enqueue_discovered_references() {
   double ref_enq_start = os::elapsedTime();
 
   ReferenceProcessor* rp = _ref_processor_stw;
@@ -5344,12 +5294,12 @@
   } else {
     // Parallel reference enqueueing
 
-    assert(no_of_gc_workers == workers()->active_workers(),
-           "Need to reset active workers");
-    assert(rp->num_q() == no_of_gc_workers, "sanity");
-    assert(no_of_gc_workers <= rp->max_num_q(), "sanity");
-
-    G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, no_of_gc_workers);
+    uint n_workers = workers()->active_workers();
+
+    assert(rp->num_q() == n_workers, "sanity");
+    assert(n_workers <= rp->max_num_q(), "sanity");
+
+    G1STWRefProcTaskExecutor par_task_executor(this, workers(), _task_queues, n_workers);
     rp->enqueue_discovered_references(&par_task_executor);
   }
 
@@ -5380,11 +5330,6 @@
   hot_card_cache->set_use_cache(false);
 
   const uint n_workers = workers()->active_workers();
-  assert(UseDynamicNumberOfGCThreads ||
-         n_workers == workers()->total_workers(),
-         "If not dynamic should be using all the  workers");
-  set_par_threads(n_workers);
-
 
   init_for_evac_failure(NULL);
 
@@ -5393,19 +5338,16 @@
   double end_par_time_sec;
 
   {
-    G1RootProcessor root_processor(this);
-    G1ParTask g1_par_task(this, _task_queues, &root_processor);
+    G1RootProcessor root_processor(this, n_workers);
+    G1ParTask g1_par_task(this, _task_queues, &root_processor, n_workers);
     // InitialMark needs claim bits to keep track of the marked-through CLDs.
     if (g1_policy()->during_initial_mark_pause()) {
       ClassLoaderDataGraph::clear_claimed_marks();
     }
 
-     // The individual threads will set their evac-failure closures.
-     if (PrintTerminationStats) G1ParScanThreadState::print_termination_stats_hdr();
-     // These tasks use ShareHeap::_process_strong_tasks
-     assert(UseDynamicNumberOfGCThreads ||
-            workers()->active_workers() == workers()->total_workers(),
-            "If not dynamic should be using all the  workers");
+    // The individual threads will set their evac-failure closures.
+    if (PrintTerminationStats) G1ParScanThreadState::print_termination_stats_hdr();
+
     workers()->run_task(&g1_par_task);
     end_par_time_sec = os::elapsedTime();
 
@@ -5425,14 +5367,12 @@
         (os::elapsedTime() - end_par_time_sec) * 1000.0;
   phase_times->record_code_root_fixup_time(code_root_fixup_time_ms);
 
-  set_par_threads(0);
-
   // Process any discovered reference objects - we have
   // to do this _before_ we retire the GC alloc regions
   // as we may have to copy some 'reachable' referent
   // objects (and their reachable sub-graphs) that were
   // not copied during the pause.
-  process_discovered_references(n_workers);
+  process_discovered_references();
 
   if (G1StringDedup::is_enabled()) {
     double fixup_start = os::elapsedTime();
@@ -5474,7 +5414,7 @@
   // will log these updates (and dirty their associated
   // cards). We need these updates logged to update any
   // RSets.
-  enqueue_discovered_references(n_workers);
+  enqueue_discovered_references();
 
   redirty_logged_cards();
   COMPILER2_PRESENT(DerivedPointerTable::update_pointers());
@@ -5779,9 +5719,7 @@
     // Iterate over the dirty cards region list.
     G1ParCleanupCTTask cleanup_task(ct_bs, this);
 
-    set_par_threads();
     workers()->run_task(&cleanup_task);
-    set_par_threads(0);
 #ifndef PRODUCT
     if (G1VerifyCTCleanup || VerifyAfterGC) {
       G1VerifyCardTableCleanup cleanup_verifier(this, ct_bs);
@@ -6314,21 +6252,6 @@
   g1mm()->update_eden_size();
 }
 
-void G1CollectedHeap::set_par_threads() {
-  // Don't change the number of workers.  Use the value previously set
-  // in the workgroup.
-  uint n_workers = workers()->active_workers();
-  assert(UseDynamicNumberOfGCThreads ||
-           n_workers == workers()->total_workers(),
-      "Otherwise should be using the total number of workers");
-  if (n_workers == 0) {
-    assert(false, "Should have been set in prior evacuation pause.");
-    n_workers = ParallelGCThreads;
-    workers()->set_active_workers(n_workers);
-  }
-  set_par_threads(n_workers);
-}
-
 // Methods for the GC alloc regions
 
 HeapRegion* G1CollectedHeap::new_gc_alloc_region(size_t word_size,
--- a/src/share/vm/gc/g1/g1CollectedHeap.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/g1CollectedHeap.hpp	Mon May 25 16:59:28 2015 +0200
@@ -606,11 +606,11 @@
 
   // Process any reference objects discovered during
   // an incremental evacuation pause.
-  void process_discovered_references(uint no_of_gc_workers);
+  void process_discovered_references();
 
   // Enqueue any remaining discovered references
   // after processing.
-  void enqueue_discovered_references(uint no_of_gc_workers);
+  void enqueue_discovered_references();
 
 public:
   FlexibleWorkGang* workers() const { return _workers; }
@@ -981,6 +981,8 @@
 
   RefToScanQueue *task_queue(uint i) const;
 
+  uint num_task_queues() const;
+
   // A set of cards where updates happened during the GC
   DirtyCardQueueSet& dirty_card_queue_set() { return _dirty_card_queue_set; }
 
@@ -1012,11 +1014,6 @@
   // Initialize weak reference processing.
   void ref_processing_init();
 
-  // Explicitly import set_par_threads into this scope
-  using CollectedHeap::set_par_threads;
-  // Set _n_par_threads according to a policy TBD.
-  void set_par_threads();
-
   virtual Name kind() const {
     return CollectedHeap::G1CollectedHeap;
   }
--- a/src/share/vm/gc/g1/g1CollectorPolicy.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/g1CollectorPolicy.cpp	Mon May 25 16:59:28 2015 +0200
@@ -1587,14 +1587,17 @@
 }
 
 void
-G1CollectorPolicy::record_concurrent_mark_cleanup_end(uint n_workers) {
+G1CollectorPolicy::record_concurrent_mark_cleanup_end() {
   _collectionSetChooser->clear();
 
+  FlexibleWorkGang* workers = _g1->workers();
+  uint n_workers = workers->active_workers();
+
   uint n_regions = _g1->num_regions();
   uint chunk_size = calculate_parallel_work_chunk_size(n_workers, n_regions);
-  _collectionSetChooser->prepare_for_par_region_addition(n_regions, chunk_size);
+  _collectionSetChooser->prepare_for_par_region_addition(n_workers, n_regions, chunk_size);
   ParKnownGarbageTask par_known_garbage_task(_collectionSetChooser, chunk_size, n_workers);
-  _g1->workers()->run_task(&par_known_garbage_task);
+  workers->run_task(&par_known_garbage_task);
 
   _collectionSetChooser->sort_regions();
 
--- a/src/share/vm/gc/g1/g1CollectorPolicy.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/g1CollectorPolicy.hpp	Mon May 25 16:59:28 2015 +0200
@@ -692,7 +692,7 @@
 
   // Record start, end, and completion of cleanup.
   void record_concurrent_mark_cleanup_start();
-  void record_concurrent_mark_cleanup_end(uint n_workers);
+  void record_concurrent_mark_cleanup_end();
   void record_concurrent_mark_cleanup_completed();
 
   // Records the information about the heap size for reporting in
--- a/src/share/vm/gc/g1/g1MarkSweep.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/g1MarkSweep.cpp	Mon May 25 16:59:28 2015 +0200
@@ -127,7 +127,7 @@
 
   MarkingCodeBlobClosure follow_code_closure(&GenMarkSweep::follow_root_closure, !CodeBlobToOopClosure::FixRelocations);
   {
-    G1RootProcessor root_processor(g1h);
+    G1RootProcessor root_processor(g1h, 1);
     root_processor.process_strong_roots(&GenMarkSweep::follow_root_closure,
                                         &GenMarkSweep::follow_cld_closure,
                                         &follow_code_closure);
@@ -237,7 +237,7 @@
 
   CodeBlobToOopClosure adjust_code_closure(&GenMarkSweep::adjust_pointer_closure, CodeBlobToOopClosure::FixRelocations);
   {
-    G1RootProcessor root_processor(g1h);
+    G1RootProcessor root_processor(g1h, 1);
     root_processor.process_all_roots(&GenMarkSweep::adjust_pointer_closure,
                                      &GenMarkSweep::adjust_cld_closure,
                                      &adjust_code_closure);
--- a/src/share/vm/gc/g1/g1OopClosures.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/g1OopClosures.cpp	Mon May 25 16:59:28 2015 +0200
@@ -50,8 +50,8 @@
   _par_scan_state = par_scan_state;
   _worker_id = par_scan_state->queue_num();
 
-  assert(_worker_id < MAX2((uint)ParallelGCThreads, 1u),
-         err_msg("The given worker id %u must be less than the number of threads %u", _worker_id, MAX2((uint)ParallelGCThreads, 1u)));
+  assert(_worker_id < ParallelGCThreads,
+         err_msg("The given worker id %u must be less than the number of threads " UINTX_FORMAT, _worker_id, ParallelGCThreads));
 }
 
 // Generate G1 specialized oop_oop_iterate functions.
--- a/src/share/vm/gc/g1/g1RootProcessor.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/g1RootProcessor.cpp	Mon May 25 16:59:28 2015 +0200
@@ -90,11 +90,10 @@
 
 
 void G1RootProcessor::worker_has_discovered_all_strong_classes() {
-  uint n_workers = _g1h->n_par_threads();
   assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
 
   uint new_value = (uint)Atomic::add(1, &_n_workers_discovered_strong_classes);
-  if (new_value == n_workers) {
+  if (new_value == n_workers()) {
     // This thread is last. Notify the others.
     MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag);
     _lock.notify_all();
@@ -102,21 +101,20 @@
 }
 
 void G1RootProcessor::wait_until_all_strong_classes_discovered() {
-  uint n_workers = _g1h->n_par_threads();
   assert(ClassUnloadingWithConcurrentMark, "Currently only needed when doing G1 Class Unloading");
 
-  if ((uint)_n_workers_discovered_strong_classes != n_workers) {
+  if ((uint)_n_workers_discovered_strong_classes != n_workers()) {
     MonitorLockerEx ml(&_lock, Mutex::_no_safepoint_check_flag);
-    while ((uint)_n_workers_discovered_strong_classes != n_workers) {
+    while ((uint)_n_workers_discovered_strong_classes != n_workers()) {
       _lock.wait(Mutex::_no_safepoint_check_flag, 0, false);
     }
   }
 }
 
-G1RootProcessor::G1RootProcessor(G1CollectedHeap* g1h) :
+G1RootProcessor::G1RootProcessor(G1CollectedHeap* g1h, uint n_workers) :
     _g1h(g1h),
     _process_strong_tasks(new SubTasksDone(G1RP_PS_NumElements)),
-    _srs(),
+    _srs(n_workers),
     _lock(Mutex::leaf, "G1 Root Scanning barrier lock", false, Monitor::_safepoint_check_never),
     _n_workers_discovered_strong_classes(0) {}
 
@@ -206,7 +204,7 @@
     }
   }
 
-  _process_strong_tasks->all_tasks_completed();
+  _process_strong_tasks->all_tasks_completed(n_workers());
 }
 
 void G1RootProcessor::process_strong_roots(OopClosure* oops,
@@ -216,7 +214,7 @@
   process_java_roots(oops, clds, clds, NULL, blobs, NULL, 0);
   process_vm_roots(oops, NULL, NULL, 0);
 
-  _process_strong_tasks->all_tasks_completed();
+  _process_strong_tasks->all_tasks_completed(n_workers());
 }
 
 void G1RootProcessor::process_all_roots(OopClosure* oops,
@@ -230,7 +228,7 @@
     CodeCache::blobs_do(blobs);
   }
 
-  _process_strong_tasks->all_tasks_completed();
+  _process_strong_tasks->all_tasks_completed(n_workers());
 }
 
 void G1RootProcessor::process_java_roots(OopClosure* strong_roots,
@@ -253,7 +251,7 @@
 
   {
     G1GCParPhaseTimesTracker x(phase_times, G1GCPhaseTimes::ThreadRoots, worker_i);
-    bool is_par = _g1h->n_par_threads() > 0;
+    bool is_par = n_workers() > 1;
     Threads::possibly_parallel_oops_do(is_par, strong_roots, thread_stack_clds, strong_code);
   }
 }
@@ -329,6 +327,6 @@
   _g1h->g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i);
 }
 
-void G1RootProcessor::set_num_workers(uint active_workers) {
-  _process_strong_tasks->set_n_threads(active_workers);
+uint G1RootProcessor::n_workers() const {
+  return _srs.n_threads();
 }
--- a/src/share/vm/gc/g1/g1RootProcessor.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/g1RootProcessor.hpp	Mon May 25 16:59:28 2015 +0200
@@ -85,7 +85,7 @@
                         uint worker_i);
 
 public:
-  G1RootProcessor(G1CollectedHeap* g1h);
+  G1RootProcessor(G1CollectedHeap* g1h, uint n_workers);
 
   // Apply closures to the strongly and weakly reachable roots in the system
   // in a single pass.
@@ -114,8 +114,8 @@
                             OopClosure* scan_non_heap_weak_roots,
                             uint worker_i);
 
-  // Inform the root processor about the number of worker threads
-  void set_num_workers(uint active_workers);
+  // Number of worker threads used by the root processor.
+  uint n_workers() const;
 };
 
 #endif // SHARE_VM_GC_G1_G1ROOTPROCESSOR_HPP
--- a/src/share/vm/gc/g1/g1StringDedup.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/g1StringDedup.cpp	Mon May 25 16:59:28 2015 +0200
@@ -153,9 +153,7 @@
 
   G1StringDedupUnlinkOrOopsDoTask task(is_alive, keep_alive, allow_resize_and_rehash, phase_times);
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  g1h->set_par_threads();
   g1h->workers()->run_task(&task);
-  g1h->set_par_threads(0);
 }
 
 void G1StringDedup::threads_do(ThreadClosure* tc) {
--- a/src/share/vm/gc/g1/g1StringDedupQueue.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/g1StringDedupQueue.cpp	Mon May 25 16:59:28 2015 +0200
@@ -42,7 +42,7 @@
   _cancel(false),
   _empty(true),
   _dropped(0) {
-  _nqueues = MAX2(ParallelGCThreads, (size_t)1);
+  _nqueues = ParallelGCThreads;
   _queues = NEW_C_HEAP_ARRAY(G1StringDedupWorkerQueue, _nqueues, mtGC);
   for (size_t i = 0; i < _nqueues; i++) {
     new (_queues + i) G1StringDedupWorkerQueue(G1StringDedupWorkerQueue::default_segment_size(), _max_cache_size, _max_size);
--- a/src/share/vm/gc/g1/g1StringDedupTable.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/g1/g1StringDedupTable.cpp	Mon May 25 16:59:28 2015 +0200
@@ -112,7 +112,7 @@
 };
 
 G1StringDedupEntryCache::G1StringDedupEntryCache() {
-  _nlists = MAX2(ParallelGCThreads, (size_t)1);
+  _nlists = ParallelGCThreads;
   _lists = PaddedArray<G1StringDedupEntryFreeList, mtGC>::create_unfreeable((uint)_nlists);
 }
 
--- a/src/share/vm/gc/parallel/psParallelCompact.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/parallel/psParallelCompact.cpp	Mon May 25 16:59:28 2015 +0200
@@ -832,9 +832,9 @@
   _ref_processor =
     new ReferenceProcessor(mr,            // span
                            ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing
-                           (int) ParallelGCThreads, // mt processing degree
+                           (uint) ParallelGCThreads, // mt processing degree
                            true,          // mt discovery
-                           (int) ParallelGCThreads, // mt discovery degree
+                           (uint) ParallelGCThreads, // mt discovery degree
                            true,          // atomic_discovery
                            &_is_alive_closure); // non-header is alive closure
   _counters = new CollectorCounters("PSParallelCompact", 1);
@@ -2029,7 +2029,6 @@
     // Set the number of GC threads to be used in this collection
     gc_task_manager()->set_active_gang();
     gc_task_manager()->task_idle_workers();
-    heap->set_par_threads(gc_task_manager()->active_workers());
 
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
     GCTraceTime t1(GCCauseString("Full GC", gc_cause), PrintGC, !PrintGCDetails, NULL, _gc_tracer.gc_id());
--- a/src/share/vm/gc/parallel/psScavenge.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/parallel/psScavenge.cpp	Mon May 25 16:59:28 2015 +0200
@@ -382,7 +382,6 @@
     // Get the active number of workers here and use that value
     // throughout the methods.
     uint active_workers = gc_task_manager()->active_workers();
-    heap->set_par_threads(active_workers);
 
     PSPromotionManager::pre_scavenge();
 
@@ -846,9 +845,9 @@
   _ref_processor =
     new ReferenceProcessor(mr,                         // span
                            ParallelRefProcEnabled && (ParallelGCThreads > 1), // mt processing
-                           (int) ParallelGCThreads,    // mt processing degree
+                           (uint) ParallelGCThreads,   // mt processing degree
                            true,                       // mt discovery
-                           (int) ParallelGCThreads,    // mt discovery degree
+                           (uint) ParallelGCThreads,   // mt discovery degree
                            true,                       // atomic_discovery
                            NULL);                      // header provides liveness info
 
--- a/src/share/vm/gc/serial/defNewGeneration.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/serial/defNewGeneration.cpp	Mon May 25 16:59:28 2015 +0200
@@ -38,6 +38,7 @@
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/space.inline.hpp"
 #include "gc/shared/spaceDecorator.hpp"
+#include "gc/shared/strongRootsScope.hpp"
 #include "memory/iterator.hpp"
 #include "oops/instanceRefKlass.hpp"
 #include "oops/oop.inline.hpp"
@@ -454,7 +455,7 @@
   }
 }
 
-void DefNewGeneration::younger_refs_iterate(OopsInGenClosure* cl) {
+void DefNewGeneration::younger_refs_iterate(OopsInGenClosure* cl, uint n_threads) {
   assert(false, "NYI -- are you sure you want to call this?");
 }
 
@@ -625,15 +626,22 @@
   assert(gch->no_allocs_since_save_marks(0),
          "save marks have not been newly set.");
 
-  gch->gen_process_roots(_level,
-                         true,  // Process younger gens, if any,
-                                // as strong roots.
-                         true,  // activate StrongRootsScope
-                         GenCollectedHeap::SO_ScavengeCodeCache,
-                         GenCollectedHeap::StrongAndWeakRoots,
-                         &fsc_with_no_gc_barrier,
-                         &fsc_with_gc_barrier,
-                         &cld_scan_closure);
+  {
+    // DefNew needs to run with n_threads == 0, to make sure the serial
+    // version of the card table scanning code is used.
+    // See: CardTableModRefBS::non_clean_card_iterate_possibly_parallel.
+    StrongRootsScope srs(0);
+
+    gch->gen_process_roots(&srs,
+                           _level,
+                           true,  // Process younger gens, if any,
+                                  // as strong roots.
+                           GenCollectedHeap::SO_ScavengeCodeCache,
+                           GenCollectedHeap::StrongAndWeakRoots,
+                           &fsc_with_no_gc_barrier,
+                           &fsc_with_gc_barrier,
+                           &cld_scan_closure);
+  }
 
   // "evacuate followers".
   evacuate_followers.do_void();
--- a/src/share/vm/gc/serial/defNewGeneration.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/serial/defNewGeneration.hpp	Mon May 25 16:59:28 2015 +0200
@@ -255,7 +255,7 @@
   // Iteration
   void object_iterate(ObjectClosure* blk);
 
-  void younger_refs_iterate(OopsInGenClosure* cl);
+  void younger_refs_iterate(OopsInGenClosure* cl, uint n_threads);
 
   void space_iterate(SpaceClosure* blk, bool usedOnly = false);
 
--- a/src/share/vm/gc/serial/genMarkSweep.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/serial/genMarkSweep.cpp	Mon May 25 16:59:28 2015 +0200
@@ -40,6 +40,7 @@
 #include "gc/shared/modRefBarrierSet.hpp"
 #include "gc/shared/referencePolicy.hpp"
 #include "gc/shared/space.hpp"
+#include "gc/shared/strongRootsScope.hpp"
 #include "oops/instanceRefKlass.hpp"
 #include "oops/oop.inline.hpp"
 #include "prims/jvmtiExport.hpp"
@@ -200,14 +201,18 @@
   // Need new claim bits before marking starts.
   ClassLoaderDataGraph::clear_claimed_marks();
 
-  gch->gen_process_roots(level,
-                         false, // Younger gens are not roots.
-                         true,  // activate StrongRootsScope
-                         GenCollectedHeap::SO_None,
-                         GenCollectedHeap::StrongRootsOnly,
-                         &follow_root_closure,
-                         &follow_root_closure,
-                         &follow_cld_closure);
+  {
+    StrongRootsScope srs(1);
+
+    gch->gen_process_roots(&srs,
+                           level,
+                           false, // Younger gens are not roots.
+                           GenCollectedHeap::SO_None,
+                           GenCollectedHeap::StrongRootsOnly,
+                           &follow_root_closure,
+                           &follow_root_closure,
+                           &follow_cld_closure);
+  }
 
   // Process reference objects found during marking
   {
@@ -284,14 +289,18 @@
   assert(level == 1, "We don't use mark-sweep on young generations.");
   adjust_pointer_closure.set_orig_generation(gch->old_gen());
 
-  gch->gen_process_roots(level,
-                         false, // Younger gens are not roots.
-                         true,  // activate StrongRootsScope
-                         GenCollectedHeap::SO_AllCodeCache,
-                         GenCollectedHeap::StrongAndWeakRoots,
-                         &adjust_pointer_closure,
-                         &adjust_pointer_closure,
-                         &adjust_cld_closure);
+  {
+    StrongRootsScope srs(1);
+
+    gch->gen_process_roots(&srs,
+                           level,
+                           false, // Younger gens are not roots.
+                           GenCollectedHeap::SO_AllCodeCache,
+                           GenCollectedHeap::StrongAndWeakRoots,
+                           &adjust_pointer_closure,
+                           &adjust_pointer_closure,
+                           &adjust_cld_closure);
+  }
 
   gch->gen_process_weak_roots(&adjust_pointer_closure);
 
--- a/src/share/vm/gc/shared/adaptiveSizePolicy.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/adaptiveSizePolicy.cpp	Mon May 25 16:59:28 2015 +0200
@@ -161,7 +161,7 @@
       }
       _debug_perturbation = !_debug_perturbation;
     }
-    assert((new_active_workers <= (uintx) ParallelGCThreads) &&
+    assert((new_active_workers <= ParallelGCThreads) &&
            (new_active_workers >= min_workers),
       "Jiggled active workers too much");
   }
--- a/src/share/vm/gc/shared/cardGeneration.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/cardGeneration.cpp	Mon May 25 16:59:28 2015 +0200
@@ -353,8 +353,8 @@
   blk->do_space(space());
 }
 
-void CardGeneration::younger_refs_iterate(OopsInGenClosure* blk) {
+void CardGeneration::younger_refs_iterate(OopsInGenClosure* blk, uint n_threads) {
   blk->set_generation(this);
-  younger_refs_in_space_iterate(space(), blk);
+  younger_refs_in_space_iterate(space(), blk, n_threads);
   blk->reset_generation();
 }
--- a/src/share/vm/gc/shared/cardGeneration.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/cardGeneration.hpp	Mon May 25 16:59:28 2015 +0200
@@ -89,7 +89,7 @@
 
   void space_iterate(SpaceClosure* blk, bool usedOnly = false);
 
-  void younger_refs_iterate(OopsInGenClosure* blk);
+  void younger_refs_iterate(OopsInGenClosure* blk, uint n_threads);
 
   bool is_in(const void* p) const;
 
--- a/src/share/vm/gc/shared/cardTableModRefBS.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/cardTableModRefBS.cpp	Mon May 25 16:59:28 2015 +0200
@@ -440,31 +440,11 @@
 void CardTableModRefBS::non_clean_card_iterate_possibly_parallel(Space* sp,
                                                                  MemRegion mr,
                                                                  OopsInGenClosure* cl,
-                                                                 CardTableRS* ct) {
+                                                                 CardTableRS* ct,
+                                                                 uint n_threads) {
   if (!mr.is_empty()) {
-    // Caller (process_roots()) claims that all GC threads
-    // execute this call.  With UseDynamicNumberOfGCThreads now all
-    // active GC threads execute this call.  The number of active GC
-    // threads needs to be passed to par_non_clean_card_iterate_work()
-    // to get proper partitioning and termination.
-    //
-    // This is an example of where n_par_threads() is used instead
-    // of workers()->active_workers().  n_par_threads can be set to 0 to
-    // turn off parallelism.  For example when this code is called as
-    // part of verification during root processing then n_par_threads()
-    // may have been set to 0. active_workers is not overloaded with
-    // the meaning that it is a switch to disable parallelism and so keeps
-    // the meaning of the number of active gc workers. If parallelism has
-    // not been shut off by setting n_par_threads to 0, then n_par_threads
-    // should be equal to active_workers.  When a different mechanism for
-    // shutting off parallelism is used, then active_workers can be used in
-    // place of n_par_threads.
-    int n_threads =  GenCollectedHeap::heap()->n_par_threads();
-    bool is_par = n_threads > 0;
-    if (is_par) {
+    if (n_threads > 0) {
 #if INCLUDE_ALL_GCS
-      assert(GenCollectedHeap::heap()->n_par_threads() ==
-             GenCollectedHeap::heap()->workers()->active_workers(), "Mismatch");
       non_clean_card_iterate_parallel_work(sp, mr, cl, ct, n_threads);
 #else  // INCLUDE_ALL_GCS
       fatal("Parallel gc not supported here.");
@@ -472,8 +452,11 @@
     } else {
       // clear_cl finds contiguous dirty ranges of cards to process and clear.
 
-      DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(), cl->gen_boundary());
-      ClearNoncleanCardWrapper clear_cl(dcto_cl, ct);
+      // This is the single-threaded version used by DefNew.
+      const bool parallel = false;
+
+      DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision(), cl->gen_boundary(), parallel);
+      ClearNoncleanCardWrapper clear_cl(dcto_cl, ct, parallel);
 
       clear_cl.do_MemRegion(mr);
     }
--- a/src/share/vm/gc/shared/cardTableModRefBS.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/cardTableModRefBS.hpp	Mon May 25 16:59:28 2015 +0200
@@ -178,14 +178,15 @@
   // region mr in the given space and apply cl to any dirty sub-regions
   // of mr. Clears the dirty cards as they are processed.
   void non_clean_card_iterate_possibly_parallel(Space* sp, MemRegion mr,
-                                                OopsInGenClosure* cl, CardTableRS* ct);
+                                                OopsInGenClosure* cl, CardTableRS* ct,
+                                                uint n_threads);
 
  private:
   // Work method used to implement non_clean_card_iterate_possibly_parallel()
   // above in the parallel case.
   void non_clean_card_iterate_parallel_work(Space* sp, MemRegion mr,
                                             OopsInGenClosure* cl, CardTableRS* ct,
-                                            int n_threads);
+                                            uint n_threads);
 
  protected:
   // Dirty the bytes corresponding to "mr" (not all of which must be
--- a/src/share/vm/gc/shared/cardTableRS.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/cardTableRS.cpp	Mon May 25 16:59:28 2015 +0200
@@ -102,9 +102,10 @@
 }
 
 void CardTableRS::younger_refs_iterate(Generation* g,
-                                       OopsInGenClosure* blk) {
+                                       OopsInGenClosure* blk,
+                                       uint n_threads) {
   _last_cur_val_in_gen[g->level()+1] = cur_youngergen_card_val();
-  g->younger_refs_iterate(blk);
+  g->younger_refs_iterate(blk, n_threads);
 }
 
 inline bool ClearNoncleanCardWrapper::clear_card(jbyte* entry) {
@@ -164,15 +165,8 @@
 }
 
 ClearNoncleanCardWrapper::ClearNoncleanCardWrapper(
-  DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct) :
-    _dirty_card_closure(dirty_card_closure), _ct(ct) {
-    // Cannot yet substitute active_workers for n_par_threads
-    // in the case where parallelism is being turned off by
-    // setting n_par_threads to 0.
-    _is_par = (GenCollectedHeap::heap()->n_par_threads() > 0);
-    assert(!_is_par ||
-           (GenCollectedHeap::heap()->n_par_threads() ==
-            GenCollectedHeap::heap()->workers()->active_workers()), "Mismatch");
+  DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct, bool is_par) :
+    _dirty_card_closure(dirty_card_closure), _ct(ct), _is_par(is_par) {
 }
 
 bool ClearNoncleanCardWrapper::is_word_aligned(jbyte* entry) {
@@ -272,7 +266,8 @@
 }
 
 void CardTableRS::younger_refs_in_space_iterate(Space* sp,
-                                                OopsInGenClosure* cl) {
+                                                OopsInGenClosure* cl,
+                                                uint n_threads) {
   const MemRegion urasm = sp->used_region_at_save_marks();
 #ifdef ASSERT
   // Convert the assertion check to a warning if we are running
@@ -301,7 +296,7 @@
     ShouldNotReachHere();
   }
 #endif
-  _ct_bs->non_clean_card_iterate_possibly_parallel(sp, urasm, cl, this);
+  _ct_bs->non_clean_card_iterate_possibly_parallel(sp, urasm, cl, this, n_threads);
 }
 
 void CardTableRS::clear_into_younger(Generation* old_gen) {
--- a/src/share/vm/gc/shared/cardTableRS.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/cardTableRS.hpp	Mon May 25 16:59:28 2015 +0200
@@ -56,7 +56,7 @@
 
   CardTableModRefBSForCTRS* _ct_bs;
 
-  virtual void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl);
+  virtual void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl, uint n_threads);
 
   void verify_space(Space* s, HeapWord* gen_start);
 
@@ -116,7 +116,7 @@
   // Card table entries are cleared before application; "blk" is
   // responsible for dirtying if the oop is still older-to-younger after
   // closure application.
-  void younger_refs_iterate(Generation* g, OopsInGenClosure* blk);
+  void younger_refs_iterate(Generation* g, OopsInGenClosure* blk, uint n_threads);
 
   void inline_write_ref_field_gc(void* field, oop new_val) {
     jbyte* byte = _ct_bs->byte_for(field);
@@ -183,7 +183,7 @@
   bool is_word_aligned(jbyte* entry);
 
 public:
-  ClearNoncleanCardWrapper(DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct);
+  ClearNoncleanCardWrapper(DirtyCardToOopClosure* dirty_card_closure, CardTableRS* ct, bool is_par);
   void do_MemRegion(MemRegion mr);
 };
 
--- a/src/share/vm/gc/shared/collectedHeap.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/collectedHeap.cpp	Mon May 25 16:59:28 2015 +0200
@@ -160,8 +160,7 @@
 // Memory state functions.
 
 
-CollectedHeap::CollectedHeap() : _n_par_threads(0)
-{
+CollectedHeap::CollectedHeap() {
   const size_t max_len = size_t(arrayOopDesc::max_array_length(T_INT));
   const size_t elements_per_word = HeapWordSize / sizeof(jint);
   _filler_array_max_size = align_object_size(filler_array_hdr_size() +
--- a/src/share/vm/gc/shared/collectedHeap.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/collectedHeap.hpp	Mon May 25 16:59:28 2015 +0200
@@ -101,7 +101,6 @@
  protected:
   BarrierSet* _barrier_set;
   bool _is_gc_active;
-  uint _n_par_threads;
 
   unsigned int _total_collections;          // ... started
   unsigned int _total_full_collections;     // ... started
@@ -291,12 +290,6 @@
   }
   GCCause::Cause gc_cause() { return _gc_cause; }
 
-  // Number of threads currently working on GC tasks.
-  uint n_par_threads() { return _n_par_threads; }
-
-  // May be overridden to set additional parallelism.
-  virtual void set_par_threads(uint t) { _n_par_threads = t; };
-
   // General obj/array allocation facilities.
   inline static oop obj_allocate(KlassHandle klass, int size, TRAPS);
   inline static oop array_allocate(KlassHandle klass, int size, int length, TRAPS);
--- a/src/share/vm/gc/shared/genCollectedHeap.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/genCollectedHeap.cpp	Mon May 25 16:59:28 2015 +0200
@@ -561,16 +561,6 @@
   return collector_policy()->satisfy_failed_allocation(size, is_tlab);
 }
 
-void GenCollectedHeap::set_par_threads(uint t) {
-  assert(t == 0 || !UseSerialGC, "Cannot have parallel threads");
-  CollectedHeap::set_par_threads(t);
-  set_n_termination(t);
-}
-
-void GenCollectedHeap::set_n_termination(uint t) {
-  _process_strong_tasks->set_n_threads(t);
-}
-
 #ifdef ASSERT
 class AssertNonScavengableClosure: public OopClosure {
 public:
@@ -582,15 +572,13 @@
 static AssertNonScavengableClosure assert_is_non_scavengable_closure;
 #endif
 
-void GenCollectedHeap::process_roots(bool activate_scope,
+void GenCollectedHeap::process_roots(StrongRootsScope* scope,
                                      ScanningOption so,
                                      OopClosure* strong_roots,
                                      OopClosure* weak_roots,
                                      CLDClosure* strong_cld_closure,
                                      CLDClosure* weak_cld_closure,
                                      CodeBlobClosure* code_roots) {
-  StrongRootsScope srs(activate_scope);
-
   // General roots.
   assert(Threads::thread_claim_parity() != 0, "must have called prologue code");
   assert(code_roots != NULL, "code root closure should always be set");
@@ -609,7 +597,7 @@
   // Only process code roots from thread stacks if we aren't visiting the entire CodeCache anyway
   CodeBlobClosure* roots_from_code_p = (so & SO_AllCodeCache) ? NULL : code_roots;
 
-  bool is_par = n_par_threads() > 0;
+  bool is_par = scope->n_threads() > 1;
   Threads::possibly_parallel_oops_do(is_par, strong_roots, roots_from_clds_p, roots_from_code_p);
 
   if (!_process_strong_tasks->is_task_claimed(GCH_PS_Universe_oops_do)) {
@@ -669,9 +657,9 @@
 
 }
 
-void GenCollectedHeap::gen_process_roots(int level,
+void GenCollectedHeap::gen_process_roots(StrongRootsScope* scope,
+                                         int level,
                                          bool younger_gens_as_roots,
-                                         bool activate_scope,
                                          ScanningOption so,
                                          bool only_strong_roots,
                                          OopsInGenClosure* not_older_gens,
@@ -689,7 +677,7 @@
   OopsInGenClosure* weak_roots = only_strong_roots ? NULL : not_older_gens;
   CLDClosure* weak_cld_closure = only_strong_roots ? NULL : cld_closure;
 
-  process_roots(activate_scope, so,
+  process_roots(scope, so,
                 not_older_gens, weak_roots,
                 cld_closure, weak_cld_closure,
                 &mark_code_closure);
@@ -707,11 +695,11 @@
   // older-gen scanning.
   if (level == 0) {
     older_gens->set_generation(_old_gen);
-    rem_set()->younger_refs_iterate(_old_gen, older_gens);
+    rem_set()->younger_refs_iterate(_old_gen, older_gens, scope->n_threads());
     older_gens->reset_generation();
   }
 
-  _process_strong_tasks->all_tasks_completed();
+  _process_strong_tasks->all_tasks_completed(scope->n_threads());
 }
 
 
--- a/src/share/vm/gc/shared/genCollectedHeap.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/genCollectedHeap.hpp	Mon May 25 16:59:28 2015 +0200
@@ -30,8 +30,9 @@
 #include "gc/shared/collectorPolicy.hpp"
 #include "gc/shared/generation.hpp"
 
+class FlexibleWorkGang;
+class StrongRootsScope;
 class SubTasksDone;
-class FlexibleWorkGang;
 
 // A "GenCollectedHeap" is a CollectedHeap that uses generational
 // collection.  It has two generations, young and old.
@@ -363,9 +364,6 @@
   // asserted to be this type.
   static GenCollectedHeap* heap();
 
-  void set_par_threads(uint t);
-  void set_n_termination(uint t);
-
   // Invoke the "do_oop" method of one of the closures "not_older_gens"
   // or "older_gens" on root locations for the generation at
   // "level".  (The "older_gens" closure is used for scanning references
@@ -385,7 +383,7 @@
   };
 
  private:
-  void process_roots(bool activate_scope,
+  void process_roots(StrongRootsScope* scope,
                      ScanningOption so,
                      OopClosure* strong_roots,
                      OopClosure* weak_roots,
@@ -393,24 +391,13 @@
                      CLDClosure* weak_cld_closure,
                      CodeBlobClosure* code_roots);
 
-  void gen_process_roots(int level,
-                         bool younger_gens_as_roots,
-                         bool activate_scope,
-                         ScanningOption so,
-                         OopsInGenClosure* not_older_gens,
-                         OopsInGenClosure* weak_roots,
-                         OopsInGenClosure* older_gens,
-                         CLDClosure* cld_closure,
-                         CLDClosure* weak_cld_closure,
-                         CodeBlobClosure* code_closure);
-
  public:
   static const bool StrongAndWeakRoots = false;
   static const bool StrongRootsOnly    = true;
 
-  void gen_process_roots(int level,
+  void gen_process_roots(StrongRootsScope* scope,
+                         int level,
                          bool younger_gens_as_roots,
-                         bool activate_scope,
                          ScanningOption so,
                          bool only_strong_roots,
                          OopsInGenClosure* not_older_gens,
--- a/src/share/vm/gc/shared/genOopClosures.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/genOopClosures.hpp	Mon May 25 16:59:28 2015 +0200
@@ -35,11 +35,6 @@
 class DefNewGeneration;
 class KlassRemSet;
 
-template<class E, MEMFLAGS F, unsigned int N> class GenericTaskQueue;
-typedef GenericTaskQueue<oop, mtGC, TASKQUEUE_SIZE> OopTaskQueue;
-template<class T, MEMFLAGS F> class GenericTaskQueueSet;
-typedef GenericTaskQueueSet<OopTaskQueue, mtGC> OopTaskQueueSet;
-
 // Closure for iterating roots from a particular generation
 // Note: all classes deriving from this MUST call this do_barrier
 // method at the end of their own do_oop method!
--- a/src/share/vm/gc/shared/genRemSet.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/genRemSet.hpp	Mon May 25 16:59:28 2015 +0200
@@ -77,10 +77,11 @@
   //  1) that are in objects allocated in "g" at the time of the last call
   //     to "save_Marks", and
   //  2) that point to objects in younger generations.
-  virtual void younger_refs_iterate(Generation* g, OopsInGenClosure* blk) = 0;
+  virtual void younger_refs_iterate(Generation* g, OopsInGenClosure* blk, uint n_threads) = 0;
 
   virtual void younger_refs_in_space_iterate(Space* sp,
-                                             OopsInGenClosure* cl) = 0;
+                                             OopsInGenClosure* cl,
+                                             uint n_threads) = 0;
 
   // This method is used to notify the remembered set that "new_val" has
   // been written into "field" by the garbage collector.
--- a/src/share/vm/gc/shared/generation.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/generation.cpp	Mon May 25 16:59:28 2015 +0200
@@ -293,9 +293,10 @@
 }
 
 void Generation::younger_refs_in_space_iterate(Space* sp,
-                                               OopsInGenClosure* cl) {
+                                               OopsInGenClosure* cl,
+                                               uint n_threads) {
   GenRemSet* rs = GenCollectedHeap::heap()->rem_set();
-  rs->younger_refs_in_space_iterate(sp, cl);
+  rs->younger_refs_in_space_iterate(sp, cl, n_threads);
 }
 
 class GenerationObjIterateClosure : public SpaceClosure {
--- a/src/share/vm/gc/shared/generation.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/generation.hpp	Mon May 25 16:59:28 2015 +0200
@@ -122,7 +122,7 @@
   // The iteration is only over objects allocated at the start of the
   // iterations; objects allocated as a result of applying the closure are
   // not included.
-  void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl);
+  void younger_refs_in_space_iterate(Space* sp, OopsInGenClosure* cl, uint n_threads);
 
  public:
   // The set of possible generation kinds.
@@ -526,7 +526,7 @@
   // in the current generation that contain pointers to objects in younger
   // generations. Objects allocated since the last "save_marks" call are
   // excluded.
-  virtual void younger_refs_iterate(OopsInGenClosure* cl) = 0;
+  virtual void younger_refs_iterate(OopsInGenClosure* cl, uint n_threads) = 0;
 
   // Inform a generation that it longer contains references to objects
   // in any younger generation.    [e.g. Because younger gens are empty,
--- a/src/share/vm/gc/shared/space.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/space.cpp	Mon May 25 16:59:28 2015 +0200
@@ -181,7 +181,8 @@
 
 DirtyCardToOopClosure* Space::new_dcto_cl(ExtendedOopClosure* cl,
                                           CardTableModRefBS::PrecisionStyle precision,
-                                          HeapWord* boundary) {
+                                          HeapWord* boundary,
+                                          bool parallel) {
   return new DirtyCardToOopClosure(this, cl, precision, boundary);
 }
 
@@ -260,7 +261,8 @@
 DirtyCardToOopClosure*
 ContiguousSpace::new_dcto_cl(ExtendedOopClosure* cl,
                              CardTableModRefBS::PrecisionStyle precision,
-                             HeapWord* boundary) {
+                             HeapWord* boundary,
+                             bool parallel) {
   return new ContiguousSpaceDCTOC(this, cl, precision, boundary);
 }
 
--- a/src/share/vm/gc/shared/space.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/space.hpp	Mon May 25 16:59:28 2015 +0200
@@ -183,7 +183,8 @@
   // operate. ResourceArea allocated.
   virtual DirtyCardToOopClosure* new_dcto_cl(ExtendedOopClosure* cl,
                                              CardTableModRefBS::PrecisionStyle precision,
-                                             HeapWord* boundary = NULL);
+                                             HeapWord* boundary,
+                                             bool parallel);
 
   // If "p" is in the space, returns the address of the start of the
   // "block" that contains "p".  We say "block" instead of "object" since
@@ -629,7 +630,8 @@
   // Override.
   DirtyCardToOopClosure* new_dcto_cl(ExtendedOopClosure* cl,
                                      CardTableModRefBS::PrecisionStyle precision,
-                                     HeapWord* boundary = NULL);
+                                     HeapWord* boundary,
+                                     bool parallel);
 
   // Apply "blk->do_oop" to the addresses of all reference fields in objects
   // starting with the _saved_mark_word, which was noted during a generation's
--- a/src/share/vm/gc/shared/strongRootsScope.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/strongRootsScope.cpp	Mon May 25 16:59:28 2015 +0200
@@ -28,24 +28,18 @@
 #include "gc/shared/strongRootsScope.hpp"
 #include "runtime/thread.hpp"
 
-MarkScope::MarkScope(bool activate) : _active(activate) {
-  if (_active) {
-    nmethod::oops_do_marking_prologue();
-  }
+MarkScope::MarkScope() {
+  nmethod::oops_do_marking_prologue();
 }
 
 MarkScope::~MarkScope() {
-  if (_active) {
-    nmethod::oops_do_marking_epilogue();
-  }
+  nmethod::oops_do_marking_epilogue();
 }
 
-StrongRootsScope::StrongRootsScope(bool activate) : MarkScope(activate) {
-  if (_active) {
-    Threads::change_thread_claim_parity();
-    // Zero the claimed high water mark in the StringTable
-    StringTable::clear_parallel_claimed_index();
-  }
+StrongRootsScope::StrongRootsScope(uint n_threads) : _n_threads(n_threads) {
+  Threads::change_thread_claim_parity();
+  // Zero the claimed high water mark in the StringTable
+  StringTable::clear_parallel_claimed_index();
 }
 
 StrongRootsScope::~StrongRootsScope() {
--- a/src/share/vm/gc/shared/strongRootsScope.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/strongRootsScope.hpp	Mon May 25 16:59:28 2015 +0200
@@ -29,18 +29,21 @@
 
 class MarkScope : public StackObj {
  protected:
-  bool _active;
- public:
-  MarkScope(bool activate = true);
+  MarkScope();
   ~MarkScope();
 };
 
 // Sets up and tears down the required state for parallel root processing.
 
 class StrongRootsScope : public MarkScope {
+  // Number of threads participating in the roots processing.
+  const uint _n_threads;
+
  public:
-  StrongRootsScope(bool activate = true);
+  StrongRootsScope(uint n_threads);
   ~StrongRootsScope();
+
+  uint n_threads() const { return _n_threads; }
 };
 
 #endif // SHARE_VM_GC_SHARED_STRONGROOTSSCOPE_HPP
--- a/src/share/vm/gc/shared/taskqueue.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/taskqueue.hpp	Mon May 25 16:59:28 2015 +0200
@@ -382,6 +382,8 @@
   bool steal(uint queue_num, int* seed, E& t);
 
   bool peek();
+
+  uint size() const { return _n; }
 };
 
 template<class T, MEMFLAGS F> void
--- a/src/share/vm/gc/shared/workgroup.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/workgroup.cpp	Mon May 25 16:59:28 2015 +0200
@@ -133,8 +133,6 @@
 }
 
 void WorkGang::run_task(AbstractGangTask* task, uint no_of_parallel_workers) {
-  task->set_for_termination(no_of_parallel_workers);
-
   // This thread is executed by the VM thread which does not block
   // on ordinary MutexLocker's.
   MutexLockerEx ml(monitor(), Mutex::_no_safepoint_check_flag);
@@ -434,7 +432,7 @@
 // SubTasksDone functions.
 
 SubTasksDone::SubTasksDone(uint n) :
-  _n_tasks(n), _n_threads(1), _tasks(NULL) {
+  _n_tasks(n), _tasks(NULL) {
   _tasks = NEW_C_HEAP_ARRAY(uint, n, mtInternal);
   guarantee(_tasks != NULL, "alloc failure");
   clear();
@@ -444,12 +442,6 @@
   return _tasks != NULL;
 }
 
-void SubTasksDone::set_n_threads(uint t) {
-  assert(_claimed == 0 || _threads_completed == _n_threads,
-         "should not be called while tasks are being processed!");
-  _n_threads = (t == 0 ? 1 : t);
-}
-
 void SubTasksDone::clear() {
   for (uint i = 0; i < _n_tasks; i++) {
     _tasks[i] = 0;
@@ -477,7 +469,7 @@
   return res;
 }
 
-void SubTasksDone::all_tasks_completed() {
+void SubTasksDone::all_tasks_completed(uint n_threads) {
   jint observed = _threads_completed;
   jint old;
   do {
@@ -485,7 +477,10 @@
     observed = Atomic::cmpxchg(old+1, &_threads_completed, old);
   } while (observed != old);
   // If this was the last thread checking in, clear the tasks.
-  if (observed+1 == (jint)_n_threads) clear();
+  uint adjusted_thread_count = (n_threads == 0 ? 1 : n_threads);
+  if (observed + 1 == (jint)adjusted_thread_count) {
+    clear();
+  }
 }
 
 
--- a/src/share/vm/gc/shared/workgroup.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/gc/shared/workgroup.hpp	Mon May 25 16:59:28 2015 +0200
@@ -59,13 +59,6 @@
   // The argument tells you which member of the gang you are.
   virtual void work(uint worker_id) = 0;
 
-  // This method configures the task for proper termination.
-  // Some tasks do not have any requirements on termination
-  // and may inherit this method that does nothing.  Some
-  // tasks do some coordination on termination and override
-  // this method to implement that coordination.
-  virtual void set_for_termination(uint active_workers) {};
-
   // Debugging accessor for the name.
   const char* name() const PRODUCT_RETURN_(return NULL;);
   int counter() { return _counter; }
@@ -99,12 +92,9 @@
   OopTaskQueueSet*       _queues;
   ParallelTaskTerminator _terminator;
  public:
-  AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues) :
-    AbstractGangTask(name), _queues(queues), _terminator(0, _queues) {}
+  AbstractGangTaskWOopQueues(const char* name, OopTaskQueueSet* queues, uint n_threads) :
+    AbstractGangTask(name), _queues(queues), _terminator(n_threads, _queues) {}
   ParallelTaskTerminator* terminator() { return &_terminator; }
-  virtual void set_for_termination(uint active_workers) {
-    terminator()->reset_for_reuse(active_workers);
-  }
   OopTaskQueueSet* queues() { return _queues; }
 };
 
@@ -315,16 +305,20 @@
   uint _active_workers;
  public:
   // Constructor and destructor.
-  // Initialize active_workers to a minimum value.  Setting it to
-  // the parameter "workers" will initialize it to a maximum
-  // value which is not desirable.
   FlexibleWorkGang(const char* name, uint workers,
                    bool are_GC_task_threads,
                    bool  are_ConcurrentGC_threads) :
     WorkGang(name, workers, are_GC_task_threads, are_ConcurrentGC_threads),
-    _active_workers(UseDynamicNumberOfGCThreads ? 1U : ParallelGCThreads) {}
-  // Accessors for fields
-  virtual uint active_workers() const { return _active_workers; }
+    _active_workers(UseDynamicNumberOfGCThreads ? 1U : workers) {}
+
+  // Accessors for fields.
+  virtual uint active_workers() const {
+    assert(_active_workers <= _total_workers,
+           err_msg("_active_workers: %u > _total_workers: %u", _active_workers, _total_workers));
+    assert(UseDynamicNumberOfGCThreads || _active_workers == _total_workers,
+           "Unless dynamic should use total workers");
+    return _active_workers;
+  }
   void set_active_workers(uint v) {
     assert(v <= _total_workers,
            "Trying to set more workers active than there are");
@@ -390,12 +384,6 @@
 class SubTasksDone: public CHeapObj<mtInternal> {
   uint* _tasks;
   uint _n_tasks;
-  // _n_threads is used to determine when a sub task is done.
-  // It does not control how many threads will execute the subtask
-  // but must be initialized to the number that do execute the task
-  // in order to correctly decide when the subtask is done (all the
-  // threads working on the task have finished).
-  uint _n_threads;
   uint _threads_completed;
 #ifdef ASSERT
   volatile uint _claimed;
@@ -413,11 +401,6 @@
   // True iff the object is in a valid state.
   bool valid();
 
-  // Get/set the number of parallel threads doing the tasks to "t".  Can only
-  // be called before tasks start or after they are complete.
-  uint n_threads() { return _n_threads; }
-  void set_n_threads(uint t);
-
   // Returns "false" if the task "t" is unclaimed, and ensures that task is
   // claimed.  The task "t" is required to be within the range of "this".
   bool is_task_claimed(uint t);
@@ -426,7 +409,9 @@
   // tasks that it will try to claim.  Every thread in the parallel task
   // must execute this.  (When the last thread does so, the task array is
   // cleared.)
-  void all_tasks_completed();
+  //
+  // n_threads - Number of threads executing the sub-tasks.
+  void all_tasks_completed(uint n_threads);
 
   // Destructor.
   ~SubTasksDone();
--- a/src/share/vm/memory/iterator.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/memory/iterator.hpp	Mon May 25 16:59:28 2015 +0200
@@ -381,9 +381,4 @@
   template <class OopClosureType>             static bool do_metadata(OopClosureType* closure);
 };
 
-// Helper to convert the oop iterate macro suffixes into bool values that can be used by template functions.
-#define nvs_nv_to_bool true
-#define nvs_v_to_bool  false
-#define nvs_to_bool(nv_suffix) nvs##nv_suffix##_to_bool
-
 #endif // SHARE_VM_MEMORY_ITERATOR_HPP
--- a/src/share/vm/oops/arrayKlass.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/arrayKlass.hpp	Mon May 25 16:59:28 2015 +0200
@@ -144,4 +144,36 @@
   void oop_verify_on(oop obj, outputStream* st);
 };
 
+// Array oop iteration macros for declarations.
+// Used to generate the declarations in the *ArrayKlass header files.
+
+#define OOP_OOP_ITERATE_DECL_RANGE(OopClosureType, nv_suffix)                                  \
+  int oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* closure, int start, int end);
+
+#if INCLUDE_ALL_GCS
+// Named NO_BACKWARDS because the definition used by *ArrayKlass isn't reversed, see below.
+#define OOP_OOP_ITERATE_DECL_NO_BACKWARDS(OopClosureType, nv_suffix)           \
+  int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure);
+#endif // INCLUDE_ALL_GCS
+
+
+// Array oop iteration macros for definitions.
+// Used to generate the definitions in the *ArrayKlass.inline.hpp files.
+
+#define OOP_OOP_ITERATE_DEFN_RANGE(KlassType, OopClosureType, nv_suffix)                                 \
+                                                                                                         \
+int KlassType::oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* closure, int start, int end) {  \
+  return oop_oop_iterate_range<nvs_to_bool(nv_suffix)>(obj, closure, start, end);                        \
+}
+
+#if INCLUDE_ALL_GCS
+#define OOP_OOP_ITERATE_DEFN_NO_BACKWARDS(KlassType, OopClosureType, nv_suffix)          \
+int KlassType::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {  \
+  /* No reverse implementation ATM. */                                                   \
+  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                          \
+}
+#else
+#define OOP_OOP_ITERATE_DEFN_NO_BACKWARDS(KlassType, OopClosureType, nv_suffix)
+#endif
+
 #endif // SHARE_VM_OOPS_ARRAYKLASS_HPP
--- a/src/share/vm/oops/instanceClassLoaderKlass.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/instanceClassLoaderKlass.hpp	Mon May 25 16:59:28 2015 +0200
@@ -87,19 +87,12 @@
 
  public:
 
-#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)   \
-  int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk);                    \
-  int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, MemRegion mr);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceClassLoaderKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceClassLoaderKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL)
 
 #if INCLUDE_ALL_GCS
-#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)  \
-  int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_BACKWARDS)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_BACKWARDS)
 #endif // INCLUDE_ALL_GCS
 
 };
--- a/src/share/vm/oops/instanceClassLoaderKlass.inline.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/instanceClassLoaderKlass.inline.hpp	Mon May 25 16:59:28 2015 +0200
@@ -78,33 +78,9 @@
   return size;
 }
 
-
-#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)              \
-                                                                                              \
-int InstanceClassLoaderKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                               \
-}
-
-#if INCLUDE_ALL_GCS
-#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)              \
-                                                                                                        \
-int InstanceClassLoaderKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate_reverse<nvs_to_bool(nv_suffix)>(obj, closure);                                 \
-}
-#else
-#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
-#endif
-
-
-#define InstanceClassLoaderKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix)                              \
-                                                                                                                \
-int InstanceClassLoaderKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) {  \
-  return oop_oop_iterate_bounded<nvs_to_bool(nv_suffix)>(obj, closure, mr);                                     \
-}
-
 #define ALL_INSTANCE_CLASS_LOADER_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)  \
-  InstanceClassLoaderKlass_OOP_OOP_ITERATE_DEFN(          OopClosureType, nv_suffix)     \
-  InstanceClassLoaderKlass_OOP_OOP_ITERATE_DEFN_m(        OopClosureType, nv_suffix)     \
-  InstanceClassLoaderKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
+  OOP_OOP_ITERATE_DEFN(          InstanceClassLoaderKlass, OopClosureType, nv_suffix)    \
+  OOP_OOP_ITERATE_DEFN_BOUNDED(  InstanceClassLoaderKlass, OopClosureType, nv_suffix)    \
+  OOP_OOP_ITERATE_DEFN_BACKWARDS(InstanceClassLoaderKlass, OopClosureType, nv_suffix)
 
 #endif // SHARE_VM_OOPS_INSTANCECLASSLOADERKLASS_INLINE_HPP
--- a/src/share/vm/oops/instanceKlass.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/instanceKlass.hpp	Mon May 25 16:59:28 2015 +0200
@@ -1084,19 +1084,12 @@
 
  public:
 
-#define InstanceKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)                   \
-  int  oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure);                    \
-  int  oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL)
 
 #if INCLUDE_ALL_GCS
-#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)  \
-  int  oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_BACKWARDS)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_BACKWARDS)
 #endif // INCLUDE_ALL_GCS
 
   u2 idnum_allocated_count() const      { return _idnum_allocated_count; }
--- a/src/share/vm/oops/instanceKlass.inline.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/instanceKlass.inline.hpp	Mon May 25 16:59:28 2015 +0200
@@ -27,6 +27,7 @@
 
 #include "memory/iterator.hpp"
 #include "oops/instanceKlass.hpp"
+#include "oops/klass.hpp"
 #include "oops/oop.inline.hpp"
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
@@ -187,29 +188,9 @@
 
 #undef INLINE
 
-
-#define InstanceKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)              \
-int InstanceKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                    \
-}
-
-#if INCLUDE_ALL_GCS
-#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)              \
-int InstanceKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate_reverse<nvs_to_bool(nv_suffix)>(obj, closure);                      \
-}
-#else
-#define InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
-#endif
-
-#define InstanceKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix)                              \
-int InstanceKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) {  \
-  return oop_oop_iterate_bounded<nvs_to_bool(nv_suffix)>(obj, closure, mr);                          \
-}
-
 #define ALL_INSTANCE_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)  \
-  InstanceKlass_OOP_OOP_ITERATE_DEFN(          OopClosureType, nv_suffix)   \
-  InstanceKlass_OOP_OOP_ITERATE_DEFN_m(        OopClosureType, nv_suffix)   \
-  InstanceKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
+  OOP_OOP_ITERATE_DEFN(          InstanceKlass, OopClosureType, nv_suffix)  \
+  OOP_OOP_ITERATE_DEFN_BOUNDED(  InstanceKlass, OopClosureType, nv_suffix)  \
+  OOP_OOP_ITERATE_DEFN_BACKWARDS(InstanceKlass, OopClosureType, nv_suffix)
 
 #endif // SHARE_VM_OOPS_INSTANCEKLASS_INLINE_HPP
--- a/src/share/vm/oops/instanceMirrorKlass.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/instanceMirrorKlass.hpp	Mon May 25 16:59:28 2015 +0200
@@ -149,19 +149,12 @@
 
  public:
 
-#define InstanceMirrorKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)           \
-  int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk);                       \
-  int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk, MemRegion mr);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceMirrorKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceMirrorKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL)
 
 #if INCLUDE_ALL_GCS
-#define InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \
-  int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_BACKWARDS)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_BACKWARDS)
 #endif // INCLUDE_ALL_GCS
 };
 
--- a/src/share/vm/oops/instanceMirrorKlass.inline.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/instanceMirrorKlass.inline.hpp	Mon May 25 16:59:28 2015 +0200
@@ -27,6 +27,7 @@
 #include "classfile/javaClasses.hpp"
 #include "oops/instanceKlass.inline.hpp"
 #include "oops/instanceMirrorKlass.hpp"
+#include "oops/klass.hpp"
 #include "oops/oop.inline.hpp"
 #include "utilities/debug.hpp"
 #include "utilities/globalDefinitions.hpp"
@@ -132,33 +133,9 @@
   return oop_size(obj);
 }
 
-
-#define InstanceMirrorKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)              \
-                                                                                         \
-int InstanceMirrorKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                          \
-}
-
-#if INCLUDE_ALL_GCS
-#define InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)              \
-                                                                                                   \
-int InstanceMirrorKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate_reverse<nvs_to_bool(nv_suffix)>(obj, closure);                            \
-}
-#else
-#define InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
-#endif
-
-
-#define InstanceMirrorKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix)                              \
-                                                                                                           \
-int InstanceMirrorKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) {  \
-  return oop_oop_iterate_bounded<nvs_to_bool(nv_suffix)>(obj, closure, mr);                                \
-}
-
 #define ALL_INSTANCE_MIRROR_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)  \
-  InstanceMirrorKlass_OOP_OOP_ITERATE_DEFN(          OopClosureType, nv_suffix)    \
-  InstanceMirrorKlass_OOP_OOP_ITERATE_DEFN_m(        OopClosureType, nv_suffix)    \
-  InstanceMirrorKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
+  OOP_OOP_ITERATE_DEFN(          InstanceMirrorKlass, OopClosureType, nv_suffix)   \
+  OOP_OOP_ITERATE_DEFN_BOUNDED(  InstanceMirrorKlass, OopClosureType, nv_suffix)   \
+  OOP_OOP_ITERATE_DEFN_BACKWARDS(InstanceMirrorKlass, OopClosureType, nv_suffix)
 
 #endif // SHARE_VM_OOPS_INSTANCEMIRRORKLASS_INLINE_HPP
--- a/src/share/vm/oops/instanceRefKlass.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/instanceRefKlass.hpp	Mon May 25 16:59:28 2015 +0200
@@ -119,19 +119,12 @@
 
  public:
 
-#define InstanceRefKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)               \
-  int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure);                    \
-  int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL)
 
 #if INCLUDE_ALL_GCS
-#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)     \
-  int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_BACKWARDS)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_BACKWARDS)
 #endif // INCLUDE_ALL_GCS
 
   static void release_and_notify_pending_list_lock(BasicLock *pending_list_basic_lock);
--- a/src/share/vm/oops/instanceRefKlass.inline.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/instanceRefKlass.inline.hpp	Mon May 25 16:59:28 2015 +0200
@@ -141,34 +141,9 @@
 
 // Macro to define InstanceRefKlass::oop_oop_iterate for virtual/nonvirtual for
 // all closures.  Macros calling macros above for each oop size.
-
-#define InstanceRefKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)              \
-                                                                                      \
-int InstanceRefKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                       \
-}
-
-#if INCLUDE_ALL_GCS
-#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)              \
-                                                                                                \
-int InstanceRefKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate_reverse<nvs_to_bool(nv_suffix)>(obj, closure);                         \
-}
-#else
-#define InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
-#endif
-
-
-#define InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix)                              \
-                                                                                                        \
-int InstanceRefKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) {  \
-  return oop_oop_iterate_bounded<nvs_to_bool(nv_suffix)>(obj, closure, mr);                             \
-}
-
 #define ALL_INSTANCE_REF_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)  \
-  InstanceRefKlass_OOP_OOP_ITERATE_DEFN(          OopClosureType, nv_suffix)    \
-  InstanceRefKlass_OOP_OOP_ITERATE_DEFN_m(        OopClosureType, nv_suffix)    \
-  InstanceRefKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
-
+  OOP_OOP_ITERATE_DEFN(          InstanceRefKlass, OopClosureType, nv_suffix)   \
+  OOP_OOP_ITERATE_DEFN_BOUNDED(  InstanceRefKlass, OopClosureType, nv_suffix)   \
+  OOP_OOP_ITERATE_DEFN_BACKWARDS(InstanceRefKlass, OopClosureType, nv_suffix)
 
 #endif // SHARE_VM_OOPS_INSTANCEREFKLASS_INLINE_HPP
--- a/src/share/vm/oops/klass.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/klass.hpp	Mon May 25 16:59:28 2015 +0200
@@ -583,20 +583,20 @@
 
   // Iterators specialized to particular subtypes
   // of ExtendedOopClosure, to avoid closure virtual calls.
-#define Klass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)                                      \
-  virtual int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) = 0;                    \
-  /* Iterates "closure" over all the oops in "obj" (of type "this") within "mr". */                \
-  virtual int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) = 0;
+#define Klass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)                                          \
+  virtual int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) = 0;                        \
+  /* Iterates "closure" over all the oops in "obj" (of type "this") within "mr". */                    \
+  virtual int oop_oop_iterate_bounded##nv_suffix(oop obj, OopClosureType* closure, MemRegion mr) = 0;
 
   ALL_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_DECL)
   ALL_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_DECL)
 
 #if INCLUDE_ALL_GCS
-#define Klass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)                    \
+#define Klass_OOP_OOP_ITERATE_DECL_BACKWARDS(OopClosureType, nv_suffix)                    \
   virtual int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) = 0;
 
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(Klass_OOP_OOP_ITERATE_DECL_BACKWARDS)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(Klass_OOP_OOP_ITERATE_DECL_BACKWARDS)
 #endif // INCLUDE_ALL_GCS
 
   virtual void array_klasses_do(void f(Klass* k)) {}
@@ -651,4 +651,44 @@
   void klass_update_barrier_set_pre(oop* p, oop v);
 };
 
+// Helper to convert the oop iterate macro suffixes into bool values that can be used by template functions.
+#define nvs_nv_to_bool true
+#define nvs_v_to_bool  false
+#define nvs_to_bool(nv_suffix) nvs##nv_suffix##_to_bool
+
+// Oop iteration macros for declarations.
+// Used to generate declarations in the *Klass header files.
+
+#define OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)                                    \
+  int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure);                        \
+  int oop_oop_iterate_bounded##nv_suffix(oop obj, OopClosureType* closure, MemRegion mr);
+
+#if INCLUDE_ALL_GCS
+#define OOP_OOP_ITERATE_DECL_BACKWARDS(OopClosureType, nv_suffix)              \
+  int oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure);
+#endif // INCLUDE_ALL_GCS
+
+
+// Oop iteration macros for definitions.
+// Used to generate definitions in the *Klass.inline.hpp files.
+
+#define OOP_OOP_ITERATE_DEFN(KlassType, OopClosureType, nv_suffix)             \
+int KlassType::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) {  \
+  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                \
+}
+
+#if INCLUDE_ALL_GCS
+#define OOP_OOP_ITERATE_DEFN_BACKWARDS(KlassType, OopClosureType, nv_suffix)             \
+int KlassType::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {  \
+  return oop_oop_iterate_reverse<nvs_to_bool(nv_suffix)>(obj, closure);                  \
+}
+#else
+#define OOP_OOP_ITERATE_DEFN_BACKWARDS(KlassType, OopClosureType, nv_suffix)
+#endif
+
+#define OOP_OOP_ITERATE_DEFN_BOUNDED(KlassType, OopClosureType, nv_suffix)                           \
+int KlassType::oop_oop_iterate_bounded##nv_suffix(oop obj, OopClosureType* closure, MemRegion mr) {  \
+  return oop_oop_iterate_bounded<nvs_to_bool(nv_suffix)>(obj, closure, mr);                          \
+}
+
 #endif // SHARE_VM_OOPS_KLASS_HPP
--- a/src/share/vm/oops/objArrayKlass.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/objArrayKlass.hpp	Mon May 25 16:59:28 2015 +0200
@@ -163,22 +163,14 @@
 
  public:
 
-#define ObjArrayKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)   \
-  int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* blk);         \
-  int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* blk,      \
-                                     MemRegion mr);                     \
-  int oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* blk,    \
-                                     int start, int end);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_RANGE)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_RANGE)
 
 #if INCLUDE_ALL_GCS
-#define ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix) \
-  int  oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* blk);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_NO_BACKWARDS)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_NO_BACKWARDS)
 #endif // INCLUDE_ALL_GCS
 
   // JVM support
--- a/src/share/vm/oops/objArrayKlass.inline.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/objArrayKlass.inline.hpp	Mon May 25 16:59:28 2015 +0200
@@ -27,6 +27,8 @@
 
 #include "memory/memRegion.hpp"
 #include "memory/iterator.inline.hpp"
+#include "oops/arrayKlass.hpp"
+#include "oops/klass.hpp"
 #include "oops/objArrayKlass.hpp"
 #include "oops/objArrayOop.inline.hpp"
 #include "oops/oop.inline.hpp"
@@ -149,41 +151,10 @@
   return size;
 }
 
-
-#define ObjArrayKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)              \
-                                                                                   \
-int ObjArrayKlass::oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) {  \
-  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                    \
-}
-
-#if INCLUDE_ALL_GCS
-#define ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)              \
-int ObjArrayKlass::oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {  \
-  /* No reverse implementation ATM. */                                                       \
-  return oop_oop_iterate<nvs_to_bool(nv_suffix)>(obj, closure);                              \
-}
-#else
-#define ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
-#endif
-
-#define ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix)                              \
-                                                                                                     \
-int ObjArrayKlass::oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) {  \
-  return oop_oop_iterate_bounded<nvs_to_bool(nv_suffix)>(obj, closure, mr);                          \
-}
-
-#define ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r(OopClosureType, nv_suffix)                                      \
-                                                                                                             \
-int ObjArrayKlass::oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* closure, int start, int end) {  \
-  return oop_oop_iterate_range<nvs_to_bool(nv_suffix)>(obj, closure, start, end);                            \
-}
-
-
-#define ALL_OBJ_ARRAY_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)  \
-  ObjArrayKlass_OOP_OOP_ITERATE_DEFN(          OopClosureType, nv_suffix)    \
-  ObjArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)    \
-  ObjArrayKlass_OOP_OOP_ITERATE_DEFN_m(        OopClosureType, nv_suffix)    \
-  ObjArrayKlass_OOP_OOP_ITERATE_DEFN_r(        OopClosureType, nv_suffix)
-
+#define ALL_OBJ_ARRAY_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)    \
+  OOP_OOP_ITERATE_DEFN(             ObjArrayKlass, OopClosureType, nv_suffix)  \
+  OOP_OOP_ITERATE_DEFN_BOUNDED(     ObjArrayKlass, OopClosureType, nv_suffix)  \
+  OOP_OOP_ITERATE_DEFN_RANGE(       ObjArrayKlass, OopClosureType, nv_suffix)  \
+  OOP_OOP_ITERATE_DEFN_NO_BACKWARDS(ObjArrayKlass, OopClosureType, nv_suffix)
 
 #endif // SHARE_VM_OOPS_OBJARRAYKLASS_INLINE_HPP
--- a/src/share/vm/oops/oop.inline.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/oop.inline.hpp	Mon May 25 16:59:28 2015 +0200
@@ -741,7 +741,7 @@
 }                                                                     \
                                                                       \
 inline int oopDesc::oop_iterate(OopClosureType* blk, MemRegion mr) {  \
-  return klass()->oop_oop_iterate##nv_suffix##_m(this, blk, mr);      \
+  return klass()->oop_oop_iterate_bounded##nv_suffix(this, blk, mr);  \
 }
 
 
--- a/src/share/vm/oops/typeArrayKlass.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/typeArrayKlass.hpp	Mon May 25 16:59:28 2015 +0200
@@ -92,24 +92,24 @@
   // The implementation used by all oop_oop_iterate functions in TypeArrayKlasses.
   inline int oop_oop_iterate_impl(oop obj, ExtendedOopClosure* closure);
 
+  // Wraps oop_oop_iterate_impl to conform to macros.
+  template <bool nv, typename OopClosureType>
+  inline int oop_oop_iterate(oop obj, OopClosureType* closure);
+
+  // Wraps oop_oop_iterate_impl to conform to macros.
+  template <bool nv, typename OopClosureType>
+  inline int oop_oop_iterate_bounded(oop obj, OopClosureType* closure, MemRegion mr);
+
  public:
 
-#define TypeArrayKlass_OOP_OOP_ITERATE_DECL(OopClosureType, nv_suffix)    \
-  int oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure);       \
-  int oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure,    \
-                                     MemRegion mr);                       \
-  int oop_oop_iterate_range##nv_suffix(oop obj, OopClosureType* closure,  \
-                                     int start, int end);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(TypeArrayKlass_OOP_OOP_ITERATE_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(TypeArrayKlass_OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_RANGE)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_RANGE)
 
 #if INCLUDE_ALL_GCS
-#define TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL(OopClosureType, nv_suffix)  \
-  int  oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure);
-
-  ALL_OOP_OOP_ITERATE_CLOSURES_1(TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
-  ALL_OOP_OOP_ITERATE_CLOSURES_2(TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DECL)
+  ALL_OOP_OOP_ITERATE_CLOSURES_1(OOP_OOP_ITERATE_DECL_NO_BACKWARDS)
+  ALL_OOP_OOP_ITERATE_CLOSURES_2(OOP_OOP_ITERATE_DECL_NO_BACKWARDS)
 #endif // INCLUDE_ALL_GCS
 
 
--- a/src/share/vm/oops/typeArrayKlass.inline.hpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/oops/typeArrayKlass.inline.hpp	Mon May 25 16:59:28 2015 +0200
@@ -25,6 +25,8 @@
 #ifndef SHARE_VM_OOPS_TYPEARRAYKLASS_INLINE_HPP
 #define SHARE_VM_OOPS_TYPEARRAYKLASS_INLINE_HPP
 
+#include "oops/arrayKlass.hpp"
+#include "oops/klass.hpp"
 #include "oops/oop.inline.hpp"
 #include "oops/typeArrayKlass.hpp"
 #include "oops/typeArrayOop.hpp"
@@ -39,35 +41,19 @@
   return t->object_size();
 }
 
-#define TypeArrayKlass_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)  \
-                                                                        \
-int TypeArrayKlass::                                                    \
-oop_oop_iterate##nv_suffix(oop obj, OopClosureType* closure) {          \
-  return oop_oop_iterate_impl(obj, closure);                            \
+template <bool nv, typename OopClosureType>
+int TypeArrayKlass::oop_oop_iterate(oop obj, OopClosureType* closure) {
+  return oop_oop_iterate_impl(obj, closure);
 }
 
-#if INCLUDE_ALL_GCS
-#define TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)  \
-                                                                                  \
-int TypeArrayKlass::                                                              \
-oop_oop_iterate_backwards##nv_suffix(oop obj, OopClosureType* closure) {          \
-  return oop_oop_iterate_impl(obj, closure);                                      \
-}
-#else
-#define TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
-#endif
-
-
-#define TypeArrayKlass_OOP_OOP_ITERATE_DEFN_m(OopClosureType, nv_suffix)          \
-                                                                                  \
-int TypeArrayKlass::                                                              \
-oop_oop_iterate##nv_suffix##_m(oop obj, OopClosureType* closure, MemRegion mr) {  \
-  return oop_oop_iterate_impl(obj, closure);                                      \
+template <bool nv, typename OopClosureType>
+int TypeArrayKlass::oop_oop_iterate_bounded(oop obj, OopClosureType* closure, MemRegion mr) {
+  return oop_oop_iterate_impl(obj, closure);
 }
 
-#define ALL_TYPE_ARRAY_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)  \
-  TypeArrayKlass_OOP_OOP_ITERATE_DEFN(          OopClosureType, nv_suffix)    \
-  TypeArrayKlass_OOP_OOP_ITERATE_DEFN_m(        OopClosureType, nv_suffix)    \
-  TypeArrayKlass_OOP_OOP_ITERATE_BACKWARDS_DEFN(OopClosureType, nv_suffix)
+#define ALL_TYPE_ARRAY_KLASS_OOP_OOP_ITERATE_DEFN(OopClosureType, nv_suffix)    \
+  OOP_OOP_ITERATE_DEFN(             TypeArrayKlass, OopClosureType, nv_suffix)  \
+  OOP_OOP_ITERATE_DEFN_BOUNDED(     TypeArrayKlass, OopClosureType, nv_suffix)  \
+  OOP_OOP_ITERATE_DEFN_NO_BACKWARDS(TypeArrayKlass, OopClosureType, nv_suffix)
 
 #endif // SHARE_VM_OOPS_TYPEARRAYKLASS_INLINE_HPP
--- a/src/share/vm/runtime/arguments.cpp	Mon May 25 09:13:41 2015 +0200
+++ b/src/share/vm/runtime/arguments.cpp	Mon May 25 16:59:28 2015 +0200
@@ -1278,10 +1278,8 @@
 
   // Preferred young gen size for "short" pauses:
   // upper bound depends on # of threads and NewRatio.
-  const uintx parallel_gc_threads =
-    (ParallelGCThreads == 0 ? 1 : ParallelGCThreads);
   const size_t preferred_max_new_size_unaligned =
-    MIN2(max_heap/(NewRatio+1), ScaleForWordSize(young_gen_per_worker * parallel_gc_threads));
+    MIN2(max_heap/(NewRatio+1), ScaleForWordSize(young_gen_per_worker * ParallelGCThreads));
   size_t preferred_max_new_size =
     align_size_up(preferred_max_new_size_unaligned, os::vm_page_size());