changeset 47969:a9fbc10dc406

Merge
author dcubed
date Thu, 23 Nov 2017 19:42:56 -0800
parents 1a3ac5e7cfb8 62d5973082e3
children 9d00b6f9cbed
files
diffstat 12 files changed, 318 insertions(+), 161 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/cpu/arm/stubGenerator_arm.cpp	Thu Nov 23 06:01:57 2017 -0800
+++ b/src/hotspot/cpu/arm/stubGenerator_arm.cpp	Thu Nov 23 19:42:56 2017 -0800
@@ -2968,7 +2968,9 @@
         CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
         assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
 
-        Label L_cardtable_loop;
+        Label L_cardtable_loop, L_done;
+
+        __ cbz_32(count, L_done); // zero count - nothing to do
 
         __ add_ptr_scaled_int32(count, addr, count, LogBytesPerHeapOop);
         __ sub(count, count, BytesPerHeapOop);                            // last addr
@@ -2987,6 +2989,7 @@
         __ strb(zero, Address(addr, 1, post_indexed));
         __ subs(count, count, 1);
         __ b(L_cardtable_loop, ge);
+        __ BIND(L_done);
       }
       break;
     case BarrierSet::ModRef:
--- a/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	Thu Nov 23 06:01:57 2017 -0800
+++ b/src/hotspot/cpu/sparc/stubGenerator_sparc.cpp	Thu Nov 23 19:42:56 2017 -0800
@@ -898,7 +898,9 @@
           assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
           assert_different_registers(addr, count, tmp);
 
-          Label L_loop;
+          Label L_loop, L_done;
+
+          __ cmp_and_br_short(count, 0, Assembler::equal, Assembler::pt, L_done); // zero count - nothing to do
 
           __ sll_ptr(count, LogBytesPerHeapOop, count);
           __ sub(count, BytesPerHeapOop, count);
@@ -914,6 +916,7 @@
           __ subcc(count, 1, count);
           __ brx(Assembler::greaterEqual, false, Assembler::pt, L_loop);
           __ delayed()->add(addr, 1, addr);
+        __ BIND(L_done);
         }
         break;
       case BarrierSet::ModRef:
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Thu Nov 23 06:01:57 2017 -0800
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Thu Nov 23 19:42:56 2017 -0800
@@ -1264,9 +1264,12 @@
           CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
           assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
 
-          Label L_loop;
+          Label L_loop, L_done;
           const Register end = count;
 
+          __ testl(count, count);
+          __ jcc(Assembler::zero, L_done); // zero count - nothing to do
+
           __ leaq(end, Address(start, count, TIMES_OOP, 0));  // end == start+count*oop_size
           __ subptr(end, BytesPerHeapOop); // end - 1 to make inclusive
           __ shrptr(start, CardTableModRefBS::card_shift);
@@ -1280,6 +1283,7 @@
           __ movb(Address(start, count, Address::times_1), 0);
           __ decrement(count);
           __ jcc(Assembler::greaterEqual, L_loop);
+        __ BIND(L_done);
         }
         break;
       default:
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp	Thu Nov 23 06:01:57 2017 -0800
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.cpp	Thu Nov 23 19:42:56 2017 -0800
@@ -33,6 +33,107 @@
 #include "utilities/pair.hpp"
 #include <math.h>
 
+G1ConcurrentRefineThread* G1ConcurrentRefineThreadControl::create_refinement_thread(uint worker_id, bool initializing) {
+  G1ConcurrentRefineThread* result = NULL;
+  if (initializing || !InjectGCWorkerCreationFailure) {
+    result = new G1ConcurrentRefineThread(_cr, worker_id);
+  }
+  if (result == NULL || result->osthread() == NULL) {
+    log_warning(gc)("Failed to create refinement thread %u, no more %s",
+                    worker_id,
+                    result == NULL ? "memory" : "OS threads");
+  }
+  return result;
+}
+
+G1ConcurrentRefineThreadControl::G1ConcurrentRefineThreadControl() :
+  _cr(NULL),
+  _threads(NULL),
+  _num_max_threads(0)
+{
+}
+
+G1ConcurrentRefineThreadControl::~G1ConcurrentRefineThreadControl() {
+  for (uint i = 0; i < _num_max_threads; i++) {
+    G1ConcurrentRefineThread* t = _threads[i];
+    if (t != NULL) {
+      delete t;
+    }
+  }
+  FREE_C_HEAP_ARRAY(G1ConcurrentRefineThread*, _threads);
+}
+
+jint G1ConcurrentRefineThreadControl::initialize(G1ConcurrentRefine* cr, uint num_max_threads) {
+  assert(cr != NULL, "G1ConcurrentRefine must not be NULL");
+  _cr = cr;
+  _num_max_threads = num_max_threads;
+
+  _threads = NEW_C_HEAP_ARRAY_RETURN_NULL(G1ConcurrentRefineThread*, num_max_threads, mtGC);
+  if (_threads == NULL) {
+    vm_shutdown_during_initialization("Could not allocate thread holder array.");
+    return JNI_ENOMEM;
+  }
+
+  for (uint i = 0; i < num_max_threads; i++) {
+    if (UseDynamicNumberOfGCThreads && i != 0 /* Always start first thread. */) {
+      _threads[i] = NULL;
+    } else {
+      _threads[i] = create_refinement_thread(i, true);
+      if (_threads[i] == NULL) {
+        vm_shutdown_during_initialization("Could not allocate refinement threads.");
+        return JNI_ENOMEM;
+      }
+    }
+  }
+  return JNI_OK;
+}
+
+void G1ConcurrentRefineThreadControl::maybe_activate_next(uint cur_worker_id) {
+  assert(cur_worker_id < _num_max_threads,
+         "Activating another thread from %u not allowed since there can be at most %u",
+         cur_worker_id, _num_max_threads);
+  if (cur_worker_id == (_num_max_threads - 1)) {
+    // Already the last thread, there is no more thread to activate.
+    return;
+  }
+
+  uint worker_id = cur_worker_id + 1;
+  G1ConcurrentRefineThread* thread_to_activate = _threads[worker_id];
+  if (thread_to_activate == NULL) {
+    // Still need to create the thread...
+    _threads[worker_id] = create_refinement_thread(worker_id, false);
+    thread_to_activate = _threads[worker_id];
+  }
+  if (thread_to_activate != NULL && !thread_to_activate->is_active()) {
+    thread_to_activate->activate();
+  }
+}
+
+void G1ConcurrentRefineThreadControl::print_on(outputStream* st) const {
+  for (uint i = 0; i < _num_max_threads; ++i) {
+    if (_threads[i] != NULL) {
+      _threads[i]->print_on(st);
+      st->cr();
+    }
+  }
+}
+
+void G1ConcurrentRefineThreadControl::worker_threads_do(ThreadClosure* tc) {
+  for (uint i = 0; i < _num_max_threads; i++) {
+    if (_threads[i] != NULL) {
+      tc->do_thread(_threads[i]);
+    }
+  }
+}
+
+void G1ConcurrentRefineThreadControl::stop() {
+  for (uint i = 0; i < _num_max_threads; i++) {
+    if (_threads[i] != NULL) {
+      _threads[i]->stop();
+    }
+  }
+}
+
 // Arbitrary but large limits, to simplify some of the zone calculations.
 // The general idea is to allow expressions like
 //   MIN2(x OP y, max_XXX_zone)
@@ -96,7 +197,7 @@
                                   size_t yellow_zone,
                                   uint worker_i) {
   double yellow_size = yellow_zone - green_zone;
-  double step = yellow_size / G1ConcurrentRefine::thread_num();
+  double step = yellow_size / G1ConcurrentRefine::max_num_threads();
   if (worker_i == 0) {
     // Potentially activate worker 0 more aggressively, to keep
     // available buffers near green_zone value.  When yellow_size is
@@ -115,8 +216,7 @@
                                        size_t yellow_zone,
                                        size_t red_zone,
                                        size_t min_yellow_zone_size) :
-  _threads(NULL),
-  _n_worker_threads(thread_num()),
+  _thread_control(),
   _green_zone(green_zone),
   _yellow_zone(yellow_zone),
   _red_zone(red_zone),
@@ -125,9 +225,13 @@
   assert_zone_constraints_gyr(green_zone, yellow_zone, red_zone);
 }
 
+jint G1ConcurrentRefine::initialize() {
+  return _thread_control.initialize(this, max_num_threads());
+}
+
 static size_t calc_min_yellow_zone_size() {
   size_t step = G1ConcRefinementThresholdStep;
-  uint n_workers = G1ConcurrentRefine::thread_num();
+  uint n_workers = G1ConcurrentRefine::max_num_threads();
   if ((max_yellow_zone / step) < n_workers) {
     return max_yellow_zone;
   } else {
@@ -191,77 +295,27 @@
     return NULL;
   }
 
-  cr->_threads = NEW_C_HEAP_ARRAY_RETURN_NULL(G1ConcurrentRefineThread*, cr->_n_worker_threads, mtGC);
-  if (cr->_threads == NULL) {
-    *ecode = JNI_ENOMEM;
-    vm_shutdown_during_initialization("Could not allocate an array for G1ConcurrentRefineThread");
-    return NULL;
-  }
-
-  uint worker_id_offset = DirtyCardQueueSet::num_par_ids();
-
-  G1ConcurrentRefineThread *next = NULL;
-  for (uint i = cr->_n_worker_threads - 1; i != UINT_MAX; i--) {
-    Thresholds thresholds = calc_thresholds(green_zone, yellow_zone, i);
-    G1ConcurrentRefineThread* t =
-      new G1ConcurrentRefineThread(cr,
-                                   next,
-                                   worker_id_offset,
-                                   i,
-                                   activation_level(thresholds),
-                                   deactivation_level(thresholds));
-    assert(t != NULL, "Conc refine should have been created");
-    if (t->osthread() == NULL) {
-      *ecode = JNI_ENOMEM;
-      vm_shutdown_during_initialization("Could not create G1ConcurrentRefineThread");
-      return NULL;
-    }
-
-    assert(t->cr() == cr, "Conc refine thread should refer to this");
-    cr->_threads[i] = t;
-    next = t;
-  }
-
-  *ecode = JNI_OK;
+  *ecode = cr->initialize();
   return cr;
 }
 
 void G1ConcurrentRefine::stop() {
-  for (uint i = 0; i < _n_worker_threads; i++) {
-    _threads[i]->stop();
-  }
-}
-
-void G1ConcurrentRefine::update_thread_thresholds() {
-  for (uint i = 0; i < _n_worker_threads; i++) {
-    Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, i);
-    _threads[i]->update_thresholds(activation_level(thresholds),
-                                   deactivation_level(thresholds));
-  }
+  _thread_control.stop();
 }
 
 G1ConcurrentRefine::~G1ConcurrentRefine() {
-  for (uint i = 0; i < _n_worker_threads; i++) {
-    delete _threads[i];
-  }
-  FREE_C_HEAP_ARRAY(G1ConcurrentRefineThread*, _threads);
 }
 
 void G1ConcurrentRefine::threads_do(ThreadClosure *tc) {
-  for (uint i = 0; i < _n_worker_threads; i++) {
-    tc->do_thread(_threads[i]);
-  }
+  _thread_control.worker_threads_do(tc);
 }
 
-uint G1ConcurrentRefine::thread_num() {
+uint G1ConcurrentRefine::max_num_threads() {
   return G1ConcRefinementThreads;
 }
 
 void G1ConcurrentRefine::print_threads_on(outputStream* st) const {
-  for (uint i = 0; i < _n_worker_threads; ++i) {
-    _threads[i]->print_on(st);
-    st->cr();
-  }
+  _thread_control.print_on(st);
 }
 
 static size_t calc_new_green_zone(size_t green,
@@ -326,16 +380,15 @@
 
   if (G1UseAdaptiveConcRefinement) {
     update_zones(update_rs_time, update_rs_processed_buffers, goal_ms);
-    update_thread_thresholds();
 
     // Change the barrier params
-    if (_n_worker_threads == 0) {
+    if (max_num_threads() == 0) {
       // Disable dcqs notification when there are no threads to notify.
       dcqs.set_process_completed_threshold(INT_MAX);
     } else {
       // Worker 0 is the primary; wakeup is via dcqs notification.
       STATIC_ASSERT(max_yellow_zone <= INT_MAX);
-      size_t activate = _threads[0]->activation_threshold();
+      size_t activate = activation_threshold(0);
       dcqs.set_process_completed_threshold((int)activate);
     }
     dcqs.set_max_completed_queue((int)red_zone());
@@ -349,3 +402,42 @@
   }
   dcqs.notify_if_necessary();
 }
+
+size_t G1ConcurrentRefine::activation_threshold(uint worker_id) const {
+  Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, worker_id);
+  return activation_level(thresholds);
+}
+
+size_t G1ConcurrentRefine::deactivation_threshold(uint worker_id) const {
+  Thresholds thresholds = calc_thresholds(_green_zone, _yellow_zone, worker_id);
+  return deactivation_level(thresholds);
+}
+
+uint G1ConcurrentRefine::worker_id_offset() {
+  return DirtyCardQueueSet::num_par_ids();
+}
+
+void G1ConcurrentRefine::maybe_activate_more_threads(uint worker_id, size_t num_cur_buffers) {
+  if (num_cur_buffers > activation_threshold(worker_id + 1)) {
+    _thread_control.maybe_activate_next(worker_id);
+  }
+}
+
+bool G1ConcurrentRefine::do_refinement_step(uint worker_id) {
+  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
+
+  size_t curr_buffer_num = dcqs.completed_buffers_num();
+  // If the number of the buffers falls down into the yellow zone,
+  // that means that the transition period after the evacuation pause has ended.
+  // Since the value written to the DCQS is the same for all threads, there is no
+  // need to synchronize.
+  if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= yellow_zone()) {
+    dcqs.set_completed_queue_padding(0);
+  }
+
+  maybe_activate_more_threads(worker_id, curr_buffer_num);
+
+  // Process the next buffer, if there are enough left.
+  return dcqs.refine_completed_buffer_concurrently(worker_id + worker_id_offset(),
+                                                   deactivation_threshold(worker_id));
+}
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp	Thu Nov 23 06:01:57 2017 -0800
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefine.hpp	Thu Nov 23 19:42:56 2017 -0800
@@ -30,30 +30,63 @@
 
 // Forward decl
 class CardTableEntryClosure;
+class G1ConcurrentRefine;
 class G1ConcurrentRefineThread;
 class outputStream;
 class ThreadClosure;
 
+// Helper class for refinement thread management. Used to start, stop and
+// iterate over them.
+class G1ConcurrentRefineThreadControl VALUE_OBJ_CLASS_SPEC {
+  G1ConcurrentRefine* _cr;
+
+  G1ConcurrentRefineThread** _threads;
+  uint _num_max_threads;
+
+  // Create the refinement thread for the given worker id.
+  // If initializing is true, ignore InjectGCWorkerCreationFailure.
+  G1ConcurrentRefineThread* create_refinement_thread(uint worker_id, bool initializing);
+public:
+  G1ConcurrentRefineThreadControl();
+  ~G1ConcurrentRefineThreadControl();
+
+  jint initialize(G1ConcurrentRefine* cr, uint num_max_threads);
+
+  // If there is a "successor" thread that can be activated given the current id,
+  // activate it.
+  void maybe_activate_next(uint cur_worker_id);
+
+  void print_on(outputStream* st) const;
+  void worker_threads_do(ThreadClosure* tc);
+  void stop();
+};
+
+// Controls refinement threads and their activation based on the number of completed
+// buffers currently available in the global dirty card queue.
+// Refinement threads pick work from the queue based on these thresholds. They are activated
+// gradually based on the amount of work to do.
+// Refinement thread n activates thread n+1 if the instance of this class determines there
+// is enough work available. Threads deactivate themselves if the current amount of
+// completed buffers falls below their individual threshold.
 class G1ConcurrentRefine : public CHeapObj<mtGC> {
-  G1ConcurrentRefineThread** _threads;
-  uint _n_worker_threads;
- /*
-  * The value of the update buffer queue length falls into one of 3 zones:
-  * green, yellow, red. If the value is in [0, green) nothing is
-  * done, the buffers are left unprocessed to enable the caching effect of the
-  * dirtied cards. In the yellow zone [green, yellow) the concurrent refinement
-  * threads are gradually activated. In [yellow, red) all threads are
-  * running. If the length becomes red (max queue length) the mutators start
-  * processing the buffers.
-  *
-  * There are some interesting cases (when G1UseAdaptiveConcRefinement
-  * is turned off):
-  * 1) green = yellow = red = 0. In this case the mutator will process all
-  *    buffers. Except for those that are created by the deferred updates
-  *    machinery during a collection.
-  * 2) green = 0. Means no caching. Can be a good way to minimize the
-  *    amount of time spent updating rsets during a collection.
-  */
+  G1ConcurrentRefineThreadControl _thread_control;
+  /*
+   * The value of the completed dirty card queue length falls into one of 3 zones:
+   * green, yellow, red. If the value is in [0, green) nothing is
+   * done, the buffers are left unprocessed to enable the caching effect of the
+   * dirtied cards. In the yellow zone [green, yellow) the concurrent refinement
+   * threads are gradually activated. In [yellow, red) all threads are
+   * running. If the length becomes red (max queue length) the mutators start
+   * processing the buffers.
+   *
+   * There are some interesting cases (when G1UseAdaptiveConcRefinement
+   * is turned off):
+   * 1) green = yellow = red = 0. In this case the mutator will process all
+   *    buffers. Except for those that are created by the deferred updates
+   *    machinery during a collection.
+   * 2) green = 0. Means no caching. Can be a good way to minimize the
+   *    amount of time spent updating remembered sets during a collection.
+   */
   size_t _green_zone;
   size_t _yellow_zone;
   size_t _red_zone;
@@ -69,24 +102,32 @@
                     size_t update_rs_processed_buffers,
                     double goal_ms);
 
-  // Update thread thresholds to account for updated zone values.
-  void update_thread_thresholds();
+  static uint worker_id_offset();
+  void maybe_activate_more_threads(uint worker_id, size_t num_cur_buffers);
 
- public:
+  jint initialize();
+public:
   ~G1ConcurrentRefine();
 
-  // Returns a G1ConcurrentRefine instance if succeeded to create/initialize G1ConcurrentRefine and G1ConcurrentRefineThreads.
-  // Otherwise, returns NULL with error code.
+  // Returns a G1ConcurrentRefine instance if succeeded to create/initialize the
+  // G1ConcurrentRefine instance. Otherwise, returns NULL with error code.
   static G1ConcurrentRefine* create(jint* ecode);
 
   void stop();
 
+  // Adjust refinement thresholds based on work done during the pause and the goal time.
   void adjust(double update_rs_time, size_t update_rs_processed_buffers, double goal_ms);
 
+  size_t activation_threshold(uint worker_id) const;
+  size_t deactivation_threshold(uint worker_id) const;
+  // Perform a single refinement step. Called by the refinement threads when woken up.
+  bool do_refinement_step(uint worker_id);
+
   // Iterate over all concurrent refinement threads applying the given closure.
   void threads_do(ThreadClosure *tc);
 
-  static uint thread_num();
+  // Maximum number of refinement threads.
+  static uint max_num_threads();
 
   void print_threads_on(outputStream* st) const;
 
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp	Thu Nov 23 06:01:57 2017 -0800
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.cpp	Thu Nov 23 19:42:56 2017 -0800
@@ -25,32 +25,20 @@
 #include "precompiled.hpp"
 #include "gc/g1/g1ConcurrentRefine.hpp"
 #include "gc/g1/g1ConcurrentRefineThread.hpp"
-#include "gc/g1/g1CollectedHeap.inline.hpp"
-#include "gc/g1/g1RemSet.hpp"
 #include "gc/shared/suspendibleThreadSet.hpp"
 #include "logging/log.hpp"
 #include "memory/resourceArea.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/mutexLocker.hpp"
 
-G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr,
-                                                   G1ConcurrentRefineThread *next,
-                                                   uint worker_id_offset,
-                                                   uint worker_id,
-                                                   size_t activate,
-                                                   size_t deactivate) :
+G1ConcurrentRefineThread::G1ConcurrentRefineThread(G1ConcurrentRefine* cr, uint worker_id) :
   ConcurrentGCThread(),
-  _worker_id_offset(worker_id_offset),
   _worker_id(worker_id),
   _active(false),
-  _next(next),
   _monitor(NULL),
   _cr(cr),
-  _vtime_accum(0.0),
-  _activation_threshold(activate),
-  _deactivation_threshold(deactivate)
+  _vtime_accum(0.0)
 {
-
   // Each thread has its own monitor. The i-th thread is responsible for signaling
   // to thread i+1 if the number of buffers in the queue exceeds a threshold for this
   // thread. Monitors are also used to wake up the threads during termination.
@@ -67,13 +55,6 @@
   create_and_start();
 }
 
-void G1ConcurrentRefineThread::update_thresholds(size_t activate,
-                                                 size_t deactivate) {
-  assert(deactivate < activate, "precondition");
-  _activation_threshold = activate;
-  _deactivation_threshold = deactivate;
-}
-
 void G1ConcurrentRefineThread::wait_for_completed_buffers() {
   MutexLockerEx x(_monitor, Mutex::_no_safepoint_check_flag);
   while (!should_terminate() && !is_active()) {
@@ -118,9 +99,9 @@
     }
 
     size_t buffers_processed = 0;
-    DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
-    log_debug(gc, refine)("Activated %d, on threshold: " SIZE_FORMAT ", current: " SIZE_FORMAT,
-                          _worker_id, _activation_threshold, dcqs.completed_buffers_num());
+    log_debug(gc, refine)("Activated worker %d, on threshold: " SIZE_FORMAT ", current: " SIZE_FORMAT,
+                          _worker_id, _cr->activation_threshold(_worker_id),
+                           JavaThread::dirty_card_queue_set().completed_buffers_num());
 
     {
       SuspendibleThreadSetJoiner sts_join;
@@ -131,33 +112,18 @@
           continue;             // Re-check for termination after yield delay.
         }
 
-        size_t curr_buffer_num = dcqs.completed_buffers_num();
-        // If the number of the buffers falls down into the yellow zone,
-        // that means that the transition period after the evacuation pause has ended.
-        if (dcqs.completed_queue_padding() > 0 && curr_buffer_num <= cr()->yellow_zone()) {
-          dcqs.set_completed_queue_padding(0);
-        }
-
-        // Check if we need to activate the next thread.
-        if ((_next != NULL) &&
-            !_next->is_active() &&
-            (curr_buffer_num > _next->_activation_threshold)) {
-          _next->activate();
-        }
-
-        // Process the next buffer, if there are enough left.
-        if (!dcqs.refine_completed_buffer_concurrently(_worker_id + _worker_id_offset, _deactivation_threshold)) {
-          break; // Deactivate, number of buffers fell below threshold.
+        if (!_cr->do_refinement_step(_worker_id)) {
+          break;
         }
         ++buffers_processed;
       }
     }
 
     deactivate();
-    log_debug(gc, refine)("Deactivated %d, off threshold: " SIZE_FORMAT
+    log_debug(gc, refine)("Deactivated worker %d, off threshold: " SIZE_FORMAT
                           ", current: " SIZE_FORMAT ", processed: " SIZE_FORMAT,
-                          _worker_id, _deactivation_threshold,
-                          dcqs.completed_buffers_num(),
+                          _worker_id, _cr->deactivation_threshold(_worker_id),
+                          JavaThread::dirty_card_queue_set().completed_buffers_num(),
                           buffers_processed);
 
     if (os::supports_vtime()) {
--- a/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp	Thu Nov 23 06:01:57 2017 -0800
+++ b/src/hotspot/share/gc/g1/g1ConcurrentRefineThread.hpp	Thu Nov 23 19:42:56 2017 -0800
@@ -43,43 +43,29 @@
   uint _worker_id;
   uint _worker_id_offset;
 
-  // The refinement threads collection is linked list. A predecessor can activate a successor
-  // when the number of the rset update buffer crosses a certain threshold. A successor
-  // would self-deactivate when the number of the buffers falls below the threshold.
   bool _active;
-  G1ConcurrentRefineThread* _next;
   Monitor* _monitor;
   G1ConcurrentRefine* _cr;
 
-  // This thread's activation/deactivation thresholds
-  size_t _activation_threshold;
-  size_t _deactivation_threshold;
-
   void wait_for_completed_buffers();
 
   void set_active(bool x) { _active = x; }
-  bool is_active();
-  void activate();
+  // Deactivate this thread.
   void deactivate();
 
   bool is_primary() { return (_worker_id == 0); }
 
   void run_service();
   void stop_service();
+public:
+  G1ConcurrentRefineThread(G1ConcurrentRefine* cg1r, uint worker_id);
 
-public:
-  // Constructor
-  G1ConcurrentRefineThread(G1ConcurrentRefine* cr, G1ConcurrentRefineThread* next,
-                           uint worker_id_offset, uint worker_id,
-                           size_t activate, size_t deactivate);
-
-  void update_thresholds(size_t activate, size_t deactivate);
-  size_t activation_threshold() const { return _activation_threshold; }
+  bool is_active();
+  // Activate this thread.
+  void activate();
 
   // Total virtual time so far.
   double vtime_accum() { return _vtime_accum; }
-
-  G1ConcurrentRefine* cr() { return _cr;     }
 };
 
 #endif // SHARE_VM_GC_G1_G1CONCURRENTREFINETHREAD_HPP
--- a/src/hotspot/share/gc/g1/g1RemSet.cpp	Thu Nov 23 06:01:57 2017 -0800
+++ b/src/hotspot/share/gc/g1/g1RemSet.cpp	Thu Nov 23 19:42:56 2017 -0800
@@ -298,7 +298,7 @@
 }
 
 uint G1RemSet::num_par_rem_sets() {
-  return MAX2(DirtyCardQueueSet::num_par_ids() + G1ConcurrentRefine::thread_num(), ParallelGCThreads);
+  return MAX2(DirtyCardQueueSet::num_par_ids() + G1ConcurrentRefine::max_num_threads(), ParallelGCThreads);
 }
 
 void G1RemSet::initialize(size_t capacity, uint max_regions) {
--- a/src/hotspot/share/gc/g1/g1RemSetSummary.cpp	Thu Nov 23 06:01:57 2017 -0800
+++ b/src/hotspot/share/gc/g1/g1RemSetSummary.cpp	Thu Nov 23 19:42:56 2017 -0800
@@ -86,7 +86,7 @@
   _num_processed_buf_mutator(0),
   _num_processed_buf_rs_threads(0),
   _num_coarsenings(0),
-  _num_vtimes(G1ConcurrentRefine::thread_num()),
+  _num_vtimes(G1ConcurrentRefine::max_num_threads()),
   _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)),
   _sampling_thread_vtime(0.0f) {
 
@@ -99,7 +99,7 @@
   _num_processed_buf_mutator(0),
   _num_processed_buf_rs_threads(0),
   _num_coarsenings(0),
-  _num_vtimes(G1ConcurrentRefine::thread_num()),
+  _num_vtimes(G1ConcurrentRefine::max_num_threads()),
   _rs_threads_vtimes(NEW_C_HEAP_ARRAY(double, _num_vtimes, mtGC)),
   _sampling_thread_vtime(0.0f) {
   update();
--- a/src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.cpp	Thu Nov 23 06:01:57 2017 -0800
+++ b/src/hotspot/share/gc/g1/g1SATBCardTableModRefBS.cpp	Thu Nov 23 19:42:56 2017 -0800
@@ -175,6 +175,9 @@
 
 void
 G1SATBCardTableLoggingModRefBS::invalidate(MemRegion mr) {
+  if (mr.is_empty()) {
+    return;
+  }
   volatile jbyte* byte = byte_for(mr.start());
   jbyte* last_byte = byte_for(mr.last());
   Thread* thr = Thread::current();
--- a/src/hotspot/share/oops/instanceMirrorKlass.inline.hpp	Thu Nov 23 06:01:57 2017 -0800
+++ b/src/hotspot/share/oops/instanceMirrorKlass.inline.hpp	Thu Nov 23 19:42:56 2017 -0800
@@ -71,10 +71,15 @@
         Devirtualizer<nv>::do_klass(closure, klass);
       }
     } else {
-      // If klass is NULL then this a mirror for a primitive type.
-      // We don't have to follow them, since they are handled as strong
-      // roots in Universe::oops_do.
-      assert(java_lang_Class::is_primitive(obj), "Sanity check");
+      // We would like to assert here (as below) that if klass has been NULL, then
+      // this has been a mirror for a primitive type that we do not need to follow
+      // as they are always strong roots.
+      // However, we might get across a klass that just changed during CMS concurrent
+      // marking if allocation occurred in the old generation.
+      // This is benign here, as we keep alive all CLDs that were loaded during the
+      // CMS concurrent phase in the class loading, i.e. they will be iterated over
+      // and kept alive during remark.
+      // assert(java_lang_Class::is_primitive(obj), "Sanity check");
     }
   }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/gc/g1/TestInvalidateArrayCopy.java	Thu Nov 23 19:42:56 2017 -0800
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test TestInvalidateArrayCopy
+ * @bug 8182050
+ * @summary Check that benign (0-sized) out of heap bounds card table invalidations do not assert.
+ * @requires vm.gc.G1
+ * @requires vm.debug
+ * @key gc
+ * @run main/othervm -XX:NewSize=1M -Xlog:gc -XX:MaxNewSize=1m -XX:-UseTLAB -XX:OldSize=63M -XX:MaxHeapSize=64M TestInvalidateArrayCopy
+ */
+
+// The test allocates zero-sized arrays of j.l.O and tries to arraycopy random data into it so
+// that the asserting post barrier calls are executed. It assumes that G1 allocates eden regions
+// at the top of the heap for this problem to occur.
+public class TestInvalidateArrayCopy {
+
+    static final int NumIterations = 1000000;
+
+    // "Random" source data to "copy" into the target.
+    static Object[] sourceArray = new Object[10];
+
+    public static void main(String[] args) {
+        for (int i = 0; i < NumIterations; i++) {
+            Object[] x = new Object[0];
+            // Make sure that the compiler can't optimize out the above allocations.
+            if (i % (NumIterations / 10) == 0) {
+                System.out.println(x);
+            }
+            System.arraycopy(sourceArray, 0, x, 0, Math.min(x.length, sourceArray.length));
+        }
+    }
+}