changeset 58053:02b2e4f3391f

8238867: Improve G1DirtyCardQueueSet::Queue::pop Summary: Allow one of competing pops for last element to succeed. Reviewed-by: iwalulya, sjohanss
author kbarrett
date Thu, 13 Feb 2020 15:16:50 -0500
parents b9ff1541faf9
children 6bcfcb7fe83b
files src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp
diffstat 2 files changed, 48 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	Thu Feb 13 12:08:04 2020 -0800
+++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.cpp	Thu Feb 13 15:16:50 2020 -0500
@@ -138,7 +138,6 @@
   assert(last.next() == NULL, "precondition");
   BufferNode* old_tail = Atomic::xchg(&_tail, &last);
   if (old_tail == NULL) {       // Was empty.
-    assert(Atomic::load(&_head) == NULL, "invariant");
     Atomic::store(&_head, &first);
   } else {
     assert(old_tail->next() == NULL, "invariant");
@@ -146,53 +145,65 @@
   }
 }
 
-// pop gets the queue head as the candidate result (returning NULL if the
-// queue head was NULL), and then gets that result node's "next" value.  If
-// that "next" value is NULL and the queue head hasn't changed, then there
-// is only one element in the accessible part of the list (the sequence from
-// head to a node with a NULL "next" value).  We can't return that element,
-// because it may be the old tail of a concurrent push/append that has not
-// yet had its "next" field set to the new tail.  So return NULL in this case.
-// Otherwise, attempt to cmpxchg that "next" value into the queue head,
-// retrying the whole operation if that fails. This is the "usual" lock-free
-// pop from the head of a singly linked list, with the additional restriction
-// on taking the last element.
 BufferNode* G1DirtyCardQueueSet::Queue::pop() {
   Thread* current_thread = Thread::current();
   while (true) {
     // Use a critical section per iteration, rather than over the whole
-    // operation.  We're not guaranteed to make progress, because of possible
-    // contention on the queue head.  Lingering in one CS the whole time could
-    // lead to excessive allocation of buffers, because the CS blocks return
-    // of released buffers to the free list for reuse.
+    // operation.  We're not guaranteed to make progress.  Lingering in one
+    // CS could lead to excessive allocation of buffers, because the CS
+    // blocks return of released buffers to the free list for reuse.
     GlobalCounter::CriticalSection cs(current_thread);
 
     BufferNode* result = Atomic::load_acquire(&_head);
-    // Check for empty queue.  Only needs to be done on first iteration,
-    // since we never take the last element, but it's messy to make use
-    // of that and we expect one iteration to be the common case.
-    if (result == NULL) return NULL;
+    if (result == NULL) return NULL; // Queue is empty.
 
     BufferNode* next = Atomic::load_acquire(BufferNode::next_ptr(*result));
     if (next != NULL) {
-      next = Atomic::cmpxchg(&_head, result, next);
-      if (next == result) {
+      // The "usual" lock-free pop from the head of a singly linked list.
+      if (result == Atomic::cmpxchg(&_head, result, next)) {
         // Former head successfully taken; it is not the last.
         assert(Atomic::load(&_tail) != result, "invariant");
         assert(result->next() != NULL, "invariant");
         result->set_next(NULL);
         return result;
       }
-      // cmpxchg failed; try again.
-    } else if (result == Atomic::load_acquire(&_head)) {
-      // If follower of head is NULL and head hasn't changed, then only
-      // the one element is currently accessible.  We don't take the last
-      // accessible element, because there may be a concurrent add using it.
-      // The check for unchanged head isn't needed for correctness, but the
-      // retry on change may sometimes let us get a buffer after all.
-      return NULL;
+      // Lost the race; try again.
+      continue;
     }
-    // Head changed; try again.
+
+    // next is NULL.  This case is handled differently from the "usual"
+    // lock-free pop from the head of a singly linked list.
+
+    // If _tail == result then result is the only element in the list. We can
+    // remove it from the list by first setting _tail to NULL and then setting
+    // _head to NULL, the order being important.  We set _tail with cmpxchg in
+    // case of a concurrent push/append/pop also changing _tail.  If we win
+    // then we've claimed result.
+    if (Atomic::cmpxchg(&_tail, result, (BufferNode*)NULL) == result) {
+      assert(result->next() == NULL, "invariant");
+      // Now that we've claimed result, also set _head to NULL.  But we must
+      // be careful of a concurrent push/append after we NULLed _tail, since
+      // it may have already performed its list-was-empty update of _head,
+      // which we must not overwrite.
+      Atomic::cmpxchg(&_head, result, (BufferNode*)NULL);
+      return result;
+    }
+
+    // If _head != result then we lost the race to take result; try again.
+    if (result != Atomic::load_acquire(&_head)) {
+      continue;
+    }
+
+    // An in-progress concurrent operation interfered with taking the head
+    // element when it was the only element.  A concurrent pop may have won
+    // the race to clear the tail but not yet cleared the head. Alternatively,
+    // a concurrent push/append may have changed the tail but not yet linked
+    // result->next().  We cannot take result in either case.  We don't just
+    // try again, because we could spin for a long time waiting for that
+    // concurrent operation to finish.  In the first case, returning NULL is
+    // fine; we lost the race for the only element to another thread.  We
+    // also return NULL for the second case, and let the caller cope.
+    return NULL;
   }
 }
 
--- a/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp	Thu Feb 13 12:08:04 2020 -0800
+++ b/src/hotspot/share/gc/g1/g1DirtyCardQueue.hpp	Thu Feb 13 15:16:50 2020 -0500
@@ -77,11 +77,8 @@
   };
 
   // A lock-free FIFO of BufferNodes, linked through their next() fields.
-  // This class has a restriction that pop() cannot return the last buffer
-  // in the queue, or what was the last buffer for a concurrent push/append
-  // operation.  It is expected that there will be a later push/append that
-  // will make that buffer available to a future pop(), or there will
-  // eventually be a complete transfer via take_all().
+  // This class has a restriction that pop() may return NULL when there are
+  // buffers in the queue if there is a concurrent push/append operation.
   class Queue {
     BufferNode* volatile _head;
     DEFINE_PAD_MINUS_SIZE(1, DEFAULT_CACHE_LINE_SIZE, sizeof(BufferNode*));
@@ -105,9 +102,10 @@
     void append(BufferNode& first, BufferNode& last);
 
     // Thread-safe attempt to remove and return the first buffer in the queue.
-    // Returns NULL if the queue is empty, or if only one buffer is found.
-    // Uses GlobalCounter critical sections to address the ABA problem; this
-    // works with the buffer allocator's use of GlobalCounter synchronization.
+    // Returns NULL if the queue is empty, or if a concurrent push/append
+    // interferes.  Uses GlobalCounter critical sections to address the ABA
+    // problem; this works with the buffer allocator's use of GlobalCounter
+    // synchronization.
     BufferNode* pop();
 
     // Take all the buffers from the queue, leaving the queue empty.