changeset 13366:a3b8c747b6bf

8180932: Parallelize safepoint cleanup Summary: Provide infrastructure to do safepoint cleanup tasks using parallel worker threads Reviewed-by: dholmes, rehn, dcubed, thartmann
author rkennke
date Fri, 07 Jul 2017 12:49:11 +0200
parents ff28370e679e
children 0807b715cec5
files src/share/vm/code/nmethod.hpp src/share/vm/gc/shared/collectedHeap.hpp src/share/vm/runtime/safepoint.cpp src/share/vm/runtime/safepoint.hpp src/share/vm/runtime/sweeper.cpp src/share/vm/runtime/sweeper.hpp src/share/vm/runtime/synchronizer.cpp src/share/vm/runtime/synchronizer.hpp src/share/vm/runtime/thread.cpp src/share/vm/runtime/thread.hpp src/share/vm/runtime/vmStructs.cpp test/runtime/logging/SafepointCleanupTest.java
diffstat 12 files changed, 268 insertions(+), 111 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/code/nmethod.hpp	Fri Jul 21 09:50:12 2017 +0200
+++ b/src/share/vm/code/nmethod.hpp	Fri Jul 07 12:49:11 2017 +0200
@@ -136,7 +136,7 @@
   // stack.  An not_entrant method can be removed when there are no
   // more activations, i.e., when the _stack_traversal_mark is less than
   // current sweep traversal index.
-  long _stack_traversal_mark;
+  volatile jlong _stack_traversal_mark;
 
   // The _hotness_counter indicates the hotness of a method. The higher
   // the value the hotter the method. The hotness counter of a nmethod is
@@ -396,8 +396,8 @@
  public:
 
   // Sweeper support
-  long  stack_traversal_mark()                    { return _stack_traversal_mark; }
-  void  set_stack_traversal_mark(long l)          { _stack_traversal_mark = l; }
+  jlong  stack_traversal_mark()                    { return OrderAccess::load_acquire(&_stack_traversal_mark); }
+  void  set_stack_traversal_mark(jlong l)          { OrderAccess::release_store(&_stack_traversal_mark, l); }
 
   // implicit exceptions support
   address continuation_for_implicit_exception(address pc);
--- a/src/share/vm/gc/shared/collectedHeap.hpp	Fri Jul 21 09:50:12 2017 +0200
+++ b/src/share/vm/gc/shared/collectedHeap.hpp	Fri Jul 07 12:49:11 2017 +0200
@@ -50,6 +50,7 @@
 class Thread;
 class ThreadClosure;
 class VirtualSpaceSummary;
+class WorkGang;
 class nmethod;
 
 class GCMessage : public FormatBuffer<1024> {
@@ -603,6 +604,16 @@
   // unknown phase.  The default implementation returns false.
   virtual bool request_concurrent_phase(const char* phase);
 
+  // Provides a thread pool to SafepointSynchronize to use
+  // for parallel safepoint cleanup.
+  // GCs that use a GC worker thread pool may want to share
+  // it for use during safepoint cleanup. This is only possible
+  // if the GC can pause and resume concurrent work (e.g. G1
+  // concurrent marking) for an intermittent non-GC safepoint.
+  // If this method returns NULL, SafepointSynchronize will
+  // perform cleanup tasks serially in the VMThread.
+  virtual WorkGang* get_safepoint_workers() { return NULL; }
+
   // Non product verification and debugging.
 #ifndef PRODUCT
   // Support for PromotionFailureALot.  Return true if it's time to cause a
--- a/src/share/vm/runtime/safepoint.cpp	Fri Jul 21 09:50:12 2017 +0200
+++ b/src/share/vm/runtime/safepoint.cpp	Fri Jul 07 12:49:11 2017 +0200
@@ -33,6 +33,8 @@
 #include "code/scopeDesc.hpp"
 #include "gc/shared/collectedHeap.hpp"
 #include "gc/shared/gcLocker.inline.hpp"
+#include "gc/shared/strongRootsScope.hpp"
+#include "gc/shared/workgroup.hpp"
 #include "interpreter/interpreter.hpp"
 #include "logging/log.hpp"
 #include "logging/logStream.hpp"
@@ -543,64 +545,128 @@
   }
 }
 
-// Various cleaning tasks that should be done periodically at safepoints
+class ParallelSPCleanupThreadClosure : public ThreadClosure {
+private:
+  CodeBlobClosure* _nmethod_cl;       // NULL when prepare_mark_active_nmethods() asks for no stack scan
+  DeflateMonitorCounters* _counters;  // tallies passed on to deflate_thread_local_monitors()
+
+public:
+  ParallelSPCleanupThreadClosure(DeflateMonitorCounters* counters) :
+    _nmethod_cl(NMethodSweeper::prepare_mark_active_nmethods()),  // initializers follow declaration order
+    _counters(counters) {}
+
+  void do_thread(Thread* thread) {  // per-thread cleanup: deflate monitors, then visit nmethods
+    ObjectSynchronizer::deflate_thread_local_monitors(thread, _counters);
+    if (_nmethod_cl != NULL && thread->is_Java_thread() &&
+        ! thread->is_Code_cache_sweeper_thread()) {
+      JavaThread* jt = (JavaThread*) thread;  // safe: guarded by is_Java_thread() above
+      jt->nmethods_do(_nmethod_cl);
+    }
+  }
+};
+
+class ParallelSPCleanupTask : public AbstractGangTask {
+private:
+  SubTasksDone _subtasks;                              // claim state for the SAFEPOINT_CLEANUP_* sub-tasks
+  ParallelSPCleanupThreadClosure _cleanup_threads_cl;  // per-thread work applied in work()
+  uint _num_workers;                                   // passed to all_tasks_completed()
+  DeflateMonitorCounters* _counters;                   // tallies passed to deflate_idle_monitors()
+public:
+  ParallelSPCleanupTask(uint num_workers, DeflateMonitorCounters* counters) :
+    AbstractGangTask("Parallel Safepoint Cleanup"),
+    _subtasks(SafepointSynchronize::SAFEPOINT_CLEANUP_NUM_TASKS),  // built in place, no temporary copy
+    _cleanup_threads_cl(counters),                                 // likewise direct-initialized
+    _num_workers(num_workers),
+    _counters(counters) {}
+
+  void work(uint worker_id) {
+    // Every worker joins the walk over the Java threads (monitor deflation, nmethod marking if necessary).
+    Threads::parallel_java_threads_do(&_cleanup_threads_cl);
+
+    // NOTE(review): each block below runs only when is_task_claimed() returns false - confirm in SubTasksDone.
+    if (!_subtasks.is_task_claimed(SafepointSynchronize::SAFEPOINT_CLEANUP_DEFLATE_MONITORS)) {
+      const char* name = "deflating idle monitors";
+      EventSafepointCleanupTask event;
+      TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
+      ObjectSynchronizer::deflate_idle_monitors(_counters);
+      event_safepoint_cleanup_task_commit(event, name);
+    }
+
+    if (!_subtasks.is_task_claimed(SafepointSynchronize::SAFEPOINT_CLEANUP_UPDATE_INLINE_CACHES)) {
+      const char* name = "updating inline caches";
+      EventSafepointCleanupTask event;
+      TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
+      InlineCacheBuffer::update_inline_caches();
+      event_safepoint_cleanup_task_commit(event, name);
+    }
+
+    if (!_subtasks.is_task_claimed(SafepointSynchronize::SAFEPOINT_CLEANUP_COMPILATION_POLICY)) {
+      const char* name = "compilation policy safepoint handler";
+      EventSafepointCleanupTask event;
+      TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
+      CompilationPolicy::policy()->do_safepoint_work();
+      event_safepoint_cleanup_task_commit(event, name);
+    }
+
+    if (!_subtasks.is_task_claimed(SafepointSynchronize::SAFEPOINT_CLEANUP_SYMBOL_TABLE_REHASH)) {
+      if (SymbolTable::needs_rehashing()) {  // the sub-task is claimed even when no rehash is needed
+        const char* name = "rehashing symbol table";
+        EventSafepointCleanupTask event;
+        TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
+        SymbolTable::rehash_table();
+        event_safepoint_cleanup_task_commit(event, name);
+      }
+    }
+
+    if (!_subtasks.is_task_claimed(SafepointSynchronize::SAFEPOINT_CLEANUP_STRING_TABLE_REHASH)) {
+      if (StringTable::needs_rehashing()) {  // the sub-task is claimed even when no rehash is needed
+        const char* name = "rehashing string table";
+        EventSafepointCleanupTask event;
+        TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
+        StringTable::rehash_table();
+        event_safepoint_cleanup_task_commit(event, name);
+      }
+    }
+
+    if (!_subtasks.is_task_claimed(SafepointSynchronize::SAFEPOINT_CLEANUP_CLD_PURGE)) {
+      // CMS delays purging the CLDG until the beginning of the next safepoint and to
+      // make sure concurrent sweep is done
+      const char* name = "purging class loader data graph";
+      EventSafepointCleanupTask event;
+      TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
+      ClassLoaderDataGraph::purge_if_needed();
+      event_safepoint_cleanup_task_commit(event, name);
+    }
+    _subtasks.all_tasks_completed(_num_workers);  // every worker reports in once it is done
+  }
+};
+
+// Various cleaning tasks that should be done periodically at safepoints.
 void SafepointSynchronize::do_cleanup_tasks() {
-  {
-    const char* name = "deflating idle monitors";
-    EventSafepointCleanupTask event;
-    TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
-    ObjectSynchronizer::deflate_idle_monitors();
-    event_safepoint_cleanup_task_commit(event, name);
+
+  TraceTime timer("safepoint cleanup tasks", TRACETIME_LOG(Info, safepoint, cleanup));
+
+  // Prepare for monitor deflation.
+  DeflateMonitorCounters deflate_counters;
+  ObjectSynchronizer::prepare_deflate_idle_monitors(&deflate_counters);
+
+  CollectedHeap* heap = Universe::heap();
+  assert(heap != NULL, "heap not initialized yet?");
+  WorkGang* cleanup_workers = heap->get_safepoint_workers();
+  if (cleanup_workers != NULL) {
+    // Parallel cleanup using GC provided thread pool.
+    uint num_cleanup_workers = cleanup_workers->active_workers();
+    ParallelSPCleanupTask cleanup(num_cleanup_workers, &deflate_counters);
+    StrongRootsScope srs(num_cleanup_workers);
+    cleanup_workers->run_task(&cleanup);
+  } else {
+    // Serial cleanup using VMThread.
+    ParallelSPCleanupTask cleanup(1, &deflate_counters);
+    StrongRootsScope srs(1);
+    cleanup.work(0);
   }
 
-  {
-    const char* name = "updating inline caches";
-    EventSafepointCleanupTask event;
-    TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
-    InlineCacheBuffer::update_inline_caches();
-    event_safepoint_cleanup_task_commit(event, name);
-  }
-  {
-    const char* name = "compilation policy safepoint handler";
-    EventSafepointCleanupTask event;
-    TraceTime timer("compilation policy safepoint handler", TRACETIME_LOG(Info, safepoint, cleanup));
-    CompilationPolicy::policy()->do_safepoint_work();
-    event_safepoint_cleanup_task_commit(event, name);
-  }
-
-  {
-    const char* name = "mark nmethods";
-    EventSafepointCleanupTask event;
-    TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
-    NMethodSweeper::mark_active_nmethods();
-    event_safepoint_cleanup_task_commit(event, name);
-  }
-
-  if (SymbolTable::needs_rehashing()) {
-    const char* name = "rehashing symbol table";
-    EventSafepointCleanupTask event;
-    TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
-    SymbolTable::rehash_table();
-    event_safepoint_cleanup_task_commit(event, name);
-  }
-
-  if (StringTable::needs_rehashing()) {
-    const char* name = "rehashing string table";
-    EventSafepointCleanupTask event;
-    TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
-    StringTable::rehash_table();
-    event_safepoint_cleanup_task_commit(event, name);
-  }
-
-  {
-    // CMS delays purging the CLDG until the beginning of the next safepoint and to
-    // make sure concurrent sweep is done
-    const char* name = "purging class loader data graph";
-    EventSafepointCleanupTask event;
-    TraceTime timer(name, TRACETIME_LOG(Info, safepoint, cleanup));
-    ClassLoaderDataGraph::purge_if_needed();
-    event_safepoint_cleanup_task_commit(event, name);
-  }
+  // Finish monitor deflation.
+  ObjectSynchronizer::finish_deflate_idle_monitors(&deflate_counters);
 }
 
 
--- a/src/share/vm/runtime/safepoint.hpp	Fri Jul 21 09:50:12 2017 +0200
+++ b/src/share/vm/runtime/safepoint.hpp	Fri Jul 07 12:49:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -75,6 +75,18 @@
     _blocking_timeout = 1
   };
 
+  // Sub-task ids used by ParallelSPCleanupTask; listed in the order of the tasks when done serially.
+  enum SafepointCleanupTasks {
+    SAFEPOINT_CLEANUP_DEFLATE_MONITORS,      // ObjectSynchronizer::deflate_idle_monitors()
+    SAFEPOINT_CLEANUP_UPDATE_INLINE_CACHES,  // InlineCacheBuffer::update_inline_caches()
+    SAFEPOINT_CLEANUP_COMPILATION_POLICY,    // CompilationPolicy::policy()->do_safepoint_work()
+    SAFEPOINT_CLEANUP_SYMBOL_TABLE_REHASH,   // SymbolTable::rehash_table(), if needed
+    SAFEPOINT_CLEANUP_STRING_TABLE_REHASH,   // StringTable::rehash_table(), if needed
+    SAFEPOINT_CLEANUP_CLD_PURGE,             // ClassLoaderDataGraph::purge_if_needed()
+    // Leave this one last: it is the number of sub-tasks, not a sub-task.
+    SAFEPOINT_CLEANUP_NUM_TASKS
+  };
+
   typedef struct {
     float  _time_stamp;                        // record when the current safepoint occurs in seconds
     int    _vmop_type;                         // type of VM operation triggers the safepoint
--- a/src/share/vm/runtime/sweeper.cpp	Fri Jul 21 09:50:12 2017 +0200
+++ b/src/share/vm/runtime/sweeper.cpp	Fri Jul 07 12:49:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -53,7 +53,7 @@
  public:
   int traversal;
   int compile_id;
-  long traversal_mark;
+  jlong traversal_mark;
   int state;
   const char* kind;
   address vep;
@@ -62,7 +62,7 @@
 
   void print() {
       tty->print_cr("traversal = %d compile_id = %d %s uep = " PTR_FORMAT " vep = "
-                    PTR_FORMAT " state = %d traversal_mark %ld line = %d",
+                    PTR_FORMAT " state = %d traversal_mark " JLONG_FORMAT " line = %d",  // spaces keep JLONG_FORMAT a macro, not a literal suffix
                     traversal,
                     compile_id,
                     kind == NULL ? "" : kind,
@@ -114,7 +114,7 @@
 void NMethodSweeper::record_sweep(CompiledMethod* nm, int line) {
   if (_records != NULL) {
     _records[_sweep_index].traversal = _traversals;
-    _records[_sweep_index].traversal_mark = nm->is_nmethod() ? ((nmethod*)nm)->_stack_traversal_mark : 0;
+    _records[_sweep_index].traversal_mark = nm->is_nmethod() ? ((nmethod*)nm)->stack_traversal_mark() : 0;
     _records[_sweep_index].compile_id = nm->compile_id();
     _records[_sweep_index].kind = nm->compile_kind();
     _records[_sweep_index].state = nm->get_state();
@@ -201,11 +201,18 @@
   * safepoint.
   */
 void NMethodSweeper::mark_active_nmethods() {
+  CodeBlobClosure* cl = prepare_mark_active_nmethods();
+  if (cl != NULL) {
+    Threads::nmethods_do(cl);
+  }
+}
+
+CodeBlobClosure* NMethodSweeper::prepare_mark_active_nmethods() {
   assert(SafepointSynchronize::is_at_safepoint(), "must be executed at a safepoint");
   // If we do not want to reclaim not-entrant or zombie methods there is no need
   // to scan stacks
   if (!MethodFlushing) {
-    return;
+    return NULL;
   }
 
   // Increase time so that we can estimate when to invoke the sweeper again.
@@ -233,14 +240,13 @@
     if (PrintMethodFlushing) {
       tty->print_cr("### Sweep: stack traversal %ld", _traversals);
     }
-    Threads::nmethods_do(&mark_activation_closure);
+    return &mark_activation_closure;
 
   } else {
     // Only set hotness counter
-    Threads::nmethods_do(&set_hotness_closure);
+    return &set_hotness_closure;
   }
 
-  OrderAccess::storestore();
 }
 
 /**
--- a/src/share/vm/runtime/sweeper.hpp	Fri Jul 21 09:50:12 2017 +0200
+++ b/src/share/vm/runtime/sweeper.hpp	Fri Jul 07 12:49:11 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2017, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,6 +30,8 @@
 #include "code/codeCache.hpp"
 #include "utilities/ticks.hpp"
 
+class CodeBlobClosure;
+
 // An NmethodSweeper is an incremental cleaner for:
 //    - cleanup inline caches
 //    - reclamation of nmethods
@@ -114,6 +116,7 @@
 #endif
 
   static void mark_active_nmethods();      // Invoked at the end of each safepoint
+  static CodeBlobClosure* prepare_mark_active_nmethods();
   static void sweeper_loop();
   static void notify(int code_blob_type);  // Possibly start the sweeper thread.
   static void force_sweep();
--- a/src/share/vm/runtime/synchronizer.cpp	Fri Jul 21 09:50:12 2017 +0200
+++ b/src/share/vm/runtime/synchronizer.cpp	Fri Jul 07 12:49:11 2017 +0200
@@ -1661,7 +1661,17 @@
 
 // Walk a given monitor list, and deflate idle monitors
 // The given list could be a per-thread list or a global list
-// Caller acquires gListLock
+// Caller acquires gListLock.
+//
+// In the case of parallel processing of thread local monitor lists,
+// work is done by Threads::parallel_java_threads_do() which ensures that
+// each Java thread is processed by exactly one worker thread, and
+// thus avoids conflicts that would arise when worker threads would
+// process the same monitor lists concurrently.
+//
+// See also ParallelSPCleanupTask and
+// SafepointSynchronize::do_cleanup_tasks() in safepoint.cpp and
+// Threads::parallel_java_threads_do() in thread.cpp.
 int ObjectSynchronizer::deflate_monitor_list(ObjectMonitor** listHeadp,
                                              ObjectMonitor** freeHeadp,
                                              ObjectMonitor** freeTailp) {
@@ -1692,11 +1702,14 @@
   return deflated_count;
 }
 
-void ObjectSynchronizer::deflate_idle_monitors() {
+void ObjectSynchronizer::prepare_deflate_idle_monitors(DeflateMonitorCounters* counters) {
+  counters->nScavenged = 0;      // nothing reclaimed yet in this pass
+  counters->nInCirculation = 0;  // no extant monitors tallied yet
+  counters->nInuse = 0;          // none seen associated with objects yet
+}
+
+void ObjectSynchronizer::deflate_idle_monitors(DeflateMonitorCounters* counters) {
   assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
-  int nInuse = 0;              // currently associated with objects
-  int nInCirculation = 0;      // extant
-  int nScavenged = 0;          // reclaimed
   bool deflated = false;
 
   ObjectMonitor * freeHeadp = NULL;  // Local SLL of scavenged monitors
@@ -1709,25 +1722,16 @@
   Thread::muxAcquire(&gListLock, "scavenge - return");
 
   if (MonitorInUseLists) {
-    int inUse = 0;
-    for (JavaThread* cur = Threads::first(); cur != NULL; cur = cur->next()) {
-      nInCirculation+= cur->omInUseCount;
-      int deflated_count = deflate_monitor_list(cur->omInUseList_addr(), &freeHeadp, &freeTailp);
-      cur->omInUseCount-= deflated_count;
-      if (ObjectMonitor::Knob_VerifyInUse) {
-        verifyInUse(cur);
-      }
-      nScavenged += deflated_count;
-      nInuse += cur->omInUseCount;
-    }
+    // Note: the thread-local monitors lists get deflated in
+    // a separate pass. See deflate_thread_local_monitors().
 
     // For moribund threads, scan gOmInUseList
     if (gOmInUseList) {
-      nInCirculation += gOmInUseCount;
+      counters->nInCirculation += gOmInUseCount;
       int deflated_count = deflate_monitor_list((ObjectMonitor **)&gOmInUseList, &freeHeadp, &freeTailp);
-      gOmInUseCount-= deflated_count;
-      nScavenged += deflated_count;
-      nInuse += gOmInUseCount;
+      gOmInUseCount -= deflated_count;
+      counters->nScavenged += deflated_count;
+      counters->nInuse += gOmInUseCount;
     }
 
   } else {
@@ -1736,7 +1740,7 @@
     for (; block != NULL; block = (PaddedEnd<ObjectMonitor> *)next(block)) {
       // Iterate over all extant monitors - Scavenge all idle monitors.
       assert(block->object() == CHAINMARKER, "must be a block header");
-      nInCirculation += _BLOCKSIZE;
+      counters->nInCirculation += _BLOCKSIZE;
       for (int i = 1; i < _BLOCKSIZE; i++) {
         ObjectMonitor* mid = (ObjectMonitor*)&block[i];
         oop obj = (oop)mid->object();
@@ -1753,31 +1757,17 @@
 
         if (deflated) {
           mid->FreeNext = NULL;
-          nScavenged++;
+          counters->nScavenged++;
         } else {
-          nInuse++;
+          counters->nInuse++;
         }
       }
     }
   }
 
-  gMonitorFreeCount += nScavenged;
-
-  // Consider: audit gFreeList to ensure that gMonitorFreeCount and list agree.
-
-  if (ObjectMonitor::Knob_Verbose) {
-    tty->print_cr("INFO: Deflate: InCirc=%d InUse=%d Scavenged=%d "
-                  "ForceMonitorScavenge=%d : pop=%d free=%d",
-                  nInCirculation, nInuse, nScavenged, ForceMonitorScavenge,
-                  gMonitorPopulation, gMonitorFreeCount);
-    tty->flush();
-  }
-
-  ForceMonitorScavenge = 0;    // Reset
-
   // Move the scavenged monitors back to the global free list.
   if (freeHeadp != NULL) {
-    guarantee(freeTailp != NULL && nScavenged > 0, "invariant");
+    guarantee(freeTailp != NULL && counters->nScavenged > 0, "invariant");
     assert(freeTailp->FreeNext == NULL, "invariant");
     // constant-time list splice - prepend scavenged segment to gFreeList
     freeTailp->FreeNext = gFreeList;
@@ -1785,8 +1775,25 @@
   }
   Thread::muxRelease(&gListLock);
 
-  OM_PERFDATA_OP(Deflations, inc(nScavenged));
-  OM_PERFDATA_OP(MonExtant, set_value(nInCirculation));
+}
+
+void ObjectSynchronizer::finish_deflate_idle_monitors(DeflateMonitorCounters* counters) {
+  gMonitorFreeCount += counters->nScavenged;
+
+  // Consider: audit gFreeList to ensure that gMonitorFreeCount and list agree.
+
+  if (ObjectMonitor::Knob_Verbose) {
+    tty->print_cr("INFO: Deflate: InCirc=%d InUse=%d Scavenged=%d "
+                  "ForceMonitorScavenge=%d : pop=%d free=%d",
+                  counters->nInCirculation, counters->nInuse, counters->nScavenged, ForceMonitorScavenge,
+                  gMonitorPopulation, gMonitorFreeCount);
+    tty->flush();
+  }
+
+  ForceMonitorScavenge = 0;    // Reset
+
+  OM_PERFDATA_OP(Deflations, inc(counters->nScavenged));
+  OM_PERFDATA_OP(MonExtant, set_value(counters->nInCirculation));
 
   // TODO: Add objectMonitor leak detection.
   // Audit/inventory the objectMonitors -- make sure they're all accounted for.
@@ -1794,6 +1801,38 @@
   GVars.stwCycle++;
 }
 
+void ObjectSynchronizer::deflate_thread_local_monitors(Thread* thread, DeflateMonitorCounters* counters) {
+  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
+  if (!MonitorInUseLists) return;  // nothing to do when MonitorInUseLists is off
+
+  ObjectMonitor * freeHeadp = NULL;  // Local SLL of scavenged monitors
+  ObjectMonitor * freeTailp = NULL;  // Tail of that list, needed for the splice below
+
+  int deflated_count = deflate_monitor_list(thread->omInUseList_addr(), &freeHeadp, &freeTailp);  // runs before gListLock is taken
+
+  Thread::muxAcquire(&gListLock, "scavenge - return");
+
+  // Fold this thread's numbers into the shared counters while holding gListLock.
+  counters->nInCirculation += thread->omInUseCount;  // count taken before the deflated monitors are subtracted
+  thread->omInUseCount -= deflated_count;
+  if (ObjectMonitor::Knob_VerifyInUse) {
+    verifyInUse(thread);
+  }
+  counters->nScavenged += deflated_count;
+  counters->nInuse += thread->omInUseCount;  // count remaining after deflation
+
+  // Move the scavenged monitors back to the global free list.
+  if (freeHeadp != NULL) {
+    guarantee(freeTailp != NULL && deflated_count > 0, "invariant");
+    assert(freeTailp->FreeNext == NULL, "invariant");
+
+    // constant-time list splice - prepend scavenged segment to gFreeList
+    freeTailp->FreeNext = gFreeList;
+    gFreeList = freeHeadp;
+  }
+  Thread::muxRelease(&gListLock);
+}
+
 // Monitor cleanup on JavaThread::exit
 
 // Iterate through monitor cache and attempt to release thread's monitors
--- a/src/share/vm/runtime/synchronizer.hpp	Fri Jul 21 09:50:12 2017 +0200
+++ b/src/share/vm/runtime/synchronizer.hpp	Fri Jul 07 12:49:11 2017 +0200
@@ -32,6 +32,12 @@
 
 class ObjectMonitor;
 
+struct DeflateMonitorCounters {  // tallies accumulated across one monitor deflation pass
+  int nInuse;          // monitors still associated with objects after deflation
+  int nInCirculation;  // extant monitors counted during the pass
+  int nScavenged;      // monitors reclaimed (deflated) during the pass
+};
+
 class ObjectSynchronizer : AllStatic {
   friend class VMStructs;
  public:
@@ -127,7 +133,11 @@
   // GC: we current use aggressive monitor deflation policy
   // Basically we deflate all monitors that are not busy.
   // An adaptive profile-based deflation policy could be used if needed
-  static void deflate_idle_monitors();
+  static void deflate_idle_monitors(DeflateMonitorCounters* counters);
+  static void deflate_thread_local_monitors(Thread* thread, DeflateMonitorCounters* counters);
+  static void prepare_deflate_idle_monitors(DeflateMonitorCounters* counters);
+  static void finish_deflate_idle_monitors(DeflateMonitorCounters* counters);
+
   // For a given monitor list: global or per-thread, deflate idle monitors
   static int deflate_monitor_list(ObjectMonitor** listheadp,
                                   ObjectMonitor** freeHeadp,
--- a/src/share/vm/runtime/thread.cpp	Fri Jul 21 09:50:12 2017 +0200
+++ b/src/share/vm/runtime/thread.cpp	Fri Jul 07 12:49:11 2017 +0200
@@ -3385,6 +3385,15 @@
   // If CompilerThreads ever become non-JavaThreads, add them here
 }
 
+void Threads::parallel_java_threads_do(ThreadClosure* tc) {  // applies tc to each Java thread claimed here
+  const int claim_parity = Threads::thread_claim_parity();
+  ALL_JAVA_THREADS(java_thread) {
+    if (java_thread->claim_oops_do(true, claim_parity)) {  // threads not claimed by this call are skipped
+      tc->do_thread(java_thread);
+    }
+  }
+}
+
 // The system initialization in the library has three phases.
 //
 // Phase 1: java.lang.System class initialization
--- a/src/share/vm/runtime/thread.hpp	Fri Jul 21 09:50:12 2017 +0200
+++ b/src/share/vm/runtime/thread.hpp	Fri Jul 07 12:49:11 2017 +0200
@@ -2069,6 +2069,7 @@
   static bool includes(JavaThread* p);
   static JavaThread* first()                     { return _thread_list; }
   static void threads_do(ThreadClosure* tc);
+  static void parallel_java_threads_do(ThreadClosure* tc);
 
   // Initializes the vm and creates the vm thread
   static jint create_vm(JavaVMInitArgs* args, bool* canTryAgain);
--- a/src/share/vm/runtime/vmStructs.cpp	Fri Jul 21 09:50:12 2017 +0200
+++ b/src/share/vm/runtime/vmStructs.cpp	Fri Jul 07 12:49:11 2017 +0200
@@ -902,7 +902,7 @@
   nonstatic_field(nmethod,                     _verified_entry_point,                         address)                               \
   nonstatic_field(nmethod,                     _osr_entry_point,                              address)                               \
   volatile_nonstatic_field(nmethod,            _lock_count,                                   jint)                                  \
-  nonstatic_field(nmethod,                     _stack_traversal_mark,                         long)                                  \
+  volatile_nonstatic_field(nmethod,            _stack_traversal_mark,                         jlong)                                 \
   nonstatic_field(nmethod,                     _compile_id,                                   int)                                   \
   nonstatic_field(nmethod,                     _comp_level,                                   int)                                   \
                                                                                                                                      \
--- a/test/runtime/logging/SafepointCleanupTest.java	Fri Jul 21 09:50:12 2017 +0200
+++ b/test/runtime/logging/SafepointCleanupTest.java	Fri Jul 07 12:49:11 2017 +0200
@@ -38,10 +38,10 @@
     static void analyzeOutputOn(ProcessBuilder pb) throws Exception {
         OutputAnalyzer output = new OutputAnalyzer(pb.start());
         output.shouldContain("[safepoint,cleanup]");
+        output.shouldContain("safepoint cleanup tasks");
         output.shouldContain("deflating idle monitors");
         output.shouldContain("updating inline caches");
         output.shouldContain("compilation policy safepoint handler");
-        output.shouldContain("mark nmethods");
         output.shouldContain("purging class loader data graph");
         output.shouldHaveExitValue(0);
     }