changeset 57450:c86d2493d1a8

8235390: JfrEmergencyDump::on_vm_shutdown crashes Reviewed-by: egahlin
author mgronlun
date Fri, 20 Dec 2019 16:04:34 +0100
parents 7118b1a7d9fb
children b33b49462d72
files src/hotspot/share/jfr/leakprofiler/checkpoint/eventEmitter.cpp src/hotspot/share/jfr/leakprofiler/leakProfiler.cpp src/hotspot/share/jfr/recorder/service/jfrRecorderService.cpp src/hotspot/share/jfr/recorder/service/jfrRecorderService.hpp src/hotspot/share/runtime/mutexLocker.cpp
diffstat 5 files changed, 70 insertions(+), 22 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/share/jfr/leakprofiler/checkpoint/eventEmitter.cpp	Thu Dec 19 15:13:24 2019 -0800
+++ b/src/hotspot/share/jfr/leakprofiler/checkpoint/eventEmitter.cpp	Fri Dec 20 16:04:34 2019 +0100
@@ -53,12 +53,15 @@
 }
 
 void EventEmitter::emit(ObjectSampler* sampler, int64_t cutoff_ticks, bool emit_all) {
-  assert(JfrStream_lock->owned_by_self(), "invariant");
   assert(sampler != NULL, "invariant");
   ResourceMark rm;
   EdgeStore edge_store;
   if (cutoff_ticks <= 0) {
     // no reference chains
+    MutexLocker lock(JfrStream_lock, Mutex::_no_safepoint_check_flag);
+    // The lock is needed here to prevent the recorder thread (running flush())
+    // from writing old object events out from the thread local buffer
+    // before the required constant pools have been serialized.
     JfrTicks time_stamp = JfrTicks::now();
     EventEmitter emitter(time_stamp, time_stamp);
     emitter.write_events(sampler, &edge_store, emit_all);
--- a/src/hotspot/share/jfr/leakprofiler/leakProfiler.cpp	Thu Dec 19 15:13:24 2019 -0800
+++ b/src/hotspot/share/jfr/leakprofiler/leakProfiler.cpp	Fri Dec 20 16:04:34 2019 +0100
@@ -31,7 +31,6 @@
 #include "jfr/recorder/service/jfrOptionSet.hpp"
 #include "logging/log.hpp"
 #include "memory/iterator.hpp"
-#include "runtime/mutexLocker.hpp"
 #include "runtime/thread.inline.hpp"
 #include "runtime/vmThread.hpp"
 
@@ -83,7 +82,6 @@
   if (!is_running()) {
     return;
   }
-  MutexLocker lock(JfrStream_lock);
   // exclusive access to object sampler instance
   ObjectSampler* const sampler = ObjectSampler::acquire();
   assert(sampler != NULL, "invariant");
--- a/src/hotspot/share/jfr/recorder/service/jfrRecorderService.cpp	Thu Dec 19 15:13:24 2019 -0800
+++ b/src/hotspot/share/jfr/recorder/service/jfrRecorderService.cpp	Fri Dec 20 16:04:34 2019 +0100
@@ -359,7 +359,7 @@
 }
 
 void JfrRecorderService::start() {
-  MutexLocker lock(JfrStream_lock);
+  MutexLocker lock(JfrStream_lock, Mutex::_no_safepoint_check_flag);
   log_debug(jfr, system)("Request to START recording");
   assert(!is_recording(), "invariant");
   clear();
@@ -402,6 +402,7 @@
 }
 
 void JfrRecorderService::open_new_chunk(bool vm_error) {
+  assert(JfrStream_lock->owned_by_self(), "invariant");
   JfrChunkRotation::on_rotation();
   const bool valid_chunk = _repository.open_chunk(vm_error);
   _storage.control().set_to_disk(valid_chunk);
@@ -411,24 +412,61 @@
 }
 
 static void stop() {
+  assert(JfrStream_lock->owned_by_self(), "invariant");
   assert(JfrRecorderService::is_recording(), "invariant");
   log_debug(jfr, system)("Recording STOPPED");
   set_recording_state(false);
   assert(!JfrRecorderService::is_recording(), "invariant");
 }
 
-void JfrRecorderService::prepare_for_vm_error_rotation() {
-  assert(JfrStream_lock->owned_by_self(), "invariant");
-  if (!_chunkwriter.is_valid()) {
-    open_new_chunk(true);
+// 'rotation_safepoint_pending' is currently only relevant in the unusual case of an emergency dump.
+// Since the JfrStream_lock must be acquired using _no_safepoint_check,
+// if the thread running the emergency dump is a JavaThread, a pending safepoint, induced by rotation,
+// would lead to a deadlock. This deadlock, although unpleasant, is not completely horrendous at this
+// location because the WatcherThread will terminate the VM after a timeout.
+// Deadlock avoidance is done not to affect the stability of general VM error reporting.
+static bool rotation_safepoint_pending = false;
+
+static bool is_rotation_safepoint_pending() {
+  return Atomic::load_acquire(&rotation_safepoint_pending);
+}
+
+static void set_rotation_safepoint_pending(bool value) {
+  assert(value ? !is_rotation_safepoint_pending() : is_rotation_safepoint_pending(), "invariant");
+  Atomic::release_store(&rotation_safepoint_pending, value);
+}
+
+static bool vm_error = false;
+static const Thread* vm_error_thread = NULL;
+
+static bool prepare_for_vm_error_rotation() {
+  assert(!JfrStream_lock->owned_by_self(), "invariant");
+  Thread* const t = Thread::current();
+  assert(t != NULL, "invariant");
+  if (is_rotation_safepoint_pending() && t->is_Java_thread()) {
+    // A safepoint is pending, avoid deadlock.
+    log_warning(jfr, system)("Unable to issue successful emergency dump");
+    return false;
   }
-  _checkpoint_manager.register_service_thread(Thread::current());
+  vm_error_thread = t;
+  vm_error = true;
+  OrderAccess::fence();
+  return true;
 }
 
 void JfrRecorderService::vm_error_rotation() {
   assert(JfrStream_lock->owned_by_self(), "invariant");
+  assert(vm_error, "invariant");
+  Thread* const t = Thread::current();
+  if (vm_error_thread != t) {
+    return;
+  }
+  assert(vm_error_thread == t, "invariant");
+  if (!_chunkwriter.is_valid()) {
+    open_new_chunk(true);
+  }
   if (_chunkwriter.is_valid()) {
-    Thread* const t = Thread::current();
+    _checkpoint_manager.register_service_thread(t);
     _storage.flush_regular_buffer(t->jfr_thread_local()->native_buffer(), t);
     _chunkwriter.mark_chunk_final();
     invoke_flush();
@@ -441,18 +479,21 @@
 
 void JfrRecorderService::rotate(int msgs) {
   assert(!JfrStream_lock->owned_by_self(), "invariant");
-  MutexLocker lock(JfrStream_lock);
-  static bool vm_error = false;
   if (msgs & MSGBIT(MSG_VM_ERROR)) {
-    vm_error = true;
-    prepare_for_vm_error_rotation();
+    // emergency dump
+    if (!prepare_for_vm_error_rotation()) {
+      return;
+    }
   }
-  if (!_storage.control().to_disk()) {
+  MutexLocker lock(JfrStream_lock, Mutex::_no_safepoint_check_flag);
+  if (vm_error) {
+    vm_error_rotation();
+    return;
+  }
+  if (_storage.control().to_disk()) {
+    chunk_rotation();
+  } else {
     in_memory_rotation();
-  } else if (vm_error) {
-    vm_error_rotation();
-  } else {
-    chunk_rotation();
   }
   if (msgs & (MSGBIT(MSG_STOP))) {
     stop();
@@ -478,7 +519,10 @@
 
 void JfrRecorderService::finalize_current_chunk() {
   assert(_chunkwriter.is_valid(), "invariant");
+  assert(!is_rotation_safepoint_pending(), "invariant");
+  set_rotation_safepoint_pending(true);
   write();
+  assert(!is_rotation_safepoint_pending(), "invariant");
 }
 
 void JfrRecorderService::write() {
@@ -491,6 +535,7 @@
 
 void JfrRecorderService::pre_safepoint_write() {
   assert(_chunkwriter.is_valid(), "invariant");
+  assert(is_rotation_safepoint_pending(), "invariant");
   if (LeakProfiler::is_running()) {
     // Exclusive access to the object sampler instance.
     // The sampler is released (unlocked) later in post_safepoint_write.
@@ -512,6 +557,8 @@
 
 void JfrRecorderService::safepoint_write() {
   assert(SafepointSynchronize::is_at_safepoint(), "invariant");
+  assert(is_rotation_safepoint_pending(), "invariant");
+  set_rotation_safepoint_pending(false);
   if (_string_pool.is_modified()) {
     write_stringpool_safepoint(_string_pool, _chunkwriter);
   }
@@ -524,6 +571,7 @@
 
 void JfrRecorderService::post_safepoint_write() {
   assert(_chunkwriter.is_valid(), "invariant");
+  assert(!is_rotation_safepoint_pending(), "invariant");
   // During the safepoint tasks just completed, the system transitioned to a new epoch.
   // Type tagging is epoch relative which entails we are able to write out the
   // already tagged artifacts for the previous epoch. We can accomplish this concurrently
@@ -606,7 +654,7 @@
 }
 
 void JfrRecorderService::flushpoint() {
-  MutexLocker lock(JfrStream_lock);
+  MutexLocker lock(JfrStream_lock, Mutex::_no_safepoint_check_flag);
   invoke_flush();
 }
 
--- a/src/hotspot/share/jfr/recorder/service/jfrRecorderService.hpp	Thu Dec 19 15:13:24 2019 -0800
+++ b/src/hotspot/share/jfr/recorder/service/jfrRecorderService.hpp	Fri Dec 20 16:04:34 2019 +0100
@@ -47,7 +47,6 @@
   void chunk_rotation();
   void in_memory_rotation();
   void finalize_current_chunk();
-  void prepare_for_vm_error_rotation();
   void vm_error_rotation();
   void invoke_flush();
 
--- a/src/hotspot/share/runtime/mutexLocker.cpp	Thu Dec 19 15:13:24 2019 -0800
+++ b/src/hotspot/share/runtime/mutexLocker.cpp	Fri Dec 20 16:04:34 2019 +0100
@@ -311,7 +311,7 @@
 #if INCLUDE_JFR
   def(JfrMsg_lock                  , PaddedMonitor, leaf,        true,  _safepoint_check_always);
   def(JfrBuffer_lock               , PaddedMutex  , leaf,        true,  _safepoint_check_never);
-  def(JfrStream_lock               , PaddedMutex  , nonleaf + 1, false, _safepoint_check_always);
+  def(JfrStream_lock               , PaddedMutex  , nonleaf + 1, false, _safepoint_check_never);
   def(JfrStacktrace_lock           , PaddedMutex  , special,     true,  _safepoint_check_never);
   def(JfrThreadSampler_lock        , PaddedMonitor, leaf,        true,  _safepoint_check_never);
 #endif