changeset 4376:78538cd4794c

8008916: G1: Evacuation failed tracing event Summary: Evacuation failed event for G1 Reviewed-by: johnc, brutisso, ehelin
author jwilhelm
date Wed, 17 Apr 2013 13:16:25 +0200
parents f438a35cc903
children be693a09e7f5 4135583e60cc
files src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp src/share/vm/gc_implementation/shared/copyFailedInfo.hpp src/share/vm/gc_implementation/shared/gcTrace.cpp src/share/vm/gc_implementation/shared/gcTrace.hpp src/share/vm/gc_implementation/shared/gcTraceSend.cpp src/share/vm/trace/trace.xml
diffstat 8 files changed, 88 insertions(+), 42 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Apr 15 11:38:46 2013 +0200
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Apr 17 13:16:25 2013 +0200
@@ -1905,7 +1905,7 @@
   _ref_processor_stw(NULL),
   _process_strong_tasks(new SubTasksDone(G1H_PS_NumElements)),
   _bot_shared(NULL),
-  _evac_failure_scan_stack(NULL) ,
+  _evac_failure_scan_stack(NULL),
   _mark_in_progress(false),
   _cg1r(NULL), _summary_bytes_used(0),
   _g1mm(NULL),
@@ -1949,21 +1949,19 @@
   int n_rem_sets = HeapRegionRemSet::num_par_rem_sets();
   assert(n_rem_sets > 0, "Invariant.");
 
-  HeapRegionRemSetIterator** iter_arr =
-    NEW_C_HEAP_ARRAY(HeapRegionRemSetIterator*, n_queues, mtGC);
-  for (int i = 0; i < n_queues; i++) {
-    iter_arr[i] = new HeapRegionRemSetIterator();
-  }
-  _rem_set_iterator = iter_arr;
-
+  HeapRegionRemSetIterator** iter_arr = NEW_C_HEAP_ARRAY(HeapRegionRemSetIterator*, n_queues, mtGC);
   _worker_cset_start_region = NEW_C_HEAP_ARRAY(HeapRegion*, n_queues, mtGC);
   _worker_cset_start_region_time_stamp = NEW_C_HEAP_ARRAY(unsigned int, n_queues, mtGC);
+  _evacuation_failed_info_array = NEW_C_HEAP_ARRAY(EvacuationFailedInfo, n_queues, mtGC);
 
   for (int i = 0; i < n_queues; i++) {
     RefToScanQueue* q = new RefToScanQueue();
     q->initialize();
     _task_queues->register_queue(i, q);
-  }
+    iter_arr[i] = new HeapRegionRemSetIterator();
+    ::new (&_evacuation_failed_info_array[i]) EvacuationFailedInfo();
+  }
+  _rem_set_iterator = iter_arr;
 
   clear_cset_start_regions();
 
@@ -4038,13 +4036,19 @@
 #endif // YOUNG_LIST_VERBOSE
 
         g1_policy()->record_survivor_regions(_young_list->survivor_length(),
-                                            _young_list->first_survivor_region(),
-                                            _young_list->last_survivor_region());
+                                             _young_list->first_survivor_region(),
+                                             _young_list->last_survivor_region());
 
         _young_list->reset_auxilary_lists();
 
         if (evacuation_failed()) {
           _summary_bytes_used = recalculate_used();
+          uint n_queues = MAX2((int)ParallelGCThreads, 1);
+          for (uint i = 0; i < n_queues; i++) {
+            if (_evacuation_failed_info_array[i].has_failed()) {
+              _gc_tracer_stw->report_evacuation_failed(_evacuation_failed_info_array[i]);
+            }
+          }
         } else {
           // The "used" of the the collection set have already been subtracted
           // when they were freed.  Add in the bytes evacuated.
@@ -4358,7 +4362,7 @@
 }
 
 oop
-G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
+G1CollectedHeap::handle_evacuation_failure_par(G1ParScanThreadState* _par_scan_state,
                                                oop old) {
   assert(obj_in_cs(old),
          err_msg("obj: "PTR_FORMAT" should still be in the CSet",
@@ -4367,7 +4371,12 @@
   oop forward_ptr = old->forward_to_atomic(old);
   if (forward_ptr == NULL) {
     // Forward-to-self succeeded.
-
+    assert(_par_scan_state != NULL, "par scan state");
+    OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
+    uint queue_num = _par_scan_state->queue_num();
+
+    _evacuation_failed = true;
+    _evacuation_failed_info_array[queue_num].register_copy_failure(old->size());
     if (_evac_failure_closure != cl) {
       MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag);
       assert(!_drain_in_progress,
@@ -4398,8 +4407,6 @@
 }
 
 void G1CollectedHeap::handle_evacuation_failure_common(oop old, markOop m) {
-  set_evacuation_failed(true);
-
   preserve_mark_if_necessary(old, m);
 
   HeapRegion* r = heap_region_containing(old);
@@ -4649,8 +4656,7 @@
   if (obj_ptr == NULL) {
     // This will either forward-to-self, or detect that someone else has
     // installed a forwarding pointer.
-    OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
-    return _g1->handle_evacuation_failure_par(cl, old);
+    return _g1->handle_evacuation_failure_par(_par_scan_state, old);
   }
 
   oop obj = oop(obj_ptr);
@@ -5663,7 +5669,7 @@
 
 void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) {
   _expand_heap_after_alloc_failure = true;
-  set_evacuation_failed(false);
+  _evacuation_failed = false;
 
   // Should G1EvacuationFailureALot be in effect for this GC?
   NOT_PRODUCT(set_evacuation_failure_alot_for_current_gc();)
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon Apr 15 11:38:46 2013 +0200
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Apr 17 13:16:25 2013 +0200
@@ -68,6 +68,7 @@
 class STWGCTimer;
 class G1NewTracer;
 class G1OldTracer;
+class EvacuationFailedInfo;
 
 typedef OverflowTaskQueue<StarTask, mtGC>         RefToScanQueue;
 typedef GenericTaskQueueSet<RefToScanQueue, mtGC> RefToScanQueueSet;
@@ -166,7 +167,7 @@
 // An instance is embedded into the G1CH and used as the
 // (optional) _is_alive_non_header closure in the STW
 // reference processor. It is also extensively used during
-// refence processing during STW evacuation pauses.
+// reference processing during STW evacuation pauses.
 class G1STWIsAliveClosure: public BoolObjectClosure {
   G1CollectedHeap* _g1;
 public:
@@ -885,9 +886,7 @@
   // True iff a evacuation has failed in the current collection.
   bool _evacuation_failed;
 
-  // Set the attribute indicating whether evacuation has failed in the
-  // current collection.
-  void set_evacuation_failed(bool b) { _evacuation_failed = b; }
+  EvacuationFailedInfo* _evacuation_failed_info_array;
 
   // Failed evacuations cause some logical from-space objects to have
   // forwarding pointers to themselves.  Reset them.
@@ -929,7 +928,7 @@
   void finalize_for_evac_failure();
 
   // An attempt to evacuate "obj" has failed; take necessary steps.
-  oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj);
+  oop handle_evacuation_failure_par(G1ParScanThreadState* _par_scan_state, oop obj);
   void handle_evacuation_failure_common(oop obj, markOop m);
 
 #ifndef PRODUCT
@@ -961,13 +960,13 @@
   inline bool evacuation_should_fail();
 
   // Reset the G1EvacuationFailureALot counters.  Should be called at
-  // the end of an evacuation pause in which an evacuation failure ocurred.
+  // the end of an evacuation pause in which an evacuation failure occurred.
   inline void reset_evacuation_should_fail();
 #endif // !PRODUCT
 
   // ("Weak") Reference processing support.
   //
-  // G1 has 2 instances of the referece processor class. One
+  // G1 has 2 instances of the reference processor class. One
   // (_ref_processor_cm) handles reference object discovery
   // and subsequent processing during concurrent marking cycles.
   //
@@ -1238,7 +1237,7 @@
 
   // verify_region_sets_optional() is planted in the code for
   // list verification in non-product builds (and it can be enabled in
-  // product builds by definning HEAP_REGION_SET_FORCE_VERIFY to be 1).
+  // product builds by defining HEAP_REGION_SET_FORCE_VERIFY to be 1).
 #if HEAP_REGION_SET_FORCE_VERIFY
   void verify_region_sets_optional() {
     verify_region_sets();
@@ -1310,7 +1309,7 @@
   // the context of the vm thread.
   virtual void collect_as_vm_thread(GCCause::Cause cause);
 
-  // True iff a evacuation has failed in the most-recent collection.
+  // True iff an evacuation has failed in the most-recent collection.
   bool evacuation_failed() { return _evacuation_failed; }
 
   // It will free a region if it has allocated objects in it that are
@@ -1816,7 +1815,7 @@
   G1ParScanHeapEvacClosure*     _evac_cl;
   G1ParScanPartialArrayClosure* _partial_scan_cl;
 
-  int _hash_seed;
+  int  _hash_seed;
   uint _queue_num;
 
   size_t _term_attempts;
@@ -2020,7 +2019,6 @@
     }
   }
 
-public:
   void trim_queue();
 };
 
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Mon Apr 15 11:38:46 2013 +0200
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Wed Apr 17 13:16:25 2013 +0200
@@ -36,7 +36,7 @@
   bool        _print_sum;
 
   // We are caching the sum and average to only have to calculate them once.
-  // This is not done in an MT-safe way. It is intetened to allow single
+  // This is not done in an MT-safe way. It is intended to allow single
   // threaded code to call sum() and average() multiple times in any order
   // without having to worry about the cost.
   bool   _has_new_data;
@@ -135,7 +135,7 @@
   double _min_clear_cc_time_ms;         // min
   double _max_clear_cc_time_ms;         // max
   double _cur_clear_cc_time_ms;         // clearing time during current pause
-  double _cum_clear_cc_time_ms;         // cummulative clearing time
+  double _cum_clear_cc_time_ms;         // cumulative clearing time
   jlong  _num_cc_clears;                // number of times the card count cache has been cleared
 
   double _cur_collection_start_sec;
--- a/src/share/vm/gc_implementation/shared/copyFailedInfo.hpp	Mon Apr 15 11:38:46 2013 +0200
+++ b/src/share/vm/gc_implementation/shared/copyFailedInfo.hpp	Wed Apr 17 13:16:25 2013 +0200
@@ -28,35 +28,31 @@
 #include "runtime/thread.hpp"
 #include "utilities/globalDefinitions.hpp"
 
-class CopyFailedInfo VALUE_OBJ_CLASS_SPEC {
+class CopyFailedInfo : public CHeapObj<mtGC> {
   size_t    _first_size;
   size_t    _smallest_size;
   size_t    _total_size;
   uint      _count;
-  OSThread* _thread;
 
  public:
-  CopyFailedInfo() : _first_size(0), _smallest_size(0), _total_size(0), _count(0), _thread(NULL) {}
+  CopyFailedInfo() : _first_size(0), _smallest_size(0), _total_size(0), _count(0) {}
 
-  void register_copy_failure(size_t size) {
+  virtual void register_copy_failure(size_t size) {
     if (_first_size == 0) {
       _first_size = size;
       _smallest_size = size;
-      _thread = Thread::current()->osthread();
     } else if (size < _smallest_size) {
       _smallest_size = size;
     }
     _total_size += size;
     _count++;
-    assert(_thread == Thread::current()->osthread(), "The PromotionFailedInfo should be thread local.");
   }
 
-  void reset() {
+  virtual void reset() {
     _first_size = 0;
     _smallest_size = 0;
     _total_size = 0;
     _count = 0;
-    _thread = NULL;
   }
 
   bool has_failed() const { return _count != 0; }
@@ -64,9 +60,31 @@
   size_t smallest_size() const { return _smallest_size; }
   size_t total_size() const { return _total_size; }
   uint failed_count() const { return _count; }
+};
+
+class PromotionFailedInfo : public CopyFailedInfo {  // CopyFailedInfo that additionally remembers which OSThread registered the failures
+  OSThread* _thread;  // NULL after construction/reset(); set by the first register_copy_failure() call
+
+ public:
+  PromotionFailedInfo() : CopyFailedInfo(), _thread(NULL) {}
+
+  void register_copy_failure(size_t size) {  // overrides the virtual declared in CopyFailedInfo
+    CopyFailedInfo::register_copy_failure(size);  // base class updates first/smallest/total size and the failure count
+    if (_thread == NULL) {
+      _thread = Thread::current()->osthread();  // first failure since construction/reset(): record the calling thread
+    } else {
+      assert(_thread == Thread::current()->osthread(), "The PromotionFailedInfo should be thread local.");  // later failures must come from that same thread
+    }
+  }
+
+  void reset() {  // overrides the virtual declared in CopyFailedInfo
+    CopyFailedInfo::reset();
+    _thread = NULL;  // forget the thread so the next failure records it afresh
+  }
+
   OSThread* thread() const { return _thread; }
 };
 
-class PromotionFailedInfo : public CopyFailedInfo {};
+class EvacuationFailedInfo : public CopyFailedInfo {};  // distinct type for evacuation failures; adds no state or behavior to CopyFailedInfo
 
 #endif /* SHARE_VM_GC_IMPLEMENTATION_SHARED_COPYFAILEDINFO_HPP */
--- a/src/share/vm/gc_implementation/shared/gcTrace.cpp	Mon Apr 15 11:38:46 2013 +0200
+++ b/src/share/vm/gc_implementation/shared/gcTrace.cpp	Wed Apr 17 13:16:25 2013 +0200
@@ -186,4 +186,11 @@
 
   send_evacuation_info_event(info);
 }
+
+void G1NewTracer::report_evacuation_failed(EvacuationFailedInfo& ef_info) {  // hands ef_info to send_evacuation_failed_event(), then clears it
+  assert_set_gc_id();
+
+  send_evacuation_failed_event(ef_info);
+  ef_info.reset();  // must follow the send: zeroes the caller's statistics, so has_failed() is false afterwards
+}
 #endif
--- a/src/share/vm/gc_implementation/shared/gcTrace.hpp	Mon Apr 15 11:38:46 2013 +0200
+++ b/src/share/vm/gc_implementation/shared/gcTrace.hpp	Wed Apr 17 13:16:25 2013 +0200
@@ -214,10 +214,12 @@
   void report_yc_type(G1YCType type);
   void report_gc_end_impl(jlong timestamp, TimePartitions* time_partitions);
   void report_evacuation_info(EvacuationInfo* info);
+  void report_evacuation_failed(EvacuationFailedInfo& ef_info);
 
  private:
   void send_g1_young_gc_event();
   void send_evacuation_info_event(EvacuationInfo* info);
+  void send_evacuation_failed_event(const EvacuationFailedInfo& ef_info) const;
 };
 #endif
 
--- a/src/share/vm/gc_implementation/shared/gcTraceSend.cpp	Mon Apr 15 11:38:46 2013 +0200
+++ b/src/share/vm/gc_implementation/shared/gcTraceSend.cpp	Wed Apr 17 13:16:25 2013 +0200
@@ -101,7 +101,6 @@
   failed_info.set_firstSize(cf_info.first_size());
   failed_info.set_smallestSize(cf_info.smallest_size());
   failed_info.set_totalSize(cf_info.total_size());
-  failed_info.set_thread(cf_info.thread()->thread_id());
   return failed_info;
 }
 
@@ -110,6 +109,7 @@
   if (e.should_commit()) {
     e.set_gcId(_shared_gc_info.id());
     e.set_data(to_trace_struct(pf_info));
+    e.set_thread(pf_info.thread()->thread_id());
     e.commit();
   }
 }
@@ -169,6 +169,15 @@
     e.commit();
   }
 }
+
+void G1NewTracer::send_evacuation_failed_event(const EvacuationFailedInfo& ef_info) const {  // builds and commits an EventEvacuationFailed from ef_info
+  EventEvacuationFailed e;
+  if (e.should_commit()) {  // when this is false the event is neither populated nor committed
+    e.set_gcId(_shared_gc_info.id());
+    e.set_data(to_trace_struct(ef_info));  // converts ef_info into the value stored in the event's "data" field
+    e.commit();
+  }
+}
 #endif
 
 static TraceStructVirtualSpace to_trace_struct(const VirtualSpaceSummary& summary) {
--- a/src/share/vm/trace/trace.xml	Mon Apr 15 11:38:46 2013 +0200
+++ b/src/share/vm/trace/trace.xml	Wed Apr 17 13:16:25 2013 +0200
@@ -208,7 +208,6 @@
       <value type="BYTES64" field="firstSize" label="First Failed Object Size"/>
       <value type="BYTES64" field="smallestSize" label="Smallest Failed Object Size"/>
       <value type="BYTES64" field="totalSize" label="Total Object Size"/>
-      <value type="OSTHREAD" field="thread" label="Running thread"/>
     </struct>
 
     <event id="ObjectCountAfterGC" path="vm/gc/detailed/object_count_after_gc" is_instant="true" label="Object Count After GC">
@@ -222,6 +221,13 @@
            description="Promotion of an object failed">
       <value type="ULONG" field="gcId" label="GC ID" relation="GC_ID"/>
       <structvalue type="CopyFailed" field="data" label="data"/>
+      <value type="OSTHREAD" field="thread" label="Running thread"/>
+    </event>
+
+    <event id="EvacuationFailed" path="vm/gc/detailed/evacuation_failed" label="Evacuation Failed" is_instant="true"
+           description="Evacuation of an object failed">  <!-- fields are filled by G1NewTracer::send_evacuation_failed_event via set_gcId()/set_data() -->
+      <value type="ULONG" field="gcId" label="GC ID" relation="GC_ID"/>
+      <structvalue type="CopyFailed" field="data" label="data"/>  <!-- this event declares no thread field of its own -->
     </event>
 
     <event id="ConcurrentModeFailure" path="vm/gc/detailed/concurrent_mode_failure" label="Concurrent Mode Failure"