changeset 2586:14a2fd14c0db

7068240: G1: Long "parallel other time" and "ext root scanning" when running specific benchmark Summary: In root processing, move the scanning of the reference processor's discovered lists to before RSet updating and scanning. When scanning the reference processor's discovered lists, use a buffering closure so that the time spent copying any reference object is correctly attributed. Also removed a couple of unused and irrelevant timers. Reviewed-by: ysr, jmasa
author johnc
date Mon, 01 Aug 2011 10:04:28 -0700
parents 8107273fd204
children 6aa4feb8a366
files src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp
diffstat 4 files changed, 98 insertions(+), 144 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Sat Jul 23 10:42:20 2011 -0400
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Mon Aug 01 10:04:28 2011 -0700
@@ -4834,6 +4834,7 @@
                                   scan_perm_cl,
                                   i);
     pss.end_strong_roots();
+
     {
       double start = os::elapsedTime();
       G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, &_terminator);
@@ -4890,41 +4891,43 @@
                        &eager_scan_code_roots,
                        &buf_scan_perm);
 
-  // Finish up any enqueued closure apps.
-  buf_scan_non_heap_roots.done();
-  buf_scan_perm.done();
-  double ext_roots_end = os::elapsedTime();
-  g1_policy()->reset_obj_copy_time(worker_i);
-  double obj_copy_time_sec =
-    buf_scan_non_heap_roots.closure_app_seconds() +
-    buf_scan_perm.closure_app_seconds();
-  g1_policy()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
-  double ext_root_time_ms =
-    ((ext_roots_end - ext_roots_start) - obj_copy_time_sec) * 1000.0;
-  g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
-
-  // Scan strong roots in mark stack.
-  if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) {
-    concurrent_mark()->oops_do(scan_non_heap_roots);
-  }
-  double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
-  g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms);
-
-  // XXX What should this be doing in the parallel case?
-  g1_policy()->record_collection_pause_end_CH_strong_roots();
-  // Now scan the complement of the collection set.
-  if (scan_rs != NULL) {
-    g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i);
-  }
-  // Finish with the ref_processor roots.
+  // Now the ref_processor roots.
   if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) {
     // We need to treat the discovered reference lists as roots and
     // keep entries (which are added by the marking threads) on them
     // live until they can be processed at the end of marking.
-    ref_processor()->weak_oops_do(scan_non_heap_roots);
-    ref_processor()->oops_do(scan_non_heap_roots);
-  }
-  g1_policy()->record_collection_pause_end_G1_strong_roots();
+    ref_processor()->weak_oops_do(&buf_scan_non_heap_roots);
+    ref_processor()->oops_do(&buf_scan_non_heap_roots);
+  }
+
+  // Finish up any enqueued closure apps (attributed as object copy time).
+  buf_scan_non_heap_roots.done();
+  buf_scan_perm.done();
+
+  double ext_roots_end = os::elapsedTime();
+
+  g1_policy()->reset_obj_copy_time(worker_i);
+  double obj_copy_time_sec = buf_scan_perm.closure_app_seconds() +
+                                buf_scan_non_heap_roots.closure_app_seconds();
+  g1_policy()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
+
+  double ext_root_time_ms =
+    ((ext_roots_end - ext_roots_start) - obj_copy_time_sec) * 1000.0;
+
+  g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
+
+  // Scan strong roots in mark stack.
+  if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) {
+    concurrent_mark()->oops_do(scan_non_heap_roots);
+  }
+  double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
+  g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms);
+
+  // Now scan the complement of the collection set.
+  if (scan_rs != NULL) {
+    g1_rem_set()->oops_into_collection_set_do(scan_rs, worker_i);
+  }
+
   _process_strong_tasks->all_tasks_completed();
 }
 
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Sat Jul 23 10:42:20 2011 -0400
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Mon Aug 01 10:04:28 2011 -0700
@@ -393,8 +393,7 @@
   // Perform finalization stuff on all allocation regions.
   void retire_all_alloc_regions();
 
-  // The number of regions allocated to hold humongous objects.
-  int         _num_humongous_regions;
+  // The young region list.
   YoungList*  _young_list;
 
   // The current policy object for the collector.
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Sat Jul 23 10:42:20 2011 -0400
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Mon Aug 01 10:04:28 2011 -0700
@@ -134,13 +134,10 @@
 
 G1CollectorPolicy::G1CollectorPolicy() :
   _parallel_gc_threads(G1CollectedHeap::use_parallel_gc_threads()
-    ? ParallelGCThreads : 1),
-
+                        ? ParallelGCThreads : 1),
 
   _n_pauses(0),
-  _recent_CH_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
-  _recent_G1_strong_roots_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
-  _recent_evac_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
+  _recent_rs_scan_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
   _recent_pause_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
   _recent_rs_sizes(new TruncatedSeq(NumPrevPausesForHeuristics)),
   _recent_gc_times_ms(new TruncatedSeq(NumPrevPausesForHeuristics)),
@@ -1050,18 +1047,6 @@
 void G1CollectorPolicy::record_concurrent_pause_end() {
 }
 
-void G1CollectorPolicy::record_collection_pause_end_CH_strong_roots() {
-  _cur_CH_strong_roots_end_sec = os::elapsedTime();
-  _cur_CH_strong_roots_dur_ms =
-    (_cur_CH_strong_roots_end_sec - _cur_collection_start_sec) * 1000.0;
-}
-
-void G1CollectorPolicy::record_collection_pause_end_G1_strong_roots() {
-  _cur_G1_strong_roots_end_sec = os::elapsedTime();
-  _cur_G1_strong_roots_dur_ms =
-    (_cur_G1_strong_roots_end_sec - _cur_CH_strong_roots_end_sec) * 1000.0;
-}
-
 template<class T>
 T sum_of(T* sum_arr, int start, int n, int N) {
   T sum = (T)0;
@@ -1183,7 +1168,6 @@
   double end_time_sec = os::elapsedTime();
   double elapsed_ms = _last_pause_time_ms;
   bool parallel = G1CollectedHeap::use_parallel_gc_threads();
-  double evac_ms = (end_time_sec - _cur_G1_strong_roots_end_sec) * 1000.0;
   size_t rs_size =
     _cur_collection_pause_used_regions_at_start - collection_set_size();
   size_t cur_used_bytes = _g1->used();
@@ -1256,14 +1240,52 @@
 
   _n_pauses++;
 
+  double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
+  double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
+  double update_rs_time = avg_value(_par_last_update_rs_times_ms);
+  double update_rs_processed_buffers =
+    sum_of_values(_par_last_update_rs_processed_buffers);
+  double scan_rs_time = avg_value(_par_last_scan_rs_times_ms);
+  double obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
+  double termination_time = avg_value(_par_last_termination_times_ms);
+
+  double parallel_known_time = update_rs_time +
+                               ext_root_scan_time +
+                               mark_stack_scan_time +
+                               scan_rs_time +
+                               obj_copy_time +
+                               termination_time;
+
+  double parallel_other_time = _cur_collection_par_time_ms - parallel_known_time;
+
+  PauseSummary* summary = _summary;
+
   if (update_stats) {
-    _recent_CH_strong_roots_times_ms->add(_cur_CH_strong_roots_dur_ms);
-    _recent_G1_strong_roots_times_ms->add(_cur_G1_strong_roots_dur_ms);
-    _recent_evac_times_ms->add(evac_ms);
+    _recent_rs_scan_times_ms->add(scan_rs_time);
     _recent_pause_times_ms->add(elapsed_ms);
-
     _recent_rs_sizes->add(rs_size);
 
+    MainBodySummary* body_summary = summary->main_body_summary();
+    guarantee(body_summary != NULL, "should not be null!");
+
+    if (_satb_drain_time_set)
+      body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
+    else
+      body_summary->record_satb_drain_time_ms(0.0);
+
+    body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
+    body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
+    body_summary->record_update_rs_time_ms(update_rs_time);
+    body_summary->record_scan_rs_time_ms(scan_rs_time);
+    body_summary->record_obj_copy_time_ms(obj_copy_time);
+    if (parallel) {
+      body_summary->record_parallel_time_ms(_cur_collection_par_time_ms);
+      body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
+      body_summary->record_termination_time_ms(termination_time);
+      body_summary->record_parallel_other_time_ms(parallel_other_time);
+    }
+    body_summary->record_mark_closure_time_ms(_mark_closure_time_ms);
+
     // We exempt parallel collection from this check because Alloc Buffer
     // fragmentation can produce negative collections.  Same with evac
     // failure.
@@ -1328,56 +1350,12 @@
     gclog_or_tty->print_cr("   Recording collection pause(%d)", _n_pauses);
   }
 
-  PauseSummary* summary = _summary;
-
-  double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
-  double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
-  double update_rs_time = avg_value(_par_last_update_rs_times_ms);
-  double update_rs_processed_buffers =
-    sum_of_values(_par_last_update_rs_processed_buffers);
-  double scan_rs_time = avg_value(_par_last_scan_rs_times_ms);
-  double obj_copy_time = avg_value(_par_last_obj_copy_times_ms);
-  double termination_time = avg_value(_par_last_termination_times_ms);
-
-  double parallel_other_time = _cur_collection_par_time_ms -
-    (update_rs_time + ext_root_scan_time + mark_stack_scan_time +
-     scan_rs_time + obj_copy_time + termination_time);
-  if (update_stats) {
-    MainBodySummary* body_summary = summary->main_body_summary();
-    guarantee(body_summary != NULL, "should not be null!");
-
-    if (_satb_drain_time_set)
-      body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
-    else
-      body_summary->record_satb_drain_time_ms(0.0);
-    body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
-    body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
-    body_summary->record_update_rs_time_ms(update_rs_time);
-    body_summary->record_scan_rs_time_ms(scan_rs_time);
-    body_summary->record_obj_copy_time_ms(obj_copy_time);
-    if (parallel) {
-      body_summary->record_parallel_time_ms(_cur_collection_par_time_ms);
-      body_summary->record_clear_ct_time_ms(_cur_clear_ct_time_ms);
-      body_summary->record_termination_time_ms(termination_time);
-      body_summary->record_parallel_other_time_ms(parallel_other_time);
-    }
-    body_summary->record_mark_closure_time_ms(_mark_closure_time_ms);
-  }
-
   if (G1PolicyVerbose > 1) {
     gclog_or_tty->print_cr("      ET: %10.6f ms           (avg: %10.6f ms)\n"
-                           "        CH Strong: %10.6f ms    (avg: %10.6f ms)\n"
-                           "        G1 Strong: %10.6f ms    (avg: %10.6f ms)\n"
-                           "        Evac:      %10.6f ms    (avg: %10.6f ms)\n"
                            "       ET-RS:  %10.6f ms      (avg: %10.6f ms)\n"
                            "      |RS|: " SIZE_FORMAT,
                            elapsed_ms, recent_avg_time_for_pauses_ms(),
-                           _cur_CH_strong_roots_dur_ms, recent_avg_time_for_CH_strong_ms(),
-                           _cur_G1_strong_roots_dur_ms, recent_avg_time_for_G1_strong_ms(),
-                           evac_ms, recent_avg_time_for_evac_ms(),
-                           scan_rs_time,
-                           recent_avg_time_for_pauses_ms() -
-                           recent_avg_time_for_G1_strong_ms(),
+                           scan_rs_time, recent_avg_time_for_rs_scan_ms(),
                            rs_size);
 
     gclog_or_tty->print_cr("       Used at start: " SIZE_FORMAT"K"
@@ -1438,7 +1416,7 @@
       }
       print_par_stats(2, "GC Worker Times", _par_last_gc_worker_times_ms);
 
-      print_stats(2, "Other", parallel_other_time);
+      print_stats(2, "Parallel Other", parallel_other_time);
       print_stats(1, "Clear CT", _cur_clear_ct_time_ms);
     } else {
       print_stats(1, "Update RS", update_rs_time);
@@ -1967,38 +1945,27 @@
 }
 
 double G1CollectorPolicy::recent_avg_time_for_pauses_ms() {
-  if (_recent_pause_times_ms->num() == 0) return (double) MaxGCPauseMillis;
-  else return _recent_pause_times_ms->avg();
+  if (_recent_pause_times_ms->num() == 0) {
+    return (double) MaxGCPauseMillis;
+  }
+  return _recent_pause_times_ms->avg();
 }
 
-double G1CollectorPolicy::recent_avg_time_for_CH_strong_ms() {
-  if (_recent_CH_strong_roots_times_ms->num() == 0)
+double G1CollectorPolicy::recent_avg_time_for_rs_scan_ms() {
+  if (_recent_rs_scan_times_ms->num() == 0) {
     return (double)MaxGCPauseMillis/3.0;
-  else return _recent_CH_strong_roots_times_ms->avg();
+  }
+  return _recent_rs_scan_times_ms->avg();
 }
 
-double G1CollectorPolicy::recent_avg_time_for_G1_strong_ms() {
-  if (_recent_G1_strong_roots_times_ms->num() == 0)
-    return (double)MaxGCPauseMillis/3.0;
-  else return _recent_G1_strong_roots_times_ms->avg();
-}
-
-double G1CollectorPolicy::recent_avg_time_for_evac_ms() {
-  if (_recent_evac_times_ms->num() == 0) return (double)MaxGCPauseMillis/3.0;
-  else return _recent_evac_times_ms->avg();
-}
-
 int G1CollectorPolicy::number_of_recent_gcs() {
-  assert(_recent_CH_strong_roots_times_ms->num() ==
-         _recent_G1_strong_roots_times_ms->num(), "Sequence out of sync");
-  assert(_recent_G1_strong_roots_times_ms->num() ==
-         _recent_evac_times_ms->num(), "Sequence out of sync");
-  assert(_recent_evac_times_ms->num() ==
+  assert(_recent_rs_scan_times_ms->num() ==
          _recent_pause_times_ms->num(), "Sequence out of sync");
   assert(_recent_pause_times_ms->num() ==
          _recent_CS_bytes_used_before->num(), "Sequence out of sync");
   assert(_recent_CS_bytes_used_before->num() ==
          _recent_CS_bytes_surviving->num(), "Sequence out of sync");
+
   return _recent_pause_times_ms->num();
 }
 
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Sat Jul 23 10:42:20 2011 -0400
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Mon Aug 01 10:04:28 2011 -0700
@@ -129,15 +129,9 @@
   jlong  _num_cc_clears;                // number of times the card count cache has been cleared
 #endif
 
-  double _cur_CH_strong_roots_end_sec;
-  double _cur_CH_strong_roots_dur_ms;
-  double _cur_G1_strong_roots_end_sec;
-  double _cur_G1_strong_roots_dur_ms;
+  // Statistics for recent GC pauses.  See below for how indexed.
+  TruncatedSeq* _recent_rs_scan_times_ms;
 
-  // Statistics for recent GC pauses.  See below for how indexed.
-  TruncatedSeq* _recent_CH_strong_roots_times_ms;
-  TruncatedSeq* _recent_G1_strong_roots_times_ms;
-  TruncatedSeq* _recent_evac_times_ms;
   // These exclude marking times.
   TruncatedSeq* _recent_pause_times_ms;
   TruncatedSeq* _recent_gc_times_ms;
@@ -692,17 +686,11 @@
   // The average time in ms per collection pause, averaged over recent pauses.
   double recent_avg_time_for_pauses_ms();
 
-  // The average time in ms for processing CollectedHeap strong roots, per
-  // collection pause, averaged over recent pauses.
-  double recent_avg_time_for_CH_strong_ms();
-
-  // The average time in ms for processing the G1 remembered set, per
-  // pause, averaged over recent pauses.
-  double recent_avg_time_for_G1_strong_ms();
-
-  // The average time in ms for "evacuating followers", per pause, averaged
-  // over recent pauses.
-  double recent_avg_time_for_evac_ms();
+  // The average time in ms for RS scanning, per pause, averaged
+  // over recent pauses. (Note the RS scanning time for a pause
+  // is itself an average of the RS scanning time for each worker
+  // thread.)
+  double recent_avg_time_for_rs_scan_ms();
 
   // The number of "recent" GCs recorded in the number sequences
   int number_of_recent_gcs();
@@ -887,9 +875,6 @@
   virtual void record_concurrent_pause();
   virtual void record_concurrent_pause_end();
 
-  virtual void record_collection_pause_end_CH_strong_roots();
-  virtual void record_collection_pause_end_G1_strong_roots();
-
   virtual void record_collection_pause_end();
   void print_heap_transition();