changeset 5179:c76437921025

8027476: Improve performance of Stringtable unlink 8027455: Improve symbol table scan times during gc pauses Summary: Parallelize string table and symbol table scan during remark and full GC. Some additional statistics output if the experimental flag G1TraceStringSymbolTableScrubbing is set. Reviewed-by: mgerdin, coleenp, brutisso
author tschatzl
date Fri, 31 Jan 2014 10:08:49 +0100
parents 8820d6ecaa4e
children d2782fe9769e
files src/share/vm/classfile/symbolTable.cpp src/share/vm/classfile/symbolTable.hpp src/share/vm/gc_implementation/g1/concurrentMark.cpp src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp src/share/vm/gc_implementation/g1/g1MarkSweep.cpp src/share/vm/gc_implementation/g1/g1_globals.hpp
diffstat 7 files changed, 238 insertions(+), 49 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/classfile/symbolTable.cpp	Thu Jan 30 13:12:29 2014 -0800
+++ b/src/share/vm/classfile/symbolTable.cpp	Fri Jan 31 10:08:49 2014 +0100
@@ -39,6 +39,9 @@
 
 // --------------------------------------------------------------------------
 
+// the number of buckets a thread claims
+const int ClaimChunkSize = 32;
+
 SymbolTable* SymbolTable::_the_table = NULL;
 // Static arena for symbols that are not deallocated
 Arena* SymbolTable::_arena = NULL;
@@ -81,16 +84,12 @@
   }
 }
 
-int SymbolTable::symbols_removed = 0;
-int SymbolTable::symbols_counted = 0;
+int SymbolTable::_symbols_removed = 0;
+int SymbolTable::_symbols_counted = 0;
+volatile int SymbolTable::_parallel_claimed_idx = 0;
 
-// Remove unreferenced symbols from the symbol table
-// This is done late during GC.
-void SymbolTable::unlink() {
-  int removed = 0;
-  int total = 0;
-  size_t memory_total = 0;
-  for (int i = 0; i < the_table()->table_size(); ++i) {
+void SymbolTable::buckets_unlink(int start_idx, int end_idx, int* processed, int* removed, size_t* memory_total) {
+  for (int i = start_idx; i < end_idx; ++i) {
     HashtableEntry<Symbol*, mtSymbol>** p = the_table()->bucket_addr(i);
     HashtableEntry<Symbol*, mtSymbol>* entry = the_table()->bucket(i);
     while (entry != NULL) {
@@ -102,14 +101,14 @@
         break;
       }
       Symbol* s = entry->literal();
-      memory_total += s->object_size();
-      total++;
+      (*memory_total) += s->object_size();
+      (*processed)++;
       assert(s != NULL, "just checking");
       // If reference count is zero, remove.
       if (s->refcount() == 0) {
         assert(!entry->is_shared(), "shared entries should be kept live");
         delete s;
-        removed++;
+        (*removed)++;
         *p = entry->next();
         the_table()->free_entry(entry);
       } else {
@@ -119,12 +118,45 @@
       entry = (HashtableEntry<Symbol*, mtSymbol>*)HashtableEntry<Symbol*, mtSymbol>::make_ptr(*p);
     }
   }
-  symbols_removed += removed;
-  symbols_counted += total;
+}
+
+// Remove unreferenced symbols from the symbol table
+// This is done late during GC.
+void SymbolTable::unlink(int* processed, int* removed) {
+  size_t memory_total = 0;
+  buckets_unlink(0, the_table()->table_size(), processed, removed, &memory_total);
+  _symbols_removed += *removed;
+  _symbols_counted += *processed;
   // Exclude printing for normal PrintGCDetails because people parse
   // this output.
   if (PrintGCDetails && Verbose && WizardMode) {
-    gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", total,
+    gclog_or_tty->print(" [Symbols=%d size=" SIZE_FORMAT "K] ", *processed,
+                        (memory_total*HeapWordSize)/1024);
+  }
+}
+
+void SymbolTable::possibly_parallel_unlink(int* processed, int* removed) {
+  const int limit = the_table()->table_size();
+
+  size_t memory_total = 0;
+
+  for (;;) {
+    // Grab next set of buckets to scan
+    int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
+    if (start_idx >= limit) {
+      // End of table
+      break;
+    }
+
+    int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
+    buckets_unlink(start_idx, end_idx, processed, removed, &memory_total);
+  }
+  Atomic::add(*processed, &_symbols_counted);
+  Atomic::add(*removed, &_symbols_removed);
+  // Exclude printing for normal PrintGCDetails because people parse
+  // this output.
+  if (PrintGCDetails && Verbose && WizardMode) {
+    gclog_or_tty->print(" [Symbols: scanned=%d removed=%d size=" SIZE_FORMAT "K] ", *processed, *removed,
                         (memory_total*HeapWordSize)/1024);
   }
 }
@@ -503,21 +535,21 @@
     }
   }
   tty->print_cr("Symbol Table:");
-  tty->print_cr("Total number of symbols  %5d", count);
-  tty->print_cr("Total size in memory     %5dK",
+  tty->print_cr("Total number of symbols  "INT32_FORMAT, count);
+  tty->print_cr("Total size in memory     "INT32_FORMAT"K",
           (memory_total*HeapWordSize)/1024);
-  tty->print_cr("Total counted            %5d", symbols_counted);
-  tty->print_cr("Total removed            %5d", symbols_removed);
-  if (symbols_counted > 0) {
+  tty->print_cr("Total counted            "INT32_FORMAT, _symbols_counted);
+  tty->print_cr("Total removed            "INT32_FORMAT, _symbols_removed);
+  if (_symbols_counted > 0) {
     tty->print_cr("Percent removed          %3.2f",
-          ((float)symbols_removed/(float)symbols_counted)* 100);
+          ((float)_symbols_removed/(float)_symbols_counted)* 100);
   }
-  tty->print_cr("Reference counts         %5d", Symbol::_total_count);
-  tty->print_cr("Symbol arena size        %5d used %5d",
+  tty->print_cr("Reference counts         "INT32_FORMAT, Symbol::_total_count);
+  tty->print_cr("Symbol arena size        "SIZE_FORMAT" used "SIZE_FORMAT,
                  arena()->size_in_bytes(), arena()->used());
   tty->print_cr("Histogram of symbol length:");
-  tty->print_cr("%8s %5d", "Total  ", total);
-  tty->print_cr("%8s %5d", "Maximum", max_symbols);
+  tty->print_cr("%8s "INT32_FORMAT, "Total  ", total);
+  tty->print_cr("%8s "INT32_FORMAT, "Maximum", max_symbols);
   tty->print_cr("%8s %3.2f", "Average",
           ((float) total / (float) the_table()->table_size()));
   tty->print_cr("%s", "Histogram:");
@@ -746,11 +778,41 @@
   return result;
 }
 
-void StringTable::unlink(BoolObjectClosure* is_alive) {
+void StringTable::unlink(BoolObjectClosure* is_alive, int* processed, int* removed) {
+    buckets_unlink(is_alive, 0, the_table()->table_size(), processed, removed);
+}
+
+void StringTable::possibly_parallel_unlink(BoolObjectClosure* is_alive, int* processed, int* removed) {
   // Readers of the table are unlocked, so we should only be removing
   // entries at a safepoint.
   assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
-  for (int i = 0; i < the_table()->table_size(); ++i) {
+  const int limit = the_table()->table_size();
+
+  for (;;) {
+    // Grab next set of buckets to scan
+    int start_idx = Atomic::add(ClaimChunkSize, &_parallel_claimed_idx) - ClaimChunkSize;
+    if (start_idx >= limit) {
+      // End of table
+      break;
+    }
+
+    int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
+    buckets_unlink(is_alive, start_idx, end_idx, processed, removed);
+  }
+}
+
+void StringTable::buckets_unlink(BoolObjectClosure* is_alive, int start_idx, int end_idx, int* processed, int* removed) {
+  const int limit = the_table()->table_size();
+
+  assert(0 <= start_idx && start_idx <= limit,
+         err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx));
+  assert(0 <= end_idx && end_idx <= limit,
+         err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx));
+  assert(start_idx <= end_idx,
+         err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
+                 start_idx, end_idx));
+
+  for (int i = start_idx; i < end_idx; ++i) {
     HashtableEntry<oop, mtSymbol>** p = the_table()->bucket_addr(i);
     HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
     while (entry != NULL) {
@@ -767,24 +829,26 @@
       } else {
         *p = entry->next();
         the_table()->free_entry(entry);
+        (*removed)++;
       }
+      (*processed)++;
       entry = (HashtableEntry<oop, mtSymbol>*)HashtableEntry<oop, mtSymbol>::make_ptr(*p);
     }
   }
 }
 
-void StringTable::buckets_do(OopClosure* f, int start_idx, int end_idx) {
+void StringTable::buckets_oops_do(OopClosure* f, int start_idx, int end_idx) {
   const int limit = the_table()->table_size();
 
   assert(0 <= start_idx && start_idx <= limit,
-         err_msg("start_idx (" INT32_FORMAT ") oob?", start_idx));
+         err_msg("start_idx (" INT32_FORMAT ") is out of bounds", start_idx));
   assert(0 <= end_idx && end_idx <= limit,
-         err_msg("end_idx (" INT32_FORMAT ") oob?", end_idx));
+         err_msg("end_idx (" INT32_FORMAT ") is out of bounds", end_idx));
   assert(start_idx <= end_idx,
-         err_msg("Ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
+         err_msg("Index ordering: start_idx=" INT32_FORMAT", end_idx=" INT32_FORMAT,
                  start_idx, end_idx));
 
-  for (int i = start_idx; i < end_idx; i += 1) {
+  for (int i = start_idx; i < end_idx; i++) {
     HashtableEntry<oop, mtSymbol>** p = the_table()->bucket_addr(i);
     HashtableEntry<oop, mtSymbol>* entry = the_table()->bucket(i);
     while (entry != NULL) {
@@ -804,11 +868,10 @@
 }
 
 void StringTable::oops_do(OopClosure* f) {
-  buckets_do(f, 0, the_table()->table_size());
+  buckets_oops_do(f, 0, the_table()->table_size());
 }
 
 void StringTable::possibly_parallel_oops_do(OopClosure* f) {
-  const int ClaimChunkSize = 32;
   const int limit = the_table()->table_size();
 
   for (;;) {
@@ -820,7 +883,7 @@
     }
 
     int end_idx = MIN2(limit, start_idx + ClaimChunkSize);
-    buckets_do(f, start_idx, end_idx);
+    buckets_oops_do(f, start_idx, end_idx);
   }
 }
 
--- a/src/share/vm/classfile/symbolTable.hpp	Thu Jan 30 13:12:29 2014 -0800
+++ b/src/share/vm/classfile/symbolTable.hpp	Fri Jan 31 10:08:49 2014 +0100
@@ -86,8 +86,8 @@
   static bool _needs_rehashing;
 
   // For statistics
-  static int symbols_removed;
-  static int symbols_counted;
+  static int _symbols_removed;
+  static int _symbols_counted;
 
   Symbol* allocate_symbol(const u1* name, int len, bool c_heap, TRAPS); // Assumes no characters larger than 0x7F
 
@@ -126,6 +126,11 @@
   static Arena* arena() { return _arena; }  // called for statistics
 
   static void initialize_symbols(int arena_alloc_size = 0);
+
+  static volatile int _parallel_claimed_idx;
+
+  // Release any dead symbols
+  static void buckets_unlink(int start_idx, int end_idx, int* processed, int* removed, size_t* memory_total);
 public:
   enum {
     symbol_alloc_batch_size = 8,
@@ -175,7 +180,19 @@
                   unsigned int* hashValues, TRAPS);
 
   // Release any dead symbols
-  static void unlink();
+  static void unlink() {
+    int processed = 0;
+    int removed = 0;
+    unlink(&processed, &removed);
+  }
+  static void unlink(int* processed, int* removed);
+  // Release any dead symbols, possibly parallel version
+  static void possibly_parallel_unlink() {
+    int processed = 0;
+    int removed = 0;
+    possibly_parallel_unlink(&processed, &removed);
+  }
+  static void possibly_parallel_unlink(int* processed, int* removed);
 
   // iterate over symbols
   static void symbols_do(SymbolClosure *cl);
@@ -233,6 +250,9 @@
   // Rehash the symbol table if it gets out of balance
   static void rehash_table();
   static bool needs_rehashing()         { return _needs_rehashing; }
+  // Parallel chunked scanning
+  static void clear_parallel_claimed_index() { _parallel_claimed_idx = 0; }
+  static int parallel_claimed_index() { return _parallel_claimed_idx; }
 };
 
 class StringTable : public Hashtable<oop, mtSymbol> {
@@ -256,7 +276,9 @@
 
   // Apply the give oop closure to the entries to the buckets
   // in the range [start_idx, end_idx).
-  static void buckets_do(OopClosure* f, int start_idx, int end_idx);
+  static void buckets_oops_do(OopClosure* f, int start_idx, int end_idx);
+  // Unlink the entries to the buckets in the range [start_idx, end_idx).
+  static void buckets_unlink(BoolObjectClosure* is_alive, int start_idx, int end_idx, int* processed, int* removed);
 
   StringTable() : Hashtable<oop, mtSymbol>((int)StringTableSize,
                               sizeof (HashtableEntry<oop, mtSymbol>)) {}
@@ -283,7 +305,13 @@
 
   // GC support
   //   Delete pointers to otherwise-unreachable objects.
-  static void unlink(BoolObjectClosure* cl);
+  static void unlink(BoolObjectClosure* cl) {
+    int processed = 0;
+    int removed = 0;
+    unlink(cl, &processed, &removed);
+  }
+
+  static void unlink(BoolObjectClosure* cl, int* processed, int* removed);
 
   // Serially invoke "f->do_oop" on the locations of all oops in the table.
   static void oops_do(OopClosure* f);
@@ -291,6 +319,8 @@
   // Possibly parallel version of the above
   static void possibly_parallel_oops_do(OopClosure* f);
 
+  static void possibly_parallel_unlink(BoolObjectClosure* cl, int* processed, int* removed);
+
   // Hashing algorithm, used as the hash value used by the
   //     StringTable for bucket selection and comparison (stored in the
   //     HashtableEntry structures).  This is used in the String.intern() method.
@@ -328,5 +358,6 @@
 
   // Parallel chunked scanning
   static void clear_parallel_claimed_index() { _parallel_claimed_idx = 0; }
+  static int parallel_claimed_index() { return _parallel_claimed_idx; }
 };
 #endif // SHARE_VM_CLASSFILE_SYMBOLTABLE_HPP
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Jan 30 13:12:29 2014 -0800
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Jan 31 10:08:49 2014 +0100
@@ -1495,7 +1495,6 @@
   }
 };
 
-
 class G1ParVerifyFinalCountTask: public AbstractGangTask {
 protected:
   G1CollectedHeap* _g1h;
@@ -2407,10 +2406,7 @@
     assert(!rp->discovery_enabled(), "Post condition");
   }
 
-  // Now clean up stale oops in StringTable
-  StringTable::unlink(&g1_is_alive);
-  // Clean up unreferenced symbols in symbol table.
-  SymbolTable::unlink();
+  g1h->unlink_string_and_symbol_table(&g1_is_alive);
 }
 
 void ConcurrentMark::swapMarkBitMaps() {
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Jan 30 13:12:29 2014 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Jan 31 10:08:49 2014 +0100
@@ -5204,6 +5204,100 @@
   SharedHeap::process_weak_roots(root_closure, &roots_in_blobs, non_root_closure);
 }
 
+class G1StringSymbolTableUnlinkTask : public AbstractGangTask {
+private:
+  BoolObjectClosure* _is_alive;
+  int _initial_string_table_size;
+  int _initial_symbol_table_size;
+
+  bool  _process_strings;
+  int _strings_processed;
+  int _strings_removed;
+
+  bool  _process_symbols;
+  int _symbols_processed;
+  int _symbols_removed;
+public:
+  G1StringSymbolTableUnlinkTask(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols) :
+    AbstractGangTask("Par String/Symbol table unlink"), _is_alive(is_alive),
+    _process_strings(process_strings), _strings_processed(0), _strings_removed(0),
+    _process_symbols(process_symbols), _symbols_processed(0), _symbols_removed(0) {
+
+    _initial_string_table_size = StringTable::the_table()->table_size();
+    _initial_symbol_table_size = SymbolTable::the_table()->table_size();
+    if (process_strings) {
+      StringTable::clear_parallel_claimed_index();
+    }
+    if (process_symbols) {
+      SymbolTable::clear_parallel_claimed_index();
+    }
+  }
+
+  ~G1StringSymbolTableUnlinkTask() {
+    guarantee(!_process_strings || StringTable::parallel_claimed_index() >= _initial_string_table_size,
+              err_msg("claim value "INT32_FORMAT" after unlink less than initial string table size "INT32_FORMAT,
+                      StringTable::parallel_claimed_index(), _initial_string_table_size));
+    guarantee(!_process_symbols || SymbolTable::parallel_claimed_index() >= _initial_symbol_table_size,
+              err_msg("claim value "INT32_FORMAT" after unlink less than initial symbol table size "INT32_FORMAT,
+                      SymbolTable::parallel_claimed_index(), _initial_symbol_table_size));
+
+  }
+
+  void work(uint worker_id) {
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      int strings_processed = 0;
+      int strings_removed = 0;
+      int symbols_processed = 0;
+      int symbols_removed = 0;
+      if (_process_strings) {
+        StringTable::possibly_parallel_unlink(_is_alive, &strings_processed, &strings_removed);
+        Atomic::add(strings_processed, &_strings_processed);
+        Atomic::add(strings_removed, &_strings_removed);
+      }
+      if (_process_symbols) {
+        SymbolTable::possibly_parallel_unlink(&symbols_processed, &symbols_removed);
+        Atomic::add(symbols_processed, &_symbols_processed);
+        Atomic::add(symbols_removed, &_symbols_removed);
+      }
+    } else {
+      if (_process_strings) {
+        StringTable::unlink(_is_alive, &_strings_processed, &_strings_removed);
+      }
+      if (_process_symbols) {
+        SymbolTable::unlink(&_symbols_processed, &_symbols_removed);
+      }
+    }
+  }
+
+  size_t strings_processed() const { return (size_t)_strings_processed; }
+  size_t strings_removed()   const { return (size_t)_strings_removed; }
+
+  size_t symbols_processed() const { return (size_t)_symbols_processed; }
+  size_t symbols_removed()   const { return (size_t)_symbols_removed; }
+};
+
+void G1CollectedHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive,
+                                                     bool process_strings, bool process_symbols) {
+  uint n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                   _g1h->workers()->active_workers() : 1);
+
+  G1StringSymbolTableUnlinkTask g1_unlink_task(is_alive, process_strings, process_symbols);
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    set_par_threads(n_workers);
+    workers()->run_task(&g1_unlink_task);
+    set_par_threads(0);
+  } else {
+    g1_unlink_task.work(0);
+  }
+  if (G1TraceStringSymbolTableScrubbing) {
+    gclog_or_tty->print_cr("Cleaned string and symbol table, "
+                           "strings: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed, "
+                           "symbols: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed",
+                           g1_unlink_task.strings_processed(), g1_unlink_task.strings_removed(),
+                           g1_unlink_task.symbols_processed(), g1_unlink_task.symbols_removed());
+  }
+}
+
 // Weak Reference Processing support
 
 // An always "is_alive" closure that is used to preserve referents.
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Jan 30 13:12:29 2014 -0800
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Jan 31 10:08:49 2014 +0100
@@ -1719,6 +1719,10 @@
   // after a full GC
   void rebuild_strong_code_roots();
 
+  // Delete entries for dead interned string and clean up unreferenced symbols
+  // in symbol table, possibly in parallel.
+  void unlink_string_and_symbol_table(BoolObjectClosure* is_alive, bool unlink_strings = true, bool unlink_symbols = true);
+
   // Verification
 
   // The following is just to alert the verification code
--- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Thu Jan 30 13:12:29 2014 -0800
+++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Fri Jan 31 10:08:49 2014 +0100
@@ -177,10 +177,8 @@
   GenMarkSweep::follow_mdo_weak_refs();
   assert(GenMarkSweep::_marking_stack.is_empty(), "just drained");
 
-  // Visit interned string tables and delete unmarked oops
-  StringTable::unlink(&GenMarkSweep::is_alive);
-  // Clean up unreferenced symbols in symbol table.
-  SymbolTable::unlink();
+  // Delete entries for dead interned string and clean up unreferenced symbols in symbol table.
+  G1CollectedHeap::heap()->unlink_string_and_symbol_table(&GenMarkSweep::is_alive);
 
   assert(GenMarkSweep::_marking_stack.is_empty(),
          "stack should be empty by now");
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Jan 30 13:12:29 2014 -0800
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Jan 31 10:08:49 2014 +0100
@@ -71,6 +71,9 @@
   diagnostic(bool, G1TraceConcRefinement, false,                            \
           "Trace G1 concurrent refinement")                                 \
                                                                             \
+  experimental(bool, G1TraceStringSymbolTableScrubbing, false,              \
+          "Trace information string and symbol table scrubbing.")           \
+                                                                            \
   product(double, G1ConcMarkStepDurationMillis, 10.0,                       \
           "Target duration of individual concurrent marking steps "         \
           "in milliseconds.")                                               \