changeset 1145:e018e6884bd8

6631166: CMS: better heuristics when combatting fragmentation Summary: Autonomic per-worker free block cache sizing, tunable coalition policies, fixes to per-size block statistics, retuned gain and bandwidth of some feedback loop filters to allow quicker reactivity to abrupt changes in ambient demand, and other heuristics to reduce fragmentation of the CMS old gen. Also tightened some assertions, including those related to locking. Reviewed-by: jmasa
author ysr
date Wed, 23 Dec 2009 09:23:54 -0800
parents 44f61c24ddab
children 504830073409
files src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.cpp src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.hpp src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep src/share/vm/gc_implementation/includeDB_gc_serial src/share/vm/gc_implementation/parNew/parNewGeneration.cpp src/share/vm/gc_implementation/parNew/parNewGeneration.hpp src/share/vm/gc_implementation/shared/allocationStats.hpp src/share/vm/gc_implementation/shared/gcUtil.cpp src/share/vm/gc_implementation/shared/gcUtil.hpp src/share/vm/includeDB_gc_parallel src/share/vm/memory/defNewGeneration.cpp src/share/vm/memory/generation.hpp src/share/vm/runtime/arguments.cpp src/share/vm/runtime/globals.hpp src/share/vm/services/classLoadingService.cpp
diffstat 26 files changed, 1105 insertions(+), 351 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp	Wed Dec 23 09:23:54 2009 -0800
@@ -62,12 +62,13 @@
   tl->link_head(tc);
   tl->link_tail(tc);
   tl->set_count(1);
-  tl->init_statistics();
+  tl->init_statistics(true /* split_birth */);
   tl->setParent(NULL);
   tl->setLeft(NULL);
   tl->setRight(NULL);
   return tl;
 }
+
 TreeList* TreeList::as_TreeList(HeapWord* addr, size_t size) {
   TreeChunk* tc = (TreeChunk*) addr;
   assert(size >= sizeof(TreeChunk), "Chunk is too small for a TreeChunk");
@@ -267,6 +268,31 @@
   return retTC;
 }
 
+// Returns the block with the largest heap address amongst the chunks
+// following the head of the list for this size (the head itself is
+// returned only when it is the sole chunk); linear in the list length,
+// so potentially slow and expensive -- use with caution!
+TreeChunk* TreeList::largest_address() {
+  guarantee(head() != NULL, "The head of the list cannot be NULL");
+  FreeChunk* fc = head()->next();
+  TreeChunk* retTC;
+  if (fc == NULL) {
+    retTC = head_as_TreeChunk();
+  } else {
+    // Walk the whole tail of the list, including its final chunk,
+    // and remember the chunk with the highest heap address.
+    FreeChunk* last = fc;
+    for (fc = fc->next(); fc != NULL; fc = fc->next()) {
+      if ((HeapWord*)last < (HeapWord*)fc) {
+        last = fc;
+      }
+    }
+    retTC = TreeChunk::as_TreeChunk(last);
+  }
+  assert(retTC->list() == this, "Wrong type of chunk.");
+  return retTC;
+}
+
 BinaryTreeDictionary::BinaryTreeDictionary(MemRegion mr, bool splay):
   _splay(splay)
 {
@@ -379,7 +405,7 @@
             break;
           }
           // The evm code reset the hint of the candidate as
-          // at an interrim point.  Why?  Seems like this leaves
+          // at an interim point.  Why?  Seems like this leaves
           // the hint pointing to a list that didn't work.
           // curTL->set_hint(hintTL->size());
         }
@@ -436,7 +462,7 @@
   TreeList *curTL = root();
   if (curTL != NULL) {
     while(curTL->right() != NULL) curTL = curTL->right();
-    return curTL->first_available();
+    return curTL->largest_address();
   } else {
     return NULL;
   }
@@ -664,7 +690,7 @@
     }
   }
   TreeChunk* tc = TreeChunk::as_TreeChunk(fc);
-  // This chunk is being returned to the binary try.  It's embedded
+  // This chunk is being returned to the binary tree.  Its embedded
   // TreeList should be unused at this point.
   tc->initialize();
   if (curTL != NULL) {          // exact match
@@ -807,6 +833,8 @@
 }
 
 bool BinaryTreeDictionary::coalDictOverPopulated(size_t size) {
+  if (FLSAlwaysCoalesceLarge) return true;
+
   TreeList* list_of_size = findList(size);
   // None of requested size implies overpopulated.
   return list_of_size == NULL || list_of_size->coalDesired() <= 0 ||
@@ -854,17 +882,20 @@
   double _percentage;
   float _inter_sweep_current;
   float _inter_sweep_estimate;
+  float _intra_sweep_estimate;
 
  public:
   BeginSweepClosure(double p, float inter_sweep_current,
-                              float inter_sweep_estimate) :
+                              float inter_sweep_estimate,
+                              float intra_sweep_estimate) :
    _percentage(p),
    _inter_sweep_current(inter_sweep_current),
-   _inter_sweep_estimate(inter_sweep_estimate) { }
+   _inter_sweep_estimate(inter_sweep_estimate),
+   _intra_sweep_estimate(intra_sweep_estimate) { }
 
   void do_list(FreeList* fl) {
     double coalSurplusPercent = _percentage;
-    fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate);
+    fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate, _intra_sweep_estimate);
     fl->set_coalDesired((ssize_t)((double)fl->desired() * coalSurplusPercent));
     fl->set_beforeSweep(fl->count());
     fl->set_bfrSurp(fl->surplus());
@@ -939,9 +970,10 @@
 }
 
 void BinaryTreeDictionary::beginSweepDictCensus(double coalSurplusPercent,
-  float inter_sweep_current, float inter_sweep_estimate) {
+  float inter_sweep_current, float inter_sweep_estimate, float intra_sweep_estimate) {
   BeginSweepClosure bsc(coalSurplusPercent, inter_sweep_current,
-                                            inter_sweep_estimate);
+                                            inter_sweep_estimate,
+                                            intra_sweep_estimate);  // all three sweep values are handed to the closure
   bsc.do_tree(root());
 }
 
@@ -1077,13 +1109,13 @@
 // Print census information - counts, births, deaths, etc.
 // for each list in the tree.  Also print some summary
 // information.
-class printTreeCensusClosure : public AscendTreeCensusClosure {
+class PrintTreeCensusClosure : public AscendTreeCensusClosure {
   int _print_line;
   size_t _totalFree;
   FreeList _total;
 
  public:
-  printTreeCensusClosure() {
+  PrintTreeCensusClosure() {
     _print_line = 0;
     _totalFree = 0;
   }
@@ -1113,7 +1145,7 @@
 
   gclog_or_tty->print("\nBinaryTree\n");
   FreeList::print_labels_on(gclog_or_tty, "size");
-  printTreeCensusClosure ptc;
+  PrintTreeCensusClosure ptc;
   ptc.do_tree(root());
 
   FreeList* total = ptc.total();
@@ -1130,6 +1162,38 @@
              /(total->desired() != 0 ? (double)total->desired() : 1.0));
 }
 
+// Prints each free list visited in the tree, followed by the address
+// range of every chunk on that list; all output goes to _st.
+class PrintFreeListsClosure : public AscendTreeCensusClosure {
+  outputStream* _st;      // destination stream for all output
+  int _print_line;        // lists printed since the labels were last emitted
+
+ public:
+  PrintFreeListsClosure(outputStream* st) : _st(st), _print_line(0) { }
+  void do_list(FreeList* fl) {
+    // Re-emit the column labels every 40 lists.
+    if (++_print_line >= 40) {
+      FreeList::print_labels_on(_st, "size");
+      _print_line = 0;
+    }
+    fl->print_on(_st);
+    size_t sz = fl->size();
+    // "CC" tags chunks that report cantCoalesce().
+    for (FreeChunk* fc = fl->head(); fc != NULL; fc = fc->next()) {
+      _st->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ")  %s",
+                    fc, (HeapWord*)fc + sz,
+                    fc->cantCoalesce() ? "\t CC" : "");
+    }
+  }
+};
+
+// Prints the column labels and then every free list in the tree to st.
+void BinaryTreeDictionary::print_free_lists(outputStream* st) const {
+  FreeList::print_labels_on(st, "size");
+  PrintFreeListsClosure list_printer(st);
+  list_printer.do_tree(root());
+}
+
 // Verify the following tree invariants:
 // . _root has no parent
 // . parent and child point to each other
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp	Wed Dec 23 09:23:54 2009 -0800
@@ -42,9 +42,6 @@
   friend class AscendTreeCensusClosure;
   friend class DescendTreeCensusClosure;
   friend class DescendTreeSearchClosure;
-  TreeList* _parent;
-  TreeList* _left;
-  TreeList* _right;
 
  protected:
   TreeList* parent() const { return _parent; }
@@ -82,6 +79,11 @@
   // to a TreeChunk.
   TreeChunk* first_available();
 
+  // Returns the block with the largest heap address amongst
+  // those in the list for this size; potentially slow and expensive,
+  // use with caution!
+  TreeChunk* largest_address();
+
   // removeChunkReplaceIfNeeded() removes the given "tc" from the TreeList.
   // If "tc" is the first chunk in the list, it is also the
   // TreeList that is the node in the tree.  removeChunkReplaceIfNeeded()
@@ -254,8 +256,9 @@
   // Methods called at the beginning of a sweep to prepare the
   // statistics for the sweep.
   void       beginSweepDictCensus(double coalSurplusPercent,
-                                  float sweep_current,
-                                  float sweep_estimate);
+                                  float inter_sweep_current,
+                                  float inter_sweep_estimate,
+                                  float intra_sweep_estimate);
   // Methods called after the end of a sweep to modify the
   // statistics for the sweep.
   void       endSweepDictCensus(double splitSurplusPercent);
@@ -269,6 +272,7 @@
   // Print the statistcis for all the lists in the tree.  Also may
   // print out summaries.
   void       printDictCensus(void) const;
+  void       print_free_lists(outputStream* st) const;
 
   // For debugging.  Returns the sum of the _returnedBytes for
   // all lists in the tree.
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.cpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.cpp	Wed Dec 23 09:23:54 2009 -0800
@@ -32,7 +32,9 @@
 // threads. The second argument is in support of an extra locking
 // check for CFL spaces' free list locks.
 #ifndef PRODUCT
-void CMSLockVerifier::assert_locked(const Mutex* lock, const Mutex* p_lock) {
+void CMSLockVerifier::assert_locked(const Mutex* lock,
+                                    const Mutex* p_lock1,
+                                    const Mutex* p_lock2) {
   if (!Universe::is_fully_initialized()) {
     return;
   }
@@ -40,7 +42,7 @@
   Thread* myThread = Thread::current();
 
   if (lock == NULL) { // a "lock-free" structure, e.g. MUT, protected by CMS token
-    assert(p_lock == NULL, "Unexpected state");
+    assert(p_lock1 == NULL && p_lock2 == NULL, "Unexpected caller error");
     if (myThread->is_ConcurrentGC_thread()) {
       // This test might have to change in the future, if there can be
       // multiple peer CMS threads.  But for now, if we're testing the CMS
@@ -60,36 +62,39 @@
     return;
   }
 
-  if (ParallelGCThreads == 0) {
+  if (myThread->is_VM_thread()
+      || myThread->is_ConcurrentGC_thread()
+      || myThread->is_Java_thread()) {
+    // Make sure that we are holding the associated lock.
     assert_lock_strong(lock);
+    // The checking of p_lock1/p_lock2 is a special case for CFLS' free
+    // list locks: we make sure that none of the parallel GC work gang
+    // threads are holding "sub-locks" of freeListLock(). We check only
+    // the parDictionaryAllocLock because the others are too numerous.
+    // This special case code is somewhat ugly and any improvements
+    // are welcome.
+    assert(p_lock1 == NULL || !p_lock1->is_locked() || p_lock1->owned_by_self(),
+           "Possible race between this and parallel GC threads");
+    assert(p_lock2 == NULL || !p_lock2->is_locked() || p_lock2->owned_by_self(),
+           "Possible race between this and parallel GC threads");
+  } else if (myThread->is_GC_task_thread()) {
+    // Make sure that the VM or CMS thread holds lock on our behalf
+    // XXX If there were a concept of a gang_master for a (set of)
+    // gang_workers, we could have used the identity of that thread
+    // for checking ownership here; for now we just disjunct.
+    assert(lock->owner() == VMThread::vm_thread() ||
+           lock->owner() == ConcurrentMarkSweepThread::cmst(),
+           "Should be locked by VM thread or CMS thread on my behalf");
+    if (p_lock1 != NULL) {
+      assert_lock_strong(p_lock1);
+    }
+    if (p_lock2 != NULL) {
+      assert_lock_strong(p_lock2);
+    }
   } else {
-    if (myThread->is_VM_thread()
-        || myThread->is_ConcurrentGC_thread()
-        || myThread->is_Java_thread()) {
-      // Make sure that we are holding the associated lock.
-      assert_lock_strong(lock);
-      // The checking of p_lock is a spl case for CFLS' free list
-      // locks: we make sure that none of the parallel GC work gang
-      // threads are holding "sub-locks" of freeListLock(). We check only
-      // the parDictionaryAllocLock because the others are too numerous.
-      // This spl case code is somewhat ugly and any improvements
-      // are welcome XXX FIX ME!!
-      if (p_lock != NULL) {
-        assert(!p_lock->is_locked() || p_lock->owned_by_self(),
-               "Possible race between this and parallel GC threads");
-      }
-    } else if (myThread->is_GC_task_thread()) {
-      // Make sure that the VM or CMS thread holds lock on our behalf
-      // XXX If there were a concept of a gang_master for a (set of)
-      // gang_workers, we could have used the identity of that thread
-      // for checking ownership here; for now we just disjunct.
-      assert(lock->owner() == VMThread::vm_thread() ||
-             lock->owner() == ConcurrentMarkSweepThread::cmst(),
-             "Should be locked by VM thread or CMS thread on my behalf");
-    } else {
-      // Make sure we didn't miss some obscure corner case
-      ShouldNotReachHere();
-    }
+    // Make sure we didn't miss some other thread type calling into here;
+    // perhaps as a result of future VM evolution.
+    ShouldNotReachHere();
   }
 }
 #endif
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.hpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.hpp	Wed Dec 23 09:23:54 2009 -0800
@@ -29,8 +29,11 @@
 // the parallel threads.
 class CMSLockVerifier: AllStatic {
  public:
-  static void assert_locked(const Mutex* lock, const Mutex* p_lock)
+  static void assert_locked(const Mutex* lock, const Mutex* p_lock1, const Mutex* p_lock2)
     PRODUCT_RETURN;
+  static void assert_locked(const Mutex* lock, const Mutex* p_lock) {
+    assert_locked(lock, p_lock, NULL);  // forward to the three-lock form with no second sub-lock
+  }
   static void assert_locked(const Mutex* lock) {
     assert_locked(lock, NULL);
   }
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed Dec 23 09:23:54 2009 -0800
@@ -62,18 +62,15 @@
   // implementation, namely, the simple binary tree (splaying
   // temporarily disabled).
   switch (dictionaryChoice) {
-    case FreeBlockDictionary::dictionaryBinaryTree:
-      _dictionary = new BinaryTreeDictionary(mr);
-      break;
     case FreeBlockDictionary::dictionarySplayTree:
     case FreeBlockDictionary::dictionarySkipList:
     default:
       warning("dictionaryChoice: selected option not understood; using"
               " default BinaryTreeDictionary implementation instead.");
+    case FreeBlockDictionary::dictionaryBinaryTree:
       _dictionary = new BinaryTreeDictionary(mr);
       break;
   }
-  splitBirth(mr.word_size());
   assert(_dictionary != NULL, "CMS dictionary initialization");
   // The indexed free lists are initially all empty and are lazily
   // filled in on demand. Initialize the array elements to NULL.
@@ -388,6 +385,105 @@
   return res;
 }
 
+// Prints to st each indexed free list and the address range of its chunks;
+// reportIndexedFreeListStatistics() first reports to a stream of its own choosing.
+void CompactibleFreeListSpace::print_indexed_free_lists(outputStream* st) const {
+  reportIndexedFreeListStatistics();
+  st->print_cr("Layout of Indexed Freelists");
+  st->print_cr("---------------------------");
+  FreeList::print_labels_on(st, "size");
+  for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
+    _indexedFreeList[i].print_on(st);
+    for (FreeChunk* fc = _indexedFreeList[i].head(); fc != NULL;
+         fc = fc->next()) {
+      st->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ")  %s",
+                   fc, (HeapWord*)fc + i, fc->cantCoalesce() ? "\t CC" : "");
+    }
+  }
+}
+
+void CompactibleFreeListSpace::print_promo_info_blocks(outputStream* st)
+const {
+  _promoInfo.print_on(st);  // delegate to the promotion info's own printer
+}
+
+void CompactibleFreeListSpace::print_dictionary_free_lists(outputStream* st)
+const {
+  _dictionary->reportStatistics();      // dictionary statistics first (stream chosen by the callee)
+  st->print_cr("Layout of Freelists in Tree");
+  st->print_cr("---------------------------");
+  _dictionary->print_free_lists(st);    // then every list with the chunks on it
+}
+
+class BlkPrintingClosure: public BlkClosure {
+  const CMSCollector*             _collector;    // passed to block_size_no_stall()
+  const CompactibleFreeListSpace* _sp;           // space whose blocks are printed
+  const CMSBitMap*                _live_bit_map; // consulted to tell live objects from dead
+  const bool                      _post_remark;  // collector state was past FinalMarking at construction
+  outputStream*                   _st;           // destination of the printed lines
+public:
+  BlkPrintingClosure(const CMSCollector* collector,
+                     const CompactibleFreeListSpace* sp,
+                     const CMSBitMap* live_bit_map,
+                     outputStream* st):
+    _collector(collector),
+    _sp(sp),
+    _live_bit_map(live_bit_map),
+    _post_remark(collector->abstract_state() > CMSCollector::FinalMarking),
+    _st(st) { }
+  size_t do_blk(HeapWord* addr);  // prints one block; returns its size
+};
+
+// Prints a one-line description of the block at addr (live or dead object,
+// or free block) to _st, followed by its contents when the dump flags ask.
+size_t BlkPrintingClosure::do_blk(HeapWord* addr) {
+  const size_t sz = _sp->block_size_no_stall(addr, _collector);
+  assert(sz != 0, "Should always be able to compute a size");
+  if (!_sp->block_is_obj(addr)) { // free block
+    _st->print_cr(PTR_FORMAT ": free block of size " SIZE_FORMAT "%s",
+      addr, sz, CMSPrintChunksInDump ? ":" : ".");
+    if (CMSPrintChunksInDump) {
+      ((FreeChunk*)addr)->print_on(_st);
+      _st->print_cr("--------------------------------------");
+    }
+    return sz;
+  }
+  const bool live = !_post_remark || _live_bit_map->isMarked(addr);
+  const bool show_obj = live && CMSPrintObjectsInDump;
+  _st->print_cr(PTR_FORMAT ": %s object of size " SIZE_FORMAT "%s",
+    addr, live ? "live" : "dead", sz, show_obj ? ":" : ".");
+  if (show_obj) {
+    oop(addr)->print_on(_st);
+    _st->print_cr("--------------------------------------");
+  }
+  return sz;
+}
+
+void CompactibleFreeListSpace::dump_at_safepoint_with_locks(CMSCollector* c,
+  outputStream* st) {  // NOTE(review): name implies a safepoint with locks held; nothing here checks it
+  st->print_cr("\n=========================");
+  st->print_cr("Block layout in CMS Heap:");
+  st->print_cr("=========================");
+  BlkPrintingClosure  bpcl(c, this, c->markBitMap(), st);  // liveness is judged against the collector's mark bit map
+  blk_iterate(&bpcl);                                      // section 1: the blocks of this space
+
+  st->print_cr("\n=======================================");
+  st->print_cr("Order & Layout of Promotion Info Blocks");
+  st->print_cr("=======================================");
+  print_promo_info_blocks(st);                             // section 2: promotion info
+
+  st->print_cr("\n===========================");
+  st->print_cr("Order of Indexed Free Lists");
+  st->print_cr("=========================");
+  print_indexed_free_lists(st);                            // section 3: indexed free lists
+
+  st->print_cr("\n=================================");
+  st->print_cr("Order of Free Lists in Dictionary");
+  st->print_cr("=================================");
+  print_dictionary_free_lists(st);                         // section 4: dictionary free lists
+}
+
+
 void CompactibleFreeListSpace::reportFreeListStatistics() const {
   assert_lock_strong(&_freelistLock);
   assert(PrintFLSStatistics != 0, "Reporting error");
@@ -449,37 +545,37 @@
   if (prevEnd != NULL) {
     // Resize the underlying block offset table.
     _bt.resize(pointer_delta(value, bottom()));
-  if (value <= prevEnd) {
-    assert(value >= unallocated_block(), "New end is below unallocated block");
-  } else {
-    // Now, take this new chunk and add it to the free blocks.
-    // Note that the BOT has not yet been updated for this block.
-    size_t newFcSize = pointer_delta(value, prevEnd);
-    // XXX This is REALLY UGLY and should be fixed up. XXX
-    if (!_adaptive_freelists && _smallLinearAllocBlock._ptr == NULL) {
-      // Mark the boundary of the new block in BOT
-      _bt.mark_block(prevEnd, value);
-      // put it all in the linAB
-      if (ParallelGCThreads == 0) {
-        _smallLinearAllocBlock._ptr = prevEnd;
-        _smallLinearAllocBlock._word_size = newFcSize;
-        repairLinearAllocBlock(&_smallLinearAllocBlock);
-      } else { // ParallelGCThreads > 0
-        MutexLockerEx x(parDictionaryAllocLock(),
-                        Mutex::_no_safepoint_check_flag);
-        _smallLinearAllocBlock._ptr = prevEnd;
-        _smallLinearAllocBlock._word_size = newFcSize;
-        repairLinearAllocBlock(&_smallLinearAllocBlock);
+    if (value <= prevEnd) {
+      assert(value >= unallocated_block(), "New end is below unallocated block");
+    } else {
+      // Now, take this new chunk and add it to the free blocks.
+      // Note that the BOT has not yet been updated for this block.
+      size_t newFcSize = pointer_delta(value, prevEnd);
+      // XXX This is REALLY UGLY and should be fixed up. XXX
+      if (!_adaptive_freelists && _smallLinearAllocBlock._ptr == NULL) {
+        // Mark the boundary of the new block in BOT
+        _bt.mark_block(prevEnd, value);
+        // put it all in the linAB
+        if (ParallelGCThreads == 0) {
+          _smallLinearAllocBlock._ptr = prevEnd;
+          _smallLinearAllocBlock._word_size = newFcSize;
+          repairLinearAllocBlock(&_smallLinearAllocBlock);
+        } else { // ParallelGCThreads > 0
+          MutexLockerEx x(parDictionaryAllocLock(),
+                          Mutex::_no_safepoint_check_flag);
+          _smallLinearAllocBlock._ptr = prevEnd;
+          _smallLinearAllocBlock._word_size = newFcSize;
+          repairLinearAllocBlock(&_smallLinearAllocBlock);
+        }
+        // Births of chunks put into a LinAB are not recorded.  Births
+        // of chunks as they are allocated out of a LinAB are.
+      } else {
+        // Add the block to the free lists, if possible coalescing it
+        // with the last free block, and update the BOT and census data.
+        addChunkToFreeListsAtEndRecordingStats(prevEnd, newFcSize);
       }
-      // Births of chunks put into a LinAB are not recorded.  Births
-      // of chunks as they are allocated out of a LinAB are.
-    } else {
-      // Add the block to the free lists, if possible coalescing it
-      // with the last free block, and update the BOT and census data.
-      addChunkToFreeListsAtEndRecordingStats(prevEnd, newFcSize);
     }
   }
-  }
 }
 
 class FreeListSpace_DCTOC : public Filtering_DCTOC {
@@ -732,7 +828,7 @@
 
 void CompactibleFreeListSpace::object_iterate_mem(MemRegion mr,
                                                   UpwardsObjectClosure* cl) {
-  assert_locked();
+  assert_locked(freelistLock());
   NOT_PRODUCT(verify_objects_initialized());
   Space::object_iterate_mem(mr, cl);
 }
@@ -1212,12 +1308,15 @@
 void CompactibleFreeListSpace::assert_locked() const {
   CMSLockVerifier::assert_locked(freelistLock(), parDictionaryAllocLock());
 }
+
+void CompactibleFreeListSpace::assert_locked(const Mutex* lock) const {
+  CMSLockVerifier::assert_locked(lock);  // check only the given lock; no sub-locks are passed
+}
 #endif
 
 FreeChunk* CompactibleFreeListSpace::allocateScratch(size_t size) {
   // In the parallel case, the main thread holds the free list lock
   // on behalf the parallel threads.
-  assert_locked();
   FreeChunk* fc;
   {
     // If GC is parallel, this might be called by several threads.
@@ -1298,17 +1397,18 @@
     res = blk->_ptr;
     _bt.allocated(res, blk->_word_size);
   } else if (size + MinChunkSize <= blk->_refillSize) {
+    size_t sz = blk->_word_size;
     // Update _unallocated_block if the size is such that chunk would be
     // returned to the indexed free list.  All other chunks in the indexed
     // free lists are allocated from the dictionary so that _unallocated_block
     // has already been adjusted for them.  Do it here so that the cost
     // for all chunks added back to the indexed free lists.
-    if (blk->_word_size < SmallForDictionary) {
-      _bt.allocated(blk->_ptr, blk->_word_size);
+    if (sz < SmallForDictionary) {
+      _bt.allocated(blk->_ptr, sz);
     }
     // Return the chunk that isn't big enough, and then refill below.
-    addChunkToFreeLists(blk->_ptr, blk->_word_size);
-    _bt.verify_single_block(blk->_ptr, (blk->_ptr + blk->_word_size));
+    addChunkToFreeLists(blk->_ptr, sz);
+    splitBirth(sz);
     // Don't keep statistics on adding back chunk from a LinAB.
   } else {
     // A refilled block would not satisfy the request.
@@ -1376,11 +1476,13 @@
     res = getChunkFromIndexedFreeListHelper(size);
   }
   _bt.verify_not_unallocated((HeapWord*) res, size);
+  assert(res == NULL || res->size() == size, "Incorrect block size");
   return res;
 }
 
 FreeChunk*
-CompactibleFreeListSpace::getChunkFromIndexedFreeListHelper(size_t size) {
+CompactibleFreeListSpace::getChunkFromIndexedFreeListHelper(size_t size,
+  bool replenish) {
   assert_locked();
   FreeChunk* fc = NULL;
   if (size < SmallForDictionary) {
@@ -1398,54 +1500,66 @@
       // and replenishing indexed lists from the small linAB.
       //
       FreeChunk* newFc = NULL;
-      size_t replenish_size = CMSIndexedFreeListReplenish * size;
+      const size_t replenish_size = CMSIndexedFreeListReplenish * size;
       if (replenish_size < SmallForDictionary) {
         // Do not replenish from an underpopulated size.
         if (_indexedFreeList[replenish_size].surplus() > 0 &&
             _indexedFreeList[replenish_size].head() != NULL) {
-          newFc =
-            _indexedFreeList[replenish_size].getChunkAtHead();
-        } else {
+          newFc = _indexedFreeList[replenish_size].getChunkAtHead();
+        } else if (bestFitFirst()) {
           newFc = bestFitSmall(replenish_size);
         }
       }
+      if (newFc == NULL && replenish_size > size) {
+        assert(CMSIndexedFreeListReplenish > 1, "ctl pt invariant");
+        newFc = getChunkFromIndexedFreeListHelper(replenish_size, false);
+      }
+      // Note: the split-death of the block obtained above is recorded
+      // below, and only once we know that we are actually going to
+      // split it into more than one piece.
       if (newFc != NULL) {
-        splitDeath(replenish_size);
-      } else if (replenish_size > size) {
-        assert(CMSIndexedFreeListReplenish > 1, "ctl pt invariant");
-        newFc =
-          getChunkFromIndexedFreeListHelper(replenish_size);
-      }
-      if (newFc != NULL) {
-        assert(newFc->size() == replenish_size, "Got wrong size");
-        size_t i;
-        FreeChunk *curFc, *nextFc;
-        // carve up and link blocks 0, ..., CMSIndexedFreeListReplenish - 2
-        // The last chunk is not added to the lists but is returned as the
-        // free chunk.
-        for (curFc = newFc, nextFc = (FreeChunk*)((HeapWord*)curFc + size),
-             i = 0;
-             i < (CMSIndexedFreeListReplenish - 1);
-             curFc = nextFc, nextFc = (FreeChunk*)((HeapWord*)nextFc + size),
-             i++) {
+        if  (replenish || CMSReplenishIntermediate) {
+          // Replenish this list and return one block to caller.
+          size_t i;
+          FreeChunk *curFc, *nextFc;
+          size_t num_blk = newFc->size() / size;
+          assert(num_blk >= 1, "Smaller than requested?");
+          assert(newFc->size() % size == 0, "Should be integral multiple of request");
+          if (num_blk > 1) {
+            // we are sure we will be splitting the block just obtained
+            // into multiple pieces; record the split-death of the original
+            splitDeath(replenish_size);
+          }
+          // carve up and link blocks 0, ..., num_blk - 2
+          // The last chunk is not added to the lists but is returned as the
+          // free chunk.
+          for (curFc = newFc, nextFc = (FreeChunk*)((HeapWord*)curFc + size),
+               i = 0;
+               i < (num_blk - 1);
+               curFc = nextFc, nextFc = (FreeChunk*)((HeapWord*)nextFc + size),
+               i++) {
+            curFc->setSize(size);
+            // Don't record this as a return in order to try and
+            // determine the "returns" from a GC.
+            _bt.verify_not_unallocated((HeapWord*) fc, size);
+            _indexedFreeList[size].returnChunkAtTail(curFc, false);
+            _bt.mark_block((HeapWord*)curFc, size);
+            splitBirth(size);
+            // Don't record the initial population of the indexed list
+            // as a split birth.
+          }
+
+          // check that the arithmetic was OK above
+          assert((HeapWord*)nextFc == (HeapWord*)newFc + num_blk*size,
+            "inconsistency in carving newFc");
           curFc->setSize(size);
-          // Don't record this as a return in order to try and
-          // determine the "returns" from a GC.
-          _bt.verify_not_unallocated((HeapWord*) fc, size);
-          _indexedFreeList[size].returnChunkAtTail(curFc, false);
           _bt.mark_block((HeapWord*)curFc, size);
           splitBirth(size);
-          // Don't record the initial population of the indexed list
-          // as a split birth.
+          fc = curFc;
+        } else {
+          // Return entire block to caller
+          fc = newFc;
         }
-
-        // check that the arithmetic was OK above
-        assert((HeapWord*)nextFc == (HeapWord*)newFc + replenish_size,
-          "inconsistency in carving newFc");
-        curFc->setSize(size);
-        _bt.mark_block((HeapWord*)curFc, size);
-        splitBirth(size);
-        return curFc;
       }
     }
   } else {
@@ -1453,7 +1567,7 @@
     // replenish the indexed free list.
     fc = getChunkFromDictionaryExact(size);
   }
-  assert(fc == NULL || fc->isFree(), "Should be returning a free chunk");
+  // assert(fc == NULL || fc->isFree(), "Should be returning a free chunk");
   return fc;
 }
 
@@ -1512,6 +1626,11 @@
   // adjust _unallocated_block downward, as necessary
   _bt.freed((HeapWord*)chunk, size);
   _dictionary->returnChunk(chunk);
+#ifndef PRODUCT
+  if (CMSCollector::abstract_state() != CMSCollector::Sweeping) {
+    TreeChunk::as_TreeChunk(chunk)->list()->verify_stats();
+  }
+#endif // PRODUCT
 }
 
 void
@@ -1525,6 +1644,11 @@
   } else {
     _indexedFreeList[size].returnChunkAtHead(fc);
   }
+#ifndef PRODUCT
+  if (CMSCollector::abstract_state() != CMSCollector::Sweeping) {
+     _indexedFreeList[size].verify_stats();
+  }
+#endif // PRODUCT
 }
 
 // Add chunk to end of last block -- if it's the largest
@@ -1537,7 +1661,6 @@
   HeapWord* chunk, size_t     size) {
   // check that the chunk does lie in this space!
   assert(chunk != NULL && is_in_reserved(chunk), "Not in this space!");
-  assert_locked();
   // One of the parallel gc task threads may be here
   // whilst others are allocating.
   Mutex* lock = NULL;
@@ -1991,24 +2114,26 @@
   return frag;
 }
 
-#define CoalSurplusPercent 1.05
-#define SplitSurplusPercent 1.10
-
 void CompactibleFreeListSpace::beginSweepFLCensus(
   float inter_sweep_current,
-  float inter_sweep_estimate) {
+  float inter_sweep_estimate,
+  float intra_sweep_estimate) {
   assert_locked();
   size_t i;
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
     FreeList* fl    = &_indexedFreeList[i];
-    fl->compute_desired(inter_sweep_current, inter_sweep_estimate);
-    fl->set_coalDesired((ssize_t)((double)fl->desired() * CoalSurplusPercent));
+    if (PrintFLSStatistics > 1) {
+      gclog_or_tty->print("size[%d] : ", i);
+    }
+    fl->compute_desired(inter_sweep_current, inter_sweep_estimate, intra_sweep_estimate);
+    fl->set_coalDesired((ssize_t)((double)fl->desired() * CMSSmallCoalSurplusPercent));
     fl->set_beforeSweep(fl->count());
     fl->set_bfrSurp(fl->surplus());
   }
-  _dictionary->beginSweepDictCensus(CoalSurplusPercent,
+  _dictionary->beginSweepDictCensus(CMSLargeCoalSurplusPercent,
                                     inter_sweep_current,
-                                    inter_sweep_estimate);
+                                    inter_sweep_estimate,
+                                    intra_sweep_estimate);
 }
 
 void CompactibleFreeListSpace::setFLSurplus() {
@@ -2017,7 +2142,7 @@
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
     FreeList *fl = &_indexedFreeList[i];
     fl->set_surplus(fl->count() -
-                    (ssize_t)((double)fl->desired() * SplitSurplusPercent));
+                    (ssize_t)((double)fl->desired() * CMSSmallSplitSurplusPercent));
   }
 }
 
@@ -2048,6 +2173,11 @@
 }
 
 void CompactibleFreeListSpace::endSweepFLCensus(size_t sweep_count) {
+  if (PrintFLSStatistics > 0) {
+    HeapWord* largestAddr = (HeapWord*) dictionary()->findLargestDict();
+    gclog_or_tty->print_cr("CMS: Large block " PTR_FORMAT,
+                           largestAddr);
+  }
   setFLSurplus();
   setFLHints();
   if (PrintGC && PrintFLSCensus > 0) {
@@ -2055,7 +2185,7 @@
   }
   clearFLCensus();
   assert_locked();
-  _dictionary->endSweepDictCensus(SplitSurplusPercent);
+  _dictionary->endSweepDictCensus(CMSLargeSplitSurplusPercent);
 }
 
 bool CompactibleFreeListSpace::coalOverPopulated(size_t size) {
@@ -2312,13 +2442,18 @@
 }
 
 void CompactibleFreeListSpace::verifyIndexedFreeList(size_t size) const {
-  FreeChunk* fc =  _indexedFreeList[size].head();
+  FreeChunk* fc   =  _indexedFreeList[size].head();
+  FreeChunk* tail =  _indexedFreeList[size].tail();
+  size_t    num = _indexedFreeList[size].count();
+  size_t      n = 0;
   guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty");
-  for (; fc != NULL; fc = fc->next()) {
+  for (; fc != NULL; fc = fc->next(), n++) {
     guarantee(fc->size() == size, "Size inconsistency");
     guarantee(fc->isFree(), "!free?");
     guarantee(fc->next() == NULL || fc->next()->prev() == fc, "Broken list");
+    guarantee((fc->next() == NULL) == (fc == tail), "Incorrect tail");
   }
+  guarantee(n == num, "Incorrect count");
 }
 
 #ifndef PRODUCT
@@ -2516,11 +2651,41 @@
   _tracking = true;
 }
 
-void PromotionInfo::stopTrackingPromotions() {
+#define CMSPrintPromoBlockInfo 1
+
+void PromotionInfo::stopTrackingPromotions(uint worker_id) {
   assert(_spoolHead == _spoolTail && _firstIndex == _nextIndex,
          "spooling inconsistency?");
   _firstIndex = _nextIndex = 1;
   _tracking = false;
+  if (CMSPrintPromoBlockInfo > 1) {
+    print_statistics(worker_id);
+  }
+}
+
+void PromotionInfo::print_statistics(uint worker_id) const {
+  assert(_spoolHead == _spoolTail && _firstIndex == _nextIndex,
+         "Else will undercount");
+  assert(CMSPrintPromoBlockInfo > 0, "Else unnecessary call");
+  // Count the number of blocks and slots in the free pool
+  size_t slots  = 0;
+  size_t blocks = 0;
+  for (SpoolBlock* cur_spool = _spareSpool;
+       cur_spool != NULL;
+       cur_spool = cur_spool->nextSpoolBlock) {
+    // the first entry is just a self-pointer; indices 1 through
+    // bufferSize - 1 are occupied (thus, bufferSize - 1 slots).
+    guarantee((void*)cur_spool->displacedHdr == (void*)&cur_spool->displacedHdr,
+              "first entry of displacedHdr should be self-referential");
+    slots += cur_spool->bufferSize - 1;
+    blocks++;
+  }
+  if (_spoolHead != NULL) {
+    slots += _spoolHead->bufferSize - 1;
+    blocks++;
+  }
+  gclog_or_tty->print_cr(" [worker %d] promo_blocks = %d, promo_slots = %d ",
+                         worker_id, blocks, slots);
 }
 
 // When _spoolTail is not NULL, then the slot <_spoolTail, _nextIndex>
@@ -2584,15 +2749,84 @@
   guarantee(numDisplacedHdrs == numObjsWithDisplacedHdrs, "Displaced hdr count");
 }
 
+void PromotionInfo::print_on(outputStream* st) const {
+  SpoolBlock* curSpool = NULL;
+  size_t i = 0;
+  st->print_cr("start & end indices: [" SIZE_FORMAT ", " SIZE_FORMAT ")",
+               _firstIndex, _nextIndex);
+  for (curSpool = _spoolHead; curSpool != _spoolTail && curSpool != NULL;
+       curSpool = curSpool->nextSpoolBlock) {
+    curSpool->print_on(st);
+    st->print_cr(" active ");
+    i++;
+  }
+  for (curSpool = _spoolTail; curSpool != NULL;
+       curSpool = curSpool->nextSpoolBlock) {
+    curSpool->print_on(st);
+    st->print_cr(" inactive ");
+    i++;
+  }
+  for (curSpool = _spareSpool; curSpool != NULL;
+       curSpool = curSpool->nextSpoolBlock) {
+    curSpool->print_on(st);
+    st->print_cr(" free ");
+    i++;
+  }
+  st->print_cr(SIZE_FORMAT " header spooling blocks", i);
+}
+
+void SpoolBlock::print_on(outputStream* st) const {
+  st->print("[" PTR_FORMAT "," PTR_FORMAT "), " SIZE_FORMAT " HeapWords -> " PTR_FORMAT,
+            this, (HeapWord*)displacedHdr + bufferSize,
+            bufferSize, nextSpoolBlock);
+}
+
+///////////////////////////////////////////////////////////////////////////
+// CFLS_LAB
+///////////////////////////////////////////////////////////////////////////
+
+#define VECTOR_257(x)                                                                                  \
+  /* 1  2  3  4  5  6  7  8  9 1x 11 12 13 14 15 16 17 18 19 2x 21 22 23 24 25 26 27 28 29 3x 31 32 */ \
+  {  x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x }
+
+// Initialize with default setting of CMSParPromoteBlocksToClaim, _not_
+// OldPLABSize, whose static default is different; if overridden at the
+// command-line, this will get reinitialized via a call to
+// modify_initialization() below.
+AdaptiveWeightedAverage CFLS_LAB::_blocks_to_claim[]    =
+  VECTOR_257(AdaptiveWeightedAverage(OldPLABWeight, (float)CMSParPromoteBlocksToClaim));
+size_t CFLS_LAB::_global_num_blocks[]  = VECTOR_257(0);
+int    CFLS_LAB::_global_num_workers[] = VECTOR_257(0);
 
 CFLS_LAB::CFLS_LAB(CompactibleFreeListSpace* cfls) :
   _cfls(cfls)
 {
-  _blocks_to_claim = CMSParPromoteBlocksToClaim;
+  assert(CompactibleFreeListSpace::IndexSetSize == 257, "Modify VECTOR_257() macro above");
   for (size_t i = CompactibleFreeListSpace::IndexSetStart;
        i < CompactibleFreeListSpace::IndexSetSize;
        i += CompactibleFreeListSpace::IndexSetStride) {
     _indexedFreeList[i].set_size(i);
+    _num_blocks[i] = 0;
+  }
+}
+
+static bool _CFLS_LAB_modified = false;
+
+void CFLS_LAB::modify_initialization(size_t n, unsigned wt) {
+  assert(!_CFLS_LAB_modified, "Call only once");
+  _CFLS_LAB_modified = true;
+  for (size_t i = CompactibleFreeListSpace::IndexSetStart;
+       i < CompactibleFreeListSpace::IndexSetSize;
+       i += CompactibleFreeListSpace::IndexSetStride) {
+    _blocks_to_claim[i].modify(n, wt, true /* force */);
   }
 }
 
@@ -2607,11 +2841,9 @@
     if (res == NULL) return NULL;
   } else {
     FreeList* fl = &_indexedFreeList[word_sz];
-    bool filled = false; //TRAP
     if (fl->count() == 0) {
-      bool filled = true; //TRAP
       // Attempt to refill this local free list.
-      _cfls->par_get_chunk_of_blocks(word_sz, _blocks_to_claim, fl);
+      get_from_global_pool(word_sz, fl);
       // If it didn't work, give up.
       if (fl->count() == 0) return NULL;
     }
@@ -2626,80 +2858,190 @@
   return (HeapWord*)res;
 }
 
-void CFLS_LAB::retire() {
-  for (size_t i = CompactibleFreeListSpace::IndexSetStart;
+// Get a chunk of blocks of the right size and update related
+// book-keeping stats
+void CFLS_LAB::get_from_global_pool(size_t word_sz, FreeList* fl) {
+  // Get the #blocks we want to claim
+  size_t n_blks = (size_t)_blocks_to_claim[word_sz].average();
+  assert(n_blks > 0, "Error");
+  assert(ResizePLAB || n_blks == OldPLABSize, "Error");
+  // In some cases, when the application has a phase change,
+  // there may be a sudden and sharp shift in the object survival
+  // profile, and updating the counts at the end of a scavenge
+  // may not be quick enough, giving rise to large scavenge pauses
+  // during these phase changes. It is beneficial to detect such
+  // changes on-the-fly during a scavenge and avoid such a phase-change
+  // pothole. The following code is a heuristic attempt to do that.
+  // It is protected by a product flag until we have gained
+  // enough experience with this heuristic and fine-tuned its behaviour.
+  // WARNING: This might increase fragmentation if we overreact to
+  // small spikes, so some kind of historical smoothing based on
+  // previous experience with the greater reactivity might be useful.
+  // Lacking sufficient experience, CMSOldPLABResizeQuicker is disabled by
+  // default.
+  if (ResizeOldPLAB && CMSOldPLABResizeQuicker) {
+    size_t multiple = _num_blocks[word_sz]/(CMSOldPLABToleranceFactor*CMSOldPLABNumRefills*n_blks);
+    n_blks +=  CMSOldPLABReactivityFactor*multiple*n_blks;
+    n_blks = MIN2(n_blks, CMSOldPLABMax);
+  }
+  assert(n_blks > 0, "Error");
+  _cfls->par_get_chunk_of_blocks(word_sz, n_blks, fl);
+  // Update stats table entry for this block size
+  _num_blocks[word_sz] += fl->count();
+}
+
+void CFLS_LAB::compute_desired_plab_size() {
+  for (size_t i =  CompactibleFreeListSpace::IndexSetStart;
        i < CompactibleFreeListSpace::IndexSetSize;
        i += CompactibleFreeListSpace::IndexSetStride) {
-    if (_indexedFreeList[i].count() > 0) {
-      MutexLockerEx x(_cfls->_indexedFreeListParLocks[i],
-                      Mutex::_no_safepoint_check_flag);
-      _cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
-      // Reset this list.
-      _indexedFreeList[i] = FreeList();
-      _indexedFreeList[i].set_size(i);
+    assert((_global_num_workers[i] == 0) == (_global_num_blocks[i] == 0),
+           "Counter inconsistency");
+    if (_global_num_workers[i] > 0) {
+      // Need to smooth wrt historical average
+      if (ResizeOldPLAB) {
+        _blocks_to_claim[i].sample(
+          MAX2((size_t)CMSOldPLABMin,
+          MIN2((size_t)CMSOldPLABMax,
+               _global_num_blocks[i]/(_global_num_workers[i]*CMSOldPLABNumRefills))));
+      }
+      // Reset counters for next round
+      _global_num_workers[i] = 0;
+      _global_num_blocks[i] = 0;
+      if (PrintOldPLAB) {
+        gclog_or_tty->print_cr("[%d]: %d", i, (size_t)_blocks_to_claim[i].average());
+      }
     }
   }
 }
 
-void
-CompactibleFreeListSpace::
-par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) {
+void CFLS_LAB::retire(int tid) {
+  // We run this single threaded with the world stopped;
+  // so no need for locks and such.
+#define CFLS_LAB_PARALLEL_ACCESS 0
+  NOT_PRODUCT(Thread* t = Thread::current();)
+  assert(Thread::current()->is_VM_thread(), "Error");
+  assert(CompactibleFreeListSpace::IndexSetStart == CompactibleFreeListSpace::IndexSetStride,
+         "Will access to uninitialized slot below");
+#if CFLS_LAB_PARALLEL_ACCESS
+  for (size_t i = CompactibleFreeListSpace::IndexSetSize - 1;
+       i > 0;
+       i -= CompactibleFreeListSpace::IndexSetStride) {
+#else // CFLS_LAB_PARALLEL_ACCESS
+  for (size_t i =  CompactibleFreeListSpace::IndexSetStart;
+       i < CompactibleFreeListSpace::IndexSetSize;
+       i += CompactibleFreeListSpace::IndexSetStride) {
+#endif // !CFLS_LAB_PARALLEL_ACCESS
+    assert(_num_blocks[i] >= (size_t)_indexedFreeList[i].count(),
+           "Can't retire more than what we obtained");
+    if (_num_blocks[i] > 0) {
+      size_t num_retire =  _indexedFreeList[i].count();
+      assert(_num_blocks[i] > num_retire, "Should have used at least one");
+      {
+#if CFLS_LAB_PARALLEL_ACCESS
+        MutexLockerEx x(_cfls->_indexedFreeListParLocks[i],
+                        Mutex::_no_safepoint_check_flag);
+#endif // CFLS_LAB_PARALLEL_ACCESS
+        // Update global stats for num_blocks used
+        _global_num_blocks[i] += (_num_blocks[i] - num_retire);
+        _global_num_workers[i]++;
+        assert(_global_num_workers[i] <= (ssize_t)ParallelGCThreads, "Too big");
+        if (num_retire > 0) {
+          _cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
+          // Reset this list.
+          _indexedFreeList[i] = FreeList();
+          _indexedFreeList[i].set_size(i);
+        }
+      }
+      if (PrintOldPLAB) {
+        gclog_or_tty->print_cr("%d[%d]: %d/%d/%d",
+                               tid, i, num_retire, _num_blocks[i], (size_t)_blocks_to_claim[i].average());
+      }
+      // Reset stats for next round
+      _num_blocks[i]         = 0;
+    }
+  }
+}
+
+void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) {
   assert(fl->count() == 0, "Precondition.");
   assert(word_sz < CompactibleFreeListSpace::IndexSetSize,
          "Precondition");
 
-  // We'll try all multiples of word_sz in the indexed set (starting with
-  // word_sz itself), then try getting a big chunk and splitting it.
-  int k = 1;
-  size_t cur_sz = k * word_sz;
-  bool found = false;
-  while (cur_sz < CompactibleFreeListSpace::IndexSetSize && k == 1) {
-    FreeList* gfl = &_indexedFreeList[cur_sz];
-    FreeList fl_for_cur_sz;  // Empty.
-    fl_for_cur_sz.set_size(cur_sz);
-    {
-      MutexLockerEx x(_indexedFreeListParLocks[cur_sz],
-                      Mutex::_no_safepoint_check_flag);
-      if (gfl->count() != 0) {
-        size_t nn = MAX2(n/k, (size_t)1);
-        gfl->getFirstNChunksFromList(nn, &fl_for_cur_sz);
-        found = true;
+  // We'll first try word_sz itself in the indexed set and, if
+  // CMSSplitIndexedFreeListBlocks is set, larger multiples of word_sz;
+  // failing that, we'll try getting a big chunk and splitting it.
+  {
+    bool found;
+    int  k;
+    size_t cur_sz;
+    for (k = 1, cur_sz = k * word_sz, found = false;
+         (cur_sz < CompactibleFreeListSpace::IndexSetSize) &&
+         (CMSSplitIndexedFreeListBlocks || k <= 1);
+         k++, cur_sz = k * word_sz) {
+      FreeList* gfl = &_indexedFreeList[cur_sz];
+      FreeList fl_for_cur_sz;  // Empty.
+      fl_for_cur_sz.set_size(cur_sz);
+      {
+        MutexLockerEx x(_indexedFreeListParLocks[cur_sz],
+                        Mutex::_no_safepoint_check_flag);
+        if (gfl->count() != 0) {
+          // nn is the number of chunks of size cur_sz that
+          // we'd need to split k-ways each, in order to create
+          // "n" chunks of size word_sz each.
+          const size_t nn = MAX2(n/k, (size_t)1);
+          gfl->getFirstNChunksFromList(nn, &fl_for_cur_sz);
+          found = true;
+          if (k > 1) {
+            // Update split death stats for the cur_sz-size blocks list:
+            // we increment the split death count by the number of blocks
+            // we just took from the cur_sz-size blocks list and which
+            // we will be splitting below.
+            ssize_t deaths = _indexedFreeList[cur_sz].splitDeaths() +
+                             fl_for_cur_sz.count();
+            _indexedFreeList[cur_sz].set_splitDeaths(deaths);
+          }
+        }
+      }
+      // Now transfer fl_for_cur_sz to fl.  Common case, we hope, is k = 1.
+      if (found) {
+        if (k == 1) {
+          fl->prepend(&fl_for_cur_sz);
+        } else {
+          // Divide each block on fl_for_cur_sz up k ways.
+          FreeChunk* fc;
+          while ((fc = fl_for_cur_sz.getChunkAtHead()) != NULL) {
+            // Must do this in reverse order, so that anybody attempting to
+            // access the main chunk sees it as a single free block until we
+            // change it.
+            size_t fc_size = fc->size();
+            for (int i = k-1; i >= 0; i--) {
+              FreeChunk* ffc = (FreeChunk*)((HeapWord*)fc + i * word_sz);
+              ffc->setSize(word_sz);
+              ffc->linkNext(NULL);
+              ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
+              // Above must occur before BOT is updated below.
+              // splitting from the right, fc_size == (i + 1) * word_sz
+              _bt.mark_block((HeapWord*)ffc, word_sz);
+              fc_size -= word_sz;
+              _bt.verify_not_unallocated((HeapWord*)ffc, ffc->size());
+              _bt.verify_single_block((HeapWord*)fc, fc_size);
+              _bt.verify_single_block((HeapWord*)ffc, ffc->size());
+              // Push this on "fl".
+              fl->returnChunkAtHead(ffc);
+            }
+            // TRAP
+            assert(fl->tail()->next() == NULL, "List invariant.");
+          }
+        }
+        // Update birth stats for this block size.
+        size_t num = fl->count();
+        MutexLockerEx x(_indexedFreeListParLocks[word_sz],
+                        Mutex::_no_safepoint_check_flag);
+        ssize_t births = _indexedFreeList[word_sz].splitBirths() + num;
+        _indexedFreeList[word_sz].set_splitBirths(births);
+        return;
       }
     }
-    // Now transfer fl_for_cur_sz to fl.  Common case, we hope, is k = 1.
-    if (found) {
-      if (k == 1) {
-        fl->prepend(&fl_for_cur_sz);
-      } else {
-        // Divide each block on fl_for_cur_sz up k ways.
-        FreeChunk* fc;
-        while ((fc = fl_for_cur_sz.getChunkAtHead()) != NULL) {
-          // Must do this in reverse order, so that anybody attempting to
-          // access the main chunk sees it as a single free block until we
-          // change it.
-          size_t fc_size = fc->size();
-          for (int i = k-1; i >= 0; i--) {
-            FreeChunk* ffc = (FreeChunk*)((HeapWord*)fc + i * word_sz);
-            ffc->setSize(word_sz);
-            ffc->linkNext(NULL);
-            ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
-            // Above must occur before BOT is updated below.
-            // splitting from the right, fc_size == (k - i + 1) * wordsize
-            _bt.mark_block((HeapWord*)ffc, word_sz);
-            fc_size -= word_sz;
-            _bt.verify_not_unallocated((HeapWord*)ffc, ffc->size());
-            _bt.verify_single_block((HeapWord*)fc, fc_size);
-            _bt.verify_single_block((HeapWord*)ffc, ffc->size());
-            // Push this on "fl".
-            fl->returnChunkAtHead(ffc);
-          }
-          // TRAP
-          assert(fl->tail()->next() == NULL, "List invariant.");
-        }
-      }
-      return;
-    }
-    k++; cur_sz = k * word_sz;
   }
   // Otherwise, we'll split a block from the dictionary.
   FreeChunk* fc = NULL;
@@ -2723,17 +3065,20 @@
       }
     }
     if (fc == NULL) return;
+    assert((ssize_t)n >= 1, "Control point invariant");
     // Otherwise, split up that block.
-    size_t nn = fc->size() / word_sz;
+    const size_t nn = fc->size() / word_sz;
     n = MIN2(nn, n);
+    assert((ssize_t)n >= 1, "Control point invariant");
     rem = fc->size() - n * word_sz;
     // If there is a remainder, and it's too small, allocate one fewer.
     if (rem > 0 && rem < MinChunkSize) {
       n--; rem += word_sz;
     }
+    assert((ssize_t)n >= 1, "Control point invariant");
     // First return the remainder, if any.
     // Note that we hold the lock until we decide if we're going to give
-    // back the remainder to the dictionary, since a contending allocator
+    // back the remainder to the dictionary, since a concurrent allocation
     // may otherwise see the heap as empty.  (We're willing to take that
     // hit if the block is a small block.)
     if (rem > 0) {
@@ -2743,18 +3088,16 @@
       rem_fc->linkNext(NULL);
       rem_fc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
       // Above must occur before BOT is updated below.
+      assert((ssize_t)n > 0 && prefix_size > 0 && rem_fc > fc, "Error");
       _bt.split_block((HeapWord*)fc, fc->size(), prefix_size);
       if (rem >= IndexSetSize) {
         returnChunkToDictionary(rem_fc);
-        dictionary()->dictCensusUpdate(fc->size(),
-                                       true /*split*/,
-                                       true /*birth*/);
+        dictionary()->dictCensusUpdate(rem, true /*split*/, true /*birth*/);
         rem_fc = NULL;
       }
       // Otherwise, return it to the small list below.
     }
   }
-  //
   if (rem_fc != NULL) {
     MutexLockerEx x(_indexedFreeListParLocks[rem],
                     Mutex::_no_safepoint_check_flag);
@@ -2762,7 +3105,7 @@
     _indexedFreeList[rem].returnChunkAtHead(rem_fc);
     smallSplitBirth(rem);
   }
-
+  assert((ssize_t)n > 0 && fc != NULL, "Consistency");
   // Now do the splitting up.
   // Must do this in reverse order, so that anybody attempting to
   // access the main chunk sees it as a single free block until we
@@ -2792,13 +3135,15 @@
   _bt.verify_single_block((HeapWord*)fc, fc->size());
   fl->returnChunkAtHead(fc);
 
+  assert((ssize_t)n > 0 && (ssize_t)n == fl->count(), "Incorrect number of blocks");
   {
+    // Update the stats for this block size.
     MutexLockerEx x(_indexedFreeListParLocks[word_sz],
                     Mutex::_no_safepoint_check_flag);
-    ssize_t new_births = _indexedFreeList[word_sz].splitBirths() + n;
-    _indexedFreeList[word_sz].set_splitBirths(new_births);
-    ssize_t new_surplus = _indexedFreeList[word_sz].surplus() + n;
-    _indexedFreeList[word_sz].set_surplus(new_surplus);
+    const ssize_t births = _indexedFreeList[word_sz].splitBirths() + n;
+    _indexedFreeList[word_sz].set_splitBirths(births);
+    // ssize_t new_surplus = _indexedFreeList[word_sz].surplus() + n;
+    // _indexedFreeList[word_sz].set_surplus(new_surplus);
   }
 
   // TRAP
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Wed Dec 23 09:23:54 2009 -0800
@@ -25,8 +25,6 @@
 // Classes in support of keeping track of promotions into a non-Contiguous
 // space, in this case a CompactibleFreeListSpace.
 
-#define CFLS_LAB_REFILL_STATS 0
-
 // Forward declarations
 class CompactibleFreeListSpace;
 class BlkClosure;
@@ -89,6 +87,9 @@
     displacedHdr = (markOop*)&displacedHdr;
     nextSpoolBlock = NULL;
   }
+
+  void print_on(outputStream* st) const;
+  void print() const { print_on(gclog_or_tty); }
 };
 
 class PromotionInfo VALUE_OBJ_CLASS_SPEC {
@@ -121,7 +122,7 @@
     return _promoHead == NULL;
   }
   void startTrackingPromotions();
-  void stopTrackingPromotions();
+  void stopTrackingPromotions(uint worker_id = 0);
   bool tracking() const          { return _tracking;  }
   void track(PromotedObject* trackOop);      // keep track of a promoted oop
   // The following variant must be used when trackOop is not fully
@@ -161,6 +162,9 @@
     _nextIndex = 0;
 
   }
+
+  void print_on(outputStream* st) const;
+  void print_statistics(uint worker_id) const;
 };
 
 class LinearAllocBlock VALUE_OBJ_CLASS_SPEC {
@@ -243,6 +247,7 @@
   mutable Mutex _freelistLock;
   // locking verifier convenience function
   void assert_locked() const PRODUCT_RETURN;
+  void assert_locked(const Mutex* lock) const PRODUCT_RETURN;
 
   // Linear allocation blocks
   LinearAllocBlock _smallLinearAllocBlock;
@@ -281,13 +286,6 @@
   // Locks protecting the exact lists during par promotion allocation.
   Mutex* _indexedFreeListParLocks[IndexSetSize];
 
-#if CFLS_LAB_REFILL_STATS
-  // Some statistics.
-  jint  _par_get_chunk_from_small;
-  jint  _par_get_chunk_from_large;
-#endif
-
-
   // Attempt to obtain up to "n" blocks of the size "word_sz" (which is
   // required to be smaller than "IndexSetSize".)  If successful,
   // adds them to "fl", which is required to be an empty free list.
@@ -320,7 +318,7 @@
   // Helper function for getChunkFromIndexedFreeList.
   // Replenish the indexed free list for this "size".  Do not take from an
   // underpopulated size.
-  FreeChunk*  getChunkFromIndexedFreeListHelper(size_t size);
+  FreeChunk*  getChunkFromIndexedFreeListHelper(size_t size, bool replenish = true);
 
   // Get a chunk from the indexed free list.  If the indexed free list
   // does not have a free chunk, try to replenish the indexed free list
@@ -430,10 +428,6 @@
   void initialize_sequential_subtasks_for_marking(int n_threads,
          HeapWord* low = NULL);
 
-#if CFLS_LAB_REFILL_STATS
-  void print_par_alloc_stats();
-#endif
-
   // Space enquiries
   size_t used() const;
   size_t free() const;
@@ -617,6 +611,12 @@
   // Do some basic checks on the the free lists.
   void checkFreeListConsistency()         const PRODUCT_RETURN;
 
+  // Printing support
+  void dump_at_safepoint_with_locks(CMSCollector* c, outputStream* st);
+  void print_indexed_free_lists(outputStream* st) const;
+  void print_dictionary_free_lists(outputStream* st) const;
+  void print_promo_info_blocks(outputStream* st) const;
+
   NOT_PRODUCT (
     void initializeIndexedFreeListArrayReturnedBytes();
     size_t sumIndexedFreeListArrayReturnedBytes();
@@ -638,8 +638,9 @@
 
   // Statistics functions
   // Initialize census for lists before the sweep.
-  void beginSweepFLCensus(float sweep_current,
-                          float sweep_estimate);
+  void beginSweepFLCensus(float inter_sweep_current,
+                          float inter_sweep_estimate,
+                          float intra_sweep_estimate);
   // Set the surplus for each of the free lists.
   void setFLSurplus();
   // Set the hint for each of the free lists.
@@ -730,16 +731,17 @@
   FreeList _indexedFreeList[CompactibleFreeListSpace::IndexSetSize];
 
   // Initialized from a command-line arg.
-  size_t _blocks_to_claim;
 
-#if CFLS_LAB_REFILL_STATS
-  // Some statistics.
-  int _refills;
-  int _blocksTaken;
-  static int _tot_refills;
-  static int _tot_blocksTaken;
-  static int _next_threshold;
-#endif
+  // Allocation statistics in support of dynamic adjustment of
+  // #blocks to claim per get_from_global_pool() call below.
+  static AdaptiveWeightedAverage
+                 _blocks_to_claim  [CompactibleFreeListSpace::IndexSetSize];
+  static size_t _global_num_blocks [CompactibleFreeListSpace::IndexSetSize];
+  static int    _global_num_workers[CompactibleFreeListSpace::IndexSetSize];
+  size_t        _num_blocks        [CompactibleFreeListSpace::IndexSetSize];
+
+  // Internal work method
+  void get_from_global_pool(size_t word_sz, FreeList* fl);
 
 public:
   CFLS_LAB(CompactibleFreeListSpace* cfls);
@@ -748,7 +750,12 @@
   HeapWord* alloc(size_t word_sz);
 
   // Return any unused portions of the buffer to the global pool.
-  void retire();
+  void retire(int tid);
+
+  // Dynamic OldPLABSize sizing
+  static void compute_desired_plab_size();
+  // When the settings are modified from default static initialization
+  static void modify_initialization(size_t n, unsigned wt);
 };
 
 size_t PromotionInfo::refillSize() const {
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Dec 23 09:23:54 2009 -0800
@@ -253,7 +253,6 @@
   }
 }
 
-
 void ConcurrentMarkSweepGeneration::ref_processor_init() {
   assert(collector() != NULL, "no collector");
   collector()->ref_processor_init();
@@ -341,6 +340,14 @@
   _icms_duty_cycle = CMSIncrementalDutyCycle;
 }
 
+double CMSStats::cms_free_adjustment_factor(size_t free) const {
+  // TBD: CR 6909490
+  return 1.0;
+}
+
+void CMSStats::adjust_cms_free_adjustment_factor(bool fail, size_t free) {
+}
+
 // If promotion failure handling is on use
 // the padded average size of the promotion for each
 // young generation collection.
@@ -361,7 +368,11 @@
 
     // Adjust by the safety factor.
     double cms_free_dbl = (double)cms_free;
-    cms_free_dbl = cms_free_dbl * (100.0 - CMSIncrementalSafetyFactor) / 100.0;
+    double cms_adjustment = (100.0 - CMSIncrementalSafetyFactor)/100.0;
+    // Apply a further correction factor which tries to adjust
+    // for recent occurrence of concurrent mode failures.
+    cms_adjustment = cms_adjustment * cms_free_adjustment_factor(cms_free);
+    cms_free_dbl = cms_free_dbl * cms_adjustment;
 
     if (PrintGCDetails && Verbose) {
       gclog_or_tty->print_cr("CMSStats::time_until_cms_gen_full: cms_free "
@@ -395,6 +406,8 @@
   // late.
   double work = cms_duration() + gc0_period();
   double deadline = time_until_cms_gen_full();
+  // If a concurrent mode failure occurred recently, we want to be
+  // more conservative and halve our expected time_until_cms_gen_full()
   if (work > deadline) {
     if (Verbose && PrintGCDetails) {
       gclog_or_tty->print(
@@ -556,7 +569,8 @@
   _should_unload_classes(false),
   _concurrent_cycles_since_last_unload(0),
   _roots_scanning_options(0),
-  _sweep_estimate(CMS_SweepWeight, CMS_SweepPadding)
+  _inter_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding),
+  _intra_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding)
 {
   if (ExplicitGCInvokesConcurrentAndUnloadsClasses) {
     ExplicitGCInvokesConcurrent = true;
@@ -773,7 +787,7 @@
   NOT_PRODUCT(_overflow_counter = CMSMarkStackOverflowInterval;)
   _gc_counters = new CollectorCounters("CMS", 1);
   _completed_initialization = true;
-  _sweep_timer.start();  // start of time
+  _inter_sweep_timer.start();  // start of time
 }
 
 const char* ConcurrentMarkSweepGeneration::name() const {
@@ -900,6 +914,14 @@
   return result;
 }
 
+// At a promotion failure dump information on block layout in heap
+// (cms old generation).
+void ConcurrentMarkSweepGeneration::promotion_failure_occurred() {
+  if (CMSDumpAtPromotionFailure) {
+    cmsSpace()->dump_at_safepoint_with_locks(collector(), gclog_or_tty);
+  }
+}
+
 CompactibleSpace*
 ConcurrentMarkSweepGeneration::first_compaction_space() const {
   return _cmsSpace;
@@ -1368,12 +1390,7 @@
 ConcurrentMarkSweepGeneration::
 par_promote_alloc_done(int thread_num) {
   CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
-  ps->lab.retire();
-#if CFLS_LAB_REFILL_STATS
-  if (thread_num == 0) {
-    _cmsSpace->print_par_alloc_stats();
-  }
-#endif
+  ps->lab.retire(thread_num);
 }
 
 void
@@ -1974,11 +1991,14 @@
   // We must adjust the allocation statistics being maintained
   // in the free list space. We do so by reading and clearing
   // the sweep timer and updating the block flux rate estimates below.
-  assert(_sweep_timer.is_active(), "We should never see the timer inactive");
-  _sweep_timer.stop();
-  // Note that we do not use this sample to update the _sweep_estimate.
-  _cmsGen->cmsSpace()->beginSweepFLCensus((float)(_sweep_timer.seconds()),
-                                          _sweep_estimate.padded_average());
+  assert(!_intra_sweep_timer.is_active(), "_intra_sweep_timer should be inactive");
+  if (_inter_sweep_timer.is_active()) {
+    _inter_sweep_timer.stop();
+    // Note that we do not use this sample to update the _inter_sweep_estimate.
+    _cmsGen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()),
+                                            _inter_sweep_estimate.padded_average(),
+                                            _intra_sweep_estimate.padded_average());
+  }
 
   GenMarkSweep::invoke_at_safepoint(_cmsGen->level(),
     ref_processor(), clear_all_soft_refs);
@@ -2015,10 +2035,10 @@
   }
 
   // Adjust the per-size allocation stats for the next epoch.
-  _cmsGen->cmsSpace()->endSweepFLCensus(sweepCount() /* fake */);
-  // Restart the "sweep timer" for next epoch.
-  _sweep_timer.reset();
-  _sweep_timer.start();
+  _cmsGen->cmsSpace()->endSweepFLCensus(sweep_count() /* fake */);
+  // Restart the "inter sweep timer" for the next epoch.
+  _inter_sweep_timer.reset();
+  _inter_sweep_timer.start();
 
   // Sample collection pause time and reset for collection interval.
   if (UseAdaptiveSizePolicy) {
@@ -2676,7 +2696,7 @@
   // Also reset promotion tracking in par gc thread states.
   if (ParallelGCThreads > 0) {
     for (uint i = 0; i < ParallelGCThreads; i++) {
-      _par_gc_thread_states[i]->promo.stopTrackingPromotions();
+      _par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
     }
   }
 }
@@ -2771,7 +2791,7 @@
   bool do_bit(size_t offset) {
     HeapWord* addr = _marks->offsetToHeapWord(offset);
     if (!_marks->isMarked(addr)) {
-      oop(addr)->print();
+      oop(addr)->print_on(gclog_or_tty);
       gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr);
       _failed = true;
     }
@@ -2820,7 +2840,7 @@
   // Clear any marks from a previous round
   verification_mark_bm()->clear_all();
   assert(verification_mark_stack()->isEmpty(), "markStack should be empty");
-  assert(overflow_list_is_empty(), "overflow list should be empty");
+  verify_work_stacks_empty();
 
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   gch->ensure_parsability(false);  // fill TLABs, but no need to retire them
@@ -2893,8 +2913,8 @@
   verification_mark_bm()->iterate(&vcl);
   if (vcl.failed()) {
     gclog_or_tty->print("Verification failed");
-    Universe::heap()->print();
-    fatal(" ... aborting");
+    Universe::heap()->print_on(gclog_or_tty);
+    fatal("CMS: failed marking verification after remark");
   }
 }
 
@@ -3314,7 +3334,7 @@
     Universe::heap()->barrier_set()->resize_covered_region(mr);
     // Hmmmm... why doesn't CFLS::set_end verify locking?
     // This is quite ugly; FIX ME XXX
-    _cmsSpace->assert_locked();
+    _cmsSpace->assert_locked(freelistLock());
     _cmsSpace->set_end((HeapWord*)_virtual_space.high());
 
     // update the space and generation capacity counters
@@ -5868,9 +5888,9 @@
   check_correct_thread_executing();
   verify_work_stacks_empty();
   verify_overflow_empty();
-  incrementSweepCount();
-  _sweep_timer.stop();
-  _sweep_estimate.sample(_sweep_timer.seconds());
+  increment_sweep_count();
+  _inter_sweep_timer.stop();
+  _inter_sweep_estimate.sample(_inter_sweep_timer.seconds());
   size_policy()->avg_cms_free_at_sweep()->sample(_cmsGen->free());
 
   // PermGen verification support: If perm gen sweeping is disabled in
@@ -5893,6 +5913,9 @@
     }
   }
 
+  assert(!_intra_sweep_timer.is_active(), "Should not be active");
+  _intra_sweep_timer.reset();
+  _intra_sweep_timer.start();
   if (asynch) {
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
     CMSPhaseAccounting pa(this, "sweep", !PrintGCDetails);
@@ -5937,8 +5960,11 @@
   verify_work_stacks_empty();
   verify_overflow_empty();
 
-  _sweep_timer.reset();
-  _sweep_timer.start();
+  _intra_sweep_timer.stop();
+  _intra_sweep_estimate.sample(_intra_sweep_timer.seconds());
+
+  _inter_sweep_timer.reset();
+  _inter_sweep_timer.start();
 
   update_time_of_last_gc(os::javaTimeMillis());
 
@@ -5981,11 +6007,11 @@
 // FIX ME!!! Looks like this belongs in CFLSpace, with
 // CMSGen merely delegating to it.
 void ConcurrentMarkSweepGeneration::setNearLargestChunk() {
-  double nearLargestPercent = 0.999;
+  double nearLargestPercent = FLSLargestBlockCoalesceProximity;
   HeapWord*  minAddr        = _cmsSpace->bottom();
   HeapWord*  largestAddr    =
     (HeapWord*) _cmsSpace->dictionary()->findLargestDict();
-  if (largestAddr == 0) {
+  if (largestAddr == NULL) {
     // The dictionary appears to be empty.  In this case
     // try to coalesce at the end of the heap.
     largestAddr = _cmsSpace->end();
@@ -5993,6 +6019,13 @@
   size_t largestOffset     = pointer_delta(largestAddr, minAddr);
   size_t nearLargestOffset =
     (size_t)((double)largestOffset * nearLargestPercent) - MinChunkSize;
+  if (PrintFLSStatistics != 0) {
+    gclog_or_tty->print_cr(
+      "CMS: Large Block: " PTR_FORMAT ";"
+      " Proximity: " PTR_FORMAT " -> " PTR_FORMAT,
+      largestAddr,
+      _cmsSpace->nearLargestChunk(), minAddr + nearLargestOffset);
+  }
   _cmsSpace->set_nearLargestChunk(minAddr + nearLargestOffset);
 }
 
@@ -6072,9 +6105,11 @@
   assert_lock_strong(gen->freelistLock());
   assert_lock_strong(bitMapLock());
 
-  assert(!_sweep_timer.is_active(), "Was switched off in an outer context");
-  gen->cmsSpace()->beginSweepFLCensus((float)(_sweep_timer.seconds()),
-                                      _sweep_estimate.padded_average());
+  assert(!_inter_sweep_timer.is_active(), "Was switched off in an outer context");
+  assert(_intra_sweep_timer.is_active(),  "Was switched on  in an outer context");
+  gen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()),
+                                      _inter_sweep_estimate.padded_average(),
+                                      _intra_sweep_estimate.padded_average());
   gen->setNearLargestChunk();
 
   {
@@ -6087,7 +6122,7 @@
     // end-of-sweep-census below will be off by a little bit.
   }
   gen->cmsSpace()->sweep_completed();
-  gen->cmsSpace()->endSweepFLCensus(sweepCount());
+  gen->cmsSpace()->endSweepFLCensus(sweep_count());
   if (should_unload_classes()) {                // unloaded classes this cycle,
     _concurrent_cycles_since_last_unload = 0;   // ... reset count
   } else {                                      // did not unload classes,
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Dec 23 09:23:54 2009 -0800
@@ -355,6 +355,11 @@
                                              unsigned int new_duty_cycle);
   unsigned int icms_update_duty_cycle_impl();
 
+  // In support of adjusting of cms trigger ratios based on history
+  // of concurrent mode failure.
+  double cms_free_adjustment_factor(size_t free) const;
+  void   adjust_cms_free_adjustment_factor(bool fail, size_t free);
+
  public:
   CMSStats(ConcurrentMarkSweepGeneration* cms_gen,
            unsigned int alpha = CMSExpAvgFactor);
@@ -570,8 +575,11 @@
   // appropriately.
   void check_gc_time_limit();
   // XXX Move these to CMSStats ??? FIX ME !!!
-  elapsedTimer _sweep_timer;
-  AdaptivePaddedAverage _sweep_estimate;
+  elapsedTimer _inter_sweep_timer;   // time between sweeps
+  elapsedTimer _intra_sweep_timer;   // time _in_ sweeps
+  // padded decaying average estimates of the above
+  AdaptivePaddedAverage _inter_sweep_estimate;
+  AdaptivePaddedAverage _intra_sweep_estimate;
 
  protected:
   ConcurrentMarkSweepGeneration* _cmsGen;  // old gen (CMS)
@@ -625,6 +633,7 @@
   // . _collectorState <= Idling ==  post-sweep && pre-mark
   // . _collectorState in (Idling, Sweeping) == {initial,final}marking ||
   //                                            precleaning || abortablePrecleanb
+ public:
   enum CollectorState {
     Resizing            = 0,
     Resetting           = 1,
@@ -636,6 +645,7 @@
     FinalMarking        = 7,
     Sweeping            = 8
   };
+ protected:
   static CollectorState _collectorState;
 
   // State related to prologue/epilogue invocation for my generations
@@ -655,7 +665,7 @@
 
   int    _numYields;
   size_t _numDirtyCards;
-  uint   _sweepCount;
+  size_t _sweep_count;
   // number of full gc's since the last concurrent gc.
   uint   _full_gcs_since_conc_gc;
 
@@ -905,7 +915,7 @@
 
   // Check that the currently executing thread is the expected
   // one (foreground collector or background collector).
-  void check_correct_thread_executing()        PRODUCT_RETURN;
+  static void check_correct_thread_executing() PRODUCT_RETURN;
   // XXXPERM void print_statistics()           PRODUCT_RETURN;
 
   bool is_cms_reachable(HeapWord* addr);
@@ -930,8 +940,8 @@
   static void set_foregroundGCShouldWait(bool v) { _foregroundGCShouldWait = v; }
   static bool foregroundGCIsActive() { return _foregroundGCIsActive; }
   static void set_foregroundGCIsActive(bool v) { _foregroundGCIsActive = v; }
-  uint  sweepCount() const             { return _sweepCount; }
-  void incrementSweepCount()           { _sweepCount++; }
+  size_t sweep_count() const             { return _sweep_count; }
+  void   increment_sweep_count()         { _sweep_count++; }
 
   // Timers/stats for gc scheduling and incremental mode pacing.
   CMSStats& stats() { return _stats; }
@@ -1165,6 +1175,11 @@
   virtual bool promotion_attempt_is_safe(size_t promotion_in_bytes,
     bool younger_handles_promotion_failure) const;
 
+  // Inform this (non-young) generation that a promotion failure was
+  // encountered during a collection of a younger generation that
+  // promotes into this generation.
+  virtual void promotion_failure_occurred();
+
   bool should_collect(bool full, size_t size, bool tlab);
   virtual bool should_concurrent_collect() const;
   virtual bool is_too_full() const;
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp	Wed Dec 23 09:23:54 2009 -0800
@@ -55,7 +55,8 @@
   virtual void       dictCensusUpdate(size_t size, bool split, bool birth) = 0;
   virtual bool       coalDictOverPopulated(size_t size) = 0;
   virtual void       beginSweepDictCensus(double coalSurplusPercent,
-                       float sweep_current, float sweep_ewstimate) = 0;
+                       float inter_sweep_current, float inter_sweep_estimate,
+                       float intra_sweep_estimate) = 0;
   virtual void       endSweepDictCensus(double splitSurplusPercent) = 0;
   virtual FreeChunk* findLargestDict() const = 0;
   // verify that the given chunk is in the dictionary.
@@ -79,6 +80,7 @@
   }
 
   virtual void       printDictCensus() const = 0;
+  virtual void       print_free_lists(outputStream* st) const = 0;
 
   virtual void       verify()         const = 0;
 
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp	Wed Dec 23 09:23:54 2009 -0800
@@ -67,3 +67,8 @@
   }
 }
 #endif
+
+void FreeChunk::print_on(outputStream* st) {
+  st->print_cr("Next: " PTR_FORMAT " Prev: " PTR_FORMAT " %s",
+    next(), prev(), cantCoalesce() ? "[can't coalesce]" : "");
+}
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Wed Dec 23 09:23:54 2009 -0800
@@ -129,6 +129,8 @@
   void verifyList()         const PRODUCT_RETURN;
   void mangleAllocated(size_t size) PRODUCT_RETURN;
   void mangleFreed(size_t size)     PRODUCT_RETURN;
+
+  void print_on(outputStream* st);
 };
 
 // Alignment helpers etc.
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp	Wed Dec 23 09:23:54 2009 -0800
@@ -81,8 +81,8 @@
   set_hint(hint);
 }
 
-void FreeList::init_statistics() {
-  _allocation_stats.initialize();
+void FreeList::init_statistics(bool split_birth) {
+  _allocation_stats.initialize(split_birth);
 }
 
 FreeChunk* FreeList::getChunkAtHead() {
@@ -292,14 +292,31 @@
 }
 
 #ifndef PRODUCT
+void FreeList::verify_stats() const {
+  // The +1 of the LH comparand is to allow some "looseness" in
+  // checking: we usually call this interface when adding a block
+  // and we'll subsequently update the stats; we cannot update the
+  // stats beforehand because in the case of the large-block BT
+  // dictionary for example, this might be the first block and
+  // in that case there would be no place that we could record
+  // the stats (which are kept in the block itself).
+  assert(_allocation_stats.prevSweep() + _allocation_stats.splitBirths() + 1   // Total Stock + 1
+          >= _allocation_stats.splitDeaths() + (ssize_t)count(), "Conservation Principle");
+}
+
 void FreeList::assert_proper_lock_protection_work() const {
-#ifdef ASSERT
-  if (_protecting_lock != NULL &&
-      SharedHeap::heap()->n_par_threads() > 0) {
-    // Should become an assert.
-    guarantee(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED");
+  assert(_protecting_lock != NULL, "Don't call this directly");
+  assert(ParallelGCThreads > 0, "Don't call this directly");
+  Thread* thr = Thread::current();
+  if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) {
+    // assert that we are holding the freelist lock
+  } else if (thr->is_GC_task_thread()) {
+    assert(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED");
+  } else if (thr->is_Java_thread()) {
+    assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing");
+  } else {
+    ShouldNotReachHere();  // unaccounted thread type?
   }
-#endif
 }
 #endif
 
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp	Wed Dec 23 09:23:54 2009 -0800
@@ -35,18 +35,26 @@
 // for that implementation.
 
 class Mutex;
+class TreeList;
 
 class FreeList VALUE_OBJ_CLASS_SPEC {
   friend class CompactibleFreeListSpace;
   friend class VMStructs;
-  friend class printTreeCensusClosure;
-  FreeChunk*    _head;          // List of free chunks
+  friend class PrintTreeCensusClosure;
+
+ protected:
+  TreeList* _parent;
+  TreeList* _left;
+  TreeList* _right;
+
+ private:
+  FreeChunk*    _head;          // Head of list of free chunks
   FreeChunk*    _tail;          // Tail of list of free chunks
-  size_t        _size;          // Size in Heap words of each chunks
+  size_t        _size;          // Size in Heap words of each chunk
   ssize_t       _count;         // Number of entries in list
   size_t        _hint;          // next larger size list with a positive surplus
 
-  AllocationStats _allocation_stats;            // statistics for smart allocation
+  AllocationStats _allocation_stats; // allocation-related statistics
 
 #ifdef ASSERT
   Mutex*        _protecting_lock;
@@ -63,9 +71,12 @@
 
   // Initialize the allocation statistics.
  protected:
-  void init_statistics();
+  void init_statistics(bool split_birth = false);
   void set_count(ssize_t v) { _count = v;}
-  void increment_count()    { _count++; }
+  void increment_count()    {
+    _count++;
+  }
+
   void decrement_count() {
     _count--;
     assert(_count >= 0, "Count should not be negative");
@@ -167,11 +178,13 @@
     _allocation_stats.set_desired(v);
   }
   void compute_desired(float inter_sweep_current,
-                       float inter_sweep_estimate) {
+                       float inter_sweep_estimate,
+                       float intra_sweep_estimate) {
     assert_proper_lock_protection();
     _allocation_stats.compute_desired(_count,
                                       inter_sweep_current,
-                                      inter_sweep_estimate);
+                                      inter_sweep_estimate,
+                                      intra_sweep_estimate);
   }
   ssize_t coalDesired() const {
     return _allocation_stats.coalDesired();
@@ -306,6 +319,9 @@
   // found.  Return NULL if "fc" is not found.
   bool verifyChunkInFreeLists(FreeChunk* fc) const;
 
+  // Stats verification
+  void verify_stats() const PRODUCT_RETURN;
+
   // Printing support
   static void print_labels_on(outputStream* st, const char* c);
   void print_on(outputStream* st, const char* c = NULL) const;
--- a/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep	Wed Dec 23 09:23:54 2009 -0800
@@ -221,6 +221,7 @@
 freeList.cpp                            globals.hpp
 freeList.cpp                            mutex.hpp
 freeList.cpp                            sharedHeap.hpp
+freeList.cpp                            vmThread.hpp
 
 freeList.hpp                            allocationStats.hpp
 
--- a/src/share/vm/gc_implementation/includeDB_gc_serial	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/includeDB_gc_serial	Wed Dec 23 09:23:54 2009 -0800
@@ -71,6 +71,7 @@
 gcUtil.hpp                              allocation.hpp
 gcUtil.hpp                              debug.hpp
 gcUtil.hpp                              globalDefinitions.hpp
+gcUtil.hpp                              ostream.hpp
 gcUtil.hpp				timer.hpp
 
 generationCounters.cpp                  generationCounters.hpp
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Wed Dec 23 09:23:54 2009 -0800
@@ -50,6 +50,7 @@
                       work_queue_set_, &term_),
   _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this),
   _keep_alive_closure(&_scan_weak_ref_closure),
+  _promotion_failure_size(0),
   _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0),
   _strong_roots_time(0.0), _term_time(0.0)
 {
@@ -249,6 +250,16 @@
   }
 }
 
+void ParScanThreadState::print_and_clear_promotion_failure_size() {
+  if (_promotion_failure_size != 0) {
+    if (PrintPromotionFailure) {
+      gclog_or_tty->print(" (%d: promotion failure size = " SIZE_FORMAT ") ",
+        _thread_num, _promotion_failure_size);
+    }
+    _promotion_failure_size = 0;
+  }
+}
+
 class ParScanThreadStateSet: private ResourceArray {
 public:
   // Initializes states for the specified number of threads;
@@ -260,11 +271,11 @@
                         GrowableArray<oop>**    overflow_stacks_,
                         size_t                  desired_plab_sz,
                         ParallelTaskTerminator& term);
-  inline ParScanThreadState& thread_sate(int i);
+  inline ParScanThreadState& thread_state(int i);
   int pushes() { return _pushes; }
   int pops()   { return _pops; }
   int steals() { return _steals; }
-  void reset();
+  void reset(bool promotion_failed);
   void flush();
 private:
   ParallelTaskTerminator& _term;
@@ -295,22 +306,31 @@
   }
 }
 
-inline ParScanThreadState& ParScanThreadStateSet::thread_sate(int i)
+inline ParScanThreadState& ParScanThreadStateSet::thread_state(int i)
 {
   assert(i >= 0 && i < length(), "sanity check!");
   return ((ParScanThreadState*)_data)[i];
 }
 
 
-void ParScanThreadStateSet::reset()
+void ParScanThreadStateSet::reset(bool promotion_failed)
 {
   _term.reset_for_reuse();
+  if (promotion_failed) {
+    for (int i = 0; i < length(); ++i) {
+      thread_state(i).print_and_clear_promotion_failure_size();
+    }
+  }
 }
 
 void ParScanThreadStateSet::flush()
 {
+  // Work in this loop should be kept as lightweight as
+  // possible since this might otherwise become a bottleneck
+  // to scaling. Should we add heavy-weight work into this
+  // loop, consider parallelizing the loop into the worker threads.
   for (int i = 0; i < length(); ++i) {
-    ParScanThreadState& par_scan_state = thread_sate(i);
+    ParScanThreadState& par_scan_state = thread_state(i);
 
     // Flush stats related to To-space PLAB activity and
     // retire the last buffer.
@@ -362,6 +382,14 @@
       }
     }
   }
+  if (UseConcMarkSweepGC && ParallelGCThreads > 0) {
+    // We need to call this even when ResizeOldPLAB is disabled
+    // so as to avoid breaking some asserts. While we may be able
+    // to avoid this by reorganizing the code a bit, I am loath
+    // to do that unless we find cases where ergo leads to bad
+    // performance.
+    CFLS_LAB::compute_desired_plab_size();
+  }
 }
 
 ParScanClosure::ParScanClosure(ParNewGeneration* g,
@@ -475,7 +503,7 @@
 
   Generation* old_gen = gch->next_gen(_gen);
 
-  ParScanThreadState& par_scan_state = _state_set->thread_sate(i);
+  ParScanThreadState& par_scan_state = _state_set->thread_state(i);
   par_scan_state.set_young_old_boundary(_young_old_boundary);
 
   par_scan_state.start_strong_roots();
@@ -659,7 +687,7 @@
 {
   ResourceMark rm;
   HandleMark hm;
-  ParScanThreadState& par_scan_state = _state_set.thread_sate(i);
+  ParScanThreadState& par_scan_state = _state_set.thread_state(i);
   par_scan_state.set_young_old_boundary(_young_old_boundary);
   _task.work(i, par_scan_state.is_alive_closure(),
              par_scan_state.keep_alive_closure(),
@@ -693,7 +721,7 @@
   ParNewRefProcTaskProxy rp_task(task, _generation, *_generation.next_gen(),
                                  _generation.reserved().end(), _state_set);
   workers->run_task(&rp_task);
-  _state_set.reset();
+  _state_set.reset(_generation.promotion_failed());
 }
 
 void ParNewRefProcTaskExecutor::execute(EnqueueTask& task)
@@ -813,7 +841,7 @@
     GenCollectedHeap::StrongRootsScope srs(gch);
     tsk.work(0);
   }
-  thread_state_set.reset();
+  thread_state_set.reset(promotion_failed());
 
   if (PAR_STATS_ENABLED && ParallelGCVerbose) {
     gclog_or_tty->print("Thread totals:\n"
@@ -882,6 +910,8 @@
     swap_spaces();  // Make life simpler for CMS || rescan; see 6483690.
     from()->set_next_compaction_space(to());
     gch->set_incremental_collection_will_fail();
+    // Inform the next generation that a promotion failure occurred.
+    _next_gen->promotion_failure_occurred();
 
     // Reset the PromotionFailureALot counters.
     NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();)
@@ -1029,6 +1059,8 @@
       new_obj = old;
 
       preserve_mark_if_necessary(old, m);
+      // Log the size of the maiden promotion failure
+      par_scan_state->log_promotion_failure(sz);
     }
 
     old->forward_to(new_obj);
@@ -1150,6 +1182,8 @@
       failed_to_promote = true;
 
       preserve_mark_if_necessary(old, m);
+      // Log the size of the maiden promotion failure
+      par_scan_state->log_promotion_failure(sz);
     }
   } else {
     // Is in to-space; do copying ourselves.
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Wed Dec 23 09:23:54 2009 -0800
@@ -97,6 +97,9 @@
   int _pushes, _pops, _steals, _steal_attempts, _term_attempts;
   int _overflow_pushes, _overflow_refills, _overflow_refill_objs;
 
+  // Stats for promotion failure
+  size_t _promotion_failure_size;
+
   // Timing numbers.
   double _start;
   double _start_strong_roots;
@@ -169,6 +172,15 @@
   // Undo the most recent allocation ("obj", of "word_sz").
   void undo_alloc_in_to_space(HeapWord* obj, size_t word_sz);
 
+  // Promotion failure stats: size logged for the first failure (0 if none).
+  size_t promotion_failure_size() const { return _promotion_failure_size; }
+  void log_promotion_failure(size_t sz) {
+    if (_promotion_failure_size == 0) {  // keep only the first size logged
+      _promotion_failure_size = sz;
+    }
+  }
+  void print_and_clear_promotion_failure_size();
+
   int pushes() { return _pushes; }
   int pops()   { return _pops; }
   int steals() { return _steals; }
--- a/src/share/vm/gc_implementation/shared/allocationStats.hpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/allocationStats.hpp	Wed Dec 23 09:23:54 2009 -0800
@@ -31,7 +31,7 @@
   // beginning of this sweep:
   //   Count(end_last_sweep) - Count(start_this_sweep)
   //     + splitBirths(between) - splitDeaths(between)
-  // The above number divided by the time since the start [END???] of the
+  // The above number divided by the time since the end of the
   // previous sweep gives us a time rate of demand for blocks
   // of this size. We compute a padded average of this rate as
   // our current estimate for the time rate of demand for blocks
@@ -41,7 +41,7 @@
   // estimates.
   AdaptivePaddedAverage _demand_rate_estimate;
 
-  ssize_t     _desired;          // Estimate computed as described above
+  ssize_t     _desired;         // Demand estimate computed as described above
   ssize_t     _coalDesired;     // desired +/- small-percent for tuning coalescing
 
   ssize_t     _surplus;         // count - (desired +/- small-percent),
@@ -53,9 +53,9 @@
   ssize_t     _coalDeaths;      // loss from coalescing
   ssize_t     _splitBirths;     // additional chunks from splitting
   ssize_t     _splitDeaths;     // loss from splitting
-  size_t     _returnedBytes;    // number of bytes returned to list.
+  size_t      _returnedBytes;   // number of bytes returned to list.
  public:
-  void initialize() {
+  void initialize(bool split_birth = false) {
     AdaptivePaddedAverage* dummy =
       new (&_demand_rate_estimate) AdaptivePaddedAverage(CMS_FLSWeight,
                                                          CMS_FLSPadding);
@@ -67,7 +67,7 @@
     _beforeSweep = 0;
     _coalBirths = 0;
     _coalDeaths = 0;
-    _splitBirths = 0;
+    _splitBirths = split_birth? 1 : 0;
     _splitDeaths = 0;
     _returnedBytes = 0;
   }
@@ -75,10 +75,12 @@
   AllocationStats() {
     initialize();
   }
+
   // The rate estimate is in blocks per second.
   void compute_desired(size_t count,
                        float inter_sweep_current,
-                       float inter_sweep_estimate) {
+                       float inter_sweep_estimate,
+                       float intra_sweep_estimate) {
     // If the latest inter-sweep time is below our granularity
     // of measurement, we may call in here with
     // inter_sweep_current == 0. However, even for suitably small
@@ -88,12 +90,31 @@
     // vulnerable to noisy glitches. In such cases, we
     // ignore the current sample and use currently available
     // historical estimates.
+    // XXX NEEDS TO BE FIXED
+    // assert(prevSweep() + splitBirths() >= splitDeaths() + (ssize_t)count, "Conservation Principle");
+    //     ^^^^^^^^^^^^^^^^^^^^^^^^^^^    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    //     "Total Stock"                  "Not used at this block size"
     if (inter_sweep_current > _threshold) {
-      ssize_t demand = prevSweep() - count + splitBirths() - splitDeaths();
+      ssize_t demand = prevSweep() - (ssize_t)count + splitBirths() - splitDeaths();
+      // XXX NEEDS TO BE FIXED
+      // assert(demand >= 0, "Demand should be non-negative");
+      // Defensive: adjust for imprecision in event counting
+      if (demand < 0) {
+        demand = 0;
+      }
+      float old_rate = _demand_rate_estimate.padded_average();
       float rate = ((float)demand)/inter_sweep_current;
       _demand_rate_estimate.sample(rate);
-      _desired = (ssize_t)(_demand_rate_estimate.padded_average()
-                           *inter_sweep_estimate);
+      float new_rate = _demand_rate_estimate.padded_average();
+      ssize_t old_desired = _desired;
+      // NB: "?:" binds more loosely than "+", so parenthesize the conditional.
+      _desired = (ssize_t)(new_rate * (inter_sweep_estimate
+                                       + (CMSExtrapolateSweep
+                                            ? intra_sweep_estimate : 0.0)));
+      if (PrintFLSStatistics > 1) {
+        gclog_or_tty->print_cr("demand: %d, old_rate: %f, current_rate: %f, new_rate: %f, old_desired: %d, new_desired: %d",
+                                demand,     old_rate,     rate,             new_rate,     old_desired,     _desired);
+      }
     }
   }
 
--- a/src/share/vm/gc_implementation/shared/gcUtil.cpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/gcUtil.cpp	Wed Dec 23 09:23:54 2009 -0800
@@ -52,11 +52,35 @@
   _last_sample = new_sample;
 }
 
+void AdaptiveWeightedAverage::print() const {
+  print_on(tty);
+}
+
+void AdaptiveWeightedAverage::print_on(outputStream* st) const {
+  guarantee(false, "NYI");
+}
+
+void AdaptivePaddedAverage::print() const {
+  print_on(tty);
+}
+
+void AdaptivePaddedAverage::print_on(outputStream* st) const {
+  guarantee(false, "NYI");
+}
+
+void AdaptivePaddedNoZeroDevAverage::print() const {
+  print_on(tty);
+}
+
+void AdaptivePaddedNoZeroDevAverage::print_on(outputStream* st) const {
+  guarantee(false, "NYI");
+}
+
 void AdaptivePaddedAverage::sample(float new_sample) {
-  // Compute our parent classes sample information
+  // Compute new adaptive weighted average based on new sample.
   AdaptiveWeightedAverage::sample(new_sample);
 
-  // Now compute the deviation and the new padded sample
+  // Now update the deviation and the padded average.
   float new_avg = average();
   float new_dev = compute_adaptive_average(fabsd(new_sample - new_avg),
                                            deviation());
--- a/src/share/vm/gc_implementation/shared/gcUtil.hpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/gcUtil.hpp	Wed Dec 23 09:23:54 2009 -0800
@@ -54,8 +54,8 @@
 
  public:
   // Input weight must be between 0 and 100
-  AdaptiveWeightedAverage(unsigned weight) :
-    _average(0.0), _sample_count(0), _weight(weight), _last_sample(0.0) {
+  AdaptiveWeightedAverage(unsigned weight, float avg = 0.0) :
+    _average(avg), _sample_count(0), _weight(weight), _last_sample(0.0) {
   }
 
   void clear() {
@@ -64,6 +64,13 @@
     _last_sample = 0;
   }
 
+  // Useful for modifying static structures after startup.
+  void  modify(size_t avg, unsigned wt, bool force = false)  {
+    assert(force, "Are you sure you want to call this?");
+    _average = (float)avg;
+    _weight  = wt;
+  }
+
   // Accessors
   float    average() const       { return _average;       }
   unsigned weight()  const       { return _weight;        }
@@ -83,6 +90,10 @@
     // Convert to float and back to avoid integer overflow.
     return (size_t)exp_avg((float)avg, (float)sample, weight);
   }
+
+  // Printing
+  void print_on(outputStream* st) const;
+  void print() const;
 };
 
 
@@ -129,6 +140,10 @@
 
   // Override
   void  sample(float new_sample);
+
+  // Printing
+  void print_on(outputStream* st) const;
+  void print() const;
 };
 
 // A weighted average that includes a deviation from the average,
@@ -146,7 +161,12 @@
     AdaptivePaddedAverage(weight, padding)  {}
   // Override
   void  sample(float new_sample);
+
+  // Printing
+  void print_on(outputStream* st) const;
+  void print() const;
 };
+
 // Use a least squares fit to a set of data to generate a linear
 // equation.
 //              y = intercept + slope * x
--- a/src/share/vm/includeDB_gc_parallel	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/includeDB_gc_parallel	Wed Dec 23 09:23:54 2009 -0800
@@ -21,6 +21,8 @@
 // have any questions.
 //  
 
+arguments.cpp                           compactibleFreeListSpace.hpp
+
 assembler_<arch>.cpp                    g1SATBCardTableModRefBS.hpp
 assembler_<arch>.cpp                    g1CollectedHeap.inline.hpp
 assembler_<arch>.cpp                    heapRegion.hpp
--- a/src/share/vm/memory/defNewGeneration.cpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/memory/defNewGeneration.cpp	Wed Dec 23 09:23:54 2009 -0800
@@ -609,7 +609,7 @@
 
     remove_forwarding_pointers();
     if (PrintGCDetails) {
-      gclog_or_tty->print(" (promotion failed)");
+      gclog_or_tty->print(" (promotion failed) ");
     }
     // Add to-space to the list of space to compact
     // when a promotion failure has occurred.  In that
@@ -620,6 +620,9 @@
     from()->set_next_compaction_space(to());
     gch->set_incremental_collection_will_fail();
 
+    // Inform the next generation that a promotion failure occurred.
+    _next_gen->promotion_failure_occurred();
+
     // Reset the PromotionFailureALot counters.
     NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();)
   }
@@ -679,6 +682,11 @@
 
 void DefNewGeneration::handle_promotion_failure(oop old) {
   preserve_mark_if_necessary(old, old->mark());
+  if (!_promotion_failed && PrintPromotionFailure) {
+    gclog_or_tty->print(" (promotion failure size = " SIZE_FORMAT ") ",
+                        old->size());
+  }
+
   // forward to self
   old->forward_to(old);
   _promotion_failed = true;
--- a/src/share/vm/memory/generation.hpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/memory/generation.hpp	Wed Dec 23 09:23:54 2009 -0800
@@ -181,6 +181,12 @@
   virtual bool promotion_attempt_is_safe(size_t promotion_in_bytes,
     bool younger_handles_promotion_failure) const;
 
+  // For a non-young generation, this interface can be used to inform a
+  // generation that a promotion attempt into that generation failed.
+  // Typically used to enable diagnostic output for post-mortem analysis,
+  // but other uses of the interface are not ruled out.
+  virtual void promotion_failure_occurred() { /* does nothing */ }
+
   // Return an estimate of the maximum allocation that could be performed
   // in the generation without triggering any collection or expansion
   // activity.  It is "unsafe" because no locks are taken; the result
--- a/src/share/vm/runtime/arguments.cpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/runtime/arguments.cpp	Wed Dec 23 09:23:54 2009 -0800
@@ -948,6 +948,7 @@
   }
 }
 
+#ifndef KERNEL
 // If the user has chosen ParallelGCThreads > 0, we set UseParNewGC
 // if it's not explictly set or unset. If the user has chosen
 // UseParNewGC and not explicitly set ParallelGCThreads we
@@ -1177,8 +1178,7 @@
       // the value (either from the command line or ergonomics) of
       // OldPLABSize.  Following OldPLABSize is an ergonomics decision.
       FLAG_SET_ERGO(uintx, CMSParPromoteBlocksToClaim, OldPLABSize);
-    }
-    else {
+    } else {
       // OldPLABSize and CMSParPromoteBlocksToClaim are both set.
       // CMSParPromoteBlocksToClaim is a collector-specific flag, so
       // we'll let it to take precedence.
@@ -1188,7 +1188,23 @@
                   " CMSParPromoteBlocksToClaim will take precedence.\n");
     }
   }
+  if (!FLAG_IS_DEFAULT(ResizeOldPLAB) && !ResizeOldPLAB) {
+    // OldPLAB sizing manually turned off: Use a larger default setting,
+    // unless it was manually specified. This is because a too-low value
+    // will slow down scavenges.
+    if (FLAG_IS_DEFAULT(CMSParPromoteBlocksToClaim)) {
+      FLAG_SET_ERGO(uintx, CMSParPromoteBlocksToClaim, 50); // default value before 6631166
+    }
+  }
+  // Overwrite OldPLABSize which is the variable we will internally use everywhere.
+  FLAG_SET_ERGO(uintx, OldPLABSize, CMSParPromoteBlocksToClaim);
+  // If either of the static initialization defaults have changed, note this
+  // modification.
+  if (!FLAG_IS_DEFAULT(CMSParPromoteBlocksToClaim) || !FLAG_IS_DEFAULT(OldPLABWeight)) {
+    CFLS_LAB::modify_initialization(OldPLABSize, OldPLABWeight);
+  }
 }
+#endif // KERNEL
 
 inline uintx max_heap_for_compressed_oops() {
   LP64_ONLY(return oopDesc::OopEncodingHeapMax - MaxPermSize - os::vm_page_size());
@@ -2370,22 +2386,25 @@
                   "ExtendedDTraceProbes flag is only applicable on Solaris\n");
       return JNI_EINVAL;
 #endif // ndef SOLARIS
-    } else
 #ifdef ASSERT
-    if (match_option(option, "-XX:+FullGCALot", &tail)) {
+    } else if (match_option(option, "-XX:+FullGCALot", &tail)) {
       FLAG_SET_CMDLINE(bool, FullGCALot, true);
       // disable scavenge before parallel mark-compact
       FLAG_SET_CMDLINE(bool, ScavengeBeforeFullGC, false);
-    } else
 #endif
-    if (match_option(option, "-XX:ParCMSPromoteBlocksToClaim=", &tail)) {
+    } else if (match_option(option, "-XX:CMSParPromoteBlocksToClaim=", &tail)) {
       julong cms_blocks_to_claim = (julong)atol(tail);
       FLAG_SET_CMDLINE(uintx, CMSParPromoteBlocksToClaim, cms_blocks_to_claim);
       jio_fprintf(defaultStream::error_stream(),
-        "Please use -XX:CMSParPromoteBlocksToClaim in place of "
+        "Please use -XX:OldPLABSize in place of "
+        "-XX:CMSParPromoteBlocksToClaim in the future\n");
+    } else if (match_option(option, "-XX:ParCMSPromoteBlocksToClaim=", &tail)) {
+      julong cms_blocks_to_claim = (julong)atol(tail);
+      FLAG_SET_CMDLINE(uintx, CMSParPromoteBlocksToClaim, cms_blocks_to_claim);
+      jio_fprintf(defaultStream::error_stream(),
+        "Please use -XX:OldPLABSize in place of "
         "-XX:ParCMSPromoteBlocksToClaim in the future\n");
-    } else
-    if (match_option(option, "-XX:ParallelGCOldGenAllocBufferSize=", &tail)) {
+    } else if (match_option(option, "-XX:ParallelGCOldGenAllocBufferSize=", &tail)) {
       julong old_plab_size = 0;
       ArgsRange errcode = parse_memory_size(tail, &old_plab_size, 1);
       if (errcode != arg_in_range) {
@@ -2398,8 +2417,7 @@
       jio_fprintf(defaultStream::error_stream(),
                   "Please use -XX:OldPLABSize in place of "
                   "-XX:ParallelGCOldGenAllocBufferSize in the future\n");
-    } else
-    if (match_option(option, "-XX:ParallelGCToSpaceAllocBufferSize=", &tail)) {
+    } else if (match_option(option, "-XX:ParallelGCToSpaceAllocBufferSize=", &tail)) {
       julong young_plab_size = 0;
       ArgsRange errcode = parse_memory_size(tail, &young_plab_size, 1);
       if (errcode != arg_in_range) {
@@ -2412,8 +2430,7 @@
       jio_fprintf(defaultStream::error_stream(),
                   "Please use -XX:YoungPLABSize in place of "
                   "-XX:ParallelGCToSpaceAllocBufferSize in the future\n");
-    } else
-    if (match_option(option, "-XX:", &tail)) { // -XX:xxxx
+    } else if (match_option(option, "-XX:", &tail)) { // -XX:xxxx
       // Skip -XX:Flags= since that case has already been handled
       if (strncmp(tail, "Flags=", strlen("Flags=")) != 0) {
         if (!process_argument(tail, args->ignoreUnrecognized, origin)) {
@@ -2727,6 +2744,7 @@
     return JNI_EINVAL;
   }
 
+#ifndef KERNEL
   if (UseConcMarkSweepGC) {
     // Set flags for CMS and ParNew.  Check UseConcMarkSweep first
     // to ensure that when both UseConcMarkSweepGC and UseParNewGC
@@ -2744,6 +2762,7 @@
       set_g1_gc_flags();
     }
   }
+#endif // KERNEL
 
 #ifdef SERIALGC
   assert(verify_serial_gc_flags(), "SerialGC unset");
--- a/src/share/vm/runtime/globals.hpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/runtime/globals.hpp	Wed Dec 23 09:23:54 2009 -0800
@@ -1355,10 +1355,46 @@
   product(uintx, ParGCDesiredObjsFromOverflowList, 20,                      \
           "The desired number of objects to claim from the overflow list")  \
                                                                             \
-  product(uintx, CMSParPromoteBlocksToClaim, 50,                            \
+  product(uintx, CMSParPromoteBlocksToClaim, 16,                             \
           "Number of blocks to attempt to claim when refilling CMS LAB for "\
           "parallel GC.")                                                   \
                                                                             \
+  product(uintx, OldPLABWeight, 50,                                         \
+          "Percentage (0-100) used to weight the current sample when "      \
+          "computing exponentially decaying average for resizing CMSParPromoteBlocksToClaim.") \
+                                                                            \
+  product(bool, ResizeOldPLAB, true,                                        \
+          "Dynamically resize (old gen) promotion labs")                    \
+                                                                            \
+  product(bool, PrintOldPLAB, false,                                        \
+          "Print (old gen) promotion labs sizing decisions")                \
+                                                                            \
+  product(uintx, CMSOldPLABMin, 16,                                         \
+          "Min size of CMS gen promotion lab caches per worker per blksize")\
+                                                                            \
+  product(uintx, CMSOldPLABMax, 1024,                                       \
+          "Max size of CMS gen promotion lab caches per worker per blksize")\
+                                                                            \
+  product(uintx, CMSOldPLABNumRefills, 4,                                   \
+          "Nominal number of refills of CMS gen promotion lab cache"        \
+          " per worker per block size")                                     \
+                                                                            \
+  product(bool, CMSOldPLABResizeQuicker, false,                             \
+          "Whether to react on-the-fly during a scavenge to a sudden"       \
+          " change in block demand rate")                                   \
+                                                                            \
+  product(uintx, CMSOldPLABToleranceFactor, 4,                              \
+          "The tolerance of the phase-change detector for on-the-fly"       \
+          " PLAB resizing during a scavenge")                               \
+                                                                            \
+  product(uintx, CMSOldPLABReactivityFactor, 2,                             \
+          "The gain in the feedback loop for on-the-fly PLAB resizing"      \
+          " during a scavenge")                                             \
+                                                                            \
+  product(uintx, CMSOldPLABReactivityCeiling, 10,                           \
+          "The clamping of the gain in the feedback loop for on-the-fly"    \
+          " PLAB resizing during a scavenge")                               \
+                                                                            \
   product(bool, AlwaysPreTouch, false,                                      \
           "It forces all freshly committed pages to be pre-touched.")       \
                                                                             \
@@ -1400,27 +1436,54 @@
           "Percentage (0-100) by which the CMS incremental mode duty cycle" \
           " is shifted to the right within the period between young GCs")   \
                                                                             \
-  product(uintx, CMSExpAvgFactor, 25,                                       \
-          "Percentage (0-100) used to weight the current sample when "      \
-          "computing exponential averages for CMS statistics")              \
-                                                                            \
-  product(uintx, CMS_FLSWeight, 50,                                         \
-          "Percentage (0-100) used to weight the current sample when "      \
-          "computing exponentially decating averages for CMS FLS statistics") \
-                                                                            \
-  product(uintx, CMS_FLSPadding, 2,                                         \
-          "The multiple of deviation from mean to use for buffering "       \
+  product(uintx, CMSExpAvgFactor, 50,                                       \
+          "Percentage (0-100) used to weight the current sample when "      \
+          "computing exponential averages for CMS statistics.")             \
+                                                                            \
+  product(uintx, CMS_FLSWeight, 75,                                         \
+          "Percentage (0-100) used to weight the current sample when "      \
+          "computing exponentially decaying averages for CMS FLS statistics.") \
+                                                                            \
+  product(uintx, CMS_FLSPadding, 1,                                         \
+          "The multiple of deviation from mean to use for buffering "       \
+          "against volatility in free list demand.")                        \
                                                                             \
   product(uintx, FLSCoalescePolicy, 2,                                      \
           "CMS: Aggression level for coalescing, increasing from 0 to 4")   \
                                                                             \
-  product(uintx, CMS_SweepWeight, 50,                                       \
+  product(bool, FLSAlwaysCoalesceLarge, false,                              \
+          "CMS: Larger free blocks are always available for coalescing")    \
+                                                                            \
+  product(double, FLSLargestBlockCoalesceProximity, 0.99,                   \
+          "CMS: the smaller the percentage the greater the coalition force")\
+                                                                            \
+  product(double, CMSSmallCoalSurplusPercent, 1.05,                         \
+          "CMS: the factor by which to inflate estimated demand of small"   \
+          " block sizes to prevent coalescing with an adjoining block")     \
+                                                                            \
+  product(double, CMSLargeCoalSurplusPercent, 0.95,                         \
+          "CMS: the factor by which to inflate estimated demand of large"   \
+          " block sizes to prevent coalescing with an adjoining block")     \
+                                                                            \
+  product(double, CMSSmallSplitSurplusPercent, 1.10,                        \
+          "CMS: the factor by which to inflate estimated demand of small"   \
+          " block sizes to prevent splitting to supply demand for smaller"  \
+          " blocks")                                                        \
+                                                                            \
+  product(double, CMSLargeSplitSurplusPercent, 1.00,                        \
+          "CMS: the factor by which to inflate estimated demand of large"   \
+          " block sizes to prevent splitting to supply demand for smaller"  \
+          " blocks")                                                        \
+                                                                            \
+  product(bool, CMSExtrapolateSweep, false,                                 \
+          "CMS: cushion for block demand during sweep")                     \
+                                                                            \
+  product(uintx, CMS_SweepWeight, 75,                                       \
           "Percentage (0-100) used to weight the current sample when "      \
           "computing exponentially decaying average for inter-sweep "       \
           "duration")                                                       \
                                                                             \
-  product(uintx, CMS_SweepPadding, 2,                                       \
+  product(uintx, CMS_SweepPadding, 1,                                       \
           "The multiple of deviation from mean to use for buffering "       \
           "against volatility in inter-sweep duration.")                    \
                                                                             \
@@ -1459,6 +1522,13 @@
   product(uintx, CMSIndexedFreeListReplenish, 4,                            \
           "Replenish and indexed free list with this number of chunks")     \
                                                                             \
+  product(bool, CMSReplenishIntermediate, true,                             \
+          "Replenish all intermediate free-list caches")                    \
+                                                                            \
+  product(bool, CMSSplitIndexedFreeListBlocks, true,                        \
+          "When satisfying batched demand, split blocks from the "          \
+          "IndexedFreeList whose size is a multiple of requested size")     \
+                                                                            \
   product(bool, CMSLoopWarn, false,                                         \
           "Warn in case of excessive CMS looping")                          \
                                                                             \
@@ -1593,6 +1663,18 @@
           "Bitmap operations should process at most this many bits"         \
           "between yields")                                                 \
                                                                             \
+  product(bool, CMSDumpAtPromotionFailure, false,                           \
+          "Dump useful information about the state of the CMS old "         \
+          "generation upon a promotion failure.")                           \
+                                                                            \
+  product(bool, CMSPrintChunksInDump, false,                                \
+          "In a dump enabled by CMSDumpAtPromotionFailure, include "        \
+          "more detailed information about the free chunks.")               \
+                                                                            \
+  product(bool, CMSPrintObjectsInDump, false,                               \
+          "In a dump enabled by CMSDumpAtPromotionFailure, include "        \
+          "more detailed information about the allocated objects.")         \
+                                                                            \
   diagnostic(bool, FLSVerifyAllHeapReferences, false,                       \
           "Verify that all refs across the FLS boundary "                   \
           " are to valid objects")                                          \
@@ -1677,6 +1759,10 @@
           "The youngest generation collection does not require "            \
           "a guarantee of full promotion of all live objects.")             \
                                                                             \
+  product(bool, PrintPromotionFailure, false,                               \
+          "Print additional diagnostic information following "              \
+          "promotion failure")                                              \
+                                                                            \
   notproduct(bool, PromotionFailureALot, false,                             \
           "Use promotion failure handling on every youngest generation "    \
           "collection")                                                     \
--- a/src/share/vm/services/classLoadingService.cpp	Wed Dec 16 15:12:51 2009 -0800
+++ b/src/share/vm/services/classLoadingService.cpp	Wed Dec 23 09:23:54 2009 -0800
@@ -128,7 +128,7 @@
 
   if (TraceClassUnloading) {
     ResourceMark rm;
-    tty->print_cr("[Unloading class %s]", k->external_name());
+    gclog_or_tty->print_cr("[Unloading class %s]", k->external_name());
   }
 }