changeset 1146:504830073409

Merge
author jmasa
date Mon, 04 Jan 2010 07:58:42 -0800
parents a5a6adfca6ec e018e6884bd8
children 75bd253e25dd
files src/share/vm/runtime/globals.hpp
diffstat 26 files changed, 1105 insertions(+), 351 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.cpp	Mon Jan 04 07:58:42 2010 -0800
@@ -62,12 +62,13 @@
   tl->link_head(tc);
   tl->link_tail(tc);
   tl->set_count(1);
-  tl->init_statistics();
+  tl->init_statistics(true /* split_birth */);
   tl->setParent(NULL);
   tl->setLeft(NULL);
   tl->setRight(NULL);
   return tl;
 }
+
 TreeList* TreeList::as_TreeList(HeapWord* addr, size_t size) {
   TreeChunk* tc = (TreeChunk*) addr;
   assert(size >= sizeof(TreeChunk), "Chunk is too small for a TreeChunk");
@@ -267,6 +268,31 @@
   return retTC;
 }
 
+// Returns the block with the largest heap address amongst
+// those in the list for this size; potentially slow and expensive,
+// use with caution!
+TreeChunk* TreeList::largest_address() {
+  guarantee(head() != NULL, "The head of the list cannot be NULL");
+  FreeChunk* fc = head()->next();
+  TreeChunk* retTC;
+  if (fc == NULL) {
+    retTC = head_as_TreeChunk();
+  } else {
+    // walk down the list and return the one with the highest
+    // heap address among chunks of this size.
+    FreeChunk* last = fc;
+    while (fc->next() != NULL) {
+      if ((HeapWord*)last < (HeapWord*)fc) {
+        last = fc;
+      }
+      fc = fc->next();
+    }
+    retTC = TreeChunk::as_TreeChunk(last);
+  }
+  assert(retTC->list() == this, "Wrong type of chunk.");
+  return retTC;
+}
+
 BinaryTreeDictionary::BinaryTreeDictionary(MemRegion mr, bool splay):
   _splay(splay)
 {
@@ -379,7 +405,7 @@
             break;
           }
           // The evm code reset the hint of the candidate as
-          // at an interrim point.  Why?  Seems like this leaves
+          // at an interim point.  Why?  Seems like this leaves
           // the hint pointing to a list that didn't work.
           // curTL->set_hint(hintTL->size());
         }
@@ -436,7 +462,7 @@
   TreeList *curTL = root();
   if (curTL != NULL) {
     while(curTL->right() != NULL) curTL = curTL->right();
-    return curTL->first_available();
+    return curTL->largest_address();
   } else {
     return NULL;
   }
@@ -664,7 +690,7 @@
     }
   }
   TreeChunk* tc = TreeChunk::as_TreeChunk(fc);
-  // This chunk is being returned to the binary try.  It's embedded
+  // This chunk is being returned to the binary tree.  Its embedded
   // TreeList should be unused at this point.
   tc->initialize();
   if (curTL != NULL) {          // exact match
@@ -807,6 +833,8 @@
 }
 
 bool BinaryTreeDictionary::coalDictOverPopulated(size_t size) {
+  if (FLSAlwaysCoalesceLarge) return true;
+
   TreeList* list_of_size = findList(size);
   // None of requested size implies overpopulated.
   return list_of_size == NULL || list_of_size->coalDesired() <= 0 ||
@@ -854,17 +882,20 @@
   double _percentage;
   float _inter_sweep_current;
   float _inter_sweep_estimate;
+  float _intra_sweep_estimate;
 
  public:
   BeginSweepClosure(double p, float inter_sweep_current,
-                              float inter_sweep_estimate) :
+                              float inter_sweep_estimate,
+                              float intra_sweep_estimate) :
    _percentage(p),
    _inter_sweep_current(inter_sweep_current),
-   _inter_sweep_estimate(inter_sweep_estimate) { }
+   _inter_sweep_estimate(inter_sweep_estimate),
+   _intra_sweep_estimate(intra_sweep_estimate) { }
 
   void do_list(FreeList* fl) {
     double coalSurplusPercent = _percentage;
-    fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate);
+    fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate, _intra_sweep_estimate);
     fl->set_coalDesired((ssize_t)((double)fl->desired() * coalSurplusPercent));
     fl->set_beforeSweep(fl->count());
     fl->set_bfrSurp(fl->surplus());
@@ -939,9 +970,10 @@
 }
 
 void BinaryTreeDictionary::beginSweepDictCensus(double coalSurplusPercent,
-  float inter_sweep_current, float inter_sweep_estimate) {
+  float inter_sweep_current, float inter_sweep_estimate, float intra_sweep_estimate) {
   BeginSweepClosure bsc(coalSurplusPercent, inter_sweep_current,
-                                            inter_sweep_estimate);
+                                            inter_sweep_estimate,
+                                            intra_sweep_estimate);
   bsc.do_tree(root());
 }
 
@@ -1077,13 +1109,13 @@
 // Print census information - counts, births, deaths, etc.
 // for each list in the tree.  Also print some summary
 // information.
-class printTreeCensusClosure : public AscendTreeCensusClosure {
+class PrintTreeCensusClosure : public AscendTreeCensusClosure {
   int _print_line;
   size_t _totalFree;
   FreeList _total;
 
  public:
-  printTreeCensusClosure() {
+  PrintTreeCensusClosure() {
     _print_line = 0;
     _totalFree = 0;
   }
@@ -1113,7 +1145,7 @@
 
   gclog_or_tty->print("\nBinaryTree\n");
   FreeList::print_labels_on(gclog_or_tty, "size");
-  printTreeCensusClosure ptc;
+  PrintTreeCensusClosure ptc;
   ptc.do_tree(root());
 
   FreeList* total = ptc.total();
@@ -1130,6 +1162,38 @@
              /(total->desired() != 0 ? (double)total->desired() : 1.0));
 }
 
+class PrintFreeListsClosure : public AscendTreeCensusClosure {
+  outputStream* _st;
+  int _print_line;
+
+ public:
+  PrintFreeListsClosure(outputStream* st) {
+    _st = st;
+    _print_line = 0;
+  }
+  void do_list(FreeList* fl) {
+    if (++_print_line >= 40) {
+      FreeList::print_labels_on(_st, "size");
+      _print_line = 0;
+    }
+    fl->print_on(gclog_or_tty);
+    size_t sz = fl->size();
+    for (FreeChunk* fc = fl->head(); fc != NULL;
+         fc = fc->next()) {
+      _st->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ")  %s",
+                    fc, (HeapWord*)fc + sz,
+                    fc->cantCoalesce() ? "\t CC" : "");
+    }
+  }
+};
+
+void BinaryTreeDictionary::print_free_lists(outputStream* st) const {
+
+  FreeList::print_labels_on(st, "size");
+  PrintFreeListsClosure pflc(st);
+  pflc.do_tree(root());
+}
+
 // Verify the following tree invariants:
 // . _root has no parent
 // . parent and child point to each other
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/binaryTreeDictionary.hpp	Mon Jan 04 07:58:42 2010 -0800
@@ -42,9 +42,6 @@
   friend class AscendTreeCensusClosure;
   friend class DescendTreeCensusClosure;
   friend class DescendTreeSearchClosure;
-  TreeList* _parent;
-  TreeList* _left;
-  TreeList* _right;
 
  protected:
   TreeList* parent() const { return _parent; }
@@ -82,6 +79,11 @@
   // to a TreeChunk.
   TreeChunk* first_available();
 
+  // Returns the block with the largest heap address amongst
+  // those in the list for this size; potentially slow and expensive,
+  // use with caution!
+  TreeChunk* largest_address();
+
   // removeChunkReplaceIfNeeded() removes the given "tc" from the TreeList.
   // If "tc" is the first chunk in the list, it is also the
   // TreeList that is the node in the tree.  removeChunkReplaceIfNeeded()
@@ -254,8 +256,9 @@
   // Methods called at the beginning of a sweep to prepare the
   // statistics for the sweep.
   void       beginSweepDictCensus(double coalSurplusPercent,
-                                  float sweep_current,
-                                  float sweep_estimate);
+                                  float inter_sweep_current,
+                                  float inter_sweep_estimate,
+                                  float intra_sweep_estimate);
   // Methods called after the end of a sweep to modify the
   // statistics for the sweep.
   void       endSweepDictCensus(double splitSurplusPercent);
@@ -269,6 +272,7 @@
   // Print the statistcis for all the lists in the tree.  Also may
   // print out summaries.
   void       printDictCensus(void) const;
+  void       print_free_lists(outputStream* st) const;
 
   // For debugging.  Returns the sum of the _returnedBytes for
   // all lists in the tree.
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.cpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.cpp	Mon Jan 04 07:58:42 2010 -0800
@@ -32,7 +32,9 @@
 // threads. The second argument is in support of an extra locking
 // check for CFL spaces' free list locks.
 #ifndef PRODUCT
-void CMSLockVerifier::assert_locked(const Mutex* lock, const Mutex* p_lock) {
+void CMSLockVerifier::assert_locked(const Mutex* lock,
+                                    const Mutex* p_lock1,
+                                    const Mutex* p_lock2) {
   if (!Universe::is_fully_initialized()) {
     return;
   }
@@ -40,7 +42,7 @@
   Thread* myThread = Thread::current();
 
   if (lock == NULL) { // a "lock-free" structure, e.g. MUT, protected by CMS token
-    assert(p_lock == NULL, "Unexpected state");
+    assert(p_lock1 == NULL && p_lock2 == NULL, "Unexpected caller error");
     if (myThread->is_ConcurrentGC_thread()) {
       // This test might have to change in the future, if there can be
       // multiple peer CMS threads.  But for now, if we're testing the CMS
@@ -60,36 +62,39 @@
     return;
   }
 
-  if (ParallelGCThreads == 0) {
+  if (myThread->is_VM_thread()
+      || myThread->is_ConcurrentGC_thread()
+      || myThread->is_Java_thread()) {
+    // Make sure that we are holding the associated lock.
     assert_lock_strong(lock);
+    // The checking of p_lock is a spl case for CFLS' free list
+    // locks: we make sure that none of the parallel GC work gang
+    // threads are holding "sub-locks" of freeListLock(). We check only
+    // the parDictionaryAllocLock because the others are too numerous.
+    // This spl case code is somewhat ugly and any improvements
+    // are welcome.
+    assert(p_lock1 == NULL || !p_lock1->is_locked() || p_lock1->owned_by_self(),
+           "Possible race between this and parallel GC threads");
+    assert(p_lock2 == NULL || !p_lock2->is_locked() || p_lock2->owned_by_self(),
+           "Possible race between this and parallel GC threads");
+  } else if (myThread->is_GC_task_thread()) {
+    // Make sure that the VM or CMS thread holds lock on our behalf
+    // XXX If there were a concept of a gang_master for a (set of)
+    // gang_workers, we could have used the identity of that thread
+    // for checking ownership here; for now we just disjunct.
+    assert(lock->owner() == VMThread::vm_thread() ||
+           lock->owner() == ConcurrentMarkSweepThread::cmst(),
+           "Should be locked by VM thread or CMS thread on my behalf");
+    if (p_lock1 != NULL) {
+      assert_lock_strong(p_lock1);
+    }
+    if (p_lock2 != NULL) {
+      assert_lock_strong(p_lock2);
+    }
   } else {
-    if (myThread->is_VM_thread()
-        || myThread->is_ConcurrentGC_thread()
-        || myThread->is_Java_thread()) {
-      // Make sure that we are holding the associated lock.
-      assert_lock_strong(lock);
-      // The checking of p_lock is a spl case for CFLS' free list
-      // locks: we make sure that none of the parallel GC work gang
-      // threads are holding "sub-locks" of freeListLock(). We check only
-      // the parDictionaryAllocLock because the others are too numerous.
-      // This spl case code is somewhat ugly and any improvements
-      // are welcome XXX FIX ME!!
-      if (p_lock != NULL) {
-        assert(!p_lock->is_locked() || p_lock->owned_by_self(),
-               "Possible race between this and parallel GC threads");
-      }
-    } else if (myThread->is_GC_task_thread()) {
-      // Make sure that the VM or CMS thread holds lock on our behalf
-      // XXX If there were a concept of a gang_master for a (set of)
-      // gang_workers, we could have used the identity of that thread
-      // for checking ownership here; for now we just disjunct.
-      assert(lock->owner() == VMThread::vm_thread() ||
-             lock->owner() == ConcurrentMarkSweepThread::cmst(),
-             "Should be locked by VM thread or CMS thread on my behalf");
-    } else {
-      // Make sure we didn't miss some obscure corner case
-      ShouldNotReachHere();
-    }
+    // Make sure we didn't miss some other thread type calling into here;
+    // perhaps as a result of future VM evolution.
+    ShouldNotReachHere();
   }
 }
 #endif
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.hpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsLockVerifier.hpp	Mon Jan 04 07:58:42 2010 -0800
@@ -29,8 +29,11 @@
 // the parallel threads.
 class CMSLockVerifier: AllStatic {
  public:
-  static void assert_locked(const Mutex* lock, const Mutex* p_lock)
+  static void assert_locked(const Mutex* lock, const Mutex* p_lock1, const Mutex* p_lock2)
     PRODUCT_RETURN;
+  static void assert_locked(const Mutex* lock, const Mutex* p_lock) {
+    assert_locked(lock, p_lock, NULL);
+  }
   static void assert_locked(const Mutex* lock) {
     assert_locked(lock, NULL);
   }
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Mon Jan 04 07:58:42 2010 -0800
@@ -62,18 +62,15 @@
   // implementation, namely, the simple binary tree (splaying
   // temporarily disabled).
   switch (dictionaryChoice) {
-    case FreeBlockDictionary::dictionaryBinaryTree:
-      _dictionary = new BinaryTreeDictionary(mr);
-      break;
     case FreeBlockDictionary::dictionarySplayTree:
     case FreeBlockDictionary::dictionarySkipList:
     default:
       warning("dictionaryChoice: selected option not understood; using"
               " default BinaryTreeDictionary implementation instead.");
+    case FreeBlockDictionary::dictionaryBinaryTree:
       _dictionary = new BinaryTreeDictionary(mr);
       break;
   }
-  splitBirth(mr.word_size());
   assert(_dictionary != NULL, "CMS dictionary initialization");
   // The indexed free lists are initially all empty and are lazily
   // filled in on demand. Initialize the array elements to NULL.
@@ -388,6 +385,105 @@
   return res;
 }
 
+void CompactibleFreeListSpace::print_indexed_free_lists(outputStream* st)
+const {
+  reportIndexedFreeListStatistics();
+  gclog_or_tty->print_cr("Layout of Indexed Freelists");
+  gclog_or_tty->print_cr("---------------------------");
+  FreeList::print_labels_on(st, "size");
+  for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
+    _indexedFreeList[i].print_on(gclog_or_tty);
+    for (FreeChunk* fc = _indexedFreeList[i].head(); fc != NULL;
+         fc = fc->next()) {
+      gclog_or_tty->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ")  %s",
+                          fc, (HeapWord*)fc + i,
+                          fc->cantCoalesce() ? "\t CC" : "");
+    }
+  }
+}
+
+void CompactibleFreeListSpace::print_promo_info_blocks(outputStream* st)
+const {
+  _promoInfo.print_on(st);
+}
+
+void CompactibleFreeListSpace::print_dictionary_free_lists(outputStream* st)
+const {
+  _dictionary->reportStatistics();
+  st->print_cr("Layout of Freelists in Tree");
+  st->print_cr("---------------------------");
+  _dictionary->print_free_lists(st);
+}
+
+class BlkPrintingClosure: public BlkClosure {
+  const CMSCollector*             _collector;
+  const CompactibleFreeListSpace* _sp;
+  const CMSBitMap*                _live_bit_map;
+  const bool                      _post_remark;
+  outputStream*                   _st;
+public:
+  BlkPrintingClosure(const CMSCollector* collector,
+                     const CompactibleFreeListSpace* sp,
+                     const CMSBitMap* live_bit_map,
+                     outputStream* st):
+    _collector(collector),
+    _sp(sp),
+    _live_bit_map(live_bit_map),
+    _post_remark(collector->abstract_state() > CMSCollector::FinalMarking),
+    _st(st) { }
+  size_t do_blk(HeapWord* addr);
+};
+
+size_t BlkPrintingClosure::do_blk(HeapWord* addr) {
+  size_t sz = _sp->block_size_no_stall(addr, _collector);
+  assert(sz != 0, "Should always be able to compute a size");
+  if (_sp->block_is_obj(addr)) {
+    const bool dead = _post_remark && !_live_bit_map->isMarked(addr);
+    _st->print_cr(PTR_FORMAT ": %s object of size " SIZE_FORMAT "%s",
+      addr,
+      dead ? "dead" : "live",
+      sz,
+      (!dead && CMSPrintObjectsInDump) ? ":" : ".");
+    if (CMSPrintObjectsInDump && !dead) {
+      oop(addr)->print_on(_st);
+      _st->print_cr("--------------------------------------");
+    }
+  } else { // free block
+    _st->print_cr(PTR_FORMAT ": free block of size " SIZE_FORMAT "%s",
+      addr, sz, CMSPrintChunksInDump ? ":" : ".");
+    if (CMSPrintChunksInDump) {
+      ((FreeChunk*)addr)->print_on(_st);
+      _st->print_cr("--------------------------------------");
+    }
+  }
+  return sz;
+}
+
+void CompactibleFreeListSpace::dump_at_safepoint_with_locks(CMSCollector* c,
+  outputStream* st) {
+  st->print_cr("\n=========================");
+  st->print_cr("Block layout in CMS Heap:");
+  st->print_cr("=========================");
+  BlkPrintingClosure  bpcl(c, this, c->markBitMap(), st);
+  blk_iterate(&bpcl);
+
+  st->print_cr("\n=======================================");
+  st->print_cr("Order & Layout of Promotion Info Blocks");
+  st->print_cr("=======================================");
+  print_promo_info_blocks(st);
+
+  st->print_cr("\n===========================");
+  st->print_cr("Order of Indexed Free Lists");
+  st->print_cr("=========================");
+  print_indexed_free_lists(st);
+
+  st->print_cr("\n=================================");
+  st->print_cr("Order of Free Lists in Dictionary");
+  st->print_cr("=================================");
+  print_dictionary_free_lists(st);
+}
+
+
 void CompactibleFreeListSpace::reportFreeListStatistics() const {
   assert_lock_strong(&_freelistLock);
   assert(PrintFLSStatistics != 0, "Reporting error");
@@ -449,37 +545,37 @@
   if (prevEnd != NULL) {
     // Resize the underlying block offset table.
     _bt.resize(pointer_delta(value, bottom()));
-  if (value <= prevEnd) {
-    assert(value >= unallocated_block(), "New end is below unallocated block");
-  } else {
-    // Now, take this new chunk and add it to the free blocks.
-    // Note that the BOT has not yet been updated for this block.
-    size_t newFcSize = pointer_delta(value, prevEnd);
-    // XXX This is REALLY UGLY and should be fixed up. XXX
-    if (!_adaptive_freelists && _smallLinearAllocBlock._ptr == NULL) {
-      // Mark the boundary of the new block in BOT
-      _bt.mark_block(prevEnd, value);
-      // put it all in the linAB
-      if (ParallelGCThreads == 0) {
-        _smallLinearAllocBlock._ptr = prevEnd;
-        _smallLinearAllocBlock._word_size = newFcSize;
-        repairLinearAllocBlock(&_smallLinearAllocBlock);
-      } else { // ParallelGCThreads > 0
-        MutexLockerEx x(parDictionaryAllocLock(),
-                        Mutex::_no_safepoint_check_flag);
-        _smallLinearAllocBlock._ptr = prevEnd;
-        _smallLinearAllocBlock._word_size = newFcSize;
-        repairLinearAllocBlock(&_smallLinearAllocBlock);
+    if (value <= prevEnd) {
+      assert(value >= unallocated_block(), "New end is below unallocated block");
+    } else {
+      // Now, take this new chunk and add it to the free blocks.
+      // Note that the BOT has not yet been updated for this block.
+      size_t newFcSize = pointer_delta(value, prevEnd);
+      // XXX This is REALLY UGLY and should be fixed up. XXX
+      if (!_adaptive_freelists && _smallLinearAllocBlock._ptr == NULL) {
+        // Mark the boundary of the new block in BOT
+        _bt.mark_block(prevEnd, value);
+        // put it all in the linAB
+        if (ParallelGCThreads == 0) {
+          _smallLinearAllocBlock._ptr = prevEnd;
+          _smallLinearAllocBlock._word_size = newFcSize;
+          repairLinearAllocBlock(&_smallLinearAllocBlock);
+        } else { // ParallelGCThreads > 0
+          MutexLockerEx x(parDictionaryAllocLock(),
+                          Mutex::_no_safepoint_check_flag);
+          _smallLinearAllocBlock._ptr = prevEnd;
+          _smallLinearAllocBlock._word_size = newFcSize;
+          repairLinearAllocBlock(&_smallLinearAllocBlock);
+        }
+        // Births of chunks put into a LinAB are not recorded.  Births
+        // of chunks as they are allocated out of a LinAB are.
+      } else {
+        // Add the block to the free lists, if possible coalescing it
+        // with the last free block, and update the BOT and census data.
+        addChunkToFreeListsAtEndRecordingStats(prevEnd, newFcSize);
       }
-      // Births of chunks put into a LinAB are not recorded.  Births
-      // of chunks as they are allocated out of a LinAB are.
-    } else {
-      // Add the block to the free lists, if possible coalescing it
-      // with the last free block, and update the BOT and census data.
-      addChunkToFreeListsAtEndRecordingStats(prevEnd, newFcSize);
     }
   }
-  }
 }
 
 class FreeListSpace_DCTOC : public Filtering_DCTOC {
@@ -732,7 +828,7 @@
 
 void CompactibleFreeListSpace::object_iterate_mem(MemRegion mr,
                                                   UpwardsObjectClosure* cl) {
-  assert_locked();
+  assert_locked(freelistLock());
   NOT_PRODUCT(verify_objects_initialized());
   Space::object_iterate_mem(mr, cl);
 }
@@ -1212,12 +1308,15 @@
 void CompactibleFreeListSpace::assert_locked() const {
   CMSLockVerifier::assert_locked(freelistLock(), parDictionaryAllocLock());
 }
+
+void CompactibleFreeListSpace::assert_locked(const Mutex* lock) const {
+  CMSLockVerifier::assert_locked(lock);
+}
 #endif
 
 FreeChunk* CompactibleFreeListSpace::allocateScratch(size_t size) {
   // In the parallel case, the main thread holds the free list lock
   // on behalf the parallel threads.
-  assert_locked();
   FreeChunk* fc;
   {
     // If GC is parallel, this might be called by several threads.
@@ -1298,17 +1397,18 @@
     res = blk->_ptr;
     _bt.allocated(res, blk->_word_size);
   } else if (size + MinChunkSize <= blk->_refillSize) {
+    size_t sz = blk->_word_size;
     // Update _unallocated_block if the size is such that chunk would be
     // returned to the indexed free list.  All other chunks in the indexed
     // free lists are allocated from the dictionary so that _unallocated_block
     // has already been adjusted for them.  Do it here so that the cost
     // for all chunks added back to the indexed free lists.
-    if (blk->_word_size < SmallForDictionary) {
-      _bt.allocated(blk->_ptr, blk->_word_size);
+    if (sz < SmallForDictionary) {
+      _bt.allocated(blk->_ptr, sz);
     }
     // Return the chunk that isn't big enough, and then refill below.
-    addChunkToFreeLists(blk->_ptr, blk->_word_size);
-    _bt.verify_single_block(blk->_ptr, (blk->_ptr + blk->_word_size));
+    addChunkToFreeLists(blk->_ptr, sz);
+    splitBirth(sz);
     // Don't keep statistics on adding back chunk from a LinAB.
   } else {
     // A refilled block would not satisfy the request.
@@ -1376,11 +1476,13 @@
     res = getChunkFromIndexedFreeListHelper(size);
   }
   _bt.verify_not_unallocated((HeapWord*) res, size);
+  assert(res == NULL || res->size() == size, "Incorrect block size");
   return res;
 }
 
 FreeChunk*
-CompactibleFreeListSpace::getChunkFromIndexedFreeListHelper(size_t size) {
+CompactibleFreeListSpace::getChunkFromIndexedFreeListHelper(size_t size,
+  bool replenish) {
   assert_locked();
   FreeChunk* fc = NULL;
   if (size < SmallForDictionary) {
@@ -1398,54 +1500,66 @@
       // and replenishing indexed lists from the small linAB.
       //
       FreeChunk* newFc = NULL;
-      size_t replenish_size = CMSIndexedFreeListReplenish * size;
+      const size_t replenish_size = CMSIndexedFreeListReplenish * size;
       if (replenish_size < SmallForDictionary) {
         // Do not replenish from an underpopulated size.
         if (_indexedFreeList[replenish_size].surplus() > 0 &&
             _indexedFreeList[replenish_size].head() != NULL) {
-          newFc =
-            _indexedFreeList[replenish_size].getChunkAtHead();
-        } else {
+          newFc = _indexedFreeList[replenish_size].getChunkAtHead();
+        } else if (bestFitFirst()) {
           newFc = bestFitSmall(replenish_size);
         }
       }
+      if (newFc == NULL && replenish_size > size) {
+        assert(CMSIndexedFreeListReplenish > 1, "ctl pt invariant");
+        newFc = getChunkFromIndexedFreeListHelper(replenish_size, false);
+      }
+      // Note: The stats update re split-death of block obtained above
+      // will be recorded below precisely when we know we are going to
+      // be actually splitting it into more than one pieces below.
       if (newFc != NULL) {
-        splitDeath(replenish_size);
-      } else if (replenish_size > size) {
-        assert(CMSIndexedFreeListReplenish > 1, "ctl pt invariant");
-        newFc =
-          getChunkFromIndexedFreeListHelper(replenish_size);
-      }
-      if (newFc != NULL) {
-        assert(newFc->size() == replenish_size, "Got wrong size");
-        size_t i;
-        FreeChunk *curFc, *nextFc;
-        // carve up and link blocks 0, ..., CMSIndexedFreeListReplenish - 2
-        // The last chunk is not added to the lists but is returned as the
-        // free chunk.
-        for (curFc = newFc, nextFc = (FreeChunk*)((HeapWord*)curFc + size),
-             i = 0;
-             i < (CMSIndexedFreeListReplenish - 1);
-             curFc = nextFc, nextFc = (FreeChunk*)((HeapWord*)nextFc + size),
-             i++) {
+        if  (replenish || CMSReplenishIntermediate) {
+          // Replenish this list and return one block to caller.
+          size_t i;
+          FreeChunk *curFc, *nextFc;
+          size_t num_blk = newFc->size() / size;
+          assert(num_blk >= 1, "Smaller than requested?");
+          assert(newFc->size() % size == 0, "Should be integral multiple of request");
+          if (num_blk > 1) {
+            // we are sure we will be splitting the block just obtained
+            // into multiple pieces; record the split-death of the original
+            splitDeath(replenish_size);
+          }
+          // carve up and link blocks 0, ..., num_blk - 2
+          // The last chunk is not added to the lists but is returned as the
+          // free chunk.
+          for (curFc = newFc, nextFc = (FreeChunk*)((HeapWord*)curFc + size),
+               i = 0;
+               i < (num_blk - 1);
+               curFc = nextFc, nextFc = (FreeChunk*)((HeapWord*)nextFc + size),
+               i++) {
+            curFc->setSize(size);
+            // Don't record this as a return in order to try and
+            // determine the "returns" from a GC.
+            _bt.verify_not_unallocated((HeapWord*) fc, size);
+            _indexedFreeList[size].returnChunkAtTail(curFc, false);
+            _bt.mark_block((HeapWord*)curFc, size);
+            splitBirth(size);
+            // Don't record the initial population of the indexed list
+            // as a split birth.
+          }
+
+          // check that the arithmetic was OK above
+          assert((HeapWord*)nextFc == (HeapWord*)newFc + num_blk*size,
+            "inconsistency in carving newFc");
           curFc->setSize(size);
-          // Don't record this as a return in order to try and
-          // determine the "returns" from a GC.
-          _bt.verify_not_unallocated((HeapWord*) fc, size);
-          _indexedFreeList[size].returnChunkAtTail(curFc, false);
           _bt.mark_block((HeapWord*)curFc, size);
           splitBirth(size);
-          // Don't record the initial population of the indexed list
-          // as a split birth.
+          fc = curFc;
+        } else {
+          // Return entire block to caller
+          fc = newFc;
         }
-
-        // check that the arithmetic was OK above
-        assert((HeapWord*)nextFc == (HeapWord*)newFc + replenish_size,
-          "inconsistency in carving newFc");
-        curFc->setSize(size);
-        _bt.mark_block((HeapWord*)curFc, size);
-        splitBirth(size);
-        return curFc;
       }
     }
   } else {
@@ -1453,7 +1567,7 @@
     // replenish the indexed free list.
     fc = getChunkFromDictionaryExact(size);
   }
-  assert(fc == NULL || fc->isFree(), "Should be returning a free chunk");
+  // assert(fc == NULL || fc->isFree(), "Should be returning a free chunk");
   return fc;
 }
 
@@ -1512,6 +1626,11 @@
   // adjust _unallocated_block downward, as necessary
   _bt.freed((HeapWord*)chunk, size);
   _dictionary->returnChunk(chunk);
+#ifndef PRODUCT
+  if (CMSCollector::abstract_state() != CMSCollector::Sweeping) {
+    TreeChunk::as_TreeChunk(chunk)->list()->verify_stats();
+  }
+#endif // PRODUCT
 }
 
 void
@@ -1525,6 +1644,11 @@
   } else {
     _indexedFreeList[size].returnChunkAtHead(fc);
   }
+#ifndef PRODUCT
+  if (CMSCollector::abstract_state() != CMSCollector::Sweeping) {
+     _indexedFreeList[size].verify_stats();
+  }
+#endif // PRODUCT
 }
 
 // Add chunk to end of last block -- if it's the largest
@@ -1537,7 +1661,6 @@
   HeapWord* chunk, size_t     size) {
   // check that the chunk does lie in this space!
   assert(chunk != NULL && is_in_reserved(chunk), "Not in this space!");
-  assert_locked();
   // One of the parallel gc task threads may be here
   // whilst others are allocating.
   Mutex* lock = NULL;
@@ -1991,24 +2114,26 @@
   return frag;
 }
 
-#define CoalSurplusPercent 1.05
-#define SplitSurplusPercent 1.10
-
 void CompactibleFreeListSpace::beginSweepFLCensus(
   float inter_sweep_current,
-  float inter_sweep_estimate) {
+  float inter_sweep_estimate,
+  float intra_sweep_estimate) {
   assert_locked();
   size_t i;
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
     FreeList* fl    = &_indexedFreeList[i];
-    fl->compute_desired(inter_sweep_current, inter_sweep_estimate);
-    fl->set_coalDesired((ssize_t)((double)fl->desired() * CoalSurplusPercent));
+    if (PrintFLSStatistics > 1) {
+      gclog_or_tty->print("size[%d] : ", i);
+    }
+    fl->compute_desired(inter_sweep_current, inter_sweep_estimate, intra_sweep_estimate);
+    fl->set_coalDesired((ssize_t)((double)fl->desired() * CMSSmallCoalSurplusPercent));
     fl->set_beforeSweep(fl->count());
     fl->set_bfrSurp(fl->surplus());
   }
-  _dictionary->beginSweepDictCensus(CoalSurplusPercent,
+  _dictionary->beginSweepDictCensus(CMSLargeCoalSurplusPercent,
                                     inter_sweep_current,
-                                    inter_sweep_estimate);
+                                    inter_sweep_estimate,
+                                    intra_sweep_estimate);
 }
 
 void CompactibleFreeListSpace::setFLSurplus() {
@@ -2017,7 +2142,7 @@
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
     FreeList *fl = &_indexedFreeList[i];
     fl->set_surplus(fl->count() -
-                    (ssize_t)((double)fl->desired() * SplitSurplusPercent));
+                    (ssize_t)((double)fl->desired() * CMSSmallSplitSurplusPercent));
   }
 }
 
@@ -2048,6 +2173,11 @@
 }
 
 void CompactibleFreeListSpace::endSweepFLCensus(size_t sweep_count) {
+  if (PrintFLSStatistics > 0) {
+    HeapWord* largestAddr = (HeapWord*) dictionary()->findLargestDict();
+    gclog_or_tty->print_cr("CMS: Large block " PTR_FORMAT,
+                           largestAddr);
+  }
   setFLSurplus();
   setFLHints();
   if (PrintGC && PrintFLSCensus > 0) {
@@ -2055,7 +2185,7 @@
   }
   clearFLCensus();
   assert_locked();
-  _dictionary->endSweepDictCensus(SplitSurplusPercent);
+  _dictionary->endSweepDictCensus(CMSLargeSplitSurplusPercent);
 }
 
 bool CompactibleFreeListSpace::coalOverPopulated(size_t size) {
@@ -2312,13 +2442,18 @@
 }
 
 void CompactibleFreeListSpace::verifyIndexedFreeList(size_t size) const {
-  FreeChunk* fc =  _indexedFreeList[size].head();
+  FreeChunk* fc   =  _indexedFreeList[size].head();
+  FreeChunk* tail =  _indexedFreeList[size].tail();
+  size_t    num = _indexedFreeList[size].count();
+  size_t      n = 0;
   guarantee((size % 2 == 0) || fc == NULL, "Odd slots should be empty");
-  for (; fc != NULL; fc = fc->next()) {
+  for (; fc != NULL; fc = fc->next(), n++) {
     guarantee(fc->size() == size, "Size inconsistency");
     guarantee(fc->isFree(), "!free?");
     guarantee(fc->next() == NULL || fc->next()->prev() == fc, "Broken list");
+    guarantee((fc->next() == NULL) == (fc == tail), "Incorrect tail");
   }
+  guarantee(n == num, "Incorrect count");
 }
 
 #ifndef PRODUCT
@@ -2516,11 +2651,41 @@
   _tracking = true;
 }
 
-void PromotionInfo::stopTrackingPromotions() {
+#define CMSPrintPromoBlockInfo 1
+
+void PromotionInfo::stopTrackingPromotions(uint worker_id) {
   assert(_spoolHead == _spoolTail && _firstIndex == _nextIndex,
          "spooling inconsistency?");
   _firstIndex = _nextIndex = 1;
   _tracking = false;
+  if (CMSPrintPromoBlockInfo > 1) {
+    print_statistics(worker_id);
+  }
+}
+
+void PromotionInfo::print_statistics(uint worker_id) const {
+  assert(_spoolHead == _spoolTail && _firstIndex == _nextIndex,
+         "Else will undercount");
+  assert(CMSPrintPromoBlockInfo > 0, "Else unnecessary call");
+  // Count the number of blocks and slots in the free pool
+  size_t slots  = 0;
+  size_t blocks = 0;
+  for (SpoolBlock* cur_spool = _spareSpool;
+       cur_spool != NULL;
+       cur_spool = cur_spool->nextSpoolBlock) {
+    // the first entry is just a self-pointer; indices 1 through
+    // bufferSize - 1 are occupied (thus, bufferSize - 1 slots).
+    guarantee((void*)cur_spool->displacedHdr == (void*)&cur_spool->displacedHdr,
+              "first entry of displacedHdr should be self-referential");
+    slots += cur_spool->bufferSize - 1;
+    blocks++;
+  }
+  if (_spoolHead != NULL) {
+    slots += _spoolHead->bufferSize - 1;
+    blocks++;
+  }
+  gclog_or_tty->print_cr(" [worker %d] promo_blocks = %d, promo_slots = %d ",
+                         worker_id, blocks, slots);
 }
 
 // When _spoolTail is not NULL, then the slot <_spoolTail, _nextIndex>
@@ -2584,15 +2749,84 @@
   guarantee(numDisplacedHdrs == numObjsWithDisplacedHdrs, "Displaced hdr count");
 }
 
+void PromotionInfo::print_on(outputStream* st) const {
+  SpoolBlock* curSpool = NULL;
+  size_t i = 0;
+  st->print_cr("start & end indices: [" SIZE_FORMAT ", " SIZE_FORMAT ")",
+               _firstIndex, _nextIndex);
+  for (curSpool = _spoolHead; curSpool != _spoolTail && curSpool != NULL;
+       curSpool = curSpool->nextSpoolBlock) {
+    curSpool->print_on(st);
+    st->print_cr(" active ");
+    i++;
+  }
+  for (curSpool = _spoolTail; curSpool != NULL;
+       curSpool = curSpool->nextSpoolBlock) {
+    curSpool->print_on(st);
+    st->print_cr(" inactive ");
+    i++;
+  }
+  for (curSpool = _spareSpool; curSpool != NULL;
+       curSpool = curSpool->nextSpoolBlock) {
+    curSpool->print_on(st);
+    st->print_cr(" free ");
+    i++;
+  }
+  st->print_cr(SIZE_FORMAT " header spooling blocks", i);
+}
+
+void SpoolBlock::print_on(outputStream* st) const {
+  st->print("[" PTR_FORMAT "," PTR_FORMAT "), " SIZE_FORMAT " HeapWords -> " PTR_FORMAT,
+            this, (HeapWord*)displacedHdr + bufferSize,
+            bufferSize, nextSpoolBlock);
+}
+
+///////////////////////////////////////////////////////////////////////////
+// CFLS_LAB
+///////////////////////////////////////////////////////////////////////////
+
+#define VECTOR_257(x)                                                                                  \
+  /* 1  2  3  4  5  6  7  8  9 1x 11 12 13 14 15 16 17 18 19 2x 21 22 23 24 25 26 27 28 29 3x 31 32 */ \
+  {  x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x,   \
+     x }
+
+// Initialize with default setting of CMSParPromoteBlocksToClaim, _not_
+// OldPLABSize, whose static default is different; if overridden at the
+// command-line, this will get reinitialized via a call to
+// modify_initialization() below.
+AdaptiveWeightedAverage CFLS_LAB::_blocks_to_claim[]    =
+  VECTOR_257(AdaptiveWeightedAverage(OldPLABWeight, (float)CMSParPromoteBlocksToClaim));
+size_t CFLS_LAB::_global_num_blocks[]  = VECTOR_257(0);
+int    CFLS_LAB::_global_num_workers[] = VECTOR_257(0);
 
 CFLS_LAB::CFLS_LAB(CompactibleFreeListSpace* cfls) :
   _cfls(cfls)
 {
-  _blocks_to_claim = CMSParPromoteBlocksToClaim;
+  assert(CompactibleFreeListSpace::IndexSetSize == 257, "Modify VECTOR_257() macro above");
   for (size_t i = CompactibleFreeListSpace::IndexSetStart;
        i < CompactibleFreeListSpace::IndexSetSize;
        i += CompactibleFreeListSpace::IndexSetStride) {
     _indexedFreeList[i].set_size(i);
+    _num_blocks[i] = 0;
+  }
+}
+
+static bool _CFLS_LAB_modified = false;
+
+void CFLS_LAB::modify_initialization(size_t n, unsigned wt) {
+  assert(!_CFLS_LAB_modified, "Call only once");
+  _CFLS_LAB_modified = true;
+  for (size_t i = CompactibleFreeListSpace::IndexSetStart;
+       i < CompactibleFreeListSpace::IndexSetSize;
+       i += CompactibleFreeListSpace::IndexSetStride) {
+    _blocks_to_claim[i].modify(n, wt, true /* force */);
   }
 }
 
@@ -2607,11 +2841,9 @@
     if (res == NULL) return NULL;
   } else {
     FreeList* fl = &_indexedFreeList[word_sz];
-    bool filled = false; //TRAP
     if (fl->count() == 0) {
-      bool filled = true; //TRAP
       // Attempt to refill this local free list.
-      _cfls->par_get_chunk_of_blocks(word_sz, _blocks_to_claim, fl);
+      get_from_global_pool(word_sz, fl);
       // If it didn't work, give up.
       if (fl->count() == 0) return NULL;
     }
@@ -2626,80 +2858,190 @@
   return (HeapWord*)res;
 }
 
-void CFLS_LAB::retire() {
-  for (size_t i = CompactibleFreeListSpace::IndexSetStart;
+// Get a chunk of blocks of the right size and update related
+// book-keeping stats
+void CFLS_LAB::get_from_global_pool(size_t word_sz, FreeList* fl) {
+  // Get the #blocks we want to claim
+  size_t n_blks = (size_t)_blocks_to_claim[word_sz].average();
+  assert(n_blks > 0, "Error");
+  assert(ResizePLAB || n_blks == OldPLABSize, "Error");
+  // In some cases, when the application has a phase change,
+  // there may be a sudden and sharp shift in the object survival
+  // profile, and updating the counts at the end of a scavenge
+  // may not be quick enough, giving rise to large scavenge pauses
+  // during these phase changes. It is beneficial to detect such
+  // changes on-the-fly during a scavenge and avoid such a phase-change
+  // pothole. The following code is a heuristic attempt to do that.
+  // It is protected by a product flag until we have gained
+  // enough experience with this heuristic and fine-tuned its behaviour.
+  // WARNING: This might increase fragmentation if we overreact to
+  // small spikes, so some kind of historical smoothing based on
+  // previous experience with the greater reactivity might be useful.
+  // Lacking sufficient experience, CMSOldPLABResizeQuicker is disabled by
+  // default.
+  if (ResizeOldPLAB && CMSOldPLABResizeQuicker) {
+    size_t multiple = _num_blocks[word_sz]/(CMSOldPLABToleranceFactor*CMSOldPLABNumRefills*n_blks);
+    n_blks +=  CMSOldPLABReactivityFactor*multiple*n_blks;
+    n_blks = MIN2(n_blks, CMSOldPLABMax);
+  }
+  assert(n_blks > 0, "Error");
+  _cfls->par_get_chunk_of_blocks(word_sz, n_blks, fl);
+  // Update stats table entry for this block size
+  _num_blocks[word_sz] += fl->count();
+}
+
+void CFLS_LAB::compute_desired_plab_size() {
+  for (size_t i =  CompactibleFreeListSpace::IndexSetStart;
        i < CompactibleFreeListSpace::IndexSetSize;
        i += CompactibleFreeListSpace::IndexSetStride) {
-    if (_indexedFreeList[i].count() > 0) {
-      MutexLockerEx x(_cfls->_indexedFreeListParLocks[i],
-                      Mutex::_no_safepoint_check_flag);
-      _cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
-      // Reset this list.
-      _indexedFreeList[i] = FreeList();
-      _indexedFreeList[i].set_size(i);
+    assert((_global_num_workers[i] == 0) == (_global_num_blocks[i] == 0),
+           "Counter inconsistency");
+    if (_global_num_workers[i] > 0) {
+      // Need to smooth wrt historical average
+      if (ResizeOldPLAB) {
+        _blocks_to_claim[i].sample(
+          MAX2((size_t)CMSOldPLABMin,
+          MIN2((size_t)CMSOldPLABMax,
+               _global_num_blocks[i]/(_global_num_workers[i]*CMSOldPLABNumRefills))));
+      }
+      // Reset counters for next round
+      _global_num_workers[i] = 0;
+      _global_num_blocks[i] = 0;
+      if (PrintOldPLAB) {
+        gclog_or_tty->print_cr("[%d]: %d", i, (size_t)_blocks_to_claim[i].average());
+      }
     }
   }
 }
 
-void
-CompactibleFreeListSpace::
-par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) {
+void CFLS_LAB::retire(int tid) {
+  // We run this single threaded with the world stopped;
+  // so no need for locks and such.
+#define CFLS_LAB_PARALLEL_ACCESS 0
+  NOT_PRODUCT(Thread* t = Thread::current();)
+  assert(Thread::current()->is_VM_thread(), "Error");
+  assert(CompactibleFreeListSpace::IndexSetStart == CompactibleFreeListSpace::IndexSetStride,
+         "Will access to uninitialized slot below");
+#if CFLS_LAB_PARALLEL_ACCESS
+  for (size_t i = CompactibleFreeListSpace::IndexSetSize - 1;
+       i > 0;
+       i -= CompactibleFreeListSpace::IndexSetStride) {
+#else // CFLS_LAB_PARALLEL_ACCESS
+  for (size_t i =  CompactibleFreeListSpace::IndexSetStart;
+       i < CompactibleFreeListSpace::IndexSetSize;
+       i += CompactibleFreeListSpace::IndexSetStride) {
+#endif // !CFLS_LAB_PARALLEL_ACCESS
+    assert(_num_blocks[i] >= (size_t)_indexedFreeList[i].count(),
+           "Can't retire more than what we obtained");
+    if (_num_blocks[i] > 0) {
+      size_t num_retire =  _indexedFreeList[i].count();
+      assert(_num_blocks[i] > num_retire, "Should have used at least one");
+      {
+#if CFLS_LAB_PARALLEL_ACCESS
+        MutexLockerEx x(_cfls->_indexedFreeListParLocks[i],
+                        Mutex::_no_safepoint_check_flag);
+#endif // CFLS_LAB_PARALLEL_ACCESS
+        // Update globals stats for num_blocks used
+        _global_num_blocks[i] += (_num_blocks[i] - num_retire);
+        _global_num_workers[i]++;
+        assert(_global_num_workers[i] <= (ssize_t)ParallelGCThreads, "Too big");
+        if (num_retire > 0) {
+          _cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
+          // Reset this list.
+          _indexedFreeList[i] = FreeList();
+          _indexedFreeList[i].set_size(i);
+        }
+      }
+      if (PrintOldPLAB) {
+        gclog_or_tty->print_cr("%d[%d]: %d/%d/%d",
+                               tid, i, num_retire, _num_blocks[i], (size_t)_blocks_to_claim[i].average());
+      }
+      // Reset stats for next round
+      _num_blocks[i]         = 0;
+    }
+  }
+}
+
+void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList* fl) {
   assert(fl->count() == 0, "Precondition.");
   assert(word_sz < CompactibleFreeListSpace::IndexSetSize,
          "Precondition");
 
-  // We'll try all multiples of word_sz in the indexed set (starting with
-  // word_sz itself), then try getting a big chunk and splitting it.
-  int k = 1;
-  size_t cur_sz = k * word_sz;
-  bool found = false;
-  while (cur_sz < CompactibleFreeListSpace::IndexSetSize && k == 1) {
-    FreeList* gfl = &_indexedFreeList[cur_sz];
-    FreeList fl_for_cur_sz;  // Empty.
-    fl_for_cur_sz.set_size(cur_sz);
-    {
-      MutexLockerEx x(_indexedFreeListParLocks[cur_sz],
-                      Mutex::_no_safepoint_check_flag);
-      if (gfl->count() != 0) {
-        size_t nn = MAX2(n/k, (size_t)1);
-        gfl->getFirstNChunksFromList(nn, &fl_for_cur_sz);
-        found = true;
+  // We'll try all multiples of word_sz in the indexed set, starting with
+  // word_sz itself and, if CMSSplitIndexedFreeListBlocks, try larger multiples,
+  // then try getting a big chunk and splitting it.
+  {
+    bool found;
+    int  k;
+    size_t cur_sz;
+    for (k = 1, cur_sz = k * word_sz, found = false;
+         (cur_sz < CompactibleFreeListSpace::IndexSetSize) &&
+         (CMSSplitIndexedFreeListBlocks || k <= 1);
+         k++, cur_sz = k * word_sz) {
+      FreeList* gfl = &_indexedFreeList[cur_sz];
+      FreeList fl_for_cur_sz;  // Empty.
+      fl_for_cur_sz.set_size(cur_sz);
+      {
+        MutexLockerEx x(_indexedFreeListParLocks[cur_sz],
+                        Mutex::_no_safepoint_check_flag);
+        if (gfl->count() != 0) {
+          // nn is the number of chunks of size cur_sz that
+          // we'd need to split k-ways each, in order to create
+          // "n" chunks of size word_sz each.
+          const size_t nn = MAX2(n/k, (size_t)1);
+          gfl->getFirstNChunksFromList(nn, &fl_for_cur_sz);
+          found = true;
+          if (k > 1) {
+            // Update split death stats for the cur_sz-size blocks list:
+            // we increment the split death count by the number of blocks
+            // we just took from the cur_sz-size blocks list and which
+            // we will be splitting below.
+            ssize_t deaths = _indexedFreeList[cur_sz].splitDeaths() +
+                             fl_for_cur_sz.count();
+            _indexedFreeList[cur_sz].set_splitDeaths(deaths);
+          }
+        }
+      }
+      // Now transfer fl_for_cur_sz to fl.  Common case, we hope, is k = 1.
+      if (found) {
+        if (k == 1) {
+          fl->prepend(&fl_for_cur_sz);
+        } else {
+          // Divide each block on fl_for_cur_sz up k ways.
+          FreeChunk* fc;
+          while ((fc = fl_for_cur_sz.getChunkAtHead()) != NULL) {
+            // Must do this in reverse order, so that anybody attempting to
+            // access the main chunk sees it as a single free block until we
+            // change it.
+            size_t fc_size = fc->size();
+            for (int i = k-1; i >= 0; i--) {
+              FreeChunk* ffc = (FreeChunk*)((HeapWord*)fc + i * word_sz);
+              ffc->setSize(word_sz);
+              ffc->linkNext(NULL);
+              ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
+              // Above must occur before BOT is updated below.
+              // splitting from the right, fc_size == (k - i + 1) * wordsize
+              _bt.mark_block((HeapWord*)ffc, word_sz);
+              fc_size -= word_sz;
+              _bt.verify_not_unallocated((HeapWord*)ffc, ffc->size());
+              _bt.verify_single_block((HeapWord*)fc, fc_size);
+              _bt.verify_single_block((HeapWord*)ffc, ffc->size());
+              // Push this on "fl".
+              fl->returnChunkAtHead(ffc);
+            }
+            // TRAP
+            assert(fl->tail()->next() == NULL, "List invariant.");
+          }
+        }
+        // Update birth stats for this block size.
+        size_t num = fl->count();
+        MutexLockerEx x(_indexedFreeListParLocks[word_sz],
+                        Mutex::_no_safepoint_check_flag);
+        ssize_t births = _indexedFreeList[word_sz].splitBirths() + num;
+        _indexedFreeList[word_sz].set_splitBirths(births);
+        return;
       }
     }
-    // Now transfer fl_for_cur_sz to fl.  Common case, we hope, is k = 1.
-    if (found) {
-      if (k == 1) {
-        fl->prepend(&fl_for_cur_sz);
-      } else {
-        // Divide each block on fl_for_cur_sz up k ways.
-        FreeChunk* fc;
-        while ((fc = fl_for_cur_sz.getChunkAtHead()) != NULL) {
-          // Must do this in reverse order, so that anybody attempting to
-          // access the main chunk sees it as a single free block until we
-          // change it.
-          size_t fc_size = fc->size();
-          for (int i = k-1; i >= 0; i--) {
-            FreeChunk* ffc = (FreeChunk*)((HeapWord*)fc + i * word_sz);
-            ffc->setSize(word_sz);
-            ffc->linkNext(NULL);
-            ffc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
-            // Above must occur before BOT is updated below.
-            // splitting from the right, fc_size == (k - i + 1) * wordsize
-            _bt.mark_block((HeapWord*)ffc, word_sz);
-            fc_size -= word_sz;
-            _bt.verify_not_unallocated((HeapWord*)ffc, ffc->size());
-            _bt.verify_single_block((HeapWord*)fc, fc_size);
-            _bt.verify_single_block((HeapWord*)ffc, ffc->size());
-            // Push this on "fl".
-            fl->returnChunkAtHead(ffc);
-          }
-          // TRAP
-          assert(fl->tail()->next() == NULL, "List invariant.");
-        }
-      }
-      return;
-    }
-    k++; cur_sz = k * word_sz;
   }
   // Otherwise, we'll split a block from the dictionary.
   FreeChunk* fc = NULL;
@@ -2723,17 +3065,20 @@
       }
     }
     if (fc == NULL) return;
+    assert((ssize_t)n >= 1, "Control point invariant");
     // Otherwise, split up that block.
-    size_t nn = fc->size() / word_sz;
+    const size_t nn = fc->size() / word_sz;
     n = MIN2(nn, n);
+    assert((ssize_t)n >= 1, "Control point invariant");
     rem = fc->size() - n * word_sz;
     // If there is a remainder, and it's too small, allocate one fewer.
     if (rem > 0 && rem < MinChunkSize) {
       n--; rem += word_sz;
     }
+    assert((ssize_t)n >= 1, "Control point invariant");
     // First return the remainder, if any.
     // Note that we hold the lock until we decide if we're going to give
-    // back the remainder to the dictionary, since a contending allocator
+    // back the remainder to the dictionary, since a concurrent allocation
     // may otherwise see the heap as empty.  (We're willing to take that
     // hit if the block is a small block.)
     if (rem > 0) {
@@ -2743,18 +3088,16 @@
       rem_fc->linkNext(NULL);
       rem_fc->linkPrev(NULL); // Mark as a free block for other (parallel) GC threads.
       // Above must occur before BOT is updated below.
+      assert((ssize_t)n > 0 && prefix_size > 0 && rem_fc > fc, "Error");
       _bt.split_block((HeapWord*)fc, fc->size(), prefix_size);
       if (rem >= IndexSetSize) {
         returnChunkToDictionary(rem_fc);
-        dictionary()->dictCensusUpdate(fc->size(),
-                                       true /*split*/,
-                                       true /*birth*/);
+        dictionary()->dictCensusUpdate(rem, true /*split*/, true /*birth*/);
         rem_fc = NULL;
       }
       // Otherwise, return it to the small list below.
     }
   }
-  //
   if (rem_fc != NULL) {
     MutexLockerEx x(_indexedFreeListParLocks[rem],
                     Mutex::_no_safepoint_check_flag);
@@ -2762,7 +3105,7 @@
     _indexedFreeList[rem].returnChunkAtHead(rem_fc);
     smallSplitBirth(rem);
   }
-
+  assert((ssize_t)n > 0 && fc != NULL, "Consistency");
   // Now do the splitting up.
   // Must do this in reverse order, so that anybody attempting to
   // access the main chunk sees it as a single free block until we
@@ -2792,13 +3135,15 @@
   _bt.verify_single_block((HeapWord*)fc, fc->size());
   fl->returnChunkAtHead(fc);
 
+  assert((ssize_t)n > 0 && (ssize_t)n == fl->count(), "Incorrect number of blocks");
   {
+    // Update the stats for this block size.
     MutexLockerEx x(_indexedFreeListParLocks[word_sz],
                     Mutex::_no_safepoint_check_flag);
-    ssize_t new_births = _indexedFreeList[word_sz].splitBirths() + n;
-    _indexedFreeList[word_sz].set_splitBirths(new_births);
-    ssize_t new_surplus = _indexedFreeList[word_sz].surplus() + n;
-    _indexedFreeList[word_sz].set_surplus(new_surplus);
+    const ssize_t births = _indexedFreeList[word_sz].splitBirths() + n;
+    _indexedFreeList[word_sz].set_splitBirths(births);
+    // ssize_t new_surplus = _indexedFreeList[word_sz].surplus() + n;
+    // _indexedFreeList[word_sz].set_surplus(new_surplus);
   }
 
   // TRAP
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Mon Jan 04 07:58:42 2010 -0800
@@ -25,8 +25,6 @@
 // Classes in support of keeping track of promotions into a non-Contiguous
 // space, in this case a CompactibleFreeListSpace.
 
-#define CFLS_LAB_REFILL_STATS 0
-
 // Forward declarations
 class CompactibleFreeListSpace;
 class BlkClosure;
@@ -89,6 +87,9 @@
     displacedHdr = (markOop*)&displacedHdr;
     nextSpoolBlock = NULL;
   }
+
+  void print_on(outputStream* st) const;
+  void print() const { print_on(gclog_or_tty); }
 };
 
 class PromotionInfo VALUE_OBJ_CLASS_SPEC {
@@ -121,7 +122,7 @@
     return _promoHead == NULL;
   }
   void startTrackingPromotions();
-  void stopTrackingPromotions();
+  void stopTrackingPromotions(uint worker_id = 0);
   bool tracking() const          { return _tracking;  }
   void track(PromotedObject* trackOop);      // keep track of a promoted oop
   // The following variant must be used when trackOop is not fully
@@ -161,6 +162,9 @@
     _nextIndex = 0;
 
   }
+
+  void print_on(outputStream* st) const;
+  void print_statistics(uint worker_id) const;
 };
 
 class LinearAllocBlock VALUE_OBJ_CLASS_SPEC {
@@ -243,6 +247,7 @@
   mutable Mutex _freelistLock;
   // locking verifier convenience function
   void assert_locked() const PRODUCT_RETURN;
+  void assert_locked(const Mutex* lock) const PRODUCT_RETURN;
 
   // Linear allocation blocks
   LinearAllocBlock _smallLinearAllocBlock;
@@ -281,13 +286,6 @@
   // Locks protecting the exact lists during par promotion allocation.
   Mutex* _indexedFreeListParLocks[IndexSetSize];
 
-#if CFLS_LAB_REFILL_STATS
-  // Some statistics.
-  jint  _par_get_chunk_from_small;
-  jint  _par_get_chunk_from_large;
-#endif
-
-
   // Attempt to obtain up to "n" blocks of the size "word_sz" (which is
   // required to be smaller than "IndexSetSize".)  If successful,
   // adds them to "fl", which is required to be an empty free list.
@@ -320,7 +318,7 @@
   // Helper function for getChunkFromIndexedFreeList.
   // Replenish the indexed free list for this "size".  Do not take from an
   // underpopulated size.
-  FreeChunk*  getChunkFromIndexedFreeListHelper(size_t size);
+  FreeChunk*  getChunkFromIndexedFreeListHelper(size_t size, bool replenish = true);
 
   // Get a chunk from the indexed free list.  If the indexed free list
   // does not have a free chunk, try to replenish the indexed free list
@@ -430,10 +428,6 @@
   void initialize_sequential_subtasks_for_marking(int n_threads,
          HeapWord* low = NULL);
 
-#if CFLS_LAB_REFILL_STATS
-  void print_par_alloc_stats();
-#endif
-
   // Space enquiries
   size_t used() const;
   size_t free() const;
@@ -617,6 +611,12 @@
   // Do some basic checks on the the free lists.
   void checkFreeListConsistency()         const PRODUCT_RETURN;
 
+  // Printing support
+  void dump_at_safepoint_with_locks(CMSCollector* c, outputStream* st);
+  void print_indexed_free_lists(outputStream* st) const;
+  void print_dictionary_free_lists(outputStream* st) const;
+  void print_promo_info_blocks(outputStream* st) const;
+
   NOT_PRODUCT (
     void initializeIndexedFreeListArrayReturnedBytes();
     size_t sumIndexedFreeListArrayReturnedBytes();
@@ -638,8 +638,9 @@
 
   // Statistics functions
   // Initialize census for lists before the sweep.
-  void beginSweepFLCensus(float sweep_current,
-                          float sweep_estimate);
+  void beginSweepFLCensus(float inter_sweep_current,
+                          float inter_sweep_estimate,
+                          float intra_sweep_estimate);
   // Set the surplus for each of the free lists.
   void setFLSurplus();
   // Set the hint for each of the free lists.
@@ -730,16 +731,17 @@
   FreeList _indexedFreeList[CompactibleFreeListSpace::IndexSetSize];
 
   // Initialized from a command-line arg.
-  size_t _blocks_to_claim;
 
-#if CFLS_LAB_REFILL_STATS
-  // Some statistics.
-  int _refills;
-  int _blocksTaken;
-  static int _tot_refills;
-  static int _tot_blocksTaken;
-  static int _next_threshold;
-#endif
+  // Allocation statistics in support of dynamic adjustment of
+  // #blocks to claim per get_from_global_pool() call below.
+  static AdaptiveWeightedAverage
+                 _blocks_to_claim  [CompactibleFreeListSpace::IndexSetSize];
+  static size_t _global_num_blocks [CompactibleFreeListSpace::IndexSetSize];
+  static int    _global_num_workers[CompactibleFreeListSpace::IndexSetSize];
+  size_t        _num_blocks        [CompactibleFreeListSpace::IndexSetSize];
+
+  // Internal work method
+  void get_from_global_pool(size_t word_sz, FreeList* fl);
 
 public:
   CFLS_LAB(CompactibleFreeListSpace* cfls);
@@ -748,7 +750,12 @@
   HeapWord* alloc(size_t word_sz);
 
   // Return any unused portions of the buffer to the global pool.
-  void retire();
+  void retire(int tid);
+
+  // Dynamic OldPLABSize sizing
+  static void compute_desired_plab_size();
+  // When the settings are modified from default static initialization
+  static void modify_initialization(size_t n, unsigned wt);
 };
 
 size_t PromotionInfo::refillSize() const {
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Mon Jan 04 07:58:42 2010 -0800
@@ -253,7 +253,6 @@
   }
 }
 
-
 void ConcurrentMarkSweepGeneration::ref_processor_init() {
   assert(collector() != NULL, "no collector");
   collector()->ref_processor_init();
@@ -341,6 +340,14 @@
   _icms_duty_cycle = CMSIncrementalDutyCycle;
 }
 
+double CMSStats::cms_free_adjustment_factor(size_t free) const {
+  // TBD: CR 6909490
+  return 1.0;
+}
+
+void CMSStats::adjust_cms_free_adjustment_factor(bool fail, size_t free) {
+}
+
 // If promotion failure handling is on use
 // the padded average size of the promotion for each
 // young generation collection.
@@ -361,7 +368,11 @@
 
     // Adjust by the safety factor.
     double cms_free_dbl = (double)cms_free;
-    cms_free_dbl = cms_free_dbl * (100.0 - CMSIncrementalSafetyFactor) / 100.0;
+    double cms_adjustment = (100.0 - CMSIncrementalSafetyFactor)/100.0;
+    // Apply a further correction factor which tries to adjust
+    // for recent occurance of concurrent mode failures.
+    cms_adjustment = cms_adjustment * cms_free_adjustment_factor(cms_free);
+    cms_free_dbl = cms_free_dbl * cms_adjustment;
 
     if (PrintGCDetails && Verbose) {
       gclog_or_tty->print_cr("CMSStats::time_until_cms_gen_full: cms_free "
@@ -395,6 +406,8 @@
   // late.
   double work = cms_duration() + gc0_period();
   double deadline = time_until_cms_gen_full();
+  // If a concurrent mode failure occurred recently, we want to be
+  // more conservative and halve our expected time_until_cms_gen_full()
   if (work > deadline) {
     if (Verbose && PrintGCDetails) {
       gclog_or_tty->print(
@@ -556,7 +569,8 @@
   _should_unload_classes(false),
   _concurrent_cycles_since_last_unload(0),
   _roots_scanning_options(0),
-  _sweep_estimate(CMS_SweepWeight, CMS_SweepPadding)
+  _inter_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding),
+  _intra_sweep_estimate(CMS_SweepWeight, CMS_SweepPadding)
 {
   if (ExplicitGCInvokesConcurrentAndUnloadsClasses) {
     ExplicitGCInvokesConcurrent = true;
@@ -773,7 +787,7 @@
   NOT_PRODUCT(_overflow_counter = CMSMarkStackOverflowInterval;)
   _gc_counters = new CollectorCounters("CMS", 1);
   _completed_initialization = true;
-  _sweep_timer.start();  // start of time
+  _inter_sweep_timer.start();  // start of time
 }
 
 const char* ConcurrentMarkSweepGeneration::name() const {
@@ -900,6 +914,14 @@
   return result;
 }
 
+// At a promotion failure dump information on block layout in heap
+// (cms old generation).
+void ConcurrentMarkSweepGeneration::promotion_failure_occurred() {
+  if (CMSDumpAtPromotionFailure) {
+    cmsSpace()->dump_at_safepoint_with_locks(collector(), gclog_or_tty);
+  }
+}
+
 CompactibleSpace*
 ConcurrentMarkSweepGeneration::first_compaction_space() const {
   return _cmsSpace;
@@ -1368,12 +1390,7 @@
 ConcurrentMarkSweepGeneration::
 par_promote_alloc_done(int thread_num) {
   CMSParGCThreadState* ps = _par_gc_thread_states[thread_num];
-  ps->lab.retire();
-#if CFLS_LAB_REFILL_STATS
-  if (thread_num == 0) {
-    _cmsSpace->print_par_alloc_stats();
-  }
-#endif
+  ps->lab.retire(thread_num);
 }
 
 void
@@ -1974,11 +1991,14 @@
   // We must adjust the allocation statistics being maintained
   // in the free list space. We do so by reading and clearing
   // the sweep timer and updating the block flux rate estimates below.
-  assert(_sweep_timer.is_active(), "We should never see the timer inactive");
-  _sweep_timer.stop();
-  // Note that we do not use this sample to update the _sweep_estimate.
-  _cmsGen->cmsSpace()->beginSweepFLCensus((float)(_sweep_timer.seconds()),
-                                          _sweep_estimate.padded_average());
+  assert(!_intra_sweep_timer.is_active(), "_intra_sweep_timer should be inactive");
+  if (_inter_sweep_timer.is_active()) {
+    _inter_sweep_timer.stop();
+    // Note that we do not use this sample to update the _inter_sweep_estimate.
+    _cmsGen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()),
+                                            _inter_sweep_estimate.padded_average(),
+                                            _intra_sweep_estimate.padded_average());
+  }
 
   GenMarkSweep::invoke_at_safepoint(_cmsGen->level(),
     ref_processor(), clear_all_soft_refs);
@@ -2015,10 +2035,10 @@
   }
 
   // Adjust the per-size allocation stats for the next epoch.
-  _cmsGen->cmsSpace()->endSweepFLCensus(sweepCount() /* fake */);
-  // Restart the "sweep timer" for next epoch.
-  _sweep_timer.reset();
-  _sweep_timer.start();
+  _cmsGen->cmsSpace()->endSweepFLCensus(sweep_count() /* fake */);
+  // Restart the "inter sweep timer" for the next epoch.
+  _inter_sweep_timer.reset();
+  _inter_sweep_timer.start();
 
   // Sample collection pause time and reset for collection interval.
   if (UseAdaptiveSizePolicy) {
@@ -2676,7 +2696,7 @@
   // Also reset promotion tracking in par gc thread states.
   if (ParallelGCThreads > 0) {
     for (uint i = 0; i < ParallelGCThreads; i++) {
-      _par_gc_thread_states[i]->promo.stopTrackingPromotions();
+      _par_gc_thread_states[i]->promo.stopTrackingPromotions(i);
     }
   }
 }
@@ -2771,7 +2791,7 @@
   bool do_bit(size_t offset) {
     HeapWord* addr = _marks->offsetToHeapWord(offset);
     if (!_marks->isMarked(addr)) {
-      oop(addr)->print();
+      oop(addr)->print_on(gclog_or_tty);
       gclog_or_tty->print_cr(" ("INTPTR_FORMAT" should have been marked)", addr);
       _failed = true;
     }
@@ -2820,7 +2840,7 @@
   // Clear any marks from a previous round
   verification_mark_bm()->clear_all();
   assert(verification_mark_stack()->isEmpty(), "markStack should be empty");
-  assert(overflow_list_is_empty(), "overflow list should be empty");
+  verify_work_stacks_empty();
 
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   gch->ensure_parsability(false);  // fill TLABs, but no need to retire them
@@ -2893,8 +2913,8 @@
   verification_mark_bm()->iterate(&vcl);
   if (vcl.failed()) {
     gclog_or_tty->print("Verification failed");
-    Universe::heap()->print();
-    fatal(" ... aborting");
+    Universe::heap()->print_on(gclog_or_tty);
+    fatal("CMS: failed marking verification after remark");
   }
 }
 
@@ -3314,7 +3334,7 @@
     Universe::heap()->barrier_set()->resize_covered_region(mr);
     // Hmmmm... why doesn't CFLS::set_end verify locking?
     // This is quite ugly; FIX ME XXX
-    _cmsSpace->assert_locked();
+    _cmsSpace->assert_locked(freelistLock());
     _cmsSpace->set_end((HeapWord*)_virtual_space.high());
 
     // update the space and generation capacity counters
@@ -5868,9 +5888,9 @@
   check_correct_thread_executing();
   verify_work_stacks_empty();
   verify_overflow_empty();
-  incrementSweepCount();
-  _sweep_timer.stop();
-  _sweep_estimate.sample(_sweep_timer.seconds());
+  increment_sweep_count();
+  _inter_sweep_timer.stop();
+  _inter_sweep_estimate.sample(_inter_sweep_timer.seconds());
   size_policy()->avg_cms_free_at_sweep()->sample(_cmsGen->free());
 
   // PermGen verification support: If perm gen sweeping is disabled in
@@ -5893,6 +5913,9 @@
     }
   }
 
+  assert(!_intra_sweep_timer.is_active(), "Should not be active");
+  _intra_sweep_timer.reset();
+  _intra_sweep_timer.start();
   if (asynch) {
     TraceCPUTime tcpu(PrintGCDetails, true, gclog_or_tty);
     CMSPhaseAccounting pa(this, "sweep", !PrintGCDetails);
@@ -5937,8 +5960,11 @@
   verify_work_stacks_empty();
   verify_overflow_empty();
 
-  _sweep_timer.reset();
-  _sweep_timer.start();
+  _intra_sweep_timer.stop();
+  _intra_sweep_estimate.sample(_intra_sweep_timer.seconds());
+
+  _inter_sweep_timer.reset();
+  _inter_sweep_timer.start();
 
   update_time_of_last_gc(os::javaTimeMillis());
 
@@ -5981,11 +6007,11 @@
 // FIX ME!!! Looks like this belongs in CFLSpace, with
 // CMSGen merely delegating to it.
 void ConcurrentMarkSweepGeneration::setNearLargestChunk() {
-  double nearLargestPercent = 0.999;
+  double nearLargestPercent = FLSLargestBlockCoalesceProximity;
   HeapWord*  minAddr        = _cmsSpace->bottom();
   HeapWord*  largestAddr    =
     (HeapWord*) _cmsSpace->dictionary()->findLargestDict();
-  if (largestAddr == 0) {
+  if (largestAddr == NULL) {
     // The dictionary appears to be empty.  In this case
     // try to coalesce at the end of the heap.
     largestAddr = _cmsSpace->end();
@@ -5993,6 +6019,13 @@
   size_t largestOffset     = pointer_delta(largestAddr, minAddr);
   size_t nearLargestOffset =
     (size_t)((double)largestOffset * nearLargestPercent) - MinChunkSize;
+  if (PrintFLSStatistics != 0) {
+    gclog_or_tty->print_cr(
+      "CMS: Large Block: " PTR_FORMAT ";"
+      " Proximity: " PTR_FORMAT " -> " PTR_FORMAT,
+      largestAddr,
+      _cmsSpace->nearLargestChunk(), minAddr + nearLargestOffset);
+  }
   _cmsSpace->set_nearLargestChunk(minAddr + nearLargestOffset);
 }
 
@@ -6072,9 +6105,11 @@
   assert_lock_strong(gen->freelistLock());
   assert_lock_strong(bitMapLock());
 
-  assert(!_sweep_timer.is_active(), "Was switched off in an outer context");
-  gen->cmsSpace()->beginSweepFLCensus((float)(_sweep_timer.seconds()),
-                                      _sweep_estimate.padded_average());
+  assert(!_inter_sweep_timer.is_active(), "Was switched off in an outer context");
+  assert(_intra_sweep_timer.is_active(),  "Was switched on  in an outer context");
+  gen->cmsSpace()->beginSweepFLCensus((float)(_inter_sweep_timer.seconds()),
+                                      _inter_sweep_estimate.padded_average(),
+                                      _intra_sweep_estimate.padded_average());
   gen->setNearLargestChunk();
 
   {
@@ -6087,7 +6122,7 @@
     // end-of-sweep-census below will be off by a little bit.
   }
   gen->cmsSpace()->sweep_completed();
-  gen->cmsSpace()->endSweepFLCensus(sweepCount());
+  gen->cmsSpace()->endSweepFLCensus(sweep_count());
   if (should_unload_classes()) {                // unloaded classes this cycle,
     _concurrent_cycles_since_last_unload = 0;   // ... reset count
   } else {                                      // did not unload classes,
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Mon Jan 04 07:58:42 2010 -0800
@@ -355,6 +355,11 @@
                                              unsigned int new_duty_cycle);
   unsigned int icms_update_duty_cycle_impl();
 
+  // In support of adjusting of cms trigger ratios based on history
+  // of concurrent mode failure.
+  double cms_free_adjustment_factor(size_t free) const;
+  void   adjust_cms_free_adjustment_factor(bool fail, size_t free);
+
  public:
   CMSStats(ConcurrentMarkSweepGeneration* cms_gen,
            unsigned int alpha = CMSExpAvgFactor);
@@ -570,8 +575,11 @@
   // appropriately.
   void check_gc_time_limit();
   // XXX Move these to CMSStats ??? FIX ME !!!
-  elapsedTimer _sweep_timer;
-  AdaptivePaddedAverage _sweep_estimate;
+  elapsedTimer _inter_sweep_timer;   // time between sweeps
+  elapsedTimer _intra_sweep_timer;   // time _in_ sweeps
+  // padded decaying average estimates of the above
+  AdaptivePaddedAverage _inter_sweep_estimate;
+  AdaptivePaddedAverage _intra_sweep_estimate;
 
  protected:
   ConcurrentMarkSweepGeneration* _cmsGen;  // old gen (CMS)
@@ -625,6 +633,7 @@
   // . _collectorState <= Idling ==  post-sweep && pre-mark
   // . _collectorState in (Idling, Sweeping) == {initial,final}marking ||
   //                                            precleaning || abortablePrecleanb
+ public:
   enum CollectorState {
     Resizing            = 0,
     Resetting           = 1,
@@ -636,6 +645,7 @@
     FinalMarking        = 7,
     Sweeping            = 8
   };
+ protected:
   static CollectorState _collectorState;
 
   // State related to prologue/epilogue invocation for my generations
@@ -655,7 +665,7 @@
 
   int    _numYields;
   size_t _numDirtyCards;
-  uint   _sweepCount;
+  size_t _sweep_count;
   // number of full gc's since the last concurrent gc.
   uint   _full_gcs_since_conc_gc;
 
@@ -905,7 +915,7 @@
 
   // Check that the currently executing thread is the expected
   // one (foreground collector or background collector).
-  void check_correct_thread_executing()        PRODUCT_RETURN;
+  static void check_correct_thread_executing() PRODUCT_RETURN;
   // XXXPERM void print_statistics()           PRODUCT_RETURN;
 
   bool is_cms_reachable(HeapWord* addr);
@@ -930,8 +940,8 @@
   static void set_foregroundGCShouldWait(bool v) { _foregroundGCShouldWait = v; }
   static bool foregroundGCIsActive() { return _foregroundGCIsActive; }
   static void set_foregroundGCIsActive(bool v) { _foregroundGCIsActive = v; }
-  uint  sweepCount() const             { return _sweepCount; }
-  void incrementSweepCount()           { _sweepCount++; }
+  size_t sweep_count() const             { return _sweep_count; }
+  void   increment_sweep_count()         { _sweep_count++; }
 
   // Timers/stats for gc scheduling and incremental mode pacing.
   CMSStats& stats() { return _stats; }
@@ -1165,6 +1175,11 @@
   virtual bool promotion_attempt_is_safe(size_t promotion_in_bytes,
     bool younger_handles_promotion_failure) const;
 
+  // Inform this (non-young) generation that a promotion failure was
+  // encountered during a collection of a younger generation that
+  // promotes into this generation.
+  virtual void promotion_failure_occurred();
+
   bool should_collect(bool full, size_t size, bool tlab);
   virtual bool should_concurrent_collect() const;
   virtual bool is_too_full() const;
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeBlockDictionary.hpp	Mon Jan 04 07:58:42 2010 -0800
@@ -55,7 +55,8 @@
   virtual void       dictCensusUpdate(size_t size, bool split, bool birth) = 0;
   virtual bool       coalDictOverPopulated(size_t size) = 0;
   virtual void       beginSweepDictCensus(double coalSurplusPercent,
-                       float sweep_current, float sweep_ewstimate) = 0;
+                       float inter_sweep_current, float inter_sweep_estimate,
+                       float intra__sweep_current) = 0;
   virtual void       endSweepDictCensus(double splitSurplusPercent) = 0;
   virtual FreeChunk* findLargestDict() const = 0;
   // verify that the given chunk is in the dictionary.
@@ -79,6 +80,7 @@
   }
 
   virtual void       printDictCensus() const = 0;
+  virtual void       print_free_lists(outputStream* st) const = 0;
 
   virtual void       verify()         const = 0;
 
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.cpp	Mon Jan 04 07:58:42 2010 -0800
@@ -67,3 +67,8 @@
   }
 }
 #endif
+
+void FreeChunk::print_on(outputStream* st) {
+  st->print_cr("Next: " PTR_FORMAT " Prev: " PTR_FORMAT " %s",
+    next(), prev(), cantCoalesce() ? "[can't coalesce]" : "");
+}
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Mon Jan 04 07:58:42 2010 -0800
@@ -129,6 +129,8 @@
   void verifyList()         const PRODUCT_RETURN;
   void mangleAllocated(size_t size) PRODUCT_RETURN;
   void mangleFreed(size_t size)     PRODUCT_RETURN;
+
+  void print_on(outputStream* st);
 };
 
 // Alignment helpers etc.
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.cpp	Mon Jan 04 07:58:42 2010 -0800
@@ -81,8 +81,8 @@
   set_hint(hint);
 }
 
-void FreeList::init_statistics() {
-  _allocation_stats.initialize();
+void FreeList::init_statistics(bool split_birth) {
+  _allocation_stats.initialize(split_birth);
 }
 
 FreeChunk* FreeList::getChunkAtHead() {
@@ -292,14 +292,31 @@
 }
 
 #ifndef PRODUCT
+void FreeList::verify_stats() const {
+  // The +1 of the LH comparand is to allow some "looseness" in
+  // checking: we usually call this interface when adding a block
+  // and we'll subsequently update the stats; we cannot update the
+  // stats beforehand because in the case of the large-block BT
+  // dictionary for example, this might be the first block and
+  // in that case there would be no place that we could record
+  // the stats (which are kept in the block itself).
+  assert(_allocation_stats.prevSweep() + _allocation_stats.splitBirths() + 1   // Total Stock + 1
+          >= _allocation_stats.splitDeaths() + (ssize_t)count(), "Conservation Principle");
+}
+
 void FreeList::assert_proper_lock_protection_work() const {
-#ifdef ASSERT
-  if (_protecting_lock != NULL &&
-      SharedHeap::heap()->n_par_threads() > 0) {
-    // Should become an assert.
-    guarantee(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED");
+  assert(_protecting_lock != NULL, "Don't call this directly");
+  assert(ParallelGCThreads > 0, "Don't call this directly");
+  Thread* thr = Thread::current();
+  if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) {
+    // assert that we are holding the freelist lock
+  } else if (thr->is_GC_task_thread()) {
+    assert(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED");
+  } else if (thr->is_Java_thread()) {
+    assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing");
+  } else {
+    ShouldNotReachHere();  // unaccounted thread type?
   }
-#endif
 }
 #endif
 
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeList.hpp	Mon Jan 04 07:58:42 2010 -0800
@@ -35,18 +35,26 @@
 // for that implementation.
 
 class Mutex;
+class TreeList;
 
 class FreeList VALUE_OBJ_CLASS_SPEC {
   friend class CompactibleFreeListSpace;
   friend class VMStructs;
-  friend class printTreeCensusClosure;
-  FreeChunk*    _head;          // List of free chunks
+  friend class PrintTreeCensusClosure;
+
+ protected:
+  TreeList* _parent;
+  TreeList* _left;
+  TreeList* _right;
+
+ private:
+  FreeChunk*    _head;          // Head of list of free chunks
   FreeChunk*    _tail;          // Tail of list of free chunks
-  size_t        _size;          // Size in Heap words of each chunks
+  size_t        _size;          // Size in Heap words of each chunk
   ssize_t       _count;         // Number of entries in list
   size_t        _hint;          // next larger size list with a positive surplus
 
-  AllocationStats _allocation_stats;            // statistics for smart allocation
+  AllocationStats _allocation_stats; // allocation-related statistics
 
 #ifdef ASSERT
   Mutex*        _protecting_lock;
@@ -63,9 +71,12 @@
 
   // Initialize the allocation statistics.
  protected:
-  void init_statistics();
+  void init_statistics(bool split_birth = false);
   void set_count(ssize_t v) { _count = v;}
-  void increment_count()    { _count++; }
+  void increment_count()    {
+    _count++;
+  }
+
   void decrement_count() {
     _count--;
     assert(_count >= 0, "Count should not be negative");
@@ -167,11 +178,13 @@
     _allocation_stats.set_desired(v);
   }
   void compute_desired(float inter_sweep_current,
-                       float inter_sweep_estimate) {
+                       float inter_sweep_estimate,
+                       float intra_sweep_estimate) {
     assert_proper_lock_protection();
     _allocation_stats.compute_desired(_count,
                                       inter_sweep_current,
-                                      inter_sweep_estimate);
+                                      inter_sweep_estimate,
+                                      intra_sweep_estimate);
   }
   ssize_t coalDesired() const {
     return _allocation_stats.coalDesired();
@@ -306,6 +319,9 @@
   // found.  Return NULL if "fc" is not found.
   bool verifyChunkInFreeLists(FreeChunk* fc) const;
 
+  // Stats verification
+  void verify_stats() const PRODUCT_RETURN;
+
   // Printing support
   static void print_labels_on(outputStream* st, const char* c);
   void print_on(outputStream* st, const char* c = NULL) const;
--- a/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/includeDB_gc_concurrentMarkSweep	Mon Jan 04 07:58:42 2010 -0800
@@ -221,6 +221,7 @@
 freeList.cpp                            globals.hpp
 freeList.cpp                            mutex.hpp
 freeList.cpp                            sharedHeap.hpp
+freeList.cpp                            vmThread.hpp
 
 freeList.hpp                            allocationStats.hpp
 
--- a/src/share/vm/gc_implementation/includeDB_gc_serial	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/includeDB_gc_serial	Mon Jan 04 07:58:42 2010 -0800
@@ -71,6 +71,7 @@
 gcUtil.hpp                              allocation.hpp
 gcUtil.hpp                              debug.hpp
 gcUtil.hpp                              globalDefinitions.hpp
+gcUtil.hpp                              ostream.hpp
 gcUtil.hpp				timer.hpp
 
 generationCounters.cpp                  generationCounters.hpp
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Mon Jan 04 07:58:42 2010 -0800
@@ -50,6 +50,7 @@
                       work_queue_set_, &term_),
   _is_alive_closure(gen_), _scan_weak_ref_closure(gen_, this),
   _keep_alive_closure(&_scan_weak_ref_closure),
+  _promotion_failure_size(0),
   _pushes(0), _pops(0), _steals(0), _steal_attempts(0), _term_attempts(0),
   _strong_roots_time(0.0), _term_time(0.0)
 {
@@ -249,6 +250,16 @@
   }
 }
 
+void ParScanThreadState::print_and_clear_promotion_failure_size() {
+  if (_promotion_failure_size != 0) {
+    if (PrintPromotionFailure) {
+      gclog_or_tty->print(" (%d: promotion failure size = " SIZE_FORMAT ") ",
+        _thread_num, _promotion_failure_size);
+    }
+    _promotion_failure_size = 0;
+  }
+}
+
 class ParScanThreadStateSet: private ResourceArray {
 public:
   // Initializes states for the specified number of threads;
@@ -260,11 +271,11 @@
                         GrowableArray<oop>**    overflow_stacks_,
                         size_t                  desired_plab_sz,
                         ParallelTaskTerminator& term);
-  inline ParScanThreadState& thread_sate(int i);
+  inline ParScanThreadState& thread_state(int i);
   int pushes() { return _pushes; }
   int pops()   { return _pops; }
   int steals() { return _steals; }
-  void reset();
+  void reset(bool promotion_failed);
   void flush();
 private:
   ParallelTaskTerminator& _term;
@@ -295,22 +306,31 @@
   }
 }
 
-inline ParScanThreadState& ParScanThreadStateSet::thread_sate(int i)
+inline ParScanThreadState& ParScanThreadStateSet::thread_state(int i)
 {
   assert(i >= 0 && i < length(), "sanity check!");
   return ((ParScanThreadState*)_data)[i];
 }
 
 
-void ParScanThreadStateSet::reset()
+void ParScanThreadStateSet::reset(bool promotion_failed)
 {
   _term.reset_for_reuse();
+  if (promotion_failed) {
+    for (int i = 0; i < length(); ++i) {
+      thread_state(i).print_and_clear_promotion_failure_size();
+    }
+  }
 }
 
 void ParScanThreadStateSet::flush()
 {
+  // Work in this loop should be kept as lightweight as
+  // possible since this might otherwise become a bottleneck
+  // to scaling. Should we add heavy-weight work into this
+  // loop, consider parallelizing the loop into the worker threads.
   for (int i = 0; i < length(); ++i) {
-    ParScanThreadState& par_scan_state = thread_sate(i);
+    ParScanThreadState& par_scan_state = thread_state(i);
 
     // Flush stats related to To-space PLAB activity and
     // retire the last buffer.
@@ -362,6 +382,14 @@
       }
     }
   }
+  if (UseConcMarkSweepGC && ParallelGCThreads > 0) {
+    // We need to call this even when ResizeOldPLAB is disabled
+    // so as to avoid breaking some asserts. While we may be able
+    // to avoid this by reorganizing the code a bit, I am loathe
+    // to do that unless we find cases where ergo leads to bad
+    // performance.
+    CFLS_LAB::compute_desired_plab_size();
+  }
 }
 
 ParScanClosure::ParScanClosure(ParNewGeneration* g,
@@ -475,7 +503,7 @@
 
   Generation* old_gen = gch->next_gen(_gen);
 
-  ParScanThreadState& par_scan_state = _state_set->thread_sate(i);
+  ParScanThreadState& par_scan_state = _state_set->thread_state(i);
   par_scan_state.set_young_old_boundary(_young_old_boundary);
 
   par_scan_state.start_strong_roots();
@@ -659,7 +687,7 @@
 {
   ResourceMark rm;
   HandleMark hm;
-  ParScanThreadState& par_scan_state = _state_set.thread_sate(i);
+  ParScanThreadState& par_scan_state = _state_set.thread_state(i);
   par_scan_state.set_young_old_boundary(_young_old_boundary);
   _task.work(i, par_scan_state.is_alive_closure(),
              par_scan_state.keep_alive_closure(),
@@ -693,7 +721,7 @@
   ParNewRefProcTaskProxy rp_task(task, _generation, *_generation.next_gen(),
                                  _generation.reserved().end(), _state_set);
   workers->run_task(&rp_task);
-  _state_set.reset();
+  _state_set.reset(_generation.promotion_failed());
 }
 
 void ParNewRefProcTaskExecutor::execute(EnqueueTask& task)
@@ -813,7 +841,7 @@
     GenCollectedHeap::StrongRootsScope srs(gch);
     tsk.work(0);
   }
-  thread_state_set.reset();
+  thread_state_set.reset(promotion_failed());
 
   if (PAR_STATS_ENABLED && ParallelGCVerbose) {
     gclog_or_tty->print("Thread totals:\n"
@@ -882,6 +910,8 @@
     swap_spaces();  // Make life simpler for CMS || rescan; see 6483690.
     from()->set_next_compaction_space(to());
     gch->set_incremental_collection_will_fail();
+    // Inform the next generation that a promotion failure occurred.
+    _next_gen->promotion_failure_occurred();
 
     // Reset the PromotionFailureALot counters.
     NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();)
@@ -1029,6 +1059,8 @@
       new_obj = old;
 
       preserve_mark_if_necessary(old, m);
+      // Log the size of the maiden promotion failure
+      par_scan_state->log_promotion_failure(sz);
     }
 
     old->forward_to(new_obj);
@@ -1150,6 +1182,8 @@
       failed_to_promote = true;
 
       preserve_mark_if_necessary(old, m);
+      // Log the size of the maiden promotion failure
+      par_scan_state->log_promotion_failure(sz);
     }
   } else {
     // Is in to-space; do copying ourselves.
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Mon Jan 04 07:58:42 2010 -0800
@@ -97,6 +97,9 @@
   int _pushes, _pops, _steals, _steal_attempts, _term_attempts;
   int _overflow_pushes, _overflow_refills, _overflow_refill_objs;
 
+  // Stats for promotion failure
+  size_t _promotion_failure_size;
+
   // Timing numbers.
   double _start;
   double _start_strong_roots;
@@ -169,6 +172,15 @@
   // Undo the most recent allocation ("obj", of "word_sz").
   void undo_alloc_in_to_space(HeapWord* obj, size_t word_sz);
 
+  // Promotion failure stats
+  size_t promotion_failure_size() { return promotion_failure_size(); }
+  void log_promotion_failure(size_t sz) {
+    if (_promotion_failure_size == 0) {
+      _promotion_failure_size = sz;
+    }
+  }
+  void print_and_clear_promotion_failure_size();
+
   int pushes() { return _pushes; }
   int pops()   { return _pops; }
   int steals() { return _steals; }
--- a/src/share/vm/gc_implementation/shared/allocationStats.hpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/allocationStats.hpp	Mon Jan 04 07:58:42 2010 -0800
@@ -31,7 +31,7 @@
   // beginning of this sweep:
   //   Count(end_last_sweep) - Count(start_this_sweep)
   //     + splitBirths(between) - splitDeaths(between)
-  // The above number divided by the time since the start [END???] of the
+  // The above number divided by the time since the end of the
   // previous sweep gives us a time rate of demand for blocks
   // of this size. We compute a padded average of this rate as
   // our current estimate for the time rate of demand for blocks
@@ -41,7 +41,7 @@
   // estimates.
   AdaptivePaddedAverage _demand_rate_estimate;
 
-  ssize_t     _desired;          // Estimate computed as described above
+  ssize_t     _desired;         // Demand stimate computed as described above
   ssize_t     _coalDesired;     // desired +/- small-percent for tuning coalescing
 
   ssize_t     _surplus;         // count - (desired +/- small-percent),
@@ -53,9 +53,9 @@
   ssize_t     _coalDeaths;      // loss from coalescing
   ssize_t     _splitBirths;     // additional chunks from splitting
   ssize_t     _splitDeaths;     // loss from splitting
-  size_t     _returnedBytes;    // number of bytes returned to list.
+  size_t      _returnedBytes;   // number of bytes returned to list.
  public:
-  void initialize() {
+  void initialize(bool split_birth = false) {
     AdaptivePaddedAverage* dummy =
       new (&_demand_rate_estimate) AdaptivePaddedAverage(CMS_FLSWeight,
                                                          CMS_FLSPadding);
@@ -67,7 +67,7 @@
     _beforeSweep = 0;
     _coalBirths = 0;
     _coalDeaths = 0;
-    _splitBirths = 0;
+    _splitBirths = split_birth? 1 : 0;
     _splitDeaths = 0;
     _returnedBytes = 0;
   }
@@ -75,10 +75,12 @@
   AllocationStats() {
     initialize();
   }
+
   // The rate estimate is in blocks per second.
   void compute_desired(size_t count,
                        float inter_sweep_current,
-                       float inter_sweep_estimate) {
+                       float inter_sweep_estimate,
+                       float intra_sweep_estimate) {
     // If the latest inter-sweep time is below our granularity
     // of measurement, we may call in here with
     // inter_sweep_current == 0. However, even for suitably small
@@ -88,12 +90,31 @@
     // vulnerable to noisy glitches. In such cases, we
     // ignore the current sample and use currently available
     // historical estimates.
+    // XXX NEEDS TO BE FIXED
+    // assert(prevSweep() + splitBirths() >= splitDeaths() + (ssize_t)count, "Conservation Principle");
+    //     ^^^^^^^^^^^^^^^^^^^^^^^^^^^    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+    //     "Total Stock"                  "Not used at this block size"
     if (inter_sweep_current > _threshold) {
-      ssize_t demand = prevSweep() - count + splitBirths() - splitDeaths();
+      ssize_t demand = prevSweep() - (ssize_t)count + splitBirths() - splitDeaths();
+      // XXX NEEDS TO BE FIXED
+      // assert(demand >= 0, "Demand should be non-negative");
+      // Defensive: adjust for imprecision in event counting
+      if (demand < 0) {
+        demand = 0;
+      }
+      float old_rate = _demand_rate_estimate.padded_average();
       float rate = ((float)demand)/inter_sweep_current;
       _demand_rate_estimate.sample(rate);
-      _desired = (ssize_t)(_demand_rate_estimate.padded_average()
-                           *inter_sweep_estimate);
+      float new_rate = _demand_rate_estimate.padded_average();
+      ssize_t old_desired = _desired;
+      _desired = (ssize_t)(new_rate * (inter_sweep_estimate
+                                       + CMSExtrapolateSweep
+                                         ? intra_sweep_estimate
+                                         : 0.0));
+      if (PrintFLSStatistics > 1) {
+        gclog_or_tty->print_cr("demand: %d, old_rate: %f, current_rate: %f, new_rate: %f, old_desired: %d, new_desired: %d",
+                                demand,     old_rate,     rate,             new_rate,     old_desired,     _desired);
+      }
     }
   }
 
--- a/src/share/vm/gc_implementation/shared/gcUtil.cpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/gcUtil.cpp	Mon Jan 04 07:58:42 2010 -0800
@@ -52,11 +52,35 @@
   _last_sample = new_sample;
 }
 
+void AdaptiveWeightedAverage::print() const {
+  print_on(tty);
+}
+
+void AdaptiveWeightedAverage::print_on(outputStream* st) const {
+  guarantee(false, "NYI");
+}
+
+void AdaptivePaddedAverage::print() const {
+  print_on(tty);
+}
+
+void AdaptivePaddedAverage::print_on(outputStream* st) const {
+  guarantee(false, "NYI");
+}
+
+void AdaptivePaddedNoZeroDevAverage::print() const {
+  print_on(tty);
+}
+
+void AdaptivePaddedNoZeroDevAverage::print_on(outputStream* st) const {
+  guarantee(false, "NYI");
+}
+
 void AdaptivePaddedAverage::sample(float new_sample) {
-  // Compute our parent classes sample information
+  // Compute new adaptive weighted average based on new sample.
   AdaptiveWeightedAverage::sample(new_sample);
 
-  // Now compute the deviation and the new padded sample
+  // Now update the deviation and the padded average.
   float new_avg = average();
   float new_dev = compute_adaptive_average(fabsd(new_sample - new_avg),
                                            deviation());
--- a/src/share/vm/gc_implementation/shared/gcUtil.hpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/gc_implementation/shared/gcUtil.hpp	Mon Jan 04 07:58:42 2010 -0800
@@ -54,8 +54,8 @@
 
  public:
   // Input weight must be between 0 and 100
-  AdaptiveWeightedAverage(unsigned weight) :
-    _average(0.0), _sample_count(0), _weight(weight), _last_sample(0.0) {
+  AdaptiveWeightedAverage(unsigned weight, float avg = 0.0) :
+    _average(avg), _sample_count(0), _weight(weight), _last_sample(0.0) {
   }
 
   void clear() {
@@ -64,6 +64,13 @@
     _last_sample = 0;
   }
 
+  // Useful for modifying static structures after startup.
+  void  modify(size_t avg, unsigned wt, bool force = false)  {
+    assert(force, "Are you sure you want to call this?");
+    _average = (float)avg;
+    _weight  = wt;
+  }
+
   // Accessors
   float    average() const       { return _average;       }
   unsigned weight()  const       { return _weight;        }
@@ -83,6 +90,10 @@
     // Convert to float and back to avoid integer overflow.
     return (size_t)exp_avg((float)avg, (float)sample, weight);
   }
+
+  // Printing
+  void print_on(outputStream* st) const;
+  void print() const;
 };
 
 
@@ -129,6 +140,10 @@
 
   // Override
   void  sample(float new_sample);
+
+  // Printing
+  void print_on(outputStream* st) const;
+  void print() const;
 };
 
 // A weighted average that includes a deviation from the average,
@@ -146,7 +161,12 @@
     AdaptivePaddedAverage(weight, padding)  {}
   // Override
   void  sample(float new_sample);
+
+  // Printing
+  void print_on(outputStream* st) const;
+  void print() const;
 };
+
 // Use a least squares fit to a set of data to generate a linear
 // equation.
 //              y = intercept + slope * x
--- a/src/share/vm/includeDB_gc_parallel	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/includeDB_gc_parallel	Mon Jan 04 07:58:42 2010 -0800
@@ -21,6 +21,8 @@
 // have any questions.
 //  
 
+arguments.cpp                           compactibleFreeListSpace.hpp
+
 assembler_<arch>.cpp                    g1SATBCardTableModRefBS.hpp
 assembler_<arch>.cpp                    g1CollectedHeap.inline.hpp
 assembler_<arch>.cpp                    heapRegion.hpp
--- a/src/share/vm/memory/defNewGeneration.cpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/memory/defNewGeneration.cpp	Mon Jan 04 07:58:42 2010 -0800
@@ -609,7 +609,7 @@
 
     remove_forwarding_pointers();
     if (PrintGCDetails) {
-      gclog_or_tty->print(" (promotion failed)");
+      gclog_or_tty->print(" (promotion failed) ");
     }
     // Add to-space to the list of space to compact
     // when a promotion failure has occurred.  In that
@@ -620,6 +620,9 @@
     from()->set_next_compaction_space(to());
     gch->set_incremental_collection_will_fail();
 
+    // Inform the next generation that a promotion failure occurred.
+    _next_gen->promotion_failure_occurred();
+
     // Reset the PromotionFailureALot counters.
     NOT_PRODUCT(Universe::heap()->reset_promotion_should_fail();)
   }
@@ -679,6 +682,11 @@
 
 void DefNewGeneration::handle_promotion_failure(oop old) {
   preserve_mark_if_necessary(old, old->mark());
+  if (!_promotion_failed && PrintPromotionFailure) {
+    gclog_or_tty->print(" (promotion failure size = " SIZE_FORMAT ") ",
+                        old->size());
+  }
+
   // forward to self
   old->forward_to(old);
   _promotion_failed = true;
--- a/src/share/vm/memory/generation.hpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/memory/generation.hpp	Mon Jan 04 07:58:42 2010 -0800
@@ -181,6 +181,12 @@
   virtual bool promotion_attempt_is_safe(size_t promotion_in_bytes,
     bool younger_handles_promotion_failure) const;
 
+  // For a non-young generation, this interface can be used to inform a
+  // generation that a promotion attempt into that generation failed.
+  // Typically used to enable diagnostic output for post-mortem analysis,
+  // but other uses of the interface are not ruled out.
+  virtual void promotion_failure_occurred() { /* does nothing */ }
+
   // Return an estimate of the maximum allocation that could be performed
   // in the generation without triggering any collection or expansion
   // activity.  It is "unsafe" because no locks are taken; the result
--- a/src/share/vm/runtime/arguments.cpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/runtime/arguments.cpp	Mon Jan 04 07:58:42 2010 -0800
@@ -948,6 +948,7 @@
   }
 }
 
+#ifndef KERNEL
 // If the user has chosen ParallelGCThreads > 0, we set UseParNewGC
 // if it's not explictly set or unset. If the user has chosen
 // UseParNewGC and not explicitly set ParallelGCThreads we
@@ -1177,8 +1178,7 @@
       // the value (either from the command line or ergonomics) of
       // OldPLABSize.  Following OldPLABSize is an ergonomics decision.
       FLAG_SET_ERGO(uintx, CMSParPromoteBlocksToClaim, OldPLABSize);
-    }
-    else {
+    } else {
       // OldPLABSize and CMSParPromoteBlocksToClaim are both set.
       // CMSParPromoteBlocksToClaim is a collector-specific flag, so
       // we'll let it to take precedence.
@@ -1188,7 +1188,23 @@
                   " CMSParPromoteBlocksToClaim will take precedence.\n");
     }
   }
+  if (!FLAG_IS_DEFAULT(ResizeOldPLAB) && !ResizeOldPLAB) {
+    // OldPLAB sizing manually turned off: Use a larger default setting,
+    // unless it was manually specified. This is because a too-low value
+    // will slow down scavenges.
+    if (FLAG_IS_DEFAULT(CMSParPromoteBlocksToClaim)) {
+      FLAG_SET_ERGO(uintx, CMSParPromoteBlocksToClaim, 50); // default value before 6631166
+    }
+  }
+  // Overwrite OldPLABSize which is the variable we will internally use everywhere.
+  FLAG_SET_ERGO(uintx, OldPLABSize, CMSParPromoteBlocksToClaim);
+  // If either of the static initialization defaults have changed, note this
+  // modification.
+  if (!FLAG_IS_DEFAULT(CMSParPromoteBlocksToClaim) || !FLAG_IS_DEFAULT(OldPLABWeight)) {
+    CFLS_LAB::modify_initialization(OldPLABSize, OldPLABWeight);
+  }
 }
+#endif // KERNEL
 
 inline uintx max_heap_for_compressed_oops() {
   LP64_ONLY(return oopDesc::OopEncodingHeapMax - MaxPermSize - os::vm_page_size());
@@ -2370,22 +2386,25 @@
                   "ExtendedDTraceProbes flag is only applicable on Solaris\n");
       return JNI_EINVAL;
 #endif // ndef SOLARIS
-    } else
 #ifdef ASSERT
-    if (match_option(option, "-XX:+FullGCALot", &tail)) {
+    } else if (match_option(option, "-XX:+FullGCALot", &tail)) {
       FLAG_SET_CMDLINE(bool, FullGCALot, true);
       // disable scavenge before parallel mark-compact
       FLAG_SET_CMDLINE(bool, ScavengeBeforeFullGC, false);
-    } else
 #endif
-    if (match_option(option, "-XX:ParCMSPromoteBlocksToClaim=", &tail)) {
+    } else if (match_option(option, "-XX:CMSParPromoteBlocksToClaim=", &tail)) {
       julong cms_blocks_to_claim = (julong)atol(tail);
       FLAG_SET_CMDLINE(uintx, CMSParPromoteBlocksToClaim, cms_blocks_to_claim);
       jio_fprintf(defaultStream::error_stream(),
-        "Please use -XX:CMSParPromoteBlocksToClaim in place of "
+        "Please use -XX:OldPLABSize in place of "
+        "-XX:CMSParPromoteBlocksToClaim in the future\n");
+    } else if (match_option(option, "-XX:ParCMSPromoteBlocksToClaim=", &tail)) {
+      julong cms_blocks_to_claim = (julong)atol(tail);
+      FLAG_SET_CMDLINE(uintx, CMSParPromoteBlocksToClaim, cms_blocks_to_claim);
+      jio_fprintf(defaultStream::error_stream(),
+        "Please use -XX:OldPLABSize in place of "
         "-XX:ParCMSPromoteBlocksToClaim in the future\n");
-    } else
-    if (match_option(option, "-XX:ParallelGCOldGenAllocBufferSize=", &tail)) {
+    } else if (match_option(option, "-XX:ParallelGCOldGenAllocBufferSize=", &tail)) {
       julong old_plab_size = 0;
       ArgsRange errcode = parse_memory_size(tail, &old_plab_size, 1);
       if (errcode != arg_in_range) {
@@ -2398,8 +2417,7 @@
       jio_fprintf(defaultStream::error_stream(),
                   "Please use -XX:OldPLABSize in place of "
                   "-XX:ParallelGCOldGenAllocBufferSize in the future\n");
-    } else
-    if (match_option(option, "-XX:ParallelGCToSpaceAllocBufferSize=", &tail)) {
+    } else if (match_option(option, "-XX:ParallelGCToSpaceAllocBufferSize=", &tail)) {
       julong young_plab_size = 0;
       ArgsRange errcode = parse_memory_size(tail, &young_plab_size, 1);
       if (errcode != arg_in_range) {
@@ -2412,8 +2430,7 @@
       jio_fprintf(defaultStream::error_stream(),
                   "Please use -XX:YoungPLABSize in place of "
                   "-XX:ParallelGCToSpaceAllocBufferSize in the future\n");
-    } else
-    if (match_option(option, "-XX:", &tail)) { // -XX:xxxx
+    } else if (match_option(option, "-XX:", &tail)) { // -XX:xxxx
       // Skip -XX:Flags= since that case has already been handled
       if (strncmp(tail, "Flags=", strlen("Flags=")) != 0) {
         if (!process_argument(tail, args->ignoreUnrecognized, origin)) {
@@ -2727,6 +2744,7 @@
     return JNI_EINVAL;
   }
 
+#ifndef KERNEL
   if (UseConcMarkSweepGC) {
     // Set flags for CMS and ParNew.  Check UseConcMarkSweep first
     // to ensure that when both UseConcMarkSweepGC and UseParNewGC
@@ -2744,6 +2762,7 @@
       set_g1_gc_flags();
     }
   }
+#endif // KERNEL
 
 #ifdef SERIALGC
   assert(verify_serial_gc_flags(), "SerialGC unset");
--- a/src/share/vm/runtime/globals.hpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/runtime/globals.hpp	Mon Jan 04 07:58:42 2010 -0800
@@ -1355,10 +1355,46 @@
   product(uintx, ParGCDesiredObjsFromOverflowList, 20,                      \
           "The desired number of objects to claim from the overflow list")  \
                                                                             \
-  product(uintx, CMSParPromoteBlocksToClaim, 50,                            \
+  product(uintx, CMSParPromoteBlocksToClaim, 16,                             \
           "Number of blocks to attempt to claim when refilling CMS LAB for "\
           "parallel GC.")                                                   \
                                                                             \
+  product(uintx, OldPLABWeight, 50,                                         \
+          "Percentage (0-100) used to weight the current sample when"       \
+          "computing exponentially decaying average for resizing CMSParPromoteBlocksToClaim.") \
+                                                                            \
+  product(bool, ResizeOldPLAB, true,                                        \
+          "Dynamically resize (old gen) promotion labs")                    \
+                                                                            \
+  product(bool, PrintOldPLAB, false,                                        \
+          "Print (old gen) promotion labs sizing decisions")                \
+                                                                            \
+  product(uintx, CMSOldPLABMin, 16,                                         \
+          "Min size of CMS gen promotion lab caches per worker per blksize")\
+                                                                            \
+  product(uintx, CMSOldPLABMax, 1024,                                       \
+          "Max size of CMS gen promotion lab caches per worker per blksize")\
+                                                                            \
+  product(uintx, CMSOldPLABNumRefills, 4,                                   \
+          "Nominal number of refills of CMS gen promotion lab cache"        \
+          " per worker per block size")                                     \
+                                                                            \
+  product(bool, CMSOldPLABResizeQuicker, false,                             \
+          "Whether to react on-the-fly during a scavenge to a sudden"       \
+          " change in block demand rate")                                   \
+                                                                            \
+  product(uintx, CMSOldPLABToleranceFactor, 4,                              \
+          "The tolerance of the phase-change detector for on-the-fly"       \
+          " PLAB resizing during a scavenge")                               \
+                                                                            \
+  product(uintx, CMSOldPLABReactivityFactor, 2,                             \
+          "The gain in the feedback loop for on-the-fly PLAB resizing"      \
+          " during a scavenge")                                             \
+                                                                            \
+  product(uintx, CMSOldPLABReactivityCeiling, 10,                           \
+          "The clamping of the gain in the feedback loop for on-the-fly"    \
+          " PLAB resizing during a scavenge")                               \
+                                                                            \
   product(bool, AlwaysPreTouch, false,                                      \
           "It forces all freshly committed pages to be pre-touched.")       \
                                                                             \
@@ -1400,27 +1436,54 @@
           "Percentage (0-100) by which the CMS incremental mode duty cycle" \
           " is shifted to the right within the period between young GCs")   \
                                                                             \
-  product(uintx, CMSExpAvgFactor, 25,                                       \
-          "Percentage (0-100) used to weight the current sample when "      \
-          "computing exponential averages for CMS statistics")              \
-                                                                            \
-  product(uintx, CMS_FLSWeight, 50,                                         \
-          "Percentage (0-100) used to weight the current sample when "      \
-          "computing exponentially decating averages for CMS FLS statistics") \
-                                                                            \
-  product(uintx, CMS_FLSPadding, 2,                                         \
-          "The multiple of deviation from mean to use for buffering "       \
+  product(uintx, CMSExpAvgFactor, 50,                                       \
+          "Percentage (0-100) used to weight the current sample when"       \
+          "computing exponential averages for CMS statistics.")             \
+                                                                            \
+  product(uintx, CMS_FLSWeight, 75,                                         \
+          "Percentage (0-100) used to weight the current sample when"       \
+          "computing exponentially decating averages for CMS FLS statistics.") \
+                                                                            \
+  product(uintx, CMS_FLSPadding, 1,                                         \
+          "The multiple of deviation from mean to use for buffering"        \
           "against volatility in free list demand.")                        \
                                                                             \
   product(uintx, FLSCoalescePolicy, 2,                                      \
           "CMS: Aggression level for coalescing, increasing from 0 to 4")   \
                                                                             \
-  product(uintx, CMS_SweepWeight, 50,                                       \
+  product(bool, FLSAlwaysCoalesceLarge, false,                              \
+          "CMS: Larger free blocks are always available for coalescing")    \
+                                                                            \
+  product(double, FLSLargestBlockCoalesceProximity, 0.99,                   \
+          "CMS: the smaller the percentage the greater the coalition force")\
+                                                                            \
+  product(double, CMSSmallCoalSurplusPercent, 1.05,                         \
+          "CMS: the factor by which to inflate estimated demand of small"   \
+          " block sizes to prevent coalescing with an adjoining block")     \
+                                                                            \
+  product(double, CMSLargeCoalSurplusPercent, 0.95,                         \
+          "CMS: the factor by which to inflate estimated demand of large"   \
+          " block sizes to prevent coalescing with an adjoining block")     \
+                                                                            \
+  product(double, CMSSmallSplitSurplusPercent, 1.10,                        \
+          "CMS: the factor by which to inflate estimated demand of small"   \
+          " block sizes to prevent splitting to supply demand for smaller"  \
+          " blocks")                                                        \
+                                                                            \
+  product(double, CMSLargeSplitSurplusPercent, 1.00,                        \
+          "CMS: the factor by which to inflate estimated demand of large"   \
+          " block sizes to prevent splitting to supply demand for smaller"  \
+          " blocks")                                                        \
+                                                                            \
+  product(bool, CMSExtrapolateSweep, false,                                 \
+          "CMS: cushion for block demand during sweep")                     \
+                                                                            \
+  product(uintx, CMS_SweepWeight, 75,                                       \
           "Percentage (0-100) used to weight the current sample when "      \
           "computing exponentially decaying average for inter-sweep "       \
           "duration")                                                       \
                                                                             \
-  product(uintx, CMS_SweepPadding, 2,                                       \
+  product(uintx, CMS_SweepPadding, 1,                                       \
           "The multiple of deviation from mean to use for buffering "       \
           "against volatility in inter-sweep duration.")                    \
                                                                             \
@@ -1459,6 +1522,13 @@
   product(uintx, CMSIndexedFreeListReplenish, 4,                            \
           "Replenish and indexed free list with this number of chunks")     \
                                                                             \
+  product(bool, CMSReplenishIntermediate, true,                             \
+          "Replenish all intermediate free-list caches")                    \
+                                                                            \
+  product(bool, CMSSplitIndexedFreeListBlocks, true,                        \
+          "When satisfying batched demand, splot blocks from the "          \
+          "IndexedFreeList whose size is a multiple of requested size")     \
+                                                                            \
   product(bool, CMSLoopWarn, false,                                         \
           "Warn in case of excessive CMS looping")                          \
                                                                             \
@@ -1593,6 +1663,18 @@
           "Bitmap operations should process at most this many bits"         \
           "between yields")                                                 \
                                                                             \
+  product(bool, CMSDumpAtPromotionFailure, false,                           \
+          "Dump useful information about the state of the CMS old "         \
+          " generation upon a promotion failure.")                          \
+                                                                            \
+  product(bool, CMSPrintChunksInDump, false,                                \
+          "In a dump enabled by CMSDumpAtPromotionFailure, include "        \
+          " more detailed information about the free chunks.")              \
+                                                                            \
+  product(bool, CMSPrintObjectsInDump, false,                               \
+          "In a dump enabled by CMSDumpAtPromotionFailure, include "        \
+          " more detailed information about the allocated objects.")        \
+                                                                            \
   diagnostic(bool, FLSVerifyAllHeapReferences, false,                       \
           "Verify that all refs across the FLS boundary "                   \
           " are to valid objects")                                          \
@@ -1677,6 +1759,10 @@
           "The youngest generation collection does not require "            \
           "a guarantee of full promotion of all live objects.")             \
                                                                             \
+  product(bool, PrintPromotionFailure, false,                               \
+          "Print additional diagnostic information following "              \
+          " promotion failure")                                             \
+                                                                            \
   notproduct(bool, PromotionFailureALot, false,                             \
           "Use promotion failure handling on every youngest generation "    \
           "collection")                                                     \
--- a/src/share/vm/services/classLoadingService.cpp	Wed Dec 23 03:12:16 2009 -0800
+++ b/src/share/vm/services/classLoadingService.cpp	Mon Jan 04 07:58:42 2010 -0800
@@ -128,7 +128,7 @@
 
   if (TraceClassUnloading) {
     ResourceMark rm;
-    tty->print_cr("[Unloading class %s]", k->external_name());
+    gclog_or_tty->print_cr("[Unloading class %s]", k->external_name());
   }
 }