changeset 2982:9d4f4a1825e4

Merge
author brutisso
date Fri, 13 Jan 2012 01:55:22 -0800
parents efdf6985a3a2 2ace1c4ee8da
children 5acd82522540 2e966d967c5c
files
diffstat 16 files changed, 1117 insertions(+), 628 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Fri Jan 13 01:55:22 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,7 @@
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "gc_implementation/shared/vmGCOperations.hpp"
@@ -183,12 +184,11 @@
 void CMMarkStack::allocate(size_t size) {
   _base = NEW_C_HEAP_ARRAY(oop, size);
   if (_base == NULL) {
-    vm_exit_during_initialization("Failed to allocate "
-                                  "CM region mark stack");
+    vm_exit_during_initialization("Failed to allocate CM region mark stack");
   }
   _index = 0;
   _capacity = (jint) size;
-  _oops_do_bound = -1;
+  _saved_index = -1;
   NOT_PRODUCT(_max_depth = 0);
 }
 
@@ -283,7 +283,6 @@
   }
 }
 
-
 CMRegionStack::CMRegionStack() : _base(NULL) {}
 
 void CMRegionStack::allocate(size_t size) {
@@ -302,6 +301,8 @@
 }
 
 void CMRegionStack::push_lock_free(MemRegion mr) {
+  guarantee(false, "push_lock_free(): don't call this any more");
+
   assert(mr.word_size() > 0, "Precondition");
   while (true) {
     jint index = _index;
@@ -325,6 +326,8 @@
 // marking / remark phases. Should only be called in tandem with
 // other lock-free pops.
 MemRegion CMRegionStack::pop_lock_free() {
+  guarantee(false, "pop_lock_free(): don't call this any more");
+
   while (true) {
     jint index = _index;
 
@@ -390,6 +393,8 @@
 #endif
 
 bool CMRegionStack::invalidate_entries_into_cset() {
+  guarantee(false, "invalidate_entries_into_cset(): don't call this any more");
+
   bool result = false;
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   for (int i = 0; i < _oops_do_bound; ++i) {
@@ -438,14 +443,29 @@
   return res;
 }
 
+void CMMarkStack::note_start_of_gc() {
+  assert(_saved_index == -1,
+         "note_start_of_gc()/end_of_gc() bracketed incorrectly");
+  _saved_index = _index;
+}
+
+void CMMarkStack::note_end_of_gc() {
+  // This is intentionally a guarantee, instead of an assert. If we
+  // accidentally add something to the mark stack during GC, it
+  // will be a correctness issue so it's better if we crash. we'll
+  // only check this once per GC anyway, so it won't be a performance
+  // issue in any way.
+  guarantee(_saved_index == _index,
+            err_msg("saved index: %d index: %d", _saved_index, _index));
+  _saved_index = -1;
+}
+
 void CMMarkStack::oops_do(OopClosure* f) {
-  if (_index == 0) return;
-  assert(_oops_do_bound != -1 && _oops_do_bound <= _index,
-         "Bound must be set.");
-  for (int i = 0; i < _oops_do_bound; i++) {
+  assert(_saved_index == _index,
+         err_msg("saved index: %d index: %d", _saved_index, _index));
+  for (int i = 0; i < _index; i += 1) {
     f->do_oop(&_base[i]);
   }
-  _oops_do_bound = -1;
 }
 
 bool ConcurrentMark::not_yet_marked(oop obj) const {
@@ -783,7 +803,7 @@
 public:
   bool doHeapRegion(HeapRegion* r) {
     if (!r->continuesHumongous()) {
-      r->note_start_of_marking(true);
+      r->note_start_of_marking();
     }
     return false;
   }
@@ -804,6 +824,10 @@
 
   // Initialise marking structures. This has to be done in a STW phase.
   reset();
+
+  // For each region note start of marking.
+  NoteStartOfMarkHRClosure startcl;
+  g1h->heap_region_iterate(&startcl);
 }
 
 
@@ -818,10 +842,6 @@
   // every remark and we'll eventually not need to cause one.
   force_overflow_stw()->init();
 
-  // For each region note start of marking.
-  NoteStartOfMarkHRClosure startcl;
-  g1h->heap_region_iterate(&startcl);
-
   // Start Concurrent Marking weak-reference discovery.
   ReferenceProcessor* rp = g1h->ref_processor_cm();
   // enable ("weak") refs discovery
@@ -946,22 +966,9 @@
 }
 #endif // !PRODUCT
 
-void ConcurrentMark::grayRoot(oop p) {
-  HeapWord* addr = (HeapWord*) p;
-  // We can't really check against _heap_start and _heap_end, since it
-  // is possible during an evacuation pause with piggy-backed
-  // initial-mark that the committed space is expanded during the
-  // pause without CM observing this change. So the assertions below
-  // is a bit conservative; but better than nothing.
-  assert(_g1h->g1_committed().contains(addr),
-         "address should be within the heap bounds");
-
-  if (!_nextMarkBitMap->isMarked(addr)) {
-    _nextMarkBitMap->parMark(addr);
-  }
-}
-
 void ConcurrentMark::grayRegionIfNecessary(MemRegion mr) {
+  guarantee(false, "grayRegionIfNecessary(): don't call this any more");
+
   // The objects on the region have already been marked "in bulk" by
   // the caller. We only need to decide whether to push the region on
   // the region stack or not.
@@ -1007,6 +1014,8 @@
 }
 
 void ConcurrentMark::markAndGrayObjectIfNecessary(oop p) {
+  guarantee(false, "markAndGrayObjectIfNecessary(): don't call this any more");
+
   // The object is not marked by the caller. We need to at least mark
   // it and maybe push in on the stack.
 
@@ -1224,7 +1233,6 @@
                                        true /* expected_active */);
 
     if (VerifyDuringGC) {
-
       HandleMark hm;  // handle scope
       gclog_or_tty->print(" VerifyDuringGC:(after)");
       Universe::heap()->prepare_for_verify();
@@ -1879,10 +1887,6 @@
   double end = os::elapsedTime();
   _cleanup_times.add((end - start) * 1000.0);
 
-  // G1CollectedHeap::heap()->print();
-  // gclog_or_tty->print_cr("HEAP GC TIME STAMP : %d",
-  // G1CollectedHeap::heap()->get_gc_time_stamp());
-
   if (PrintGC || PrintGCDetails) {
     g1h->print_size_transition(gclog_or_tty,
                                start_used_bytes,
@@ -2669,6 +2673,8 @@
 }
 
 void ConcurrentMark::drainAllSATBBuffers() {
+  guarantee(false, "drainAllSATBBuffers(): don't call this any more");
+
   CMGlobalObjectClosure oc(this);
   SATBMarkQueueSet& satb_mq_set = JavaThread::satb_mark_queue_set();
   satb_mq_set.set_closure(&oc);
@@ -2687,12 +2693,6 @@
   assert(satb_mq_set.completed_buffers_num() == 0, "invariant");
 }
 
-void ConcurrentMark::markPrev(oop p) {
-  // Note we are overriding the read-only view of the prev map here, via
-  // the cast.
-  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*)p);
-}
-
 void ConcurrentMark::clear(oop p) {
   assert(p != NULL && p->is_oop(), "expected an oop");
   HeapWord* addr = (HeapWord*)p;
@@ -2702,13 +2702,21 @@
   _nextMarkBitMap->clear(addr);
 }
 
-void ConcurrentMark::clearRangeBothMaps(MemRegion mr) {
+void ConcurrentMark::clearRangePrevBitmap(MemRegion mr) {
   // Note we are overriding the read-only view of the prev map here, via
   // the cast.
   ((CMBitMap*)_prevMarkBitMap)->clearRange(mr);
+}
+
+void ConcurrentMark::clearRangeNextBitmap(MemRegion mr) {
   _nextMarkBitMap->clearRange(mr);
 }
 
+void ConcurrentMark::clearRangeBothBitmaps(MemRegion mr) {
+  clearRangePrevBitmap(mr);
+  clearRangeNextBitmap(mr);
+}
+
 HeapRegion*
 ConcurrentMark::claim_region(int task_num) {
   // "checkpoint" the finger
@@ -2803,6 +2811,9 @@
 }
 
 bool ConcurrentMark::invalidate_aborted_regions_in_cset() {
+  guarantee(false, "invalidate_aborted_regions_in_cset(): "
+                   "don't call this any more");
+
   bool result = false;
   for (int i = 0; i < (int)_max_task_num; ++i) {
     CMTask* the_task = _tasks[i];
@@ -2854,24 +2865,135 @@
     // ...then over the contents of the all the task queues.
     queue->oops_do(cl);
   }
-
-  // Invalidate any entries, that are in the region stack, that
-  // point into the collection set
-  if (_regionStack.invalidate_entries_into_cset()) {
-    // otherwise, any gray objects copied during the evacuation pause
-    // might not be visited.
-    assert(_should_gray_objects, "invariant");
+}
+
+#ifndef PRODUCT
+enum VerifyNoCSetOopsPhase {
+  VerifyNoCSetOopsStack,
+  VerifyNoCSetOopsQueues,
+  VerifyNoCSetOopsSATBCompleted,
+  VerifyNoCSetOopsSATBThread
+};
+
+class VerifyNoCSetOopsClosure : public OopClosure, public ObjectClosure  {
+private:
+  G1CollectedHeap* _g1h;
+  VerifyNoCSetOopsPhase _phase;
+  int _info;
+
+  const char* phase_str() {
+    switch (_phase) {
+    case VerifyNoCSetOopsStack:         return "Stack";
+    case VerifyNoCSetOopsQueues:        return "Queue";
+    case VerifyNoCSetOopsSATBCompleted: return "Completed SATB Buffers";
+    case VerifyNoCSetOopsSATBThread:    return "Thread SATB Buffers";
+    default:                            ShouldNotReachHere();
+    }
+    return NULL;
   }
 
-  // Invalidate any aborted regions, recorded in the individual CM
-  // tasks, that point into the collection set.
-  if (invalidate_aborted_regions_in_cset()) {
-    // otherwise, any gray objects copied during the evacuation pause
-    // might not be visited.
-    assert(_should_gray_objects, "invariant");
+  void do_object_work(oop obj) {
+    guarantee(!_g1h->obj_in_cs(obj),
+              err_msg("obj: "PTR_FORMAT" in CSet, phase: %s, info: %d",
+                      (void*) obj, phase_str(), _info));
   }
 
+public:
+  VerifyNoCSetOopsClosure() : _g1h(G1CollectedHeap::heap()) { }
+
+  void set_phase(VerifyNoCSetOopsPhase phase, int info = -1) {
+    _phase = phase;
+    _info = info;
+  }
+
+  virtual void do_oop(oop* p) {
+    oop obj = oopDesc::load_decode_heap_oop(p);
+    do_object_work(obj);
+  }
+
+  virtual void do_oop(narrowOop* p) {
+    // We should not come across narrow oops while scanning marking
+    // stacks and SATB buffers.
+    ShouldNotReachHere();
+  }
+
+  virtual void do_object(oop obj) {
+    do_object_work(obj);
+  }
+};
+
+void ConcurrentMark::verify_no_cset_oops(bool verify_stacks,
+                                         bool verify_enqueued_buffers,
+                                         bool verify_thread_buffers,
+                                         bool verify_fingers) {
+  assert(SafepointSynchronize::is_at_safepoint(), "should be at a safepoint");
+  if (!G1CollectedHeap::heap()->mark_in_progress()) {
+    return;
+  }
+
+  VerifyNoCSetOopsClosure cl;
+
+  if (verify_stacks) {
+    // Verify entries on the global mark stack
+    cl.set_phase(VerifyNoCSetOopsStack);
+    _markStack.oops_do(&cl);
+
+    // Verify entries on the task queues
+    for (int i = 0; i < (int) _max_task_num; i += 1) {
+      cl.set_phase(VerifyNoCSetOopsQueues, i);
+      OopTaskQueue* queue = _task_queues->queue(i);
+      queue->oops_do(&cl);
+    }
+  }
+
+  SATBMarkQueueSet& satb_qs = JavaThread::satb_mark_queue_set();
+
+  // Verify entries on the enqueued SATB buffers
+  if (verify_enqueued_buffers) {
+    cl.set_phase(VerifyNoCSetOopsSATBCompleted);
+    satb_qs.iterate_completed_buffers_read_only(&cl);
+  }
+
+  // Verify entries on the per-thread SATB buffers
+  if (verify_thread_buffers) {
+    cl.set_phase(VerifyNoCSetOopsSATBThread);
+    satb_qs.iterate_thread_buffers_read_only(&cl);
+  }
+
+  if (verify_fingers) {
+    // Verify the global finger
+    HeapWord* global_finger = finger();
+    if (global_finger != NULL && global_finger < _heap_end) {
+      // The global finger always points to a heap region boundary. We
+      // use heap_region_containing_raw() to get the containing region
+      // given that the global finger could be pointing to a free region
+      // which subsequently becomes continues humongous. If that
+      // happens, heap_region_containing() will return the bottom of the
+      // corresponding starts humongous region and the check below will
+      // not hold any more.
+      HeapRegion* global_hr = _g1h->heap_region_containing_raw(global_finger);
+      guarantee(global_finger == global_hr->bottom(),
+                err_msg("global finger: "PTR_FORMAT" region: "HR_FORMAT,
+                        global_finger, HR_FORMAT_PARAMS(global_hr)));
+    }
+
+    // Verify the task fingers
+    assert(parallel_marking_threads() <= _max_task_num, "sanity");
+    for (int i = 0; i < (int) parallel_marking_threads(); i += 1) {
+      CMTask* task = _tasks[i];
+      HeapWord* task_finger = task->finger();
+      if (task_finger != NULL && task_finger < _heap_end) {
+        // See above note on the global finger verification.
+        HeapRegion* task_hr = _g1h->heap_region_containing_raw(task_finger);
+        guarantee(task_finger == task_hr->bottom() ||
+                  !task_hr->in_collection_set(),
+                  err_msg("task finger: "PTR_FORMAT" region: "HR_FORMAT,
+                          task_finger, HR_FORMAT_PARAMS(task_hr)));
+      }
+    }
+  }
 }
+#endif // PRODUCT
 
 void ConcurrentMark::clear_marking_state(bool clear_overflow) {
   _markStack.setEmpty();
@@ -3080,19 +3202,6 @@
   }
 };
 
-class SetClaimValuesInCSetHRClosure: public HeapRegionClosure {
-  jint _claim_value;
-
-public:
-  SetClaimValuesInCSetHRClosure(jint claim_value) :
-    _claim_value(claim_value) { }
-
-  bool doHeapRegion(HeapRegion* hr) {
-    hr->set_claim_value(_claim_value);
-    return false;
-  }
-};
-
 class G1ParCompleteMarkInCSetTask: public AbstractGangTask {
 protected:
   G1CollectedHeap* _g1h;
@@ -3112,6 +3221,9 @@
 };
 
 void ConcurrentMark::complete_marking_in_collection_set() {
+  guarantee(false, "complete_marking_in_collection_set(): "
+                   "don't call this any more");
+
   G1CollectedHeap* g1h =  G1CollectedHeap::heap();
 
   if (!g1h->mark_in_progress()) {
@@ -3135,9 +3247,8 @@
 
   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::CompleteMarkCSetClaimValue), "sanity");
 
-  // Now reset the claim values in the regions in the collection set.
-  SetClaimValuesInCSetHRClosure set_cv_cl(HeapRegion::InitialClaimValue);
-  g1h->collection_set_iterate(&set_cv_cl);
+  // Reset the claim values in the regions in the collection set.
+  g1h->reset_cset_heap_region_claim_values();
 
   assert(g1h->check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
 
@@ -3160,6 +3271,8 @@
 // newCSet().
 
 void ConcurrentMark::newCSet() {
+  guarantee(false, "newCSet(): don't call this any more");
+
   if (!concurrent_marking_in_progress()) {
     // nothing to do if marking is not in progress
     return;
@@ -3198,6 +3311,8 @@
 }
 
 void ConcurrentMark::registerCSetRegion(HeapRegion* hr) {
+  guarantee(false, "registerCSetRegion(): don't call this any more");
+
   if (!concurrent_marking_in_progress()) return;
 
   HeapWord* region_end = hr->end();
@@ -3209,6 +3324,9 @@
 // Resets the region fields of active CMTasks whose values point
 // into the collection set.
 void ConcurrentMark::reset_active_task_region_fields_in_cset() {
+  guarantee(false, "reset_active_task_region_fields_in_cset(): "
+                   "don't call this any more");
+
   assert(SafepointSynchronize::is_at_safepoint(), "should be in STW");
   assert(parallel_marking_threads() <= _max_task_num, "sanity");
 
@@ -3919,6 +4037,10 @@
 }
 
 void CMTask::drain_region_stack(BitMapClosure* bc) {
+  assert(_cm->region_stack_empty(), "region stack should be empty");
+  assert(_aborted_region.is_empty(), "aborted region should be empty");
+  return;
+
   if (has_aborted()) return;
 
   assert(_region_finger == NULL,
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Fri Jan 13 01:55:22 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -166,10 +166,10 @@
 // Ideally this should be GrowableArray<> just like MSC's marking stack(s).
 class CMMarkStack VALUE_OBJ_CLASS_SPEC {
   ConcurrentMark* _cm;
-  oop*   _base;      // bottom of stack
-  jint   _index;     // one more than last occupied index
-  jint   _capacity;  // max #elements
-  jint   _oops_do_bound;  // Number of elements to include in next iteration.
+  oop*   _base;        // bottom of stack
+  jint   _index;       // one more than last occupied index
+  jint   _capacity;    // max #elements
+  jint   _saved_index; // value of _index saved at start of GC
   NOT_PRODUCT(jint _max_depth;)  // max depth plumbed during run
 
   bool   _overflow;
@@ -247,16 +247,12 @@
 
   void setEmpty()   { _index = 0; clear_overflow(); }
 
-  // Record the current size; a subsequent "oops_do" will iterate only over
-  // indices valid at the time of this call.
-  void set_oops_do_bound(jint bound = -1) {
-    if (bound == -1) {
-      _oops_do_bound = _index;
-    } else {
-      _oops_do_bound = bound;
-    }
-  }
-  jint oops_do_bound() { return _oops_do_bound; }
+  // Record the current index.
+  void note_start_of_gc();
+
+  // Make sure that we have not added any entries to the stack during GC.
+  void note_end_of_gc();
+
   // iterate over the oops in the mark stack, up to the bound recorded via
   // the call above.
   void oops_do(OopClosure* f);
@@ -724,10 +720,9 @@
   // G1CollectedHeap
 
   // This notifies CM that a root during initial-mark needs to be
-  // grayed and it's MT-safe. Currently, we just mark it. But, in the
-  // future, we can experiment with pushing it on the stack and we can
-  // do this without changing G1CollectedHeap.
-  void grayRoot(oop p);
+  // grayed. It is MT-safe.
+  inline void grayRoot(oop obj, size_t word_size);
+
   // It's used during evacuation pauses to gray a region, if
   // necessary, and it's MT-safe. It assumes that the caller has
   // marked any objects on that region. If _should_gray_objects is
@@ -735,6 +730,7 @@
   // pushed on the region stack, if it is located below the global
   // finger, otherwise we do nothing.
   void grayRegionIfNecessary(MemRegion mr);
+
   // It's used during evacuation pauses to mark and, if necessary,
   // gray a single object and it's MT-safe. It assumes the caller did
   // not mark the object. If _should_gray_objects is true and we're
@@ -791,24 +787,40 @@
 
   // Mark in the previous bitmap.  NB: this is usually read-only, so use
   // this carefully!
-  void markPrev(oop p);
+  inline void markPrev(oop p);
+  inline void markNext(oop p);
   void clear(oop p);
-  // Clears marks for all objects in the given range, for both prev and
-  // next bitmaps.  NB: the previous bitmap is usually read-only, so use
-  // this carefully!
-  void clearRangeBothMaps(MemRegion mr);
+  // Clears marks for all objects in the given range, for the prev,
+  // next, or both bitmaps.  NB: the previous bitmap is usually
+  // read-only, so use this carefully!
+  void clearRangePrevBitmap(MemRegion mr);
+  void clearRangeNextBitmap(MemRegion mr);
+  void clearRangeBothBitmaps(MemRegion mr);
 
-  // Record the current top of the mark and region stacks; a
-  // subsequent oops_do() on the mark stack and
-  // invalidate_entries_into_cset() on the region stack will iterate
-  // only over indices valid at the time of this call.
-  void set_oops_do_bound() {
-    _markStack.set_oops_do_bound();
-    _regionStack.set_oops_do_bound();
+  // Notify data structures that a GC has started.
+  void note_start_of_gc() {
+    _markStack.note_start_of_gc();
   }
+
+  // Notify data structures that a GC is finished.
+  void note_end_of_gc() {
+    _markStack.note_end_of_gc();
+  }
+
   // Iterate over the oops in the mark stack and all local queues. It
   // also calls invalidate_entries_into_cset() on the region stack.
   void oops_do(OopClosure* f);
+
+  // Verify that there are no CSet oops on the stacks (taskqueues /
+  // global mark stack), enqueued SATB buffers, per-thread SATB
+  // buffers, and fingers (global / per-task). The boolean parameters
+  // decide which of the above data structures to verify. If marking
+  // is not in progress, it's a no-op.
+  void verify_no_cset_oops(bool verify_stacks,
+                           bool verify_enqueued_buffers,
+                           bool verify_thread_buffers,
+                           bool verify_fingers) PRODUCT_RETURN;
+
   // It is called at the end of an evacuation pause during marking so
   // that CM is notified of where the new end of the heap is. It
   // doesn't do anything if concurrent_marking_in_progress() is false,
@@ -1166,6 +1178,7 @@
   // It keeps picking SATB buffers and processing them until no SATB
   // buffers are available.
   void drain_satb_buffers();
+
   // It keeps popping regions from the region stack and processing
   // them until the region stack is empty.
   void drain_region_stack(BitMapClosure* closure);
--- a/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.inline.hpp	Fri Jan 13 01:55:22 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -153,4 +153,46 @@
   }
 }
 
+inline void ConcurrentMark::markPrev(oop p) {
+  assert(!_prevMarkBitMap->isMarked((HeapWord*) p), "sanity");
+  // Note we are overriding the read-only view of the prev map here, via
+  // the cast.
+  ((CMBitMap*)_prevMarkBitMap)->mark((HeapWord*) p);
+}
+
+inline void ConcurrentMark::markNext(oop p) {
+  assert(!_nextMarkBitMap->isMarked((HeapWord*) p), "sanity");
+  _nextMarkBitMap->mark((HeapWord*) p);
+}
+
+inline void ConcurrentMark::grayRoot(oop obj, size_t word_size) {
+  HeapWord* addr = (HeapWord*) obj;
+
+  // Currently we don't do anything with word_size but we will use it
+  // in the very near future in the liveness calculation piggy-backing
+  // changes.
+
+#ifdef ASSERT
+  HeapRegion* hr = _g1h->heap_region_containing(addr);
+  assert(hr != NULL, "sanity");
+  assert(!hr->is_survivor(), "should not allocate survivors during IM");
+  assert(addr < hr->next_top_at_mark_start(),
+         err_msg("addr: "PTR_FORMAT" hr: "HR_FORMAT" NTAMS: "PTR_FORMAT,
+                 addr, HR_FORMAT_PARAMS(hr), hr->next_top_at_mark_start()));
+  // We cannot assert that word_size == obj->size() given that obj
+  // might not be in a consistent state (another thread might be in
+  // the process of copying it). So the best thing we can do is to
+  // assert that word_size is under an upper bound which is its
+  // containing region's capacity.
+  assert(word_size * HeapWordSize <= hr->capacity(),
+         err_msg("size: "SIZE_FORMAT" capacity: "SIZE_FORMAT" "HR_FORMAT,
+                 word_size * HeapWordSize, hr->capacity(),
+                 HR_FORMAT_PARAMS(hr)));
+#endif // ASSERT
+
+  if (!_nextMarkBitMap->isMarked(addr)) {
+    _nextMarkBitMap->parMark(addr);
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_INLINE_HPP
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Jan 13 01:55:22 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,9 +32,11 @@
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
 #include "gc_implementation/g1/g1CollectorPolicy.hpp"
 #include "gc_implementation/g1/g1ErgoVerbose.hpp"
+#include "gc_implementation/g1/g1EvacFailure.hpp"
 #include "gc_implementation/g1/g1MarkSweep.hpp"
 #include "gc_implementation/g1/g1OopClosures.inline.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "gc_implementation/g1/vm_operations_g1.hpp"
@@ -591,17 +593,29 @@
     }
     res = new_region_try_secondary_free_list();
   }
-  if (res == NULL && do_expand) {
+  if (res == NULL && do_expand && _expand_heap_after_alloc_failure) {
+    // Currently, only attempts to allocate GC alloc regions set
+    // do_expand to true. So, we should only reach here during a
+    // safepoint. If this assumption changes we might have to
+    // reconsider the use of _expand_heap_after_alloc_failure.
+    assert(SafepointSynchronize::is_at_safepoint(), "invariant");
+
     ergo_verbose1(ErgoHeapSizing,
                   "attempt heap expansion",
                   ergo_format_reason("region allocation request failed")
                   ergo_format_byte("allocation request"),
                   word_size * HeapWordSize);
     if (expand(word_size * HeapWordSize)) {
-      // Even though the heap was expanded, it might not have reached
-      // the desired size. So, we cannot assume that the allocation
-      // will succeed.
+      // Given that expand() succeeded in expanding the heap, and we
+      // always expand the heap by an amount aligned to the heap
+      // region size, the free list should in theory not be empty. So
+      // it would probably be OK to use remove_head(). But the extra
+      // check for NULL is unlikely to be a performance issue here (we
+      // just expanded the heap!) so let's just be conservative and
+      // use remove_head_or_null().
       res = _free_list.remove_head_or_null();
+    } else {
+      _expand_heap_after_alloc_failure = false;
     }
   }
   return res;
@@ -1838,6 +1852,7 @@
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
   _retained_old_gc_alloc_region(NULL),
+  _expand_heap_after_alloc_failure(true),
   _surviving_young_words(NULL),
   _full_collections_completed(0),
   _in_cset_fast_test(NULL),
@@ -2605,12 +2620,16 @@
   }
 };
 
-void
-G1CollectedHeap::reset_heap_region_claim_values() {
+void G1CollectedHeap::reset_heap_region_claim_values() {
   ResetClaimValuesClosure blk;
   heap_region_iterate(&blk);
 }
 
+void G1CollectedHeap::reset_cset_heap_region_claim_values() {
+  ResetClaimValuesClosure blk;
+  collection_set_iterate(&blk);
+}
+
 #ifdef ASSERT
 // This checks whether all regions in the heap have the correct claim
 // value. I also piggy-backed on this a check to ensure that the
@@ -3000,14 +3019,20 @@
       } else {
         VerifyObjsInRegionClosure not_dead_yet_cl(r, _vo);
         r->object_iterate(&not_dead_yet_cl);
-        if (r->max_live_bytes() < not_dead_yet_cl.live_bytes()) {
-          gclog_or_tty->print_cr("["PTR_FORMAT","PTR_FORMAT"] "
-                                 "max_live_bytes "SIZE_FORMAT" "
-                                 "< calculated "SIZE_FORMAT,
-                                 r->bottom(), r->end(),
-                                 r->max_live_bytes(),
+        if (_vo != VerifyOption_G1UseNextMarking) {
+          if (r->max_live_bytes() < not_dead_yet_cl.live_bytes()) {
+            gclog_or_tty->print_cr("["PTR_FORMAT","PTR_FORMAT"] "
+                                   "max_live_bytes "SIZE_FORMAT" "
+                                   "< calculated "SIZE_FORMAT,
+                                   r->bottom(), r->end(),
+                                   r->max_live_bytes(),
                                  not_dead_yet_cl.live_bytes());
-          _failures = true;
+            _failures = true;
+          }
+        } else {
+          // When vo == UseNextMarking we cannot currently do a sanity
+          // check on the live bytes as the calculation has not been
+          // finalized yet.
         }
       }
     }
@@ -3641,25 +3666,6 @@
         }
         perm_gen()->save_marks();
 
-        // We must do this before any possible evacuation that should propagate
-        // marks.
-        if (mark_in_progress()) {
-          double start_time_sec = os::elapsedTime();
-
-          _cm->drainAllSATBBuffers();
-          double finish_mark_ms = (os::elapsedTime() - start_time_sec) * 1000.0;
-          g1_policy()->record_satb_drain_time(finish_mark_ms);
-        }
-        // Record the number of elements currently on the mark stack, so we
-        // only iterate over these.  (Since evacuation may add to the mark
-        // stack, doing more exposes race conditions.)  If no mark is in
-        // progress, this will be zero.
-        _cm->set_oops_do_bound();
-
-        if (mark_in_progress()) {
-          concurrent_mark()->newCSet();
-        }
-
 #if YOUNG_LIST_VERBOSE
         gclog_or_tty->print_cr("\nBefore choosing collection set.\nYoung_list:");
         _young_list->print();
@@ -3668,6 +3674,16 @@
 
         g1_policy()->choose_collection_set(target_pause_time_ms);
 
+        _cm->note_start_of_gc();
+        // We should not verify the per-thread SATB buffers given that
+        // we have not filtered them yet (we'll do so during the
+        // GC). We also call this after choose_collection_set() to
+        // ensure that the CSet has been finalized.
+        _cm->verify_no_cset_oops(true  /* verify_stacks */,
+                                 true  /* verify_enqueued_buffers */,
+                                 false /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
+
         if (_hr_printer.is_active()) {
           HeapRegion* hr = g1_policy()->collection_set();
           while (hr != NULL) {
@@ -3684,16 +3700,6 @@
           }
         }
 
-        // We have chosen the complete collection set. If marking is
-        // active then, we clear the region fields of any of the
-        // concurrent marking tasks whose region fields point into
-        // the collection set as these values will become stale. This
-        // will cause the owning marking threads to claim a new region
-        // when marking restarts.
-        if (mark_in_progress()) {
-          concurrent_mark()->reset_active_task_region_fields_in_cset();
-        }
-
 #ifdef ASSERT
         VerifyCSetClosure cl;
         collection_set_iterate(&cl);
@@ -3707,6 +3713,16 @@
         // Actually do the work...
         evacuate_collection_set();
 
+        // We do this to mainly verify the per-thread SATB buffers
+        // (which have been filtered by now) since we didn't verify
+        // them earlier. No point in re-checking the stacks / enqueued
+        // buffers given that the CSet has not changed since last time
+        // we checked.
+        _cm->verify_no_cset_oops(false /* verify_stacks */,
+                                 false /* verify_enqueued_buffers */,
+                                 true  /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
+
         free_collection_set(g1_policy()->collection_set());
         g1_policy()->clear_collection_set();
 
@@ -3775,6 +3791,8 @@
           size_t expand_bytes = g1_policy()->expansion_amount();
           if (expand_bytes > 0) {
             size_t bytes_before = capacity();
+            // No need for an ergo verbose message here,
+            // expansion_amount() does this when it returns a value > 0.
             if (!expand(expand_bytes)) {
               // We failed to expand the heap so let's verify that
               // committed/uncommitted amount match the backing store
@@ -3784,6 +3802,14 @@
           }
         }
 
+        // We redo the verificaiton but now wrt to the new CSet which
+        // has just got initialized after the previous CSet was freed.
+        _cm->verify_no_cset_oops(true  /* verify_stacks */,
+                                 true  /* verify_enqueued_buffers */,
+                                 true  /* verify_thread_buffers */,
+                                 true  /* verify_fingers */);
+        _cm->note_end_of_gc();
+
         double end_time_sec = os::elapsedTime();
         double pause_time_ms = (end_time_sec - start_time_sec) * MILLIUNITS;
         g1_policy()->record_pause_time_ms(pause_time_ms);
@@ -3831,21 +3857,6 @@
         // CM reference discovery will be re-enabled if necessary.
       }
 
-      {
-        size_t expand_bytes = g1_policy()->expansion_amount();
-        if (expand_bytes > 0) {
-          size_t bytes_before = capacity();
-          // No need for an ergo verbose message here,
-          // expansion_amount() does this when it returns a value > 0.
-          if (!expand(expand_bytes)) {
-            // We failed to expand the heap so let's verify that
-            // committed/uncommitted amount match the backing store
-            assert(capacity() == _g1_storage.committed_size(), "committed size mismatch");
-            assert(max_capacity() == _g1_storage.reserved_size(), "reserved size mismatch");
-          }
-        }
-      }
-
       // We should do this after we potentially expand the heap so
       // that all the COMMIT events are generated before the end GC
       // event, and after we retire the GC alloc regions so that all
@@ -3949,6 +3960,8 @@
     // we allocate to in the region sets. We'll re-add it later, when
     // it's retired again.
     _old_set.remove(retained_region);
+    bool during_im = g1_policy()->during_initial_mark_pause();
+    retained_region->note_start_of_copying(during_im);
     _old_gc_alloc_region.set(retained_region);
     _hr_printer.reuse(retained_region);
   }
@@ -3985,157 +3998,26 @@
   _evac_failure_scan_stack = NULL;
 }
 
-class UpdateRSetDeferred : public OopsInHeapRegionClosure {
-private:
-  G1CollectedHeap* _g1;
-  DirtyCardQueue *_dcq;
-  CardTableModRefBS* _ct_bs;
-
-public:
-  UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
-    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {}
-
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(      oop* p) { do_oop_work(p); }
-  template <class T> void do_oop_work(T* p) {
-    assert(_from->is_in_reserved(p), "paranoia");
-    if (!_from->is_in_reserved(oopDesc::load_decode_heap_oop(p)) &&
-        !_from->is_survivor()) {
-      size_t card_index = _ct_bs->index_for(p);
-      if (_ct_bs->mark_card_deferred(card_index)) {
-        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
-      }
-    }
-  }
-};
-
-class RemoveSelfPointerClosure: public ObjectClosure {
-private:
-  G1CollectedHeap* _g1;
-  ConcurrentMark* _cm;
-  HeapRegion* _hr;
-  size_t _prev_marked_bytes;
-  size_t _next_marked_bytes;
-  OopsInHeapRegionClosure *_cl;
-public:
-  RemoveSelfPointerClosure(G1CollectedHeap* g1, HeapRegion* hr,
-                           OopsInHeapRegionClosure* cl) :
-    _g1(g1), _hr(hr), _cm(_g1->concurrent_mark()),  _prev_marked_bytes(0),
-    _next_marked_bytes(0), _cl(cl) {}
-
-  size_t prev_marked_bytes() { return _prev_marked_bytes; }
-  size_t next_marked_bytes() { return _next_marked_bytes; }
-
-  // <original comment>
-  // The original idea here was to coalesce evacuated and dead objects.
-  // However that caused complications with the block offset table (BOT).
-  // In particular if there were two TLABs, one of them partially refined.
-  // |----- TLAB_1--------|----TLAB_2-~~~(partially refined part)~~~|
-  // The BOT entries of the unrefined part of TLAB_2 point to the start
-  // of TLAB_2. If the last object of the TLAB_1 and the first object
-  // of TLAB_2 are coalesced, then the cards of the unrefined part
-  // would point into middle of the filler object.
-  // The current approach is to not coalesce and leave the BOT contents intact.
-  // </original comment>
-  //
-  // We now reset the BOT when we start the object iteration over the
-  // region and refine its entries for every object we come across. So
-  // the above comment is not really relevant and we should be able
-  // to coalesce dead objects if we want to.
-  void do_object(oop obj) {
-    HeapWord* obj_addr = (HeapWord*) obj;
-    assert(_hr->is_in(obj_addr), "sanity");
-    size_t obj_size = obj->size();
-    _hr->update_bot_for_object(obj_addr, obj_size);
-    if (obj->is_forwarded() && obj->forwardee() == obj) {
-      // The object failed to move.
-      assert(!_g1->is_obj_dead(obj), "We should not be preserving dead objs.");
-      _cm->markPrev(obj);
-      assert(_cm->isPrevMarked(obj), "Should be marked!");
-      _prev_marked_bytes += (obj_size * HeapWordSize);
-      if (_g1->mark_in_progress() && !_g1->is_obj_ill(obj)) {
-        _cm->markAndGrayObjectIfNecessary(obj);
-      }
-      obj->set_mark(markOopDesc::prototype());
-      // While we were processing RSet buffers during the
-      // collection, we actually didn't scan any cards on the
-      // collection set, since we didn't want to update remebered
-      // sets with entries that point into the collection set, given
-      // that live objects fromthe collection set are about to move
-      // and such entries will be stale very soon. This change also
-      // dealt with a reliability issue which involved scanning a
-      // card in the collection set and coming across an array that
-      // was being chunked and looking malformed. The problem is
-      // that, if evacuation fails, we might have remembered set
-      // entries missing given that we skipped cards on the
-      // collection set. So, we'll recreate such entries now.
-      obj->oop_iterate(_cl);
-      assert(_cm->isPrevMarked(obj), "Should be marked!");
-    } else {
-      // The object has been either evacuated or is dead. Fill it with a
-      // dummy object.
-      MemRegion mr((HeapWord*)obj, obj_size);
-      CollectedHeap::fill_with_object(mr);
-      _cm->clearRangeBothMaps(mr);
-    }
-  }
-};
-
 void G1CollectedHeap::remove_self_forwarding_pointers() {
-  UpdateRSetImmediate immediate_update(_g1h->g1_rem_set());
-  DirtyCardQueue dcq(&_g1h->dirty_card_queue_set());
-  UpdateRSetDeferred deferred_update(_g1h, &dcq);
-  OopsInHeapRegionClosure *cl;
-  if (G1DeferredRSUpdate) {
-    cl = &deferred_update;
+  assert(check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
+  assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
+
+  G1ParRemoveSelfForwardPtrsTask rsfp_task(this);
+
+  if (G1CollectedHeap::use_parallel_gc_threads()) {
+    set_par_threads();
+    workers()->run_task(&rsfp_task);
+    set_par_threads(0);
   } else {
-    cl = &immediate_update;
-  }
-  HeapRegion* cur = g1_policy()->collection_set();
-  while (cur != NULL) {
-    assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
-    assert(!cur->isHumongous(), "sanity");
-
-    if (cur->evacuation_failed()) {
-      assert(cur->in_collection_set(), "bad CS");
-      RemoveSelfPointerClosure rspc(_g1h, cur, cl);
-
-      // In the common case we make sure that this is done when the
-      // region is freed so that it is "ready-to-go" when it's
-      // re-allocated. However, when evacuation failure happens, a
-      // region will remain in the heap and might ultimately be added
-      // to a CSet in the future. So we have to be careful here and
-      // make sure the region's RSet is ready for parallel iteration
-      // whenever this might be required in the future.
-      cur->rem_set()->reset_for_par_iteration();
-      cur->reset_bot();
-      cl->set_region(cur);
-      cur->object_iterate(&rspc);
-
-      // A number of manipulations to make the TAMS be the current top,
-      // and the marked bytes be the ones observed in the iteration.
-      if (_g1h->concurrent_mark()->at_least_one_mark_complete()) {
-        // The comments below are the postconditions achieved by the
-        // calls.  Note especially the last such condition, which says that
-        // the count of marked bytes has been properly restored.
-        cur->note_start_of_marking(false);
-        // _next_top_at_mark_start == top, _next_marked_bytes == 0
-        cur->add_to_marked_bytes(rspc.prev_marked_bytes());
-        // _next_marked_bytes == prev_marked_bytes.
-        cur->note_end_of_marking();
-        // _prev_top_at_mark_start == top(),
-        // _prev_marked_bytes == prev_marked_bytes
-      }
-      // If there is no mark in progress, we modified the _next variables
-      // above needlessly, but harmlessly.
-      if (_g1h->mark_in_progress()) {
-        cur->note_start_of_marking(false);
-        // _next_top_at_mark_start == top, _next_marked_bytes == 0
-        // _next_marked_bytes == next_marked_bytes.
-      }
-    }
-    cur = cur->next_in_collection_set();
-  }
+    rsfp_task.work(0);
+  }
+
+  assert(check_cset_heap_region_claim_values(HeapRegion::ParEvacFailureClaimValue), "sanity");
+
+  // Reset the claim values in the regions in the collection set.
+  reset_cset_heap_region_claim_values();
+
+  assert(check_cset_heap_region_claim_values(HeapRegion::InitialClaimValue), "sanity");
   assert(g1_policy()->assertMarkedBytesDataOK(), "Should be!");
 
   // Now restore saved marks, if any.
@@ -4148,6 +4030,7 @@
       markOop m = _preserved_marks_of_objs->at(i);
       obj->set_mark(m);
     }
+
     // Delete the preserved marks growable arrays (allocated on the C heap).
     delete _objs_with_preserved_marks;
     delete _preserved_marks_of_objs;
@@ -4172,8 +4055,7 @@
 
 oop
 G1CollectedHeap::handle_evacuation_failure_par(OopsInHeapRegionClosure* cl,
-                                               oop old,
-                                               bool should_mark_root) {
+                                               oop old) {
   assert(obj_in_cs(old),
          err_msg("obj: "PTR_FORMAT" should still be in the CSet",
                  (HeapWord*) old));
@@ -4182,15 +4064,6 @@
   if (forward_ptr == NULL) {
     // Forward-to-self succeeded.
 
-    // should_mark_root will be true when this routine is called
-    // from a root scanning closure during an initial mark pause.
-    // In this case the thread that succeeds in self-forwarding the
-    // object is also responsible for marking the object.
-    if (should_mark_root) {
-      assert(!oopDesc::is_null(old), "shouldn't be");
-      _cm->grayRoot(old);
-    }
-
     if (_evac_failure_closure != cl) {
       MutexLockerEx x(EvacFailureStack_lock, Mutex::_no_safepoint_check_flag);
       assert(!_drain_in_progress,
@@ -4286,30 +4159,8 @@
   return NULL;
 }
 
-#ifndef PRODUCT
-bool GCLabBitMapClosure::do_bit(size_t offset) {
-  HeapWord* addr = _bitmap->offsetToHeapWord(offset);
-  guarantee(_cm->isMarked(oop(addr)), "it should be!");
-  return true;
-}
-#endif // PRODUCT
-
 G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) :
-  ParGCAllocBuffer(gclab_word_size),
-  _should_mark_objects(false),
-  _bitmap(G1CollectedHeap::heap()->reserved_region().start(), gclab_word_size),
-  _retired(false)
-{
-  //_should_mark_objects is set to true when G1ParCopyHelper needs to
-  // mark the forwarded location of an evacuated object.
-  // We set _should_mark_objects to true if marking is active, i.e. when we
-  // need to propagate a mark, or during an initial mark pause, i.e. when we
-  // need to mark objects immediately reachable by the roots.
-  if (G1CollectedHeap::heap()->mark_in_progress() ||
-      G1CollectedHeap::heap()->g1_policy()->during_initial_mark_pause()) {
-    _should_mark_objects = true;
-  }
-}
+  ParGCAllocBuffer(gclab_word_size), _retired(false) { }
 
 G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, int queue_num)
   : _g1h(g1h),
@@ -4323,8 +4174,7 @@
     _tenured_alloc_buffer(g1h->desired_plab_sz(GCAllocForTenured)),
     _age_table(false),
     _strong_roots_time(0), _term_time(0),
-    _alloc_buffer_waste(0), _undo_waste(0)
-{
+    _alloc_buffer_waste(0), _undo_waste(0) {
   // we allocate G1YoungSurvRateNumRegions plus one entries, since
   // we "sacrifice" entry 0 to keep track of surviving bytes for
   // non-young regions (where the age is -1)
@@ -4429,35 +4279,53 @@
   } while (!refs()->is_empty());
 }
 
-G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state) :
+G1ParClosureSuper::G1ParClosureSuper(G1CollectedHeap* g1,
+                                     G1ParScanThreadState* par_scan_state) :
   _g1(g1), _g1_rem(_g1->g1_rem_set()), _cm(_g1->concurrent_mark()),
   _par_scan_state(par_scan_state),
   _during_initial_mark(_g1->g1_policy()->during_initial_mark_pause()),
   _mark_in_progress(_g1->mark_in_progress()) { }
 
-template <class T> void G1ParCopyHelper::mark_object(T* p) {
-  // This is called from do_oop_work for objects that are not
-  // in the collection set. Objects in the collection set
-  // are marked after they have been evacuated.
-
-  T heap_oop = oopDesc::load_heap_oop(p);
-  if (!oopDesc::is_null(heap_oop)) {
-    oop obj = oopDesc::decode_heap_oop(heap_oop);
-    HeapWord* addr = (HeapWord*)obj;
-    if (_g1->is_in_g1_reserved(addr)) {
-      _cm->grayRoot(oop(addr));
-    }
-  }
-}
-
-oop G1ParCopyHelper::copy_to_survivor_space(oop old, bool should_mark_root,
-                                                     bool should_mark_copy) {
+void G1ParCopyHelper::mark_object(oop obj) {
+#ifdef ASSERT
+  HeapRegion* hr = _g1->heap_region_containing(obj);
+  assert(hr != NULL, "sanity");
+  assert(!hr->in_collection_set(), "should not mark objects in the CSet");
+#endif // ASSERT
+
+  // We know that the object is not moving so it's safe to read its size.
+  _cm->grayRoot(obj, (size_t) obj->size());
+}
+
+void G1ParCopyHelper::mark_forwarded_object(oop from_obj, oop to_obj) {
+#ifdef ASSERT
+  assert(from_obj->is_forwarded(), "from obj should be forwarded");
+  assert(from_obj->forwardee() == to_obj, "to obj should be the forwardee");
+  assert(from_obj != to_obj, "should not be self-forwarded");
+
+  HeapRegion* from_hr = _g1->heap_region_containing(from_obj);
+  assert(from_hr != NULL, "sanity");
+  assert(from_hr->in_collection_set(), "from obj should be in the CSet");
+
+  HeapRegion* to_hr = _g1->heap_region_containing(to_obj);
+  assert(to_hr != NULL, "sanity");
+  assert(!to_hr->in_collection_set(), "should not mark objects in the CSet");
+#endif // ASSERT
+
+  // The object might be in the process of being copied by another
+  // worker so we cannot trust that its to-space image is
+  // well-formed. So we have to read its size from its from-space
+  // image which we know should not be changing.
+  _cm->grayRoot(to_obj, (size_t) from_obj->size());
+}
+
+oop G1ParCopyHelper::copy_to_survivor_space(oop old) {
   size_t    word_sz = old->size();
   HeapRegion* from_region = _g1->heap_region_containing_raw(old);
   // +1 to make the -1 indexes valid...
   int       young_index = from_region->young_index_in_cset()+1;
-  assert( (from_region->is_young() && young_index > 0) ||
-          (!from_region->is_young() && young_index == 0), "invariant" );
+  assert( (from_region->is_young() && young_index >  0) ||
+         (!from_region->is_young() && young_index == 0), "invariant" );
   G1CollectorPolicy* g1p = _g1->g1_policy();
   markOop m = old->mark();
   int age = m->has_displaced_mark_helper() ? m->displaced_mark_helper()->age()
@@ -4471,7 +4339,7 @@
     // This will either forward-to-self, or detect that someone else has
     // installed a forwarding pointer.
     OopsInHeapRegionClosure* cl = _par_scan_state->evac_failure_closure();
-    return _g1->handle_evacuation_failure_par(cl, old, should_mark_root);
+    return _g1->handle_evacuation_failure_par(cl, old);
   }
 
   // We're going to allocate linearly, so might as well prefetch ahead.
@@ -4507,28 +4375,14 @@
       obj->set_mark(m);
     }
 
-    // Mark the evacuated object or propagate "next" mark bit
-    if (should_mark_copy) {
-      if (!use_local_bitmaps ||
-          !_par_scan_state->alloc_buffer(alloc_purpose)->mark(obj_ptr)) {
-        // if we couldn't mark it on the local bitmap (this happens when
-        // the object was not allocated in the GCLab), we have to bite
-        // the bullet and do the standard parallel mark
-        _cm->markAndGrayObjectIfNecessary(obj);
-      }
-
-      if (_g1->isMarkedNext(old)) {
-        // Unmark the object's old location so that marking
-        // doesn't think the old object is alive.
-        _cm->nextMarkBitMap()->parClear((HeapWord*)old);
-      }
-    }
-
     size_t* surv_young_words = _par_scan_state->surviving_young_words();
     surv_young_words[young_index] += word_sz;
 
     if (obj->is_objArray() && arrayOop(obj)->length() >= ParGCArrayScanChunk) {
-      arrayOop(old)->set_length(0);
+      // We keep track of the next start index in the length field of
+      // the to-space object. The actual length can be found in the
+      // length field of the from-space object.
+      arrayOop(obj)->set_length(0);
       oop* old_p = set_partial_array_mask(old);
       _par_scan_state->push_on_queue(old_p);
     } else {
@@ -4550,61 +4404,24 @@
 ::do_oop_work(T* p) {
   oop obj = oopDesc::load_decode_heap_oop(p);
   assert(barrier != G1BarrierRS || obj != NULL,
-         "Precondition: G1BarrierRS implies obj is nonNull");
-
-  // Marking:
-  // If the object is in the collection set, then the thread
-  // that copies the object should mark, or propagate the
-  // mark to, the evacuated object.
-  // If the object is not in the collection set then we
-  // should call the mark_object() method depending on the
-  // value of the template parameter do_mark_object (which will
-  // be true for root scanning closures during an initial mark
-  // pause).
-  // The mark_object() method first checks whether the object
-  // is marked and, if not, attempts to mark the object.
+         "Precondition: G1BarrierRS implies obj is non-NULL");
 
   // here the null check is implicit in the cset_fast_test() test
   if (_g1->in_cset_fast_test(obj)) {
+    oop forwardee;
     if (obj->is_forwarded()) {
-      oopDesc::encode_store_heap_oop(p, obj->forwardee());
-      // If we are a root scanning closure during an initial
-      // mark pause (i.e. do_mark_object will be true) then
-      // we also need to handle marking of roots in the
-      // event of an evacuation failure. In the event of an
-      // evacuation failure, the object is forwarded to itself
-      // and not copied. For root-scanning closures, the
-      // object would be marked after a successful self-forward
-      // but an object could be pointed to by both a root and non
-      // root location and be self-forwarded by a non-root-scanning
-      // closure. Therefore we also have to attempt to mark the
-      // self-forwarded root object here.
-      if (do_mark_object && obj->forwardee() == obj) {
-        mark_object(p);
-      }
+      forwardee = obj->forwardee();
     } else {
-      // During an initial mark pause, objects that are pointed to
-      // by the roots need to be marked - even in the event of an
-      // evacuation failure. We pass the template parameter
-      // do_mark_object (which is true for root scanning closures
-      // during an initial mark pause) to copy_to_survivor_space
-      // which will pass it on to the evacuation failure handling
-      // code. The thread that successfully self-forwards a root
-      // object to itself is responsible for marking the object.
-      bool should_mark_root = do_mark_object;
-
-      // We need to mark the copied object if we're a root scanning
-      // closure during an initial mark pause (i.e. do_mark_object
-      // will be true), or the object is already marked and we need
-      // to propagate the mark to the evacuated copy.
-      bool should_mark_copy = do_mark_object ||
-                              _during_initial_mark ||
-                              (_mark_in_progress && !_g1->is_obj_ill(obj));
-
-      oop copy_oop = copy_to_survivor_space(obj, should_mark_root,
-                                                 should_mark_copy);
-      oopDesc::encode_store_heap_oop(p, copy_oop);
+      forwardee = copy_to_survivor_space(obj);
     }
+    assert(forwardee != NULL, "forwardee should not be NULL");
+    oopDesc::encode_store_heap_oop(p, forwardee);
+    if (do_mark_object && forwardee != obj) {
+      // If the object is self-forwarded we don't need to explicitly
+      // mark it, the evacuation failure protocol will do so.
+      mark_forwarded_object(obj, forwardee);
+    }
+
     // When scanning the RS, we only care about objs in CS.
     if (barrier == G1BarrierRS) {
       _par_scan_state->update_rs(_from, p, _par_scan_state->queue_num());
@@ -4613,8 +4430,8 @@
     // The object is not in collection set. If we're a root scanning
     // closure during an initial mark pause (i.e. do_mark_object will
     // be true) then attempt to mark the object.
-    if (do_mark_object) {
-      mark_object(p);
+    if (do_mark_object && _g1->is_in_g1_reserved(obj)) {
+      mark_object(obj);
     }
   }
 
@@ -4632,35 +4449,51 @@
 
 template <class T> void G1ParScanPartialArrayClosure::do_oop_nv(T* p) {
   assert(has_partial_array_mask(p), "invariant");
-  oop old = clear_partial_array_mask(p);
-  assert(old->is_objArray(), "must be obj array");
-  assert(old->is_forwarded(), "must be forwarded");
-  assert(Universe::heap()->is_in_reserved(old), "must be in heap.");
-
-  objArrayOop obj = objArrayOop(old->forwardee());
-  assert((void*)old != (void*)old->forwardee(), "self forwarding here?");
-  // Process ParGCArrayScanChunk elements now
-  // and push the remainder back onto queue
-  int start     = arrayOop(old)->length();
-  int end       = obj->length();
-  int remainder = end - start;
-  assert(start <= end, "just checking");
+  oop from_obj = clear_partial_array_mask(p);
+
+  assert(Universe::heap()->is_in_reserved(from_obj), "must be in heap.");
+  assert(from_obj->is_objArray(), "must be obj array");
+  objArrayOop from_obj_array = objArrayOop(from_obj);
+  // The from-space object contains the real length.
+  int length                 = from_obj_array->length();
+
+  assert(from_obj->is_forwarded(), "must be forwarded");
+  oop to_obj                 = from_obj->forwardee();
+  assert(from_obj != to_obj, "should not be chunking self-forwarded objects");
+  objArrayOop to_obj_array   = objArrayOop(to_obj);
+  // We keep track of the next start index in the length field of the
+  // to-space object.
+  int next_index             = to_obj_array->length();
+  assert(0 <= next_index && next_index < length,
+         err_msg("invariant, next index: %d, length: %d", next_index, length));
+
+  int start                  = next_index;
+  int end                    = length;
+  int remainder              = end - start;
+  // We'll try not to push a range that's smaller than ParGCArrayScanChunk.
   if (remainder > 2 * ParGCArrayScanChunk) {
-    // Test above combines last partial chunk with a full chunk
     end = start + ParGCArrayScanChunk;
-    arrayOop(old)->set_length(end);
-    // Push remainder.
-    oop* old_p = set_partial_array_mask(old);
-    assert(arrayOop(old)->length() < obj->length(), "Empty push?");
-    _par_scan_state->push_on_queue(old_p);
+    to_obj_array->set_length(end);
+    // Push the remainder before we process the range in case another
+    // worker has run out of things to do and can steal it.
+    oop* from_obj_p = set_partial_array_mask(from_obj);
+    _par_scan_state->push_on_queue(from_obj_p);
   } else {
-    // Restore length so that the heap remains parsable in
-    // case of evacuation failure.
-    arrayOop(old)->set_length(end);
-  }
-  _scanner.set_region(_g1->heap_region_containing_raw(obj));
-  // process our set of indices (include header in first chunk)
-  obj->oop_iterate_range(&_scanner, start, end);
+    assert(length == end, "sanity");
+    // We'll process the final range for this object. Restore the length
+    // so that the heap remains parsable in case of evacuation failure.
+    to_obj_array->set_length(end);
+  }
+  _scanner.set_region(_g1->heap_region_containing_raw(to_obj));
+  // Process indexes [start,end). It will also process the header
+  // along with the first chunk (i.e., the chunk with start == 0).
+  // Note that at this point the length field of to_obj_array is not
+  // correct given that we are using it to keep track of the next
+  // start index. oop_iterate_range() (thankfully!) ignores the length
+  // field and only relies on the start / end parameters.  It does
+  // however return the size of the object which will be incorrect. So
+  // we have to ignore it even if we wanted to use it.
+  to_obj_array->oop_iterate_range(&_scanner, start, end);
 }
 
 class G1ParEvacuateFollowersClosure : public VoidClosure {
@@ -4893,12 +4726,16 @@
 
   g1_policy()->record_ext_root_scan_time(worker_i, ext_root_time_ms);
 
-  // Scan strong roots in mark stack.
-  if (!_process_strong_tasks->is_task_claimed(G1H_PS_mark_stack_oops_do)) {
-    concurrent_mark()->oops_do(scan_non_heap_roots);
-  }
-  double mark_stack_scan_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
-  g1_policy()->record_mark_stack_scan_time(worker_i, mark_stack_scan_ms);
+  // During conc marking we have to filter the per-thread SATB buffers
+  // to make sure we remove any oops into the CSet (which will show up
+  // as implicitly live).
+  if (!_process_strong_tasks->is_task_claimed(G1H_PS_filter_satb_buffers)) {
+    if (mark_in_progress()) {
+      JavaThread::satb_mark_queue_set().filter_thread_buffers();
+    }
+  }
+  double satb_filtering_ms = (os::elapsedTime() - ext_roots_end) * 1000.0;
+  g1_policy()->record_satb_filtering_time(worker_i, satb_filtering_ms);
 
   // Now scan the complement of the collection set.
   if (scan_rs != NULL) {
@@ -5439,6 +5276,7 @@
 }
 
 void G1CollectedHeap::evacuate_collection_set() {
+  _expand_heap_after_alloc_failure = true;
   set_evacuation_failed(false);
 
   g1_rem_set()->prepare_for_oops_into_collection_set_do();
@@ -5516,13 +5354,6 @@
 
   finalize_for_evac_failure();
 
-  // Must do this before clearing the per-region evac-failure flags
-  // (which is currently done when we free the collection set).
-  // We also only do this if marking is actually in progress and so
-  // have to do this before we set the mark_in_progress flag at the
-  // end of an initial mark pause.
-  concurrent_mark()->complete_marking_in_collection_set();
-
   if (evacuation_failed()) {
     remove_self_forwarding_pointers();
     if (PrintGCDetails) {
@@ -6179,6 +6010,8 @@
       } else {
         _hr_printer.alloc(new_alloc_region, G1HRPrinter::Old);
       }
+      bool during_im = g1_policy()->during_initial_mark_pause();
+      new_alloc_region->note_start_of_copying(during_im);
       return new_alloc_region;
     } else {
       g1_policy()->note_alloc_region_limit_reached(ap);
@@ -6190,7 +6023,8 @@
 void G1CollectedHeap::retire_gc_alloc_region(HeapRegion* alloc_region,
                                              size_t allocated_bytes,
                                              GCAllocPurpose ap) {
-  alloc_region->note_end_of_copying();
+  bool during_im = g1_policy()->during_initial_mark_pause();
+  alloc_region->note_end_of_copying(during_im);
   g1_policy()->record_bytes_copied_during_gc(allocated_bytes);
   if (ap == GCAllocForSurvived) {
     young_list()->add_survivor_region(alloc_region);
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Jan 13 01:55:22 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -285,6 +285,14 @@
   // Typically, it is not full so we should re-use it during the next GC.
   HeapRegion* _retained_old_gc_alloc_region;
 
+  // It specifies whether we should attempt to expand the heap after a
+  // region allocation failure. If heap expansion fails we set this to
+  // false so that we don't re-attempt the heap expansion (it's likely
+  // that subsequent expansion attempts will also fail if one fails).
+  // Currently, it is only consulted during GC and it's reset at the
+  // start of each GC.
+  bool _expand_heap_after_alloc_failure;
+
   // It resets the mutator alloc region before new allocations can take place.
   void init_mutator_alloc_region();
 
@@ -861,8 +869,7 @@
   void finalize_for_evac_failure();
 
   // An attempt to evacuate "obj" has failed; take necessary steps.
-  oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj,
-                                    bool should_mark_root);
+  oop handle_evacuation_failure_par(OopsInHeapRegionClosure* cl, oop obj);
   void handle_evacuation_failure_common(oop obj, markOop m);
 
   // ("Weak") Reference processing support.
@@ -954,7 +961,7 @@
   unsigned int* _worker_cset_start_region_time_stamp;
 
   enum G1H_process_strong_roots_tasks {
-    G1H_PS_mark_stack_oops_do,
+    G1H_PS_filter_satb_buffers,
     G1H_PS_refProcessor_oops_do,
     // Leave this one last.
     G1H_PS_NumElements
@@ -1305,6 +1312,10 @@
   // It resets all the region claim values to the default.
   void reset_heap_region_claim_values();
 
+  // Resets the claim values of regions in the current
+  // collection set to the default.
+  void reset_cset_heap_region_claim_values();
+
 #ifdef ASSERT
   bool check_heap_region_claim_values(jint claim_value);
 
@@ -1740,10 +1751,8 @@
       _gclab_word_size(gclab_word_size),
       _real_start_word(NULL),
       _real_end_word(NULL),
-      _start_word(NULL)
-  {
-    guarantee( size_in_words() >= bitmap_size_in_words(),
-               "just making sure");
+      _start_word(NULL) {
+    guarantee(false, "GCLabBitMap::GCLabBitmap(): don't call this any more");
   }
 
   inline unsigned heapWordToOffset(HeapWord* addr) {
@@ -1797,6 +1806,8 @@
   }
 
   void set_buffer(HeapWord* start) {
+    guarantee(false, "set_buffer(): don't call this any more");
+
     guarantee(use_local_bitmaps, "invariant");
     clear();
 
@@ -1820,6 +1831,8 @@
 #endif // PRODUCT
 
   void retire() {
+    guarantee(false, "retire(): don't call this any more");
+
     guarantee(use_local_bitmaps, "invariant");
     assert(fields_well_formed(), "invariant");
 
@@ -1853,32 +1866,18 @@
 class G1ParGCAllocBuffer: public ParGCAllocBuffer {
 private:
   bool        _retired;
-  bool        _should_mark_objects;
-  GCLabBitMap _bitmap;
 
 public:
   G1ParGCAllocBuffer(size_t gclab_word_size);
 
-  inline bool mark(HeapWord* addr) {
-    guarantee(use_local_bitmaps, "invariant");
-    assert(_should_mark_objects, "invariant");
-    return _bitmap.mark(addr);
-  }
-
-  inline void set_buf(HeapWord* buf) {
-    if (use_local_bitmaps && _should_mark_objects) {
-      _bitmap.set_buffer(buf);
-    }
+  void set_buf(HeapWord* buf) {
     ParGCAllocBuffer::set_buf(buf);
     _retired = false;
   }
 
-  inline void retire(bool end_of_gc, bool retain) {
+  void retire(bool end_of_gc, bool retain) {
     if (_retired)
       return;
-    if (use_local_bitmaps && _should_mark_objects) {
-      _bitmap.retire();
-    }
     ParGCAllocBuffer::retire(end_of_gc, retain);
     _retired = true;
   }
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Jan 13 01:55:22 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -281,7 +281,7 @@
 
   _par_last_gc_worker_start_times_ms = new double[_parallel_gc_threads];
   _par_last_ext_root_scan_times_ms = new double[_parallel_gc_threads];
-  _par_last_mark_stack_scan_times_ms = new double[_parallel_gc_threads];
+  _par_last_satb_filtering_times_ms = new double[_parallel_gc_threads];
 
   _par_last_update_rs_times_ms = new double[_parallel_gc_threads];
   _par_last_update_rs_processed_buffers = new double[_parallel_gc_threads];
@@ -905,10 +905,19 @@
     gclog_or_tty->print(" (%s)", gcs_are_young() ? "young" : "mixed");
   }
 
-  // We only need to do this here as the policy will only be applied
-  // to the GC we're about to start. so, no point is calculating this
-  // every time we calculate / recalculate the target young length.
-  update_survivors_policy();
+  if (!during_initial_mark_pause()) {
+    // We only need to do this here as the policy will only be applied
+    // to the GC we're about to start. so, no point is calculating this
+    // every time we calculate / recalculate the target young length.
+    update_survivors_policy();
+  } else {
+    // The marking phase has a "we only copy implicitly live
+    // objects during marking" invariant. The easiest way to ensure it
+    // holds is not to allocate any survivor regions and tenure all
+    // objects. In the future we might change this and handle survivor
+    // regions specially during marking.
+    tenure_all_objects();
+  }
 
   assert(_g1->used() == _g1->recalculate_used(),
          err_msg("sanity, used: "SIZE_FORMAT" recalculate_used: "SIZE_FORMAT,
@@ -939,7 +948,7 @@
   for (int i = 0; i < _parallel_gc_threads; ++i) {
     _par_last_gc_worker_start_times_ms[i] = -1234.0;
     _par_last_ext_root_scan_times_ms[i] = -1234.0;
-    _par_last_mark_stack_scan_times_ms[i] = -1234.0;
+    _par_last_satb_filtering_times_ms[i] = -1234.0;
     _par_last_update_rs_times_ms[i] = -1234.0;
     _par_last_update_rs_processed_buffers[i] = -1234.0;
     _par_last_scan_rs_times_ms[i] = -1234.0;
@@ -1227,7 +1236,7 @@
   // of the PrintGCDetails output, in the non-parallel case.
 
   double ext_root_scan_time = avg_value(_par_last_ext_root_scan_times_ms);
-  double mark_stack_scan_time = avg_value(_par_last_mark_stack_scan_times_ms);
+  double satb_filtering_time = avg_value(_par_last_satb_filtering_times_ms);
   double update_rs_time = avg_value(_par_last_update_rs_times_ms);
   double update_rs_processed_buffers =
     sum_of_values(_par_last_update_rs_processed_buffers);
@@ -1236,7 +1245,7 @@
   double termination_time = avg_value(_par_last_termination_times_ms);
 
   double known_time = ext_root_scan_time +
-                      mark_stack_scan_time +
+                      satb_filtering_time +
                       update_rs_time +
                       scan_rs_time +
                       obj_copy_time;
@@ -1282,7 +1291,7 @@
     body_summary->record_satb_drain_time_ms(_cur_satb_drain_time_ms);
 
     body_summary->record_ext_root_scan_time_ms(ext_root_scan_time);
-    body_summary->record_mark_stack_scan_time_ms(mark_stack_scan_time);
+    body_summary->record_satb_filtering_time_ms(satb_filtering_time);
     body_summary->record_update_rs_time_ms(update_rs_time);
     body_summary->record_scan_rs_time_ms(scan_rs_time);
     body_summary->record_obj_copy_time_ms(obj_copy_time);
@@ -1376,16 +1385,12 @@
                            (last_pause_included_initial_mark) ? " (initial-mark)" : "",
                            elapsed_ms / 1000.0);
 
-    if (print_marking_info) {
-      print_stats(1, "SATB Drain Time", _cur_satb_drain_time_ms);
-    }
-
     if (parallel) {
       print_stats(1, "Parallel Time", _cur_collection_par_time_ms);
       print_par_stats(2, "GC Worker Start", _par_last_gc_worker_start_times_ms);
       print_par_stats(2, "Ext Root Scanning", _par_last_ext_root_scan_times_ms);
       if (print_marking_info) {
-        print_par_stats(2, "Mark Stack Scanning", _par_last_mark_stack_scan_times_ms);
+        print_par_stats(2, "SATB Filtering", _par_last_satb_filtering_times_ms);
       }
       print_par_stats(2, "Update RS", _par_last_update_rs_times_ms);
       print_par_sizes(3, "Processed Buffers", _par_last_update_rs_processed_buffers);
@@ -1399,7 +1404,7 @@
         _par_last_gc_worker_times_ms[i] = _par_last_gc_worker_end_times_ms[i] - _par_last_gc_worker_start_times_ms[i];
 
         double worker_known_time = _par_last_ext_root_scan_times_ms[i] +
-                                   _par_last_mark_stack_scan_times_ms[i] +
+                                   _par_last_satb_filtering_times_ms[i] +
                                    _par_last_update_rs_times_ms[i] +
                                    _par_last_scan_rs_times_ms[i] +
                                    _par_last_obj_copy_times_ms[i] +
@@ -1412,7 +1417,7 @@
     } else {
       print_stats(1, "Ext Root Scanning", ext_root_scan_time);
       if (print_marking_info) {
-        print_stats(1, "Mark Stack Scanning", mark_stack_scan_time);
+        print_stats(1, "SATB Filtering", satb_filtering_time);
       }
       print_stats(1, "Update RS", update_rs_time);
       print_stats(2, "Processed Buffers", (int)update_rs_processed_buffers);
@@ -1983,11 +1988,10 @@
   if (summary->get_total_seq()->num() > 0) {
     print_summary_sd(0, "Evacuation Pauses", summary->get_total_seq());
     if (body_summary != NULL) {
-      print_summary(1, "SATB Drain", body_summary->get_satb_drain_seq());
       if (parallel) {
         print_summary(1, "Parallel Time", body_summary->get_parallel_seq());
         print_summary(2, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
-        print_summary(2, "Mark Stack Scanning", body_summary->get_mark_stack_scan_seq());
+        print_summary(2, "SATB Filtering", body_summary->get_satb_filtering_seq());
         print_summary(2, "Update RS", body_summary->get_update_rs_seq());
         print_summary(2, "Scan RS", body_summary->get_scan_rs_seq());
         print_summary(2, "Object Copy", body_summary->get_obj_copy_seq());
@@ -1996,7 +2000,7 @@
         {
           NumberSeq* other_parts[] = {
             body_summary->get_ext_root_scan_seq(),
-            body_summary->get_mark_stack_scan_seq(),
+            body_summary->get_satb_filtering_seq(),
             body_summary->get_update_rs_seq(),
             body_summary->get_scan_rs_seq(),
             body_summary->get_obj_copy_seq(),
@@ -2009,7 +2013,7 @@
         }
       } else {
         print_summary(1, "Ext Root Scanning", body_summary->get_ext_root_scan_seq());
-        print_summary(1, "Mark Stack Scanning", body_summary->get_mark_stack_scan_seq());
+        print_summary(1, "SATB Filtering", body_summary->get_satb_filtering_seq());
         print_summary(1, "Update RS", body_summary->get_update_rs_seq());
         print_summary(1, "Scan RS", body_summary->get_scan_rs_seq());
         print_summary(1, "Object Copy", body_summary->get_obj_copy_seq());
@@ -2036,7 +2040,7 @@
             body_summary->get_satb_drain_seq(),
             body_summary->get_update_rs_seq(),
             body_summary->get_ext_root_scan_seq(),
-            body_summary->get_mark_stack_scan_seq(),
+            body_summary->get_satb_filtering_seq(),
             body_summary->get_scan_rs_seq(),
             body_summary->get_obj_copy_seq()
           };
@@ -2433,9 +2437,6 @@
   assert(_inc_cset_build_state == Active, "Precondition");
   assert(!hr->is_young(), "non-incremental add of young region");
 
-  if (_g1->mark_in_progress())
-    _g1->concurrent_mark()->registerCSetRegion(hr);
-
   assert(!hr->in_collection_set(), "should not already be in the CSet");
   hr->set_in_collection_set(true);
   hr->set_next_in_collection_set(_collection_set);
@@ -2705,9 +2706,6 @@
   // Clear the fields that point to the survivor list - they are all young now.
   young_list->clear_survivors();
 
-  if (_g1->mark_in_progress())
-    _g1->concurrent_mark()->register_collection_set_finger(_inc_cset_max_finger);
-
   _collection_set = _inc_cset_head;
   _collection_set_bytes_used_before = _inc_cset_bytes_used_before;
   time_remaining_ms -= _inc_cset_predicted_elapsed_time_ms;
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Fri Jan 13 01:55:22 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -67,7 +67,7 @@
   define_num_seq(satb_drain) // optional
   define_num_seq(parallel) // parallel only
     define_num_seq(ext_root_scan)
-    define_num_seq(mark_stack_scan)
+    define_num_seq(satb_filtering)
     define_num_seq(update_rs)
     define_num_seq(scan_rs)
     define_num_seq(obj_copy)
@@ -215,7 +215,7 @@
 
   double* _par_last_gc_worker_start_times_ms;
   double* _par_last_ext_root_scan_times_ms;
-  double* _par_last_mark_stack_scan_times_ms;
+  double* _par_last_satb_filtering_times_ms;
   double* _par_last_update_rs_times_ms;
   double* _par_last_update_rs_processed_buffers;
   double* _par_last_scan_rs_times_ms;
@@ -841,8 +841,8 @@
     _par_last_ext_root_scan_times_ms[worker_i] = ms;
   }
 
-  void record_mark_stack_scan_time(int worker_i, double ms) {
-    _par_last_mark_stack_scan_times_ms[worker_i] = ms;
+  void record_satb_filtering_time(int worker_i, double ms) {
+    _par_last_satb_filtering_times_ms[worker_i] = ms;
   }
 
   void record_satb_drain_time(double ms) {
@@ -1146,6 +1146,11 @@
     _survivor_surv_rate_group->stop_adding_regions();
   }
 
+  void tenure_all_objects() {
+    _max_survivor_regions = 0;
+    _tenuring_threshold = 0;
+  }
+
   void record_survivor_regions(size_t      regions,
                                HeapRegion* head,
                                HeapRegion* tail) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Fri Jan 13 01:55:22 2012 -0800
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1EVACFAILURE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_G1_G1EVACFAILURE_HPP
+
+#include "gc_implementation/g1/concurrentMark.inline.hpp"
+#include "gc_implementation/g1/dirtyCardQueue.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1_globals.hpp"
+#include "gc_implementation/g1/g1OopClosures.inline.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#include "gc_implementation/g1/heapRegionRemSet.hpp"
+#include "utilities/workgroup.hpp"
+
+// Closures and tasks associated with any self-forwarding pointers
+// installed as a result of an evacuation failure.
+
+class UpdateRSetDeferred : public OopsInHeapRegionClosure {
+private:
+  G1CollectedHeap* _g1;
+  DirtyCardQueue *_dcq;
+  CardTableModRefBS* _ct_bs;
+
+public:
+  UpdateRSetDeferred(G1CollectedHeap* g1, DirtyCardQueue* dcq) :
+    _g1(g1), _ct_bs((CardTableModRefBS*)_g1->barrier_set()), _dcq(dcq) {}
+
+  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
+  virtual void do_oop(      oop* p) { do_oop_work(p); }
+  template <class T> void do_oop_work(T* p) {
+    assert(_from->is_in_reserved(p), "paranoia");
+    if (!_from->is_in_reserved(oopDesc::load_decode_heap_oop(p)) &&
+        !_from->is_survivor()) {
+      size_t card_index = _ct_bs->index_for(p);
+      if (_ct_bs->mark_card_deferred(card_index)) {
+        _dcq->enqueue((jbyte*)_ct_bs->byte_for_index(card_index));
+      }
+    }
+  }
+};
+
+class RemoveSelfForwardPtrObjClosure: public ObjectClosure {
+private:
+  G1CollectedHeap* _g1;
+  ConcurrentMark* _cm;
+  HeapRegion* _hr;
+  size_t _marked_bytes;
+  OopsInHeapRegionClosure *_update_rset_cl;
+  bool _during_initial_mark;
+  bool _during_conc_mark;
+public:
+  RemoveSelfForwardPtrObjClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
+                                 HeapRegion* hr,
+                                 OopsInHeapRegionClosure* update_rset_cl,
+                                 bool during_initial_mark,
+                                 bool during_conc_mark) :
+    _g1(g1), _cm(cm), _hr(hr), _marked_bytes(0),
+    _update_rset_cl(update_rset_cl),
+    _during_initial_mark(during_initial_mark),
+    _during_conc_mark(during_conc_mark) { }
+
+  size_t marked_bytes() { return _marked_bytes; }
+
+  // <original comment>
+  // The original idea here was to coalesce evacuated and dead objects.
+  // However that caused complications with the block offset table (BOT).
+  // In particular if there were two TLABs, one of them partially refined.
+  // |----- TLAB_1--------|----TLAB_2-~~~(partially refined part)~~~|
+  // The BOT entries of the unrefined part of TLAB_2 point to the start
+  // of TLAB_2. If the last object of the TLAB_1 and the first object
+  // of TLAB_2 are coalesced, then the cards of the unrefined part
+  // would point into middle of the filler object.
+  // The current approach is to not coalesce and leave the BOT contents intact.
+  // </original comment>
+  //
+  // We now reset the BOT when we start the object iteration over the
+  // region and refine its entries for every object we come across. So
+  // the above comment is not really relevant and we should be able
+  // to coalesce dead objects if we want to.
+  void do_object(oop obj) {
+    HeapWord* obj_addr = (HeapWord*) obj;
+    assert(_hr->is_in(obj_addr), "sanity");
+    size_t obj_size = obj->size();
+    _hr->update_bot_for_object(obj_addr, obj_size);
+
+    if (obj->is_forwarded() && obj->forwardee() == obj) {
+      // The object failed to move.
+
+      // We consider all objects that we find self-forwarded to be
+      // live. What we'll do is that we'll update the prev marking
+      // info so that they are all under PTAMS and explicitly marked.
+      _cm->markPrev(obj);
+      if (_during_initial_mark) {
+        // For the next marking info we'll only mark the
+        // self-forwarded objects explicitly if we are during
+        // initial-mark (since, normally, we only mark objects pointed
+        // to by roots if we succeed in copying them). By marking all
+        // self-forwarded objects we ensure that we mark any that are
+        // still pointed to be roots. During concurrent marking, and
+        // after initial-mark, we don't need to mark any objects
+        // explicitly and all objects in the CSet are considered
+        // (implicitly) live. So, we won't mark them explicitly and
+        // we'll leave them over NTAMS.
+        _cm->markNext(obj);
+      }
+      _marked_bytes += (obj_size * HeapWordSize);
+      obj->set_mark(markOopDesc::prototype());
+
+      // While we were processing RSet buffers during the collection,
+      // we actually didn't scan any cards on the collection set,
+      // since we didn't want to update remembered sets with entries
+      // that point into the collection set, given that live objects
+      // from the collection set are about to move and such entries
+      // will be stale very soon.
+      // This change also dealt with a reliability issue which
+      // involved scanning a card in the collection set and coming
+      // across an array that was being chunked and looking malformed.
+      // The problem is that, if evacuation fails, we might have
+      // remembered set entries missing given that we skipped cards on
+      // the collection set. So, we'll recreate such entries now.
+      obj->oop_iterate(_update_rset_cl);
+      assert(_cm->isPrevMarked(obj), "Should be marked!");
+    } else {
+      // The object has been either evacuated or is dead. Fill it with a
+      // dummy object.
+      MemRegion mr((HeapWord*) obj, obj_size);
+      CollectedHeap::fill_with_object(mr);
+    }
+  }
+};
+
+class RemoveSelfForwardPtrHRClosure: public HeapRegionClosure {
+  G1CollectedHeap* _g1h;
+  ConcurrentMark* _cm;
+  OopsInHeapRegionClosure *_update_rset_cl;
+
+public:
+  RemoveSelfForwardPtrHRClosure(G1CollectedHeap* g1h,
+                                OopsInHeapRegionClosure* update_rset_cl) :
+    _g1h(g1h), _update_rset_cl(update_rset_cl),
+    _cm(_g1h->concurrent_mark()) { }
+
+  bool doHeapRegion(HeapRegion *hr) {
+    bool during_initial_mark = _g1h->g1_policy()->during_initial_mark_pause();
+    bool during_conc_mark = _g1h->mark_in_progress();
+
+    assert(!hr->isHumongous(), "sanity");
+    assert(hr->in_collection_set(), "bad CS");
+
+    if (hr->claimHeapRegion(HeapRegion::ParEvacFailureClaimValue)) {
+      if (hr->evacuation_failed()) {
+        RemoveSelfForwardPtrObjClosure rspc(_g1h, _cm, hr, _update_rset_cl,
+                                            during_initial_mark,
+                                            during_conc_mark);
+
+        MemRegion mr(hr->bottom(), hr->end());
+        // We'll recreate the prev marking info so we'll first clear
+        // the prev bitmap range for this region. We never mark any
+        // CSet objects explicitly so the next bitmap range should be
+        // cleared anyway.
+        _cm->clearRangePrevBitmap(mr);
+
+        hr->note_self_forwarding_removal_start(during_initial_mark,
+                                               during_conc_mark);
+
+        // In the common case (i.e. when there is no evacuation
+        // failure) we make sure that the following is done when
+        // the region is freed so that it is "ready-to-go" when it's
+        // re-allocated. However, when evacuation failure happens, a
+        // region will remain in the heap and might ultimately be added
+        // to a CSet in the future. So we have to be careful here and
+        // make sure the region's RSet is ready for parallel iteration
+        // whenever this might be required in the future.
+        hr->rem_set()->reset_for_par_iteration();
+        hr->reset_bot();
+        _update_rset_cl->set_region(hr);
+        hr->object_iterate(&rspc);
+
+        hr->note_self_forwarding_removal_end(during_initial_mark,
+                                             during_conc_mark,
+                                             rspc.marked_bytes());
+      }
+    }
+    return false;
+  }
+};
+
+class G1ParRemoveSelfForwardPtrsTask: public AbstractGangTask {
+protected:
+  G1CollectedHeap* _g1h;
+
+public:
+  G1ParRemoveSelfForwardPtrsTask(G1CollectedHeap* g1h) :
+    AbstractGangTask("G1 Remove Self-forwarding Pointers"),
+    _g1h(g1h) { }
+
+  void work(uint worker_id) {
+    UpdateRSetImmediate immediate_update(_g1h->g1_rem_set());
+    DirtyCardQueue dcq(&_g1h->dirty_card_queue_set());
+    UpdateRSetDeferred deferred_update(_g1h, &dcq);
+
+    OopsInHeapRegionClosure *update_rset_cl = &deferred_update;
+    if (!G1DeferredRSUpdate) {
+      update_rset_cl = &immediate_update;
+    }
+
+    RemoveSelfForwardPtrHRClosure rsfp_cl(_g1h, update_rset_cl);
+
+    HeapRegion* hr = _g1h->start_cset_region_for_worker(worker_id);
+    _g1h->collection_set_iterate_from(hr, &rsfp_cl);
+  }
+};
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1EVACFAILURE_HPP
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Fri Jan 13 01:55:22 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -121,17 +121,25 @@
 class G1ParCopyHelper : public G1ParClosureSuper {
   G1ParScanClosure *_scanner;
 protected:
-  template <class T> void mark_object(T* p);
-  oop copy_to_survivor_space(oop obj, bool should_mark_root,
-                                      bool should_mark_copy);
+  // Mark the object if it's not already marked. This is used to mark
+  // objects pointed to by roots that are guaranteed not to move
+  // during the GC (i.e., non-CSet objects). It is MT-safe.
+  void mark_object(oop obj);
+
+  // Mark the object if it's not already marked. This is used to mark
+  // objects pointed to by roots that have been forwarded during a
+  // GC. It is MT-safe.
+  void mark_forwarded_object(oop from_obj, oop to_obj);
+
+  oop copy_to_survivor_space(oop obj);
+
 public:
   G1ParCopyHelper(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
                   G1ParScanClosure *scanner) :
     G1ParClosureSuper(g1, par_scan_state), _scanner(scanner) { }
 };
 
-template<bool do_gen_barrier, G1Barrier barrier,
-         bool do_mark_object>
+template <bool do_gen_barrier, G1Barrier barrier, bool do_mark_object>
 class G1ParCopyClosure : public G1ParCopyHelper {
   G1ParScanClosure _scanner;
 
@@ -140,9 +148,8 @@
 public:
   G1ParCopyClosure(G1CollectedHeap* g1, G1ParScanThreadState* par_scan_state,
                    ReferenceProcessor* rp) :
-    _scanner(g1, par_scan_state, rp),
-    G1ParCopyHelper(g1, par_scan_state, &_scanner)
-  {
+      _scanner(g1, par_scan_state, rp),
+      G1ParCopyHelper(g1, par_scan_state, &_scanner) {
     assert(_ref_processor == NULL, "sanity");
   }
 
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Jan 13 01:55:22 2012 -0800
@@ -295,7 +295,7 @@
           "Percentage (0-100) of the heap size to use as minimum "          \
           "young gen size.")                                                \
                                                                             \
-  develop(uintx, G1DefaultMaxNewGenPercent, 50,                             \
+  develop(uintx, G1DefaultMaxNewGenPercent, 80,                             \
           "Percentage (0-100) of the heap size to use as maximum "          \
           "young gen size.")
 
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Fri Jan 13 01:55:22 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -575,6 +575,40 @@
   oops_in_mr_iterate(MemRegion(bottom(), saved_mark_word()), cl);
 }
 
+void HeapRegion::note_self_forwarding_removal_start(bool during_initial_mark,
+                                                    bool during_conc_mark) {
+  // We always recreate the prev marking info and we'll explicitly
+  // mark all objects we find to be self-forwarded on the prev
+  // bitmap. So all objects need to be below PTAMS.
+  _prev_top_at_mark_start = top();
+  _prev_marked_bytes = 0;
+
+  if (during_initial_mark) {
+    // During initial-mark, we'll also explicitly mark all objects
+    // we find to be self-forwarded on the next bitmap. So all
+    // objects need to be below NTAMS.
+    _next_top_at_mark_start = top();
+    set_top_at_conc_mark_count(bottom());
+    _next_marked_bytes = 0;
+  } else if (during_conc_mark) {
+    // During concurrent mark, all objects in the CSet (including
+    // the ones we find to be self-forwarded) are implicitly live.
+    // So all objects need to be above NTAMS.
+    _next_top_at_mark_start = bottom();
+    set_top_at_conc_mark_count(bottom());
+    _next_marked_bytes = 0;
+  }
+}
+
+void HeapRegion::note_self_forwarding_removal_end(bool during_initial_mark,
+                                                  bool during_conc_mark,
+                                                  size_t marked_bytes) {
+  assert(0 <= marked_bytes && marked_bytes <= used(),
+         err_msg("marked: "SIZE_FORMAT" used: "SIZE_FORMAT,
+                 marked_bytes, used()));
+  _prev_marked_bytes = marked_bytes;
+}
+
 HeapWord*
 HeapRegion::object_iterate_mem_careful(MemRegion mr,
                                                  ObjectClosure* cl) {
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Fri Jan 13 01:55:22 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -373,7 +373,8 @@
     ScrubRemSetClaimValue      = 3,
     ParVerifyClaimValue        = 4,
     RebuildRSClaimValue        = 5,
-    CompleteMarkCSetClaimValue = 6
+    CompleteMarkCSetClaimValue = 6,
+    ParEvacFailureClaimValue   = 7
   };
 
   inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
@@ -582,37 +583,33 @@
   // that the collector is about to start or has finished (concurrently)
   // marking the heap.
 
-  // Note the start of a marking phase. Record the
-  // start of the unmarked area of the region here.
-  void note_start_of_marking(bool during_initial_mark) {
-    init_top_at_conc_mark_count();
-    _next_marked_bytes = 0;
-    if (during_initial_mark && is_young() && !is_survivor())
-      _next_top_at_mark_start = bottom();
-    else
-      _next_top_at_mark_start = top();
-  }
+  // Notify the region that concurrent marking is starting. Initialize
+  // all fields related to the next marking info.
+  inline void note_start_of_marking();
 
-  // Note the end of a marking phase. Install the start of
-  // the unmarked area that was captured at start of marking.
-  void note_end_of_marking() {
-    _prev_top_at_mark_start = _next_top_at_mark_start;
-    _prev_marked_bytes = _next_marked_bytes;
-    _next_marked_bytes = 0;
+  // Notify the region that concurrent marking has finished. Copy the
+  // (now finalized) next marking info fields into the prev marking
+  // info fields.
+  inline void note_end_of_marking();
 
-    guarantee(_prev_marked_bytes <=
-              (size_t) (prev_top_at_mark_start() - bottom()) * HeapWordSize,
-              "invariant");
-  }
+  // Notify the region that it will be used as to-space during a GC
+  // and we are about to start copying objects into it.
+  inline void note_start_of_copying(bool during_initial_mark);
 
-  // After an evacuation, we need to update _next_top_at_mark_start
-  // to be the current top.  Note this is only valid if we have only
-  // ever evacuated into this region.  If we evacuate, allocate, and
-  // then evacuate we are in deep doodoo.
-  void note_end_of_copying() {
-    assert(top() >= _next_top_at_mark_start, "Increase only");
-    _next_top_at_mark_start = top();
-  }
+  // Notify the region that it ceases being to-space during a GC and
+  // we will not copy objects into it any more.
+  inline void note_end_of_copying(bool during_initial_mark);
+
+  // Notify the region that we are about to start processing
+  // self-forwarded objects during evac failure handling.
+  void note_self_forwarding_removal_start(bool during_initial_mark,
+                                          bool during_conc_mark);
+
+  // Notify the region that we have finished processing self-forwarded
+  // objects during evac failure handling.
+  void note_self_forwarding_removal_end(bool during_initial_mark,
+                                        bool during_conc_mark,
+                                        size_t marked_bytes);
 
   // Returns "false" iff no object in the region was allocated when the
   // last mark phase ended.
--- a/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Fri Jan 13 01:55:22 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,4 +55,71 @@
   return _offsets.block_start_const(p);
 }
 
+inline void HeapRegion::note_start_of_marking() {
+  init_top_at_conc_mark_count();
+  _next_marked_bytes = 0;
+  _next_top_at_mark_start = top();
+}
+
+inline void HeapRegion::note_end_of_marking() {
+  _prev_top_at_mark_start = _next_top_at_mark_start;
+  _prev_marked_bytes = _next_marked_bytes;
+  _next_marked_bytes = 0;
+
+  assert(_prev_marked_bytes <=
+         (size_t) pointer_delta(prev_top_at_mark_start(), bottom()) *
+         HeapWordSize, "invariant");
+}
+
+inline void HeapRegion::note_start_of_copying(bool during_initial_mark) {
+  if (during_initial_mark) {
+    if (is_survivor()) {
+      assert(false, "should not allocate survivors during IM");
+    } else {
+      // During initial-mark we'll explicitly mark any objects on old
+      // regions that are pointed to by roots. Given that explicit
+      // marks only make sense under NTAMS it'd be nice if we could
+      // check that condition if we wanted to. Given that we don't
+      // know where the top of this region will end up, we simply set
+      // NTAMS to the end of the region so all marks will be below
+      // NTAMS. We'll set it to the actual top when we retire this region.
+      _next_top_at_mark_start = end();
+    }
+  } else {
+    if (is_survivor()) {
+      // This is how we always allocate survivors.
+      assert(_next_top_at_mark_start == bottom(), "invariant");
+    } else {
+      // We could have re-used this old region as to-space over a
+      // couple of GCs since the start of the concurrent marking
+      // cycle. This means that [bottom,NTAMS) will contain objects
+      // copied up to and including initial-mark and [NTAMS, top)
+      // will contain objects copied during the concurrent marking cycle.
+      assert(top() >= _next_top_at_mark_start, "invariant");
+    }
+  }
+}
+
+inline void HeapRegion::note_end_of_copying(bool during_initial_mark) {
+  if (during_initial_mark) {
+    if (is_survivor()) {
+      assert(false, "should not allocate survivors during IM");
+    } else {
+      // See the comment for note_start_of_copying() for the details
+      // on this.
+      assert(_next_top_at_mark_start == end(), "pre-condition");
+      _next_top_at_mark_start = top();
+    }
+  } else {
+    if (is_survivor()) {
+      // This is how we always allocate survivors.
+      assert(_next_top_at_mark_start == bottom(), "invariant");
+    } else {
+      // See the comment for note_start_of_copying() for the details
+      // on this.
+      assert(top() >= _next_top_at_mark_start, "invariant");
+    }
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_INLINE_HPP
--- a/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/ptrQueue.hpp	Fri Jan 13 01:55:22 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -70,7 +70,7 @@
   // given PtrQueueSet.
   PtrQueue(PtrQueueSet* qset, bool perm = false, bool active = false);
   // Release any contained resources.
-  void flush();
+  virtual void flush();
   // Calls flush() when destroyed.
   ~PtrQueue() { flush(); }
 
--- a/src/share/vm/gc_implementation/g1/satbQueue.cpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp	Fri Jan 13 01:55:22 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,14 @@
 #include "runtime/thread.hpp"
 #include "runtime/vmThread.hpp"
 
+void ObjPtrQueue::flush() {
+  // The buffer might contain refs into the CSet. We have to filter it
+  // first before we flush it, otherwise we might end up with an
+  // enqueued buffer with refs into the CSet which breaks our invariants.
+  filter();
+  PtrQueue::flush();
+}
+
 // This method removes entries from an SATB buffer that will not be
 // useful to the concurrent marking threads. An entry is removed if it
 // satisfies one of the following conditions:
@@ -44,38 +52,27 @@
 //     process it again).
 //
 // The rest of the entries will be retained and are compacted towards
-// the top of the buffer. If with this filtering we clear a large
-// enough chunk of the buffer we can re-use it (instead of enqueueing
-// it) and we can just allow the mutator to carry on executing.
+// the top of the buffer. Note that, because we do not allow old
+// regions in the CSet during marking, all objects on the CSet regions
+// are young (eden or survivors) and therefore implicitly live. So any
+// references into the CSet will be removed during filtering.
 
-bool ObjPtrQueue::should_enqueue_buffer() {
-  assert(_lock == NULL || _lock->owned_by_self(),
-         "we should have taken the lock before calling this");
-
-  // A value of 0 means "don't filter SATB buffers".
-  if (G1SATBBufferEnqueueingThresholdPercent == 0) {
-    return true;
-  }
-
+void ObjPtrQueue::filter() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
-
-  // This method should only be called if there is a non-NULL buffer
-  // that is full.
-  assert(_index == 0, "pre-condition");
-  assert(_buf != NULL, "pre-condition");
-
   void** buf = _buf;
   size_t sz = _sz;
 
+  if (buf == NULL) {
+    // nothing to do
+    return;
+  }
+
   // Used for sanity checking at the end of the loop.
   debug_only(size_t entries = 0; size_t retained = 0;)
 
   size_t i = sz;
   size_t new_index = sz;
 
-  // Given that we are expecting _index == 0, we could have changed
-  // the loop condition to (i > 0). But we are using _index for
-  // generality.
   while (i > _index) {
     assert(i > 0, "we should have at least one more entry to process");
     i -= oopSize;
@@ -103,22 +100,58 @@
       debug_only(retained += 1;)
     }
   }
+
+#ifdef ASSERT
   size_t entries_calc = (sz - _index) / oopSize;
   assert(entries == entries_calc, "the number of entries we counted "
          "should match the number of entries we calculated");
   size_t retained_calc = (sz - new_index) / oopSize;
   assert(retained == retained_calc, "the number of retained entries we counted "
          "should match the number of retained entries we calculated");
-  size_t perc = retained_calc * 100 / entries_calc;
+#endif // ASSERT
+
+  _index = new_index;
+}
+
+// This method will first apply the above filtering to the buffer. If
+// post-filtering a large enough chunk of the buffer has been cleared
+// we can re-use the buffer (instead of enqueueing it) and we can just
+// allow the mutator to carry on executing using the same buffer
+// instead of replacing it.
+
+bool ObjPtrQueue::should_enqueue_buffer() {
+  assert(_lock == NULL || _lock->owned_by_self(),
+         "we should have taken the lock before calling this");
+
+  // Even if G1SATBBufferEnqueueingThresholdPercent == 0 we have to
+  // filter the buffer given that this will remove any references into
+  // the CSet as we currently assume that no such refs will appear in
+  // enqueued buffers.
+
+  // This method should only be called if there is a non-NULL buffer
+  // that is full.
+  assert(_index == 0, "pre-condition");
+  assert(_buf != NULL, "pre-condition");
+
+  filter();
+
+  size_t sz = _sz;
+  size_t all_entries = sz / oopSize;
+  size_t retained_entries = (sz - _index) / oopSize;
+  size_t perc = retained_entries * 100 / all_entries;
   bool should_enqueue = perc > (size_t) G1SATBBufferEnqueueingThresholdPercent;
-  _index = new_index;
-
   return should_enqueue;
 }
 
 void ObjPtrQueue::apply_closure(ObjectClosure* cl) {
   if (_buf != NULL) {
     apply_closure_to_buffer(cl, _buf, _index, _sz);
+  }
+}
+
+void ObjPtrQueue::apply_closure_and_empty(ObjectClosure* cl) {
+  if (_buf != NULL) {
+    apply_closure_to_buffer(cl, _buf, _index, _sz);
     _index = _sz;
   }
 }
@@ -135,6 +168,21 @@
   }
 }
 
+#ifndef PRODUCT
+// Helpful for debugging
+
+void ObjPtrQueue::print(const char* name) {
+  print(name, _buf, _index, _sz);
+}
+
+void ObjPtrQueue::print(const char* name,
+                        void** buf, size_t index, size_t sz) {
+  gclog_or_tty->print_cr("  SATB BUFFER [%s] buf: "PTR_FORMAT" "
+                         "index: "SIZE_FORMAT" sz: "SIZE_FORMAT,
+                         name, buf, index, sz);
+}
+#endif // PRODUCT
+
 #ifdef ASSERT
 void ObjPtrQueue::verify_oops_in_buffer() {
   if (_buf == NULL) return;
@@ -150,12 +198,9 @@
 #pragma warning( disable:4355 ) // 'this' : used in base member initializer list
 #endif // _MSC_VER
 
-
 SATBMarkQueueSet::SATBMarkQueueSet() :
-  PtrQueueSet(),
-  _closure(NULL), _par_closures(NULL),
-  _shared_satb_queue(this, true /*perm*/)
-{}
+  PtrQueueSet(), _closure(NULL), _par_closures(NULL),
+  _shared_satb_queue(this, true /*perm*/) { }
 
 void SATBMarkQueueSet::initialize(Monitor* cbl_mon, Mutex* fl_lock,
                                   int process_completed_threshold,
@@ -167,7 +212,6 @@
   }
 }
 
-
 void SATBMarkQueueSet::handle_zero_index_for_thread(JavaThread* t) {
   DEBUG_ONLY(t->satb_mark_queue().verify_oops_in_buffer();)
   t->satb_mark_queue().handle_zero_index();
@@ -228,6 +272,13 @@
   }
 }
 
+void SATBMarkQueueSet::filter_thread_buffers() {
+  for(JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().filter();
+  }
+  shared_satb_queue()->filter();
+}
+
 void SATBMarkQueueSet::set_closure(ObjectClosure* closure) {
   _closure = closure;
 }
@@ -239,9 +290,9 @@
 
 void SATBMarkQueueSet::iterate_closure_all_threads() {
   for(JavaThread* t = Threads::first(); t; t = t->next()) {
-    t->satb_mark_queue().apply_closure(_closure);
+    t->satb_mark_queue().apply_closure_and_empty(_closure);
   }
-  shared_satb_queue()->apply_closure(_closure);
+  shared_satb_queue()->apply_closure_and_empty(_closure);
 }
 
 void SATBMarkQueueSet::par_iterate_closure_all_threads(int worker) {
@@ -250,7 +301,7 @@
 
   for(JavaThread* t = Threads::first(); t; t = t->next()) {
     if (t->claim_oops_do(true, parity)) {
-      t->satb_mark_queue().apply_closure(_par_closures[worker]);
+      t->satb_mark_queue().apply_closure_and_empty(_par_closures[worker]);
     }
   }
 
@@ -264,7 +315,7 @@
 
   VMThread* vmt = VMThread::vm_thread();
   if (vmt->claim_oops_do(true, parity)) {
-    shared_satb_queue()->apply_closure(_par_closures[worker]);
+    shared_satb_queue()->apply_closure_and_empty(_par_closures[worker]);
   }
 }
 
@@ -292,6 +343,61 @@
   }
 }
 
+void SATBMarkQueueSet::iterate_completed_buffers_read_only(ObjectClosure* cl) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  assert(cl != NULL, "pre-condition");
+
+  BufferNode* nd = _completed_buffers_head;
+  while (nd != NULL) {
+    void** buf = BufferNode::make_buffer_from_node(nd);
+    ObjPtrQueue::apply_closure_to_buffer(cl, buf, 0, _sz);
+    nd = nd->next();
+  }
+}
+
+void SATBMarkQueueSet::iterate_thread_buffers_read_only(ObjectClosure* cl) {
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+  assert(cl != NULL, "pre-condition");
+
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    t->satb_mark_queue().apply_closure(cl);
+  }
+  shared_satb_queue()->apply_closure(cl);
+}
+
+#ifndef PRODUCT
+// Helpful for debugging
+
+#define SATB_PRINTER_BUFFER_SIZE 256
+
+void SATBMarkQueueSet::print_all(const char* msg) {
+  char buffer[SATB_PRINTER_BUFFER_SIZE];
+  assert(SafepointSynchronize::is_at_safepoint(), "Must be at safepoint.");
+
+  gclog_or_tty->cr();
+  gclog_or_tty->print_cr("SATB BUFFERS [%s]", msg);
+
+  BufferNode* nd = _completed_buffers_head;
+  int i = 0;
+  while (nd != NULL) {
+    void** buf = BufferNode::make_buffer_from_node(nd);
+    jio_snprintf(buffer, SATB_PRINTER_BUFFER_SIZE, "Enqueued: %d", i);
+    ObjPtrQueue::print(buffer, buf, 0, _sz);
+    nd = nd->next();
+    i += 1;
+  }
+
+  for (JavaThread* t = Threads::first(); t; t = t->next()) {
+    jio_snprintf(buffer, SATB_PRINTER_BUFFER_SIZE, "Thread: %s", t->name());
+    t->satb_mark_queue().print(buffer);
+  }
+
+  shared_satb_queue()->print("Shared");
+
+  gclog_or_tty->cr();
+}
+#endif // PRODUCT
+
 void SATBMarkQueueSet::abandon_partial_marking() {
   BufferNode* buffers_to_delete = NULL;
   {
@@ -316,5 +422,5 @@
   for (JavaThread* t = Threads::first(); t; t = t->next()) {
     t->satb_mark_queue().reset();
   }
-  shared_satb_queue()->reset();
+ shared_satb_queue()->reset();
 }
--- a/src/share/vm/gc_implementation/g1/satbQueue.hpp	Thu Jan 12 09:59:55 2012 -0500
+++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp	Fri Jan 13 01:55:22 2012 -0800
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,9 +29,26 @@
 
 class ObjectClosure;
 class JavaThread;
+class SATBMarkQueueSet;
 
 // A ptrQueue whose elements are "oops", pointers to object heads.
 class ObjPtrQueue: public PtrQueue {
+  friend class SATBMarkQueueSet;
+
+private:
+  // Filter out unwanted entries from the buffer.
+  void filter();
+
+  // Apply the closure to all elements.
+  void apply_closure(ObjectClosure* cl);
+
+  // Apply the closure to all elements and empty the buffer;
+  void apply_closure_and_empty(ObjectClosure* cl);
+
+  // Apply the closure to all elements of "buf", down to "index" (inclusive.)
+  static void apply_closure_to_buffer(ObjectClosure* cl,
+                                      void** buf, size_t index, size_t sz);
+
 public:
   ObjPtrQueue(PtrQueueSet* qset, bool perm = false) :
     // SATB queues are only active during marking cycles. We create
@@ -41,23 +58,23 @@
     // field to true. This is done in JavaThread::initialize_queues().
     PtrQueue(qset, perm, false /* active */) { }
 
+  // Overrides PtrQueue::flush() so that it can filter the buffer
+  // before it is flushed.
+  virtual void flush();
+
   // Overrides PtrQueue::should_enqueue_buffer(). See the method's
   // definition for more information.
   virtual bool should_enqueue_buffer();
 
-  // Apply the closure to all elements, and reset the index to make the
-  // buffer empty.
-  void apply_closure(ObjectClosure* cl);
-
-  // Apply the closure to all elements of "buf", down to "index" (inclusive.)
-  static void apply_closure_to_buffer(ObjectClosure* cl,
-                                      void** buf, size_t index, size_t sz);
+#ifndef PRODUCT
+  // Helpful for debugging
+  void print(const char* name);
+  static void print(const char* name, void** buf, size_t index, size_t sz);
+#endif // PRODUCT
 
   void verify_oops_in_buffer() NOT_DEBUG_RETURN;
 };
 
-
-
 class SATBMarkQueueSet: public PtrQueueSet {
   ObjectClosure* _closure;
   ObjectClosure** _par_closures;  // One per ParGCThread.
@@ -88,6 +105,9 @@
   // set itself, has an active value same as expected_active.
   void set_active_all_threads(bool b, bool expected_active);
 
+  // Filter all the currently-active SATB buffers.
+  void filter_thread_buffers();
+
   // Register "blk" as "the closure" for all queues.  Only one such closure
   // is allowed.  The "apply_closure_to_completed_buffer" method will apply
   // this closure to a completed buffer, and "iterate_closure_all_threads"
@@ -98,10 +118,9 @@
   // closures, one for each parallel GC thread.
   void set_par_closure(int i, ObjectClosure* closure);
 
-  // If there is a registered closure for buffers, apply it to all entries
-  // in all currently-active buffers.  This should only be applied at a
-  // safepoint.  (Currently must not be called in parallel; this should
-  // change in the future.)
+  // Apply the registered closure to all entries on each
+  // currently-active buffer and then empty the buffer. It should only
+  // be called serially and at a safepoint.
   void iterate_closure_all_threads();
   // Parallel version of the above.
   void par_iterate_closure_all_threads(int worker);
@@ -117,11 +136,21 @@
     return apply_closure_to_completed_buffer_work(true, worker);
   }
 
+  // Apply the given closure on enqueued and currently-active buffers
+  // respectively. Both methods are read-only, i.e., they do not
+  // modify any of the buffers.
+  void iterate_completed_buffers_read_only(ObjectClosure* cl);
+  void iterate_thread_buffers_read_only(ObjectClosure* cl);
+
+#ifndef PRODUCT
+  // Helpful for debugging
+  void print_all(const char* msg);
+#endif // PRODUCT
+
   ObjPtrQueue* shared_satb_queue() { return &_shared_satb_queue; }
 
   // If a marking is being abandoned, reset any unprocessed log buffers.
   void abandon_partial_marking();
-
 };
 
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_SATBQUEUE_HPP