changeset 2368:cdd13dce903e

Merge
author vladidan
date Sat, 23 Apr 2011 00:33:38 -0400
parents 49bd9c6f7bce 732454aaf5cb
children 01147d8aac1d
files
diffstat 79 files changed, 2401 insertions(+), 553 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -4257,34 +4257,14 @@
 ///////////////////////////////////////////////////////////////////////////////////
 #ifndef SERIALGC
 
-static uint num_stores = 0;
-static uint num_null_pre_stores = 0;
-
-static void count_null_pre_vals(void* pre_val) {
-  num_stores++;
-  if (pre_val == NULL) num_null_pre_stores++;
-  if ((num_stores % 1000000) == 0) {
-    tty->print_cr(UINT32_FORMAT " stores, " UINT32_FORMAT " (%5.2f%%) with null pre-vals.",
-                  num_stores, num_null_pre_stores,
-                  100.0*(float)num_null_pre_stores/(float)num_stores);
-  }
-}
-
-static address satb_log_enqueue_with_frame = 0;
-static u_char* satb_log_enqueue_with_frame_end = 0;
-
-static address satb_log_enqueue_frameless = 0;
-static u_char* satb_log_enqueue_frameless_end = 0;
+static address satb_log_enqueue_with_frame = NULL;
+static u_char* satb_log_enqueue_with_frame_end = NULL;
+
+static address satb_log_enqueue_frameless = NULL;
+static u_char* satb_log_enqueue_frameless_end = NULL;
 
 static int EnqueueCodeSize = 128 DEBUG_ONLY( + 256); // Instructions?
 
-// The calls to this don't work.  We'd need to do a fair amount of work to
-// make it work.
-static void check_index(int ind) {
-  assert(0 <= ind && ind <= 64*K && ((ind % oopSize) == 0),
-         "Invariants.");
-}
-
 static void generate_satb_log_enqueue(bool with_frame) {
   BufferBlob* bb = BufferBlob::create("enqueue_with_frame", EnqueueCodeSize);
   CodeBuffer buf(bb);
@@ -4388,13 +4368,27 @@
   }
 }
 
-void MacroAssembler::g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs) {
-  assert(offset == 0 || index == noreg, "choose one");
-
-  if (G1DisablePreBarrier) return;
-  // satb_log_barrier(tmp, obj, offset, preserve_o_regs);
+void MacroAssembler::g1_write_barrier_pre(Register obj,
+                                          Register index,
+                                          int offset,
+                                          Register pre_val,
+                                          Register tmp,
+                                          bool preserve_o_regs) {
   Label filtered;
-  // satb_log_barrier_work0(tmp, filtered);
+
+  if (obj == noreg) {
+    // We are not loading the previous value so make
+    // sure that we don't trash the value in pre_val
+    // with the code below.
+    assert_different_registers(pre_val, tmp);
+  } else {
+    // We will be loading the previous value
+    // in this code so...
+    assert(offset == 0 || index == noreg, "choose one");
+    assert(pre_val == noreg, "check this code");
+  }
+
+  // Is marking active?
   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
     ld(G2,
        in_bytes(JavaThread::satb_mark_queue_offset() +
@@ -4413,61 +4407,46 @@
   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, tmp, filtered);
   delayed() -> nop();
 
-  // satb_log_barrier_work1(tmp, offset);
-  if (index == noreg) {
-    if (Assembler::is_simm13(offset)) {
-      load_heap_oop(obj, offset, tmp);
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    // Load the previous value...
+    if (index == noreg) {
+      if (Assembler::is_simm13(offset)) {
+        load_heap_oop(obj, offset, tmp);
+      } else {
+        set(offset, tmp);
+        load_heap_oop(obj, tmp, tmp);
+      }
     } else {
-      set(offset, tmp);
-      load_heap_oop(obj, tmp, tmp);
+      load_heap_oop(obj, index, tmp);
     }
-  } else {
-    load_heap_oop(obj, index, tmp);
+    // Previous value has been loaded into tmp
+    pre_val = tmp;
   }
 
-  // satb_log_barrier_work2(obj, tmp, offset);
-
-  // satb_log_barrier_work3(tmp, filtered, preserve_o_regs);
-
-  const Register pre_val = tmp;
-
-  if (G1SATBBarrierPrintNullPreVals) {
-    save_frame(0);
-    mov(pre_val, O0);
-    // Save G-regs that target may use.
-    mov(G1, L1);
-    mov(G2, L2);
-    mov(G3, L3);
-    mov(G4, L4);
-    mov(G5, L5);
-    call(CAST_FROM_FN_PTR(address, &count_null_pre_vals));
-    delayed()->nop();
-    // Restore G-regs that target may have used.
-    mov(L1, G1);
-    mov(L2, G2);
-    mov(L3, G3);
-    mov(L4, G4);
-    mov(L5, G5);
-    restore(G0, G0, G0);
-  }
-
+  assert(pre_val != noreg, "must have a real register");
+
+  // Is the previous value null?
   // Check on whether to annul.
   br_on_reg_cond(rc_z, /*annul*/false, Assembler::pt, pre_val, filtered);
   delayed() -> nop();
 
   // OK, it's not filtered, so we'll need to call enqueue.  In the normal
-  // case, pre_val will be a scratch G-reg, but there's some cases in which
-  // it's an O-reg.  In the first case, do a normal call.  In the latter,
-  // do a save here and call the frameless version.
+  // case, pre_val will be a scratch G-reg, but there are some cases in
+  // which it's an O-reg.  In the first case, do a normal call.  In the
+  // latter, do a save here and call the frameless version.
 
   guarantee(pre_val->is_global() || pre_val->is_out(),
             "Or we need to think harder.");
+
   if (pre_val->is_global() && !preserve_o_regs) {
-    generate_satb_log_enqueue_if_necessary(true); // with frame.
+    generate_satb_log_enqueue_if_necessary(true); // with frame
+
     call(satb_log_enqueue_with_frame);
     delayed()->mov(pre_val, O0);
   } else {
-    generate_satb_log_enqueue_if_necessary(false); // with frameless.
+    generate_satb_log_enqueue_if_necessary(false); // frameless
+
     save_frame(0);
     call(satb_log_enqueue_frameless);
     delayed()->mov(pre_val->after_save(), O0);
@@ -4614,7 +4593,6 @@
   MacroAssembler* post_filter_masm = this;
 
   if (new_val == G0) return;
-  if (G1DisablePostBarrier) return;
 
   G1SATBCardTableModRefBS* bs = (G1SATBCardTableModRefBS*) Universe::heap()->barrier_set();
   assert(bs->kind() == BarrierSet::G1SATBCT ||
@@ -4626,6 +4604,7 @@
 #else
     srl(tmp, HeapRegion::LogOfHRGrainBytes, tmp);
 #endif
+
     if (G1PrintCTFilterStats) {
       guarantee(tmp->is_global(), "Or stats won't work...");
       // This is a sleazy hack: I'm temporarily hijacking G2, which I
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -2210,15 +2210,11 @@
   void card_write_barrier_post(Register store_addr, Register new_val, Register tmp);
 
 #ifndef SERIALGC
-  // Array store and offset
-  void g1_write_barrier_pre(Register obj, Register index, int offset, Register tmp, bool preserve_o_regs);
-
+  // General G1 pre-barrier generator.
+  void g1_write_barrier_pre(Register obj, Register index, int offset, Register pre_val, Register tmp, bool preserve_o_regs);
+
+  // General G1 post-barrier generator
   void g1_write_barrier_post(Register store_addr, Register new_val, Register tmp);
-
-  // May do filtering, depending on the boolean arguments.
-  void g1_card_table_write(jbyte* byte_map_base,
-                           Register tmp, Register obj, Register new_val,
-                           bool region_filter, bool null_filter);
 #endif // SERIALGC
 
   // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
--- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -408,13 +408,20 @@
 #ifndef SERIALGC
 
 void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+  // At this point we know that marking is in progress.
+  // If do_load() is true then we have to emit the
+  // load of the previous value; otherwise it has already
+  // been loaded into _pre_val.
+
   __ bind(_entry);
 
   assert(pre_val()->is_register(), "Precondition.");
-
   Register pre_val_reg = pre_val()->as_register();
 
-  ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  if (do_load()) {
+    ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  }
+
   if (__ is_in_wdisp16_range(_continuation)) {
     __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
                       pre_val_reg, _continuation);
@@ -431,6 +438,96 @@
 
 }
 
+void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
+  // At this point we know that offset == referent_offset.
+  //
+  // So we might have to emit:
+  //   if (src == null) goto continuation.
+  //
+  // and we definitely have to emit:
+  //   if (klass(src).reference_type == REF_NONE) goto continuation
+  //   if (!marking_active) goto continuation
+  //   if (pre_val == null) goto continuation
+  //   call pre_barrier(pre_val)
+  //   goto continuation
+  //
+  __ bind(_entry);
+
+  assert(src()->is_register(), "sanity");
+  Register src_reg = src()->as_register();
+
+  if (gen_src_check()) {
+    // The original src operand was not a constant.
+    // Generate src == null?
+    if (__ is_in_wdisp16_range(_continuation)) {
+      __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
+                        src_reg, _continuation);
+    } else {
+      __ cmp(src_reg, G0);
+      __ brx(Assembler::equal, false, Assembler::pt, _continuation);
+    }
+    __ delayed()->nop();
+  }
+
+  // Generate src->_klass->_reference_type() == REF_NONE)?
+  assert(tmp()->is_register(), "sanity");
+  Register tmp_reg = tmp()->as_register();
+
+  __ load_klass(src_reg, tmp_reg);
+
+  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc));
+  __ ld(ref_type_adr, tmp_reg);
+
+  if (__ is_in_wdisp16_range(_continuation)) {
+    __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
+                      tmp_reg, _continuation);
+  } else {
+    __ cmp(tmp_reg, G0);
+    __ brx(Assembler::equal, false, Assembler::pt, _continuation);
+  }
+  __ delayed()->nop();
+
+  // Is marking active?
+  assert(thread()->is_register(), "precondition");
+  Register thread_reg = thread()->as_pointer_register();
+
+  Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_active()));
+
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    __ ld(in_progress, tmp_reg);
+  } else {
+    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
+    __ ldsb(in_progress, tmp_reg);
+  }
+  if (__ is_in_wdisp16_range(_continuation)) {
+    __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
+                      tmp_reg, _continuation);
+  } else {
+    __ cmp(tmp_reg, G0);
+    __ brx(Assembler::equal, false, Assembler::pt, _continuation);
+  }
+  __ delayed()->nop();
+
+  // val == null?
+  assert(val()->is_register(), "Precondition.");
+  Register val_reg = val()->as_register();
+
+  if (__ is_in_wdisp16_range(_continuation)) {
+    __ br_on_reg_cond(Assembler::rc_z, /*annul*/false, Assembler::pt,
+                      val_reg, _continuation);
+  } else {
+    __ cmp(val_reg, G0);
+    __ brx(Assembler::equal, false, Assembler::pt, _continuation);
+  }
+  __ delayed()->nop();
+
+  __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id));
+  __ delayed()->mov(val_reg, G4);
+  __ br(Assembler::always, false, Assembler::pt, _continuation);
+  __ delayed()->nop();
+}
+
 jbyte* G1PostBarrierStub::_byte_map_base = NULL;
 
 jbyte* G1PostBarrierStub::byte_map_base_slow() {
--- a/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -387,7 +387,8 @@
 
   if (obj_store) {
     // Needs GC write barriers.
-    pre_barrier(LIR_OprFact::address(array_addr), false, NULL);
+    pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
   }
   __ move(value.result(), array_addr, null_check_info);
   if (obj_store) {
@@ -687,7 +688,8 @@
   __ add(obj.result(), offset.result(), addr);
 
   if (type == objectType) {  // Write-barrier needed for Object fields.
-    pre_barrier(addr, false, NULL);
+    pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
   }
 
   if (type == objectType)
@@ -1187,7 +1189,8 @@
       }
 
       if (is_obj) {
-        pre_barrier(LIR_OprFact::address(addr), false, NULL);
+        pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */,
+                    true /* do_load */, false /* patch */, NULL);
         // _bs->c1_write_barrier_pre(this, LIR_OprFact::address(addr));
       }
       __ move(data, addr);
--- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -551,6 +551,26 @@
   return NULL;
 }
 
+address InterpreterGenerator::generate_Reference_get_entry(void) {
+#ifndef SERIALGC
+  if (UseG1GC) {
+    // We need to generate have a routine that generates code to:
+    //   * load the value in the referent field
+    //   * passes that value to the pre-barrier.
+    //
+    // In the case of G1 this will record the value of the
+    // referent in an SATB buffer if marking is active.
+    // This will cause concurrent marking to mark the referent
+    // field as live.
+    Unimplemented();
+  }
+#endif // SERIALGC
+
+  // If G1 is not enabled then attempt to go through the accessor entry point
+  // Reference.get is an accessor
+  return generate_accessor_entry();
+}
+
 //
 // Interpreter stub for calling a native method. (C++ interpreter)
 // This sets up a somewhat different looking stack for calling the native method
--- a/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,6 +36,7 @@
   address generate_math_entry(AbstractInterpreter::MethodKind kind);
   address generate_empty_entry(void);
   address generate_accessor_entry(void);
+  address generate_Reference_get_entry(void);
   void lock_method(void);
   void save_native_result(void);
   void restore_native_result(void);
--- a/src/cpu/sparc/vm/interpreter_sparc.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/sparc/vm/interpreter_sparc.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -407,6 +407,8 @@
     case Interpreter::java_lang_math_abs     :                                                                             break;
     case Interpreter::java_lang_math_log     :                                                                             break;
     case Interpreter::java_lang_math_log10   :                                                                             break;
+    case Interpreter::java_lang_ref_reference_get
+                                             : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
     default                                  : ShouldNotReachHere();                                                       break;
   }
 
--- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -763,6 +763,87 @@
   return NULL;
 }
 
+// Method entry for java.lang.ref.Reference.get.
+address InterpreterGenerator::generate_Reference_get_entry(void) {
+#ifndef SERIALGC
+  // Code: _aload_0, _getfield, _areturn
+  // parameter size = 1
+  //
+  // The code that gets generated by this routine is split into 2 parts:
+  //    1. The "intrinsified" code for G1 (or any SATB based GC),
+  //    2. The slow path - which is an expansion of the regular method entry.
+  //
+  // Notes:-
+  // * In the G1 code we do not check whether we need to block for
+  //   a safepoint. If G1 is enabled then we must execute the specialized
+  //   code for Reference.get (except when the Reference object is null)
+  //   so that we can log the value in the referent field with an SATB
+  //   update buffer.
+  //   If the code for the getfield template is modified so that the
+  //   G1 pre-barrier code is executed when the current method is
+  //   Reference.get() then going through the normal method entry
+  //   will be fine.
+  // * The G1 code can, however, check the receiver object (the instance
+  //   of java.lang.Reference) and jump to the slow path if null. If the
+  //   Reference object is null then we obviously cannot fetch the referent
+  //   and so we don't need to call the G1 pre-barrier. Thus we can use the
+  //   regular method entry code to generate the NPE.
+  //
+  // This code is based on generate_accessor_enty.
+
+  address entry = __ pc();
+
+  const int referent_offset = java_lang_ref_Reference::referent_offset;
+  guarantee(referent_offset > 0, "referent offset not initialized");
+
+  if (UseG1GC) {
+     Label slow_path;
+
+    // In the G1 code we don't check if we need to reach a safepoint. We
+    // continue and the thread will safepoint at the next bytecode dispatch.
+
+    // Check if local 0 != NULL
+    // If the receiver is null then it is OK to jump to the slow path.
+    __ ld_ptr(Gargs, G0, Otos_i ); // get local 0
+    __ tst(Otos_i);  // check if local 0 == NULL and go the slow path
+    __ brx(Assembler::zero, false, Assembler::pn, slow_path);
+    __ delayed()->nop();
+
+
+    // Load the value of the referent field.
+    if (Assembler::is_simm13(referent_offset)) {
+      __ load_heap_oop(Otos_i, referent_offset, Otos_i);
+    } else {
+      __ set(referent_offset, G3_scratch);
+      __ load_heap_oop(Otos_i, G3_scratch, Otos_i);
+    }
+
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer. Note with
+    // these parameters the pre-barrier does not generate
+    // the load of the previous value
+
+    __ g1_write_barrier_pre(noreg /* obj */, noreg /* index */, 0 /* offset */,
+                            Otos_i /* pre_val */,
+                            G3_scratch /* tmp */,
+                            true /* preserve_o_regs */);
+
+    // _areturn
+    __ retl();                      // return from leaf routine
+    __ delayed()->mov(O5_savedSP, SP);
+
+    // Generate regular method entry
+    __ bind(slow_path);
+    (void) generate_normal_entry(false);
+    return entry;
+  }
+#endif // SERIALGC
+
+  // If G1 is not enabled then attempt to go through the accessor entry point
+  // Reference.get is an accessor
+  return generate_accessor_entry();
+}
+
 //
 // Interpreter stub for calling a native method. (asm interpreter)
 // This sets up a somewhat different looking stack for calling the native method
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/sparc/vm/templateTable_sparc.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -57,7 +57,11 @@
     case BarrierSet::G1SATBCT:
     case BarrierSet::G1SATBCTLogging:
       {
-        __ g1_write_barrier_pre( base, index, offset, tmp, /*preserve_o_regs*/true);
+        // Load and record the previous value.
+        __ g1_write_barrier_pre(base, index, offset,
+                                noreg /* pre_val */,
+                                tmp, true /*preserve_o_regs*/);
+
         if (index == noreg ) {
           assert(Assembler::is_simm13(offset), "fix this code");
           __ store_heap_oop(val, base, offset);
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -2317,7 +2317,7 @@
 }
 
 void Assembler::prefetchr(Address src) {
-  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
+  NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support"));
   InstructionMark im(this);
   prefetch_prefix(src);
   emit_byte(0x0D);
@@ -2349,7 +2349,7 @@
 }
 
 void Assembler::prefetchw(Address src) {
-  NOT_LP64(assert(VM_Version::supports_3dnow(), "must support"));
+  NOT_LP64(assert(VM_Version::supports_3dnow_prefetch(), "must support"));
   InstructionMark im(this);
   prefetch_prefix(src);
   emit_byte(0x0D);
@@ -6902,26 +6902,39 @@
 #ifndef SERIALGC
 
 void MacroAssembler::g1_write_barrier_pre(Register obj,
-#ifndef _LP64
+                                          Register pre_val,
                                           Register thread,
-#endif
                                           Register tmp,
-                                          Register tmp2,
-                                          bool tosca_live) {
-  LP64_ONLY(Register thread = r15_thread;)
+                                          bool tosca_live,
+                                          bool expand_call) {
+
+  // If expand_call is true then we expand the call_VM_leaf macro
+  // directly to skip generating the check by
+  // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp.
+
+#ifdef _LP64
+  assert(thread == r15_thread, "must be");
+#endif // _LP64
+
+  Label done;
+  Label runtime;
+
+  assert(pre_val != noreg, "check this code");
+
+  if (obj != noreg) {
+    assert_different_registers(obj, pre_val, tmp);
+    assert(pre_val != rax, "check this code");
+  }
+
   Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                        PtrQueue::byte_offset_of_active()));
-
   Address index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                        PtrQueue::byte_offset_of_index()));
   Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                        PtrQueue::byte_offset_of_buf()));
 
 
-  Label done;
-  Label runtime;
-
-  // if (!marking_in_progress) goto done;
+  // Is marking active?
   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
     cmpl(in_progress, 0);
   } else {
@@ -6930,65 +6943,92 @@
   }
   jcc(Assembler::equal, done);
 
-  // if (x.f == NULL) goto done;
-#ifdef _LP64
-  load_heap_oop(tmp2, Address(obj, 0));
-#else
-  movptr(tmp2, Address(obj, 0));
-#endif
-  cmpptr(tmp2, (int32_t) NULL_WORD);
+  // Do we need to load the previous value?
+  if (obj != noreg) {
+    load_heap_oop(pre_val, Address(obj, 0));
+  }
+
+  // Is the previous value null?
+  cmpptr(pre_val, (int32_t) NULL_WORD);
   jcc(Assembler::equal, done);
 
   // Can we store original value in the thread's buffer?
-
-#ifdef _LP64
-  movslq(tmp, index);
-  cmpq(tmp, 0);
-#else
-  cmpl(index, 0);
-#endif
-  jcc(Assembler::equal, runtime);
-#ifdef _LP64
-  subq(tmp, wordSize);
-  movl(index, tmp);
-  addq(tmp, buffer);
-#else
-  subl(index, wordSize);
-  movl(tmp, buffer);
-  addl(tmp, index);
-#endif
-  movptr(Address(tmp, 0), tmp2);
+  // Is index == 0?
+  // (The index field is typed as size_t.)
+
+  movptr(tmp, index);                   // tmp := *index_adr
+  cmpptr(tmp, 0);                       // tmp == 0?
+  jcc(Assembler::equal, runtime);       // If yes, goto runtime
+
+  subptr(tmp, wordSize);                // tmp := tmp - wordSize
+  movptr(index, tmp);                   // *index_adr := tmp
+  addptr(tmp, buffer);                  // tmp := tmp + *buffer_adr
+
+  // Record the previous value
+  movptr(Address(tmp, 0), pre_val);
   jmp(done);
+
   bind(runtime);
   // save the live input values
   if(tosca_live) push(rax);
-  push(obj);
-#ifdef _LP64
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, r15_thread);
-#else
-  push(thread);
-  call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), tmp2, thread);
-  pop(thread);
-#endif
-  pop(obj);
+
+  if (obj != noreg && obj != rax)
+    push(obj);
+
+  if (pre_val != rax)
+    push(pre_val);
+
+  // Calling the runtime using the regular call_VM_leaf mechanism generates
+  // code (generated by InterpreterMacroAssember::call_VM_leaf_base)
+  // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL.
+  //
+  // If we care generating the pre-barrier without a frame (e.g. in the
+  // intrinsified Reference.get() routine) then ebp might be pointing to
+  // the caller frame and so this check will most likely fail at runtime.
+  //
+  // Expanding the call directly bypasses the generation of the check.
+  // So when we do not have have a full interpreter frame on the stack
+  // expand_call should be passed true.
+
+  NOT_LP64( push(thread); )
+
+  if (expand_call) {
+    LP64_ONLY( assert(pre_val != c_rarg1, "smashed arg"); )
+    pass_arg1(this, thread);
+    pass_arg0(this, pre_val);
+    MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), 2);
+  } else {
+    call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), pre_val, thread);
+  }
+
+  NOT_LP64( pop(thread); )
+
+  // save the live input values
+  if (pre_val != rax)
+    pop(pre_val);
+
+  if (obj != noreg && obj != rax)
+    pop(obj);
+
   if(tosca_live) pop(rax);
+
   bind(done);
-
 }
 
 void MacroAssembler::g1_write_barrier_post(Register store_addr,
                                            Register new_val,
-#ifndef _LP64
                                            Register thread,
-#endif
                                            Register tmp,
                                            Register tmp2) {
-
-  LP64_ONLY(Register thread = r15_thread;)
+#ifdef _LP64
+  assert(thread == r15_thread, "must be");
+#endif // _LP64
+
   Address queue_index(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                        PtrQueue::byte_offset_of_index()));
   Address buffer(thread, in_bytes(JavaThread::dirty_card_queue_offset() +
                                        PtrQueue::byte_offset_of_buf()));
+
   BarrierSet* bs = Universe::heap()->barrier_set();
   CardTableModRefBS* ct = (CardTableModRefBS*)bs;
   Label done;
@@ -7067,7 +7107,6 @@
   pop(store_addr);
 
   bind(done);
-
 }
 
 #endif // SERIALGC
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1453,6 +1453,7 @@
 class MacroAssembler: public Assembler {
   friend class LIR_Assembler;
   friend class Runtime1;      // as_Address()
+
  protected:
 
   Address as_Address(AddressLiteral adr);
@@ -1674,21 +1675,22 @@
   void store_check(Register obj);                // store check for obj - register is destroyed afterwards
   void store_check(Register obj, Address dst);   // same as above, dst is exact store location (reg. is destroyed)
 
+#ifndef SERIALGC
+
   void g1_write_barrier_pre(Register obj,
-#ifndef _LP64
+                            Register pre_val,
                             Register thread,
-#endif
                             Register tmp,
-                            Register tmp2,
-                            bool     tosca_live);
+                            bool tosca_live,
+                            bool expand_call);
+
   void g1_write_barrier_post(Register store_addr,
                              Register new_val,
-#ifndef _LP64
                              Register thread,
-#endif
                              Register tmp,
                              Register tmp2);
 
+#endif // SERIALGC
 
   // split store_check(Register obj) to enhance instruction interleaving
   void store_check_part_1(Register obj);
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -466,15 +466,19 @@
 #ifndef SERIALGC
 
 void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
-
-  // At this point we know that marking is in progress
+  // At this point we know that marking is in progress.
+  // If do_load() is true then we have to emit the
+  // load of the previous value; otherwise it has already
+  // been loaded into _pre_val.
 
   __ bind(_entry);
   assert(pre_val()->is_register(), "Precondition.");
 
   Register pre_val_reg = pre_val()->as_register();
 
-  ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  if (do_load()) {
+    ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  }
 
   __ cmpptr(pre_val_reg, (int32_t) NULL_WORD);
   __ jcc(Assembler::equal, _continuation);
@@ -484,6 +488,68 @@
 
 }
 
+void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
+  // At this point we know that offset == referent_offset.
+  //
+  // So we might have to emit:
+  //   if (src == null) goto continuation.
+  //
+  // and we definitely have to emit:
+  //   if (klass(src).reference_type == REF_NONE) goto continuation
+  //   if (!marking_active) goto continuation
+  //   if (pre_val == null) goto continuation
+  //   call pre_barrier(pre_val)
+  //   goto continuation
+  //
+  __ bind(_entry);
+
+  assert(src()->is_register(), "sanity");
+  Register src_reg = src()->as_register();
+
+  if (gen_src_check()) {
+    // The original src operand was not a constant.
+    // Generate src == null?
+    __ cmpptr(src_reg, (int32_t) NULL_WORD);
+    __ jcc(Assembler::equal, _continuation);
+  }
+
+  // Generate src->_klass->_reference_type == REF_NONE)?
+  assert(tmp()->is_register(), "sanity");
+  Register tmp_reg = tmp()->as_register();
+
+  __ load_klass(tmp_reg, src_reg);
+
+  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset_in_bytes() + sizeof(oopDesc));
+  __ cmpl(ref_type_adr, REF_NONE);
+  __ jcc(Assembler::equal, _continuation);
+
+  // Is marking active?
+  assert(thread()->is_register(), "precondition");
+  Register thread_reg = thread()->as_pointer_register();
+
+  Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
+                                       PtrQueue::byte_offset_of_active()));
+
+  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
+    __ cmpl(in_progress, 0);
+  } else {
+    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
+    __ cmpb(in_progress, 0);
+  }
+  __ jcc(Assembler::equal, _continuation);
+
+  // val == null?
+  assert(val()->is_register(), "Precondition.");
+  Register val_reg = val()->as_register();
+
+  __ cmpptr(val_reg, (int32_t) NULL_WORD);
+  __ jcc(Assembler::equal, _continuation);
+
+  ce->store_parameter(val()->as_register(), 0);
+  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
+  __ jmp(_continuation);
+}
+
 jbyte* G1PostBarrierStub::_byte_map_base = NULL;
 
 jbyte* G1PostBarrierStub::byte_map_base_slow() {
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1401,7 +1401,7 @@
       default:
         ShouldNotReachHere(); break;
     }
-  } else if (VM_Version::supports_3dnow()) {
+  } else if (VM_Version::supports_3dnow_prefetch()) {
     __ prefetchr(from_addr);
   }
 }
@@ -1424,7 +1424,7 @@
       default:
         ShouldNotReachHere(); break;
     }
-  } else if (VM_Version::supports_3dnow()) {
+  } else if (VM_Version::supports_3dnow_prefetch()) {
     __ prefetchw(from_addr);
   }
 }
--- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -326,7 +326,8 @@
 
   if (obj_store) {
     // Needs GC write barriers.
-    pre_barrier(LIR_OprFact::address(array_addr), false, NULL);
+    pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
     __ move(value.result(), array_addr, null_check_info);
     // Seems to be a precise
     post_barrier(LIR_OprFact::address(array_addr), value.result());
@@ -794,7 +795,8 @@
 
   if (type == objectType) {  // Write-barrier needed for Object fields.
     // Do the pre-write barrier, if any.
-    pre_barrier(addr, false, NULL);
+    pre_barrier(addr, LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
   }
 
   LIR_Opr ill = LIR_OprFact::illegalOpr;  // for convenience
@@ -1339,7 +1341,8 @@
     bool is_obj = (type == T_ARRAY || type == T_OBJECT);
     if (is_obj) {
       // Do the pre-write barrier, if any.
-      pre_barrier(LIR_OprFact::address(addr), false, NULL);
+      pre_barrier(LIR_OprFact::address(addr), LIR_OprFact::illegalOpr /* pre_val */,
+                  true /* do_load */, false /* patch */, NULL);
       __ move(data, addr);
       assert(src->is_register(), "must be register");
       // Seems to be a precise address
--- a/src/cpu/x86/vm/cppInterpreterGenerator_x86.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/cppInterpreterGenerator_x86.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,7 @@
   address generate_math_entry(AbstractInterpreter::MethodKind kind);
   address generate_empty_entry(void);
   address generate_accessor_entry(void);
+  address generate_Reference_get_entry(void);
   void lock_method(void);
   void generate_stack_overflow_check(void);
 
--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -936,6 +936,26 @@
 
 }
 
+address InterpreterGenerator::generate_Reference_get_entry(void) {
+#ifndef SERIALGC
+  if (UseG1GC) {
+    // We need to generate have a routine that generates code to:
+    //   * load the value in the referent field
+    //   * passes that value to the pre-barrier.
+    //
+    // In the case of G1 this will record the value of the
+    // referent in an SATB buffer if marking is active.
+    // This will cause concurrent marking to mark the referent
+    // field as live.
+    Unimplemented();
+  }
+#endif // SERIALGC
+
+  // If G1 is not enabled then attempt to go through the accessor entry point
+  // Reference.get is an accessor
+  return generate_accessor_entry();
+}
+
 //
 // C++ Interpreter stub for calling a native method.
 // This sets up a somewhat different looking stack for calling the native method
@@ -2210,6 +2230,8 @@
     case Interpreter::java_lang_math_log     : // fall thru
     case Interpreter::java_lang_math_log10   : // fall thru
     case Interpreter::java_lang_math_sqrt    : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind);     break;
+    case Interpreter::java_lang_ref_reference_get
+                                             : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
     default                                  : ShouldNotReachHere();                                                       break;
   }
 
--- a/src/cpu/x86/vm/interpreterGenerator_x86.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/interpreterGenerator_x86.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,6 +39,7 @@
   address generate_math_entry(AbstractInterpreter::MethodKind kind);
   address generate_empty_entry(void);
   address generate_accessor_entry(void);
+  address generate_Reference_get_entry();
   void lock_method(void);
   void generate_stack_overflow_check(void);
 
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -776,6 +776,98 @@
 
 }
 
+// Method entry for java.lang.ref.Reference.get.
+address InterpreterGenerator::generate_Reference_get_entry(void) {
+#ifndef SERIALGC
+  // Code: _aload_0, _getfield, _areturn
+  // parameter size = 1
+  //
+  // The code that gets generated by this routine is split into 2 parts:
+  //    1. The "intrinsified" code for G1 (or any SATB based GC),
+  //    2. The slow path - which is an expansion of the regular method entry.
+  //
+  // Notes:-
+  // * In the G1 code we do not check whether we need to block for
+  //   a safepoint. If G1 is enabled then we must execute the specialized
+  //   code for Reference.get (except when the Reference object is null)
+  //   so that we can log the value in the referent field with an SATB
+  //   update buffer.
+  //   If the code for the getfield template is modified so that the
+  //   G1 pre-barrier code is executed when the current method is
+  //   Reference.get() then going through the normal method entry
+  //   will be fine.
+  // * The G1 code below can, however, check the receiver object (the instance
+  //   of java.lang.Reference) and jump to the slow path if null. If the
+  //   Reference object is null then we obviously cannot fetch the referent
+  //   and so we don't need to call the G1 pre-barrier. Thus we can use the
+  //   regular method entry code to generate the NPE.
+  //
+  // This code is based on generate_accessor_enty.
+
+  // rbx,: methodOop
+  // rcx: receiver (preserve for slow entry into asm interpreter)
+
+  // rsi: senderSP must preserved for slow path, set SP to it on fast path
+
+  address entry = __ pc();
+
+  const int referent_offset = java_lang_ref_Reference::referent_offset;
+  guarantee(referent_offset > 0, "referent offset not initialized");
+
+  if (UseG1GC) {
+    Label slow_path;
+
+    // Check if local 0 != NULL
+    // If the receiver is null then it is OK to jump to the slow path.
+    __ movptr(rax, Address(rsp, wordSize));
+    __ testptr(rax, rax);
+    __ jcc(Assembler::zero, slow_path);
+
+    // rax: local 0 (must be preserved across the G1 barrier call)
+    //
+    // rbx: method (at this point it's scratch)
+    // rcx: receiver (at this point it's scratch)
+    // rdx: scratch
+    // rdi: scratch
+    //
+    // rsi: sender sp
+
+    // Preserve the sender sp in case the pre-barrier
+    // calls the runtime
+    __ push(rsi);
+
+    // Load the value of the referent field.
+    const Address field_address(rax, referent_offset);
+    __ movptr(rax, field_address);
+
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer.
+    __ get_thread(rcx);
+    __ g1_write_barrier_pre(noreg /* obj */,
+                            rax /* pre_val */,
+                            rcx /* thread */,
+                            rbx /* tmp */,
+                            true /* tosca_save */,
+                            true /* expand_call */);
+
+    // _areturn
+    __ pop(rsi);                // get sender sp
+    __ pop(rdi);                // get return address
+    __ mov(rsp, rsi);           // set sp to sender sp
+    __ jmp(rdi);
+
+    __ bind(slow_path);
+    (void) generate_normal_entry(false);
+
+    return entry;
+  }
+#endif // SERIALGC
+
+  // If G1 is not enabled then attempt to go through the accessor entry point
+  // Reference.get is an accessor
+  return generate_accessor_entry();
+}
+
 //
 // Interpreter stub for calling a native method. (asm interpreter)
 // This sets up a somewhat different looking stack for calling the native method
@@ -1444,6 +1536,8 @@
     case Interpreter::java_lang_math_log     : // fall thru
     case Interpreter::java_lang_math_log10   : // fall thru
     case Interpreter::java_lang_math_sqrt    : entry_point = ((InterpreterGenerator*)this)->generate_math_entry(kind);     break;
+    case Interpreter::java_lang_ref_reference_get
+                                             : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
     default                                  : ShouldNotReachHere();                                                       break;
   }
 
--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -757,6 +757,95 @@
   return entry_point;
 }
 
+// Method entry for java.lang.ref.Reference.get.
+address InterpreterGenerator::generate_Reference_get_entry(void) {
+#ifndef SERIALGC
+  // Code: _aload_0, _getfield, _areturn
+  // parameter size = 1
+  //
+  // The code that gets generated by this routine is split into 2 parts:
+  //    1. The "intrinsified" code for G1 (or any SATB based GC),
+  //    2. The slow path - which is an expansion of the regular method entry.
+  //
+  // Notes:-
+  // * In the G1 code we do not check whether we need to block for
+  //   a safepoint. If G1 is enabled then we must execute the specialized
+  //   code for Reference.get (except when the Reference object is null)
+  //   so that we can log the value in the referent field with an SATB
+  //   update buffer.
+  //   If the code for the getfield template is modified so that the
+  //   G1 pre-barrier code is executed when the current method is
+  //   Reference.get() then going through the normal method entry
+  //   will be fine.
+  // * The G1 code can, however, check the receiver object (the instance
+  //   of java.lang.Reference) and jump to the slow path if null. If the
+  //   Reference object is null then we obviously cannot fetch the referent
+  //   and so we don't need to call the G1 pre-barrier. Thus we can use the
+  //   regular method entry code to generate the NPE.
+  //
+  // This code is based on generate_accessor_enty.
+  //
+  // rbx: methodOop
+
+  // r13: senderSP must preserve for slow path, set SP to it on fast path
+
+  address entry = __ pc();
+
+  const int referent_offset = java_lang_ref_Reference::referent_offset;
+  guarantee(referent_offset > 0, "referent offset not initialized");
+
+  if (UseG1GC) {
+    Label slow_path;
+    // rbx: method
+
+    // Check if local 0 != NULL
+    // If the receiver is null then it is OK to jump to the slow path.
+    __ movptr(rax, Address(rsp, wordSize));
+
+    __ testptr(rax, rax);
+    __ jcc(Assembler::zero, slow_path);
+
+    // rax: local 0
+    // rbx: method (but can be used as scratch now)
+    // rdx: scratch
+    // rdi: scratch
+
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer.
+
+    // Load the value of the referent field.
+    const Address field_address(rax, referent_offset);
+    __ load_heap_oop(rax, field_address);
+
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer.
+    __ g1_write_barrier_pre(noreg /* obj */,
+                            rax /* pre_val */,
+                            r15_thread /* thread */,
+                            rbx /* tmp */,
+                            true /* tosca_live */,
+                            true /* expand_call */);
+
+    // _areturn
+    __ pop(rdi);                // get return address
+    __ mov(rsp, r13);           // set sp to sender sp
+    __ jmp(rdi);
+    __ ret(0);
+
+    // generate a vanilla interpreter entry as the slow path
+    __ bind(slow_path);
+    (void) generate_normal_entry(false);
+
+    return entry;
+  }
+#endif // SERIALGC
+
+  // If G1 is not enabled then attempt to go through the accessor entry point
+  // Reference.get is an accessor
+  return generate_accessor_entry();
+}
+
+
 // Interpreter stub for calling a native method. (asm interpreter)
 // This sets up a somewhat different looking stack for calling the
 // native method than the typical interpreter frame setup.
@@ -1463,6 +1552,8 @@
   case Interpreter::java_lang_math_log     : // fall thru
   case Interpreter::java_lang_math_log10   : // fall thru
   case Interpreter::java_lang_math_sqrt    : entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind);    break;
+  case Interpreter::java_lang_ref_reference_get
+                                           : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
   default                                  : ShouldNotReachHere();                                                       break;
   }
 
--- a/src/cpu/x86/vm/templateTable_x86_32.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/templateTable_x86_32.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -140,7 +140,12 @@
         }
         __ get_thread(rcx);
         __ save_bcp();
-        __ g1_write_barrier_pre(rdx, rcx, rsi, rbx, val != noreg);
+        __ g1_write_barrier_pre(rdx /* obj */,
+                                rbx /* pre_val */,
+                                rcx /* thread */,
+                                rsi /* tmp */,
+                                val != noreg /* tosca_live */,
+                                false /* expand_call */);
 
         // Do the actual store
         // noreg means NULL
@@ -149,7 +154,11 @@
           // No post barrier for NULL
         } else {
           __ movl(Address(rdx, 0), val);
-          __ g1_write_barrier_post(rdx, rax, rcx, rbx, rsi);
+          __ g1_write_barrier_post(rdx /* store_adr */,
+                                   val /* new_val */,
+                                   rcx /* thread */,
+                                   rbx /* tmp */,
+                                   rsi /* tmp2 */);
         }
         __ restore_bcp();
 
--- a/src/cpu/x86/vm/templateTable_x86_64.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/templateTable_x86_64.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -147,12 +147,21 @@
         } else {
           __ leaq(rdx, obj);
         }
-        __ g1_write_barrier_pre(rdx, r8, rbx, val != noreg);
+        __ g1_write_barrier_pre(rdx /* obj */,
+                                rbx /* pre_val */,
+                                r15_thread /* thread */,
+                                r8  /* tmp */,
+                                val != noreg /* tosca_live */,
+                                false /* expand_call */);
         if (val == noreg) {
           __ store_heap_oop_null(Address(rdx, 0));
         } else {
           __ store_heap_oop(Address(rdx, 0), val);
-          __ g1_write_barrier_post(rdx, val, r8, rbx);
+          __ g1_write_barrier_post(rdx /* store_adr */,
+                                   val /* new_val */,
+                                   r15_thread /* thread */,
+                                   r8 /* tmp */,
+                                   rbx /* tmp2 */);
         }
 
       }
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -348,7 +348,7 @@
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -363,8 +363,7 @@
                (supports_sse4_2() ? ", sse4.2" : ""),
                (supports_popcnt() ? ", popcnt" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
-               (supports_3dnow()   ? ", 3dnow"  : ""),
-               (supports_3dnow2()  ? ", 3dnowext" : ""),
+               (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
                (supports_lzcnt()   ? ", lzcnt": ""),
                (supports_sse4a()   ? ", sse4a": ""),
                (supports_ht() ? ", ht": ""));
@@ -522,13 +521,13 @@
   // set valid Prefetch instruction
   if( ReadPrefetchInstr < 0 ) ReadPrefetchInstr = 0;
   if( ReadPrefetchInstr > 3 ) ReadPrefetchInstr = 3;
-  if( ReadPrefetchInstr == 3 && !supports_3dnow() ) ReadPrefetchInstr = 0;
-  if( !supports_sse() && supports_3dnow() ) ReadPrefetchInstr = 3;
+  if( ReadPrefetchInstr == 3 && !supports_3dnow_prefetch() ) ReadPrefetchInstr = 0;
+  if( !supports_sse() && supports_3dnow_prefetch() ) ReadPrefetchInstr = 3;
 
   if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
   if( AllocatePrefetchInstr > 3 ) AllocatePrefetchInstr = 3;
-  if( AllocatePrefetchInstr == 3 && !supports_3dnow() ) AllocatePrefetchInstr=0;
-  if( !supports_sse() && supports_3dnow() ) AllocatePrefetchInstr = 3;
+  if( AllocatePrefetchInstr == 3 && !supports_3dnow_prefetch() ) AllocatePrefetchInstr=0;
+  if( !supports_sse() && supports_3dnow_prefetch() ) AllocatePrefetchInstr = 3;
 
   // Allocation prefetch settings
   intx cache_line_size = L1_data_cache_line_size();
@@ -576,10 +575,10 @@
                   logical_processors_per_package());
     tty->print_cr("UseSSE=%d",UseSSE);
     tty->print("Allocation: ");
-    if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow()) {
+    if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
       tty->print_cr("no prefetching");
     } else {
-      if (UseSSE == 0 && supports_3dnow()) {
+      if (UseSSE == 0 && supports_3dnow_prefetch()) {
         tty->print("PREFETCHW");
       } else if (UseSSE >= 1) {
         if (AllocatePrefetchInstr == 0) {
--- a/src/cpu/x86/vm/vm_version_x86.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/vm_version_x86.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -188,7 +188,8 @@
      CPU_FXSR   = (1 << 2),
      CPU_HT     = (1 << 3),
      CPU_MMX    = (1 << 4),
-     CPU_3DNOW  = (1 << 5), // 3DNow comes from cpuid 0x80000001 (EDX)
+     CPU_3DNOW_PREFETCH  = (1 << 5), // Processor supports 3dnow prefetch and prefetchw instructions
+                                     // may not necessarily support other 3dnow instructions
      CPU_SSE    = (1 << 6),
      CPU_SSE2   = (1 << 7),
      CPU_SSE3   = (1 << 8), // SSE3 comes from cpuid 1 (ECX)
@@ -328,8 +329,9 @@
 
     // AMD features.
     if (is_amd()) {
-      if (_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0)
-        result |= CPU_3DNOW;
+      if ((_cpuid_info.ext_cpuid1_edx.bits.tdnow != 0) ||
+          (_cpuid_info.ext_cpuid1_ecx.bits.prefetchw != 0))
+        result |= CPU_3DNOW_PREFETCH;
       if (_cpuid_info.ext_cpuid1_ecx.bits.lzcnt != 0)
         result |= CPU_LZCNT;
       if (_cpuid_info.ext_cpuid1_ecx.bits.sse4a != 0)
@@ -446,9 +448,8 @@
   //
   // AMD features
   //
-  static bool supports_3dnow()    { return (_cpuFeatures & CPU_3DNOW) != 0; }
+  static bool supports_3dnow_prefetch()    { return (_cpuFeatures & CPU_3DNOW_PREFETCH) != 0; }
   static bool supports_mmx_ext()  { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.mmx_amd != 0; }
-  static bool supports_3dnow2()   { return is_amd() && _cpuid_info.ext_cpuid1_edx.bits.tdnow2 != 0; }
   static bool supports_lzcnt()    { return (_cpuFeatures & CPU_LZCNT) != 0; }
   static bool supports_sse4a()    { return (_cpuFeatures & CPU_SSE4A) != 0; }
 
--- a/src/cpu/x86/vm/x86_32.ad	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/x86/vm/x86_32.ad	Sat Apr 23 00:33:38 2011 -0400
@@ -3423,7 +3423,7 @@
          masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2] 
 
          // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
-         if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
+         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
             // prefetchw [eax + Offset(_owner)-2]
             masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
          }
@@ -3467,7 +3467,7 @@
          masm.movptr(boxReg, tmpReg) ; 
 
          // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
-         if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
+         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
             // prefetchw [eax + Offset(_owner)-2]
             masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
          }
@@ -3614,7 +3614,7 @@
       // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
 
       masm.get_thread (boxReg) ;
-      if ((EmitSync & 4096) && VM_Version::supports_3dnow() && os::is_MP()) {
+      if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
         // prefetchw [ebx + Offset(_owner)-2]
         masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
       }
@@ -7333,7 +7333,7 @@
 // Must be safe to execute with invalid address (cannot fault).
 
 instruct prefetchr0( memory mem ) %{
-  predicate(UseSSE==0 && !VM_Version::supports_3dnow());
+  predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
   match(PrefetchRead mem);
   ins_cost(0);
   size(0);
@@ -7343,7 +7343,7 @@
 %}
 
 instruct prefetchr( memory mem ) %{
-  predicate(UseSSE==0 && VM_Version::supports_3dnow() || ReadPrefetchInstr==3);
+  predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || ReadPrefetchInstr==3);
   match(PrefetchRead mem);
   ins_cost(100);
 
@@ -7387,7 +7387,7 @@
 %}
 
 instruct prefetchw0( memory mem ) %{
-  predicate(UseSSE==0 && !VM_Version::supports_3dnow());
+  predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
   match(PrefetchWrite mem);
   ins_cost(0);
   size(0);
@@ -7397,7 +7397,7 @@
 %}
 
 instruct prefetchw( memory mem ) %{
-  predicate(UseSSE==0 && VM_Version::supports_3dnow() || AllocatePrefetchInstr==3);
+  predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || AllocatePrefetchInstr==3);
   match( PrefetchWrite mem );
   ins_cost(100);
 
--- a/src/cpu/zero/vm/bytecodeInterpreter_zero.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/zero/vm/bytecodeInterpreter_zero.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2007, 2008 Red Hat, Inc.
+ * Copyright 2007, 2008, 2011 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -150,4 +150,22 @@
 #define SET_LOCALS_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = \
                                                 ((VMJavaVal64*)(addr))->l)
 
+// VMSlots implementation
+
+#define VMSLOTS_SLOT(offset)    ((intptr_t*)&vmslots[(offset)])
+#define VMSLOTS_ADDR(offset)    ((address)vmslots[(offset)])
+#define VMSLOTS_INT(offset)     (*((jint*)&vmslots[(offset)]))
+#define VMSLOTS_FLOAT(offset)   (*((jfloat*)&vmslots[(offset)]))
+#define VMSLOTS_OBJECT(offset)  ((oop)vmslots[(offset)])
+#define VMSLOTS_DOUBLE(offset)  (((VMJavaVal64*)&vmslots[(offset) - 1])->d)
+#define VMSLOTS_LONG(offset)    (((VMJavaVal64*)&vmslots[(offset) - 1])->l)
+
+#define SET_VMSLOTS_SLOT(value, offset)   (*(intptr_t*)&vmslots[(offset)] = *(intptr_t *)(value))
+#define SET_VMSLOTS_ADDR(value, offset)   (*((address *)&vmslots[(offset)]) = (value))
+#define SET_VMSLOTS_INT(value, offset)    (*((jint *)&vmslots[(offset)]) = (value))
+#define SET_VMSLOTS_FLOAT(value, offset)  (*((jfloat *)&vmslots[(offset)]) = (value))
+#define SET_VMSLOTS_OBJECT(value, offset) (*((oop *)&vmslots[(offset)]) = (value))
+#define SET_VMSLOTS_DOUBLE(value, offset) (((VMJavaVal64*)&vmslots[(offset) - 1])->d = (value))
+#define SET_VMSLOTS_LONG(value, offset)   (((VMJavaVal64*)&vmslots[(offset) - 1])->l = (value))
+
 #endif // CPU_ZERO_VM_BYTECODEINTERPRETER_ZERO_HPP
--- a/src/cpu/zero/vm/cppInterpreter_zero.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/zero/vm/cppInterpreter_zero.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc.
+ * Copyright 2007, 2008, 2009, 2010, 2011 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -56,10 +56,13 @@
 #define fixup_after_potential_safepoint()       \
   method = istate->method()
 
-#define CALL_VM_NOCHECK(func)                   \
+#define CALL_VM_NOCHECK_NOFIX(func)             \
   thread->set_last_Java_frame();                \
   func;                                         \
-  thread->reset_last_Java_frame();              \
+  thread->reset_last_Java_frame();
+
+#define CALL_VM_NOCHECK(func)                   \
+  CALL_VM_NOCHECK_NOFIX(func)                   \
   fixup_after_potential_safepoint()
 
 int CppInterpreter::normal_entry(methodOop method, intptr_t UNUSED, TRAPS) {
@@ -177,6 +180,25 @@
         method, istate->osr_entry(), istate->osr_buf(), THREAD);
       return;
     }
+    else if (istate->msg() == BytecodeInterpreter::call_method_handle) {
+      oop method_handle = istate->callee();
+
+      // Trim back the stack to put the parameters at the top
+      stack->set_sp(istate->stack() + 1);
+
+      // Make the call
+      process_method_handle(method_handle, THREAD);
+      fixup_after_potential_safepoint();
+
+      // Convert the result
+      istate->set_stack(stack->sp() - 1);
+
+      // Restore the stack
+      stack->set_sp(istate->stack_limit() + 1);
+
+      // Resume the interpreter
+      istate->set_msg(BytecodeInterpreter::method_resume);
+    }
     else {
       ShouldNotReachHere();
     }
@@ -607,6 +629,549 @@
   return 0;
 }
 
+int CppInterpreter::method_handle_entry(methodOop method,
+                                        intptr_t UNUSED, TRAPS) {
+  JavaThread *thread = (JavaThread *) THREAD;
+  ZeroStack *stack = thread->zero_stack();
+  int argument_slots = method->size_of_parameters();
+  int result_slots = type2size[result_type_of(method)];
+  intptr_t *vmslots = stack->sp();
+  intptr_t *unwind_sp = vmslots + argument_slots;
+
+  // Find the MethodType
+  address p = (address) method;
+  for (jint* pc = method->method_type_offsets_chain(); (*pc) != -1; pc++) {
+    p = *(address*)(p + (*pc));
+  }
+  oop method_type = (oop) p;
+
+  // The MethodHandle is in the slot after the arguments
+  oop form = java_lang_invoke_MethodType::form(method_type);
+  int num_vmslots = java_lang_invoke_MethodTypeForm::vmslots(form);
+  assert(argument_slots == num_vmslots + 1, "should be");
+  oop method_handle = VMSLOTS_OBJECT(num_vmslots);
+
+  // InvokeGeneric requires some extra shuffling
+  oop mhtype = java_lang_invoke_MethodHandle::type(method_handle);
+  bool is_exact = mhtype == method_type;
+  if (!is_exact) {
+    if (method->intrinsic_id() == vmIntrinsics::_invokeExact) {
+      CALL_VM_NOCHECK_NOFIX(
+        InterpreterRuntime::throw_WrongMethodTypeException(
+          thread, method_type, mhtype));
+      // NB all oops trashed!
+      assert(HAS_PENDING_EXCEPTION, "should do");
+      stack->set_sp(unwind_sp);
+      return 0;
+    }
+    assert(method->intrinsic_id() == vmIntrinsics::_invokeGeneric, "should be");
+
+    // Load up an adapter from the calling type
+    // NB the x86 code for this (in methodHandles_x86.cpp, search for
+    // "genericInvoker") is really really odd.  I'm hoping it's trying
+    // to accomodate odd VM/class library combinations I can ignore.
+    oop adapter = java_lang_invoke_MethodTypeForm::genericInvoker(form);
+    if (adapter == NULL) {
+      CALL_VM_NOCHECK_NOFIX(
+        InterpreterRuntime::throw_WrongMethodTypeException(
+          thread, method_type, mhtype));
+      // NB all oops trashed!
+      assert(HAS_PENDING_EXCEPTION, "should do");
+      stack->set_sp(unwind_sp);
+      return 0;
+    }
+
+    // Adapters are shared among form-families of method-type.  The
+    // type being called is passed as a trusted first argument so that
+    // the adapter knows the actual types of its arguments and return
+    // values.
+    insert_vmslots(num_vmslots + 1, 1, THREAD);
+    if (HAS_PENDING_EXCEPTION) {
+      // NB all oops trashed!
+      stack->set_sp(unwind_sp);
+      return 0;
+    }
+
+    vmslots = stack->sp();
+    num_vmslots++;
+    SET_VMSLOTS_OBJECT(method_type, num_vmslots);
+
+    method_handle = adapter;
+  }
+
+  // Start processing
+  process_method_handle(method_handle, THREAD);
+  if (HAS_PENDING_EXCEPTION)
+    result_slots = 0;
+
+  // If this is an invokeExact then the eventual callee will not
+  // have unwound the method handle argument so we have to do it.
+  // If a result is being returned the it will be above the method
+  // handle argument we're unwinding.
+  if (is_exact) {
+    intptr_t result[2];
+    for (int i = 0; i < result_slots; i++)
+      result[i] = stack->pop();
+    stack->pop();
+    for (int i = result_slots - 1; i >= 0; i--)
+      stack->push(result[i]);
+  }
+
+  // Check
+  assert(stack->sp() == unwind_sp - result_slots, "should be");
+
+  // No deoptimized frames on the stack
+  return 0;
+}
+
+void CppInterpreter::process_method_handle(oop method_handle, TRAPS) {
+  JavaThread *thread = (JavaThread *) THREAD;
+  ZeroStack *stack = thread->zero_stack();
+  intptr_t *vmslots = stack->sp();
+
+  bool direct_to_method = false;
+  BasicType src_rtype = T_ILLEGAL;
+  BasicType dst_rtype = T_ILLEGAL;
+
+  MethodHandleEntry *entry =
+    java_lang_invoke_MethodHandle::vmentry(method_handle);
+  MethodHandles::EntryKind entry_kind =
+    (MethodHandles::EntryKind) (((intptr_t) entry) & 0xffffffff);
+
+  methodOop method = NULL;
+  switch (entry_kind) {
+  case MethodHandles::_invokestatic_mh:
+    direct_to_method = true;
+    break;
+
+  case MethodHandles::_invokespecial_mh:
+  case MethodHandles::_invokevirtual_mh:
+  case MethodHandles::_invokeinterface_mh:
+    {
+      oop receiver =
+        VMSLOTS_OBJECT(
+          java_lang_invoke_MethodHandle::vmslots(method_handle) - 1);
+      if (receiver == NULL) {
+          stack->set_sp(calculate_unwind_sp(stack, method_handle));
+          CALL_VM_NOCHECK_NOFIX(
+            throw_exception(
+              thread, vmSymbols::java_lang_NullPointerException()));
+          // NB all oops trashed!
+          assert(HAS_PENDING_EXCEPTION, "should do");
+          return;
+      }
+      if (entry_kind != MethodHandles::_invokespecial_mh) {
+        int index = java_lang_invoke_DirectMethodHandle::vmindex(method_handle);
+        instanceKlass* rcvrKlass =
+          (instanceKlass *) receiver->klass()->klass_part();
+        if (entry_kind == MethodHandles::_invokevirtual_mh) {
+          method = (methodOop) rcvrKlass->start_of_vtable()[index];
+        }
+        else {
+          oop iclass = java_lang_invoke_MethodHandle::vmtarget(method_handle);
+          itableOffsetEntry* ki =
+            (itableOffsetEntry *) rcvrKlass->start_of_itable();
+          int i, length = rcvrKlass->itable_length();
+          for (i = 0; i < length; i++, ki++ ) {
+            if (ki->interface_klass() == iclass)
+              break;
+          }
+          if (i == length) {
+            stack->set_sp(calculate_unwind_sp(stack, method_handle));
+            CALL_VM_NOCHECK_NOFIX(
+              throw_exception(
+                thread, vmSymbols::java_lang_IncompatibleClassChangeError()));
+            // NB all oops trashed!
+            assert(HAS_PENDING_EXCEPTION, "should do");
+            return;
+          }
+          itableMethodEntry* im = ki->first_method_entry(receiver->klass());
+          method = im[index].method();
+          if (method == NULL) {
+            stack->set_sp(calculate_unwind_sp(stack, method_handle));
+            CALL_VM_NOCHECK_NOFIX(
+              throw_exception(
+                thread, vmSymbols::java_lang_AbstractMethodError()));
+            // NB all oops trashed!
+            assert(HAS_PENDING_EXCEPTION, "should do");
+            return;
+          }
+        }
+      }
+    }
+    direct_to_method = true;
+    break;
+
+  case MethodHandles::_bound_ref_direct_mh:
+  case MethodHandles::_bound_int_direct_mh:
+  case MethodHandles::_bound_long_direct_mh:
+    direct_to_method = true;
+    // fall through
+  case MethodHandles::_bound_ref_mh:
+  case MethodHandles::_bound_int_mh:
+  case MethodHandles::_bound_long_mh:
+    {
+      BasicType arg_type  = T_ILLEGAL;
+      int       arg_mask  = -1;
+      int       arg_slots = -1;
+      MethodHandles::get_ek_bound_mh_info(
+        entry_kind, arg_type, arg_mask, arg_slots);
+      int arg_slot =
+        java_lang_invoke_BoundMethodHandle::vmargslot(method_handle);
+
+      // Create the new slot(s)
+      intptr_t *unwind_sp = calculate_unwind_sp(stack, method_handle);
+      insert_vmslots(arg_slot, arg_slots, THREAD);
+      if (HAS_PENDING_EXCEPTION) {
+        // all oops trashed
+        stack->set_sp(unwind_sp);
+        return;
+      }
+      vmslots = stack->sp();
+
+      // Store bound argument into new stack slot
+      oop arg = java_lang_invoke_BoundMethodHandle::argument(method_handle);
+      if (arg_type == T_OBJECT) {
+        assert(arg_slots == 1, "should be");
+        SET_VMSLOTS_OBJECT(arg, arg_slot);
+      }
+      else {
+        jvalue arg_value;
+        arg_type = java_lang_boxing_object::get_value(arg, &arg_value);
+        switch (arg_type) {
+        case T_BOOLEAN:
+          SET_VMSLOTS_INT(arg_value.z, arg_slot);
+          break;
+        case T_CHAR:
+          SET_VMSLOTS_INT(arg_value.c, arg_slot);
+          break;
+        case T_BYTE:
+          SET_VMSLOTS_INT(arg_value.b, arg_slot);
+          break;
+        case T_SHORT:
+          SET_VMSLOTS_INT(arg_value.s, arg_slot);
+          break;
+        case T_INT:
+          SET_VMSLOTS_INT(arg_value.i, arg_slot);
+          break;
+        case T_FLOAT:
+          SET_VMSLOTS_FLOAT(arg_value.f, arg_slot);
+          break;
+        case T_LONG:
+          SET_VMSLOTS_LONG(arg_value.j, arg_slot + 1);
+          break;
+        case T_DOUBLE:
+          SET_VMSLOTS_DOUBLE(arg_value.d, arg_slot + 1);
+          break;
+        default:
+          tty->print_cr("unhandled type %s", type2name(arg_type));
+          ShouldNotReachHere();
+        }
+      }
+    }
+    break;
+
+  case MethodHandles::_adapter_retype_only:
+  case MethodHandles::_adapter_retype_raw:
+    src_rtype = result_type_of_handle(
+      java_lang_invoke_MethodHandle::vmtarget(method_handle));
+    dst_rtype = result_type_of_handle(method_handle);
+    break;
+
+  case MethodHandles::_adapter_check_cast:
+    {
+      int arg_slot =
+        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
+      oop arg = VMSLOTS_OBJECT(arg_slot);
+      if (arg != NULL) {
+        klassOop objKlassOop = arg->klass();
+        klassOop klassOf = java_lang_Class::as_klassOop(
+          java_lang_invoke_AdapterMethodHandle::argument(method_handle));
+
+        if (objKlassOop != klassOf &&
+            !objKlassOop->klass_part()->is_subtype_of(klassOf)) {
+          ResourceMark rm(THREAD);
+          const char* objName = Klass::cast(objKlassOop)->external_name();
+          const char* klassName = Klass::cast(klassOf)->external_name();
+          char* message = SharedRuntime::generate_class_cast_message(
+            objName, klassName);
+
+          stack->set_sp(calculate_unwind_sp(stack, method_handle));
+          CALL_VM_NOCHECK_NOFIX(
+            throw_exception(
+              thread, vmSymbols::java_lang_ClassCastException(), message));
+          // NB all oops trashed!
+          assert(HAS_PENDING_EXCEPTION, "should do");
+          return;
+        }
+      }
+    }
+    break;
+
+  case MethodHandles::_adapter_dup_args:
+    {
+      int arg_slot =
+        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
+      int conv =
+        java_lang_invoke_AdapterMethodHandle::conversion(method_handle);
+      int num_slots = -MethodHandles::adapter_conversion_stack_move(conv);
+      assert(num_slots > 0, "should be");
+
+      // Create the new slot(s)
+      intptr_t *unwind_sp = calculate_unwind_sp(stack, method_handle);
+      stack->overflow_check(num_slots, THREAD);
+      if (HAS_PENDING_EXCEPTION) {
+        // all oops trashed
+        stack->set_sp(unwind_sp);
+        return;
+      }
+
+      // Duplicate the arguments
+      for (int i = num_slots - 1; i >= 0; i--)
+        stack->push(*VMSLOTS_SLOT(arg_slot + i));
+
+      vmslots = stack->sp(); // unused, but let the compiler figure that out
+    }
+    break;
+
+  case MethodHandles::_adapter_drop_args:
+    {
+      int arg_slot =
+        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
+      int conv =
+        java_lang_invoke_AdapterMethodHandle::conversion(method_handle);
+      int num_slots = MethodHandles::adapter_conversion_stack_move(conv);
+      assert(num_slots > 0, "should be");
+
+      remove_vmslots(arg_slot, num_slots, THREAD); // doesn't trap
+      vmslots = stack->sp(); // unused, but let the compiler figure that out
+    }
+    break;
+
+  case MethodHandles::_adapter_opt_swap_1:
+  case MethodHandles::_adapter_opt_swap_2:
+  case MethodHandles::_adapter_opt_rot_1_up:
+  case MethodHandles::_adapter_opt_rot_1_down:
+  case MethodHandles::_adapter_opt_rot_2_up:
+  case MethodHandles::_adapter_opt_rot_2_down:
+    {
+      int arg1 =
+        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
+      int conv =
+        java_lang_invoke_AdapterMethodHandle::conversion(method_handle);
+      int arg2 = MethodHandles::adapter_conversion_vminfo(conv);
+
+      int swap_bytes = 0, rotate = 0;
+      MethodHandles::get_ek_adapter_opt_swap_rot_info(
+        entry_kind, swap_bytes, rotate);
+      int swap_slots = swap_bytes >> LogBytesPerWord;
+
+      intptr_t tmp;
+      switch (rotate) {
+      case 0: // swap
+        for (int i = 0; i < swap_slots; i++) {
+          tmp = *VMSLOTS_SLOT(arg1 + i);
+          SET_VMSLOTS_SLOT(VMSLOTS_SLOT(arg2 + i), arg1 + i);
+          SET_VMSLOTS_SLOT(&tmp, arg2 + i);
+        }
+        break;
+
+      case 1: // up
+        assert(arg1 - swap_slots > arg2, "should be");
+
+        tmp = *VMSLOTS_SLOT(arg1);
+        for (int i = arg1 - swap_slots; i >= arg2; i--)
+          SET_VMSLOTS_SLOT(VMSLOTS_SLOT(i), i + swap_slots);
+        SET_VMSLOTS_SLOT(&tmp, arg2);
+
+        break;
+
+      case -1: // down
+        assert(arg2 - swap_slots > arg1, "should be");
+
+        tmp = *VMSLOTS_SLOT(arg1);
+        for (int i = arg1 + swap_slots; i <= arg2; i++)
+          SET_VMSLOTS_SLOT(VMSLOTS_SLOT(i), i - swap_slots);
+        SET_VMSLOTS_SLOT(&tmp, arg2);
+        break;
+
+      default:
+        ShouldNotReachHere();
+      }
+    }
+    break;
+
+  case MethodHandles::_adapter_opt_i2l:
+    {
+      int arg_slot =
+        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
+      int arg = VMSLOTS_INT(arg_slot);
+      intptr_t *unwind_sp = calculate_unwind_sp(stack, method_handle);
+      insert_vmslots(arg_slot, 1, THREAD);
+      if (HAS_PENDING_EXCEPTION) {
+        // all oops trashed
+        stack->set_sp(unwind_sp);
+        return;
+      }
+      vmslots = stack->sp();
+      arg_slot++;
+      SET_VMSLOTS_LONG(arg, arg_slot);
+    }
+    break;
+
+  case MethodHandles::_adapter_opt_unboxi:
+  case MethodHandles::_adapter_opt_unboxl:
+    {
+      int arg_slot =
+        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
+      oop arg = VMSLOTS_OBJECT(arg_slot);
+      jvalue arg_value;
+      BasicType arg_type = java_lang_boxing_object::get_value(arg, &arg_value);
+      if (arg_type == T_LONG || arg_type == T_DOUBLE) {
+        intptr_t *unwind_sp = calculate_unwind_sp(stack, method_handle);
+        insert_vmslots(arg_slot, 1, THREAD);
+        if (HAS_PENDING_EXCEPTION) {
+          // all oops trashed
+          stack->set_sp(unwind_sp);
+          return;
+        }
+        vmslots = stack->sp();
+        arg_slot++;
+      }
+      switch (arg_type) {
+      case T_BOOLEAN:
+        SET_VMSLOTS_INT(arg_value.z, arg_slot);
+        break;
+      case T_CHAR:
+        SET_VMSLOTS_INT(arg_value.c, arg_slot);
+        break;
+      case T_BYTE:
+        SET_VMSLOTS_INT(arg_value.b, arg_slot);
+        break;
+      case T_SHORT:
+        SET_VMSLOTS_INT(arg_value.s, arg_slot);
+        break;
+      case T_INT:
+        SET_VMSLOTS_INT(arg_value.i, arg_slot);
+        break;
+      case T_FLOAT:
+        SET_VMSLOTS_FLOAT(arg_value.f, arg_slot);
+        break;
+      case T_LONG:
+        SET_VMSLOTS_LONG(arg_value.j, arg_slot);
+        break;
+      case T_DOUBLE:
+        SET_VMSLOTS_DOUBLE(arg_value.d, arg_slot);
+        break;
+      default:
+        tty->print_cr("unhandled type %s", type2name(arg_type));
+        ShouldNotReachHere();
+      }
+    }
+    break;
+
+  default:
+    tty->print_cr("unhandled entry_kind %s",
+                  MethodHandles::entry_name(entry_kind));
+    ShouldNotReachHere();
+  }
+
+  // Continue along the chain
+  if (direct_to_method) {
+    if (method == NULL) {
+      method =
+        (methodOop) java_lang_invoke_MethodHandle::vmtarget(method_handle);
+    }
+    address entry_point = method->from_interpreted_entry();
+    Interpreter::invoke_method(method, entry_point, THREAD);
+  }
+  else {
+    process_method_handle(
+      java_lang_invoke_MethodHandle::vmtarget(method_handle), THREAD);
+  }
+  // NB all oops now trashed
+
+  // Adapt the result type, if necessary
+  if (src_rtype != dst_rtype && !HAS_PENDING_EXCEPTION) {
+    switch (dst_rtype) {
+    case T_VOID:
+      for (int i = 0; i < type2size[src_rtype]; i++)
+        stack->pop();
+      return;
+
+    case T_INT:
+      switch (src_rtype) {
+      case T_VOID:
+        stack->overflow_check(1, CHECK);
+        stack->push(0);
+        return;
+
+      case T_BOOLEAN:
+      case T_CHAR:
+      case T_BYTE:
+      case T_SHORT:
+        return;
+      }
+    }
+
+    tty->print_cr("unhandled conversion:");
+    tty->print_cr("src_rtype = %s", type2name(src_rtype));
+    tty->print_cr("dst_rtype = %s", type2name(dst_rtype));
+    ShouldNotReachHere();
+  }
+}
+
+// The new slots will be inserted before slot insert_before.
+// Slots < insert_before will have the same slot number after the insert.
+// Slots >= insert_before will become old_slot + num_slots.
+void CppInterpreter::insert_vmslots(int insert_before, int num_slots, TRAPS) {
+  JavaThread *thread = (JavaThread *) THREAD;
+  ZeroStack *stack = thread->zero_stack();
+
+  // Allocate the space
+  stack->overflow_check(num_slots, CHECK);
+  stack->alloc(num_slots * wordSize);
+  intptr_t *vmslots = stack->sp();
+
+  // Shuffle everything up
+  for (int i = 0; i < insert_before; i++)
+    SET_VMSLOTS_SLOT(VMSLOTS_SLOT(i + num_slots), i);
+}
+
+void CppInterpreter::remove_vmslots(int first_slot, int num_slots, TRAPS) {
+  JavaThread *thread = (JavaThread *) THREAD;
+  ZeroStack *stack = thread->zero_stack();
+  intptr_t *vmslots = stack->sp();
+
+  // Move everything down
+  for (int i = first_slot - 1; i >= 0; i--)
+    SET_VMSLOTS_SLOT(VMSLOTS_SLOT(i), i + num_slots);
+
+  // Deallocate the space
+  stack->set_sp(stack->sp() + num_slots);
+}
+
+BasicType CppInterpreter::result_type_of_handle(oop method_handle) {
+  oop method_type = java_lang_invoke_MethodHandle::type(method_handle);
+  oop return_type = java_lang_invoke_MethodType::rtype(method_type);
+  return java_lang_Class::as_BasicType(return_type, (klassOop *) NULL);
+}
+
+intptr_t* CppInterpreter::calculate_unwind_sp(ZeroStack* stack,
+                                              oop method_handle) {
+  oop method_type = java_lang_invoke_MethodHandle::type(method_handle);
+  oop form = java_lang_invoke_MethodType::form(method_type);
+  int argument_slots = java_lang_invoke_MethodTypeForm::vmslots(form);
+
+  return stack->sp() + argument_slots;
+}
+
+IRT_ENTRY(void, CppInterpreter::throw_exception(JavaThread* thread,
+                                                Symbol*     name,
+                                                char*       message))
+  THROW_MSG(name, message);
+IRT_END
+
 InterpreterFrame *InterpreterFrame::build(const methodOop method, TRAPS) {
   JavaThread *thread = (JavaThread *) THREAD;
   ZeroStack *stack = thread->zero_stack();
@@ -737,6 +1302,26 @@
   return generate_entry((address) CppInterpreter::accessor_entry);
 }
 
+address InterpreterGenerator::generate_Reference_get_entry(void) {
+#ifndef SERIALGC
+  if (UseG1GC) {
+    // We need to generate have a routine that generates code to:
+    //   * load the value in the referent field
+    //   * passes that value to the pre-barrier.
+    //
+    // In the case of G1 this will record the value of the
+    // referent in an SATB buffer if marking is active.
+    // This will cause concurrent marking to mark the referent
+    // field as live.
+    Unimplemented();
+  }
+#endif // SERIALGC
+
+  // If G1 is not enabled then attempt to go through the accessor entry point
+  // Reference.get is an accessor
+  return generate_accessor_entry();
+}
+
 address InterpreterGenerator::generate_native_entry(bool synchronized) {
   assert(synchronized == false, "should be");
 
@@ -792,6 +1377,10 @@
     entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind);
     break;
 
+  case Interpreter::java_lang_ref_reference_get:
+    entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry();
+    break;
+
   default:
     ShouldNotReachHere();
   }
--- a/src/cpu/zero/vm/cppInterpreter_zero.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/zero/vm/cppInterpreter_zero.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2007, 2008, 2010 Red Hat, Inc.
+ * Copyright 2007, 2008, 2010, 2011 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,12 +36,22 @@
   static int native_entry(methodOop method, intptr_t UNUSED, TRAPS);
   static int accessor_entry(methodOop method, intptr_t UNUSED, TRAPS);
   static int empty_entry(methodOop method, intptr_t UNUSED, TRAPS);
+  static int method_handle_entry(methodOop method, intptr_t UNUSED, TRAPS);
 
  public:
   // Main loop of normal_entry
   static void main_loop(int recurse, TRAPS);
 
  private:
+  // Helpers for method_handle_entry
+  static void process_method_handle(oop method_handle, TRAPS);
+  static void insert_vmslots(int insert_before, int num_slots, TRAPS);
+  static void remove_vmslots(int first_slot, int num_slots, TRAPS);
+  static BasicType result_type_of_handle(oop method_handle);
+  static intptr_t* calculate_unwind_sp(ZeroStack* stack, oop method_handle);
+  static void throw_exception(JavaThread* thread, Symbol* name,char *msg=NULL);
+
+ private:
   // Fast result type determination
   static BasicType result_type_of(methodOop method);
 
--- a/src/cpu/zero/vm/interpreterGenerator_zero.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/zero/vm/interpreterGenerator_zero.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2007 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -37,6 +37,7 @@
   address generate_math_entry(AbstractInterpreter::MethodKind kind);
   address generate_empty_entry();
   address generate_accessor_entry();
+  address generate_Reference_get_entry();
   address generate_method_handle_entry();
 
 #endif // CPU_ZERO_VM_INTERPRETERGENERATOR_ZERO_HPP
--- a/src/cpu/zero/vm/interpreter_zero.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/zero/vm/interpreter_zero.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2007, 2008, 2009, 2010 Red Hat, Inc.
+ * Copyright 2007, 2008, 2009, 2010, 2011 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -49,6 +49,9 @@
 #ifdef COMPILER1
 #include "c1/c1_Runtime1.hpp"
 #endif
+#ifdef CC_INTERP
+#include "interpreter/cppInterpreter.hpp"
+#endif
 
 address AbstractInterpreterGenerator::generate_slow_signature_handler() {
   _masm->advance(1);
@@ -64,11 +67,15 @@
 }
 
 address InterpreterGenerator::generate_abstract_entry() {
-  return ShouldNotCallThisEntry();
+  return generate_entry((address) ShouldNotCallThisEntry());
 }
 
 address InterpreterGenerator::generate_method_handle_entry() {
-  return ShouldNotCallThisEntry();
+#ifdef CC_INTERP
+  return generate_entry((address) CppInterpreter::method_handle_entry);
+#else
+  return generate_entry((address) ShouldNotCallThisEntry());
+#endif // CC_INTERP
 }
 
 bool AbstractInterpreter::can_be_compiled(methodHandle m) {
--- a/src/cpu/zero/vm/methodHandles_zero.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/cpu/zero/vm/methodHandles_zero.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2009, 2010 Red Hat, Inc.
+ * Copyright 2009, 2010, 2011 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,10 +29,21 @@
 #include "prims/methodHandles.hpp"
 
 int MethodHandles::adapter_conversion_ops_supported_mask() {
-  ShouldNotCallThis();
+  return ((1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_ONLY)
+         |(1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_RAW)
+         |(1<<java_lang_invoke_AdapterMethodHandle::OP_CHECK_CAST)
+         |(1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_PRIM)
+         |(1<<java_lang_invoke_AdapterMethodHandle::OP_REF_TO_PRIM)
+         |(1<<java_lang_invoke_AdapterMethodHandle::OP_SWAP_ARGS)
+         |(1<<java_lang_invoke_AdapterMethodHandle::OP_ROT_ARGS)
+         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DUP_ARGS)
+         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS)
+         //|(1<<java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS) //BUG!
+         );
+  // FIXME: MethodHandlesTest gets a crash if we enable OP_SPREAD_ARGS.
 }
 
 void MethodHandles::generate_method_handle_stub(MacroAssembler*          masm,
                                                 MethodHandles::EntryKind ek) {
-  ShouldNotCallThis();
+  init_entry(ek, (MethodHandleEntry *) ek);
 }
--- a/src/share/vm/c1/c1_CodeStubs.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/c1/c1_CodeStubs.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -519,42 +519,126 @@
 // Code stubs for Garbage-First barriers.
 class G1PreBarrierStub: public CodeStub {
  private:
+  bool _do_load;
   LIR_Opr _addr;
   LIR_Opr _pre_val;
   LIR_PatchCode _patch_code;
   CodeEmitInfo* _info;
 
  public:
+  // Version that _does_ generate a load of the previous value from addr.
+  // addr (the address of the field to be read) must be a LIR_Address
   // pre_val (a temporary register) must be a register;
-  // addr (the address of the field to be read) must be a LIR_Address
   G1PreBarrierStub(LIR_Opr addr, LIR_Opr pre_val, LIR_PatchCode patch_code, CodeEmitInfo* info) :
-    _addr(addr), _pre_val(pre_val), _patch_code(patch_code), _info(info)
+    _addr(addr), _pre_val(pre_val), _do_load(true),
+    _patch_code(patch_code), _info(info)
   {
     assert(_pre_val->is_register(), "should be temporary register");
     assert(_addr->is_address(), "should be the address of the field");
   }
 
+  // Version that _does not_ generate load of the previous value; the
+  // previous value is assumed to have already been loaded into pre_val.
+  G1PreBarrierStub(LIR_Opr pre_val) :
+    _addr(LIR_OprFact::illegalOpr), _pre_val(pre_val), _do_load(false),
+    _patch_code(lir_patch_none), _info(NULL)
+  {
+    assert(_pre_val->is_register(), "should be a register");
+  }
+
   LIR_Opr addr() const { return _addr; }
   LIR_Opr pre_val() const { return _pre_val; }
   LIR_PatchCode patch_code() const { return _patch_code; }
   CodeEmitInfo* info() const { return _info; }
+  bool do_load() const { return _do_load; }
 
   virtual void emit_code(LIR_Assembler* e);
   virtual void visit(LIR_OpVisitState* visitor) {
-    // don't pass in the code emit info since it's processed in the fast
-    // path
-    if (_info != NULL)
-      visitor->do_slow_case(_info);
-    else
+    if (_do_load) {
+      // don't pass in the code emit info since it's processed in the fast
+      // path
+      if (_info != NULL)
+        visitor->do_slow_case(_info);
+      else
+        visitor->do_slow_case();
+
+      visitor->do_input(_addr);
+      visitor->do_temp(_pre_val);
+    } else {
       visitor->do_slow_case();
-    visitor->do_input(_addr);
-    visitor->do_temp(_pre_val);
+      visitor->do_input(_pre_val);
+    }
   }
 #ifndef PRODUCT
   virtual void print_name(outputStream* out) const { out->print("G1PreBarrierStub"); }
 #endif // PRODUCT
 };
 
+// This G1 barrier code stub is used in Unsafe.getObject.
+// It generates a sequence of guards around the SATB
+// barrier code that are used to detect when we have
+// the referent field of a Reference object.
+// The first check is assumed to have been generated
+// in the code generated for Unsafe.getObject().
+
+class G1UnsafeGetObjSATBBarrierStub: public CodeStub {
+ private:
+  LIR_Opr _val;
+  LIR_Opr _src;
+
+  LIR_Opr _tmp;
+  LIR_Opr _thread;
+
+  bool _gen_src_check;
+
+ public:
+  // A G1 barrier that is guarded by generated guards that determine whether
+  // val (which is the result of Unsafe.getObject() should be recorded in an
+  // SATB log buffer. We could be reading the referent field of a Reference object
+  // using Unsafe.getObject() and we need to record the referent.
+  //
+  // * val is the operand returned by the unsafe.getObject routine.
+  // * src is the base object
+  // * tmp is a temp used to load the klass of src, and then reference type
+  // * thread is the thread object.
+
+  G1UnsafeGetObjSATBBarrierStub(LIR_Opr val, LIR_Opr src,
+                                LIR_Opr tmp, LIR_Opr thread,
+                                bool gen_src_check) :
+    _val(val), _src(src),
+    _tmp(tmp), _thread(thread),
+    _gen_src_check(gen_src_check)
+  {
+    assert(_val->is_register(), "should have already been loaded");
+    assert(_src->is_register(), "should have already been loaded");
+
+    assert(_tmp->is_register(), "should be a temporary register");
+  }
+
+  LIR_Opr val() const { return _val; }
+  LIR_Opr src() const { return _src; }
+
+  LIR_Opr tmp() const { return _tmp; }
+  LIR_Opr thread() const { return _thread; }
+
+  bool gen_src_check() const { return _gen_src_check; }
+
+  virtual void emit_code(LIR_Assembler* e);
+
+  virtual void visit(LIR_OpVisitState* visitor) {
+    visitor->do_slow_case();
+    visitor->do_input(_val);
+    visitor->do_input(_src);
+    visitor->do_input(_thread);
+
+    visitor->do_temp(_tmp);
+  }
+
+#ifndef PRODUCT
+  virtual void print_name(outputStream* out) const { out->print("G1UnsafeGetObjSATBBarrierStub"); }
+#endif // PRODUCT
+};
+
 class G1PostBarrierStub: public CodeStub {
  private:
   LIR_Opr _addr;
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -2913,6 +2913,46 @@
       block()->set_end(end);
       break;
     }
+
+  case vmIntrinsics::_Reference_get:
+    {
+      if (UseG1GC) {
+        // With java.lang.ref.reference.get() we must go through the
+        // intrinsic - when G1 is enabled - even when get() is the root
+        // method of the compile so that, if necessary, the value in
+        // the referent field of the reference object gets recorded by
+        // the pre-barrier code.
+        // Specifically, if G1 is enabled, the value in the referent
+        // field is recorded by the G1 SATB pre barrier. This will
+        // result in the referent being marked live and the reference
+        // object removed from the list of discovered references during
+        // reference processing.
+
+        // Set up a stream so that appending instructions works properly.
+        ciBytecodeStream s(scope->method());
+        s.reset_to_bci(0);
+        scope_data()->set_stream(&s);
+        s.next();
+
+        // setup the initial block state
+        _block = start_block;
+        _state = start_block->state()->copy_for_parsing();
+        _last  = start_block;
+        load_local(objectType, 0);
+
+        // Emit the intrinsic node.
+        bool result = try_inline_intrinsics(scope->method());
+        if (!result) BAILOUT("failed to inline intrinsic");
+        method_return(apop());
+
+        // connect the begin and end blocks and we're all done.
+        BlockEnd* end = last()->as_BlockEnd();
+        block()->set_end(end);
+        break;
+      }
+      // Otherwise, fall thru
+    }
+
   default:
     scope_data()->add_to_work_list(start_block);
     iterate_all_blocks();
@@ -3150,6 +3190,15 @@
       append_unsafe_CAS(callee);
       return true;
 
+    case vmIntrinsics::_Reference_get:
+      // It is only when G1 is enabled that we absolutely
+      // need to use the intrinsic version of Reference.get()
+      // so that the value in the referent field, if necessary,
+      // can be registered by the pre-barrier code.
+      if (!UseG1GC) return false;
+      preserves_state = true;
+      break;
+
     default                       : return false; // do not inline
   }
   // create intrinsic node
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1209,6 +1209,38 @@
   set_no_result(x);
 }
 
+// Examble: ref.get()
+// Combination of LoadField and g1 pre-write barrier
+void LIRGenerator::do_Reference_get(Intrinsic* x) {
+
+  const int referent_offset = java_lang_ref_Reference::referent_offset;
+  guarantee(referent_offset > 0, "referent offset not initialized");
+
+  assert(x->number_of_arguments() == 1, "wrong type");
+
+  LIRItem reference(x->argument_at(0), this);
+  reference.load_item();
+
+  // need to perform the null check on the reference objecy
+  CodeEmitInfo* info = NULL;
+  if (x->needs_null_check()) {
+    info = state_for(x);
+  }
+
+  LIR_Address* referent_field_adr =
+    new LIR_Address(reference.result(), referent_offset, T_OBJECT);
+
+  LIR_Opr result = rlock_result(x);
+
+  __ load(referent_field_adr, result, info);
+
+  // Register the value in the referent field with the pre-barrier
+  pre_barrier(LIR_OprFact::illegalOpr /* addr_opr */,
+              result /* pre_val */,
+              false  /* do_load */,
+              false  /* patch */,
+              NULL   /* info */);
+}
 
 // Example: object.getClass ()
 void LIRGenerator::do_getClass(Intrinsic* x) {
@@ -1351,13 +1383,14 @@
 
 // Various barriers
 
-void LIRGenerator::pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info) {
+void LIRGenerator::pre_barrier(LIR_Opr addr_opr, LIR_Opr pre_val,
+                               bool do_load, bool patch, CodeEmitInfo* info) {
   // Do the pre-write barrier, if any.
   switch (_bs->kind()) {
 #ifndef SERIALGC
     case BarrierSet::G1SATBCT:
     case BarrierSet::G1SATBCTLogging:
-      G1SATBCardTableModRef_pre_barrier(addr_opr, patch, info);
+      G1SATBCardTableModRef_pre_barrier(addr_opr, pre_val, do_load, patch, info);
       break;
 #endif // SERIALGC
     case BarrierSet::CardTableModRef:
@@ -1398,9 +1431,8 @@
 ////////////////////////////////////////////////////////////////////////
 #ifndef SERIALGC
 
-void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info) {
-  if (G1DisablePreBarrier) return;
-
+void LIRGenerator::G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, LIR_Opr pre_val,
+                                                     bool do_load, bool patch, CodeEmitInfo* info) {
   // First we test whether marking is in progress.
   BasicType flag_type;
   if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
@@ -1419,26 +1451,40 @@
   // Read the marking-in-progress flag.
   LIR_Opr flag_val = new_register(T_INT);
   __ load(mark_active_flag_addr, flag_val);
-
-  LIR_PatchCode pre_val_patch_code =
-    patch ? lir_patch_normal : lir_patch_none;
-
-  LIR_Opr pre_val = new_register(T_OBJECT);
-
   __ cmp(lir_cond_notEqual, flag_val, LIR_OprFact::intConst(0));
-  if (!addr_opr->is_address()) {
-    assert(addr_opr->is_register(), "must be");
-    addr_opr = LIR_OprFact::address(new LIR_Address(addr_opr, T_OBJECT));
+
+  LIR_PatchCode pre_val_patch_code = lir_patch_none;
+
+  CodeStub* slow;
+
+  if (do_load) {
+    assert(pre_val == LIR_OprFact::illegalOpr, "sanity");
+    assert(addr_opr != LIR_OprFact::illegalOpr, "sanity");
+
+    if (patch)
+      pre_val_patch_code = lir_patch_normal;
+
+    pre_val = new_register(T_OBJECT);
+
+    if (!addr_opr->is_address()) {
+      assert(addr_opr->is_register(), "must be");
+      addr_opr = LIR_OprFact::address(new LIR_Address(addr_opr, T_OBJECT));
+    }
+    slow = new G1PreBarrierStub(addr_opr, pre_val, pre_val_patch_code, info);
+  } else {
+    assert(addr_opr == LIR_OprFact::illegalOpr, "sanity");
+    assert(pre_val->is_register(), "must be");
+    assert(pre_val->type() == T_OBJECT, "must be an object");
+    assert(info == NULL, "sanity");
+
+    slow = new G1PreBarrierStub(pre_val);
   }
-  CodeStub* slow = new G1PreBarrierStub(addr_opr, pre_val, pre_val_patch_code,
-                                        info);
+
   __ branch(lir_cond_notEqual, T_INT, slow);
   __ branch_destination(slow->continuation());
 }
 
 void LIRGenerator::G1SATBCardTableModRef_post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val) {
-  if (G1DisablePostBarrier) return;
-
   // If the "new_val" is a constant NULL, no barrier is necessary.
   if (new_val->is_constant() &&
       new_val->as_constant_ptr()->as_jobject() == NULL) return;
@@ -1662,6 +1708,8 @@
   if (is_oop) {
     // Do the pre-write barrier, if any.
     pre_barrier(LIR_OprFact::address(address),
+                LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load*/,
                 needs_patching,
                 (info ? new CodeEmitInfo(info) : NULL));
   }
@@ -2091,9 +2139,144 @@
   off.load_item();
   src.load_item();
 
-  LIR_Opr reg = reg = rlock_result(x, x->basic_type());
+  LIR_Opr reg = rlock_result(x, x->basic_type());
 
   get_Object_unsafe(reg, src.result(), off.result(), type, x->is_volatile());
+
+#ifndef SERIALGC
+  // We might be reading the value of the referent field of a
+  // Reference object in order to attach it back to the live
+  // object graph. If G1 is enabled then we need to record
+  // the value that is being returned in an SATB log buffer.
+  //
+  // We need to generate code similar to the following...
+  //
+  // if (offset == java_lang_ref_Reference::referent_offset) {
+  //   if (src != NULL) {
+  //     if (klass(src)->reference_type() != REF_NONE) {
+  //       pre_barrier(..., reg, ...);
+  //     }
+  //   }
+  // }
+  //
+  // The first non-constant check of either the offset or
+  // the src operand will be done here; the remainder
+  // will take place in the generated code stub.
+
+  if (UseG1GC && type == T_OBJECT) {
+    bool gen_code_stub = true;       // Assume we need to generate the slow code stub.
+    bool gen_offset_check = true;       // Assume the code stub has to generate the offset guard.
+    bool gen_source_check = true;       // Assume the code stub has to check the src object for null.
+
+    if (off.is_constant()) {
+      jlong off_con = (off.type()->is_int() ?
+                        (jlong) off.get_jint_constant() :
+                        off.get_jlong_constant());
+
+
+      if (off_con != (jlong) java_lang_ref_Reference::referent_offset) {
+        // The constant offset is something other than referent_offset.
+        // We can skip generating/checking the remaining guards and
+        // skip generation of the code stub.
+        gen_code_stub = false;
+      } else {
+        // The constant offset is the same as referent_offset -
+        // we do not need to generate a runtime offset check.
+        gen_offset_check = false;
+      }
+    }
+
+    // We don't need to generate stub if the source object is an array
+    if (gen_code_stub && src.type()->is_array()) {
+      gen_code_stub = false;
+    }
+
+    if (gen_code_stub) {
+      // We still need to continue with the checks.
+      if (src.is_constant()) {
+        ciObject* src_con = src.get_jobject_constant();
+
+        if (src_con->is_null_object()) {
+          // The constant src object is null - We can skip
+          // generating the code stub.
+          gen_code_stub = false;
+        } else {
+          // Non-null constant source object. We still have to generate
+          // the slow stub - but we don't need to generate the runtime
+          // null object check.
+          gen_source_check = false;
+        }
+      }
+    }
+
+    if (gen_code_stub) {
+      // Temoraries.
+      LIR_Opr src_klass = new_register(T_OBJECT);
+
+      // Get the thread pointer for the pre-barrier
+      LIR_Opr thread = getThreadPointer();
+
+      CodeStub* stub;
+
+      // We can have generate one runtime check here. Let's start with
+      // the offset check.
+      if (gen_offset_check) {
+        // if (offset == referent_offset) -> slow code stub
+        // If offset is an int then we can do the comparison with the
+        // referent_offset constant; otherwise we need to move
+        // referent_offset into a temporary register and generate
+        // a reg-reg compare.
+
+        LIR_Opr referent_off;
+
+        if (off.type()->is_int()) {
+          referent_off = LIR_OprFact::intConst(java_lang_ref_Reference::referent_offset);
+        } else {
+          assert(off.type()->is_long(), "what else?");
+          referent_off = new_register(T_LONG);
+          __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off);
+        }
+
+        __ cmp(lir_cond_equal, off.result(), referent_off);
+
+        // Optionally generate "src == null" check.
+        stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
+                                                    src_klass, thread,
+                                                    gen_source_check);
+
+        __ branch(lir_cond_equal, as_BasicType(off.type()), stub);
+      } else {
+        if (gen_source_check) {
+          // offset is a const and equals referent offset
+          // if (source != null) -> slow code stub
+          __ cmp(lir_cond_notEqual, src.result(), LIR_OprFact::oopConst(NULL));
+
+          // Since we are generating the "if src == null" guard here,
+          // there is no need to generate the "src == null" check again.
+          stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
+                                                    src_klass, thread,
+                                                    false);
+
+          __ branch(lir_cond_notEqual, T_OBJECT, stub);
+        } else {
+          // We have statically determined that offset == referent_offset
+          // && src != null so we unconditionally branch to code stub
+          // to perform the guards and record reg in the SATB log buffer.
+
+          stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
+                                                    src_klass, thread,
+                                                    false);
+
+          __ branch(lir_cond_always, T_ILLEGAL, stub);
+        }
+      }
+
+      // Continuation point
+      __ branch_destination(stub->continuation());
+    }
+  }
+#endif // SERIALGC
+
   if (x->is_volatile() && os::is_MP()) __ membar_acquire();
 }
 
@@ -2759,6 +2942,10 @@
     do_AttemptUpdate(x);
     break;
 
+  case vmIntrinsics::_Reference_get:
+    do_Reference_get(x);
+    break;
+
   default: ShouldNotReachHere(); break;
   }
 }
--- a/src/share/vm/c1/c1_LIRGenerator.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/c1/c1_LIRGenerator.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -246,6 +246,7 @@
   void do_AttemptUpdate(Intrinsic* x);
   void do_NIOCheckIndex(Intrinsic* x);
   void do_FPIntrinsics(Intrinsic* x);
+  void do_Reference_get(Intrinsic* x);
 
   void do_UnsafePrefetch(UnsafePrefetch* x, bool is_store);
 
@@ -260,13 +261,14 @@
 
   // generic interface
 
-  void pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info);
+  void pre_barrier(LIR_Opr addr_opr, LIR_Opr pre_val, bool do_load, bool patch, CodeEmitInfo* info);
   void post_barrier(LIR_OprDesc* addr, LIR_OprDesc* new_val);
 
   // specific implementations
   // pre barriers
 
-  void G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, bool patch,  CodeEmitInfo* info);
+  void G1SATBCardTableModRef_pre_barrier(LIR_Opr addr_opr, LIR_Opr pre_val,
+                                         bool do_load, bool patch, CodeEmitInfo* info);
 
   // post barriers
 
--- a/src/share/vm/classfile/vmSymbols.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/classfile/vmSymbols.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -678,6 +678,10 @@
   do_intrinsic(_checkIndex,               java_nio_Buffer,        checkIndex_name, int_int_signature,            F_R)   \
    do_name(     checkIndex_name,                                 "checkIndex")                                          \
                                                                                                                         \
+  /* java/lang/ref/Reference */                                                                                         \
+  do_intrinsic(_Reference_get,            java_lang_ref_Reference, get_name,    void_object_signature, F_R)             \
+                                                                                                                        \
+                                                                                                                        \
   do_class(sun_misc_AtomicLongCSImpl,     "sun/misc/AtomicLongCSImpl")                                                  \
   do_intrinsic(_get_AtomicLong,           sun_misc_AtomicLongCSImpl, get_name, void_long_signature,              F_R)   \
   /*   (symbols get_name and void_long_signature defined above) */                                                      \
--- a/src/share/vm/code/codeCache.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/code/codeCache.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -971,8 +971,6 @@
   if (CodeCache_lock->owned_by_self()) {
     return _heap->largest_free_block();
   } else {
-    // Avoid lock ordering problems with ttyLock.
-    ttyUnlocker ttyul;
     MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
     return _heap->largest_free_block();
   }
--- a/src/share/vm/compiler/compileBroker.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/compiler/compileBroker.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1736,8 +1736,14 @@
   UseInterpreter = true;
   if (UseCompiler || AlwaysCompileLoopMethods ) {
     if (xtty != NULL) {
+      stringStream s;
+      // Dump code cache state into a buffer before locking the tty,
+      // because log_state() will use locks causing lock conflicts.
+      CodeCache::log_state(&s);
+      // Lock to prevent tearing
+      ttyLocker ttyl;
       xtty->begin_elem("code_cache_full");
-      CodeCache::log_state(xtty);
+      xtty->print(s.as_string());
       xtty->stamp();
       xtty->end_elem();
     }
--- a/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/gc_implementation/g1/concurrentG1Refine.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -172,7 +172,7 @@
 
   // hash a given key (index of card_ptr) with the specified size
   static unsigned int hash(size_t key, size_t size) {
-    return (unsigned int) key % size;
+    return (unsigned int) (key % size);
   }
 
   // hash a given key (index of card_ptr)
@@ -180,11 +180,11 @@
     return hash(key, _n_card_counts);
   }
 
-  unsigned ptr_2_card_num(jbyte* card_ptr) {
-    return (unsigned) (card_ptr - _ct_bot);
+  unsigned int ptr_2_card_num(jbyte* card_ptr) {
+    return (unsigned int) (card_ptr - _ct_bot);
   }
 
-  jbyte* card_num_2_ptr(unsigned card_num) {
+  jbyte* card_num_2_ptr(unsigned int card_num) {
     return (jbyte*) (_ct_bot + card_num);
   }
 
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,7 +47,9 @@
 
 
 void G1SATBCardTableModRefBS::enqueue(oop pre_val) {
-  assert(pre_val->is_oop_or_null(true), "Error");
+  // Nulls should have been already filtered.
+  assert(pre_val->is_oop(true), "Error");
+
   if (!JavaThread::satb_mark_queue_set().is_active()) return;
   Thread* thr = Thread::current();
   if (thr->is_Java_thread()) {
@@ -59,20 +61,6 @@
   }
 }
 
-// When we know the current java thread:
-template <class T> void
-G1SATBCardTableModRefBS::write_ref_field_pre_static(T* field,
-                                                    oop new_val,
-                                                    JavaThread* jt) {
-  if (!JavaThread::satb_mark_queue_set().is_active()) return;
-  T heap_oop = oopDesc::load_heap_oop(field);
-  if (!oopDesc::is_null(heap_oop)) {
-    oop pre_val = oopDesc::decode_heap_oop_not_null(heap_oop);
-    assert(pre_val->is_oop(true /* ignore mark word */), "Error");
-    jt->satb_mark_queue().enqueue(pre_val);
-  }
-}
-
 template <class T> void
 G1SATBCardTableModRefBS::write_ref_array_pre_work(T* dst, int count) {
   if (!JavaThread::satb_mark_queue_set().is_active()) return;
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -37,12 +37,11 @@
 // snapshot-at-the-beginning marking.
 
 class G1SATBCardTableModRefBS: public CardTableModRefBSForCTRS {
-private:
+public:
   // Add "pre_val" to a set of objects that may have been disconnected from the
   // pre-marking object graph.
   static void enqueue(oop pre_val);
 
-public:
   G1SATBCardTableModRefBS(MemRegion whole_heap,
                           int max_covered_regions);
 
@@ -61,10 +60,6 @@
     }
   }
 
-  // When we know the current java thread:
-  template <class T> static void write_ref_field_pre_static(T* field, oop newVal,
-                                                            JavaThread* jt);
-
   // We export this to make it available in cases where the static
   // type of the barrier set is known.  Note that it is non-virtual.
   template <class T> inline void inline_write_ref_field_pre(T* field, oop newVal) {
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -89,13 +89,9 @@
           "The number of discovered reference objects to process before "   \
           "draining concurrent marking work queues.")                       \
                                                                             \
-  experimental(bool, G1UseConcMarkReferenceProcessing, false,               \
+  experimental(bool, G1UseConcMarkReferenceProcessing, true,                \
           "If true, enable reference discovery during concurrent "          \
-          "marking and reference processing at the end of remark "          \
-          "(unsafe).")                                                      \
-                                                                            \
-  develop(bool, G1SATBBarrierPrintNullPreVals, false,                       \
-          "If true, count frac of ptr writes with null pre-vals.")          \
+          "marking and reference processing at the end of remark.")         \
                                                                             \
   product(intx, G1SATBBufferSize, 1*K,                                      \
           "Number of entries in an SATB log buffer.")                       \
@@ -150,12 +146,6 @@
   develop(bool, G1PrintParCleanupStats, false,                              \
           "When true, print extra stats about parallel cleanup.")           \
                                                                             \
-  develop(bool, G1DisablePreBarrier, false,                                 \
-          "Disable generation of pre-barrier (i.e., marking barrier)   ")   \
-                                                                            \
-  develop(bool, G1DisablePostBarrier, false,                                \
-          "Disable generation of post-barrier (i.e., RS barrier)   ")       \
-                                                                            \
   product(intx, G1UpdateBufferSize, 256,                                    \
           "Size of an update buffer")                                       \
                                                                             \
--- a/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/gc_implementation/parNew/parCardTableModRefBS.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2007, 2010 Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2007, 2011 Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,7 +36,6 @@
 void CardTableModRefBS::par_non_clean_card_iterate_work(Space* sp, MemRegion mr,
                                                         DirtyCardToOopClosure* dcto_cl,
                                                         MemRegionClosure* cl,
-                                                        bool clear,
                                                         int n_threads) {
   if (n_threads > 0) {
     assert((n_threads == 1 && ParallelGCThreads == 0) ||
@@ -57,7 +56,7 @@
 
     int stride = 0;
     while (!pst->is_task_claimed(/* reference */ stride)) {
-      process_stride(sp, mr, stride, n_strides, dcto_cl, cl, clear,
+      process_stride(sp, mr, stride, n_strides, dcto_cl, cl,
                      lowest_non_clean,
                      lowest_non_clean_base_chunk_index,
                      lowest_non_clean_chunk_size);
@@ -83,7 +82,6 @@
                jint stride, int n_strides,
                DirtyCardToOopClosure* dcto_cl,
                MemRegionClosure* cl,
-               bool clear,
                jbyte** lowest_non_clean,
                uintptr_t lowest_non_clean_base_chunk_index,
                size_t    lowest_non_clean_chunk_size) {
@@ -129,7 +127,7 @@
                              lowest_non_clean_base_chunk_index,
                              lowest_non_clean_chunk_size);
 
-    non_clean_card_iterate_work(chunk_mr, cl, clear);
+    non_clean_card_iterate_work(chunk_mr, cl);
 
     // Find the next chunk of the stride.
     chunk_card_start += CardsPerStrideChunk * n_strides;
--- a/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/gc_implementation/parallelScavenge/psOldGen.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -176,10 +176,6 @@
   object_mark_sweep()->compact(ZapUnusedHeapArea);
 }
 
-void PSOldGen::move_and_update(ParCompactionManager* cm) {
-  PSParallelCompact::move_and_update(cm, PSParallelCompact::old_space_id);
-}
-
 size_t PSOldGen::contiguous_available() const {
   return object_space()->free_in_bytes() + virtual_space()->uncommitted_size();
 }
--- a/src/share/vm/gc_implementation/parallelScavenge/psOldGen.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/gc_implementation/parallelScavenge/psOldGen.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -143,9 +143,6 @@
   void adjust_pointers();
   void compact();
 
-  // Parallel old
-  virtual void move_and_update(ParCompactionManager* cm);
-
   // Size info
   size_t capacity_in_bytes() const        { return object_space()->capacity_in_bytes(); }
   size_t used_in_bytes() const            { return object_space()->used_in_bytes(); }
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -2104,11 +2104,7 @@
     // klasses are used in the update of an object?
     compact_perm(vmthread_cm);
 
-    if (UseParallelOldGCCompacting) {
-      compact();
-    } else {
-      compact_serial(vmthread_cm);
-    }
+    compact();
 
     // Reset the mark bitmap, summary data, and do other bookkeeping.  Must be
     // done before resizing.
@@ -2582,18 +2578,16 @@
     // each thread?
     if (total_dense_prefix_regions > 0) {
       uint tasks_for_dense_prefix = 1;
-      if (UseParallelDensePrefixUpdate) {
-        if (total_dense_prefix_regions <=
-            (parallel_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING)) {
-          // Don't over partition.  This assumes that
-          // PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING is a small integer value
-          // so there are not many regions to process.
-          tasks_for_dense_prefix = parallel_gc_threads;
-        } else {
-          // Over partition
-          tasks_for_dense_prefix = parallel_gc_threads *
-            PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING;
-        }
+      if (total_dense_prefix_regions <=
+          (parallel_gc_threads * PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING)) {
+        // Don't over partition.  This assumes that
+        // PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING is a small integer value
+        // so there are not many regions to process.
+        tasks_for_dense_prefix = parallel_gc_threads;
+      } else {
+        // Over partition
+        tasks_for_dense_prefix = parallel_gc_threads *
+          PAR_OLD_DENSE_PREFIX_OVER_PARTITIONING;
       }
       size_t regions_per_thread = total_dense_prefix_regions /
         tasks_for_dense_prefix;
@@ -2733,21 +2727,6 @@
 }
 #endif  // #ifdef ASSERT
 
-void PSParallelCompact::compact_serial(ParCompactionManager* cm) {
-  EventMark m("5 compact serial");
-  TraceTime tm("compact serial", print_phases(), true, gclog_or_tty);
-
-  ParallelScavengeHeap* heap = (ParallelScavengeHeap*)Universe::heap();
-  assert(heap->kind() == CollectedHeap::ParallelScavengeHeap, "Sanity");
-
-  PSYoungGen* young_gen = heap->young_gen();
-  PSOldGen* old_gen = heap->old_gen();
-
-  old_gen->start_array()->reset();
-  old_gen->move_and_update(cm);
-  young_gen->move_and_update(cm);
-}
-
 void
 PSParallelCompact::follow_weak_klass_links() {
   // All klasses on the revisit stack are marked at this point.
@@ -3530,11 +3509,8 @@
            "Object liveness is wrong.");
     return ParMarkBitMap::incomplete;
   }
-  assert(UseParallelOldGCDensePrefix ||
-         (HeapMaximumCompactionInterval > 1) ||
-         (MarkSweepAlwaysCompactCount > 1) ||
-         (forwarding_ptr == new_pointer),
-    "Calculation of new location is incorrect");
+  assert(HeapMaximumCompactionInterval > 1 || MarkSweepAlwaysCompactCount > 1 ||
+         forwarding_ptr == new_pointer, "new location is incorrect");
   return ParMarkBitMap::incomplete;
 }
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1027,9 +1027,6 @@
                                        ParallelTaskTerminator* terminator_ptr,
                                        uint parallel_gc_threads);
 
-  // For debugging only - compacts the old gen serially
-  static void compact_serial(ParCompactionManager* cm);
-
   // If objects are left in eden after a collection, try to move the boundary
   // and absorb them into the old gen.  Returns true if eden was emptied.
   static bool absorb_live_data_from_eden(PSAdaptiveSizePolicy* size_policy,
--- a/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPermGen.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -121,12 +121,6 @@
   }
 }
 
-
-
-void PSPermGen::move_and_update(ParCompactionManager* cm) {
-  PSParallelCompact::move_and_update(cm, PSParallelCompact::perm_space_id);
-}
-
 void PSPermGen::precompact() {
   // Reset start array first.
   _start_array.reset();
--- a/src/share/vm/gc_implementation/parallelScavenge/psPermGen.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPermGen.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,9 +51,6 @@
   // MarkSweep code
   virtual void precompact();
 
-  // Parallel old
-  virtual void move_and_update(ParCompactionManager* cm);
-
   virtual const char* name() const { return "PSPermGen"; }
 };
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -792,12 +792,6 @@
   to_mark_sweep()->compact(false);
 }
 
-void PSYoungGen::move_and_update(ParCompactionManager* cm) {
-  PSParallelCompact::move_and_update(cm, PSParallelCompact::eden_space_id);
-  PSParallelCompact::move_and_update(cm, PSParallelCompact::from_space_id);
-  PSParallelCompact::move_and_update(cm, PSParallelCompact::to_space_id);
-}
-
 void PSYoungGen::print() const { print_on(tty); }
 void PSYoungGen::print_on(outputStream* st) const {
   st->print(" %-15s", "PSYoungGen");
--- a/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/gc_implementation/parallelScavenge/psYoungGen.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -127,9 +127,6 @@
   void adjust_pointers();
   void compact();
 
-  // Parallel Old
-  void move_and_update(ParCompactionManager* cm);
-
   // Called during/after gc
   void swap_spaces();
 
--- a/src/share/vm/gc_implementation/shared/allocationStats.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/gc_implementation/shared/allocationStats.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -76,7 +76,7 @@
     _beforeSweep = 0;
     _coalBirths = 0;
     _coalDeaths = 0;
-    _splitBirths = split_birth? 1 : 0;
+    _splitBirths = (split_birth ? 1 : 0);
     _splitDeaths = 0;
     _returnedBytes = 0;
   }
--- a/src/share/vm/interpreter/abstractInterpreter.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/interpreter/abstractInterpreter.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -104,6 +104,7 @@
     java_lang_math_sqrt,                                        // implementation of java.lang.Math.sqrt  (x)
     java_lang_math_log,                                         // implementation of java.lang.Math.log   (x)
     java_lang_math_log10,                                       // implementation of java.lang.Math.log10 (x)
+    java_lang_ref_reference_get,                                // implementation of java.lang.ref.Reference.get()
     number_of_method_entries,
     invalid = -1
   };
@@ -140,7 +141,7 @@
   // Method activation
   static MethodKind method_kind(methodHandle m);
   static address    entry_for_kind(MethodKind k)                { assert(0 <= k && k < number_of_method_entries, "illegal kind"); return _entry_table[k]; }
-  static address    entry_for_method(methodHandle m)            { return _entry_table[method_kind(m)]; }
+  static address    entry_for_method(methodHandle m)            { return entry_for_kind(method_kind(m)); }
 
   static void       print_method_kind(MethodKind kind)          PRODUCT_RETURN;
 
--- a/src/share/vm/interpreter/bytecodeInterpreter.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/interpreter/bytecodeInterpreter.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -554,7 +554,7 @@
 
 /* 0xB0 */ &&opc_areturn,     &&opc_return,         &&opc_getstatic,    &&opc_putstatic,
 /* 0xB4 */ &&opc_getfield,    &&opc_putfield,       &&opc_invokevirtual,&&opc_invokespecial,
-/* 0xB8 */ &&opc_invokestatic,&&opc_invokeinterface,&&opc_default,      &&opc_new,
+/* 0xB8 */ &&opc_invokestatic,&&opc_invokeinterface,&&opc_invokedynamic,&&opc_new,
 /* 0xBC */ &&opc_newarray,    &&opc_anewarray,      &&opc_arraylength,  &&opc_athrow,
 
 /* 0xC0 */ &&opc_checkcast,   &&opc_instanceof,     &&opc_monitorenter, &&opc_monitorexit,
@@ -568,7 +568,7 @@
 /* 0xDC */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
 
 /* 0xE0 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
-/* 0xE4 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_return_register_finalizer,
+/* 0xE4 */ &&opc_default,     &&opc_fast_aldc,      &&opc_fast_aldc_w,  &&opc_return_register_finalizer,
 /* 0xE8 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
 /* 0xEC */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
 
@@ -1718,8 +1718,7 @@
         }
         // Need to throw illegal monitor state exception
         CALL_VM(InterpreterRuntime::throw_illegal_monitor_state_exception(THREAD), handle_exception);
-        // Should never reach here...
-        assert(false, "Should have thrown illegal monitor exception");
+        ShouldNotReachHere();
       }
 
       /* All of the non-quick opcodes. */
@@ -2147,6 +2146,74 @@
           UPDATE_PC_AND_TOS_AND_CONTINUE(3, 2);
         }
 
+      CASE(_fast_aldc_w):
+      CASE(_fast_aldc): {
+        if (!EnableInvokeDynamic) {
+          // We should not encounter this bytecode if !EnableInvokeDynamic.
+          // The verifier will stop it.  However, if we get past the verifier,
+          // this will stop the thread in a reasonable way, without crashing the JVM.
+          CALL_VM(InterpreterRuntime::throw_IncompatibleClassChangeError(THREAD),
+                  handle_exception);
+          ShouldNotReachHere();
+        }
+
+        u2 index;
+        int incr;
+        if (opcode == Bytecodes::_fast_aldc) {
+          index = pc[1];
+          incr = 2;
+        } else {
+          index = Bytes::get_native_u2(pc+1);
+          incr = 3;
+        }
+
+        // We are resolved if the f1 field contains a non-null object (CallSite, etc.)
+        // This kind of CP cache entry does not need to match the flags byte, because
+        // there is a 1-1 relation between bytecode type and CP entry type.
+        ConstantPoolCacheEntry* cache = cp->entry_at(index);
+        if (cache->is_f1_null()) {
+          CALL_VM(InterpreterRuntime::resolve_ldc(THREAD, (Bytecodes::Code) opcode),
+                  handle_exception);
+        }
+
+        VERIFY_OOP(cache->f1());
+        SET_STACK_OBJECT(cache->f1(), 0);
+        UPDATE_PC_AND_TOS_AND_CONTINUE(incr, 1);
+      }
+
+      CASE(_invokedynamic): {
+        if (!EnableInvokeDynamic) {
+          // We should not encounter this bytecode if !EnableInvokeDynamic.
+          // The verifier will stop it.  However, if we get past the verifier,
+          // this will stop the thread in a reasonable way, without crashing the JVM.
+          CALL_VM(InterpreterRuntime::throw_IncompatibleClassChangeError(THREAD),
+                  handle_exception);
+          ShouldNotReachHere();
+        }
+
+        int index = Bytes::get_native_u4(pc+1);
+
+        // We are resolved if the f1 field contains a non-null object (CallSite, etc.)
+        // This kind of CP cache entry does not need to match the flags byte, because
+        // there is a 1-1 relation between bytecode type and CP entry type.
+        assert(constantPoolCacheOopDesc::is_secondary_index(index), "incorrect format");
+        ConstantPoolCacheEntry* cache = cp->secondary_entry_at(index);
+        if (cache->is_f1_null()) {
+          CALL_VM(InterpreterRuntime::resolve_invokedynamic(THREAD),
+                  handle_exception);
+        }
+
+        VERIFY_OOP(cache->f1());
+        oop method_handle = java_lang_invoke_CallSite::target(cache->f1());
+        CHECK_NULL(method_handle);
+
+        istate->set_msg(call_method_handle);
+        istate->set_callee((methodOop) method_handle);
+        istate->set_bcp_advance(5);
+
+        UPDATE_PC_AND_RETURN(0); // I'll be back...
+      }
+
       CASE(_invokeinterface): {
         u2 index = Bytes::get_native_u2(pc+1);
 
--- a/src/share/vm/interpreter/bytecodeInterpreter.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/interpreter/bytecodeInterpreter.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -107,6 +107,7 @@
          rethrow_exception,         // unwinding and throwing exception
          // requests to frame manager from C++ interpreter
          call_method,               // request for new frame from interpreter, manager responds with method_entry
+         call_method_handle,        // like the above, except the callee is a method handle
          return_from_method,        // request from interpreter to unwind, manager responds with method_continue
          more_monitors,             // need a new monitor
          throwing_exception,        // unwind stack and rethrow
--- a/src/share/vm/interpreter/cppInterpreter.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/interpreter/cppInterpreter.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -125,6 +125,7 @@
     method_entry(java_lang_math_sqrt  );
     method_entry(java_lang_math_log   );
     method_entry(java_lang_math_log10 );
+    method_entry(java_lang_ref_reference_get);
     Interpreter::_native_entry_begin = Interpreter::code()->code_end();
     method_entry(native);
     method_entry(native_synchronized);
--- a/src/share/vm/interpreter/interpreter.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/interpreter/interpreter.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -208,12 +208,6 @@
     return empty;
   }
 
-  // Accessor method?
-  if (m->is_accessor()) {
-    assert(m->size_of_parameters() == 1, "fast code for accessors assumes parameter size = 1");
-    return accessor;
-  }
-
   // Special intrinsic method?
   // Note: This test must come _after_ the test for native methods,
   //       otherwise we will run into problems with JDK 1.2, see also
@@ -227,6 +221,15 @@
     case vmIntrinsics::_dsqrt : return java_lang_math_sqrt ;
     case vmIntrinsics::_dlog  : return java_lang_math_log  ;
     case vmIntrinsics::_dlog10: return java_lang_math_log10;
+
+    case vmIntrinsics::_Reference_get:
+                                return java_lang_ref_reference_get;
+  }
+
+  // Accessor method?
+  if (m->is_accessor()) {
+    assert(m->size_of_parameters() == 1, "fast code for accessors assumes parameter size = 1");
+    return accessor;
   }
 
   // Note: for now: zero locals for all non-empty methods
--- a/src/share/vm/interpreter/templateInterpreter.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/interpreter/templateInterpreter.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -372,6 +372,7 @@
   method_entry(java_lang_math_sqrt )
   method_entry(java_lang_math_log  )
   method_entry(java_lang_math_log10)
+  method_entry(java_lang_ref_reference_get)
 
   // all native method kinds (must be one contiguous block)
   Interpreter::_native_entry_begin = Interpreter::code()->code_end();
--- a/src/share/vm/memory/cardTableModRefBS.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/memory/cardTableModRefBS.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -459,18 +459,17 @@
 void CardTableModRefBS::non_clean_card_iterate(Space* sp,
                                                MemRegion mr,
                                                DirtyCardToOopClosure* dcto_cl,
-                                               MemRegionClosure* cl,
-                                               bool clear) {
+                                               MemRegionClosure* cl) {
   if (!mr.is_empty()) {
     int n_threads = SharedHeap::heap()->n_par_threads();
     if (n_threads > 0) {
 #ifndef SERIALGC
-      par_non_clean_card_iterate_work(sp, mr, dcto_cl, cl, clear, n_threads);
+      par_non_clean_card_iterate_work(sp, mr, dcto_cl, cl, n_threads);
 #else  // SERIALGC
       fatal("Parallel gc not supported here.");
 #endif // SERIALGC
     } else {
-      non_clean_card_iterate_work(mr, cl, clear);
+      non_clean_card_iterate_work(mr, cl);
     }
   }
 }
@@ -481,10 +480,7 @@
 // cards (and miss those marked precleaned). In that sense,
 // the name precleaned is currently somewhat of a misnomer.
 void CardTableModRefBS::non_clean_card_iterate_work(MemRegion mr,
-                                                    MemRegionClosure* cl,
-                                                    bool clear) {
-  // Figure out whether we have to worry about parallelism.
-  bool is_par = (SharedHeap::heap()->n_par_threads() > 1);
+                                                    MemRegionClosure* cl) {
   for (int i = 0; i < _cur_covered_regions; i++) {
     MemRegion mri = mr.intersection(_covered[i]);
     if (mri.word_size() > 0) {
@@ -506,22 +502,6 @@
           MemRegion cur_cards(addr_for(cur_entry),
                               non_clean_cards * card_size_in_words);
           MemRegion dirty_region = cur_cards.intersection(mri);
-          if (clear) {
-            for (size_t i = 0; i < non_clean_cards; i++) {
-              // Clean the dirty cards (but leave the other non-clean
-              // alone.)  If parallel, do the cleaning atomically.
-              jbyte cur_entry_val = cur_entry[i];
-              if (card_is_dirty_wrt_gen_iter(cur_entry_val)) {
-                if (is_par) {
-                  jbyte res = Atomic::cmpxchg(clean_card, &cur_entry[i], cur_entry_val);
-                  assert(res != clean_card,
-                         "Dirty card mysteriously cleaned");
-                } else {
-                  cur_entry[i] = clean_card;
-                }
-              }
-            }
-          }
           cl->do_MemRegion(dirty_region);
         }
         cur_entry = next_entry;
@@ -530,22 +510,6 @@
   }
 }
 
-void CardTableModRefBS::mod_oop_in_space_iterate(Space* sp,
-                                                 OopClosure* cl,
-                                                 bool clear,
-                                                 bool before_save_marks) {
-  // Note that dcto_cl is resource-allocated, so there is no
-  // corresponding "delete".
-  DirtyCardToOopClosure* dcto_cl = sp->new_dcto_cl(cl, precision());
-  MemRegion used_mr;
-  if (before_save_marks) {
-    used_mr = sp->used_region_at_save_marks();
-  } else {
-    used_mr = sp->used_region();
-  }
-  non_clean_card_iterate(sp, used_mr, dcto_cl, dcto_cl, clear);
-}
-
 void CardTableModRefBS::dirty_MemRegion(MemRegion mr) {
   assert((HeapWord*)align_size_down((uintptr_t)mr.start(), HeapWordSize) == mr.start(), "Unaligned start");
   assert((HeapWord*)align_size_up  ((uintptr_t)mr.end(),   HeapWordSize) == mr.end(),   "Unaligned end"  );
@@ -593,9 +557,8 @@
   memset(first, dirty_card, last-first);
 }
 
-// NOTES:
-// (1) Unlike mod_oop_in_space_iterate() above, dirty_card_iterate()
-//     iterates over dirty cards ranges in increasing address order.
+// Unlike several other card table methods, dirty_card_iterate()
+// iterates over dirty cards ranges in increasing address order.
 void CardTableModRefBS::dirty_card_iterate(MemRegion mr,
                                            MemRegionClosure* cl) {
   for (int i = 0; i < _cur_covered_regions; i++) {
@@ -698,7 +661,7 @@
 
 void CardTableModRefBS::verify_clean_region(MemRegion mr) {
   GuaranteeNotModClosure blk(this);
-  non_clean_card_iterate_work(mr, &blk, false);
+  non_clean_card_iterate_work(mr, &blk);
 }
 
 // To verify a MemRegion is entirely dirty this closure is passed to
--- a/src/share/vm/memory/cardTableModRefBS.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/memory/cardTableModRefBS.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -171,17 +171,14 @@
   // mode if worker threads are available.
   void non_clean_card_iterate(Space* sp, MemRegion mr,
                               DirtyCardToOopClosure* dcto_cl,
-                              MemRegionClosure* cl,
-                              bool clear);
+                              MemRegionClosure* cl);
 
   // Utility function used to implement the other versions below.
-  void non_clean_card_iterate_work(MemRegion mr, MemRegionClosure* cl,
-                                   bool clear);
+  void non_clean_card_iterate_work(MemRegion mr, MemRegionClosure* cl);
 
   void par_non_clean_card_iterate_work(Space* sp, MemRegion mr,
                                        DirtyCardToOopClosure* dcto_cl,
                                        MemRegionClosure* cl,
-                                       bool clear,
                                        int n_threads);
 
   // Dirty the bytes corresponding to "mr" (not all of which must be
@@ -241,7 +238,6 @@
                       jint stride, int n_strides,
                       DirtyCardToOopClosure* dcto_cl,
                       MemRegionClosure* cl,
-                      bool clear,
                       jbyte** lowest_non_clean,
                       uintptr_t lowest_non_clean_base_chunk_index,
                       size_t lowest_non_clean_chunk_size);
@@ -402,9 +398,6 @@
   virtual void invalidate(MemRegion mr, bool whole_heap = false);
   void clear(MemRegion mr);
   void dirty(MemRegion mr);
-  void mod_oop_in_space_iterate(Space* sp, OopClosure* cl,
-                                bool clear = false,
-                                bool before_save_marks = false);
 
   // *** Card-table-RemSet-specific things.
 
@@ -415,18 +408,15 @@
   // *decreasing* address order.  (This order aids with imprecise card
   // marking, where a dirty card may cause scanning, and summarization
   // marking, of objects that extend onto subsequent cards.)
-  // If "clear" is true, the card is (conceptually) marked unmodified before
-  // applying the closure.
-  void mod_card_iterate(MemRegionClosure* cl, bool clear = false) {
-    non_clean_card_iterate_work(_whole_heap, cl, clear);
+  void mod_card_iterate(MemRegionClosure* cl) {
+    non_clean_card_iterate_work(_whole_heap, cl);
   }
 
   // Like the "mod_cards_iterate" above, except only invokes the closure
   // for cards within the MemRegion "mr" (which is required to be
   // card-aligned and sized.)
-  void mod_card_iterate(MemRegion mr, MemRegionClosure* cl,
-                        bool clear = false) {
-    non_clean_card_iterate_work(mr, cl, clear);
+  void mod_card_iterate(MemRegion mr, MemRegionClosure* cl) {
+    non_clean_card_iterate_work(mr, cl);
   }
 
   static uintx ct_max_alignment_constraint();
--- a/src/share/vm/memory/cardTableRS.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/memory/cardTableRS.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -247,7 +247,7 @@
   ClearNoncleanCardWrapper clear_cl(dcto_cl, this);
 
   _ct_bs->non_clean_card_iterate(sp, sp->used_region_at_save_marks(),
-                                dcto_cl, &clear_cl, false);
+                                dcto_cl, &clear_cl);
 }
 
 void CardTableRS::clear_into_younger(Generation* gen, bool clear_perm) {
--- a/src/share/vm/memory/modRefBarrierSet.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/memory/modRefBarrierSet.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -88,15 +88,6 @@
     assert(false, "can't call");
   }
 
-  // Invoke "cl->do_oop" on (the address of) every possibly-modifed
-  // reference field in objects in "sp".  If "clear" is "true", the oops
-  // are no longer considered possibly modified after application of the
-  // closure.  If' "before_save_marks" is true, oops in objects allocated
-  // after the last call to "save_marks" on "sp" will not be considered.
-  virtual void mod_oop_in_space_iterate(Space* sp, OopClosure* cl,
-                                        bool clear = false,
-                                        bool before_save_marks = false) = 0;
-
   // Causes all refs in "mr" to be assumed to be modified.  If "whole_heap"
   // is true, the caller asserts that the entire heap is being invalidated,
   // which may admit an optimized implementation for some barriers.
--- a/src/share/vm/oops/instanceKlass.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/oops/instanceKlass.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -401,6 +401,8 @@
   ReferenceType reference_type() const     { return _reference_type; }
   void set_reference_type(ReferenceType t) { _reference_type = t; }
 
+  static int reference_type_offset_in_bytes() { return offset_of(instanceKlass, _reference_type); }
+
   // find local field, returns true if found
   bool find_local_field(Symbol* name, Symbol* sig, fieldDescriptor* fd) const;
   // find field in direct superinterfaces, returns the interface in which the field is defined
--- a/src/share/vm/oops/methodOop.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/oops/methodOop.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -921,6 +921,10 @@
     tty->cr();
   }
 
+  // invariant:   cp->symbol_at_put is preceded by a refcount increment (more usually a lookup)
+  name->increment_refcount();
+  signature->increment_refcount();
+
   constantPoolHandle cp;
   {
     constantPoolOop cp_oop = oopFactory::new_constantPool(_imcp_limit, IsSafeConc, CHECK_(empty));
--- a/src/share/vm/opto/compile.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/opto/compile.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -629,7 +629,7 @@
     initial_gvn()->transform_no_reclaim(top());
 
     // Set up tf(), start(), and find a CallGenerator.
-    CallGenerator* cg;
+    CallGenerator* cg = NULL;
     if (is_osr_compilation()) {
       const TypeTuple *domain = StartOSRNode::osr_domain();
       const TypeTuple *range = TypeTuple::make_range(method()->signature());
@@ -644,9 +644,24 @@
       StartNode* s = new (this, 2) StartNode(root(), tf()->domain());
       initial_gvn()->set_type_bottom(s);
       init_start(s);
-      float past_uses = method()->interpreter_invocation_count();
-      float expected_uses = past_uses;
-      cg = CallGenerator::for_inline(method(), expected_uses);
+      if (method()->intrinsic_id() == vmIntrinsics::_Reference_get && UseG1GC) {
+        // With java.lang.ref.reference.get() we must go through the
+        // intrinsic when G1 is enabled - even when get() is the root
+        // method of the compile - so that, if necessary, the value in
+        // the referent field of the reference object gets recorded by
+        // the pre-barrier code.
+        // Specifically, if G1 is enabled, the value in the referent
+        // field is recorded by the G1 SATB pre barrier. This will
+        // result in the referent being marked live and the reference
+        // object removed from the list of discovered references during
+        // reference processing.
+        cg = find_intrinsic(method(), false);
+      }
+      if (cg == NULL) {
+        float past_uses = method()->interpreter_invocation_count();
+        float expected_uses = past_uses;
+        cg = CallGenerator::for_inline(method(), expected_uses);
+      }
     }
     if (failing())  return;
     if (cg == NULL) {
@@ -2041,6 +2056,52 @@
   // Note that OffsetBot and OffsetTop are very negative.
 }
 
+// Eliminate trivially redundant StoreCMs and accumulate their
+// precedence edges.
+static void eliminate_redundant_card_marks(Node* n) {
+  assert(n->Opcode() == Op_StoreCM, "expected StoreCM");
+  if (n->in(MemNode::Address)->outcnt() > 1) {
+    // There are multiple users of the same address so it might be
+    // possible to eliminate some of the StoreCMs
+    Node* mem = n->in(MemNode::Memory);
+    Node* adr = n->in(MemNode::Address);
+    Node* val = n->in(MemNode::ValueIn);
+    Node* prev = n;
+    bool done = false;
+    // Walk the chain of StoreCMs eliminating ones that match.  As
+    // long as it's a chain of single users then the optimization is
+    // safe.  Eliminating partially redundant StoreCMs would require
+    // cloning copies down the other paths.
+    while (mem->Opcode() == Op_StoreCM && mem->outcnt() == 1 && !done) {
+      if (adr == mem->in(MemNode::Address) &&
+          val == mem->in(MemNode::ValueIn)) {
+        // redundant StoreCM
+        if (mem->req() > MemNode::OopStore) {
+          // Hasn't been processed by this code yet.
+          n->add_prec(mem->in(MemNode::OopStore));
+        } else {
+          // Already converted to precedence edge
+          for (uint i = mem->req(); i < mem->len(); i++) {
+            // Accumulate any precedence edges
+            if (mem->in(i) != NULL) {
+              n->add_prec(mem->in(i));
+            }
+          }
+          // Everything above this point has been processed.
+          done = true;
+        }
+        // Eliminate the previous StoreCM
+        prev->set_req(MemNode::Memory, mem->in(MemNode::Memory));
+        assert(mem->outcnt() == 0, "should be dead");
+        mem->disconnect_inputs(NULL);
+      } else {
+        prev = mem;
+      }
+      mem = prev->in(MemNode::Memory);
+    }
+  }
+}
+
 //------------------------------final_graph_reshaping_impl----------------------
 // Implement items 1-5 from final_graph_reshaping below.
 static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc ) {
@@ -2167,9 +2228,19 @@
     frc.inc_float_count();
     goto handle_mem;
 
+  case Op_StoreCM:
+    {
+      // Convert OopStore dependence into precedence edge
+      Node* prec = n->in(MemNode::OopStore);
+      n->del_req(MemNode::OopStore);
+      n->add_prec(prec);
+      eliminate_redundant_card_marks(n);
+    }
+
+    // fall through
+
   case Op_StoreB:
   case Op_StoreC:
-  case Op_StoreCM:
   case Op_StorePConditional:
   case Op_StoreI:
   case Op_StoreL:
--- a/src/share/vm/opto/graphKit.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/opto/graphKit.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1457,19 +1457,22 @@
 }
 
 
-void GraphKit::pre_barrier(Node* ctl,
+void GraphKit::pre_barrier(bool do_load,
+                           Node* ctl,
                            Node* obj,
                            Node* adr,
                            uint  adr_idx,
                            Node* val,
                            const TypeOopPtr* val_type,
+                           Node* pre_val,
                            BasicType bt) {
+
   BarrierSet* bs = Universe::heap()->barrier_set();
   set_control(ctl);
   switch (bs->kind()) {
     case BarrierSet::G1SATBCT:
     case BarrierSet::G1SATBCTLogging:
-      g1_write_barrier_pre(obj, adr, adr_idx, val, val_type, bt);
+      g1_write_barrier_pre(do_load, obj, adr, adr_idx, val, val_type, pre_val, bt);
       break;
 
     case BarrierSet::CardTableModRef:
@@ -1532,7 +1535,11 @@
   uint adr_idx = C->get_alias_index(adr_type);
   assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
 
-  pre_barrier(control(), obj, adr, adr_idx, val, val_type, bt);
+  pre_barrier(true /* do_load */,
+              control(), obj, adr, adr_idx, val, val_type,
+              NULL /* pre_val */,
+              bt);
+
   Node* store = store_to_memory(control(), adr, val, bt, adr_idx);
   post_barrier(control(), store, obj, adr, adr_idx, val, bt, use_precise);
   return store;
@@ -3470,12 +3477,31 @@
 }
 
 // G1 pre/post barriers
-void GraphKit::g1_write_barrier_pre(Node* obj,
+void GraphKit::g1_write_barrier_pre(bool do_load,
+                                    Node* obj,
                                     Node* adr,
                                     uint alias_idx,
                                     Node* val,
                                     const TypeOopPtr* val_type,
+                                    Node* pre_val,
                                     BasicType bt) {
+
+  // Some sanity checks
+  // Note: val is unused in this routine.
+
+  if (do_load) {
+    // We need to generate the load of the previous value
+    assert(obj != NULL, "must have a base");
+    assert(adr != NULL, "where are loading from?");
+    assert(pre_val == NULL, "loaded already?");
+    assert(val_type != NULL, "need a type");
+  } else {
+    // In this case both val_type and alias_idx are unused.
+    assert(pre_val != NULL, "must be loaded already");
+    assert(pre_val->bottom_type()->basic_type() == T_OBJECT, "or we shouldn't be here");
+  }
+  assert(bt == T_OBJECT, "or we shouldn't be here");
+
   IdealKit ideal(this, true);
 
   Node* tls = __ thread(); // ThreadLocalStorage
@@ -3497,32 +3523,28 @@
                                           PtrQueue::byte_offset_of_index());
   const int buffer_offset  = in_bytes(JavaThread::satb_mark_queue_offset() +  // 652
                                           PtrQueue::byte_offset_of_buf());
+
   // Now the actual pointers into the thread
-
-  // set_control( ctl);
-
   Node* marking_adr = __ AddP(no_base, tls, __ ConX(marking_offset));
   Node* buffer_adr  = __ AddP(no_base, tls, __ ConX(buffer_offset));
   Node* index_adr   = __ AddP(no_base, tls, __ ConX(index_offset));
 
   // Now some of the values
-
   Node* marking = __ load(__ ctrl(), marking_adr, TypeInt::INT, active_type, Compile::AliasIdxRaw);
 
   // if (!marking)
   __ if_then(marking, BoolTest::ne, zero); {
     Node* index   = __ load(__ ctrl(), index_adr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
 
-    const Type* t1 = adr->bottom_type();
-    const Type* t2 = val->bottom_type();
-
-    Node* orig = __ load(no_ctrl, adr, val_type, bt, alias_idx);
-    // if (orig != NULL)
-    __ if_then(orig, BoolTest::ne, null()); {
-      Node* buffer  = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
-
+    if (do_load) {
       // load original value
       // alias_idx correct??
+      pre_val = __ load(no_ctrl, adr, val_type, bt, alias_idx);
+    }
+
+    // if (pre_val != NULL)
+    __ if_then(pre_val, BoolTest::ne, null()); {
+      Node* buffer  = __ load(__ ctrl(), buffer_adr, TypeRawPtr::NOTNULL, T_ADDRESS, Compile::AliasIdxRaw);
 
       // is the queue for this thread full?
       __ if_then(index, BoolTest::ne, zero, likely); {
@@ -3536,10 +3558,9 @@
         next_indexX = _gvn.transform( new (C, 2) ConvI2LNode(next_index, TypeLong::make(0, max_jlong, Type::WidenMax)) );
 #endif
 
-        // Now get the buffer location we will log the original value into and store it
+        // Now get the buffer location we will log the previous value into and store it
         Node *log_addr = __ AddP(no_base, buffer, next_indexX);
-        __ store(__ ctrl(), log_addr, orig, T_OBJECT, Compile::AliasIdxRaw);
-
+        __ store(__ ctrl(), log_addr, pre_val, T_OBJECT, Compile::AliasIdxRaw);
         // update the index
         __ store(__ ctrl(), index_adr, next_index, T_INT, Compile::AliasIdxRaw);
 
@@ -3547,9 +3568,9 @@
 
         // logging buffer is full, call the runtime
         const TypeFunc *tf = OptoRuntime::g1_wb_pre_Type();
-        __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", orig, tls);
+        __ make_leaf_call(tf, CAST_FROM_FN_PTR(address, SharedRuntime::g1_wb_pre), "g1_wb_pre", pre_val, tls);
       } __ end_if();  // (!index)
-    } __ end_if();  // (orig != NULL)
+    } __ end_if();  // (pre_val != NULL)
   } __ end_if();  // (!marking)
 
   // Final sync IdealKit and GraphKit.
--- a/src/share/vm/opto/graphKit.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/opto/graphKit.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -544,8 +544,10 @@
                              BasicType bt);
 
   // For the few case where the barriers need special help
-  void pre_barrier(Node* ctl, Node* obj, Node* adr, uint adr_idx,
-                   Node* val, const TypeOopPtr* val_type, BasicType bt);
+  void pre_barrier(bool do_load, Node* ctl,
+                   Node* obj, Node* adr, uint adr_idx, Node* val, const TypeOopPtr* val_type,
+                   Node* pre_val,
+                   BasicType bt);
 
   void post_barrier(Node* ctl, Node* store, Node* obj, Node* adr, uint adr_idx,
                     Node* val, BasicType bt, bool use_precise);
@@ -671,11 +673,13 @@
                           Node* adr,  uint adr_idx, Node* val, bool use_precise);
 
   // G1 pre/post barriers
-  void g1_write_barrier_pre(Node* obj,
+  void g1_write_barrier_pre(bool do_load,
+                            Node* obj,
                             Node* adr,
                             uint alias_idx,
                             Node* val,
                             const TypeOopPtr* val_type,
+                            Node* pre_val,
                             BasicType bt);
 
   void g1_write_barrier_post(Node* store,
--- a/src/share/vm/opto/lcm.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/opto/lcm.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -688,20 +688,22 @@
       }
       ready_cnt[n->_idx] = local; // Count em up
 
+#ifdef ASSERT
+      if( UseConcMarkSweepGC || UseG1GC ) {
+        if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
+          // Check the precedence edges
+          for (uint prec = n->req(); prec < n->len(); prec++) {
+            Node* oop_store = n->in(prec);
+            if (oop_store != NULL) {
+              assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
+            }
+          }
+        }
+      }
+#endif
+
       // A few node types require changing a required edge to a precedence edge
       // before allocation.
-      if( UseConcMarkSweepGC || UseG1GC ) {
-        if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
-          // Note: Required edges with an index greater than oper_input_base
-          // are not supported by the allocator.
-          // Note2: Can only depend on unmatched edge being last,
-          // can not depend on its absolute position.
-          Node *oop_store = n->in(n->req() - 1);
-          n->del_req(n->req() - 1);
-          n->add_prec(oop_store);
-          assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
-        }
-      }
       if( n->is_Mach() && n->req() > TypeFunc::Parms &&
           (n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ||
            n->as_Mach()->ideal_Opcode() == Op_MemBarVolatile) ) {
--- a/src/share/vm/opto/library_call.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/opto/library_call.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -166,6 +166,10 @@
   // This returns Type::AnyPtr, RawPtr, or OopPtr.
   int classify_unsafe_addr(Node* &base, Node* &offset);
   Node* make_unsafe_address(Node* base, Node* offset);
+  // Helper for inline_unsafe_access.
+  // Generates the guards that check whether the result of
+  // Unsafe.getObject should be recorded in an SATB log buffer.
+  void insert_g1_pre_barrier(Node* base_oop, Node* offset, Node* pre_val);
   bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile);
   bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static);
   bool inline_unsafe_allocate();
@@ -240,6 +244,8 @@
   bool inline_numberOfTrailingZeros(vmIntrinsics::ID id);
   bool inline_bitCount(vmIntrinsics::ID id);
   bool inline_reverseBytes(vmIntrinsics::ID id);
+
+  bool inline_reference_get();
 };
 
 
@@ -336,6 +342,14 @@
     if (!UsePopCountInstruction)  return NULL;
     break;
 
+  case vmIntrinsics::_Reference_get:
+    // It is only when G1 is enabled that we absolutely
+    // need to use the intrinsic version of Reference.get()
+    // so that the value in the referent field, if necessary,
+    // can be registered by the pre-barrier code.
+    if (!UseG1GC) return NULL;
+    break;
+
  default:
     assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
     assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
@@ -387,6 +401,7 @@
     tty->print_cr("Intrinsic %s", str);
   }
 #endif
+
   if (kit.try_to_inline()) {
     if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
       CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
@@ -402,11 +417,19 @@
   }
 
   if (PrintIntrinsics) {
-    tty->print("Did not inline intrinsic %s%s at bci:%d in",
+    if (jvms->has_method()) {
+      // Not a root compile.
+      tty->print("Did not inline intrinsic %s%s at bci:%d in",
+                 vmIntrinsics::name_at(intrinsic_id()),
+                 (is_virtual() ? " (virtual)" : ""), kit.bci());
+      kit.caller()->print_short_name(tty);
+      tty->print_cr(" (%d bytes)", kit.caller()->code_size());
+    } else {
+      // Root compile
+      tty->print("Did not generate intrinsic %s%s at bci:%d in",
                vmIntrinsics::name_at(intrinsic_id()),
                (is_virtual() ? " (virtual)" : ""), kit.bci());
-    kit.caller()->print_short_name(tty);
-    tty->print_cr(" (%d bytes)", kit.caller()->code_size());
+    }
   }
   C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
   return NULL;
@@ -418,6 +441,14 @@
   const bool is_native_ptr  = true;
   const bool is_static      = true;
 
+  if (!jvms()->has_method()) {
+    // Root JVMState has a null method.
+    assert(map()->memory()->Opcode() == Op_Parm, "");
+    // Insert the memory aliasing node
+    set_all_memory(reset_memory());
+  }
+  assert(merged_memory(), "");
+
   switch (intrinsic_id()) {
   case vmIntrinsics::_hashCode:
     return inline_native_hashcode(intrinsic()->is_virtual(), !is_static);
@@ -658,6 +689,9 @@
   case vmIntrinsics::_getCallerClass:
     return inline_native_Reflection_getCallerClass();
 
+  case vmIntrinsics::_Reference_get:
+    return inline_reference_get();
+
   default:
     // If you get here, it may be that someone has added a new intrinsic
     // to the list in vmSymbols.hpp without implementing it here.
@@ -2076,6 +2110,106 @@
 
 const static BasicType T_ADDRESS_HOLDER = T_LONG;
 
+// Helper that guards and inserts a G1 pre-barrier.
+void LibraryCallKit::insert_g1_pre_barrier(Node* base_oop, Node* offset, Node* pre_val) {
+  assert(UseG1GC, "should not call this otherwise");
+
+  // We could be accessing the referent field of a reference object. If so, when G1
+  // is enabled, we need to log the value in the referent field in an SATB buffer.
+  // This routine performs some compile time filters and generates suitable
+  // runtime filters that guard the pre-barrier code.
+
+  // Some compile time checks.
+
+  // If offset is a constant, is it java_lang_ref_Reference::_reference_offset?
+  const TypeX* otype = offset->find_intptr_t_type();
+  if (otype != NULL && otype->is_con() &&
+      otype->get_con() != java_lang_ref_Reference::referent_offset) {
+    // Constant offset but not the reference_offset so just return
+    return;
+  }
+
+  // We only need to generate the runtime guards for instances.
+  const TypeOopPtr* btype = base_oop->bottom_type()->isa_oopptr();
+  if (btype != NULL) {
+    if (btype->isa_aryptr()) {
+      // Array type so nothing to do
+      return;
+    }
+
+    const TypeInstPtr* itype = btype->isa_instptr();
+    if (itype != NULL) {
+      // Can the klass of base_oop be statically determined
+      // to be _not_ a sub-class of Reference?
+      ciKlass* klass = itype->klass();
+      if (klass->is_subtype_of(env()->Reference_klass()) &&
+          !env()->Reference_klass()->is_subtype_of(klass)) {
+        return;
+      }
+    }
+  }
+
+  // The compile time filters did not reject base_oop/offset so
+  // we need to generate the following runtime filters
+  //
+  // if (offset == java_lang_ref_Reference::_reference_offset) {
+  //   if (base != null) {
+  //     if (klass(base)->reference_type() != REF_NONE)) {
+  //       pre_barrier(_, pre_val, ...);
+  //     }
+  //   }
+  // }
+
+  float likely  = PROB_LIKELY(0.999);
+  float unlikely  = PROB_UNLIKELY(0.999);
+
+  IdealKit ideal(this);
+#define __ ideal.
+
+  const int reference_type_offset = instanceKlass::reference_type_offset_in_bytes() +
+                                        sizeof(oopDesc);
+
+  Node* referent_off = __ ConX(java_lang_ref_Reference::referent_offset);
+
+  __ if_then(offset, BoolTest::eq, referent_off, unlikely); {
+    __ if_then(base_oop, BoolTest::ne, null(), likely); {
+
+      // Update graphKit memory and control from IdealKit.
+      sync_kit(ideal);
+
+      Node* ref_klass_con = makecon(TypeKlassPtr::make(env()->Reference_klass()));
+      Node* is_instof = gen_instanceof(base_oop, ref_klass_con);
+
+      // Update IdealKit memory and control from graphKit.
+      __ sync_kit(this);
+
+      Node* one = __ ConI(1);
+
+      __ if_then(is_instof, BoolTest::eq, one, unlikely); {
+
+        // Update graphKit from IdeakKit.
+        sync_kit(ideal);
+
+        // Use the pre-barrier to record the value in the referent field
+        pre_barrier(false /* do_load */,
+                    __ ctrl(),
+                    NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
+                    pre_val /* pre_val */,
+                    T_OBJECT);
+
+        // Update IdealKit from graphKit.
+        __ sync_kit(this);
+
+      } __ end_if(); // _ref_type != ref_none
+    } __ end_if(); // base  != NULL
+  } __ end_if(); // offset == referent_offset
+
+  // Final sync IdealKit and GraphKit.
+  final_sync(ideal);
+#undef __
+}
+
+
 // Interpret Unsafe.fieldOffset cookies correctly:
 extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
 
@@ -2152,9 +2286,11 @@
   // Build address expression.  See the code in inline_unsafe_prefetch.
   Node *adr;
   Node *heap_base_oop = top();
+  Node* offset = top();
+
   if (!is_native_ptr) {
     // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset
-    Node* offset = pop_pair();
+    offset = pop_pair();
     // The base is either a Java object or a value produced by Unsafe.staticFieldBase
     Node* base   = pop();
     // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
@@ -2195,6 +2331,13 @@
   // or Compile::must_alias will throw a diagnostic assert.)
   bool need_mem_bar = (alias_type->adr_type() == TypeOopPtr::BOTTOM);
 
+  // If we are reading the value of the referent field of a Reference
+  // object (either by using Unsafe directly or through reflection)
+  // then, if G1 is enabled, we need to record the referent in an
+  // SATB log buffer using the pre-barrier mechanism.
+  bool need_read_barrier = UseG1GC && !is_native_ptr && !is_store &&
+                           offset != top() && heap_base_oop != top();
+
   if (!is_store && type == T_OBJECT) {
     // Attempt to infer a sharper value type from the offset and base type.
     ciKlass* sharpened_klass = NULL;
@@ -2278,8 +2421,13 @@
     case T_SHORT:
     case T_INT:
     case T_FLOAT:
+      push(p);
+      break;
     case T_OBJECT:
-      push( p );
+      if (need_read_barrier) {
+        insert_g1_pre_barrier(heap_base_oop, offset, p);
+      }
+      push(p);
       break;
     case T_ADDRESS:
       // Cast to an int type.
@@ -2534,7 +2682,10 @@
   case T_OBJECT:
      // reference stores need a store barrier.
     // (They don't if CAS fails, but it isn't worth checking.)
-    pre_barrier(control(), base, adr, alias_idx, newval, value_type->make_oopptr(), T_OBJECT);
+    pre_barrier(true /* do_load*/,
+                control(), base, adr, alias_idx, newval, value_type->make_oopptr(),
+                NULL /* pre_val*/,
+                T_OBJECT);
 #ifdef _LP64
     if (adr->bottom_type()->is_ptr_to_narrowoop()) {
       Node *newval_enc = _gvn.transform(new (C, 2) EncodePNode(newval, newval->bottom_type()->make_narrowoop()));
@@ -5235,3 +5386,44 @@
                     copyfunc_addr, copyfunc_name, adr_type,
                     src_start, dest_start, copy_length XTOP);
 }
+
+//----------------------------inline_reference_get----------------------------
+
+bool LibraryCallKit::inline_reference_get() {
+  const int nargs = 1; // self
+
+  guarantee(java_lang_ref_Reference::referent_offset > 0,
+            "should have already been set");
+
+  int referent_offset = java_lang_ref_Reference::referent_offset;
+
+  // Restore the stack and pop off the argument
+  _sp += nargs;
+  Node *reference_obj = pop();
+
+  // Null check on self without removing any arguments.
+  _sp += nargs;
+  reference_obj = do_null_check(reference_obj, T_OBJECT);
+  _sp -= nargs;;
+
+  if (stopped()) return true;
+
+  Node *adr = basic_plus_adr(reference_obj, reference_obj, referent_offset);
+
+  ciInstanceKlass* klass = env()->Object_klass();
+  const TypeOopPtr* object_type = TypeOopPtr::make_from_klass(klass);
+
+  Node* no_ctrl = NULL;
+  Node *result = make_load(no_ctrl, adr, object_type, T_OBJECT);
+
+  // Use the pre-barrier to record the value in the referent field
+  pre_barrier(false /* do_load */,
+              control(),
+              NULL /* obj */, NULL /* adr */, max_juint /* alias_idx */, NULL /* val */, NULL /* val_type */,
+              result /* pre_val */,
+              T_OBJECT);
+
+  push(result);
+  return true;
+}
+
--- a/src/share/vm/opto/memnode.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/opto/memnode.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -2159,9 +2159,12 @@
   Node* mem     = in(MemNode::Memory);
   Node* address = in(MemNode::Address);
 
-  // Back-to-back stores to same address?  Fold em up.
-  // Generally unsafe if I have intervening uses...
-  if (mem->is_Store() && phase->eqv_uncast(mem->in(MemNode::Address), address)) {
+  // Back-to-back stores to same address?  Fold em up.  Generally
+  // unsafe if I have intervening uses...  Also disallowed for StoreCM
+  // since they must follow each StoreP operation.  Redundant StoreCMs
+  // are eliminated just before matching in final_graph_reshape.
+  if (mem->is_Store() && phase->eqv_uncast(mem->in(MemNode::Address), address) &&
+      mem->Opcode() != Op_StoreCM) {
     // Looking at a dead closed cycle of memory?
     assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal");
 
--- a/src/share/vm/opto/output.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/opto/output.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1354,15 +1354,20 @@
         // Check that oop-store precedes the card-mark
         else if( mach->ideal_Opcode() == Op_StoreCM ) {
           uint storeCM_idx = j;
-          Node *oop_store = mach->in(mach->_cnt);  // First precedence edge
-          assert( oop_store != NULL, "storeCM expects a precedence edge");
-          uint i4;
-          for( i4 = 0; i4 < last_inst; ++i4 ) {
-            if( b->_nodes[i4] == oop_store ) break;
+          int count = 0;
+          for (uint prec = mach->req(); prec < mach->len(); prec++) {
+            Node *oop_store = mach->in(prec);  // Precedence edge
+            if (oop_store == NULL) continue;
+            count++;
+            uint i4;
+            for( i4 = 0; i4 < last_inst; ++i4 ) {
+              if( b->_nodes[i4] == oop_store ) break;
+            }
+            // Note: This test can provide a false failure if other precedence
+            // edges have been added to the storeCMNode.
+            assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
           }
-          // Note: This test can provide a false failure if other precedence
-          // edges have been added to the storeCMNode.
-          assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
+          assert(count > 0, "storeCM expects at least one precedence edge");
         }
 #endif
 
--- a/src/share/vm/opto/parse2.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/opto/parse2.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -795,8 +795,9 @@
   taken = method()->scale_count(taken);
   not_taken = method()->scale_count(not_taken);
 
-  // Give up if too few counts to be meaningful
-  if (taken + not_taken < 40) {
+  // Give up if too few (or too many, in which case the sum will overflow) counts to be meaningful.
+  // We also check that individual counters are positive first, overwise the sum can become positive.
+  if (taken < 0 || not_taken < 0 || taken + not_taken < 40) {
     if (C->log() != NULL) {
       C->log()->elem("branch target_bci='%d' taken='%d' not_taken='%d'", iter().get_dest(), taken, not_taken);
     }
@@ -804,13 +805,13 @@
   }
 
   // Compute frequency that we arrive here
-  int sum = taken + not_taken;
+  float sum = taken + not_taken;
   // Adjust, if this block is a cloned private block but the
   // Jump counts are shared.  Taken the private counts for
   // just this path instead of the shared counts.
   if( block()->count() > 0 )
     sum = block()->count();
-  cnt = (float)sum / (float)FreqCountInvocations;
+  cnt = sum / FreqCountInvocations;
 
   // Pin probability to sane limits
   float prob;
--- a/src/share/vm/prims/jni.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/prims/jni.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -29,6 +29,9 @@
 #include "classfile/systemDictionary.hpp"
 #include "classfile/vmSymbols.hpp"
 #include "interpreter/linkResolver.hpp"
+#ifndef SERIALGC
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif // SERIALGC
 #include "memory/allocation.inline.hpp"
 #include "memory/gcLocker.inline.hpp"
 #include "memory/oopFactory.hpp"
@@ -1724,6 +1727,26 @@
     o = JvmtiExport::jni_GetField_probe(thread, obj, o, k, fieldID, false);
   }
   jobject ret = JNIHandles::make_local(env, o->obj_field(offset));
+#ifndef SERIALGC
+  // If G1 is enabled and we are accessing the value of the referent
+  // field in a reference object then we need to register a non-null
+  // referent with the SATB barrier.
+  if (UseG1GC) {
+    bool needs_barrier = false;
+
+    if (ret != NULL &&
+        offset == java_lang_ref_Reference::referent_offset &&
+        instanceKlass::cast(k)->reference_type() != REF_NONE) {
+      assert(instanceKlass::cast(k)->is_subclass_of(SystemDictionary::Reference_klass()), "sanity");
+      needs_barrier = true;
+    }
+
+    if (needs_barrier) {
+      oop referent = JNIHandles::resolve(ret);
+      G1SATBCardTableModRefBS::enqueue(referent);
+    }
+  }
+#endif // SERIALGC
   DTRACE_PROBE1(hotspot_jni, GetObjectField__return, ret);
   return ret;
 JNI_END
--- a/src/share/vm/prims/methodHandleWalk.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/prims/methodHandleWalk.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -343,6 +343,7 @@
   int cpool_symbol_put(int tag, Symbol* con) {
     if (con == NULL)  return 0;
     ConstantValue* cv = new ConstantValue(tag, con);
+    con->increment_refcount();
     return _constants.append(cv);
   }
 
--- a/src/share/vm/prims/methodHandles.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/prims/methodHandles.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -928,6 +928,7 @@
 };
 
 static bool is_always_null_type(klassOop klass) {
+  if (klass == NULL)  return false;  // safety
   if (!Klass::cast(klass)->oop_is_instance())  return false;
   instanceKlass* ik = instanceKlass::cast(klass);
   // Must be on the boot class path:
@@ -944,6 +945,8 @@
 }
 
 bool MethodHandles::class_cast_needed(klassOop src, klassOop dst) {
+  if (dst == NULL)  return true;
+  if (src == NULL)  return (dst != SystemDictionary::Object_klass());
   if (src == dst || dst == SystemDictionary::Object_klass())
     return false;                               // quickest checks
   Klass* srck = Klass::cast(src);
@@ -1026,10 +1029,15 @@
                                             int first_ptype_pos,
                                             KlassHandle insert_ptype,
                                             TRAPS) {
+  Handle mhi_type;
+  if (m->is_method_handle_invoke()) {
+    // use this more exact typing instead of the symbolic signature:
+    mhi_type = Handle(THREAD, m->method_handle_type());
+  }
   objArrayHandle ptypes(THREAD, java_lang_invoke_MethodType::ptypes(mtype()));
   int pnum = first_ptype_pos;
   int pmax = ptypes->length();
-  int mnum = 0;                 // method argument
+  int anum = 0;                 // method argument
   const char* err = NULL;
   ResourceMark rm(THREAD);
   for (SignatureStream ss(m->signature()); !ss.is_done(); ss.next()) {
@@ -1048,47 +1056,70 @@
       else
         ptype_oop = insert_ptype->java_mirror();
       pnum += 1;
-      mnum += 1;
+      anum += 1;
     }
-    klassOop  pklass = NULL;
-    BasicType ptype  = T_OBJECT;
-    if (ptype_oop != NULL)
-      ptype = java_lang_Class::as_BasicType(ptype_oop, &pklass);
-    else
-      // null does not match any non-reference; use Object to report the error
-      pklass = SystemDictionary::Object_klass();
-    klassOop  mklass = NULL;
-    BasicType mtype  = ss.type();
-    if (mtype == T_ARRAY)  mtype = T_OBJECT; // fold all refs to T_OBJECT
-    if (mtype == T_OBJECT) {
-      if (ptype_oop == NULL) {
+    KlassHandle pklass;
+    BasicType   ptype = T_OBJECT;
+    bool   have_ptype = false;
+    // missing ptype_oop does not match any non-reference; use Object to report the error
+    pklass = SystemDictionaryHandles::Object_klass();
+    if (ptype_oop != NULL) {
+      have_ptype = true;
+      klassOop pklass_oop = NULL;
+      ptype = java_lang_Class::as_BasicType(ptype_oop, &pklass_oop);
+      pklass = KlassHandle(THREAD, pklass_oop);
+    }
+    ptype_oop = NULL; //done with this
+    KlassHandle aklass;
+    BasicType   atype = ss.type();
+    if (atype == T_ARRAY)  atype = T_OBJECT; // fold all refs to T_OBJECT
+    if (atype == T_OBJECT) {
+      if (!have_ptype) {
         // null matches any reference
         continue;
       }
-      KlassHandle pklass_handle(THREAD, pklass); pklass = NULL;
-      // If we fail to resolve types at this point, we will throw an error.
-      Symbol* name = ss.as_symbol(CHECK);
-      instanceKlass* mk = instanceKlass::cast(m->method_holder());
-      Handle loader(THREAD, mk->class_loader());
-      Handle domain(THREAD, mk->protection_domain());
-      mklass = SystemDictionary::resolve_or_null(name, loader, domain, CHECK);
-      pklass = pklass_handle();
-      if (mklass == NULL && pklass != NULL &&
-          Klass::cast(pklass)->name() == name &&
-          m->is_method_handle_invoke()) {
-        // Assume a match.  We can't really decode the signature of MH.invoke*.
-        continue;
+      if (mhi_type.is_null()) {
+        // If we fail to resolve types at this point, we will usually throw an error.
+        TempNewSymbol name = ss.as_symbol_or_null();
+        if (name != NULL) {
+          instanceKlass* mk = instanceKlass::cast(m->method_holder());
+          Handle loader(THREAD, mk->class_loader());
+          Handle domain(THREAD, mk->protection_domain());
+          klassOop aklass_oop = SystemDictionary::resolve_or_null(name, loader, domain, CHECK);
+          if (aklass_oop != NULL)
+            aklass = KlassHandle(THREAD, aklass_oop);
+        }
+      } else {
+        // for method handle invokers we don't look at the name in the signature
+        oop atype_oop;
+        if (ss.at_return_type())
+          atype_oop = java_lang_invoke_MethodType::rtype(mhi_type());
+        else
+          atype_oop = java_lang_invoke_MethodType::ptype(mhi_type(), anum-1);
+        klassOop aklass_oop = NULL;
+        atype = java_lang_Class::as_BasicType(atype_oop, &aklass_oop);
+        aklass = KlassHandle(THREAD, aklass_oop);
       }
     }
     if (!ss.at_return_type()) {
-      err = check_argument_type_change(ptype, pklass, mtype, mklass, mnum);
+      err = check_argument_type_change(ptype, pklass(), atype, aklass(), anum);
     } else {
-      err = check_return_type_change(mtype, mklass, ptype, pklass); // note reversal!
+      err = check_return_type_change(atype, aklass(), ptype, pklass()); // note reversal!
     }
     if (err != NULL)  break;
   }
 
   if (err != NULL) {
+#ifndef PRODUCT
+    if (PrintMiscellaneous && (Verbose || WizardMode)) {
+      tty->print("*** verify_method_signature failed: ");
+      java_lang_invoke_MethodType::print_signature(mtype(), tty);
+      tty->cr();
+      tty->print_cr("    first_ptype_pos = %d, insert_ptype = "UINTX_FORMAT, first_ptype_pos, insert_ptype());
+      tty->print("    Failing method: ");
+      m->print();
+    }
+#endif //PRODUCT
     THROW_MSG(vmSymbols::java_lang_InternalError(), err);
   }
 }
@@ -1288,10 +1319,12 @@
   // format, format, format
   const char* src_name = type2name(src_type);
   const char* dst_name = type2name(dst_type);
-  if (src_type == T_OBJECT)  src_name = Klass::cast(src_klass)->external_name();
-  if (dst_type == T_OBJECT)  dst_name = Klass::cast(dst_klass)->external_name();
   if (src_name == NULL)  src_name = "unknown type";
   if (dst_name == NULL)  dst_name = "unknown type";
+  if (src_type == T_OBJECT)
+    src_name = (src_klass != NULL) ? Klass::cast(src_klass)->external_name() : "an unresolved class";
+  if (dst_type == T_OBJECT)
+    dst_name = (dst_klass != NULL) ? Klass::cast(dst_klass)->external_name() : "an unresolved class";
 
   size_t msglen = strlen(err) + strlen(src_name) + strlen(dst_name) + (argnum < 10 ? 1 : 11);
   char* msg = NEW_RESOURCE_ARRAY(char, msglen + 1);
--- a/src/share/vm/prims/unsafe.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/prims/unsafe.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -24,6 +24,9 @@
 
 #include "precompiled.hpp"
 #include "classfile/vmSymbols.hpp"
+#ifndef SERIALGC
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif // SERIALGC
 #include "memory/allocation.inline.hpp"
 #include "prims/jni.h"
 #include "prims/jvm.h"
@@ -193,7 +196,32 @@
   UnsafeWrapper("Unsafe_GetObject");
   if (obj == NULL)  THROW_0(vmSymbols::java_lang_NullPointerException());
   GET_OOP_FIELD(obj, offset, v)
-  return JNIHandles::make_local(env, v);
+  jobject ret = JNIHandles::make_local(env, v);
+#ifndef SERIALGC
+  // We could be accessing the referent field in a reference
+  // object. If G1 is enabled then we need to register a non-null
+  // referent with the SATB barrier.
+  if (UseG1GC) {
+    bool needs_barrier = false;
+
+    if (ret != NULL) {
+      if (offset == java_lang_ref_Reference::referent_offset) {
+        oop o = JNIHandles::resolve_non_null(obj);
+        klassOop k = o->klass();
+        if (instanceKlass::cast(k)->reference_type() != REF_NONE) {
+          assert(instanceKlass::cast(k)->is_subclass_of(SystemDictionary::Reference_klass()), "sanity");
+          needs_barrier = true;
+        }
+      }
+    }
+
+    if (needs_barrier) {
+      oop referent = JNIHandles::resolve(ret);
+      G1SATBCardTableModRefBS::enqueue(referent);
+    }
+  }
+#endif // SERIALGC
+  return ret;
 UNSAFE_END
 
 UNSAFE_ENTRY(void, Unsafe_SetObject140(JNIEnv *env, jobject unsafe, jobject obj, jint offset, jobject x_h))
@@ -226,7 +254,32 @@
 UNSAFE_ENTRY(jobject, Unsafe_GetObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset))
   UnsafeWrapper("Unsafe_GetObject");
   GET_OOP_FIELD(obj, offset, v)
-  return JNIHandles::make_local(env, v);
+  jobject ret = JNIHandles::make_local(env, v);
+#ifndef SERIALGC
+  // We could be accessing the referent field in a reference
+  // object. If G1 is enabled then we need to register non-null
+  // referent with the SATB barrier.
+  if (UseG1GC) {
+    bool needs_barrier = false;
+
+    if (ret != NULL) {
+      if (offset == java_lang_ref_Reference::referent_offset && obj != NULL) {
+        oop o = JNIHandles::resolve(obj);
+        klassOop k = o->klass();
+        if (instanceKlass::cast(k)->reference_type() != REF_NONE) {
+          assert(instanceKlass::cast(k)->is_subclass_of(SystemDictionary::Reference_klass()), "sanity");
+          needs_barrier = true;
+        }
+      }
+    }
+
+    if (needs_barrier) {
+      oop referent = JNIHandles::resolve(ret);
+      G1SATBCardTableModRefBS::enqueue(referent);
+    }
+  }
+#endif // SERIALGC
+  return ret;
 UNSAFE_END
 
 UNSAFE_ENTRY(void, Unsafe_SetObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h))
--- a/src/share/vm/runtime/arguments.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/runtime/arguments.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -244,6 +244,12 @@
   { "MaxLiveObjectEvacuationRatio",
                            JDK_Version::jdk_update(6,24), JDK_Version::jdk(8) },
   { "ForceSharedSpaces",   JDK_Version::jdk_update(6,25), JDK_Version::jdk(8) },
+  { "UseParallelOldGCCompacting",
+                           JDK_Version::jdk_update(6,27), JDK_Version::jdk(8) },
+  { "UseParallelDensePrefixUpdate",
+                           JDK_Version::jdk_update(6,27), JDK_Version::jdk(8) },
+  { "UseParallelOldGCDensePrefix",
+                           JDK_Version::jdk_update(6,27), JDK_Version::jdk(8) },
   { "AllowTransitionalJSR292",       JDK_Version::jdk(7), JDK_Version::jdk(8) },
   { NULL, JDK_Version(0), JDK_Version(0) }
 };
@@ -801,26 +807,22 @@
 
   JDK_Version since = JDK_Version();
 
-  if (parse_argument(arg, origin)) {
-    // do nothing
-  } else if (is_newly_obsolete(arg, &since)) {
-    enum { bufsize = 256 };
-    char buffer[bufsize];
-    since.to_string(buffer, bufsize);
-    jio_fprintf(defaultStream::error_stream(),
-      "Warning: The flag %s has been EOL'd as of %s and will"
-      " be ignored\n", arg, buffer);
-  } else {
-    if (!ignore_unrecognized) {
-      jio_fprintf(defaultStream::error_stream(),
-                  "Unrecognized VM option '%s'\n", arg);
-      // allow for commandline "commenting out" options like -XX:#+Verbose
-      if (strlen(arg) == 0 || arg[0] != '#') {
-        return false;
-      }
-    }
+  if (parse_argument(arg, origin) || ignore_unrecognized) {
+    return true;
   }
-  return true;
+
+  const char * const argname = *arg == '+' || *arg == '-' ? arg + 1 : arg;
+  if (is_newly_obsolete(arg, &since)) {
+    char version[256];
+    since.to_string(version, sizeof(version));
+    warning("ignoring option %s; support was removed in %s", argname, version);
+    return true;
+  }
+
+  jio_fprintf(defaultStream::error_stream(),
+              "Unrecognized VM option '%s'\n", argname);
+  // allow for commandline "commenting out" options like -XX:#+Verbose
+  return arg[0] == '#';
 }
 
 bool Arguments::process_settings_file(const char* file_name, bool should_exist, jboolean ignore_unrecognized) {
--- a/src/share/vm/runtime/globals.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/runtime/globals.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1355,13 +1355,6 @@
   product(bool, UseParallelOldGC, false,                                    \
           "Use the Parallel Old garbage collector")                         \
                                                                             \
-  product(bool, UseParallelOldGCCompacting, true,                           \
-          "In the Parallel Old garbage collector use parallel compaction")  \
-                                                                            \
-  product(bool, UseParallelDensePrefixUpdate, true,                         \
-          "In the Parallel Old garbage collector use parallel dense"        \
-          " prefix update")                                                 \
-                                                                            \
   product(uintx, HeapMaximumCompactionInterval, 20,                         \
           "How often should we maximally compact the heap (not allowing "   \
           "any dead space)")                                                \
@@ -1381,9 +1374,6 @@
           "The standard deviation used by the par compact dead wood"        \
           "limiter (a number between 0-100).")                              \
                                                                             \
-  product(bool, UseParallelOldGCDensePrefix, true,                          \
-          "Use a dense prefix with the Parallel Old garbage collector")     \
-                                                                            \
   product(uintx, ParallelGCThreads, 0,                                      \
           "Number of parallel threads parallel gc will use")                \
                                                                             \
--- a/src/share/vm/runtime/sweeper.cpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/runtime/sweeper.cpp	Sat Apr 23 00:33:38 2011 -0400
@@ -418,6 +418,11 @@
 // state of the code cache if it's requested.
 void NMethodSweeper::log_sweep(const char* msg, const char* format, ...) {
   if (PrintMethodFlushing) {
+    stringStream s;
+    // Dump code cache state into a buffer before locking the tty,
+    // because log_state() will use locks causing lock conflicts.
+    CodeCache::log_state(&s);
+
     ttyLocker ttyl;
     tty->print("### sweeper: %s ", msg);
     if (format != NULL) {
@@ -426,10 +431,15 @@
       tty->vprint(format, ap);
       va_end(ap);
     }
-    CodeCache::log_state(tty); tty->cr();
+    tty->print_cr(s.as_string());
   }
 
   if (LogCompilation && (xtty != NULL)) {
+    stringStream s;
+    // Dump code cache state into a buffer before locking the tty,
+    // because log_state() will use locks causing lock conflicts.
+    CodeCache::log_state(&s);
+
     ttyLocker ttyl;
     xtty->begin_elem("sweeper state='%s' traversals='" INTX_FORMAT "' ", msg, (intx)traversal_count());
     if (format != NULL) {
@@ -438,7 +448,7 @@
       xtty->vprint(format, ap);
       va_end(ap);
     }
-    CodeCache::log_state(xtty);
+    xtty->print(s.as_string());
     xtty->stamp();
     xtty->end_elem();
   }
--- a/src/share/vm/shark/llvmHeaders.hpp	Thu Apr 21 10:12:42 2011 -0400
+++ b/src/share/vm/shark/llvmHeaders.hpp	Sat Apr 23 00:33:38 2011 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2008, 2009, 2010 Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -46,7 +46,11 @@
 #include <llvm/ModuleProvider.h>
 #endif
 #include <llvm/Support/IRBuilder.h>
+#if SHARK_LLVM_VERSION >= 29
+#include <llvm/Support/Threading.h>
+#else
 #include <llvm/System/Threading.h>
+#endif
 #include <llvm/Target/TargetSelect.h>
 #include <llvm/Type.h>
 #include <llvm/ExecutionEngine/JITMemoryManager.h>
@@ -55,8 +59,12 @@
 #include <llvm/ExecutionEngine/JIT.h>
 #include <llvm/ADT/StringMap.h>
 #include <llvm/Support/Debug.h>
+#if SHARK_LLVM_VERSION >= 29
+#include <llvm/Support/Host.h>
+#else
 #include <llvm/System/Host.h>
 #endif
+#endif
 
 #include <map>