changeset 6507:897333c7e587 hs25.40-b05

Merge
author amurillo
date Thu, 14 Aug 2014 12:55:30 -0700
parents afac3987537e 29a5c2fd2d2e
children f52cb9164759
files
diffstat 192 files changed, 7209 insertions(+), 2780 deletions(-) [+]
line wrap: on
line diff
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegion.java	Wed Aug 13 14:49:46 2014 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/HeapRegion.java	Thu Aug 14 12:55:30 2014 -0700
@@ -24,23 +24,26 @@
 
 package sun.jvm.hotspot.gc_implementation.g1;
 
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Observable;
 import java.util.Observer;
-
 import sun.jvm.hotspot.debugger.Address;
-import sun.jvm.hotspot.memory.ContiguousSpace;
+import sun.jvm.hotspot.memory.CompactibleSpace;
+import sun.jvm.hotspot.memory.MemRegion;
 import sun.jvm.hotspot.runtime.VM;
+import sun.jvm.hotspot.types.AddressField;
 import sun.jvm.hotspot.types.CIntegerField;
 import sun.jvm.hotspot.types.Type;
 import sun.jvm.hotspot.types.TypeDataBase;
 
 // Mirror class for HeapRegion. Currently we don't actually include
-// any of its fields but only iterate over it (which we get "for free"
-// as HeapRegion ultimately inherits from ContiguousSpace).
+// any of its fields but only iterate over it.
 
-public class HeapRegion extends ContiguousSpace {
+public class HeapRegion extends CompactibleSpace {
     // static int GrainBytes;
     static private CIntegerField grainBytesField;
+    static private AddressField topField;
 
     static {
         VM.registerVMInitializedObserver(new Observer() {
@@ -54,6 +57,8 @@
         Type type = db.lookupType("HeapRegion");
 
         grainBytesField = type.getCIntegerField("GrainBytes");
+        topField = type.getAddressField("_top");
+
     }
 
     static public long grainBytes() {
@@ -63,4 +68,25 @@
     public HeapRegion(Address addr) {
         super(addr);
     }
+
+    public Address top() {
+        return topField.getValue(addr);
+    }
+
+    @Override
+    public List getLiveRegions() {
+        List res = new ArrayList();
+        res.add(new MemRegion(bottom(), top()));
+        return res;
+    }
+
+    @Override
+    public long used() {
+        return top().minus(bottom());
+    }
+
+    @Override
+    public long free() {
+        return end().minus(top());
+    }
 }
--- a/make/hotspot_version	Wed Aug 13 14:49:46 2014 -0700
+++ b/make/hotspot_version	Thu Aug 14 12:55:30 2014 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=25
 HS_MINOR_VER=40
-HS_BUILD_NUMBER=04
+HS_BUILD_NUMBER=05
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/src/cpu/ppc/vm/compiledIC_ppc.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/ppc/vm/compiledIC_ppc.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -50,34 +50,6 @@
   return is_icholder_entry(call->destination());
 }
 
-//-----------------------------------------------------------------------------
-// High-level access to an inline cache. Guaranteed to be MT-safe.
-
-CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
-  : _ic_call(call)
-{
-  address ic_call = call->instruction_address();
-
-  assert(ic_call != NULL, "ic_call address must be set");
-  assert(nm != NULL, "must pass nmethod");
-  assert(nm->contains(ic_call), "must be in nmethod");
-
-  // Search for the ic_call at the given address.
-  RelocIterator iter(nm, ic_call, ic_call+1);
-  bool ret = iter.next();
-  assert(ret == true, "relocInfo must exist at this address");
-  assert(iter.addr() == ic_call, "must find ic_call");
-  if (iter.type() == relocInfo::virtual_call_type) {
-    virtual_call_Relocation* r = iter.virtual_call_reloc();
-    _is_optimized = false;
-    _value = nativeMovConstReg_at(r->cached_value());
-  } else {
-    assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
-    _is_optimized = true;
-    _value = NULL;
-  }
-}
-
 // ----------------------------------------------------------------------------
 
 // A PPC CompiledStaticCall looks like this:
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -123,6 +123,7 @@
     fpop2_op3    = 0x35,
     impdep1_op3  = 0x36,
     aes3_op3     = 0x36,
+    sha_op3      = 0x36,
     alignaddr_op3  = 0x36,
     faligndata_op3 = 0x36,
     flog3_op3    = 0x36,
@@ -223,7 +224,11 @@
     mwtos_opf          = 0x119,
 
     aes_kexpand0_opf   = 0x130,
-    aes_kexpand2_opf   = 0x131
+    aes_kexpand2_opf   = 0x131,
+
+    sha1_opf           = 0x141,
+    sha256_opf         = 0x142,
+    sha512_opf         = 0x143
   };
 
   enum op5s {
@@ -595,6 +600,11 @@
   // AES crypto instructions supported only on certain processors
   static void aes_only() { assert( VM_Version::has_aes(), "This instruction only works on SPARC with AES instructions support"); }
 
+  // SHA crypto instructions supported only on certain processors
+  static void sha1_only()   { assert( VM_Version::has_sha1(),   "This instruction only works on SPARC with SHA1"); }
+  static void sha256_only() { assert( VM_Version::has_sha256(), "This instruction only works on SPARC with SHA256"); }
+  static void sha512_only() { assert( VM_Version::has_sha512(), "This instruction only works on SPARC with SHA512"); }
+
   // instruction only in VIS1
   static void vis1_only() { assert( VM_Version::has_vis1(), "This instruction only works on SPARC with VIS1"); }
 
@@ -1179,7 +1189,6 @@
                                                u_field(3, 29, 25) | immed(true) | simm(simm13a, 13)); }
   inline void wrfprs( Register d) { v9_only(); emit_int32( op(arith_op) | rs1(d) | op3(wrreg_op3) | u_field(6, 29, 25)); }
 
-
   //  VIS1 instructions
 
   void alignaddr( Register s1, Register s2, Register d ) { vis1_only(); emit_int32( op(arith_op) | rd(d) | op3(alignaddr_op3) | rs1(s1) | opf(alignaddr_opf) | rs2(s2)); }
@@ -1203,6 +1212,12 @@
   void movwtos( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
   void movxtod( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
 
+  // Crypto SHA instructions
+
+  void sha1()   { sha1_only();    emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); }
+  void sha256() { sha256_only();  emit_int32( op(arith_op) | op3(sha_op3) | opf(sha256_opf)); }
+  void sha512() { sha512_only();  emit_int32( op(arith_op) | op3(sha_op3) | opf(sha512_opf)); }
+
   // Creation
   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
 #ifdef CHECK_DELAY
--- a/src/cpu/sparc/vm/compiledIC_sparc.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/sparc/vm/compiledIC_sparc.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -50,34 +50,6 @@
   return is_icholder_entry(call->destination());
 }
 
-//-----------------------------------------------------------------------------
-// High-level access to an inline cache. Guaranteed to be MT-safe.
-
-CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
-  : _ic_call(call)
-{
-  address ic_call = call->instruction_address();
-
-  assert(ic_call != NULL, "ic_call address must be set");
-  assert(nm != NULL, "must pass nmethod");
-  assert(nm->contains(ic_call), "must be in nmethod");
-
-  // Search for the ic_call at the given address.
-  RelocIterator iter(nm, ic_call, ic_call+1);
-  bool ret = iter.next();
-  assert(ret == true, "relocInfo must exist at this address");
-  assert(iter.addr() == ic_call, "must find ic_call");
-  if (iter.type() == relocInfo::virtual_call_type) {
-    virtual_call_Relocation* r = iter.virtual_call_reloc();
-    _is_optimized = false;
-    _value = nativeMovConstReg_at(r->cached_value());
-  } else {
-    assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
-    _is_optimized = true;
-    _value = NULL;
-  }
-}
-
 // ----------------------------------------------------------------------------
 
 #define __ _masm.
--- a/src/cpu/sparc/vm/sparc.ad	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Thu Aug 14 12:55:30 2014 -0700
@@ -6184,7 +6184,11 @@
   ins_cost(DEFAULT_COST * 3/2);
   format %{ "SET    $con,$dst\t! non-oop ptr" %}
   ins_encode %{
-    __ set($con$$constant, $dst$$Register);
+    if (_opnds[1]->constant_reloc() == relocInfo::metadata_type) {
+      __ set_metadata_constant((Metadata*)$con$$constant, $dst$$Register);
+    } else {
+      __ set($con$$constant, $dst$$Register);
+    }
   %}
   ins_pipe(loadConP);
 %}
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -4575,6 +4575,219 @@
     return start;
   }
 
+  address generate_sha1_implCompress(bool multi_block, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    Label L_sha1_loop, L_sha1_unaligned_input, L_sha1_unaligned_input_loop;
+    int i;
+
+    Register buf   = O0; // byte[] source+offset
+    Register state = O1; // int[]  SHA.state
+    Register ofs   = O2; // int    offset
+    Register limit = O3; // int    limit
+
+    // load state into F0-F4
+    for (i = 0; i < 5; i++) {
+      __ ldf(FloatRegisterImpl::S, state, i*4, as_FloatRegister(i));
+    }
+
+    __ andcc(buf, 7, G0);
+    __ br(Assembler::notZero, false, Assembler::pn, L_sha1_unaligned_input);
+    __ delayed()->nop();
+
+    __ BIND(L_sha1_loop);
+    // load buf into F8-F22
+    for (i = 0; i < 8; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
+    }
+    __ sha1();
+    if (multi_block) {
+      __ add(ofs, 64, ofs);
+      __ add(buf, 64, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha1_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F4 into state and return
+    for (i = 0; i < 4; i++) {
+      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F4, state, 0x10);
+
+    __ BIND(L_sha1_unaligned_input);
+    __ alignaddr(buf, G0, buf);
+
+    __ BIND(L_sha1_unaligned_input_loop);
+    // load buf into F8-F22
+    for (i = 0; i < 9; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
+    }
+    for (i = 0; i < 8; i++) {
+      __ faligndata(as_FloatRegister(i*2 + 8), as_FloatRegister(i*2 + 10), as_FloatRegister(i*2 + 8));
+    }
+    __ sha1();
+    if (multi_block) {
+      __ add(ofs, 64, ofs);
+      __ add(buf, 64, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha1_unaligned_input_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F4 into state and return
+    for (i = 0; i < 4; i++) {
+      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F4, state, 0x10);
+
+    return start;
+  }
+
+  address generate_sha256_implCompress(bool multi_block, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    Label L_sha256_loop, L_sha256_unaligned_input, L_sha256_unaligned_input_loop;
+    int i;
+
+    Register buf   = O0; // byte[] source+offset
+    Register state = O1; // int[]  SHA2.state
+    Register ofs   = O2; // int    offset
+    Register limit = O3; // int    limit
+
+    // load state into F0-F7
+    for (i = 0; i < 8; i++) {
+      __ ldf(FloatRegisterImpl::S, state, i*4, as_FloatRegister(i));
+    }
+
+    __ andcc(buf, 7, G0);
+    __ br(Assembler::notZero, false, Assembler::pn, L_sha256_unaligned_input);
+    __ delayed()->nop();
+
+    __ BIND(L_sha256_loop);
+    // load buf into F8-F22
+    for (i = 0; i < 8; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
+    }
+    __ sha256();
+    if (multi_block) {
+      __ add(ofs, 64, ofs);
+      __ add(buf, 64, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha256_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F7 into state and return
+    for (i = 0; i < 7; i++) {
+      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F7, state, 0x1c);
+
+    __ BIND(L_sha256_unaligned_input);
+    __ alignaddr(buf, G0, buf);
+
+    __ BIND(L_sha256_unaligned_input_loop);
+    // load buf into F8-F22
+    for (i = 0; i < 9; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 8));
+    }
+    for (i = 0; i < 8; i++) {
+      __ faligndata(as_FloatRegister(i*2 + 8), as_FloatRegister(i*2 + 10), as_FloatRegister(i*2 + 8));
+    }
+    __ sha256();
+    if (multi_block) {
+      __ add(ofs, 64, ofs);
+      __ add(buf, 64, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha256_unaligned_input_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F7 into state and return
+    for (i = 0; i < 7; i++) {
+      __ stf(FloatRegisterImpl::S, as_FloatRegister(i), state, i*4);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::S, F7, state, 0x1c);
+
+    return start;
+  }
+
+  address generate_sha512_implCompress(bool multi_block, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    Label L_sha512_loop, L_sha512_unaligned_input, L_sha512_unaligned_input_loop;
+    int i;
+
+    Register buf   = O0; // byte[] source+offset
+    Register state = O1; // long[] SHA5.state
+    Register ofs   = O2; // int    offset
+    Register limit = O3; // int    limit
+
+    // load state into F0-F14
+    for (i = 0; i < 8; i++) {
+      __ ldf(FloatRegisterImpl::D, state, i*8, as_FloatRegister(i*2));
+    }
+
+    __ andcc(buf, 7, G0);
+    __ br(Assembler::notZero, false, Assembler::pn, L_sha512_unaligned_input);
+    __ delayed()->nop();
+
+    __ BIND(L_sha512_loop);
+    // load buf into F16-F46
+    for (i = 0; i < 16; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 16));
+    }
+    __ sha512();
+    if (multi_block) {
+      __ add(ofs, 128, ofs);
+      __ add(buf, 128, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha512_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F14 into state and return
+    for (i = 0; i < 7; i++) {
+      __ stf(FloatRegisterImpl::D, as_FloatRegister(i*2), state, i*8);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38);
+
+    __ BIND(L_sha512_unaligned_input);
+    __ alignaddr(buf, G0, buf);
+
+    __ BIND(L_sha512_unaligned_input_loop);
+    // load buf into F16-F46
+    for (i = 0; i < 17; i++) {
+      __ ldf(FloatRegisterImpl::D, buf, i*8, as_FloatRegister(i*2 + 16));
+    }
+    for (i = 0; i < 16; i++) {
+      __ faligndata(as_FloatRegister(i*2 + 16), as_FloatRegister(i*2 + 18), as_FloatRegister(i*2 + 16));
+    }
+    __ sha512();
+    if (multi_block) {
+      __ add(ofs, 128, ofs);
+      __ add(buf, 128, buf);
+      __ cmp_and_brx_short(ofs, limit, Assembler::lessEqual, Assembler::pt, L_sha512_unaligned_input_loop);
+      __ mov(ofs, O0); // to be returned
+    }
+
+    // store F0-F14 into state and return
+    for (i = 0; i < 7; i++) {
+      __ stf(FloatRegisterImpl::D, as_FloatRegister(i*2), state, i*8);
+    }
+    __ retl();
+    __ delayed()->stf(FloatRegisterImpl::D, F14, state, 0x38);
+
+    return start;
+  }
+
   void generate_initial() {
     // Generates all stubs and initializes the entry points
 
@@ -4647,6 +4860,20 @@
       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
     }
+
+    // generate SHA1/SHA256/SHA512 intrinsics code
+    if (UseSHA1Intrinsics) {
+      StubRoutines::_sha1_implCompress     = generate_sha1_implCompress(false,   "sha1_implCompress");
+      StubRoutines::_sha1_implCompressMB   = generate_sha1_implCompress(true,    "sha1_implCompressMB");
+    }
+    if (UseSHA256Intrinsics) {
+      StubRoutines::_sha256_implCompress   = generate_sha256_implCompress(false, "sha256_implCompress");
+      StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true,  "sha256_implCompressMB");
+    }
+    if (UseSHA512Intrinsics) {
+      StubRoutines::_sha512_implCompress   = generate_sha512_implCompress(false, "sha512_implCompress");
+      StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true,  "sha512_implCompressMB");
+    }
   }
 
 
--- a/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -41,7 +41,7 @@
 enum /* platform_dependent_constants */ {
   // %%%%%%%% May be able to shrink this a lot
   code_size1 = 20000,           // simply increase if too small (assembler will crash if too small)
-  code_size2 = 22000            // simply increase if too small (assembler will crash if too small)
+  code_size2 = 23000            // simply increase if too small (assembler will crash if too small)
 };
 
 class Sparc {
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -234,7 +234,7 @@
   assert((OptoLoopAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
 
   char buf[512];
-  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_v9() ? ", v9" : (has_v8() ? ", v8" : "")),
                (has_hardware_popc() ? ", popc" : ""),
                (has_vis1() ? ", vis1" : ""),
@@ -243,6 +243,9 @@
                (has_blk_init() ? ", blk_init" : ""),
                (has_cbcond() ? ", cbcond" : ""),
                (has_aes() ? ", aes" : ""),
+               (has_sha1() ? ", sha1" : ""),
+               (has_sha256() ? ", sha256" : ""),
+               (has_sha512() ? ", sha512" : ""),
                (is_ultra3() ? ", ultra3" : ""),
                (is_sun4v() ? ", sun4v" : ""),
                (is_niagara_plus() ? ", niagara_plus" : (is_niagara() ? ", niagara" : "")),
@@ -301,6 +304,58 @@
     }
   }
 
+  // SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times
+  if (has_sha1() || has_sha256() || has_sha512()) {
+    if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions
+      if (FLAG_IS_DEFAULT(UseSHA)) {
+        FLAG_SET_DEFAULT(UseSHA, true);
+      }
+    } else {
+      if (UseSHA) {
+        warning("SPARC SHA intrinsics require VIS1 instruction support. Intrinsics will be disabled.");
+        FLAG_SET_DEFAULT(UseSHA, false);
+      }
+    }
+  } else if (UseSHA) {
+    warning("SHA instructions are not available on this CPU");
+    FLAG_SET_DEFAULT(UseSHA, false);
+  }
+
+  if (!UseSHA) {
+    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+  } else {
+    if (has_sha1()) {
+      if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
+        FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
+      }
+    } else if (UseSHA1Intrinsics) {
+      warning("SHA1 instruction is not available on this CPU.");
+      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+    }
+    if (has_sha256()) {
+      if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
+        FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
+      }
+    } else if (UseSHA256Intrinsics) {
+      warning("SHA256 instruction (for SHA-224 and SHA-256) is not available on this CPU.");
+      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+    }
+
+    if (has_sha512()) {
+      if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
+        FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
+      }
+    } else if (UseSHA512Intrinsics) {
+      warning("SHA512 instruction (for SHA-384 and SHA-512) is not available on this CPU.");
+      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+    }
+    if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA, false);
+    }
+  }
+
   if (FLAG_IS_DEFAULT(ContendedPaddingWidth) &&
     (cache_line_size > ContendedPaddingWidth))
     ContendedPaddingWidth = cache_line_size;
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -50,7 +50,10 @@
     T_family             = 16,
     T1_model             = 17,
     sparc5_instructions  = 18,
-    aes_instructions     = 19
+    aes_instructions     = 19,
+    sha1_instruction     = 20,
+    sha256_instruction   = 21,
+    sha512_instruction   = 22
   };
 
   enum Feature_Flag_Set {
@@ -77,6 +80,9 @@
     T1_model_m              = 1 << T1_model,
     sparc5_instructions_m   = 1 << sparc5_instructions,
     aes_instructions_m      = 1 << aes_instructions,
+    sha1_instruction_m      = 1 << sha1_instruction,
+    sha256_instruction_m    = 1 << sha256_instruction,
+    sha512_instruction_m    = 1 << sha512_instruction,
 
     generic_v8_m        = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
     generic_v9_m        = generic_v8_m | v9_instructions_m,
@@ -129,6 +135,9 @@
   static bool has_cbcond()              { return (_features & cbcond_instructions_m) != 0; }
   static bool has_sparc5_instr()        { return (_features & sparc5_instructions_m) != 0; }
   static bool has_aes()                 { return (_features & aes_instructions_m) != 0; }
+  static bool has_sha1()                { return (_features & sha1_instruction_m) != 0; }
+  static bool has_sha256()              { return (_features & sha256_instruction_m) != 0; }
+  static bool has_sha512()              { return (_features & sha512_instruction_m) != 0; }
 
   static bool supports_compare_and_exchange()
                                         { return has_v9(); }
--- a/src/cpu/x86/vm/assembler_x86.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -3854,6 +3854,15 @@
 }
 
 // Carry-Less Multiplication Quadword
+void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
+  assert(VM_Version::supports_clmul(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+  emit_int8(0x44);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8((unsigned char)mask);
+}
+
+// Carry-Less Multiplication Quadword
 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
   bool vector256 = false;
--- a/src/cpu/x86/vm/assembler_x86.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1837,6 +1837,7 @@
   void vpbroadcastd(XMMRegister dst, XMMRegister src);
 
   // Carry-Less Multiplication Quadword
+  void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
   void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
 
   // AVX instruction which is used to clear upper 128 bits of YMM registers and
--- a/src/cpu/x86/vm/compiledIC_x86.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/x86/vm/compiledIC_x86.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -47,34 +47,6 @@
   return is_icholder_entry(call->destination());
 }
 
-//-----------------------------------------------------------------------------
-// High-level access to an inline cache. Guaranteed to be MT-safe.
-
-CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
-  : _ic_call(call)
-{
-  address ic_call = call->instruction_address();
-
-  assert(ic_call != NULL, "ic_call address must be set");
-  assert(nm != NULL, "must pass nmethod");
-  assert(nm->contains(ic_call), "must be in nmethod");
-
-  // Search for the ic_call at the given address.
-  RelocIterator iter(nm, ic_call, ic_call+1);
-  bool ret = iter.next();
-  assert(ret == true, "relocInfo must exist at this address");
-  assert(iter.addr() == ic_call, "must find ic_call");
-  if (iter.type() == relocInfo::virtual_call_type) {
-    virtual_call_Relocation* r = iter.virtual_call_reloc();
-    _is_optimized = false;
-    _value = nativeMovConstReg_at(r->cached_value());
-  } else {
-    assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
-    _is_optimized = true;
-    _value = NULL;
-  }
-}
-
 // ----------------------------------------------------------------------------
 
 #define __ _masm.
--- a/src/cpu/x86/vm/globals_x86.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/x86/vm/globals_x86.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -130,16 +130,16 @@
           "Use fast-string operation for zeroing: rep stosb")               \
                                                                             \
   /* Use Restricted Transactional Memory for lock eliding */                \
-  experimental(bool, UseRTMLocking, false,                                  \
+  product(bool, UseRTMLocking, false,                                       \
           "Enable RTM lock eliding for inflated locks in compiled code")    \
                                                                             \
   experimental(bool, UseRTMForStackLocks, false,                            \
           "Enable RTM lock eliding for stack locks in compiled code")       \
                                                                             \
-  experimental(bool, UseRTMDeopt, false,                                    \
+  product(bool, UseRTMDeopt, false,                                         \
           "Perform deopt and recompilation based on RTM abort ratio")       \
                                                                             \
-  experimental(uintx, RTMRetryCount, 5,                                     \
+  product(uintx, RTMRetryCount, 5,                                          \
           "Number of RTM retries on lock abort or busy")                    \
                                                                             \
   experimental(intx, RTMSpinLoopCount, 100,                                 \
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -7316,17 +7316,34 @@
  * Fold 128-bit data chunk
  */
 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, Register buf, int offset) {
-  vpclmulhdq(xtmp, xK, xcrc); // [123:64]
-  vpclmulldq(xcrc, xK, xcrc); // [63:0]
-  vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
-  pxor(xcrc, xtmp);
+  if (UseAVX > 0) {
+    vpclmulhdq(xtmp, xK, xcrc); // [123:64]
+    vpclmulldq(xcrc, xK, xcrc); // [63:0]
+    vpxor(xcrc, xcrc, Address(buf, offset), false /* vector256 */);
+    pxor(xcrc, xtmp);
+  } else {
+    movdqa(xtmp, xcrc);
+    pclmulhdq(xtmp, xK);   // [123:64]
+    pclmulldq(xcrc, xK);   // [63:0]
+    pxor(xcrc, xtmp);
+    movdqu(xtmp, Address(buf, offset));
+    pxor(xcrc, xtmp);
+  }
 }
 
 void MacroAssembler::fold_128bit_crc32(XMMRegister xcrc, XMMRegister xK, XMMRegister xtmp, XMMRegister xbuf) {
-  vpclmulhdq(xtmp, xK, xcrc);
-  vpclmulldq(xcrc, xK, xcrc);
-  pxor(xcrc, xbuf);
-  pxor(xcrc, xtmp);
+  if (UseAVX > 0) {
+    vpclmulhdq(xtmp, xK, xcrc);
+    vpclmulldq(xcrc, xK, xcrc);
+    pxor(xcrc, xbuf);
+    pxor(xcrc, xtmp);
+  } else {
+    movdqa(xtmp, xcrc);
+    pclmulhdq(xtmp, xK);
+    pclmulldq(xcrc, xK);
+    pxor(xcrc, xbuf);
+    pxor(xcrc, xtmp);
+  }
 }
 
 /**
@@ -7444,9 +7461,17 @@
   // Fold 128 bits in xmm1 down into 32 bits in crc register.
   BIND(L_fold_128b);
   movdqu(xmm0, ExternalAddress(StubRoutines::x86::crc_by128_masks_addr()));
-  vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
-  vpand(xmm3, xmm0, xmm2, false /* vector256 */);
-  vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
+  if (UseAVX > 0) {
+    vpclmulqdq(xmm2, xmm0, xmm1, 0x1);
+    vpand(xmm3, xmm0, xmm2, false /* vector256 */);
+    vpclmulqdq(xmm0, xmm0, xmm3, 0x1);
+  } else {
+    movdqa(xmm2, xmm0);
+    pclmulqdq(xmm2, xmm1, 0x1);
+    movdqa(xmm3, xmm0);
+    pand(xmm3, xmm2);
+    pclmulqdq(xmm0, xmm3, 0x1);
+  }
   psrldq(xmm1, 8);
   psrldq(xmm2, 4);
   pxor(xmm0, xmm1);
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -966,6 +966,16 @@
   void mulss(XMMRegister dst, Address src)        { Assembler::mulss(dst, src); }
   void mulss(XMMRegister dst, AddressLiteral src);
 
+  // Carry-Less Multiplication Quadword
+  void pclmulldq(XMMRegister dst, XMMRegister src) {
+    // 0x00 - multiply lower 64 bits [0:63]
+    Assembler::pclmulqdq(dst, src, 0x00);
+  }
+  void pclmulhdq(XMMRegister dst, XMMRegister src) {
+    // 0x11 - multiply upper 64 bits [64:127]
+    Assembler::pclmulqdq(dst, src, 0x11);
+  }
+
   void sqrtsd(XMMRegister dst, XMMRegister src)    { Assembler::sqrtsd(dst, src); }
   void sqrtsd(XMMRegister dst, Address src)        { Assembler::sqrtsd(dst, src); }
   void sqrtsd(XMMRegister dst, AddressLiteral src);
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -568,7 +568,7 @@
     FLAG_SET_DEFAULT(UseCLMUL, false);
   }
 
-  if (UseCLMUL && (UseAVX > 0) && (UseSSE > 2)) {
+  if (UseCLMUL && (UseSSE > 2)) {
     if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
       UseCRC32Intrinsics = true;
     }
@@ -590,6 +590,17 @@
     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
   }
 
+  if (UseSHA) {
+    warning("SHA instructions are not available on this CPU");
+    FLAG_SET_DEFAULT(UseSHA, false);
+  }
+  if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
+    warning("SHA intrinsics are not available on this CPU");
+    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+    FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
+  }
+
   // Adjust RTM (Restricted Transactional Memory) flags
   if (!supports_rtm() && UseRTMLocking) {
     // Can't continue because UseRTMLocking affects UseBiasedLocking flag
@@ -803,6 +814,21 @@
         }
       }
     }
+    if ((cpu_family() == 0x06) &&
+        ((extended_cpu_model() == 0x36) || // Centerton
+         (extended_cpu_model() == 0x37) || // Silvermont
+         (extended_cpu_model() == 0x4D))) {
+#ifdef COMPILER2
+      if (FLAG_IS_DEFAULT(OptoScheduling)) {
+        OptoScheduling = true;
+      }
+#endif
+      if (supports_sse4_2()) { // Silvermont
+        if (FLAG_IS_DEFAULT(UseUnalignedLoadStores)) {
+          UseUnalignedLoadStores = true; // use movdqu on newest Intel cpus
+        }
+      }
+    }
   }
 
   // Use count leading zeros count instruction if available.
@@ -890,23 +916,25 @@
   AllocatePrefetchDistance = allocate_prefetch_distance();
   AllocatePrefetchStyle    = allocate_prefetch_style();
 
-  if( is_intel() && cpu_family() == 6 && supports_sse3() ) {
-    if( AllocatePrefetchStyle == 2 ) { // watermark prefetching on Core
+  if (is_intel() && cpu_family() == 6 && supports_sse3()) {
+    if (AllocatePrefetchStyle == 2) { // watermark prefetching on Core
 #ifdef _LP64
       AllocatePrefetchDistance = 384;
 #else
       AllocatePrefetchDistance = 320;
 #endif
     }
-    if( supports_sse4_2() && supports_ht() ) { // Nehalem based cpus
+    if (supports_sse4_2() && supports_ht()) { // Nehalem based cpus
       AllocatePrefetchDistance = 192;
       AllocatePrefetchLines = 4;
+    }
 #ifdef COMPILER2
-      if (AggressiveOpts && FLAG_IS_DEFAULT(UseFPUForSpilling)) {
+    if (supports_sse4_2()) {
+      if (FLAG_IS_DEFAULT(UseFPUForSpilling)) {
         FLAG_SET_DEFAULT(UseFPUForSpilling, true);
       }
+    }
 #endif
-    }
   }
   assert(AllocatePrefetchDistance % AllocatePrefetchStepSize == 0, "invalid value");
 
--- a/src/cpu/zero/vm/compiledIC_zero.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/cpu/zero/vm/compiledIC_zero.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -58,34 +58,6 @@
   return is_icholder_entry(call->destination());
 }
 
-//-----------------------------------------------------------------------------
-// High-level access to an inline cache. Guaranteed to be MT-safe.
-
-CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
-  : _ic_call(call)
-{
-  address ic_call = call->instruction_address();
-
-  assert(ic_call != NULL, "ic_call address must be set");
-  assert(nm != NULL, "must pass nmethod");
-  assert(nm->contains(ic_call), "must be in nmethod");
-
-  // Search for the ic_call at the given address.
-  RelocIterator iter(nm, ic_call, ic_call+1);
-  bool ret = iter.next();
-  assert(ret == true, "relocInfo must exist at this address");
-  assert(iter.addr() == ic_call, "must find ic_call");
-  if (iter.type() == relocInfo::virtual_call_type) {
-    virtual_call_Relocation* r = iter.virtual_call_reloc();
-    _is_optimized = false;
-    _value = nativeMovConstReg_at(r->cached_value());
-  } else {
-    assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
-    _is_optimized = true;
-    _value = NULL;
-  }
-}
-
 // ----------------------------------------------------------------------------
 
 void CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
--- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -137,6 +137,21 @@
 #endif
     if (av & AV_SPARC_AES)       features |= aes_instructions_m;
 
+#ifndef AV_SPARC_SHA1
+#define AV_SPARC_SHA1   0x00400000  /* sha1 instruction supported */
+#endif
+    if (av & AV_SPARC_SHA1)         features |= sha1_instruction_m;
+
+#ifndef AV_SPARC_SHA256
+#define AV_SPARC_SHA256 0x00800000  /* sha256 instruction supported */
+#endif
+    if (av & AV_SPARC_SHA256)       features |= sha256_instruction_m;
+
+#ifndef AV_SPARC_SHA512
+#define AV_SPARC_SHA512 0x01000000  /* sha512 instruction supported */
+#endif
+    if (av & AV_SPARC_SHA512)       features |= sha512_instruction_m;
+
   } else {
     // getisax(2) failed, use the old legacy code.
 #ifndef PRODUCT
--- a/src/share/vm/c1/c1_Runtime1.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1018,6 +1018,7 @@
               n_copy->set_data((intx) (load_klass()));
             } else {
               assert(mirror() != NULL, "klass not set");
+              // Don't need a G1 pre-barrier here since we assert above that data isn't an oop.
               n_copy->set_data(cast_from_oop<intx>(mirror()));
             }
 
--- a/src/share/vm/ci/ciEnv.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/ci/ciEnv.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -184,6 +184,10 @@
     }
   }
 
+  void ensure_metadata_alive(ciMetadata* m) {
+    _factory->ensure_metadata_alive(m);
+  }
+
   ciInstance* get_instance(oop o) {
     if (o == NULL) return NULL;
     return get_object(o)->as_instance();
--- a/src/share/vm/ci/ciKlass.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/ci/ciKlass.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -43,6 +43,7 @@
   friend class ciMethod;
   friend class ciMethodData;
   friend class ciObjArrayKlass;
+  friend class ciReceiverTypeData;
 
 private:
   ciSymbol* _name;
--- a/src/share/vm/ci/ciMethodData.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/ci/ciMethodData.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -170,6 +170,7 @@
     Klass* k = data->as_ReceiverTypeData()->receiver(row);
     if (k != NULL) {
       ciKlass* klass = CURRENT_ENV->get_klass(k);
+      CURRENT_ENV->ensure_metadata_alive(klass);
       set_receiver(row, klass);
     }
   }
@@ -191,6 +192,7 @@
 void ciSpeculativeTrapData::translate_from(const ProfileData* data) {
   Method* m = data->as_SpeculativeTrapData()->method();
   ciMethod* ci_m = CURRENT_ENV->get_method(m);
+  CURRENT_ENV->ensure_metadata_alive(ci_m);
   set_method(ci_m);
 }
 
--- a/src/share/vm/ci/ciMethodData.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/ci/ciMethodData.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -70,6 +70,7 @@
     Klass* v = TypeEntries::valid_klass(k);
     if (v != NULL) {
       ciKlass* klass = CURRENT_ENV->get_klass(v);
+      CURRENT_ENV->ensure_metadata_alive(klass);
       return with_status(klass, k);
     }
     return with_status(NULL, k);
--- a/src/share/vm/ci/ciObjectFactory.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/ci/ciObjectFactory.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -46,6 +46,9 @@
 #include "oops/oop.inline.hpp"
 #include "oops/oop.inline2.hpp"
 #include "runtime/fieldType.hpp"
+#if INCLUDE_ALL_GCS
+# include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#endif
 
 // ciObjectFactory
 //
@@ -374,6 +377,37 @@
   return NULL;
 }
 
+// ------------------------------------------------------------------
+// ciObjectFactory::ensure_metadata_alive
+//
+// Ensure that the metadata wrapped by the ciMetadata is kept alive by GC.
+// This is primarily useful for metadata which is considered as weak roots
+// by the GC but need to be strong roots if reachable from a current compilation.
+//
+void ciObjectFactory::ensure_metadata_alive(ciMetadata* m) {
+  ASSERT_IN_VM; // We're handling raw oops here.
+
+#if INCLUDE_ALL_GCS
+  if (!UseG1GC) {
+    return;
+  }
+  Klass* metadata_owner_klass;
+  if (m->is_klass()) {
+    metadata_owner_klass = m->as_klass()->get_Klass();
+  } else if (m->is_method()) {
+    metadata_owner_klass = m->as_method()->get_Method()->constants()->pool_holder();
+  } else {
+    fatal("Not implemented for other types of metadata");
+  }
+
+  oop metadata_holder = metadata_owner_klass->klass_holder();
+  if (metadata_holder != NULL) {
+    G1SATBCardTableModRefBS::enqueue(metadata_holder);
+  }
+
+#endif
+}
+
 //------------------------------------------------------------------
 // ciObjectFactory::get_unloaded_method
 //
--- a/src/share/vm/ci/ciObjectFactory.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/ci/ciObjectFactory.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -75,6 +75,8 @@
   ciObject* create_new_object(oop o);
   ciMetadata* create_new_object(Metadata* o);
 
+  void ensure_metadata_alive(ciMetadata* m);
+
   static bool is_equal(NonPermObject* p, oop key) {
     return p->object()->get_oop() == key;
   }
--- a/src/share/vm/classfile/classLoaderData.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/classLoaderData.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -73,7 +73,11 @@
 
 ClassLoaderData::ClassLoaderData(Handle h_class_loader, bool is_anonymous, Dependencies dependencies) :
   _class_loader(h_class_loader()),
-  _is_anonymous(is_anonymous), _keep_alive(is_anonymous), // initially
+  _is_anonymous(is_anonymous),
+  // An anonymous class loader data doesn't have anything to keep
+  // it from being unloaded during parsing of the anonymous class.
+  // The null-class-loader should always be kept alive.
+  _keep_alive(is_anonymous || h_class_loader.is_null()),
   _metaspace(NULL), _unloading(false), _klasses(NULL),
   _claimed(0), _jmethod_ids(NULL), _handles(NULL), _deallocate_list(NULL),
   _next(NULL), _dependencies(dependencies),
@@ -317,12 +321,45 @@
   }
 }
 
+#ifdef ASSERT
+class AllAliveClosure : public OopClosure {
+  BoolObjectClosure* _is_alive_closure;
+  bool _found_dead;
+ public:
+  AllAliveClosure(BoolObjectClosure* is_alive_closure) : _is_alive_closure(is_alive_closure), _found_dead(false) {}
+  template <typename T> void do_oop_work(T* p) {
+    T heap_oop = oopDesc::load_heap_oop(p);
+    if (!oopDesc::is_null(heap_oop)) {
+      oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
+      if (!_is_alive_closure->do_object_b(obj)) {
+        _found_dead = true;
+      }
+    }
+  }
+  void do_oop(oop* p)       { do_oop_work<oop>(p); }
+  void do_oop(narrowOop* p) { do_oop_work<narrowOop>(p); }
+  bool found_dead()         { return _found_dead; }
+};
+#endif
+
+oop ClassLoaderData::keep_alive_object() const {
+  assert(!keep_alive(), "Don't use with CLDs that are artificially kept alive");
+  return is_anonymous() ? _klasses->java_mirror() : class_loader();
+}
+
 bool ClassLoaderData::is_alive(BoolObjectClosure* is_alive_closure) const {
-  bool alive =
-    is_anonymous() ?
-       is_alive_closure->do_object_b(_klasses->java_mirror()) :
-       class_loader() == NULL || is_alive_closure->do_object_b(class_loader());
-  assert(!alive || claimed(), "must be claimed");
+  bool alive = keep_alive() // null class loader and incomplete anonymous klasses.
+      || is_alive_closure->do_object_b(keep_alive_object());
+
+#ifdef ASSERT
+  if (alive) {
+    AllAliveClosure all_alive_closure(is_alive_closure);
+    KlassToOopClosure klass_closure(&all_alive_closure);
+    const_cast<ClassLoaderData*>(this)->oops_do(&all_alive_closure, &klass_closure, false);
+    assert(!all_alive_closure.found_dead(), err_msg("Found dead oop in alive cld: " PTR_FORMAT, p2i(this)));
+  }
+#endif
+
   return alive;
 }
 
@@ -601,11 +638,36 @@
 
 void ClassLoaderDataGraph::always_strong_oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim) {
   if (ClassUnloading) {
-    ClassLoaderData::the_null_class_loader_data()->oops_do(f, klass_closure, must_claim);
-    // keep any special CLDs alive.
-    ClassLoaderDataGraph::keep_alive_oops_do(f, klass_closure, must_claim);
+    keep_alive_oops_do(f, klass_closure, must_claim);
   } else {
-    ClassLoaderDataGraph::oops_do(f, klass_closure, must_claim);
+    oops_do(f, klass_closure, must_claim);
+  }
+}
+
+void ClassLoaderDataGraph::cld_do(CLDClosure* cl) {
+  for (ClassLoaderData* cld = _head; cl != NULL && cld != NULL; cld = cld->next()) {
+    cl->do_cld(cld);
+  }
+}
+
+void ClassLoaderDataGraph::roots_cld_do(CLDClosure* strong, CLDClosure* weak) {
+  for (ClassLoaderData* cld = _head;  cld != NULL; cld = cld->_next) {
+    CLDClosure* closure = cld->keep_alive() ? strong : weak;
+    if (closure != NULL) {
+      closure->do_cld(cld);
+    }
+  }
+}
+
+void ClassLoaderDataGraph::keep_alive_cld_do(CLDClosure* cl) {
+  roots_cld_do(cl, NULL);
+}
+
+void ClassLoaderDataGraph::always_strong_cld_do(CLDClosure* cl) {
+  if (ClassUnloading) {
+    keep_alive_cld_do(cl);
+  } else {
+    cld_do(cl);
   }
 }
 
@@ -660,6 +722,16 @@
   return array;
 }
 
+bool ClassLoaderDataGraph::unload_list_contains(const void* x) {
+  assert(SafepointSynchronize::is_at_safepoint(), "only safe to call at safepoint");
+  for (ClassLoaderData* cld = _unloading; cld != NULL; cld = cld->next()) {
+    if (cld->metaspace_or_null() != NULL && cld->metaspace_or_null()->contains(x)) {
+      return true;
+    }
+  }
+  return false;
+}
+
 #ifndef PRODUCT
 bool ClassLoaderDataGraph::contains_loader_data(ClassLoaderData* loader_data) {
   for (ClassLoaderData* data = _head; data != NULL; data = data->next()) {
@@ -689,7 +761,7 @@
   bool has_redefined_a_class = JvmtiExport::has_redefined_a_class();
   MetadataOnStackMark md_on_stack;
   while (data != NULL) {
-    if (data->keep_alive() || data->is_alive(is_alive_closure)) {
+    if (data->is_alive(is_alive_closure)) {
       if (has_redefined_a_class) {
         data->classes_do(InstanceKlass::purge_previous_versions);
       }
@@ -780,6 +852,60 @@
   return _rw_metaspace;
 }
 
+ClassLoaderDataGraphKlassIteratorAtomic::ClassLoaderDataGraphKlassIteratorAtomic()
+    : _next_klass(NULL) {
+  ClassLoaderData* cld = ClassLoaderDataGraph::_head;
+  Klass* klass = NULL;
+
+  // Find the first klass in the CLDG.
+  while (cld != NULL) {
+    klass = cld->_klasses;
+    if (klass != NULL) {
+      _next_klass = klass;
+      return;
+    }
+    cld = cld->next();
+  }
+}
+
+Klass* ClassLoaderDataGraphKlassIteratorAtomic::next_klass_in_cldg(Klass* klass) {
+  Klass* next = klass->next_link();
+  if (next != NULL) {
+    return next;
+  }
+
+  // No more klasses in the current CLD. Time to find a new CLD.
+  ClassLoaderData* cld = klass->class_loader_data();
+  while (next == NULL) {
+    cld = cld->next();
+    if (cld == NULL) {
+      break;
+    }
+    next = cld->_klasses;
+  }
+
+  return next;
+}
+
+Klass* ClassLoaderDataGraphKlassIteratorAtomic::next_klass() {
+  Klass* head = (Klass*)_next_klass;
+
+  while (head != NULL) {
+    Klass* next = next_klass_in_cldg(head);
+
+    Klass* old_head = (Klass*)Atomic::cmpxchg_ptr(next, &_next_klass, head);
+
+    if (old_head == head) {
+      return head; // Won the CAS.
+    }
+
+    head = old_head;
+  }
+
+  // Nothing more for the iterator to hand out.
+  assert(head == NULL, err_msg("head is " PTR_FORMAT ", expected not null:", p2i(head)));
+  return NULL;
+}
 
 ClassLoaderDataGraphMetaspaceIterator::ClassLoaderDataGraphMetaspaceIterator() {
   _data = ClassLoaderDataGraph::_head;
--- a/src/share/vm/classfile/classLoaderData.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/classLoaderData.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -31,7 +31,6 @@
 #include "memory/metaspaceCounters.hpp"
 #include "runtime/mutex.hpp"
 #include "utilities/growableArray.hpp"
-
 #if INCLUDE_TRACE
 # include "utilities/ticks.hpp"
 #endif
@@ -59,6 +58,7 @@
 class ClassLoaderDataGraph : public AllStatic {
   friend class ClassLoaderData;
   friend class ClassLoaderDataGraphMetaspaceIterator;
+  friend class ClassLoaderDataGraphKlassIteratorAtomic;
   friend class VMStructs;
  private:
   // All CLDs (except the null CLD) can be reached by walking _head->_next->...
@@ -75,9 +75,16 @@
   static ClassLoaderData* find_or_create(Handle class_loader, TRAPS);
   static void purge();
   static void clear_claimed_marks();
+  // oops do
   static void oops_do(OopClosure* f, KlassClosure* klass_closure, bool must_claim);
+  static void keep_alive_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
   static void always_strong_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
-  static void keep_alive_oops_do(OopClosure* blk, KlassClosure* klass_closure, bool must_claim);
+  // cld do
+  static void cld_do(CLDClosure* cl);
+  static void roots_cld_do(CLDClosure* strong, CLDClosure* weak);
+  static void keep_alive_cld_do(CLDClosure* cl);
+  static void always_strong_cld_do(CLDClosure* cl);
+  // klass do
   static void classes_do(KlassClosure* klass_closure);
   static void classes_do(void f(Klass* const));
   static void loaded_classes_do(KlassClosure* klass_closure);
@@ -102,6 +109,7 @@
   static void dump() { dump_on(tty); }
   static void verify();
 
+  static bool unload_list_contains(const void* x);
 #ifndef PRODUCT
   static bool contains_loader_data(ClassLoaderData* loader_data);
 #endif
@@ -134,6 +142,7 @@
   };
 
   friend class ClassLoaderDataGraph;
+  friend class ClassLoaderDataGraphKlassIteratorAtomic;
   friend class ClassLoaderDataGraphMetaspaceIterator;
   friend class MetaDataFactory;
   friend class Method;
@@ -149,7 +158,7 @@
                            // classes in the class loader are allocated.
   Mutex* _metaspace_lock;  // Locks the metaspace for allocations and setup.
   bool _unloading;         // true if this class loader goes away
-  bool _keep_alive;        // if this CLD can be unloaded for anonymous loaders
+  bool _keep_alive;        // if this CLD is kept alive without a keep_alive_object().
   bool _is_anonymous;      // if this CLD is for an anonymous class
   volatile int _claimed;   // true if claimed, for example during GC traces.
                            // To avoid applying oop closure more than once.
@@ -195,7 +204,6 @@
 
   void unload();
   bool keep_alive() const       { return _keep_alive; }
-  bool is_alive(BoolObjectClosure* is_alive_closure) const;
   void classes_do(void f(Klass*));
   void loaded_classes_do(KlassClosure* klass_closure);
   void classes_do(void f(InstanceKlass*));
@@ -207,6 +215,9 @@
   MetaWord* allocate(size_t size);
 
  public:
+
+  bool is_alive(BoolObjectClosure* is_alive_closure) const;
+
   // Accessors
   Metaspace* metaspace_or_null() const     { return _metaspace; }
 
@@ -240,13 +251,16 @@
 
   oop class_loader() const      { return _class_loader; }
 
+  // The object the GC is using to keep this ClassLoaderData alive.
+  oop keep_alive_object() const;
+
   // Returns true if this class loader data is for a loader going away.
   bool is_unloading() const     {
     assert(!(is_the_null_class_loader_data() && _unloading), "The null class loader can never be unloaded");
     return _unloading;
   }
-  // Anonymous class loader data doesn't have anything to keep them from
-  // being unloaded during parsing the anonymous class.
+
+  // Used to make sure that this CLD is not unloaded.
   void set_keep_alive(bool value) { _keep_alive = value; }
 
   unsigned int identity_hash() {
@@ -287,6 +301,16 @@
   void initialize_shared_metaspaces();
 };
 
+// An iterator that distributes Klasses to parallel worker threads.
+class ClassLoaderDataGraphKlassIteratorAtomic : public StackObj {
+  volatile Klass* _next_klass;
+ public:
+  ClassLoaderDataGraphKlassIteratorAtomic();
+  Klass* next_klass();
+ private:
+  static Klass* next_klass_in_cldg(Klass* klass);
+};
+
 class ClassLoaderDataGraphMetaspaceIterator : public StackObj {
   ClassLoaderData* _data;
  public:
--- a/src/share/vm/classfile/dictionary.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/dictionary.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -199,6 +199,26 @@
   return class_was_unloaded;
 }
 
+void Dictionary::roots_oops_do(OopClosure* strong, OopClosure* weak) {
+  // Skip the strong roots probe marking if the closures are the same.
+  if (strong == weak) {
+    oops_do(strong);
+    return;
+  }
+
+  for (int index = 0; index < table_size(); index++) {
+    for (DictionaryEntry *probe = bucket(index);
+                          probe != NULL;
+                          probe = probe->next()) {
+      Klass* e = probe->klass();
+      ClassLoaderData* loader_data = probe->loader_data();
+      if (is_strongly_reachable(loader_data, e)) {
+        probe->set_strongly_reachable();
+      }
+    }
+  }
+  _pd_cache_table->roots_oops_do(strong, weak);
+}
 
 void Dictionary::always_strong_oops_do(OopClosure* blk) {
   // Follow all system classes and temporary placeholders in dictionary; only
@@ -490,6 +510,23 @@
   }
 }
 
+void ProtectionDomainCacheTable::roots_oops_do(OopClosure* strong, OopClosure* weak) {
+  for (int index = 0; index < table_size(); index++) {
+    for (ProtectionDomainCacheEntry* probe = bucket(index);
+                                     probe != NULL;
+                                     probe = probe->next()) {
+      if (probe->is_strongly_reachable()) {
+        probe->reset_strongly_reachable();
+        probe->oops_do(strong);
+      } else {
+        if (weak != NULL) {
+          probe->oops_do(weak);
+        }
+      }
+    }
+  }
+}
+
 uint ProtectionDomainCacheTable::bucket_size() {
   return sizeof(ProtectionDomainCacheEntry);
 }
--- a/src/share/vm/classfile/dictionary.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/dictionary.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -89,6 +89,7 @@
   // GC support
   void oops_do(OopClosure* f);
   void always_strong_oops_do(OopClosure* blk);
+  void roots_oops_do(OopClosure* strong, OopClosure* weak);
 
   void always_strong_classes_do(KlassClosure* closure);
 
@@ -218,6 +219,7 @@
   // GC support
   void oops_do(OopClosure* f);
   void always_strong_oops_do(OopClosure* f);
+  void roots_oops_do(OopClosure* strong, OopClosure* weak);
 
   static uint bucket_size();
 
--- a/src/share/vm/classfile/javaClasses.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/javaClasses.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -463,12 +463,11 @@
   return true;
 }
 
-void java_lang_String::print(Handle java_string, outputStream* st) {
-  oop          obj    = java_string();
-  assert(obj->klass() == SystemDictionary::String_klass(), "must be java_string");
-  typeArrayOop value  = java_lang_String::value(obj);
-  int          offset = java_lang_String::offset(obj);
-  int          length = java_lang_String::length(obj);
+void java_lang_String::print(oop java_string, outputStream* st) {
+  assert(java_string->klass() == SystemDictionary::String_klass(), "must be java_string");
+  typeArrayOop value  = java_lang_String::value(java_string);
+  int          offset = java_lang_String::offset(java_string);
+  int          length = java_lang_String::length(java_string);
 
   int end = MIN2(length, 100);
   if (value == NULL) {
--- a/src/share/vm/classfile/javaClasses.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/javaClasses.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -208,7 +208,7 @@
   }
 
   // Debugging
-  static void print(Handle java_string, outputStream* st);
+  static void print(oop java_string, outputStream* st);
   friend class JavaClasses;
 };
 
--- a/src/share/vm/classfile/metadataOnStackMark.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/metadataOnStackMark.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -47,8 +47,11 @@
   if (_marked_objects == NULL) {
     _marked_objects = new (ResourceObj::C_HEAP, mtClass) GrowableArray<Metadata*>(1000, true);
   }
+
   Threads::metadata_do(Metadata::mark_on_stack);
-  CodeCache::alive_nmethods_do(nmethod::mark_on_stack);
+  if (JvmtiExport::has_redefined_a_class()) {
+    CodeCache::alive_nmethods_do(nmethod::mark_on_stack);
+  }
   CompileBroker::mark_on_stack();
   JvmtiCurrentBreakpoints::metadata_do(Metadata::mark_on_stack);
   ThreadService::metadata_do(Metadata::mark_on_stack);
--- a/src/share/vm/classfile/symbolTable.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/symbolTable.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -36,6 +36,7 @@
 #include "runtime/mutexLocker.hpp"
 #include "utilities/hashtable.inline.hpp"
 #if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc_implementation/g1/g1StringDedup.hpp"
 #endif
 
@@ -704,11 +705,26 @@
   return lookup(chars, length);
 }
 
+// Tell the GC that this string was looked up in the StringTable.
+static void ensure_string_alive(oop string) {
+  // A lookup in the StringTable could return an object that was previously
+  // considered dead. The SATB part of G1 needs to get notified about this
+  // potential resurrection, otherwise the marking might not find the object.
+#if INCLUDE_ALL_GCS
+  if (UseG1GC && string != NULL) {
+    G1SATBCardTableModRefBS::enqueue(string);
+  }
+#endif
+}
 
 oop StringTable::lookup(jchar* name, int len) {
   unsigned int hash = hash_string(name, len);
   int index = the_table()->hash_to_index(hash);
-  return the_table()->lookup(index, name, len, hash);
+  oop string = the_table()->lookup(index, name, len, hash);
+
+  ensure_string_alive(string);
+
+  return string;
 }
 
 
@@ -719,7 +735,10 @@
   oop found_string = the_table()->lookup(index, name, len, hashValue);
 
   // Found
-  if (found_string != NULL) return found_string;
+  if (found_string != NULL) {
+    ensure_string_alive(found_string);
+    return found_string;
+  }
 
   debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
   assert(!Universe::heap()->is_in_reserved(name),
@@ -744,11 +763,17 @@
 
   // Grab the StringTable_lock before getting the_table() because it could
   // change at safepoint.
-  MutexLocker ml(StringTable_lock, THREAD);
+  oop added_or_found;
+  {
+    MutexLocker ml(StringTable_lock, THREAD);
+    // Otherwise, add to symbol to table
+    added_or_found = the_table()->basic_add(index, string, name, len,
+                                  hashValue, CHECK_NULL);
+  }
 
-  // Otherwise, add to symbol to table
-  return the_table()->basic_add(index, string, name, len,
-                                hashValue, CHECK_NULL);
+  ensure_string_alive(added_or_found);
+
+  return added_or_found;
 }
 
 oop StringTable::intern(Symbol* symbol, TRAPS) {
--- a/src/share/vm/classfile/systemDictionary.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/systemDictionary.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1613,13 +1613,7 @@
 // system dictionary and follows the remaining classes' contents.
 
 void SystemDictionary::always_strong_oops_do(OopClosure* blk) {
-  blk->do_oop(&_java_system_loader);
-  blk->do_oop(&_system_loader_lock_obj);
-
-  dictionary()->always_strong_oops_do(blk);
-
-  // Visit extra methods
-  invoke_method_table()->oops_do(blk);
+  roots_oops_do(blk, NULL);
 }
 
 void SystemDictionary::always_strong_classes_do(KlassClosure* closure) {
@@ -1686,6 +1680,17 @@
   return unloading_occurred;
 }
 
+void SystemDictionary::roots_oops_do(OopClosure* strong, OopClosure* weak) {
+  strong->do_oop(&_java_system_loader);
+  strong->do_oop(&_system_loader_lock_obj);
+
+  // Adjust dictionary
+  dictionary()->roots_oops_do(strong, weak);
+
+  // Visit extra methods
+  invoke_method_table()->oops_do(strong);
+}
+
 void SystemDictionary::oops_do(OopClosure* f) {
   f->do_oop(&_java_system_loader);
   f->do_oop(&_system_loader_lock_obj);
--- a/src/share/vm/classfile/systemDictionary.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/systemDictionary.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -335,6 +335,7 @@
 
   // Applies "f->do_oop" to all root oops in the system dictionary.
   static void oops_do(OopClosure* f);
+  static void roots_oops_do(OopClosure* strong, OopClosure* weak);
 
   // System loader lock
   static oop system_loader_lock()           { return _system_loader_lock_obj; }
--- a/src/share/vm/classfile/vmSymbols.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -789,6 +789,26 @@
    do_name(     decrypt_name,                                      "decrypt")                                           \
    do_signature(byteArray_int_int_byteArray_int_signature,         "([BII[BI)I")                                        \
                                                                                                                         \
+  /* support for sun.security.provider.SHA */                                                                           \
+  do_class(sun_security_provider_sha,                              "sun/security/provider/SHA")                         \
+  do_intrinsic(_sha_implCompress, sun_security_provider_sha, implCompress_name, implCompress_signature, F_R)            \
+   do_name(     implCompress_name,                                 "implCompress")                                      \
+   do_signature(implCompress_signature,                            "([BI)V")                                            \
+                                                                                                                        \
+  /* support for sun.security.provider.SHA2 */                                                                          \
+  do_class(sun_security_provider_sha2,                             "sun/security/provider/SHA2")                        \
+  do_intrinsic(_sha2_implCompress, sun_security_provider_sha2, implCompress_name, implCompress_signature, F_R)          \
+                                                                                                                        \
+  /* support for sun.security.provider.SHA5 */                                                                          \
+  do_class(sun_security_provider_sha5,                             "sun/security/provider/SHA5")                        \
+  do_intrinsic(_sha5_implCompress, sun_security_provider_sha5, implCompress_name, implCompress_signature, F_R)          \
+                                                                                                                        \
+  /* support for sun.security.provider.DigestBase */                                                                    \
+  do_class(sun_security_provider_digestbase,                       "sun/security/provider/DigestBase")                  \
+  do_intrinsic(_digestBase_implCompressMB, sun_security_provider_digestbase, implCompressMB_name, implCompressMB_signature, F_R)   \
+   do_name(     implCompressMB_name,                               "implCompressMultiBlock")                            \
+   do_signature(implCompressMB_signature,                          "([BII)I")                                           \
+                                                                                                                        \
   /* support for java.util.zip */                                                                                       \
   do_class(java_util_zip_CRC32,           "java/util/zip/CRC32")                                                        \
   do_intrinsic(_updateCRC32,               java_util_zip_CRC32,   update_name, int2_int_signature,               F_SN)  \
--- a/src/share/vm/code/codeCache.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/codeCache.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -337,6 +337,11 @@
 // Walk the list of methods which might contain non-perm oops.
 void CodeCache::scavenge_root_nmethods_do(CodeBlobClosure* f) {
   assert_locked_or_safepoint(CodeCache_lock);
+
+  if (UseG1GC) {
+    return;
+  }
+
   debug_only(mark_scavenge_root_nmethods());
 
   for (nmethod* cur = scavenge_root_nmethods(); cur != NULL; cur = cur->scavenge_root_link()) {
@@ -362,6 +367,11 @@
 
 void CodeCache::add_scavenge_root_nmethod(nmethod* nm) {
   assert_locked_or_safepoint(CodeCache_lock);
+
+  if (UseG1GC) {
+    return;
+  }
+
   nm->set_on_scavenge_root_list();
   nm->set_scavenge_root_link(_scavenge_root_nmethods);
   set_scavenge_root_nmethods(nm);
@@ -370,6 +380,11 @@
 
 void CodeCache::drop_scavenge_root_nmethod(nmethod* nm) {
   assert_locked_or_safepoint(CodeCache_lock);
+
+  if (UseG1GC) {
+    return;
+  }
+
   print_trace("drop_scavenge_root", nm);
   nmethod* last = NULL;
   nmethod* cur = scavenge_root_nmethods();
@@ -391,6 +406,11 @@
 
 void CodeCache::prune_scavenge_root_nmethods() {
   assert_locked_or_safepoint(CodeCache_lock);
+
+  if (UseG1GC) {
+    return;
+  }
+
   debug_only(mark_scavenge_root_nmethods());
 
   nmethod* last = NULL;
@@ -423,6 +443,10 @@
 
 #ifndef PRODUCT
 void CodeCache::asserted_non_scavengable_nmethods_do(CodeBlobClosure* f) {
+  if (UseG1GC) {
+    return;
+  }
+
   // While we are here, verify the integrity of the list.
   mark_scavenge_root_nmethods();
   for (nmethod* cur = scavenge_root_nmethods(); cur != NULL; cur = cur->scavenge_root_link()) {
@@ -463,9 +487,36 @@
 }
 #endif //PRODUCT
 
+void CodeCache::verify_clean_inline_caches() {
+#ifdef ASSERT
+  FOR_ALL_ALIVE_BLOBS(cb) {
+    if (cb->is_nmethod()) {
+      nmethod* nm = (nmethod*)cb;
+      assert(!nm->is_unloaded(), "Tautology");
+      nm->verify_clean_inline_caches();
+      nm->verify();
+    }
+  }
+#endif
+}
+
+void CodeCache::verify_icholder_relocations() {
+#ifdef ASSERT
+  // make sure that we aren't leaking icholders
+  int count = 0;
+  FOR_ALL_BLOBS(cb) {
+    if (cb->is_nmethod()) {
+      nmethod* nm = (nmethod*)cb;
+      count += nm->verify_icholder_relocations();
+    }
+  }
+
+  assert(count + InlineCacheBuffer::pending_icholder_count() + CompiledICHolder::live_not_claimed_count() ==
+         CompiledICHolder::live_count(), "must agree");
+#endif
+}
 
 void CodeCache::gc_prologue() {
-  assert(!nmethod::oops_do_marking_is_active(), "oops_do_marking_epilogue must be called");
 }
 
 void CodeCache::gc_epilogue() {
@@ -478,41 +529,15 @@
         nm->cleanup_inline_caches();
       }
       DEBUG_ONLY(nm->verify());
-      nm->fix_oop_relocations();
+      DEBUG_ONLY(nm->verify_oop_relocations());
     }
   }
   set_needs_cache_clean(false);
   prune_scavenge_root_nmethods();
-  assert(!nmethod::oops_do_marking_is_active(), "oops_do_marking_prologue must be called");
 
-#ifdef ASSERT
-  // make sure that we aren't leaking icholders
-  int count = 0;
-  FOR_ALL_BLOBS(cb) {
-    if (cb->is_nmethod()) {
-      RelocIterator iter((nmethod*)cb);
-      while(iter.next()) {
-        if (iter.type() == relocInfo::virtual_call_type) {
-          if (CompiledIC::is_icholder_call_site(iter.virtual_call_reloc())) {
-            CompiledIC *ic = CompiledIC_at(iter.reloc());
-            if (TraceCompiledIC) {
-              tty->print("noticed icholder " INTPTR_FORMAT " ", p2i(ic->cached_icholder()));
-              ic->print();
-            }
-            assert(ic->cached_icholder() != NULL, "must be non-NULL");
-            count++;
-          }
-        }
-      }
-    }
-  }
-
-  assert(count + InlineCacheBuffer::pending_icholder_count() + CompiledICHolder::live_not_claimed_count() ==
-         CompiledICHolder::live_count(), "must agree");
-#endif
+  verify_icholder_relocations();
 }
 
-
 void CodeCache::verify_oops() {
   MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
   VerifyOopClosure voc;
--- a/src/share/vm/code/codeCache.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/codeCache.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -134,10 +134,6 @@
   // to) any unmarked codeBlobs in the cache.  Sets "marked_for_unloading"
   // to "true" iff some code got unloaded.
   static void do_unloading(BoolObjectClosure* is_alive, bool unloading_occurred);
-  static void oops_do(OopClosure* f) {
-    CodeBlobToOopClosure oopc(f, /*do_marking=*/ false);
-    blobs_do(&oopc);
-  }
   static void asserted_non_scavengable_nmethods_do(CodeBlobClosure* f = NULL) PRODUCT_RETURN;
   static void scavenge_root_nmethods_do(CodeBlobClosure* f);
 
@@ -172,6 +168,9 @@
   static void set_needs_cache_clean(bool v)      { _needs_cache_clean = v;    }
   static void clear_inline_caches();             // clear all inline caches
 
+  static void verify_clean_inline_caches();
+  static void verify_icholder_relocations();
+
   // Deoptimization
   static int  mark_for_deoptimization(DepChange& changes);
 #ifdef HOTSWAP
--- a/src/share/vm/code/compiledIC.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/compiledIC.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -99,13 +99,13 @@
   }
 
   {
-  MutexLockerEx pl(Patching_lock, Mutex::_no_safepoint_check_flag);
+    MutexLockerEx pl(SafepointSynchronize::is_at_safepoint() ? NULL : Patching_lock, Mutex::_no_safepoint_check_flag);
 #ifdef ASSERT
-  CodeBlob* cb = CodeCache::find_blob_unsafe(_ic_call);
-  assert(cb != NULL && cb->is_nmethod(), "must be nmethod");
+    CodeBlob* cb = CodeCache::find_blob_unsafe(_ic_call);
+    assert(cb != NULL && cb->is_nmethod(), "must be nmethod");
 #endif
-  _ic_call->set_destination_mt_safe(entry_point);
-}
+     _ic_call->set_destination_mt_safe(entry_point);
+  }
 
   if (is_optimized() || is_icstub) {
     // Optimized call sites don't have a cache value and ICStub call
@@ -159,6 +159,50 @@
 //-----------------------------------------------------------------------------
 // High-level access to an inline cache. Guaranteed to be MT-safe.
 
+void CompiledIC::initialize_from_iter(RelocIterator* iter) {
+  assert(iter->addr() == _ic_call->instruction_address(), "must find ic_call");
+
+  if (iter->type() == relocInfo::virtual_call_type) {
+    virtual_call_Relocation* r = iter->virtual_call_reloc();
+    _is_optimized = false;
+    _value = nativeMovConstReg_at(r->cached_value());
+  } else {
+    assert(iter->type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
+    _is_optimized = true;
+    _value = NULL;
+  }
+}
+
+CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
+  : _ic_call(call)
+{
+  address ic_call = _ic_call->instruction_address();
+
+  assert(ic_call != NULL, "ic_call address must be set");
+  assert(nm != NULL, "must pass nmethod");
+  assert(nm->contains(ic_call), "must be in nmethod");
+
+  // Search for the ic_call at the given address.
+  RelocIterator iter(nm, ic_call, ic_call+1);
+  bool ret = iter.next();
+  assert(ret == true, "relocInfo must exist at this address");
+  assert(iter.addr() == ic_call, "must find ic_call");
+
+  initialize_from_iter(&iter);
+}
+
+CompiledIC::CompiledIC(RelocIterator* iter)
+  : _ic_call(nativeCall_at(iter->addr()))
+{
+  address ic_call = _ic_call->instruction_address();
+
+  nmethod* nm = iter->code();
+  assert(ic_call != NULL, "ic_call address must be set");
+  assert(nm != NULL, "must pass nmethod");
+  assert(nm->contains(ic_call), "must be in nmethod");
+
+  initialize_from_iter(iter);
+}
 
 bool CompiledIC::set_to_megamorphic(CallInfo* call_info, Bytecodes::Code bytecode, TRAPS) {
   assert(CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "");
@@ -485,7 +529,7 @@
 void CompiledStaticCall::set_to_clean() {
   assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call");
   // Reset call site
-  MutexLockerEx pl(Patching_lock, Mutex::_no_safepoint_check_flag);
+  MutexLockerEx pl(SafepointSynchronize::is_at_safepoint() ? NULL : Patching_lock, Mutex::_no_safepoint_check_flag);
 #ifdef ASSERT
   CodeBlob* cb = CodeCache::find_blob_unsafe(this);
   assert(cb != NULL && cb->is_nmethod(), "must be nmethod");
--- a/src/share/vm/code/compiledIC.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/compiledIC.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -150,6 +150,9 @@
   bool          _is_optimized;  // an optimized virtual call (i.e., no compiled IC)
 
   CompiledIC(nmethod* nm, NativeCall* ic_call);
+  CompiledIC(RelocIterator* iter);
+
+  void initialize_from_iter(RelocIterator* iter);
 
   static bool is_icholder_entry(address entry);
 
@@ -183,6 +186,7 @@
   friend CompiledIC* CompiledIC_before(nmethod* nm, address return_addr);
   friend CompiledIC* CompiledIC_at(nmethod* nm, address call_site);
   friend CompiledIC* CompiledIC_at(Relocation* call_site);
+  friend CompiledIC* CompiledIC_at(RelocIterator* reloc_iter);
 
   // This is used to release CompiledICHolder*s from nmethods that
   // are about to be freed.  The callsite might contain other stale
@@ -263,6 +267,13 @@
   return c_ic;
 }
 
+inline CompiledIC* CompiledIC_at(RelocIterator* reloc_iter) {
+  assert(reloc_iter->type() == relocInfo::virtual_call_type ||
+      reloc_iter->type() == relocInfo::opt_virtual_call_type, "wrong reloc. info");
+  CompiledIC* c_ic = new CompiledIC(reloc_iter);
+  c_ic->verify();
+  return c_ic;
+}
 
 //-----------------------------------------------------------------------------
 // The CompiledStaticCall represents a call to a static method in the compiled
--- a/src/share/vm/code/dependencies.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/dependencies.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -407,56 +407,66 @@
 // for the sake of the compiler log, print out current dependencies:
 void Dependencies::log_all_dependencies() {
   if (log() == NULL)  return;
-  ciBaseObject* args[max_arg_count];
+  ResourceMark rm;
   for (int deptv = (int)FIRST_TYPE; deptv < (int)TYPE_LIMIT; deptv++) {
     DepType dept = (DepType)deptv;
     GrowableArray<ciBaseObject*>* deps = _deps[dept];
-    if (deps->length() == 0)  continue;
+    int deplen = deps->length();
+    if (deplen == 0) {
+      continue;
+    }
     int stride = dep_args(dept);
+    GrowableArray<ciBaseObject*>* ciargs = new GrowableArray<ciBaseObject*>(stride);
     for (int i = 0; i < deps->length(); i += stride) {
       for (int j = 0; j < stride; j++) {
         // flush out the identities before printing
-        args[j] = deps->at(i+j);
+        ciargs->push(deps->at(i+j));
       }
-      write_dependency_to(log(), dept, stride, args);
+      write_dependency_to(log(), dept, ciargs);
+      ciargs->clear();
     }
+    guarantee(deplen == deps->length(), "deps array cannot grow inside nested ResoureMark scope");
   }
 }
 
 void Dependencies::write_dependency_to(CompileLog* log,
                                        DepType dept,
-                                       int nargs, DepArgument args[],
+                                       GrowableArray<DepArgument>* args,
                                        Klass* witness) {
   if (log == NULL) {
     return;
   }
+  ResourceMark rm;
   ciEnv* env = ciEnv::current();
-  ciBaseObject* ciargs[max_arg_count];
-  assert(nargs <= max_arg_count, "oob");
-  for (int j = 0; j < nargs; j++) {
-    if (args[j].is_oop()) {
-      ciargs[j] = env->get_object(args[j].oop_value());
+  GrowableArray<ciBaseObject*>* ciargs = new GrowableArray<ciBaseObject*>(args->length());
+  for (GrowableArrayIterator<DepArgument> it = args->begin(); it != args->end(); ++it) {
+    DepArgument arg = *it;
+    if (arg.is_oop()) {
+      ciargs->push(env->get_object(arg.oop_value()));
     } else {
-      ciargs[j] = env->get_metadata(args[j].metadata_value());
+      ciargs->push(env->get_metadata(arg.metadata_value()));
     }
   }
-  Dependencies::write_dependency_to(log, dept, nargs, ciargs, witness);
+  int argslen = ciargs->length();
+  Dependencies::write_dependency_to(log, dept, ciargs, witness);
+  guarantee(argslen == ciargs->length(), "ciargs array cannot grow inside nested ResoureMark scope");
 }
 
 void Dependencies::write_dependency_to(CompileLog* log,
                                        DepType dept,
-                                       int nargs, ciBaseObject* args[],
+                                       GrowableArray<ciBaseObject*>* args,
                                        Klass* witness) {
-  if (log == NULL)  return;
-  assert(nargs <= max_arg_count, "oob");
-  int argids[max_arg_count];
-  int ctxkj = dep_context_arg(dept);  // -1 if no context arg
-  int j;
-  for (j = 0; j < nargs; j++) {
-    if (args[j]->is_object()) {
-      argids[j] = log->identify(args[j]->as_object());
+  if (log == NULL) {
+    return;
+  }
+  ResourceMark rm;
+  GrowableArray<int>* argids = new GrowableArray<int>(args->length());
+  for (GrowableArrayIterator<ciBaseObject*> it = args->begin(); it != args->end(); ++it) {
+    ciBaseObject* obj = *it;
+    if (obj->is_object()) {
+      argids->push(log->identify(obj->as_object()));
     } else {
-      argids[j] = log->identify(args[j]->as_metadata());
+      argids->push(log->identify(obj->as_metadata()));
     }
   }
   if (witness != NULL) {
@@ -465,16 +475,17 @@
     log->begin_elem("dependency");
   }
   log->print(" type='%s'", dep_name(dept));
-  if (ctxkj >= 0) {
-    log->print(" ctxk='%d'", argids[ctxkj]);
+  const int ctxkj = dep_context_arg(dept);  // -1 if no context arg
+  if (ctxkj >= 0 && ctxkj < argids->length()) {
+    log->print(" ctxk='%d'", argids->at(ctxkj));
   }
   // write remaining arguments, if any.
-  for (j = 0; j < nargs; j++) {
+  for (int j = 0; j < argids->length(); j++) {
     if (j == ctxkj)  continue;  // already logged
     if (j == 1) {
-      log->print(  " x='%d'",    argids[j]);
+      log->print(  " x='%d'",    argids->at(j));
     } else {
-      log->print(" x%d='%d'", j, argids[j]);
+      log->print(" x%d='%d'", j, argids->at(j));
     }
   }
   if (witness != NULL) {
@@ -486,9 +497,12 @@
 
 void Dependencies::write_dependency_to(xmlStream* xtty,
                                        DepType dept,
-                                       int nargs, DepArgument args[],
+                                       GrowableArray<DepArgument>* args,
                                        Klass* witness) {
-  if (xtty == NULL)  return;
+  if (xtty == NULL) {
+    return;
+  }
+  ResourceMark rm;
   ttyLocker ttyl;
   int ctxkj = dep_context_arg(dept);  // -1 if no context arg
   if (witness != NULL) {
@@ -498,23 +512,24 @@
   }
   xtty->print(" type='%s'", dep_name(dept));
   if (ctxkj >= 0) {
-    xtty->object("ctxk", args[ctxkj].metadata_value());
+    xtty->object("ctxk", args->at(ctxkj).metadata_value());
   }
   // write remaining arguments, if any.
-  for (int j = 0; j < nargs; j++) {
+  for (int j = 0; j < args->length(); j++) {
     if (j == ctxkj)  continue;  // already logged
+    DepArgument arg = args->at(j);
     if (j == 1) {
-      if (args[j].is_oop()) {
-        xtty->object("x", args[j].oop_value());
+      if (arg.is_oop()) {
+        xtty->object("x", arg.oop_value());
       } else {
-        xtty->object("x", args[j].metadata_value());
+        xtty->object("x", arg.metadata_value());
       }
     } else {
       char xn[10]; sprintf(xn, "x%d", j);
-      if (args[j].is_oop()) {
-        xtty->object(xn, args[j].oop_value());
+      if (arg.is_oop()) {
+        xtty->object(xn, arg.oop_value());
       } else {
-        xtty->object(xn, args[j].metadata_value());
+        xtty->object(xn, arg.metadata_value());
       }
     }
   }
@@ -525,7 +540,7 @@
   xtty->end_elem();
 }
 
-void Dependencies::print_dependency(DepType dept, int nargs, DepArgument args[],
+void Dependencies::print_dependency(DepType dept, GrowableArray<DepArgument>* args,
                                     Klass* witness) {
   ResourceMark rm;
   ttyLocker ttyl;   // keep the following output all in one block
@@ -534,8 +549,8 @@
                 dep_name(dept));
   // print arguments
   int ctxkj = dep_context_arg(dept);  // -1 if no context arg
-  for (int j = 0; j < nargs; j++) {
-    DepArgument arg = args[j];
+  for (int j = 0; j < args->length(); j++) {
+    DepArgument arg = args->at(j);
     bool put_star = false;
     if (arg.is_null())  continue;
     const char* what;
@@ -571,31 +586,33 @@
 void Dependencies::DepStream::log_dependency(Klass* witness) {
   if (_deps == NULL && xtty == NULL)  return;  // fast cutout for runtime
   ResourceMark rm;
-  int nargs = argument_count();
-  DepArgument args[max_arg_count];
+  const int nargs = argument_count();
+  GrowableArray<DepArgument>* args = new GrowableArray<DepArgument>(nargs);
   for (int j = 0; j < nargs; j++) {
     if (type() == call_site_target_value) {
-      args[j] = argument_oop(j);
+      args->push(argument_oop(j));
     } else {
-      args[j] = argument(j);
+      args->push(argument(j));
     }
   }
+  int argslen = args->length();
   if (_deps != NULL && _deps->log() != NULL) {
-    Dependencies::write_dependency_to(_deps->log(),
-                                      type(), nargs, args, witness);
+    Dependencies::write_dependency_to(_deps->log(), type(), args, witness);
   } else {
-    Dependencies::write_dependency_to(xtty,
-                                      type(), nargs, args, witness);
+    Dependencies::write_dependency_to(xtty, type(), args, witness);
   }
+  guarantee(argslen == args->length(), "args array cannot grow inside nested ResoureMark scope");
 }
 
 void Dependencies::DepStream::print_dependency(Klass* witness, bool verbose) {
+  ResourceMark rm;
   int nargs = argument_count();
-  DepArgument args[max_arg_count];
+  GrowableArray<DepArgument>* args = new GrowableArray<DepArgument>(nargs);
   for (int j = 0; j < nargs; j++) {
-    args[j] = argument(j);
+    args->push(argument(j));
   }
-  Dependencies::print_dependency(type(), nargs, args, witness);
+  int argslen = args->length();
+  Dependencies::print_dependency(type(), args, witness);
   if (verbose) {
     if (_code != NULL) {
       tty->print("  code: ");
@@ -603,6 +620,7 @@
       tty->cr();
     }
   }
+  guarantee(argslen == args->length(), "args array cannot grow inside nested ResoureMark scope");
 }
 
 
--- a/src/share/vm/code/dependencies.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/dependencies.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -368,20 +368,36 @@
   void copy_to(nmethod* nm);
 
   void log_all_dependencies();
-  void log_dependency(DepType dept, int nargs, ciBaseObject* args[]) {
-    write_dependency_to(log(), dept, nargs, args);
+
+  void log_dependency(DepType dept, GrowableArray<ciBaseObject*>* args) {
+    ResourceMark rm;
+    int argslen = args->length();
+    write_dependency_to(log(), dept, args);
+    guarantee(argslen == args->length(),
+              "args array cannot grow inside nested ResoureMark scope");
   }
+
   void log_dependency(DepType dept,
                       ciBaseObject* x0,
                       ciBaseObject* x1 = NULL,
                       ciBaseObject* x2 = NULL) {
-    if (log() == NULL)  return;
-    ciBaseObject* args[max_arg_count];
-    args[0] = x0;
-    args[1] = x1;
-    args[2] = x2;
-    assert(2 < max_arg_count, "");
-    log_dependency(dept, dep_args(dept), args);
+    if (log() == NULL) {
+      return;
+    }
+    ResourceMark rm;
+    GrowableArray<ciBaseObject*>* ciargs =
+                new GrowableArray<ciBaseObject*>(dep_args(dept));
+    assert (x0 != NULL, "no log x0");
+    ciargs->push(x0);
+
+    if (x1 != NULL) {
+      ciargs->push(x1);
+    }
+    if (x2 != NULL) {
+      ciargs->push(x2);
+    }
+    assert(ciargs->length() == dep_args(dept), "");
+    log_dependency(dept, ciargs);
   }
 
   class DepArgument : public ResourceObj {
@@ -404,20 +420,8 @@
     Metadata* metadata_value() const { assert(!_is_oop && _valid, "must be"); return (Metadata*) _value; }
   };
 
-  static void write_dependency_to(CompileLog* log,
-                                  DepType dept,
-                                  int nargs, ciBaseObject* args[],
-                                  Klass* witness = NULL);
-  static void write_dependency_to(CompileLog* log,
-                                  DepType dept,
-                                  int nargs, DepArgument args[],
-                                  Klass* witness = NULL);
-  static void write_dependency_to(xmlStream* xtty,
-                                  DepType dept,
-                                  int nargs, DepArgument args[],
-                                  Klass* witness = NULL);
   static void print_dependency(DepType dept,
-                               int nargs, DepArgument args[],
+                               GrowableArray<DepArgument>* args,
                                Klass* witness = NULL);
 
  private:
@@ -426,6 +430,18 @@
 
   static Klass* ctxk_encoded_as_null(DepType dept, Metadata* x);
 
+  static void write_dependency_to(CompileLog* log,
+                                  DepType dept,
+                                  GrowableArray<ciBaseObject*>* args,
+                                  Klass* witness = NULL);
+  static void write_dependency_to(CompileLog* log,
+                                  DepType dept,
+                                  GrowableArray<DepArgument>* args,
+                                  Klass* witness = NULL);
+  static void write_dependency_to(xmlStream* xtty,
+                                  DepType dept,
+                                  GrowableArray<DepArgument>* args,
+                                  Klass* witness = NULL);
  public:
   // Use this to iterate over an nmethod's dependency set.
   // Works on new and old dependency sets.
--- a/src/share/vm/code/nmethod.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/nmethod.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -49,6 +49,8 @@
 
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
 
+unsigned char nmethod::_global_unloading_clock = 0;
+
 #ifdef DTRACE_ENABLED
 
 // Only bother with this argument setup if dtrace is available
@@ -384,27 +386,30 @@
   set_exception_cache(new_entry);
 }
 
-void nmethod::remove_from_exception_cache(ExceptionCache* ec) {
+void nmethod::clean_exception_cache(BoolObjectClosure* is_alive) {
   ExceptionCache* prev = NULL;
   ExceptionCache* curr = exception_cache();
-  assert(curr != NULL, "nothing to remove");
-  // find the previous and next entry of ec
-  while (curr != ec) {
-    prev = curr;
-    curr = curr->next();
-    assert(curr != NULL, "ExceptionCache not found");
+
+  while (curr != NULL) {
+    ExceptionCache* next = curr->next();
+
+    Klass* ex_klass = curr->exception_type();
+    if (ex_klass != NULL && !ex_klass->is_loader_alive(is_alive)) {
+      if (prev == NULL) {
+        set_exception_cache(next);
+      } else {
+        prev->set_next(next);
+      }
+      delete curr;
+      // prev stays the same.
+    } else {
+      prev = curr;
+    }
+
+    curr = next;
   }
-  // now: curr == ec
-  ExceptionCache* next = curr->next();
-  if (prev == NULL) {
-    set_exception_cache(next);
-  } else {
-    prev->set_next(next);
-  }
-  delete curr;
 }
 
-
 // public method for accessing the exception cache
 // These are the public access methods.
 address nmethod::handler_for_exception_and_pc(Handle exception, address pc) {
@@ -463,6 +468,7 @@
 // Fill in default values for various flag fields
 void nmethod::init_defaults() {
   _state                      = in_use;
+  _unloading_clock            = 0;
   _marked_for_reclamation     = 0;
   _has_flushed_dependencies   = 0;
   _has_unsafe_access          = 0;
@@ -481,7 +487,11 @@
   _oops_do_mark_link       = NULL;
   _jmethod_id              = NULL;
   _osr_link                = NULL;
-  _scavenge_root_link      = NULL;
+  if (UseG1GC) {
+    _unloading_next        = NULL;
+  } else {
+    _scavenge_root_link    = NULL;
+  }
   _scavenge_root_state     = 0;
   _compiler                = NULL;
 #if INCLUDE_RTM_OPT
@@ -1163,7 +1173,7 @@
     switch(iter.type()) {
       case relocInfo::virtual_call_type:
       case relocInfo::opt_virtual_call_type: {
-        CompiledIC *ic = CompiledIC_at(iter.reloc());
+        CompiledIC *ic = CompiledIC_at(&iter);
         // Ok, to lookup references to zombies here
         CodeBlob *cb = CodeCache::find_blob_unsafe(ic->ic_destination());
         if( cb != NULL && cb->is_nmethod() ) {
@@ -1187,6 +1197,77 @@
   }
 }
 
+void nmethod::verify_clean_inline_caches() {
+  assert_locked_or_safepoint(CompiledIC_lock);
+
+  // If the method is not entrant or zombie then a JMP is plastered over the
+  // first few bytes.  If an oop in the old code was there, that oop
+  // should not get GC'd.  Skip the first few bytes of oops on
+  // not-entrant methods.
+  address low_boundary = verified_entry_point();
+  if (!is_in_use()) {
+    low_boundary += NativeJump::instruction_size;
+    // %%% Note:  On SPARC we patch only a 4-byte trap, not a full NativeJump.
+    // This means that the low_boundary is going to be a little too high.
+    // This shouldn't matter, since oops of non-entrant methods are never used.
+    // In fact, why are we bothering to look at oops in a non-entrant method??
+  }
+
+  ResourceMark rm;
+  RelocIterator iter(this, low_boundary);
+  while(iter.next()) {
+    switch(iter.type()) {
+      case relocInfo::virtual_call_type:
+      case relocInfo::opt_virtual_call_type: {
+        CompiledIC *ic = CompiledIC_at(&iter);
+        // Ok, to lookup references to zombies here
+        CodeBlob *cb = CodeCache::find_blob_unsafe(ic->ic_destination());
+        if( cb != NULL && cb->is_nmethod() ) {
+          nmethod* nm = (nmethod*)cb;
+          // Verify that inline caches pointing to both zombie and not_entrant methods are clean
+          if (!nm->is_in_use() || (nm->method()->code() != nm)) {
+            assert(ic->is_clean(), "IC should be clean");
+          }
+        }
+        break;
+      }
+      case relocInfo::static_call_type: {
+        CompiledStaticCall *csc = compiledStaticCall_at(iter.reloc());
+        CodeBlob *cb = CodeCache::find_blob_unsafe(csc->destination());
+        if( cb != NULL && cb->is_nmethod() ) {
+          nmethod* nm = (nmethod*)cb;
+          // Verify that inline caches pointing to both zombie and not_entrant methods are clean
+          if (!nm->is_in_use() || (nm->method()->code() != nm)) {
+            assert(csc->is_clean(), "IC should be clean");
+          }
+        }
+        break;
+      }
+    }
+  }
+}
+
+int nmethod::verify_icholder_relocations() {
+  int count = 0;
+
+  RelocIterator iter(this);
+  while(iter.next()) {
+    if (iter.type() == relocInfo::virtual_call_type) {
+      if (CompiledIC::is_icholder_call_site(iter.virtual_call_reloc())) {
+        CompiledIC *ic = CompiledIC_at(&iter);
+        if (TraceCompiledIC) {
+          tty->print("noticed icholder " INTPTR_FORMAT " ", p2i(ic->cached_icholder()));
+          ic->print();
+        }
+        assert(ic->cached_icholder() != NULL, "must be non-NULL");
+        count++;
+      }
+    }
+  }
+
+  return count;
+}
+
 // This is a private interface with the sweeper.
 void nmethod::mark_as_seen_on_stack() {
   assert(is_alive(), "Must be an alive method");
@@ -1219,6 +1300,23 @@
   mdo->inc_decompile_count();
 }
 
+void nmethod::increase_unloading_clock() {
+  _global_unloading_clock++;
+  if (_global_unloading_clock == 0) {
+    // _nmethods are allocated with _unloading_clock == 0,
+    // so 0 is never used as a clock value.
+    _global_unloading_clock = 1;
+  }
+}
+
+void nmethod::set_unloading_clock(unsigned char unloading_clock) {
+  OrderAccess::release_store((volatile jubyte*)&_unloading_clock, unloading_clock);
+}
+
+unsigned char nmethod::unloading_clock() {
+  return (unsigned char)OrderAccess::load_acquire((volatile jubyte*)&_unloading_clock);
+}
+
 void nmethod::make_unloaded(BoolObjectClosure* is_alive, oop cause) {
 
   post_compiled_method_unload();
@@ -1264,6 +1362,10 @@
     // for later on.
     CodeCache::set_needs_cache_clean(true);
   }
+
+  // Unregister must be done before the state change
+  Universe::heap()->unregister_nmethod(this);
+
   _state = unloaded;
 
   // Log the unloading.
@@ -1618,6 +1720,35 @@
   set_unload_reported();
 }
 
+void static clean_ic_if_metadata_is_dead(CompiledIC *ic, BoolObjectClosure *is_alive) {
+  if (ic->is_icholder_call()) {
+    // The only exception is compiledICHolder oops which may
+    // yet be marked below. (We check this further below).
+    CompiledICHolder* cichk_oop = ic->cached_icholder();
+    if (cichk_oop->holder_method()->method_holder()->is_loader_alive(is_alive) &&
+        cichk_oop->holder_klass()->is_loader_alive(is_alive)) {
+      return;
+    }
+  } else {
+    Metadata* ic_oop = ic->cached_metadata();
+    if (ic_oop != NULL) {
+      if (ic_oop->is_klass()) {
+        if (((Klass*)ic_oop)->is_loader_alive(is_alive)) {
+          return;
+        }
+      } else if (ic_oop->is_method()) {
+        if (((Method*)ic_oop)->method_holder()->is_loader_alive(is_alive)) {
+          return;
+        }
+      } else {
+        ShouldNotReachHere();
+      }
+    }
+  }
+
+  ic->set_to_clean();
+}
+
 // This is called at the end of the strong tracing/marking phase of a
 // GC to unload an nmethod if it contains otherwise unreachable
 // oops.
@@ -1650,15 +1781,7 @@
   }
 
   // Exception cache
-  ExceptionCache* ec = exception_cache();
-  while (ec != NULL) {
-    Klass* ex_klass = ec->exception_type();
-    ExceptionCache* next_ec = ec->next();
-    if (ex_klass != NULL && !ex_klass->is_loader_alive(is_alive)) {
-      remove_from_exception_cache(ec);
-    }
-    ec = next_ec;
-  }
+  clean_exception_cache(is_alive);
 
   // If class unloading occurred we first iterate over all inline caches and
   // clear ICs where the cached oop is referring to an unloaded klass or method.
@@ -1668,32 +1791,8 @@
     RelocIterator iter(this, low_boundary);
     while(iter.next()) {
       if (iter.type() == relocInfo::virtual_call_type) {
-        CompiledIC *ic = CompiledIC_at(iter.reloc());
-        if (ic->is_icholder_call()) {
-          // The only exception is compiledICHolder oops which may
-          // yet be marked below. (We check this further below).
-          CompiledICHolder* cichk_oop = ic->cached_icholder();
-          if (cichk_oop->holder_method()->method_holder()->is_loader_alive(is_alive) &&
-              cichk_oop->holder_klass()->is_loader_alive(is_alive)) {
-            continue;
-          }
-        } else {
-          Metadata* ic_oop = ic->cached_metadata();
-          if (ic_oop != NULL) {
-            if (ic_oop->is_klass()) {
-              if (((Klass*)ic_oop)->is_loader_alive(is_alive)) {
-                continue;
-              }
-            } else if (ic_oop->is_method()) {
-              if (((Method*)ic_oop)->method_holder()->is_loader_alive(is_alive)) {
-                continue;
-              }
-            } else {
-              ShouldNotReachHere();
-            }
-          }
-        }
-        ic->set_to_clean();
+        CompiledIC *ic = CompiledIC_at(&iter);
+        clean_ic_if_metadata_is_dead(ic, is_alive);
       }
     }
   }
@@ -1731,6 +1830,175 @@
   verify_metadata_loaders(low_boundary, is_alive);
 }
 
+template <class CompiledICorStaticCall>
+static bool clean_if_nmethod_is_unloaded(CompiledICorStaticCall *ic, address addr, BoolObjectClosure *is_alive, nmethod* from) {
+  // Ok, to lookup references to zombies here
+  CodeBlob *cb = CodeCache::find_blob_unsafe(addr);
+  if (cb != NULL && cb->is_nmethod()) {
+    nmethod* nm = (nmethod*)cb;
+
+    if (nm->unloading_clock() != nmethod::global_unloading_clock()) {
+      // The nmethod has not been processed yet.
+      return true;
+    }
+
+    // Clean inline caches pointing to both zombie and not_entrant methods
+    if (!nm->is_in_use() || (nm->method()->code() != nm)) {
+      ic->set_to_clean();
+      assert(ic->is_clean(), err_msg("nmethod " PTR_FORMAT "not clean %s", from, from->method()->name_and_sig_as_C_string()));
+    }
+  }
+
+  return false;
+}
+
+static bool clean_if_nmethod_is_unloaded(CompiledIC *ic, BoolObjectClosure *is_alive, nmethod* from) {
+  return clean_if_nmethod_is_unloaded(ic, ic->ic_destination(), is_alive, from);
+}
+
+static bool clean_if_nmethod_is_unloaded(CompiledStaticCall *csc, BoolObjectClosure *is_alive, nmethod* from) {
+  return clean_if_nmethod_is_unloaded(csc, csc->destination(), is_alive, from);
+}
+
+bool nmethod::do_unloading_parallel(BoolObjectClosure* is_alive, bool unloading_occurred) {
+  ResourceMark rm;
+
+  // Make sure the oop's ready to receive visitors
+  assert(!is_zombie() && !is_unloaded(),
+         "should not call follow on zombie or unloaded nmethod");
+
+  // If the method is not entrant then a JMP is plastered over the
+  // first few bytes.  If an oop in the old code was there, that oop
+  // should not get GC'd.  Skip the first few bytes of oops on
+  // not-entrant methods.
+  address low_boundary = verified_entry_point();
+  if (is_not_entrant()) {
+    low_boundary += NativeJump::instruction_size;
+    // %%% Note:  On SPARC we patch only a 4-byte trap, not a full NativeJump.
+    // (See comment above.)
+  }
+
+  // The RedefineClasses() API can cause the class unloading invariant
+  // to no longer be true. See jvmtiExport.hpp for details.
+  // Also, leave a debugging breadcrumb in local flag.
+  bool a_class_was_redefined = JvmtiExport::has_redefined_a_class();
+  if (a_class_was_redefined) {
+    // This set of the unloading_occurred flag is done before the
+    // call to post_compiled_method_unload() so that the unloading
+    // of this nmethod is reported.
+    unloading_occurred = true;
+  }
+
+  // Exception cache
+  clean_exception_cache(is_alive);
+
+  bool is_unloaded = false;
+  bool postponed = false;
+
+  RelocIterator iter(this, low_boundary);
+  while(iter.next()) {
+
+    switch (iter.type()) {
+
+    case relocInfo::virtual_call_type:
+      if (unloading_occurred) {
+        // If class unloading occurred we first iterate over all inline caches and
+        // clear ICs where the cached oop is referring to an unloaded klass or method.
+        clean_ic_if_metadata_is_dead(CompiledIC_at(&iter), is_alive);
+      }
+
+      postponed |= clean_if_nmethod_is_unloaded(CompiledIC_at(&iter), is_alive, this);
+      break;
+
+    case relocInfo::opt_virtual_call_type:
+      postponed |= clean_if_nmethod_is_unloaded(CompiledIC_at(&iter), is_alive, this);
+      break;
+
+    case relocInfo::static_call_type:
+      postponed |= clean_if_nmethod_is_unloaded(compiledStaticCall_at(iter.reloc()), is_alive, this);
+      break;
+
+    case relocInfo::oop_type:
+      if (!is_unloaded) {
+        // Unload check
+        oop_Relocation* r = iter.oop_reloc();
+        // Traverse those oops directly embedded in the code.
+        // Other oops (oop_index>0) are seen as part of scopes_oops.
+        assert(1 == (r->oop_is_immediate()) +
+                  (r->oop_addr() >= oops_begin() && r->oop_addr() < oops_end()),
+              "oop must be found in exactly one place");
+        if (r->oop_is_immediate() && r->oop_value() != NULL) {
+          if (can_unload(is_alive, r->oop_addr(), unloading_occurred)) {
+            is_unloaded = true;
+          }
+        }
+      }
+      break;
+
+    }
+  }
+
+  if (is_unloaded) {
+    return postponed;
+  }
+
+  // Scopes
+  for (oop* p = oops_begin(); p < oops_end(); p++) {
+    if (*p == Universe::non_oop_word())  continue;  // skip non-oops
+    if (can_unload(is_alive, p, unloading_occurred)) {
+      is_unloaded = true;
+      break;
+    }
+  }
+
+  if (is_unloaded) {
+    return postponed;
+  }
+
+  // Ensure that all metadata is still alive
+  verify_metadata_loaders(low_boundary, is_alive);
+
+  return postponed;
+}
+
+void nmethod::do_unloading_parallel_postponed(BoolObjectClosure* is_alive, bool unloading_occurred) {
+  ResourceMark rm;
+
+  // Make sure the oop's ready to receive visitors
+  assert(!is_zombie(),
+         "should not call follow on zombie nmethod");
+
+  // If the method is not entrant then a JMP is plastered over the
+  // first few bytes.  If an oop in the old code was there, that oop
+  // should not get GC'd.  Skip the first few bytes of oops on
+  // not-entrant methods.
+  address low_boundary = verified_entry_point();
+  if (is_not_entrant()) {
+    low_boundary += NativeJump::instruction_size;
+    // %%% Note:  On SPARC we patch only a 4-byte trap, not a full NativeJump.
+    // (See comment above.)
+  }
+
+  RelocIterator iter(this, low_boundary);
+  while(iter.next()) {
+
+    switch (iter.type()) {
+
+    case relocInfo::virtual_call_type:
+      clean_if_nmethod_is_unloaded(CompiledIC_at(&iter), is_alive, this);
+      break;
+
+    case relocInfo::opt_virtual_call_type:
+      clean_if_nmethod_is_unloaded(CompiledIC_at(&iter), is_alive, this);
+      break;
+
+    case relocInfo::static_call_type:
+      clean_if_nmethod_is_unloaded(compiledStaticCall_at(iter.reloc()), is_alive, this);
+      break;
+    }
+  }
+}
+
 #ifdef ASSERT
 
 class CheckClass : AllStatic {
@@ -1777,7 +2045,7 @@
     // compiled code is maintaining a link to dead metadata.
     address static_call_addr = NULL;
     if (iter.type() == relocInfo::opt_virtual_call_type) {
-      CompiledIC* cic = CompiledIC_at(iter.reloc());
+      CompiledIC* cic = CompiledIC_at(&iter);
       if (!cic->is_call_to_interpreted()) {
         static_call_addr = iter.addr();
       }
@@ -1829,7 +2097,7 @@
         }
       } else if (iter.type() == relocInfo::virtual_call_type) {
         // Check compiledIC holders associated with this nmethod
-        CompiledIC *ic = CompiledIC_at(iter.reloc());
+        CompiledIC *ic = CompiledIC_at(&iter);
         if (ic->is_icholder_call()) {
           CompiledICHolder* cichk = ic->cached_icholder();
           f(cichk->holder_method());
@@ -1947,7 +2215,7 @@
     assert(cur != NULL, "not NULL-terminated");
     nmethod* next = cur->_oops_do_mark_link;
     cur->_oops_do_mark_link = NULL;
-    cur->fix_oop_relocations();
+    cur->verify_oop_relocations();
     NOT_PRODUCT(if (TraceScavenge)  cur->print_on(tty, "oops_do, unmark"));
     cur = next;
   }
@@ -2489,6 +2757,10 @@
 };
 
 void nmethod::verify_scavenge_root_oops() {
+  if (UseG1GC) {
+    return;
+  }
+
   if (!on_scavenge_root_list()) {
     // Actually look inside, to verify the claim that it's clean.
     DebugScavengeRoot debug_scavenge_root(this);
@@ -2932,7 +3204,7 @@
     case relocInfo::virtual_call_type:
     case relocInfo::opt_virtual_call_type: {
       VerifyMutexLocker mc(CompiledIC_lock);
-      CompiledIC_at(iter.reloc())->print();
+      CompiledIC_at(&iter)->print();
       break;
     }
     case relocInfo::static_call_type:
--- a/src/share/vm/code/nmethod.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/code/nmethod.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -116,6 +116,11 @@
   friend class NMethodSweeper;
   friend class CodeCache;  // scavengable oops
  private:
+
+  // GC support to help figure out if an nmethod has been
+  // cleaned/unloaded by the current GC.
+  static unsigned char _global_unloading_clock;
+
   // Shared fields for all nmethod's
   Method*   _method;
   int       _entry_bci;        // != InvocationEntryBci if this nmethod is an on-stack replacement method
@@ -123,7 +128,13 @@
 
   // To support simple linked-list chaining of nmethods:
   nmethod*  _osr_link;         // from InstanceKlass::osr_nmethods_head
-  nmethod*  _scavenge_root_link; // from CodeCache::scavenge_root_nmethods
+
+  union {
+    // Used by G1 to chain nmethods.
+    nmethod* _unloading_next;
+    // Used by non-G1 GCs to chain nmethods.
+    nmethod* _scavenge_root_link; // from CodeCache::scavenge_root_nmethods
+  };
 
   static nmethod* volatile _oops_do_mark_nmethods;
   nmethod*        volatile _oops_do_mark_link;
@@ -185,6 +196,8 @@
   // Protected by Patching_lock
   volatile unsigned char _state;             // {alive, not_entrant, zombie, unloaded}
 
+  volatile unsigned char _unloading_clock;   // Incremented after GC unloaded/cleaned the nmethod
+
 #ifdef ASSERT
   bool _oops_are_stale;  // indicates that it's no longer safe to access oops section
 #endif
@@ -442,6 +455,15 @@
   bool  unload_reported()                         { return _unload_reported; }
   void  set_unload_reported()                     { _unload_reported = true; }
 
+  void set_unloading_next(nmethod* next)          { _unloading_next = next; }
+  nmethod* unloading_next()                       { return _unloading_next; }
+
+  static unsigned char global_unloading_clock()   { return _global_unloading_clock; }
+  static void increase_unloading_clock();
+
+  void set_unloading_clock(unsigned char unloading_clock);
+  unsigned char unloading_clock();
+
   bool  is_marked_for_deoptimization() const      { return _marked_for_deoptimization; }
   void  mark_for_deoptimization()                 { _marked_for_deoptimization = true; }
 
@@ -534,7 +556,7 @@
   void set_exception_cache(ExceptionCache *ec)    { _exception_cache = ec; }
   address handler_for_exception_and_pc(Handle exception, address pc);
   void add_handler_for_exception_and_pc(Handle exception, address pc, address handler);
-  void remove_from_exception_cache(ExceptionCache* ec);
+  void clean_exception_cache(BoolObjectClosure* is_alive);
 
   // implicit exceptions support
   address continuation_for_implicit_exception(address pc);
@@ -557,6 +579,10 @@
     return (addr >= code_begin() && addr < verified_entry_point());
   }
 
+  // Verify calls to dead methods have been cleaned.
+  void verify_clean_inline_caches();
+  // Verify and count cached icholder relocations.
+  int  verify_icholder_relocations();
   // Check that all metadata is still alive
   void verify_metadata_loaders(address low_boundary, BoolObjectClosure* is_alive);
 
@@ -582,6 +608,10 @@
 
   // GC support
   void do_unloading(BoolObjectClosure* is_alive, bool unloading_occurred);
+  //  The parallel versions are used by G1.
+  bool do_unloading_parallel(BoolObjectClosure* is_alive, bool unloading_occurred);
+  void do_unloading_parallel_postponed(BoolObjectClosure* is_alive, bool unloading_occurred);
+  //  Unload a nmethod if the *root object is dead.
   bool can_unload(BoolObjectClosure* is_alive, oop* root, bool unloading_occurred);
 
   void preserve_callee_argument_oops(frame fr, const RegisterMap *reg_map,
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_CMSOOPCLOSURES_HPP
 
 #include "memory/genOopClosures.hpp"
+#include "memory/iterator.hpp"
 
 /////////////////////////////////////////////////////////////////
 // Closures used by ConcurrentMarkSweepGeneration's collector
@@ -48,33 +49,13 @@
     }                                                     \
   }
 
-// Applies the given oop closure to all oops in all klasses visited.
-class CMKlassClosure : public KlassClosure {
-  friend class CMSOopClosure;
-  friend class CMSOopsInGenClosure;
-
-  OopClosure* _oop_closure;
-
-  // Used when _oop_closure couldn't be set in an initialization list.
-  void initialize(OopClosure* oop_closure) {
-    assert(_oop_closure == NULL, "Should only be called once");
-    _oop_closure = oop_closure;
-  }
+// TODO: This duplication of the MetadataAwareOopClosure class is only needed
+//       because some CMS OopClosures derive from OopsInGenClosure. It would be
+//       good to get rid of them completely.
+class MetadataAwareOopsInGenClosure: public OopsInGenClosure {
+  KlassToOopClosure _klass_closure;
  public:
-  CMKlassClosure(OopClosure* oop_closure = NULL) : _oop_closure(oop_closure) { }
-
-  void do_klass(Klass* k);
-};
-
-// The base class for all CMS marking closures.
-// It's used to proxy through the metadata to the oops defined in them.
-class CMSOopClosure: public ExtendedOopClosure {
-  CMKlassClosure      _klass_closure;
- public:
-  CMSOopClosure() : ExtendedOopClosure() {
-    _klass_closure.initialize(this);
-  }
-  CMSOopClosure(ReferenceProcessor* rp) : ExtendedOopClosure(rp) {
+  MetadataAwareOopsInGenClosure() {
     _klass_closure.initialize(this);
   }
 
@@ -87,26 +68,7 @@
   virtual void do_class_loader_data(ClassLoaderData* cld);
 };
 
-// TODO: This duplication of the CMSOopClosure class is only needed because
-//       some CMS OopClosures derive from OopsInGenClosure. It would be good
-//       to get rid of them completely.
-class CMSOopsInGenClosure: public OopsInGenClosure {
-  CMKlassClosure _klass_closure;
- public:
-  CMSOopsInGenClosure() {
-    _klass_closure.initialize(this);
-  }
-
-  virtual bool do_metadata()    { return do_metadata_nv(); }
-  inline  bool do_metadata_nv() { return true; }
-
-  virtual void do_klass(Klass* k);
-  void do_klass_nv(Klass* k);
-
-  virtual void do_class_loader_data(ClassLoaderData* cld);
-};
-
-class MarkRefsIntoClosure: public CMSOopsInGenClosure {
+class MarkRefsIntoClosure: public MetadataAwareOopsInGenClosure {
  private:
   const MemRegion _span;
   CMSBitMap*      _bitMap;
@@ -122,7 +84,7 @@
   }
 };
 
-class Par_MarkRefsIntoClosure: public CMSOopsInGenClosure {
+class Par_MarkRefsIntoClosure: public MetadataAwareOopsInGenClosure {
  private:
   const MemRegion _span;
   CMSBitMap*      _bitMap;
@@ -140,7 +102,7 @@
 
 // A variant of the above used in certain kinds of CMS
 // marking verification.
-class MarkRefsIntoVerifyClosure: public CMSOopsInGenClosure {
+class MarkRefsIntoVerifyClosure: public MetadataAwareOopsInGenClosure {
  private:
   const MemRegion _span;
   CMSBitMap*      _verification_bm;
@@ -159,7 +121,7 @@
 };
 
 // The non-parallel version (the parallel version appears further below).
-class PushAndMarkClosure: public CMSOopClosure {
+class PushAndMarkClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   MemRegion     _span;
@@ -193,7 +155,7 @@
 // synchronization (for instance, via CAS). The marking stack
 // used in the non-parallel case above is here replaced with
 // an OopTaskQueue structure to allow efficient work stealing.
-class Par_PushAndMarkClosure: public CMSOopClosure {
+class Par_PushAndMarkClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   MemRegion     _span;
@@ -218,7 +180,7 @@
 };
 
 // The non-parallel version (the parallel version appears further below).
-class MarkRefsIntoAndScanClosure: public CMSOopsInGenClosure {
+class MarkRefsIntoAndScanClosure: public MetadataAwareOopsInGenClosure {
  private:
   MemRegion          _span;
   CMSBitMap*         _bit_map;
@@ -262,7 +224,7 @@
 // stack and the bitMap are shared, so access needs to be suitably
 // sycnhronized. An OopTaskQueue structure, supporting efficient
 // workstealing, replaces a CMSMarkStack for storing grey objects.
-class Par_MarkRefsIntoAndScanClosure: public CMSOopsInGenClosure {
+class Par_MarkRefsIntoAndScanClosure: public MetadataAwareOopsInGenClosure {
  private:
   MemRegion              _span;
   CMSBitMap*             _bit_map;
@@ -291,7 +253,7 @@
 // This closure is used during the concurrent marking phase
 // following the first checkpoint. Its use is buried in
 // the closure MarkFromRootsClosure.
-class PushOrMarkClosure: public CMSOopClosure {
+class PushOrMarkClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector*   _collector;
   MemRegion       _span;
@@ -324,7 +286,7 @@
 // This closure is used during the concurrent marking phase
 // following the first checkpoint. Its use is buried in
 // the closure Par_MarkFromRootsClosure.
-class Par_PushOrMarkClosure: public CMSOopClosure {
+class Par_PushOrMarkClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector*    _collector;
   MemRegion        _whole_span;
@@ -364,7 +326,7 @@
 // processing phase of the CMS final checkpoint step, as
 // well as during the concurrent precleaning of the discovered
 // reference lists.
-class CMSKeepAliveClosure: public CMSOopClosure {
+class CMSKeepAliveClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   const MemRegion _span;
@@ -384,7 +346,7 @@
   inline void do_oop_nv(narrowOop* p) { CMSKeepAliveClosure::do_oop_work(p); }
 };
 
-class CMSInnerParMarkAndPushClosure: public CMSOopClosure {
+class CMSInnerParMarkAndPushClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   MemRegion     _span;
@@ -405,7 +367,7 @@
 // A parallel (MT) version of the above, used when
 // reference processing is parallel; the only difference
 // is in the do_oop method.
-class CMSParKeepAliveClosure: public CMSOopClosure {
+class CMSParKeepAliveClosure: public MetadataAwareOopClosure {
  private:
   MemRegion     _span;
   OopTaskQueue* _work_queue;
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/cmsOopClosures.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -44,33 +44,20 @@
   }
 }
 
-// CMSOopClosure and CMSoopsInGenClosure are duplicated,
+// MetadataAwareOopClosure and MetadataAwareOopsInGenClosure are duplicated,
 // until we get rid of OopsInGenClosure.
 
-inline void CMSOopClosure::do_klass(Klass* k)       { do_klass_nv(k); }
-inline void CMSOopsInGenClosure::do_klass(Klass* k) { do_klass_nv(k); }
-
-inline void CMSOopClosure::do_klass_nv(Klass* k) {
+inline void MetadataAwareOopsInGenClosure::do_klass_nv(Klass* k) {
   ClassLoaderData* cld = k->class_loader_data();
   do_class_loader_data(cld);
 }
-inline void CMSOopsInGenClosure::do_klass_nv(Klass* k) {
-  ClassLoaderData* cld = k->class_loader_data();
-  do_class_loader_data(cld);
-}
+inline void MetadataAwareOopsInGenClosure::do_klass(Klass* k) { do_klass_nv(k); }
 
-inline void CMSOopClosure::do_class_loader_data(ClassLoaderData* cld) {
-  assert(_klass_closure._oop_closure == this, "Must be");
-
-  bool claim = true;  // Must claim the class loader data before processing.
-  cld->oops_do(_klass_closure._oop_closure, &_klass_closure, claim);
-}
-inline void CMSOopsInGenClosure::do_class_loader_data(ClassLoaderData* cld) {
+inline void MetadataAwareOopsInGenClosure::do_class_loader_data(ClassLoaderData* cld) {
   assert(_klass_closure._oop_closure == this, "Must be");
 
   bool claim = true;  // Must claim the class loader data before processing.
   cld->oops_do(_klass_closure._oop_closure, &_klass_closure, claim);
 }
 
-
 #endif // SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_CMSOOPCLOSURES_INLINE_HPP
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -795,53 +795,6 @@
   }
 }
 
-// Apply the given closure to each oop in the space \intersect memory region.
-void CompactibleFreeListSpace::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  assert_lock_strong(freelistLock());
-  if (is_empty()) {
-    return;
-  }
-  MemRegion cur = MemRegion(bottom(), end());
-  mr = mr.intersection(cur);
-  if (mr.is_empty()) {
-    return;
-  }
-  if (mr.equals(cur)) {
-    oop_iterate(cl);
-    return;
-  }
-  assert(mr.end() <= end(), "just took an intersection above");
-  HeapWord* obj_addr = block_start(mr.start());
-  HeapWord* t = mr.end();
-
-  SpaceMemRegionOopsIterClosure smr_blk(cl, mr);
-  if (block_is_obj(obj_addr)) {
-    // Handle first object specially.
-    oop obj = oop(obj_addr);
-    obj_addr += adjustObjectSize(obj->oop_iterate(&smr_blk));
-  } else {
-    FreeChunk* fc = (FreeChunk*)obj_addr;
-    obj_addr += fc->size();
-  }
-  while (obj_addr < t) {
-    HeapWord* obj = obj_addr;
-    obj_addr += block_size(obj_addr);
-    // If "obj_addr" is not greater than top, then the
-    // entire object "obj" is within the region.
-    if (obj_addr <= t) {
-      if (block_is_obj(obj)) {
-        oop(obj)->oop_iterate(cl);
-      }
-    } else {
-      // "obj" extends beyond end of region
-      if (block_is_obj(obj)) {
-        oop(obj)->oop_iterate(&smr_blk);
-      }
-      break;
-    }
-  }
-}
-
 // NOTE: In the following methods, in order to safely be able to
 // apply the closure to an object, we need to be sure that the
 // object has been initialized. We are guaranteed that an object
@@ -900,42 +853,60 @@
                                                   UpwardsObjectClosure* cl) {
   assert_locked(freelistLock());
   NOT_PRODUCT(verify_objects_initialized());
-  Space::object_iterate_mem(mr, cl);
+  assert(!mr.is_empty(), "Should be non-empty");
+  // We use MemRegion(bottom(), end()) rather than used_region() below
+  // because the two are not necessarily equal for some kinds of
+  // spaces, in particular, certain kinds of free list spaces.
+  // We could use the more complicated but more precise:
+  // MemRegion(used_region().start(), round_to(used_region().end(), CardSize))
+  // but the slight imprecision seems acceptable in the assertion check.
+  assert(MemRegion(bottom(), end()).contains(mr),
+         "Should be within used space");
+  HeapWord* prev = cl->previous();   // max address from last time
+  if (prev >= mr.end()) { // nothing to do
+    return;
+  }
+  // This assert will not work when we go from cms space to perm
+  // space, and use same closure. Easy fix deferred for later. XXX YSR
+  // assert(prev == NULL || contains(prev), "Should be within space");
+
+  bool last_was_obj_array = false;
+  HeapWord *blk_start_addr, *region_start_addr;
+  if (prev > mr.start()) {
+    region_start_addr = prev;
+    blk_start_addr    = prev;
+    // The previous invocation may have pushed "prev" beyond the
+    // last allocated block yet there may be still be blocks
+    // in this region due to a particular coalescing policy.
+    // Relax the assertion so that the case where the unallocated
+    // block is maintained and "prev" is beyond the unallocated
+    // block does not cause the assertion to fire.
+    assert((BlockOffsetArrayUseUnallocatedBlock &&
+            (!is_in(prev))) ||
+           (blk_start_addr == block_start(region_start_addr)), "invariant");
+  } else {
+    region_start_addr = mr.start();
+    blk_start_addr    = block_start(region_start_addr);
+  }
+  HeapWord* region_end_addr = mr.end();
+  MemRegion derived_mr(region_start_addr, region_end_addr);
+  while (blk_start_addr < region_end_addr) {
+    const size_t size = block_size(blk_start_addr);
+    if (block_is_obj(blk_start_addr)) {
+      last_was_obj_array = cl->do_object_bm(oop(blk_start_addr), derived_mr);
+    } else {
+      last_was_obj_array = false;
+    }
+    blk_start_addr += size;
+  }
+  if (!last_was_obj_array) {
+    assert((bottom() <= blk_start_addr) && (blk_start_addr <= end()),
+           "Should be within (closed) used space");
+    assert(blk_start_addr > prev, "Invariant");
+    cl->set_previous(blk_start_addr); // min address for next time
+  }
 }
 
-// Callers of this iterator beware: The closure application should
-// be robust in the face of uninitialized objects and should (always)
-// return a correct size so that the next addr + size below gives us a
-// valid block boundary. [See for instance,
-// ScanMarkedObjectsAgainCarefullyClosure::do_object_careful()
-// in ConcurrentMarkSweepGeneration.cpp.]
-HeapWord*
-CompactibleFreeListSpace::object_iterate_careful(ObjectClosureCareful* cl) {
-  assert_lock_strong(freelistLock());
-  HeapWord *addr, *last;
-  size_t size;
-  for (addr = bottom(), last  = end();
-       addr < last; addr += size) {
-    FreeChunk* fc = (FreeChunk*)addr;
-    if (fc->is_free()) {
-      // Since we hold the free list lock, which protects direct
-      // allocation in this generation by mutators, a free object
-      // will remain free throughout this iteration code.
-      size = fc->size();
-    } else {
-      // Note that the object need not necessarily be initialized,
-      // because (for instance) the free list lock does NOT protect
-      // object initialization. The closure application below must
-      // therefore be correct in the face of uninitialized objects.
-      size = cl->do_object_careful(oop(addr));
-      if (size == 0) {
-        // An unparsable object found. Signal early termination.
-        return addr;
-      }
-    }
-  }
-  return NULL;
-}
 
 // Callers of this iterator beware: The closure application should
 // be robust in the face of uninitialized objects and should (always)
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -337,10 +337,6 @@
                      unallocated_block() : end());
   }
 
-  bool is_in(const void* p) const {
-    return used_region().contains(p);
-  }
-
   virtual bool is_free_block(const HeapWord* p) const;
 
   // Resizing support
@@ -350,7 +346,6 @@
   Mutex* freelistLock() const { return &_freelistLock; }
 
   // Iteration support
-  void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
   void oop_iterate(ExtendedOopClosure* cl);
 
   void object_iterate(ObjectClosure* blk);
@@ -363,6 +358,12 @@
   // obj_is_alive() to determine whether it is safe to iterate of
   // an object.
   void safe_object_iterate(ObjectClosure* blk);
+
+  // Iterate over all objects that intersect with mr, calling "cl->do_object"
+  // on each.  There is an exception to this: if this closure has already
+  // been invoked on an object, it may skip such objects in some cases.  This is
+  // Most likely to happen in an "upwards" (ascending address) iteration of
+  // MemRegions.
   void object_iterate_mem(MemRegion mr, UpwardsObjectClosure* cl);
 
   // Requires that "mr" be entirely within the space.
@@ -371,11 +372,8 @@
   // terminate the iteration and return the address of the start of the
   // subregion that isn't done.  Return of "NULL" indicates that the
   // interation completed.
-  virtual HeapWord*
-       object_iterate_careful_m(MemRegion mr,
-                                ObjectClosureCareful* cl);
-  virtual HeapWord*
-       object_iterate_careful(ObjectClosureCareful* cl);
+ HeapWord* object_iterate_careful_m(MemRegion mr,
+                                    ObjectClosureCareful* cl);
 
   // Override: provides a DCTO_CL specific to this kind of space.
   DirtyCardToOopClosure* new_dcto_cl(ExtendedOopClosure* cl,
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -49,7 +49,7 @@
 #include "memory/genCollectedHeap.hpp"
 #include "memory/genMarkSweep.hpp"
 #include "memory/genOopClosures.inline.hpp"
-#include "memory/iterator.hpp"
+#include "memory/iterator.inline.hpp"
 #include "memory/padded.hpp"
 #include "memory/referencePolicy.hpp"
 #include "memory/resourceArea.hpp"
@@ -1570,11 +1570,11 @@
   }
 
   if (MetaspaceGC::should_concurrent_collect()) {
-      if (Verbose && PrintGCDetails) {
+    if (Verbose && PrintGCDetails) {
       gclog_or_tty->print("CMSCollector: collect for metadata allocation ");
-      }
-      return true;
-    }
+    }
+    return true;
+  }
 
   return false;
 }
@@ -3028,22 +3028,21 @@
   HandleMark  hm;
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
-  // Get a clear set of claim bits for the strong roots processing to work with.
+  // Get a clear set of claim bits for the roots processing to work with.
   ClassLoaderDataGraph::clear_claimed_marks();
 
   // Mark from roots one level into CMS
   MarkRefsIntoClosure notOlder(_span, verification_mark_bm());
   gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
 
-  gch->gen_process_strong_roots(_cmsGen->level(),
-                                true,   // younger gens are roots
-                                true,   // activate StrongRootsScope
-                                false,  // not scavenging
-                                SharedHeap::ScanningOption(roots_scanning_options()),
-                                &notOlder,
-                                true,   // walk code active on stacks
-                                NULL,
-                                NULL); // SSS: Provide correct closure
+  gch->gen_process_roots(_cmsGen->level(),
+                         true,   // younger gens are roots
+                         true,   // activate StrongRootsScope
+                         SharedHeap::ScanningOption(roots_scanning_options()),
+                         should_unload_classes(),
+                         &notOlder,
+                         NULL,
+                         NULL);  // SSS: Provide correct closure
 
   // Now mark from the roots
   MarkFromRootsClosure markFromRootsClosure(this, _span,
@@ -3094,24 +3093,24 @@
   HandleMark  hm;
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
-  // Get a clear set of claim bits for the strong roots processing to work with.
+  // Get a clear set of claim bits for the roots processing to work with.
   ClassLoaderDataGraph::clear_claimed_marks();
 
   // Mark from roots one level into CMS
   MarkRefsIntoVerifyClosure notOlder(_span, verification_mark_bm(),
                                      markBitMap());
-  CMKlassClosure klass_closure(&notOlder);
+  CLDToOopClosure cld_closure(&notOlder, true);
 
   gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
-  gch->gen_process_strong_roots(_cmsGen->level(),
-                                true,   // younger gens are roots
-                                true,   // activate StrongRootsScope
-                                false,  // not scavenging
-                                SharedHeap::ScanningOption(roots_scanning_options()),
-                                &notOlder,
-                                true,   // walk code active on stacks
-                                NULL,
-                                &klass_closure);
+
+  gch->gen_process_roots(_cmsGen->level(),
+                         true,   // younger gens are roots
+                         true,   // activate StrongRootsScope
+                         SharedHeap::ScanningOption(roots_scanning_options()),
+                         should_unload_classes(),
+                         &notOlder,
+                         NULL,
+                         &cld_closure);
 
   // Now mark from the roots
   MarkFromRootsVerifyClosure markFromRootsClosure(this, _span,
@@ -3172,16 +3171,6 @@
 }
 
 void
-ConcurrentMarkSweepGeneration::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  if (freelistLock()->owned_by_self()) {
-    Generation::oop_iterate(mr, cl);
-  } else {
-    MutexLockerEx x(freelistLock(), Mutex::_no_safepoint_check_flag);
-    Generation::oop_iterate(mr, cl);
-  }
-}
-
-void
 ConcurrentMarkSweepGeneration::oop_iterate(ExtendedOopClosure* cl) {
   if (freelistLock()->owned_by_self()) {
     Generation::oop_iterate(cl);
@@ -3308,12 +3297,10 @@
 void CMSCollector::setup_cms_unloading_and_verification_state() {
   const  bool should_verify =   VerifyBeforeGC || VerifyAfterGC || VerifyDuringGC
                              || VerifyBeforeExit;
-  const  int  rso           =   SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
+  const  int  rso           =   SharedHeap::SO_AllCodeCache;
 
   // We set the proper root for this CMS cycle here.
   if (should_unload_classes()) {   // Should unload classes this cycle
-    remove_root_scanning_option(SharedHeap::SO_AllClasses);
-    add_root_scanning_option(SharedHeap::SO_SystemClasses);
     remove_root_scanning_option(rso);  // Shrink the root set appropriately
     set_verifying(should_verify);    // Set verification state for this cycle
     return;                            // Nothing else needs to be done at this time
@@ -3321,8 +3308,6 @@
 
   // Not unloading classes this cycle
   assert(!should_unload_classes(), "Inconsitency!");
-  remove_root_scanning_option(SharedHeap::SO_SystemClasses);
-  add_root_scanning_option(SharedHeap::SO_AllClasses);
 
   if ((!verifying() || unloaded_classes_last_cycle()) && should_verify) {
     // Include symbols, strings and code cache elements to prevent their resurrection.
@@ -3688,12 +3673,6 @@
   ResourceMark rm;
   HandleMark  hm;
 
-  FalseClosure falseClosure;
-  // In the case of a synchronous collection, we will elide the
-  // remark step, so it's important to catch all the nmethod oops
-  // in this step.
-  // The final 'true' flag to gen_process_strong_roots will ensure this.
-  // If 'async' is true, we can relax the nmethod tracing.
   MarkRefsIntoClosure notOlder(_span, &_markBitMap);
   GenCollectedHeap* gch = GenCollectedHeap::heap();
 
@@ -3739,17 +3718,16 @@
       gch->set_par_threads(0);
     } else {
       // The serial version.
-      CMKlassClosure klass_closure(&notOlder);
+      CLDToOopClosure cld_closure(&notOlder, true);
       gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
-      gch->gen_process_strong_roots(_cmsGen->level(),
-                                    true,   // younger gens are roots
-                                    true,   // activate StrongRootsScope
-                                    false,  // not scavenging
-                                    SharedHeap::ScanningOption(roots_scanning_options()),
-                                    &notOlder,
-                                    true,   // walk all of code cache if (so & SO_CodeCache)
-                                    NULL,
-                                    &klass_closure);
+      gch->gen_process_roots(_cmsGen->level(),
+                             true,   // younger gens are roots
+                             true,   // activate StrongRootsScope
+                             SharedHeap::ScanningOption(roots_scanning_options()),
+                             should_unload_classes(),
+                             &notOlder,
+                             NULL,
+                             &cld_closure);
     }
   }
 
@@ -4203,7 +4181,7 @@
   pst->all_tasks_completed();
 }
 
-class Par_ConcMarkingClosure: public CMSOopClosure {
+class Par_ConcMarkingClosure: public MetadataAwareOopClosure {
  private:
   CMSCollector* _collector;
   CMSConcMarkingTask* _task;
@@ -4216,7 +4194,7 @@
  public:
   Par_ConcMarkingClosure(CMSCollector* collector, CMSConcMarkingTask* task, OopTaskQueue* work_queue,
                          CMSBitMap* bit_map, CMSMarkStack* overflow_stack):
-    CMSOopClosure(collector->ref_processor()),
+    MetadataAwareOopClosure(collector->ref_processor()),
     _collector(collector),
     _task(task),
     _span(collector->_span),
@@ -4987,7 +4965,7 @@
 }
 
 class PrecleanKlassClosure : public KlassClosure {
-  CMKlassClosure _cm_klass_closure;
+  KlassToOopClosure _cm_klass_closure;
  public:
   PrecleanKlassClosure(OopClosure* oop_closure) : _cm_klass_closure(oop_closure) {}
   void do_klass(Klass* k) {
@@ -5225,7 +5203,6 @@
   _timer.start();
   GenCollectedHeap* gch = GenCollectedHeap::heap();
   Par_MarkRefsIntoClosure par_mri_cl(_collector->_span, &(_collector->_markBitMap));
-  CMKlassClosure klass_closure(&par_mri_cl);
 
   // ---------- young gen roots --------------
   {
@@ -5241,17 +5218,19 @@
   // ---------- remaining roots --------------
   _timer.reset();
   _timer.start();
-  gch->gen_process_strong_roots(_collector->_cmsGen->level(),
-                                false,     // yg was scanned above
-                                false,     // this is parallel code
-                                false,     // not scavenging
-                                SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
-                                &par_mri_cl,
-                                true,   // walk all of code cache if (so & SO_CodeCache)
-                                NULL,
-                                &klass_closure);
+
+  CLDToOopClosure cld_closure(&par_mri_cl, true);
+
+  gch->gen_process_roots(_collector->_cmsGen->level(),
+                         false,     // yg was scanned above
+                         false,     // this is parallel code
+                         SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
+                         _collector->should_unload_classes(),
+                         &par_mri_cl,
+                         NULL,
+                         &cld_closure);
   assert(_collector->should_unload_classes()
-         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_CodeCache),
+         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache),
          "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   _timer.stop();
   if (PrintCMSStatistics != 0) {
@@ -5301,7 +5280,7 @@
 };
 
 class RemarkKlassClosure : public KlassClosure {
-  CMKlassClosure _cm_klass_closure;
+  KlassToOopClosure _cm_klass_closure;
  public:
   RemarkKlassClosure(OopClosure* oop_closure) : _cm_klass_closure(oop_closure) {}
   void do_klass(Klass* k) {
@@ -5378,17 +5357,17 @@
   // ---------- remaining roots --------------
   _timer.reset();
   _timer.start();
-  gch->gen_process_strong_roots(_collector->_cmsGen->level(),
-                                false,     // yg was scanned above
-                                false,     // this is parallel code
-                                false,     // not scavenging
-                                SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
-                                &par_mrias_cl,
-                                true,   // walk all of code cache if (so & SO_CodeCache)
-                                NULL,
-                                NULL);     // The dirty klasses will be handled below
+  gch->gen_process_roots(_collector->_cmsGen->level(),
+                         false,     // yg was scanned above
+                         false,     // this is parallel code
+                         SharedHeap::ScanningOption(_collector->CMSCollector::roots_scanning_options()),
+                         _collector->should_unload_classes(),
+                         &par_mrias_cl,
+                         NULL,
+                         NULL);     // The dirty klasses will be handled below
+
   assert(_collector->should_unload_classes()
-         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_CodeCache),
+         || (_collector->CMSCollector::roots_scanning_options() & SharedHeap::SO_AllCodeCache),
          "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   _timer.stop();
   if (PrintCMSStatistics != 0) {
@@ -5441,7 +5420,7 @@
   // We might have added oops to ClassLoaderData::_handles during the
   // concurrent marking phase. These oops point to newly allocated objects
   // that are guaranteed to be kept alive. Either by the direct allocation
-  // code, or when the young collector processes the strong roots. Hence,
+  // code, or when the young collector processes the roots. Hence,
   // we don't have to revisit the _handles block during the remark phase.
 
   // ---------- rescan dirty cards ------------
@@ -5863,7 +5842,7 @@
     cms_space,
     n_workers, workers, task_queues());
 
-  // Set up for parallel process_strong_roots work.
+  // Set up for parallel process_roots work.
   gch->set_par_threads(n_workers);
   // We won't be iterating over the cards in the card table updating
   // the younger_gen cards, so we shouldn't call the following else
@@ -5872,7 +5851,7 @@
   // gch->rem_set()->prepare_for_younger_refs_iterate(true); // parallel
 
   // The young gen rescan work will not be done as part of
-  // process_strong_roots (which currently doesn't knw how to
+  // process_roots (which currently doesn't know how to
   // parallelize such a scan), but rather will be broken up into
   // a set of parallel tasks (via the sampling that the [abortable]
   // preclean phase did of EdenSpace, plus the [two] tasks of
@@ -5969,18 +5948,18 @@
 
     gch->rem_set()->prepare_for_younger_refs_iterate(false); // Not parallel.
     GenCollectedHeap::StrongRootsScope srs(gch);
-    gch->gen_process_strong_roots(_cmsGen->level(),
-                                  true,  // younger gens as roots
-                                  false, // use the local StrongRootsScope
-                                  false, // not scavenging
-                                  SharedHeap::ScanningOption(roots_scanning_options()),
-                                  &mrias_cl,
-                                  true,   // walk code active on stacks
-                                  NULL,
-                                  NULL);  // The dirty klasses will be handled below
+
+    gch->gen_process_roots(_cmsGen->level(),
+                           true,  // younger gens as roots
+                           false, // use the local StrongRootsScope
+                           SharedHeap::ScanningOption(roots_scanning_options()),
+                           should_unload_classes(),
+                           &mrias_cl,
+                           NULL,
+                           NULL); // The dirty klasses will be handled below
 
     assert(should_unload_classes()
-           || (roots_scanning_options() & SharedHeap::SO_CodeCache),
+           || (roots_scanning_options() & SharedHeap::SO_AllCodeCache),
            "if we didn't scan the code cache, we have to be ready to drop nmethods with expired weak oops");
   }
 
@@ -6017,7 +5996,7 @@
   // We might have added oops to ClassLoaderData::_handles during the
   // concurrent marking phase. These oops point to newly allocated objects
   // that are guaranteed to be kept alive. Either by the direct allocation
-  // code, or when the young collector processes the strong roots. Hence,
+  // code, or when the young collector processes the roots. Hence,
   // we don't have to revisit the _handles block during the remark phase.
 
   verify_work_stacks_empty();
@@ -6072,6 +6051,8 @@
 };
 
 void CMSRefProcTaskProxy::work(uint worker_id) {
+  ResourceMark rm;
+  HandleMark hm;
   assert(_collector->_span.equals(_span), "Inconsistency in _span");
   CMSParKeepAliveClosure par_keep_alive(_collector, _span,
                                         _mark_bit_map,
@@ -6267,15 +6248,14 @@
       // Clean up unreferenced symbols in symbol table.
       SymbolTable::unlink();
     }
-  }
-
-  // CMS doesn't use the StringTable as hard roots when class unloading is turned off.
-  // Need to check if we really scanned the StringTable.
-  if ((roots_scanning_options() & SharedHeap::SO_Strings) == 0) {
-    GCTraceTime t("scrub string table", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
-    // Delete entries for dead interned strings.
-    StringTable::unlink(&_is_alive_closure);
-  }
+
+    {
+      GCTraceTime t("scrub string table", PrintGCDetails, false, _gc_timer_cm, _gc_tracer_cm->gc_id());
+      // Delete entries for dead interned strings.
+      StringTable::unlink(&_is_alive_closure);
+    }
+  }
+
 
   // Restore any preserved marks as a result of mark stack or
   // work queue overflow
@@ -7744,7 +7724,7 @@
   CMSCollector* collector, MemRegion span,
   CMSBitMap* verification_bm, CMSBitMap* cms_bm,
   CMSMarkStack*  mark_stack):
-  CMSOopClosure(collector->ref_processor()),
+  MetadataAwareOopClosure(collector->ref_processor()),
   _collector(collector),
   _span(span),
   _verification_bm(verification_bm),
@@ -7797,7 +7777,7 @@
                      MemRegion span,
                      CMSBitMap* bitMap, CMSMarkStack*  markStack,
                      HeapWord* finger, MarkFromRootsClosure* parent) :
-  CMSOopClosure(collector->ref_processor()),
+  MetadataAwareOopClosure(collector->ref_processor()),
   _collector(collector),
   _span(span),
   _bitMap(bitMap),
@@ -7814,7 +7794,7 @@
                      HeapWord* finger,
                      HeapWord** global_finger_addr,
                      Par_MarkFromRootsClosure* parent) :
-  CMSOopClosure(collector->ref_processor()),
+  MetadataAwareOopClosure(collector->ref_processor()),
   _collector(collector),
   _whole_span(collector->_span),
   _span(span),
@@ -7863,11 +7843,6 @@
   _overflow_stack->expand(); // expand the stack if possible
 }
 
-void CMKlassClosure::do_klass(Klass* k) {
-  assert(_oop_closure != NULL, "Not initialized?");
-  k->oops_do(_oop_closure);
-}
-
 void PushOrMarkClosure::do_oop(oop obj) {
   // Ignore mark word because we are running concurrent with mutators.
   assert(obj->is_oop_or_null(true), "expected an oop or NULL");
@@ -7965,7 +7940,7 @@
                                        CMSBitMap* mod_union_table,
                                        CMSMarkStack*  mark_stack,
                                        bool           concurrent_precleaning):
-  CMSOopClosure(rp),
+  MetadataAwareOopClosure(rp),
   _collector(collector),
   _span(span),
   _bit_map(bit_map),
@@ -8038,7 +8013,7 @@
                                                ReferenceProcessor* rp,
                                                CMSBitMap* bit_map,
                                                OopTaskQueue* work_queue):
-  CMSOopClosure(rp),
+  MetadataAwareOopClosure(rp),
   _collector(collector),
   _span(span),
   _bit_map(bit_map),
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -32,6 +32,7 @@
 #include "gc_implementation/shared/generationCounters.hpp"
 #include "memory/freeBlockDictionary.hpp"
 #include "memory/generation.hpp"
+#include "memory/iterator.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "runtime/virtualspace.hpp"
 #include "services/memoryService.hpp"
@@ -1285,7 +1286,6 @@
   void save_sweep_limit();
 
   // More iteration support
-  virtual void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
   virtual void oop_iterate(ExtendedOopClosure* cl);
   virtual void safe_object_iterate(ObjectClosure* cl);
   virtual void object_iterate(ObjectClosure* cl);
@@ -1383,13 +1383,6 @@
 // Closures of various sorts used by CMS to accomplish its work
 //
 
-// This closure is used to check that a certain set of oops is empty.
-class FalseClosure: public OopClosure {
- public:
-  void do_oop(oop* p)       { guarantee(false, "Should be an empty set"); }
-  void do_oop(narrowOop* p) { guarantee(false, "Should be an empty set"); }
-};
-
 // This closure is used to do concurrent marking from the roots
 // following the first checkpoint.
 class MarkFromRootsClosure: public BitMapClosure {
@@ -1454,7 +1447,7 @@
 
 // The following closures are used to do certain kinds of verification of
 // CMS marking.
-class PushAndMarkVerifyClosure: public CMSOopClosure {
+class PushAndMarkVerifyClosure: public MetadataAwareOopClosure {
   CMSCollector*    _collector;
   MemRegion        _span;
   CMSBitMap*       _verification_bm;
@@ -1507,6 +1500,19 @@
   }
 };
 
+// A version of ObjectClosure with "memory" (see _previous_address below)
+class UpwardsObjectClosure: public BoolObjectClosure {
+  HeapWord* _previous_address;
+ public:
+  UpwardsObjectClosure() : _previous_address(NULL) { }
+  void set_previous(HeapWord* addr) { _previous_address = addr; }
+  HeapWord* previous()              { return _previous_address; }
+  // A return value of "true" can be used by the caller to decide
+  // if this object's end should *NOT* be recorded in
+  // _previous_address above.
+  virtual bool do_object_bm(oop obj, MemRegion mr) = 0;
+};
+
 // This closure is used during the second checkpointing phase
 // to rescan the marked objects on the dirty cards in the mod
 // union table and the card table proper. It's invoked via
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/g1/bufferingOopClosure.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,271 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/g1/bufferingOopClosure.hpp"
+#include "memory/iterator.hpp"
+#include "utilities/debug.hpp"
+
+/////////////// Unit tests ///////////////
+
+#ifndef PRODUCT
+
+class TestBufferingOopClosure {
+
+  // Helper class to fake a set of oop*s and narrowOop*s.
+  class FakeRoots {
+   public:
+    // Used for sanity checking of the values passed to the do_oops functions in the test.
+    static const uintptr_t NarrowOopMarker = uintptr_t(1) << (BitsPerWord -1);
+
+    int    _num_narrow;
+    int    _num_full;
+    void** _narrow;
+    void** _full;
+
+    FakeRoots(int num_narrow, int num_full) :
+        _num_narrow(num_narrow),
+        _num_full(num_full),
+        _narrow((void**)::malloc(sizeof(void*) * num_narrow)),
+        _full((void**)::malloc(sizeof(void*) * num_full)) {
+
+      for (int i = 0; i < num_narrow; i++) {
+        _narrow[i] = (void*)(NarrowOopMarker + (uintptr_t)i);
+      }
+      for (int i = 0; i < num_full; i++) {
+        _full[i] = (void*)(uintptr_t)i;
+      }
+    }
+
+    ~FakeRoots() {
+      ::free(_narrow);
+      ::free(_full);
+    }
+
+    void oops_do_narrow_then_full(OopClosure* cl) {
+      for (int i = 0; i < _num_narrow; i++) {
+        cl->do_oop((narrowOop*)_narrow[i]);
+      }
+      for (int i = 0; i < _num_full; i++) {
+        cl->do_oop((oop*)_full[i]);
+      }
+    }
+
+    void oops_do_full_then_narrow(OopClosure* cl) {
+      for (int i = 0; i < _num_full; i++) {
+        cl->do_oop((oop*)_full[i]);
+      }
+      for (int i = 0; i < _num_narrow; i++) {
+        cl->do_oop((narrowOop*)_narrow[i]);
+      }
+    }
+
+    void oops_do_mixed(OopClosure* cl) {
+      int i;
+      for (i = 0; i < _num_full && i < _num_narrow; i++) {
+        cl->do_oop((oop*)_full[i]);
+        cl->do_oop((narrowOop*)_narrow[i]);
+      }
+      for (int j = i; j < _num_full; j++) {
+        cl->do_oop((oop*)_full[i]);
+      }
+      for (int j = i; j < _num_narrow; j++) {
+        cl->do_oop((narrowOop*)_narrow[i]);
+      }
+    }
+
+    static const int MaxOrder = 2;
+
+    void oops_do(OopClosure* cl, int do_oop_order) {
+      switch(do_oop_order) {
+        case 0:
+          oops_do_narrow_then_full(cl);
+          break;
+        case 1:
+          oops_do_full_then_narrow(cl);
+          break;
+        case 2:
+          oops_do_mixed(cl);
+          break;
+        default:
+          oops_do_narrow_then_full(cl);
+          break;
+      }
+    }
+  };
+
+  class CountOopClosure : public OopClosure {
+    int _narrow_oop_count;
+    int _full_oop_count;
+   public:
+    CountOopClosure() : _narrow_oop_count(0), _full_oop_count(0) {}
+    void do_oop(narrowOop* p) {
+      assert((uintptr_t(p) & FakeRoots::NarrowOopMarker) != 0,
+          "The narrowOop was unexpectedly not marked with the NarrowOopMarker");
+      _narrow_oop_count++;
+    }
+
+    void do_oop(oop* p){
+      assert((uintptr_t(p) & FakeRoots::NarrowOopMarker) == 0,
+          "The oop was unexpectedly marked with the NarrowOopMarker");
+      _full_oop_count++;
+    }
+
+    int narrow_oop_count() { return _narrow_oop_count; }
+    int full_oop_count()   { return _full_oop_count; }
+    int all_oop_count()    { return _narrow_oop_count + _full_oop_count; }
+  };
+
+  class DoNothingOopClosure : public OopClosure {
+   public:
+    void do_oop(narrowOop* p) {}
+    void do_oop(oop* p)       {}
+  };
+
+  static void testCount(int num_narrow, int num_full, int do_oop_order) {
+    FakeRoots fr(num_narrow, num_full);
+
+    CountOopClosure coc;
+    BufferingOopClosure boc(&coc);
+
+    fr.oops_do(&boc, do_oop_order);
+
+    boc.done();
+
+    #define assert_testCount(got, expected)                                     \
+       assert((got) == (expected),                                              \
+           err_msg("Expected: %d, got: %d, when running testCount(%d, %d, %d)", \
+               (got), (expected), num_narrow, num_full, do_oop_order))
+
+    assert_testCount(num_narrow, coc.narrow_oop_count());
+    assert_testCount(num_full, coc.full_oop_count());
+    assert_testCount(num_narrow + num_full, coc.all_oop_count());
+  }
+
+  static void testCount() {
+    int buffer_length = BufferingOopClosure::BufferLength;
+
+    for (int order = 0; order < FakeRoots::MaxOrder; order++) {
+      testCount(0,                 0,                 order);
+      testCount(10,                0,                 order);
+      testCount(0,                 10,                order);
+      testCount(10,                10,                order);
+      testCount(buffer_length,     10,                order);
+      testCount(10,                buffer_length,     order);
+      testCount(buffer_length,     buffer_length,     order);
+      testCount(buffer_length + 1, 10,                order);
+      testCount(10,                buffer_length + 1, order);
+      testCount(buffer_length + 1, buffer_length,     order);
+      testCount(buffer_length,     buffer_length + 1, order);
+      testCount(buffer_length + 1, buffer_length + 1, order);
+    }
+  }
+
+  static void testIsBufferEmptyOrFull(int num_narrow, int num_full, bool expect_empty, bool expect_full) {
+    FakeRoots fr(num_narrow, num_full);
+
+    DoNothingOopClosure cl;
+    BufferingOopClosure boc(&cl);
+
+    fr.oops_do(&boc, 0);
+
+    #define assert_testIsBufferEmptyOrFull(got, expected)                             \
+        assert((got) == (expected),                                                   \
+            err_msg("Expected: %d, got: %d. testIsBufferEmptyOrFull(%d, %d, %s, %s)", \
+                (got), (expected), num_narrow, num_full,                              \
+                BOOL_TO_STR(expect_empty), BOOL_TO_STR(expect_full)))
+
+    assert_testIsBufferEmptyOrFull(expect_empty, boc.is_buffer_empty());
+    assert_testIsBufferEmptyOrFull(expect_full, boc.is_buffer_full());
+  }
+
+  static void testIsBufferEmptyOrFull() {
+    int bl = BufferingOopClosure::BufferLength;
+
+    testIsBufferEmptyOrFull(0,       0, true,  false);
+    testIsBufferEmptyOrFull(1,       0, false, false);
+    testIsBufferEmptyOrFull(0,       1, false, false);
+    testIsBufferEmptyOrFull(1,       1, false, false);
+    testIsBufferEmptyOrFull(10,      0, false, false);
+    testIsBufferEmptyOrFull(0,      10, false, false);
+    testIsBufferEmptyOrFull(10,     10, false, false);
+    testIsBufferEmptyOrFull(0,      bl, false, true);
+    testIsBufferEmptyOrFull(bl,      0, false, true);
+    testIsBufferEmptyOrFull(bl/2, bl/2, false, true);
+    testIsBufferEmptyOrFull(bl-1,    1, false, true);
+    testIsBufferEmptyOrFull(1,    bl-1, false, true);
+    // Processed
+    testIsBufferEmptyOrFull(bl+1,    0, false, false);
+    testIsBufferEmptyOrFull(bl*2,    0, false, true);
+  }
+
+  static void testEmptyAfterDone(int num_narrow, int num_full) {
+    FakeRoots fr(num_narrow, num_full);
+
+    DoNothingOopClosure cl;
+    BufferingOopClosure boc(&cl);
+
+    fr.oops_do(&boc, 0);
+
+    // Make sure all get processed.
+    boc.done();
+
+    assert(boc.is_buffer_empty(),
+        err_msg("Should be empty after call to done(). testEmptyAfterDone(%d, %d)",
+            num_narrow, num_full));
+  }
+
+  static void testEmptyAfterDone() {
+    int bl = BufferingOopClosure::BufferLength;
+
+    testEmptyAfterDone(0,       0);
+    testEmptyAfterDone(1,       0);
+    testEmptyAfterDone(0,       1);
+    testEmptyAfterDone(1,       1);
+    testEmptyAfterDone(10,      0);
+    testEmptyAfterDone(0,      10);
+    testEmptyAfterDone(10,     10);
+    testEmptyAfterDone(0,      bl);
+    testEmptyAfterDone(bl,      0);
+    testEmptyAfterDone(bl/2, bl/2);
+    testEmptyAfterDone(bl-1,    1);
+    testEmptyAfterDone(1,    bl-1);
+    // Processed
+    testEmptyAfterDone(bl+1,    0);
+    testEmptyAfterDone(bl*2,    0);
+  }
+
+  public:
+  static void test() {
+    testCount();
+    testIsBufferEmptyOrFull();
+    testEmptyAfterDone();
+  }
+};
+
+void TestBufferingOopClosure_test() {
+  TestBufferingOopClosure::test();
+}
+
+#endif
--- a/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/bufferingOopClosure.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,10 +25,10 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_BUFFERINGOOPCLOSURE_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_BUFFERINGOOPCLOSURE_HPP
 
-#include "memory/genOopClosures.hpp"
-#include "memory/generation.hpp"
+#include "memory/iterator.hpp"
+#include "oops/oopsHierarchy.hpp"
 #include "runtime/os.hpp"
-#include "utilities/taskqueue.hpp"
+#include "utilities/debug.hpp"
 
 // A BufferingOops closure tries to separate out the cost of finding roots
 // from the cost of applying closures to them.  It maintains an array of
@@ -41,60 +41,103 @@
 // The caller must be sure to call "done" to process any unprocessed
 // buffered entriess.
 
-class Generation;
-class HeapRegion;
+class BufferingOopClosure: public OopClosure {
+  friend class TestBufferingOopClosure;
+protected:
+  static const size_t BufferLength = 1024;
 
-class BufferingOopClosure: public OopClosure {
-protected:
-  enum PrivateConstants {
-    BufferLength = 1024
-  };
-
-  StarTask  _buffer[BufferLength];
-  StarTask* _buffer_top;
-  StarTask* _buffer_curr;
+  // We need to know if the buffered addresses contain oops or narrowOops.
+  // We can't tag the addresses the way StarTask does, because we need to
+  // be able to handle unaligned addresses coming from oops embedded in code.
+  //
+  // The addresses for the full-sized oops are filled in from the bottom,
+  // while the addresses for the narrowOops are filled in from the top.
+  OopOrNarrowOopStar  _buffer[BufferLength];
+  OopOrNarrowOopStar* _oop_top;
+  OopOrNarrowOopStar* _narrowOop_bottom;
 
   OopClosure* _oc;
   double      _closure_app_seconds;
 
-  void process_buffer () {
+
+  bool is_buffer_empty() {
+    return _oop_top == _buffer && _narrowOop_bottom == (_buffer + BufferLength - 1);
+  }
+
+  bool is_buffer_full() {
+    return _narrowOop_bottom < _oop_top;
+  }
+
+  // Process addresses containing full-sized oops.
+  void process_oops() {
+    for (OopOrNarrowOopStar* curr = _buffer; curr < _oop_top; ++curr) {
+      _oc->do_oop((oop*)(*curr));
+    }
+    _oop_top = _buffer;
+  }
+
+  // Process addresses containing narrow oops.
+  void process_narrowOops() {
+    for (OopOrNarrowOopStar* curr = _buffer + BufferLength - 1; curr > _narrowOop_bottom; --curr) {
+      _oc->do_oop((narrowOop*)(*curr));
+    }
+    _narrowOop_bottom = _buffer + BufferLength - 1;
+  }
+
+  // Apply the closure to all oops and clear the buffer.
+  // Accumulate the time it took.
+  void process_buffer() {
     double start = os::elapsedTime();
-    for (StarTask* curr = _buffer; curr < _buffer_curr; ++curr) {
-      if (curr->is_narrow()) {
-        assert(UseCompressedOops, "Error");
-        _oc->do_oop((narrowOop*)(*curr));
-      } else {
-        _oc->do_oop((oop*)(*curr));
-      }
-    }
-    _buffer_curr = _buffer;
+
+    process_oops();
+    process_narrowOops();
+
     _closure_app_seconds += (os::elapsedTime() - start);
   }
 
-  template <class T> inline void do_oop_work(T* p) {
-    if (_buffer_curr == _buffer_top) {
+  void process_buffer_if_full() {
+    if (is_buffer_full()) {
       process_buffer();
     }
-    StarTask new_ref(p);
-    *_buffer_curr = new_ref;
-    ++_buffer_curr;
+  }
+
+  void add_narrowOop(narrowOop* p) {
+    assert(!is_buffer_full(), "Buffer should not be full");
+    *_narrowOop_bottom = (OopOrNarrowOopStar)p;
+    _narrowOop_bottom--;
+  }
+
+  void add_oop(oop* p) {
+    assert(!is_buffer_full(), "Buffer should not be full");
+    *_oop_top = (OopOrNarrowOopStar)p;
+    _oop_top++;
   }
 
 public:
-  virtual void do_oop(narrowOop* p) { do_oop_work(p); }
-  virtual void do_oop(oop* p)       { do_oop_work(p); }
+  virtual void do_oop(narrowOop* p) {
+    process_buffer_if_full();
+    add_narrowOop(p);
+  }
 
-  void done () {
-    if (_buffer_curr > _buffer) {
+  virtual void do_oop(oop* p)       {
+    process_buffer_if_full();
+    add_oop(p);
+  }
+
+  void done() {
+    if (!is_buffer_empty()) {
       process_buffer();
     }
   }
-  double closure_app_seconds () {
+
+  double closure_app_seconds() {
     return _closure_app_seconds;
   }
-  BufferingOopClosure (OopClosure *oc) :
+
+  BufferingOopClosure(OopClosure *oc) :
     _oc(oc),
-    _buffer_curr(_buffer), _buffer_top(_buffer + BufferLength),
+    _oop_top(_buffer),
+    _narrowOop_bottom(_buffer + BufferLength - 1),
     _closure_app_seconds(0.0) { }
 };
 
--- a/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "classfile/symbolTable.hpp"
+#include "code/codeCache.hpp"
 #include "gc_implementation/g1/concurrentMark.inline.hpp"
 #include "gc_implementation/g1/concurrentMarkThread.inline.hpp"
 #include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
@@ -39,6 +40,7 @@
 #include "gc_implementation/shared/gcTimer.hpp"
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/gcTraceTime.hpp"
+#include "memory/allocation.hpp"
 #include "memory/genOopClosures.inline.hpp"
 #include "memory/referencePolicy.hpp"
 #include "memory/resourceArea.hpp"
@@ -57,8 +59,8 @@
   _bmWordSize = 0;
 }
 
-HeapWord* CMBitMapRO::getNextMarkedWordAddress(HeapWord* addr,
-                                               HeapWord* limit) const {
+HeapWord* CMBitMapRO::getNextMarkedWordAddress(const HeapWord* addr,
+                                               const HeapWord* limit) const {
   // First we must round addr *up* to a possible object boundary.
   addr = (HeapWord*)align_size_up((intptr_t)addr,
                                   HeapWordSize << _shifter);
@@ -75,8 +77,8 @@
   return nextAddr;
 }
 
-HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(HeapWord* addr,
-                                                 HeapWord* limit) const {
+HeapWord* CMBitMapRO::getNextUnmarkedWordAddress(const HeapWord* addr,
+                                                 const HeapWord* limit) const {
   size_t addrOffset = heapWordToOffset(addr);
   if (limit == NULL) {
     limit = _bmStartWord + _bmWordSize;
@@ -888,6 +890,10 @@
   guarantee(!g1h->mark_in_progress(), "invariant");
 }
 
+bool ConcurrentMark::nextMarkBitmapIsClear() {
+  return _nextMarkBitMap->getNextMarkedWordAddress(_heap_start, _heap_end) == _heap_end;
+}
+
 class NoteStartOfMarkHRClosure: public HeapRegionClosure {
 public:
   bool doHeapRegion(HeapRegion* r) {
@@ -1222,6 +1228,9 @@
 };
 
 void ConcurrentMark::scanRootRegions() {
+  // Start of concurrent marking.
+  ClassLoaderDataGraph::clear_claimed_marks();
+
   // scan_in_progress() will have been set to true only if there was
   // at least one root region to scan. So, if it's false, we
   // should not attempt to do any further work.
@@ -1270,7 +1279,7 @@
   CMConcurrentMarkingTask markingTask(this, cmThread());
   if (use_parallel_marking_threads()) {
     _parallel_workers->set_active_workers((int)active_workers);
-    // Don't set _n_par_threads because it affects MT in proceess_strong_roots()
+    // Don't set _n_par_threads because it affects MT in process_roots()
     // and the decisions on that MT processing is made elsewhere.
     assert(_parallel_workers->active_workers() > 0, "Should have been set");
     _parallel_workers->run_task(&markingTask);
@@ -1301,6 +1310,7 @@
     Universe::verify(VerifyOption_G1UsePrevMarking,
                      " VerifyDuringGC:(before)");
   }
+  g1h->check_bitmaps("Remark Start");
 
   G1CollectorPolicy* g1p = g1h->g1_policy();
   g1p->record_concurrent_mark_remark_start();
@@ -1349,6 +1359,7 @@
       Universe::verify(VerifyOption_G1UseNextMarking,
                        " VerifyDuringGC:(after)");
     }
+    g1h->check_bitmaps("Remark End");
     assert(!restart_for_overflow(), "sanity");
     // Completely reset the marking state since marking completed
     set_non_marking_state();
@@ -1998,6 +2009,7 @@
     Universe::verify(VerifyOption_G1UsePrevMarking,
                      " VerifyDuringGC:(before)");
   }
+  g1h->check_bitmaps("Cleanup Start");
 
   G1CollectorPolicy* g1p = G1CollectedHeap::heap()->g1_policy();
   g1p->record_concurrent_mark_cleanup_start();
@@ -2035,8 +2047,8 @@
     // that calculated by walking the marking bitmap.
 
     // Bitmaps to hold expected values
-    BitMap expected_region_bm(_region_bm.size(), false);
-    BitMap expected_card_bm(_card_bm.size(), false);
+    BitMap expected_region_bm(_region_bm.size(), true);
+    BitMap expected_card_bm(_card_bm.size(), true);
 
     G1ParVerifyFinalCountTask g1_par_verify_task(g1h,
                                                  &_region_bm,
@@ -2138,22 +2150,30 @@
   // Update the soft reference policy with the new heap occupancy.
   Universe::update_heap_info_at_gc();
 
-  // We need to make this be a "collection" so any collection pause that
-  // races with it goes around and waits for completeCleanup to finish.
-  g1h->increment_total_collections();
-
-  // We reclaimed old regions so we should calculate the sizes to make
-  // sure we update the old gen/space data.
-  g1h->g1mm()->update_sizes();
-
   if (VerifyDuringGC) {
     HandleMark hm;  // handle scope
     Universe::heap()->prepare_for_verify();
     Universe::verify(VerifyOption_G1UsePrevMarking,
                      " VerifyDuringGC:(after)");
   }
+  g1h->check_bitmaps("Cleanup End");
 
   g1h->verify_region_sets_optional();
+
+  // We need to make this be a "collection" so any collection pause that
+  // races with it goes around and waits for completeCleanup to finish.
+  g1h->increment_total_collections();
+
+  // Clean out dead classes and update Metaspace sizes.
+  if (ClassUnloadingWithConcurrentMark) {
+    ClassLoaderDataGraph::purge();
+  }
+  MetaspaceGC::compute_new_size();
+
+  // We reclaimed old regions so we should calculate the sizes to make
+  // sure we update the old gen/space data.
+  g1h->g1mm()->update_sizes();
+
   g1h->trace_heap_after_concurrent_cycle();
 }
 
@@ -2383,6 +2403,8 @@
   }
 
   virtual void work(uint worker_id) {
+    ResourceMark rm;
+    HandleMark hm;
     CMTask* task = _cm->task(worker_id);
     G1CMIsAliveClosure g1_is_alive(_g1h);
     G1CMKeepAliveAndDrainClosure g1_par_keep_alive(_cm, task, false /* is_serial */);
@@ -2440,6 +2462,26 @@
   _g1h->set_par_threads(0);
 }
 
+void ConcurrentMark::weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes) {
+  G1CollectedHeap::heap()->parallel_cleaning(is_alive, true, true, purged_classes);
+}
+
+// Helper class to get rid of some boilerplate code.
+class G1RemarkGCTraceTime : public GCTraceTime {
+  static bool doit_and_prepend(bool doit) {
+    if (doit) {
+      gclog_or_tty->put(' ');
+    }
+    return doit;
+  }
+
+ public:
+  G1RemarkGCTraceTime(const char* title, bool doit)
+    : GCTraceTime(title, doit_and_prepend(doit), false, G1CollectedHeap::heap()->gc_timer_cm(),
+        G1CollectedHeap::heap()->concurrent_mark()->concurrent_gc_id()) {
+  }
+};
+
 void ConcurrentMark::weakRefsWork(bool clear_all_soft_refs) {
   if (has_overflown()) {
     // Skip processing the discovered references if we have
@@ -2552,9 +2594,31 @@
     return;
   }
 
-  g1h->unlink_string_and_symbol_table(&g1_is_alive,
-                                      /* process_strings */ false, // currently strings are always roots
-                                      /* process_symbols */ true);
+  assert(_markStack.isEmpty(), "Marking should have completed");
+
+  // Unload Klasses, String, Symbols, Code Cache, etc.
+  {
+    G1RemarkGCTraceTime trace("Unloading", G1Log::finer());
+
+    if (ClassUnloadingWithConcurrentMark) {
+      bool purged_classes;
+
+      {
+        G1RemarkGCTraceTime trace("System Dictionary Unloading", G1Log::finest());
+        purged_classes = SystemDictionary::do_unloading(&g1_is_alive);
+      }
+
+      {
+        G1RemarkGCTraceTime trace("Parallel Unloading", G1Log::finest());
+        weakRefsWorkParallelPart(&g1_is_alive, purged_classes);
+      }
+    }
+
+    if (G1StringDedup::is_enabled()) {
+      G1RemarkGCTraceTime trace("String Deduplication Unlink", G1Log::finest());
+      G1StringDedup::unlink(&g1_is_alive);
+    }
+  }
 }
 
 void ConcurrentMark::swapMarkBitMaps() {
@@ -2563,6 +2627,57 @@
   _nextMarkBitMap  = (CMBitMap*)  temp;
 }
 
+class CMObjectClosure;
+
+// Closure for iterating over objects, currently only used for
+// processing SATB buffers.
+class CMObjectClosure : public ObjectClosure {
+private:
+  CMTask* _task;
+
+public:
+  void do_object(oop obj) {
+    _task->deal_with_reference(obj);
+  }
+
+  CMObjectClosure(CMTask* task) : _task(task) { }
+};
+
+class G1RemarkThreadsClosure : public ThreadClosure {
+  CMObjectClosure _cm_obj;
+  G1CMOopClosure _cm_cl;
+  MarkingCodeBlobClosure _code_cl;
+  int _thread_parity;
+  bool _is_par;
+
+ public:
+  G1RemarkThreadsClosure(G1CollectedHeap* g1h, CMTask* task, bool is_par) :
+    _cm_obj(task), _cm_cl(g1h, g1h->concurrent_mark(), task), _code_cl(&_cm_cl, !CodeBlobToOopClosure::FixRelocations),
+    _thread_parity(SharedHeap::heap()->strong_roots_parity()), _is_par(is_par) {}
+
+  void do_thread(Thread* thread) {
+    if (thread->is_Java_thread()) {
+      if (thread->claim_oops_do(_is_par, _thread_parity)) {
+        JavaThread* jt = (JavaThread*)thread;
+
+        // In theory it should not be neccessary to explicitly walk the nmethods to find roots for concurrent marking
+        // however the liveness of oops reachable from nmethods have very complex lifecycles:
+        // * Alive if on the stack of an executing method
+        // * Weakly reachable otherwise
+        // Some objects reachable from nmethods, such as the class loader (or klass_holder) of the receiver should be
+        // live by the SATB invariant but other oops recorded in nmethods may behave differently.
+        jt->nmethods_do(&_code_cl);
+
+        jt->satb_mark_queue().apply_closure_and_empty(&_cm_obj);
+      }
+    } else if (thread->is_VM_thread()) {
+      if (thread->claim_oops_do(_is_par, _thread_parity)) {
+        JavaThread::satb_mark_queue_set().shared_satb_queue()->apply_closure_and_empty(&_cm_obj);
+      }
+    }
+  }
+};
+
 class CMRemarkTask: public AbstractGangTask {
 private:
   ConcurrentMark* _cm;
@@ -2574,6 +2689,14 @@
     if (worker_id < _cm->active_tasks()) {
       CMTask* task = _cm->task(worker_id);
       task->record_start_time();
+      {
+        ResourceMark rm;
+        HandleMark hm;
+
+        G1RemarkThreadsClosure threads_f(G1CollectedHeap::heap(), task, !_is_serial);
+        Threads::threads_do(&threads_f);
+      }
+
       do {
         task->do_marking_step(1000000000.0 /* something very large */,
                               true         /* do_termination       */,
@@ -2596,6 +2719,8 @@
   HandleMark   hm;
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
 
+  G1RemarkGCTraceTime trace("Finalize Marking", G1Log::finer());
+
   g1h->ensure_parsability(false);
 
   if (G1CollectedHeap::use_parallel_gc_threads()) {
@@ -3243,8 +3368,14 @@
 
 // abandon current marking iteration due to a Full GC
 void ConcurrentMark::abort() {
-  // Clear all marks to force marking thread to do nothing
+  // Clear all marks in the next bitmap for the next marking cycle. This will allow us to skip the next
+  // concurrent bitmap clearing.
   _nextMarkBitMap->clearAll();
+
+  // Note we cannot clear the previous marking bitmap here
+  // since VerifyDuringGC verifies the objects marked during
+  // a full GC against the previous bitmap.
+
   // Clear the liveness counting data
   clear_all_count_data();
   // Empty mark stack
@@ -3421,20 +3552,6 @@
   }
 };
 
-// Closure for iterating over objects, currently only used for
-// processing SATB buffers.
-class CMObjectClosure : public ObjectClosure {
-private:
-  CMTask* _task;
-
-public:
-  void do_object(oop obj) {
-    _task->deal_with_reference(obj);
-  }
-
-  CMObjectClosure(CMTask* task) : _task(task) { }
-};
-
 G1CMOopClosure::G1CMOopClosure(G1CollectedHeap* g1h,
                                ConcurrentMark* cm,
                                CMTask* task)
@@ -3900,15 +4017,6 @@
     }
   }
 
-  if (!concurrent() && !has_aborted()) {
-    // We should only do this during remark.
-    if (G1CollectedHeap::use_parallel_gc_threads()) {
-      satb_mq_set.par_iterate_closure_all_threads(_worker_id);
-    } else {
-      satb_mq_set.iterate_closure_all_threads();
-    }
-  }
-
   _draining_satb_buffers = false;
 
   assert(has_aborted() ||
--- a/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMark.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_CONCURRENTMARK_HPP
 
+#include "classfile/javaClasses.hpp"
 #include "gc_implementation/g1/heapRegionSet.hpp"
 #include "gc_implementation/shared/gcId.hpp"
 #include "utilities/taskqueue.hpp"
@@ -86,19 +87,19 @@
   // Return the address corresponding to the next marked bit at or after
   // "addr", and before "limit", if "limit" is non-NULL.  If there is no
   // such bit, returns "limit" if that is non-NULL, or else "endWord()".
-  HeapWord* getNextMarkedWordAddress(HeapWord* addr,
-                                     HeapWord* limit = NULL) const;
+  HeapWord* getNextMarkedWordAddress(const HeapWord* addr,
+                                     const HeapWord* limit = NULL) const;
   // Return the address corresponding to the next unmarked bit at or after
   // "addr", and before "limit", if "limit" is non-NULL.  If there is no
   // such bit, returns "limit" if that is non-NULL, or else "endWord()".
-  HeapWord* getNextUnmarkedWordAddress(HeapWord* addr,
-                                       HeapWord* limit = NULL) const;
+  HeapWord* getNextUnmarkedWordAddress(const HeapWord* addr,
+                                       const HeapWord* limit = NULL) const;
 
   // conversion utilities
   HeapWord* offsetToHeapWord(size_t offset) const {
     return _bmStartWord + (offset << _shifter);
   }
-  size_t heapWordToOffset(HeapWord* addr) const {
+  size_t heapWordToOffset(const HeapWord* addr) const {
     return pointer_delta(addr, _bmStartWord) >> _shifter;
   }
   int heapWordDiffToOffsetDiff(size_t diff) const;
@@ -476,6 +477,7 @@
   ForceOverflowSettings _force_overflow_conc;
   ForceOverflowSettings _force_overflow_stw;
 
+  void weakRefsWorkParallelPart(BoolObjectClosure* is_alive, bool purged_classes);
   void weakRefsWork(bool clear_all_soft_refs);
 
   void swapMarkBitMaps();
@@ -734,6 +736,9 @@
   // Clear the next marking bitmap (will be called concurrently).
   void clearNextBitmap();
 
+  // Return whether the next mark bitmap has no marks set.
+  bool nextMarkBitmapIsClear();
+
   // These two do the work that needs to be done before and after the
   // initial root checkpoint. Since this checkpoint can be done at two
   // different points (i.e. an explicit pause or piggy-backed on a
--- a/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/concurrentMarkThread.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -275,9 +275,13 @@
 
       // We now want to allow clearing of the marking bitmap to be
       // suspended by a collection pause.
-      {
+      // We may have aborted just before the remark. Do not bother clearing the
+      // bitmap then, as it has been done during mark abort.
+      if (!cm()->has_aborted()) {
         SuspendibleThreadSetJoiner sts;
         _cm->clearNextBitmap();
+      } else {
+        assert(!G1VerifyBitmaps || _cm->nextMarkBitmapIsClear(), "Next mark bitmap must be clear");
       }
     }
 
--- a/src/share/vm/gc_implementation/g1/g1AllocRegion.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1AllocRegion.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_INLINE_HPP
 
 #include "gc_implementation/g1/g1AllocRegion.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 
 inline HeapWord* G1AllocRegion::allocate(HeapRegion* alloc_region,
                                          size_t word_size,
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -24,6 +24,7 @@
 
 #include "precompiled.hpp"
 #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
 #include "memory/space.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/java.hpp"
@@ -98,6 +99,20 @@
   return (delta & right_n_bits(LogN_words)) == (size_t)NoBits;
 }
 
+void G1BlockOffsetSharedArray::set_offset_array(HeapWord* left, HeapWord* right, u_char offset) {
+  check_index(index_for(right - 1), "right address out of range");
+  assert(left  < right, "Heap addresses out of order");
+  size_t num_cards = pointer_delta(right, left) >> LogN_words;
+  if (UseMemSetInBOT) {
+    memset(&_offset_array[index_for(left)], offset, num_cards);
+  } else {
+    size_t i = index_for(left);
+    const size_t end = i + num_cards;
+    for (; i < end; i++) {
+      _offset_array[i] = offset;
+    }
+  }
+}
 
 //////////////////////////////////////////////////////////////////////
 // G1BlockOffsetArray
@@ -107,7 +122,7 @@
                                        MemRegion mr, bool init_to_zero) :
   G1BlockOffsetTable(mr.start(), mr.end()),
   _unallocated_block(_bottom),
-  _array(array), _csp(NULL),
+  _array(array), _gsp(NULL),
   _init_to_zero(init_to_zero) {
   assert(_bottom <= _end, "arguments out of order");
   if (!_init_to_zero) {
@@ -117,9 +132,8 @@
   }
 }
 
-void G1BlockOffsetArray::set_space(Space* sp) {
-  _sp = sp;
-  _csp = sp->toContiguousSpace();
+void G1BlockOffsetArray::set_space(G1OffsetTableContigSpace* sp) {
+  _gsp = sp;
 }
 
 // The arguments follow the normal convention of denoting
@@ -378,7 +392,7 @@
   }
   // Otherwise, find the block start using the table.
   HeapWord* q = block_at_or_preceding(addr, false, 0);
-  HeapWord* n = q + _sp->block_size(q);
+  HeapWord* n = q + block_size(q);
   return forward_to_block_containing_addr_const(q, n, addr);
 }
 
@@ -406,31 +420,17 @@
          err_msg("next_boundary is beyond the end of the covered region "
                  " next_boundary " PTR_FORMAT " _array->_end " PTR_FORMAT,
                  next_boundary, _array->_end));
-  if (csp() != NULL) {
-    if (addr >= csp()->top()) return csp()->top();
-    while (next_boundary < addr) {
-      while (n <= next_boundary) {
-        q = n;
-        oop obj = oop(q);
-        if (obj->klass_or_null() == NULL) return q;
-        n += obj->size();
-      }
-      assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
-      // [q, n) is the block that crosses the boundary.
-      alloc_block_work2(&next_boundary, &next_index, q, n);
+  if (addr >= gsp()->top()) return gsp()->top();
+  while (next_boundary < addr) {
+    while (n <= next_boundary) {
+      q = n;
+      oop obj = oop(q);
+      if (obj->klass_or_null() == NULL) return q;
+      n += block_size(q);
     }
-  } else {
-    while (next_boundary < addr) {
-      while (n <= next_boundary) {
-        q = n;
-        oop obj = oop(q);
-        if (obj->klass_or_null() == NULL) return q;
-        n += _sp->block_size(q);
-      }
-      assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
-      // [q, n) is the block that crosses the boundary.
-      alloc_block_work2(&next_boundary, &next_index, q, n);
-    }
+    assert(q <= next_boundary && n > next_boundary, "Consequence of loop");
+    // [q, n) is the block that crosses the boundary.
+    alloc_block_work2(&next_boundary, &next_index, q, n);
   }
   return forward_to_block_containing_addr_const(q, n, addr);
 }
@@ -638,7 +638,7 @@
   assert(_bottom <= addr && addr < _end,
          "addr must be covered by this Array");
   HeapWord* q = block_at_or_preceding(addr, true, _next_offset_index-1);
-  HeapWord* n = q + _sp->block_size(q);
+  HeapWord* n = q + block_size(q);
   return forward_to_block_containing_addr_const(q, n, addr);
 }
 
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -52,8 +52,8 @@
 // consolidation.
 
 // Forward declarations
-class ContiguousSpace;
 class G1BlockOffsetSharedArray;
+class G1OffsetTableContigSpace;
 
 class G1BlockOffsetTable VALUE_OBJ_CLASS_SPEC {
   friend class VMStructs;
@@ -157,6 +157,8 @@
     return _offset_array[index];
   }
 
+  void set_offset_array(HeapWord* left, HeapWord* right, u_char offset);
+
   void set_offset_array(size_t index, u_char offset) {
     check_index(index, "index out of range");
     check_offset(offset, "offset too large");
@@ -170,21 +172,6 @@
     _offset_array[index] = (u_char) pointer_delta(high, low);
   }
 
-  void set_offset_array(HeapWord* left, HeapWord* right, u_char offset) {
-    check_index(index_for(right - 1), "right address out of range");
-    assert(left  < right, "Heap addresses out of order");
-    size_t num_cards = pointer_delta(right, left) >> LogN_words;
-    if (UseMemSetInBOT) {
-      memset(&_offset_array[index_for(left)], offset, num_cards);
-    } else {
-      size_t i = index_for(left);
-      const size_t end = i + num_cards;
-      for (; i < end; i++) {
-        _offset_array[i] = offset;
-      }
-    }
-  }
-
   void set_offset_array(size_t left, size_t right, u_char offset) {
     check_index(right, "right index out of range");
     assert(left <= right, "indexes out of order");
@@ -281,11 +268,7 @@
   G1BlockOffsetSharedArray* _array;
 
   // The space that owns this subregion.
-  Space* _sp;
-
-  // If "_sp" is a contiguous space, the field below is the view of "_sp"
-  // as a contiguous space, else NULL.
-  ContiguousSpace* _csp;
+  G1OffsetTableContigSpace* _gsp;
 
   // If true, array entries are initialized to 0; otherwise, they are
   // initialized to point backwards to the beginning of the covered region.
@@ -310,7 +293,9 @@
 
 protected:
 
-  ContiguousSpace* csp() const { return _csp; }
+  G1OffsetTableContigSpace* gsp() const { return _gsp; }
+
+  inline size_t block_size(const HeapWord* p) const;
 
   // Returns the address of a block whose start is at most "addr".
   // If "has_max_index" is true, "assumes "max_index" is the last valid one
@@ -363,7 +348,7 @@
   // "this" to be passed as a parameter to a member constructor for
   // the containing concrete subtype of Space.
   // This would be legal C++, but MS VC++ doesn't allow it.
-  void set_space(Space* sp);
+  void set_space(G1OffsetTableContigSpace* sp);
 
   // Resets the covered region to the given "mr".
   void set_region(MemRegion mr);
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -26,6 +26,8 @@
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1BLOCKOFFSETTABLE_INLINE_HPP
 
 #include "gc_implementation/g1/g1BlockOffsetTable.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "memory/space.hpp"
 
 inline HeapWord* G1BlockOffsetTable::block_start(const void* addr) {
@@ -69,6 +71,11 @@
   return result;
 }
 
+inline size_t
+G1BlockOffsetArray::block_size(const HeapWord* p) const {
+  return gsp()->block_size(p);
+}
+
 inline HeapWord*
 G1BlockOffsetArray::block_at_or_preceding(const void* addr,
                                           bool has_max_index,
@@ -88,7 +95,7 @@
     // to go back by.
     size_t n_cards_back = BlockOffsetArray::entry_to_cards_back(offset);
     q -= (N_words * n_cards_back);
-    assert(q >= _sp->bottom(), "Went below bottom!");
+    assert(q >= gsp()->bottom(), "Went below bottom!");
     index -= n_cards_back;
     offset = _array->offset_array(index);
   }
@@ -101,21 +108,12 @@
 G1BlockOffsetArray::
 forward_to_block_containing_addr_const(HeapWord* q, HeapWord* n,
                                        const void* addr) const {
-  if (csp() != NULL) {
-    if (addr >= csp()->top()) return csp()->top();
-    while (n <= addr) {
-      q = n;
-      oop obj = oop(q);
-      if (obj->klass_or_null() == NULL) return q;
-      n += obj->size();
-    }
-  } else {
-    while (n <= addr) {
-      q = n;
-      oop obj = oop(q);
-      if (obj->klass_or_null() == NULL) return q;
-      n += _sp->block_size(q);
-    }
+  if (addr >= gsp()->top()) return gsp()->top();
+  while (n <= addr) {
+    q = n;
+    oop obj = oop(q);
+    if (obj->klass_or_null() == NULL) return q;
+    n += block_size(q);
   }
   assert(q <= n, "wrong order for q and addr");
   assert(addr < n, "wrong order for addr and n");
@@ -126,7 +124,7 @@
 G1BlockOffsetArray::forward_to_block_containing_addr(HeapWord* q,
                                                      const void* addr) {
   if (oop(q)->klass_or_null() == NULL) return q;
-  HeapWord* n = q + _sp->block_size(q);
+  HeapWord* n = q + block_size(q);
   // In the normal case, where the query "addr" is a card boundary, and the
   // offset table chunks are the same size as cards, the block starting at
   // "q" will contain addr, so the test below will fail, and we'll fall
--- a/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -30,23 +30,52 @@
 
 PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
 
-G1CodeRootChunk::G1CodeRootChunk() : _top(NULL), _next(NULL), _prev(NULL) {
+G1CodeRootChunk::G1CodeRootChunk() : _top(NULL), _next(NULL), _prev(NULL), _free(NULL) {
   _top = bottom();
 }
 
 void G1CodeRootChunk::reset() {
   _next = _prev = NULL;
+  _free = NULL;
   _top = bottom();
 }
 
 void G1CodeRootChunk::nmethods_do(CodeBlobClosure* cl) {
-  nmethod** cur = bottom();
+  NmethodOrLink* cur = bottom();
   while (cur != _top) {
-    cl->do_code_blob(*cur);
+    if (is_nmethod(cur)) {
+      cl->do_code_blob(cur->_nmethod);
+    }
     cur++;
   }
 }
 
+bool G1CodeRootChunk::remove_lock_free(nmethod* method) {
+  NmethodOrLink* cur = bottom();
+
+  for (NmethodOrLink* cur = bottom(); cur != _top; cur++) {
+    if (cur->_nmethod == method) {
+      bool result = Atomic::cmpxchg_ptr(NULL, &cur->_nmethod, method) == method;
+
+      if (!result) {
+        // Someone else cleared out this entry.
+        return false;
+      }
+
+      // The method was cleared. Time to link it into the free list.
+      NmethodOrLink* prev_free;
+      do {
+        prev_free = (NmethodOrLink*)_free;
+        cur->_link = prev_free;
+      } while (Atomic::cmpxchg_ptr(cur, &_free, prev_free) != prev_free);
+
+      return true;
+    }
+  }
+
+  return false;
+}
+
 G1CodeRootChunkManager::G1CodeRootChunkManager() : _free_list(), _num_chunks_handed_out(0) {
   _free_list.initialize();
   _free_list.set_size(G1CodeRootChunk::word_size());
@@ -140,34 +169,43 @@
 
 void G1CodeRootSet::add(nmethod* method) {
   if (!contains(method)) {
-    // Try to add the nmethod. If there is not enough space, get a new chunk.
-    if (_list.head() == NULL || _list.head()->is_full()) {
-      G1CodeRootChunk* cur = new_chunk();
+    // Find the first chunk that isn't full.
+    G1CodeRootChunk* cur = _list.head();
+    while (cur != NULL) {
+      if (!cur->is_full()) {
+        break;
+      }
+      cur = cur->next();
+    }
+
+    // All chunks are full, get a new chunk.
+    if (cur == NULL) {
+      cur = new_chunk();
       _list.return_chunk_at_head(cur);
     }
-    bool result = _list.head()->add(method);
+
+    // Add the nmethod.
+    bool result = cur->add(method);
+
     guarantee(result, err_msg("Not able to add nmethod "PTR_FORMAT" to newly allocated chunk.", method));
+
     _length++;
   }
 }
 
-void G1CodeRootSet::remove(nmethod* method) {
+void G1CodeRootSet::remove_lock_free(nmethod* method) {
   G1CodeRootChunk* found = find(method);
   if (found != NULL) {
-    bool result = found->remove(method);
-    guarantee(result, err_msg("could not find nmethod "PTR_FORMAT" during removal although we previously found it", method));
-    // eventually free completely emptied chunk
-    if (found->is_empty()) {
-      _list.remove_chunk(found);
-      free(found);
+    bool result = found->remove_lock_free(method);
+    if (result) {
+      Atomic::dec_ptr((volatile intptr_t*)&_length);
     }
-    _length--;
   }
   assert(!contains(method), err_msg(PTR_FORMAT" still contains nmethod "PTR_FORMAT, this, method));
 }
 
 nmethod* G1CodeRootSet::pop() {
-  do {
+  while (true) {
     G1CodeRootChunk* cur = _list.head();
     if (cur == NULL) {
       assert(_length == 0, "when there are no chunks, there should be no elements");
@@ -180,7 +218,7 @@
     } else {
       free(_list.get_chunk_at_head());
     }
-  } while (true);
+  }
 }
 
 G1CodeRootChunk* G1CodeRootSet::find(nmethod* method) {
--- a/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CodeCacheRemSet.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -31,6 +31,14 @@
 
 class CodeBlobClosure;
 
+// The elements of the G1CodeRootChunk is either:
+//  1) nmethod pointers
+//  2) nodes in an internally chained free list
+typedef union {
+  nmethod* _nmethod;
+  void*    _link;
+} NmethodOrLink;
+
 class G1CodeRootChunk : public CHeapObj<mtGC> {
  private:
   static const int NUM_ENTRIES = 32;
@@ -38,16 +46,28 @@
   G1CodeRootChunk*     _next;
   G1CodeRootChunk*     _prev;
 
-  nmethod** _top;
+  NmethodOrLink*          _top;
+  // First free position within the chunk.
+  volatile NmethodOrLink* _free;
 
-  nmethod* _data[NUM_ENTRIES];
+  NmethodOrLink _data[NUM_ENTRIES];
 
-  nmethod** bottom() const {
-    return (nmethod**) &(_data[0]);
+  NmethodOrLink* bottom() const {
+    return (NmethodOrLink*) &(_data[0]);
   }
 
-  nmethod** end() const {
-    return (nmethod**) &(_data[NUM_ENTRIES]);
+  NmethodOrLink* end() const {
+    return (NmethodOrLink*) &(_data[NUM_ENTRIES]);
+  }
+
+  bool is_link(NmethodOrLink* nmethod_or_link) {
+    return nmethod_or_link->_link == NULL ||
+        (bottom() <= nmethod_or_link->_link
+        && nmethod_or_link->_link < end());
+  }
+
+  bool is_nmethod(NmethodOrLink* nmethod_or_link) {
+    return !is_link(nmethod_or_link);
   }
 
  public:
@@ -85,46 +105,55 @@
   }
 
   bool is_full() const {
-    return _top == (nmethod**)end();
+    return _top == end() && _free == NULL;
   }
 
   bool contains(nmethod* method) {
-    nmethod** cur = bottom();
+    NmethodOrLink* cur = bottom();
     while (cur != _top) {
-      if (*cur == method) return true;
+      if (cur->_nmethod == method) return true;
       cur++;
     }
     return false;
   }
 
   bool add(nmethod* method) {
-    if (is_full()) return false;
-    *_top = method;
-    _top++;
+    if (is_full()) {
+      return false;
+    }
+
+    if (_free != NULL) {
+      // Take from internally chained free list
+      NmethodOrLink* first_free = (NmethodOrLink*)_free;
+      _free = (NmethodOrLink*)_free->_link;
+      first_free->_nmethod = method;
+    } else {
+      // Take from top.
+      _top->_nmethod = method;
+      _top++;
+    }
+
     return true;
   }
 
-  bool remove(nmethod* method) {
-    nmethod** cur = bottom();
-    while (cur != _top) {
-      if (*cur == method) {
-        memmove(cur, cur + 1, (_top - (cur + 1)) * sizeof(nmethod**));
-        _top--;
-        return true;
-      }
-      cur++;
-    }
-    return false;
-  }
+  bool remove_lock_free(nmethod* method);
 
   void nmethods_do(CodeBlobClosure* blk);
 
   nmethod* pop() {
-    if (is_empty()) {
-      return NULL;
+    if (_free != NULL) {
+      // Kill the free list.
+      _free = NULL;
     }
-    _top--;
-    return *_top;
+
+    while (!is_empty()) {
+      _top--;
+      if (is_nmethod(_top)) {
+        return _top->_nmethod;
+      }
+    }
+
+    return NULL;
   }
 };
 
@@ -193,7 +222,7 @@
   // method is likely to be repeatedly called with the same nmethod.
   void add(nmethod* method);
 
-  void remove(nmethod* method);
+  void remove_lock_free(nmethod* method);
   nmethod* pop();
 
   bool contains(nmethod* method);
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -55,6 +55,7 @@
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/gcTraceTime.hpp"
 #include "gc_implementation/shared/isGCActiveMark.hpp"
+#include "memory/allocation.hpp"
 #include "memory/gcLocker.inline.hpp"
 #include "memory/generationSpec.hpp"
 #include "memory/iterator.hpp"
@@ -87,10 +88,10 @@
 // G1ParVerifyTask uses heap_region_par_iterate_chunked() for parallelism.
 // The number of GC workers is passed to heap_region_par_iterate_chunked().
 // It does use run_task() which sets _n_workers in the task.
-// G1ParTask executes g1_process_strong_roots() ->
-// SharedHeap::process_strong_roots() which calls eventually to
+// G1ParTask executes g1_process_roots() ->
+// SharedHeap::process_roots() which calls eventually to
 // CardTableModRefBS::par_non_clean_card_iterate_work() which uses
-// SequentialSubTasksDone.  SharedHeap::process_strong_roots() also
+// SequentialSubTasksDone.  SharedHeap::process_roots() also
 // directly uses SubTasksDone (_process_strong_tasks field in SharedHeap).
 //
 
@@ -770,6 +771,7 @@
   // match new_top.
   assert(hr == NULL ||
          (hr->end() == new_end && hr->top() == new_top), "sanity");
+  check_bitmaps("Humongous Region Allocation", first_hr);
 
   assert(first_hr->used() == word_size * HeapWordSize, "invariant");
   _summary_bytes_used += first_hr->used();
@@ -1328,6 +1330,7 @@
 
       verify_before_gc();
 
+      check_bitmaps("Full GC Start");
       pre_full_gc_dump(gc_timer);
 
       COMPILER2_PRESENT(DerivedPointerTable::clear());
@@ -1501,6 +1504,18 @@
 
       verify_after_gc();
 
+      // Clear the previous marking bitmap, if needed for bitmap verification.
+      // Note we cannot do this when we clear the next marking bitmap in
+      // ConcurrentMark::abort() above since VerifyDuringGC verifies the
+      // objects marked during a full GC against the previous bitmap.
+      // But we need to clear it before calling check_bitmaps below since
+      // the full GC has compacted objects and updated TAMS but not updated
+      // the prev bitmap.
+      if (G1VerifyBitmaps) {
+        ((CMBitMap*) concurrent_mark()->prevMarkBitMap())->clearAll();
+      }
+      check_bitmaps("Full GC End");
+
       // Start a new incremental collection set for the next pause
       assert(g1_policy()->collection_set() == NULL, "must be");
       g1_policy()->start_incremental_cset_building();
@@ -1913,6 +1928,8 @@
   _secondary_free_list("Secondary Free List", new SecondaryFreeRegionListMtSafeChecker()),
   _old_set("Old Set", false /* humongous */, new OldRegionSetMtSafeChecker()),
   _humongous_set("Master Humongous Set", true /* humongous */, new HumongousRegionSetMtSafeChecker()),
+  _humongous_is_live(),
+  _has_humongous_reclaim_candidates(false),
   _free_regions_coming(false),
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
@@ -2069,6 +2086,7 @@
   _g1h = this;
 
   _in_cset_fast_test.initialize(_g1_reserved.start(), _g1_reserved.end(), HeapRegion::GrainBytes);
+  _humongous_is_live.initialize(_g1_reserved.start(), _g1_reserved.end(), HeapRegion::GrainBytes);
 
   // Create the ConcurrentMark data structure and thread.
   // (Must do this late, so that "max_regions" is defined.)
@@ -2164,6 +2182,11 @@
   }
 }
 
+void G1CollectedHeap::clear_humongous_is_live_table() {
+  guarantee(G1ReclaimDeadHumongousObjectsAtYoungGC, "Should only be called if true");
+  _humongous_is_live.clear();
+}
+
 size_t G1CollectedHeap::conservative_max_heap_alignment() {
   return HeapRegion::max_region_size();
 }
@@ -2580,15 +2603,12 @@
 
 // Iteration functions.
 
-// Iterates an OopClosure over all ref-containing fields of objects
-// within a HeapRegion.
+// Applies an ExtendedOopClosure onto all references of objects within a HeapRegion.
 
 class IterateOopClosureRegionClosure: public HeapRegionClosure {
-  MemRegion _mr;
   ExtendedOopClosure* _cl;
 public:
-  IterateOopClosureRegionClosure(MemRegion mr, ExtendedOopClosure* cl)
-    : _mr(mr), _cl(cl) {}
+  IterateOopClosureRegionClosure(ExtendedOopClosure* cl) : _cl(cl) {}
   bool doHeapRegion(HeapRegion* r) {
     if (!r->continuesHumongous()) {
       r->oop_iterate(_cl);
@@ -2598,12 +2618,7 @@
 };
 
 void G1CollectedHeap::oop_iterate(ExtendedOopClosure* cl) {
-  IterateOopClosureRegionClosure blk(_g1_committed, cl);
-  heap_region_iterate(&blk);
-}
-
-void G1CollectedHeap::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  IterateOopClosureRegionClosure blk(mr, cl);
+  IterateOopClosureRegionClosure blk(cl);
   heap_region_iterate(&blk);
 }
 
@@ -2956,10 +2971,17 @@
   }
 }
 
-CompactibleSpace* G1CollectedHeap::first_compactible_space() {
-  return n_regions() > 0 ? region_at(0) : NULL;
-}
-
+HeapRegion* G1CollectedHeap::next_compaction_region(const HeapRegion* from) const {
+  // We're not using an iterator given that it will wrap around when
+  // it reaches the last region and this is not what we want here.
+  for (uint index = from->hrs_index() + 1; index < n_regions(); index++) {
+    HeapRegion* hr = region_at(index);
+    if (!hr->isHumongous()) {
+      return hr;
+    }
+  }
+  return NULL;
+}
 
 Space* G1CollectedHeap::space_containing(const void* addr) const {
   Space* res = heap_region_containing(addr);
@@ -3390,25 +3412,20 @@
 
     if (!silent) { gclog_or_tty->print("Roots "); }
     VerifyRootsClosure rootsCl(vo);
+    VerifyKlassClosure klassCl(this, &rootsCl);
+    CLDToKlassAndOopClosure cldCl(&klassCl, &rootsCl, false);
+
+    // We apply the relevant closures to all the oops in the
+    // system dictionary, class loader data graph, the string table
+    // and the nmethods in the code cache.
     G1VerifyCodeRootOopClosure codeRootsCl(this, &rootsCl, vo);
     G1VerifyCodeRootBlobClosure blobsCl(&codeRootsCl);
-    VerifyKlassClosure klassCl(this, &rootsCl);
-
-    // We apply the relevant closures to all the oops in the
-    // system dictionary, the string table and the code cache.
-    const int so = SO_AllClasses | SO_Strings | SO_CodeCache;
-
-    // Need cleared claim bits for the strong roots processing
-    ClassLoaderDataGraph::clear_claimed_marks();
-
-    process_strong_roots(true,      // activate StrongRootsScope
-                         false,     // we set "is scavenging" to false,
-                                    // so we don't reset the dirty cards.
-                         ScanningOption(so),  // roots scanning options
-                         &rootsCl,
-                         &blobsCl,
-                         &klassCl
-                         );
+
+    process_all_roots(true,            // activate StrongRootsScope
+                      SO_AllCodeCache, // roots scanning options
+                      &rootsCl,
+                      &cldCl,
+                      &blobsCl);
 
     bool failures = rootsCl.failures() || codeRootsCl.failures();
 
@@ -3780,6 +3797,61 @@
   return g1_rem_set()->cardsScanned();
 }
 
+bool G1CollectedHeap::humongous_region_is_always_live(uint index) {
+  HeapRegion* region = region_at(index);
+  assert(region->startsHumongous(), "Must start a humongous object");
+  return oop(region->bottom())->is_objArray() || !region->rem_set()->is_empty();
+}
+
+class RegisterHumongousWithInCSetFastTestClosure : public HeapRegionClosure {
+ private:
+  size_t _total_humongous;
+  size_t _candidate_humongous;
+ public:
+  RegisterHumongousWithInCSetFastTestClosure() : _total_humongous(0), _candidate_humongous(0) {
+  }
+
+  virtual bool doHeapRegion(HeapRegion* r) {
+    if (!r->startsHumongous()) {
+      return false;
+    }
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+    uint region_idx = r->hrs_index();
+    bool is_candidate = !g1h->humongous_region_is_always_live(region_idx);
+    // Is_candidate already filters out humongous regions with some remembered set.
+    // This will not lead to humongous object that we mistakenly keep alive because
+    // during young collection the remembered sets will only be added to.
+    if (is_candidate) {
+      g1h->register_humongous_region_with_in_cset_fast_test(region_idx);
+      _candidate_humongous++;
+    }
+    _total_humongous++;
+
+    return false;
+  }
+
+  size_t total_humongous() const { return _total_humongous; }
+  size_t candidate_humongous() const { return _candidate_humongous; }
+};
+
+void G1CollectedHeap::register_humongous_regions_with_in_cset_fast_test() {
+  if (!G1ReclaimDeadHumongousObjectsAtYoungGC) {
+    g1_policy()->phase_times()->record_fast_reclaim_humongous_stats(0, 0);
+    return;
+  }
+
+  RegisterHumongousWithInCSetFastTestClosure cl;
+  heap_region_iterate(&cl);
+  g1_policy()->phase_times()->record_fast_reclaim_humongous_stats(cl.total_humongous(),
+                                                                  cl.candidate_humongous());
+  _has_humongous_reclaim_candidates = cl.candidate_humongous() > 0;
+
+  if (_has_humongous_reclaim_candidates) {
+    clear_humongous_is_live_table();
+  }
+}
+
 void
 G1CollectedHeap::setup_surviving_young_words() {
   assert(_surviving_young_words == NULL, "pre-condition");
@@ -3990,6 +4062,7 @@
       increment_gc_time_stamp();
 
       verify_before_gc();
+      check_bitmaps("GC Start");
 
       COMPILER2_PRESENT(DerivedPointerTable::clear());
 
@@ -4065,6 +4138,8 @@
 
         g1_policy()->finalize_cset(target_pause_time_ms, evacuation_info);
 
+        register_humongous_regions_with_in_cset_fast_test();
+
         _cm->note_start_of_gc();
         // We should not verify the per-thread SATB buffers given that
         // we have not filtered them yet (we'll do so during the
@@ -4115,6 +4190,9 @@
                                  true  /* verify_fingers */);
 
         free_collection_set(g1_policy()->collection_set(), evacuation_info);
+
+        eagerly_reclaim_humongous_regions();
+
         g1_policy()->clear_collection_set();
 
         cleanup_surviving_young_words();
@@ -4235,6 +4313,7 @@
         increment_gc_time_stamp();
 
         verify_after_gc();
+        check_bitmaps("GC End");
 
         assert(!ref_processor_stw()->discovery_enabled(), "Postcondition");
         ref_processor_stw()->verify_no_references_recorded();
@@ -4338,11 +4417,7 @@
   assert(_mutator_alloc_region.get() == NULL, "post-condition");
 }
 
-void G1CollectedHeap::init_gc_alloc_regions(EvacuationInfo& evacuation_info) {
-  assert_at_safepoint(true /* should_be_vm_thread */);
-
-  _survivor_gc_alloc_region.init();
-  _old_gc_alloc_region.init();
+void G1CollectedHeap::use_retained_old_gc_alloc_region(EvacuationInfo& evacuation_info) {
   HeapRegion* retained_region = _retained_old_gc_alloc_region;
   _retained_old_gc_alloc_region = NULL;
 
@@ -4360,7 +4435,7 @@
       !(retained_region->top() == retained_region->end()) &&
       !retained_region->is_empty() &&
       !retained_region->isHumongous()) {
-    retained_region->set_saved_mark();
+    retained_region->record_top_and_timestamp();
     // The retained region was added to the old region set when it was
     // retired. We have to remove it now, since we don't allow regions
     // we allocate to in the region sets. We'll re-add it later, when
@@ -4374,6 +4449,15 @@
   }
 }
 
+void G1CollectedHeap::init_gc_alloc_regions(EvacuationInfo& evacuation_info) {
+  assert_at_safepoint(true /* should_be_vm_thread */);
+
+  _survivor_gc_alloc_region.init();
+  _old_gc_alloc_region.init();
+
+  use_retained_old_gc_alloc_region(evacuation_info);
+}
+
 void G1CollectedHeap::release_gc_alloc_regions(uint no_of_gc_workers, EvacuationInfo& evacuation_info) {
   evacuation_info.set_allocation_regions(_survivor_gc_alloc_region.count() +
                                          _old_gc_alloc_region.count());
@@ -4607,7 +4691,7 @@
   }
 }
 
-template <G1Barrier barrier, bool do_mark_object>
+template <G1Barrier barrier, G1Mark do_mark_object>
 template <class T>
 void G1ParCopyClosure<barrier, do_mark_object>::do_oop_work(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
@@ -4620,7 +4704,9 @@
 
   assert(_worker_id == _par_scan_state->queue_num(), "sanity");
 
-  if (_g1->in_cset_fast_test(obj)) {
+  G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj);
+
+  if (state == G1CollectedHeap::InCSet) {
     oop forwardee;
     if (obj->is_forwarded()) {
       forwardee = obj->forwardee();
@@ -4629,7 +4715,7 @@
     }
     assert(forwardee != NULL, "forwardee should not be NULL");
     oopDesc::encode_store_heap_oop(p, forwardee);
-    if (do_mark_object && forwardee != obj) {
+    if (do_mark_object != G1MarkNone && forwardee != obj) {
       // If the object is self-forwarded we don't need to explicitly
       // mark it, the evacuation failure protocol will do so.
       mark_forwarded_object(obj, forwardee);
@@ -4639,10 +4725,12 @@
       do_klass_barrier(p, forwardee);
     }
   } else {
+    if (state == G1CollectedHeap::IsHumongous) {
+      _g1->set_humongous_is_live(obj);
+    }
     // The object is not in collection set. If we're a root scanning
-    // closure during an initial mark pause (i.e. do_mark_object will
-    // be true) then attempt to mark the object.
-    if (do_mark_object) {
+    // closure during an initial mark pause then attempt to mark the object.
+    if (do_mark_object == G1MarkFromRoot) {
       mark_object(obj);
     }
   }
@@ -4652,8 +4740,8 @@
   }
 }
 
-template void G1ParCopyClosure<G1BarrierEvac, false>::do_oop_work(oop* p);
-template void G1ParCopyClosure<G1BarrierEvac, false>::do_oop_work(narrowOop* p);
+template void G1ParCopyClosure<G1BarrierEvac, G1MarkNone>::do_oop_work(oop* p);
+template void G1ParCopyClosure<G1BarrierEvac, G1MarkNone>::do_oop_work(narrowOop* p);
 
 class G1ParEvacuateFollowersClosure : public VoidClosure {
 protected:
@@ -4732,11 +4820,6 @@
   Mutex _stats_lock;
   Mutex* stats_lock() { return &_stats_lock; }
 
-  size_t getNCards() {
-    return (_g1h->capacity() + G1BlockOffsetSharedArray::N_bytes - 1)
-      / G1BlockOffsetSharedArray::N_bytes;
-  }
-
 public:
   G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues)
     : AbstractGangTask("G1 collection"),
@@ -4766,6 +4849,51 @@
     _n_workers = active_workers;
   }
 
+  // Helps out with CLD processing.
+  //
+  // During InitialMark we need to:
+  // 1) Scavenge all CLDs for the young GC.
+  // 2) Mark all objects directly reachable from strong CLDs.
+  template <G1Mark do_mark_object>
+  class G1CLDClosure : public CLDClosure {
+    G1ParCopyClosure<G1BarrierNone,  do_mark_object>* _oop_closure;
+    G1ParCopyClosure<G1BarrierKlass, do_mark_object>  _oop_in_klass_closure;
+    G1KlassScanClosure                                _klass_in_cld_closure;
+    bool                                              _claim;
+
+   public:
+    G1CLDClosure(G1ParCopyClosure<G1BarrierNone, do_mark_object>* oop_closure,
+                 bool only_young, bool claim)
+        : _oop_closure(oop_closure),
+          _oop_in_klass_closure(oop_closure->g1(),
+                                oop_closure->pss(),
+                                oop_closure->rp()),
+          _klass_in_cld_closure(&_oop_in_klass_closure, only_young),
+          _claim(claim) {
+
+    }
+
+    void do_cld(ClassLoaderData* cld) {
+      cld->oops_do(_oop_closure, &_klass_in_cld_closure, _claim);
+    }
+  };
+
+  class G1CodeBlobClosure: public CodeBlobClosure {
+    OopClosure* _f;
+
+   public:
+    G1CodeBlobClosure(OopClosure* f) : _f(f) {}
+    void do_code_blob(CodeBlob* blob) {
+      nmethod* that = blob->as_nmethod_or_null();
+      if (that != NULL) {
+        if (!that->test_set_oops_do_mark()) {
+          that->oops_do(_f);
+          that->fix_oop_relocations();
+        }
+      }
+    }
+  };
+
   void work(uint worker_id) {
     if (worker_id >= _n_workers) return;  // no work needed this round
 
@@ -4783,40 +4911,67 @@
 
       pss.set_evac_failure_closure(&evac_failure_cl);
 
-      G1ParScanExtRootClosure        only_scan_root_cl(_g1h, &pss, rp);
-      G1ParScanMetadataClosure       only_scan_metadata_cl(_g1h, &pss, rp);
-
-      G1ParScanAndMarkExtRootClosure scan_mark_root_cl(_g1h, &pss, rp);
-      G1ParScanAndMarkMetadataClosure scan_mark_metadata_cl(_g1h, &pss, rp);
-
-      bool only_young                 = _g1h->g1_policy()->gcs_are_young();
-      G1KlassScanClosure              scan_mark_klasses_cl_s(&scan_mark_metadata_cl, false);
-      G1KlassScanClosure              only_scan_klasses_cl_s(&only_scan_metadata_cl, only_young);
-
-      OopClosure*                    scan_root_cl = &only_scan_root_cl;
-      G1KlassScanClosure*            scan_klasses_cl = &only_scan_klasses_cl_s;
+      bool only_young = _g1h->g1_policy()->gcs_are_young();
+
+      // Non-IM young GC.
+      G1ParCopyClosure<G1BarrierNone, G1MarkNone>             scan_only_root_cl(_g1h, &pss, rp);
+      G1CLDClosure<G1MarkNone>                                scan_only_cld_cl(&scan_only_root_cl,
+                                                                               only_young, // Only process dirty klasses.
+                                                                               false);     // No need to claim CLDs.
+      // IM young GC.
+      //    Strong roots closures.
+      G1ParCopyClosure<G1BarrierNone, G1MarkFromRoot>         scan_mark_root_cl(_g1h, &pss, rp);
+      G1CLDClosure<G1MarkFromRoot>                            scan_mark_cld_cl(&scan_mark_root_cl,
+                                                                               false, // Process all klasses.
+                                                                               true); // Need to claim CLDs.
+      //    Weak roots closures.
+      G1ParCopyClosure<G1BarrierNone, G1MarkPromotedFromRoot> scan_mark_weak_root_cl(_g1h, &pss, rp);
+      G1CLDClosure<G1MarkPromotedFromRoot>                    scan_mark_weak_cld_cl(&scan_mark_weak_root_cl,
+                                                                                    false, // Process all klasses.
+                                                                                    true); // Need to claim CLDs.
+
+      G1CodeBlobClosure scan_only_code_cl(&scan_only_root_cl);
+      G1CodeBlobClosure scan_mark_code_cl(&scan_mark_root_cl);
+      // IM Weak code roots are handled later.
+
+      OopClosure* strong_root_cl;
+      OopClosure* weak_root_cl;
+      CLDClosure* strong_cld_cl;
+      CLDClosure* weak_cld_cl;
+      CodeBlobClosure* strong_code_cl;
 
       if (_g1h->g1_policy()->during_initial_mark_pause()) {
         // We also need to mark copied objects.
-        scan_root_cl = &scan_mark_root_cl;
-        scan_klasses_cl = &scan_mark_klasses_cl_s;
+        strong_root_cl = &scan_mark_root_cl;
+        strong_cld_cl  = &scan_mark_cld_cl;
+        strong_code_cl = &scan_mark_code_cl;
+        if (ClassUnloadingWithConcurrentMark) {
+          weak_root_cl = &scan_mark_weak_root_cl;
+          weak_cld_cl  = &scan_mark_weak_cld_cl;
+        } else {
+          weak_root_cl = &scan_mark_root_cl;
+          weak_cld_cl  = &scan_mark_cld_cl;
+        }
+      } else {
+        strong_root_cl = &scan_only_root_cl;
+        weak_root_cl   = &scan_only_root_cl;
+        strong_cld_cl  = &scan_only_cld_cl;
+        weak_cld_cl    = &scan_only_cld_cl;
+        strong_code_cl = &scan_only_code_cl;
       }
 
-      G1ParPushHeapRSClosure          push_heap_rs_cl(_g1h, &pss);
-
-      // Don't scan the scavengable methods in the code cache as part
-      // of strong root scanning. The code roots that point into a
-      // region in the collection set are scanned when we scan the
-      // region's RSet.
-      int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings;
+
+      G1ParPushHeapRSClosure  push_heap_rs_cl(_g1h, &pss);
 
       pss.start_strong_roots();
-      _g1h->g1_process_strong_roots(/* is scavenging */ true,
-                                    SharedHeap::ScanningOption(so),
-                                    scan_root_cl,
-                                    &push_heap_rs_cl,
-                                    scan_klasses_cl,
-                                    worker_id);
+      _g1h->g1_process_roots(strong_root_cl,
+                             weak_root_cl,
+                             &push_heap_rs_cl,
+                             strong_cld_cl,
+                             weak_cld_cl,
+                             strong_code_cl,
+                             worker_id);
+
       pss.end_strong_roots();
 
       {
@@ -4854,30 +5009,32 @@
 
 void
 G1CollectedHeap::
-g1_process_strong_roots(bool is_scavenging,
-                        ScanningOption so,
-                        OopClosure* scan_non_heap_roots,
-                        OopsInHeapRegionClosure* scan_rs,
-                        G1KlassScanClosure* scan_klasses,
-                        uint worker_i) {
-
-  // First scan the strong roots
+g1_process_roots(OopClosure* scan_non_heap_roots,
+                 OopClosure* scan_non_heap_weak_roots,
+                 OopsInHeapRegionClosure* scan_rs,
+                 CLDClosure* scan_strong_clds,
+                 CLDClosure* scan_weak_clds,
+                 CodeBlobClosure* scan_strong_code,
+                 uint worker_i) {
+
+  // First scan the shared roots.
   double ext_roots_start = os::elapsedTime();
   double closure_app_time_sec = 0.0;
 
+  bool during_im = _g1h->g1_policy()->during_initial_mark_pause();
+  bool trace_metadata = during_im && ClassUnloadingWithConcurrentMark;
+
   BufferingOopClosure buf_scan_non_heap_roots(scan_non_heap_roots);
-
-  assert(so & SO_CodeCache || scan_rs != NULL, "must scan code roots somehow");
-  // Walk the code cache/strong code roots w/o buffering, because StarTask
-  // cannot handle unaligned oop locations.
-  CodeBlobToOopClosure eager_scan_code_roots(scan_non_heap_roots, true /* do_marking */);
-
-  process_strong_roots(false, // no scoping; this is parallel code
-                       is_scavenging, so,
-                       &buf_scan_non_heap_roots,
-                       &eager_scan_code_roots,
-                       scan_klasses
-                       );
+  BufferingOopClosure buf_scan_non_heap_weak_roots(scan_non_heap_weak_roots);
+
+  process_roots(false, // no scoping; this is parallel code
+                SharedHeap::SO_None,
+                &buf_scan_non_heap_roots,
+                &buf_scan_non_heap_weak_roots,
+                scan_strong_clds,
+                // Unloading Initial Marks handle the weak CLDs separately.
+                (trace_metadata ? NULL : scan_weak_clds),
+                scan_strong_code);
 
   // Now the CM ref_processor roots.
   if (!_process_strong_tasks->is_task_claimed(G1H_PS_refProcessor_oops_do)) {
@@ -4888,10 +5045,21 @@
     ref_processor_cm()->weak_oops_do(&buf_scan_non_heap_roots);
   }
 
+  if (trace_metadata) {
+    // Barrier to make sure all workers passed
+    // the strong CLD and strong nmethods phases.
+    active_strong_roots_scope()->wait_until_all_workers_done_with_threads(n_par_threads());
+
+    // Now take the complement of the strong CLDs.
+    ClassLoaderDataGraph::roots_cld_do(NULL, scan_weak_clds);
+  }
+
   // Finish up any enqueued closure apps (attributed as object copy time).
   buf_scan_non_heap_roots.done();
-
-  double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds();
+  buf_scan_non_heap_weak_roots.done();
+
+  double obj_copy_time_sec = buf_scan_non_heap_roots.closure_app_seconds()
+      + buf_scan_non_heap_weak_roots.closure_app_seconds();
 
   g1_policy()->phase_times()->record_obj_copy_time(worker_i, obj_copy_time_sec * 1000.0);
 
@@ -4915,32 +5083,14 @@
   }
   g1_policy()->phase_times()->record_satb_filtering_time(worker_i, satb_filtering_ms);
 
-  // If this is an initial mark pause, and we're not scanning
-  // the entire code cache, we need to mark the oops in the
-  // strong code root lists for the regions that are not in
-  // the collection set.
-  // Note all threads participate in this set of root tasks.
-  double mark_strong_code_roots_ms = 0.0;
-  if (g1_policy()->during_initial_mark_pause() && !(so & SO_CodeCache)) {
-    double mark_strong_roots_start = os::elapsedTime();
-    mark_strong_code_roots(worker_i);
-    mark_strong_code_roots_ms = (os::elapsedTime() - mark_strong_roots_start) * 1000.0;
-  }
-  g1_policy()->phase_times()->record_strong_code_root_mark_time(worker_i, mark_strong_code_roots_ms);
-
   // Now scan the complement of the collection set.
-  if (scan_rs != NULL) {
-    g1_rem_set()->oops_into_collection_set_do(scan_rs, &eager_scan_code_roots, worker_i);
-  }
+  MarkingCodeBlobClosure scavenge_cs_nmethods(scan_non_heap_weak_roots, CodeBlobToOopClosure::FixRelocations);
+
+  g1_rem_set()->oops_into_collection_set_do(scan_rs, &scavenge_cs_nmethods, worker_i);
+
   _process_strong_tasks->all_tasks_completed();
 }
 
-void
-G1CollectedHeap::g1_process_weak_roots(OopClosure* root_closure) {
-  CodeBlobToOopClosure roots_in_blobs(root_closure, /*do_marking=*/ false);
-  SharedHeap::process_weak_roots(root_closure, &roots_in_blobs);
-}
-
 class G1StringSymbolTableUnlinkTask : public AbstractGangTask {
 private:
   BoolObjectClosure* _is_alive;
@@ -4958,7 +5108,8 @@
   bool _do_in_parallel;
 public:
   G1StringSymbolTableUnlinkTask(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols) :
-    AbstractGangTask("Par String/Symbol table unlink"), _is_alive(is_alive),
+    AbstractGangTask("String/Symbol Unlinking"),
+    _is_alive(is_alive),
     _do_in_parallel(G1CollectedHeap::use_parallel_gc_threads()),
     _process_strings(process_strings), _strings_processed(0), _strings_removed(0),
     _process_symbols(process_symbols), _symbols_processed(0), _symbols_removed(0) {
@@ -4980,6 +5131,14 @@
     guarantee(!_process_symbols || !_do_in_parallel || SymbolTable::parallel_claimed_index() >= _initial_symbol_table_size,
               err_msg("claim value "INT32_FORMAT" after unlink less than initial symbol table size "INT32_FORMAT,
                       SymbolTable::parallel_claimed_index(), _initial_symbol_table_size));
+
+    if (G1TraceStringSymbolTableScrubbing) {
+      gclog_or_tty->print_cr("Cleaned string and symbol table, "
+                             "strings: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed, "
+                             "symbols: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed",
+                             strings_processed(), strings_removed(),
+                             symbols_processed(), symbols_removed());
+    }
   }
 
   void work(uint worker_id) {
@@ -5015,12 +5174,279 @@
   size_t symbols_removed()   const { return (size_t)_symbols_removed; }
 };
 
-void G1CollectedHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive,
-                                                     bool process_strings, bool process_symbols) {
+class G1CodeCacheUnloadingTask VALUE_OBJ_CLASS_SPEC {
+private:
+  static Monitor* _lock;
+
+  BoolObjectClosure* const _is_alive;
+  const bool               _unloading_occurred;
+  const uint               _num_workers;
+
+  // Variables used to claim nmethods.
+  nmethod* _first_nmethod;
+  volatile nmethod* _claimed_nmethod;
+
+  // The list of nmethods that need to be processed by the second pass.
+  volatile nmethod* _postponed_list;
+  volatile uint     _num_entered_barrier;
+
+ public:
+  G1CodeCacheUnloadingTask(uint num_workers, BoolObjectClosure* is_alive, bool unloading_occurred) :
+      _is_alive(is_alive),
+      _unloading_occurred(unloading_occurred),
+      _num_workers(num_workers),
+      _first_nmethod(NULL),
+      _claimed_nmethod(NULL),
+      _postponed_list(NULL),
+      _num_entered_barrier(0)
+  {
+    nmethod::increase_unloading_clock();
+    _first_nmethod = CodeCache::alive_nmethod(CodeCache::first());
+    _claimed_nmethod = (volatile nmethod*)_first_nmethod;
+  }
+
+  ~G1CodeCacheUnloadingTask() {
+    CodeCache::verify_clean_inline_caches();
+
+    CodeCache::set_needs_cache_clean(false);
+    guarantee(CodeCache::scavenge_root_nmethods() == NULL, "Must be");
+
+    CodeCache::verify_icholder_relocations();
+  }
+
+ private:
+  void add_to_postponed_list(nmethod* nm) {
+      nmethod* old;
+      do {
+        old = (nmethod*)_postponed_list;
+        nm->set_unloading_next(old);
+      } while ((nmethod*)Atomic::cmpxchg_ptr(nm, &_postponed_list, old) != old);
+  }
+
+  void clean_nmethod(nmethod* nm) {
+    bool postponed = nm->do_unloading_parallel(_is_alive, _unloading_occurred);
+
+    if (postponed) {
+      // This nmethod referred to an nmethod that has not been cleaned/unloaded yet.
+      add_to_postponed_list(nm);
+    }
+
+    // Mark that this thread has been cleaned/unloaded.
+    // After this call, it will be safe to ask if this nmethod was unloaded or not.
+    nm->set_unloading_clock(nmethod::global_unloading_clock());
+  }
+
+  void clean_nmethod_postponed(nmethod* nm) {
+    nm->do_unloading_parallel_postponed(_is_alive, _unloading_occurred);
+  }
+
+  static const int MaxClaimNmethods = 16;
+
+  void claim_nmethods(nmethod** claimed_nmethods, int *num_claimed_nmethods) {
+    nmethod* first;
+    nmethod* last;
+
+    do {
+      *num_claimed_nmethods = 0;
+
+      first = last = (nmethod*)_claimed_nmethod;
+
+      if (first != NULL) {
+        for (int i = 0; i < MaxClaimNmethods; i++) {
+          last = CodeCache::alive_nmethod(CodeCache::next(last));
+
+          if (last == NULL) {
+            break;
+          }
+
+          claimed_nmethods[i] = last;
+          (*num_claimed_nmethods)++;
+        }
+      }
+
+    } while ((nmethod*)Atomic::cmpxchg_ptr(last, &_claimed_nmethod, first) != first);
+  }
+
+  nmethod* claim_postponed_nmethod() {
+    nmethod* claim;
+    nmethod* next;
+
+    do {
+      claim = (nmethod*)_postponed_list;
+      if (claim == NULL) {
+        return NULL;
+      }
+
+      next = claim->unloading_next();
+
+    } while ((nmethod*)Atomic::cmpxchg_ptr(next, &_postponed_list, claim) != claim);
+
+    return claim;
+  }
+
+ public:
+  // Mark that we're done with the first pass of nmethod cleaning.
+  void barrier_mark(uint worker_id) {
+    MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
+    _num_entered_barrier++;
+    if (_num_entered_barrier == _num_workers) {
+      ml.notify_all();
+    }
+  }
+
+  // See if we have to wait for the other workers to
+  // finish their first-pass nmethod cleaning work.
+  void barrier_wait(uint worker_id) {
+    if (_num_entered_barrier < _num_workers) {
+      MonitorLockerEx ml(_lock, Mutex::_no_safepoint_check_flag);
+      while (_num_entered_barrier < _num_workers) {
+          ml.wait(Mutex::_no_safepoint_check_flag, 0, false);
+      }
+    }
+  }
+
+  // Cleaning and unloading of nmethods. Some work has to be postponed
+  // to the second pass, when we know which nmethods survive.
+  void work_first_pass(uint worker_id) {
+    // The first nmethods is claimed by the first worker.
+    if (worker_id == 0 && _first_nmethod != NULL) {
+      clean_nmethod(_first_nmethod);
+      _first_nmethod = NULL;
+    }
+
+    int num_claimed_nmethods;
+    nmethod* claimed_nmethods[MaxClaimNmethods];
+
+    while (true) {
+      claim_nmethods(claimed_nmethods, &num_claimed_nmethods);
+
+      if (num_claimed_nmethods == 0) {
+        break;
+      }
+
+      for (int i = 0; i < num_claimed_nmethods; i++) {
+        clean_nmethod(claimed_nmethods[i]);
+      }
+    }
+  }
+
+  void work_second_pass(uint worker_id) {
+    nmethod* nm;
+    // Take care of postponed nmethods.
+    while ((nm = claim_postponed_nmethod()) != NULL) {
+      clean_nmethod_postponed(nm);
+    }
+  }
+};
+
+Monitor* G1CodeCacheUnloadingTask::_lock = new Monitor(Mutex::leaf, "Code Cache Unload lock");
+
+class G1KlassCleaningTask : public StackObj {
+  BoolObjectClosure*                      _is_alive;
+  volatile jint                           _clean_klass_tree_claimed;
+  ClassLoaderDataGraphKlassIteratorAtomic _klass_iterator;
+
+ public:
+  G1KlassCleaningTask(BoolObjectClosure* is_alive) :
+      _is_alive(is_alive),
+      _clean_klass_tree_claimed(0),
+      _klass_iterator() {
+  }
+
+ private:
+  bool claim_clean_klass_tree_task() {
+    if (_clean_klass_tree_claimed) {
+      return false;
+    }
+
+    return Atomic::cmpxchg(1, (jint*)&_clean_klass_tree_claimed, 0) == 0;
+  }
+
+  InstanceKlass* claim_next_klass() {
+    Klass* klass;
+    do {
+      klass =_klass_iterator.next_klass();
+    } while (klass != NULL && !klass->oop_is_instance());
+
+    return (InstanceKlass*)klass;
+  }
+
+public:
+
+  void clean_klass(InstanceKlass* ik) {
+    ik->clean_implementors_list(_is_alive);
+    ik->clean_method_data(_is_alive);
+
+    // G1 specific cleanup work that has
+    // been moved here to be done in parallel.
+    ik->clean_dependent_nmethods();
+  }
+
+  void work() {
+    ResourceMark rm;
+
+    // One worker will clean the subklass/sibling klass tree.
+    if (claim_clean_klass_tree_task()) {
+      Klass::clean_subklass_tree(_is_alive);
+    }
+
+    // All workers will help cleaning the classes,
+    InstanceKlass* klass;
+    while ((klass = claim_next_klass()) != NULL) {
+      clean_klass(klass);
+    }
+  }
+};
+
+// To minimize the remark pause times, the tasks below are done in parallel.
+class G1ParallelCleaningTask : public AbstractGangTask {
+private:
+  G1StringSymbolTableUnlinkTask _string_symbol_task;
+  G1CodeCacheUnloadingTask      _code_cache_task;
+  G1KlassCleaningTask           _klass_cleaning_task;
+
+public:
+  // The constructor is run in the VMThread.
+  G1ParallelCleaningTask(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols, uint num_workers, bool unloading_occurred) :
+      AbstractGangTask("Parallel Cleaning"),
+      _string_symbol_task(is_alive, process_strings, process_symbols),
+      _code_cache_task(num_workers, is_alive, unloading_occurred),
+      _klass_cleaning_task(is_alive) {
+  }
+
+  // The parallel work done by all worker threads.
+  void work(uint worker_id) {
+    // Do first pass of code cache cleaning.
+    _code_cache_task.work_first_pass(worker_id);
+
+    // Let the threads mark that the first pass is done.
+    _code_cache_task.barrier_mark(worker_id);
+
+    // Clean the Strings and Symbols.
+    _string_symbol_task.work(worker_id);
+
+    // Wait for all workers to finish the first code cache cleaning pass.
+    _code_cache_task.barrier_wait(worker_id);
+
+    // Do the second code cache cleaning work, which realize on
+    // the liveness information gathered during the first pass.
+    _code_cache_task.work_second_pass(worker_id);
+
+    // Clean all klasses that were not unloaded.
+    _klass_cleaning_task.work();
+  }
+};
+
+
+void G1CollectedHeap::parallel_cleaning(BoolObjectClosure* is_alive,
+                                        bool process_strings,
+                                        bool process_symbols,
+                                        bool class_unloading_occurred) {
   uint n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
-                   _g1h->workers()->active_workers() : 1);
-
-  G1StringSymbolTableUnlinkTask g1_unlink_task(is_alive, process_strings, process_symbols);
+                    workers()->active_workers() : 1);
+
+  G1ParallelCleaningTask g1_unlink_task(is_alive, process_strings, process_symbols,
+                                        n_workers, class_unloading_occurred);
   if (G1CollectedHeap::use_parallel_gc_threads()) {
     set_par_threads(n_workers);
     workers()->run_task(&g1_unlink_task);
@@ -5028,12 +5454,21 @@
   } else {
     g1_unlink_task.work(0);
   }
-  if (G1TraceStringSymbolTableScrubbing) {
-    gclog_or_tty->print_cr("Cleaned string and symbol table, "
-                           "strings: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed, "
-                           "symbols: "SIZE_FORMAT" processed, "SIZE_FORMAT" removed",
-                           g1_unlink_task.strings_processed(), g1_unlink_task.strings_removed(),
-                           g1_unlink_task.symbols_processed(), g1_unlink_task.symbols_removed());
+}
+
+void G1CollectedHeap::unlink_string_and_symbol_table(BoolObjectClosure* is_alive,
+                                                     bool process_strings, bool process_symbols) {
+  {
+    uint n_workers = (G1CollectedHeap::use_parallel_gc_threads() ?
+                     _g1h->workers()->active_workers() : 1);
+    G1StringSymbolTableUnlinkTask g1_unlink_task(is_alive, process_strings, process_symbols);
+    if (G1CollectedHeap::use_parallel_gc_threads()) {
+      set_par_threads(n_workers);
+      workers()->run_task(&g1_unlink_task);
+      set_par_threads(0);
+    } else {
+      g1_unlink_task.work(0);
+    }
   }
 
   if (G1StringDedup::is_enabled()) {
@@ -5117,12 +5552,21 @@
 public:
   G1KeepAliveClosure(G1CollectedHeap* g1) : _g1(g1) {}
   void do_oop(narrowOop* p) { guarantee(false, "Not needed"); }
-  void do_oop(      oop* p) {
+  void do_oop(oop* p) {
     oop obj = *p;
 
-    if (_g1->obj_in_cs(obj)) {
+    G1CollectedHeap::in_cset_state_t cset_state = _g1->in_cset_state(obj);
+    if (obj == NULL || cset_state == G1CollectedHeap::InNeither) {
+      return;
+    }
+    if (cset_state == G1CollectedHeap::InCSet) {
       assert( obj->is_forwarded(), "invariant" );
       *p = obj->forwardee();
+    } else {
+      assert(!obj->is_forwarded(), "invariant" );
+      assert(cset_state == G1CollectedHeap::IsHumongous,
+             err_msg("Only allowed InCSet state is IsHumongous, but is %d", cset_state));
+      _g1->set_humongous_is_live(obj);
     }
   }
 };
@@ -5135,17 +5579,14 @@
 class G1CopyingKeepAliveClosure: public OopClosure {
   G1CollectedHeap*         _g1h;
   OopClosure*              _copy_non_heap_obj_cl;
-  OopsInHeapRegionClosure* _copy_metadata_obj_cl;
   G1ParScanThreadState*    _par_scan_state;
 
 public:
   G1CopyingKeepAliveClosure(G1CollectedHeap* g1h,
                             OopClosure* non_heap_obj_cl,
-                            OopsInHeapRegionClosure* metadata_obj_cl,
                             G1ParScanThreadState* pss):
     _g1h(g1h),
     _copy_non_heap_obj_cl(non_heap_obj_cl),
-    _copy_metadata_obj_cl(metadata_obj_cl),
     _par_scan_state(pss)
   {}
 
@@ -5155,7 +5596,7 @@
   template <class T> void do_oop_work(T* p) {
     oop obj = oopDesc::load_decode_heap_oop(p);
 
-    if (_g1h->obj_in_cs(obj)) {
+    if (_g1h->is_in_cset_or_humongous(obj)) {
       // If the referent object has been forwarded (either copied
       // to a new location or to itself in the event of an
       // evacuation failure) then we need to update the reference
@@ -5178,12 +5619,12 @@
         _par_scan_state->push_on_queue(p);
       } else {
         assert(!Metaspace::contains((const void*)p),
-               err_msg("Otherwise need to call _copy_metadata_obj_cl->do_oop(p) "
+               err_msg("Unexpectedly found a pointer from metadata: "
                               PTR_FORMAT, p));
-          _copy_non_heap_obj_cl->do_oop(p);
-        }
+        _copy_non_heap_obj_cl->do_oop(p);
       }
     }
+  }
 };
 
 // Serial drain queue closure. Called as the 'complete_gc'
@@ -5273,22 +5714,18 @@
     pss.set_evac_failure_closure(&evac_failure_cl);
 
     G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, &pss, NULL);
-    G1ParScanMetadataClosure       only_copy_metadata_cl(_g1h, &pss, NULL);
 
     G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL);
-    G1ParScanAndMarkMetadataClosure copy_mark_metadata_cl(_g1h, &pss, NULL);
 
     OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
-    OopsInHeapRegionClosure*       copy_metadata_cl = &only_copy_metadata_cl;
 
     if (_g1h->g1_policy()->during_initial_mark_pause()) {
       // We also need to mark copied objects.
       copy_non_heap_cl = &copy_mark_non_heap_cl;
-      copy_metadata_cl = &copy_mark_metadata_cl;
     }
 
     // Keep alive closure.
-    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, copy_metadata_cl, &pss);
+    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, &pss);
 
     // Complete GC closure
     G1ParEvacuateFollowersClosure drain_queue(_g1h, &pss, _task_queues, _terminator);
@@ -5382,18 +5819,14 @@
     assert(pss.queue_is_empty(), "both queue and overflow should be empty");
 
     G1ParScanExtRootClosure        only_copy_non_heap_cl(_g1h, &pss, NULL);
-    G1ParScanMetadataClosure       only_copy_metadata_cl(_g1h, &pss, NULL);
 
     G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(_g1h, &pss, NULL);
-    G1ParScanAndMarkMetadataClosure copy_mark_metadata_cl(_g1h, &pss, NULL);
 
     OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
-    OopsInHeapRegionClosure*       copy_metadata_cl = &only_copy_metadata_cl;
 
     if (_g1h->g1_policy()->during_initial_mark_pause()) {
       // We also need to mark copied objects.
       copy_non_heap_cl = &copy_mark_non_heap_cl;
-      copy_metadata_cl = &copy_mark_metadata_cl;
     }
 
     // Is alive closure
@@ -5401,7 +5834,7 @@
 
     // Copying keep alive closure. Applied to referent objects that need
     // to be copied.
-    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, copy_metadata_cl, &pss);
+    G1CopyingKeepAliveClosure keep_alive(_g1h, copy_non_heap_cl, &pss);
 
     ReferenceProcessor* rp = _g1h->ref_processor_cm();
 
@@ -5507,22 +5940,18 @@
   assert(pss.queue_is_empty(), "pre-condition");
 
   G1ParScanExtRootClosure        only_copy_non_heap_cl(this, &pss, NULL);
-  G1ParScanMetadataClosure       only_copy_metadata_cl(this, &pss, NULL);
 
   G1ParScanAndMarkExtRootClosure copy_mark_non_heap_cl(this, &pss, NULL);
-  G1ParScanAndMarkMetadataClosure copy_mark_metadata_cl(this, &pss, NULL);
 
   OopClosure*                    copy_non_heap_cl = &only_copy_non_heap_cl;
-  OopsInHeapRegionClosure*       copy_metadata_cl = &only_copy_metadata_cl;
 
   if (_g1h->g1_policy()->during_initial_mark_pause()) {
     // We also need to mark copied objects.
     copy_non_heap_cl = &copy_mark_non_heap_cl;
-    copy_metadata_cl = &copy_mark_metadata_cl;
   }
 
   // Keep alive closure.
-  G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, copy_metadata_cl, &pss);
+  G1CopyingKeepAliveClosure keep_alive(this, copy_non_heap_cl, &pss);
 
   // Serial Complete GC closure
   G1STWDrainQueueClosure drain_queue(this, &pss);
@@ -5641,6 +6070,10 @@
 
   {
     StrongRootsScope srs(this);
+    // InitialMark needs claim bits to keep track of the marked-through CLDs.
+    if (g1_policy()->during_initial_mark_pause()) {
+      ClassLoaderDataGraph::clear_claimed_marks();
+    }
 
     if (G1CollectedHeap::use_parallel_gc_threads()) {
       // The individual threads will set their evac-failure closures.
@@ -5745,6 +6178,11 @@
   assert(!hr->is_empty(), "the region should not be empty");
   assert(free_list != NULL, "pre-condition");
 
+  if (G1VerifyBitmaps) {
+    MemRegion mr(hr->bottom(), hr->end());
+    concurrent_mark()->clearRangePrevBitmap(mr);
+  }
+
   // Clear the card counts for this region.
   // Note: we only need to do this if the region is not young
   // (since we don't refine cards in young regions).
@@ -5879,7 +6317,87 @@
 void G1CollectedHeap::verify_dirty_young_regions() {
   verify_dirty_young_list(_young_list->first_region());
 }
-#endif
+
+bool G1CollectedHeap::verify_no_bits_over_tams(const char* bitmap_name, CMBitMapRO* bitmap,
+                                               HeapWord* tams, HeapWord* end) {
+  guarantee(tams <= end,
+            err_msg("tams: "PTR_FORMAT" end: "PTR_FORMAT, tams, end));
+  HeapWord* result = bitmap->getNextMarkedWordAddress(tams, end);
+  if (result < end) {
+    gclog_or_tty->cr();
+    gclog_or_tty->print_cr("## wrong marked address on %s bitmap: "PTR_FORMAT,
+                           bitmap_name, result);
+    gclog_or_tty->print_cr("## %s tams: "PTR_FORMAT" end: "PTR_FORMAT,
+                           bitmap_name, tams, end);
+    return false;
+  }
+  return true;
+}
+
+bool G1CollectedHeap::verify_bitmaps(const char* caller, HeapRegion* hr) {
+  CMBitMapRO* prev_bitmap = concurrent_mark()->prevMarkBitMap();
+  CMBitMapRO* next_bitmap = (CMBitMapRO*) concurrent_mark()->nextMarkBitMap();
+
+  HeapWord* bottom = hr->bottom();
+  HeapWord* ptams  = hr->prev_top_at_mark_start();
+  HeapWord* ntams  = hr->next_top_at_mark_start();
+  HeapWord* end    = hr->end();
+
+  bool res_p = verify_no_bits_over_tams("prev", prev_bitmap, ptams, end);
+
+  bool res_n = true;
+  // We reset mark_in_progress() before we reset _cmThread->in_progress() and in this window
+  // we do the clearing of the next bitmap concurrently. Thus, we can not verify the bitmap
+  // if we happen to be in that state.
+  if (mark_in_progress() || !_cmThread->in_progress()) {
+    res_n = verify_no_bits_over_tams("next", next_bitmap, ntams, end);
+  }
+  if (!res_p || !res_n) {
+    gclog_or_tty->print_cr("#### Bitmap verification failed for "HR_FORMAT,
+                           HR_FORMAT_PARAMS(hr));
+    gclog_or_tty->print_cr("#### Caller: %s", caller);
+    return false;
+  }
+  return true;
+}
+
+void G1CollectedHeap::check_bitmaps(const char* caller, HeapRegion* hr) {
+  if (!G1VerifyBitmaps) return;
+
+  guarantee(verify_bitmaps(caller, hr), "bitmap verification");
+}
+
+class G1VerifyBitmapClosure : public HeapRegionClosure {
+private:
+  const char* _caller;
+  G1CollectedHeap* _g1h;
+  bool _failures;
+
+public:
+  G1VerifyBitmapClosure(const char* caller, G1CollectedHeap* g1h) :
+    _caller(caller), _g1h(g1h), _failures(false) { }
+
+  bool failures() { return _failures; }
+
+  virtual bool doHeapRegion(HeapRegion* hr) {
+    if (hr->continuesHumongous()) return false;
+
+    bool result = _g1h->verify_bitmaps(_caller, hr);
+    if (!result) {
+      _failures = true;
+    }
+    return false;
+  }
+};
+
+void G1CollectedHeap::check_bitmaps(const char* caller) {
+  if (!G1VerifyBitmaps) return;
+
+  G1VerifyBitmapClosure cl(caller, this);
+  heap_region_iterate(&cl);
+  guarantee(!cl.failures(), "bitmap verification");
+}
+#endif // PRODUCT
 
 void G1CollectedHeap::cleanUpCardTable() {
   G1SATBCardTableModRefBS* ct_bs = g1_barrier_set();
@@ -6028,6 +6546,154 @@
   policy->phase_times()->record_non_young_free_cset_time_ms(non_young_time_ms);
 }
 
+class G1FreeHumongousRegionClosure : public HeapRegionClosure {
+ private:
+  FreeRegionList* _free_region_list;
+  HeapRegionSet* _proxy_set;
+  HeapRegionSetCount _humongous_regions_removed;
+  size_t _freed_bytes;
+ public:
+
+  G1FreeHumongousRegionClosure(FreeRegionList* free_region_list) :
+    _free_region_list(free_region_list), _humongous_regions_removed(), _freed_bytes(0) {
+  }
+
+  virtual bool doHeapRegion(HeapRegion* r) {
+    if (!r->startsHumongous()) {
+      return false;
+    }
+
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+
+    oop obj = (oop)r->bottom();
+    CMBitMap* next_bitmap = g1h->concurrent_mark()->nextMarkBitMap();
+
+    // The following checks whether the humongous object is live are sufficient.
+    // The main additional check (in addition to having a reference from the roots
+    // or the young gen) is whether the humongous object has a remembered set entry.
+    //
+    // A humongous object cannot be live if there is no remembered set for it
+    // because:
+    // - there can be no references from within humongous starts regions referencing
+    // the object because we never allocate other objects into them.
+    // (I.e. there are no intra-region references that may be missed by the
+    // remembered set)
+    // - as soon there is a remembered set entry to the humongous starts region
+    // (i.e. it has "escaped" to an old object) this remembered set entry will stay
+    // until the end of a concurrent mark.
+    //
+    // It is not required to check whether the object has been found dead by marking
+    // or not, in fact it would prevent reclamation within a concurrent cycle, as
+    // all objects allocated during that time are considered live.
+    // SATB marking is even more conservative than the remembered set.
+    // So if at this point in the collection there is no remembered set entry,
+    // nobody has a reference to it.
+    // At the start of collection we flush all refinement logs, and remembered sets
+    // are completely up-to-date wrt to references to the humongous object.
+    //
+    // Other implementation considerations:
+    // - never consider object arrays: while they are a valid target, they have not
+    // been observed to be used as temporary objects.
+    // - they would also pose considerable effort for cleaning up the the remembered
+    // sets.
+    // While this cleanup is not strictly necessary to be done (or done instantly),
+    // given that their occurrence is very low, this saves us this additional
+    // complexity.
+    uint region_idx = r->hrs_index();
+    if (g1h->humongous_is_live(region_idx) ||
+        g1h->humongous_region_is_always_live(region_idx)) {
+
+      if (G1TraceReclaimDeadHumongousObjectsAtYoungGC) {
+        gclog_or_tty->print_cr("Live humongous %d region %d with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
+                               r->isHumongous(),
+                               region_idx,
+                               r->rem_set()->occupied(),
+                               r->rem_set()->strong_code_roots_list_length(),
+                               next_bitmap->isMarked(r->bottom()),
+                               g1h->humongous_is_live(region_idx),
+                               obj->is_objArray()
+                              );
+      }
+
+      return false;
+    }
+
+    guarantee(!obj->is_objArray(),
+              err_msg("Eagerly reclaiming object arrays is not supported, but the object "PTR_FORMAT" is.",
+                      r->bottom()));
+
+    if (G1TraceReclaimDeadHumongousObjectsAtYoungGC) {
+      gclog_or_tty->print_cr("Reclaim humongous region %d start "PTR_FORMAT" region %d length "UINT32_FORMAT" with remset "SIZE_FORMAT" code roots "SIZE_FORMAT" is marked %d live-other %d obj array %d",
+                             r->isHumongous(),
+                             r->bottom(),
+                             region_idx,
+                             r->region_num(),
+                             r->rem_set()->occupied(),
+                             r->rem_set()->strong_code_roots_list_length(),
+                             next_bitmap->isMarked(r->bottom()),
+                             g1h->humongous_is_live(region_idx),
+                             obj->is_objArray()
+                            );
+    }
+    // Need to clear mark bit of the humongous object if already set.
+    if (next_bitmap->isMarked(r->bottom())) {
+      next_bitmap->clear(r->bottom());
+    }
+    _freed_bytes += r->used();
+    r->set_containing_set(NULL);
+    _humongous_regions_removed.increment(1u, r->capacity());
+    g1h->free_humongous_region(r, _free_region_list, false);
+
+    return false;
+  }
+
+  HeapRegionSetCount& humongous_free_count() {
+    return _humongous_regions_removed;
+  }
+
+  size_t bytes_freed() const {
+    return _freed_bytes;
+  }
+
+  size_t humongous_reclaimed() const {
+    return _humongous_regions_removed.length();
+  }
+};
+
+void G1CollectedHeap::eagerly_reclaim_humongous_regions() {
+  assert_at_safepoint(true);
+
+  if (!G1ReclaimDeadHumongousObjectsAtYoungGC || !_has_humongous_reclaim_candidates) {
+    g1_policy()->phase_times()->record_fast_reclaim_humongous_time_ms(0.0, 0);
+    return;
+  }
+
+  double start_time = os::elapsedTime();
+
+  FreeRegionList local_cleanup_list("Local Humongous Cleanup List");
+
+  G1FreeHumongousRegionClosure cl(&local_cleanup_list);
+  heap_region_iterate(&cl);
+
+  HeapRegionSetCount empty_set;
+  remove_from_old_sets(empty_set, cl.humongous_free_count());
+
+  G1HRPrinter* hr_printer = _g1h->hr_printer();
+  if (hr_printer->is_active()) {
+    FreeRegionListIterator iter(&local_cleanup_list);
+    while (iter.more_available()) {
+      HeapRegion* hr = iter.get_next();
+      hr_printer->cleanup(hr);
+    }
+  }
+
+  prepend_to_freelist(&local_cleanup_list);
+  decrement_summary_bytes(cl.bytes_freed());
+
+  g1_policy()->phase_times()->record_fast_reclaim_humongous_time_ms((os::elapsedTime() - start_time) * 1000.0,
+                                                                    cl.humongous_reclaimed());
+}
+
 // This routine is similar to the above but does not record
 // any policy statistics or update free lists; we are abandoning
 // the current incremental collection set in preparation of a
@@ -6268,6 +6934,7 @@
     if (new_alloc_region != NULL) {
       set_region_short_lived_locked(new_alloc_region);
       _hr_printer.alloc(new_alloc_region, G1HRPrinter::Eden, young_list_full);
+      check_bitmaps("Mutator Region Allocation", new_alloc_region);
       return new_alloc_region;
     }
   }
@@ -6330,12 +6997,14 @@
       // We really only need to do this for old regions given that we
       // should never scan survivors. But it doesn't hurt to do it
       // for survivors too.
-      new_alloc_region->set_saved_mark();
+      new_alloc_region->record_top_and_timestamp();
       if (survivor) {
         new_alloc_region->set_survivor();
         _hr_printer.alloc(new_alloc_region, G1HRPrinter::Survivor);
+        check_bitmaps("Survivor Region Allocation", new_alloc_region);
       } else {
         _hr_printer.alloc(new_alloc_region, G1HRPrinter::Old);
+        check_bitmaps("Old Region Allocation", new_alloc_region);
       }
       bool during_im = g1_policy()->during_initial_mark_pause();
       new_alloc_region->note_start_of_copying(during_im);
@@ -6592,106 +7261,6 @@
   g1_policy()->phase_times()->record_strong_code_root_purge_time(purge_time_ms);
 }
 
-// Mark all the code roots that point into regions *not* in the
-// collection set.
-//
-// Note we do not want to use a "marking" CodeBlobToOopClosure while
-// walking the the code roots lists of regions not in the collection
-// set. Suppose we have an nmethod (M) that points to objects in two
-// separate regions - one in the collection set (R1) and one not (R2).
-// Using a "marking" CodeBlobToOopClosure here would result in "marking"
-// nmethod M when walking the code roots for R1. When we come to scan
-// the code roots for R2, we would see that M is already marked and it
-// would be skipped and the objects in R2 that are referenced from M
-// would not be evacuated.
-
-class MarkStrongCodeRootCodeBlobClosure: public CodeBlobClosure {
-
-  class MarkStrongCodeRootOopClosure: public OopClosure {
-    ConcurrentMark* _cm;
-    HeapRegion* _hr;
-    uint _worker_id;
-
-    template <class T> void do_oop_work(T* p) {
-      T heap_oop = oopDesc::load_heap_oop(p);
-      if (!oopDesc::is_null(heap_oop)) {
-        oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-        // Only mark objects in the region (which is assumed
-        // to be not in the collection set).
-        if (_hr->is_in(obj)) {
-          _cm->grayRoot(obj, (size_t) obj->size(), _worker_id);
-        }
-      }
-    }
-
-  public:
-    MarkStrongCodeRootOopClosure(ConcurrentMark* cm, HeapRegion* hr, uint worker_id) :
-      _cm(cm), _hr(hr), _worker_id(worker_id) {
-      assert(!_hr->in_collection_set(), "sanity");
-    }
-
-    void do_oop(narrowOop* p) { do_oop_work(p); }
-    void do_oop(oop* p)       { do_oop_work(p); }
-  };
-
-  MarkStrongCodeRootOopClosure _oop_cl;
-
-public:
-  MarkStrongCodeRootCodeBlobClosure(ConcurrentMark* cm, HeapRegion* hr, uint worker_id):
-    _oop_cl(cm, hr, worker_id) {}
-
-  void do_code_blob(CodeBlob* cb) {
-    nmethod* nm = (cb == NULL) ? NULL : cb->as_nmethod_or_null();
-    if (nm != NULL) {
-      nm->oops_do(&_oop_cl);
-    }
-  }
-};
-
-class MarkStrongCodeRootsHRClosure: public HeapRegionClosure {
-  G1CollectedHeap* _g1h;
-  uint _worker_id;
-
-public:
-  MarkStrongCodeRootsHRClosure(G1CollectedHeap* g1h, uint worker_id) :
-    _g1h(g1h), _worker_id(worker_id) {}
-
-  bool doHeapRegion(HeapRegion *hr) {
-    HeapRegionRemSet* hrrs = hr->rem_set();
-    if (hr->continuesHumongous()) {
-      // Code roots should never be attached to a continuation of a humongous region
-      assert(hrrs->strong_code_roots_list_length() == 0,
-             err_msg("code roots should never be attached to continuations of humongous region "HR_FORMAT
-                     " starting at "HR_FORMAT", but has "SIZE_FORMAT,
-                     HR_FORMAT_PARAMS(hr), HR_FORMAT_PARAMS(hr->humongous_start_region()),
-                     hrrs->strong_code_roots_list_length()));
-      return false;
-    }
-
-    if (hr->in_collection_set()) {
-      // Don't mark code roots into regions in the collection set here.
-      // They will be marked when we scan them.
-      return false;
-    }
-
-    MarkStrongCodeRootCodeBlobClosure cb_cl(_g1h->concurrent_mark(), hr, _worker_id);
-    hr->strong_code_roots_do(&cb_cl);
-    return false;
-  }
-};
-
-void G1CollectedHeap::mark_strong_code_roots(uint worker_id) {
-  MarkStrongCodeRootsHRClosure cl(this, worker_id);
-  if (G1CollectedHeap::use_parallel_gc_threads()) {
-    heap_region_par_iterate_chunked(&cl,
-                                    worker_id,
-                                    workers()->active_workers(),
-                                    HeapRegion::ParMarkRootClaimValue);
-  } else {
-    heap_region_iterate(&cl);
-  }
-}
-
 class RebuildStrongCodeRootClosure: public CodeBlobClosure {
   G1CollectedHeap* _g1h;
 
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -197,19 +197,10 @@
   bool do_object_b(oop p);
 };
 
-// Instances of this class are used for quick tests on whether a reference points
-// into the collection set. Each of the array's elements denotes whether the
-// corresponding region is in the collection set.
-class G1FastCSetBiasedMappedArray : public G1BiasedMappedArray<bool> {
- protected:
-  bool default_value() const { return false; }
- public:
-  void clear() { G1BiasedMappedArray<bool>::clear(); }
-};
-
 class RefineCardTableEntryClosure;
 
 class G1CollectedHeap : public SharedHeap {
+  friend class VM_CollectForMetadataAllocation;
   friend class VM_G1CollectForAllocation;
   friend class VM_G1CollectFull;
   friend class VM_G1IncCollectionPause;
@@ -219,7 +210,7 @@
   friend class OldGCAllocRegion;
 
   // Closures used in implementation.
-  template <G1Barrier barrier, bool do_mark_object>
+  template <G1Barrier barrier, G1Mark do_mark_object>
   friend class G1ParCopyClosure;
   friend class G1IsAliveClosure;
   friend class G1EvacuateFollowersClosure;
@@ -236,6 +227,7 @@
   friend class EvacPopObjClosure;
   friend class G1ParCleanupCTTask;
 
+  friend class G1FreeHumongousRegionClosure;
   // Other related classes.
   friend class G1MarkSweep;
 
@@ -266,6 +258,9 @@
   // It keeps track of the humongous regions.
   HeapRegionSet _humongous_set;
 
+  void clear_humongous_is_live_table();
+  void eagerly_reclaim_humongous_regions();
+
   // The number of regions we could create by expansion.
   uint _expansion_regions;
 
@@ -346,6 +341,9 @@
   // It initializes the GC alloc regions at the start of a GC.
   void init_gc_alloc_regions(EvacuationInfo& evacuation_info);
 
+  // Setup the retained old gc alloc region as the currrent old gc alloc region.
+  void use_retained_old_gc_alloc_region(EvacuationInfo& evacuation_info);
+
   // It releases the GC alloc regions at the end of a GC.
   void release_gc_alloc_regions(uint no_of_gc_workers, EvacuationInfo& evacuation_info);
 
@@ -363,10 +361,25 @@
   // than the current allocation region.
   size_t _summary_bytes_used;
 
-  // This array is used for a quick test on whether a reference points into
-  // the collection set or not. Each of the array's elements denotes whether the
-  // corresponding region is in the collection set or not.
-  G1FastCSetBiasedMappedArray _in_cset_fast_test;
+  // Records whether the region at the given index is kept live by roots or
+  // references from the young generation.
+  class HumongousIsLiveBiasedMappedArray : public G1BiasedMappedArray<bool> {
+   protected:
+    bool default_value() const { return false; }
+   public:
+    void clear() { G1BiasedMappedArray<bool>::clear(); }
+    void set_live(uint region) {
+      set_by_index(region, true);
+    }
+    bool is_live(uint region) {
+      return get_by_index(region);
+    }
+  };
+
+  HumongousIsLiveBiasedMappedArray _humongous_is_live;
+  // Stores whether during humongous object registration we found candidate regions.
+  // If not, we can skip a few steps.
+  bool _has_humongous_reclaim_candidates;
 
   volatile unsigned _gc_time_stamp;
 
@@ -686,10 +699,24 @@
   virtual void gc_prologue(bool full);
   virtual void gc_epilogue(bool full);
 
+  inline void set_humongous_is_live(oop obj);
+
+  bool humongous_is_live(uint region) {
+    return _humongous_is_live.is_live(region);
+  }
+
+  // Returns whether the given region (which must be a humongous (start) region)
+  // is to be considered conservatively live regardless of any other conditions.
+  bool humongous_region_is_always_live(uint index);
+  // Register the given region to be part of the collection set.
+  inline void register_humongous_region_with_in_cset_fast_test(uint index);
+  // Register regions with humongous objects (actually on the start region) in
+  // the in_cset_fast_test table.
+  void register_humongous_regions_with_in_cset_fast_test();
   // We register a region with the fast "in collection set" test. We
   // simply set to true the array slot corresponding to this region.
   void register_region_with_in_cset_fast_test(HeapRegion* r) {
-    _in_cset_fast_test.set_by_index(r->hrs_index(), true);
+    _in_cset_fast_test.set_in_cset(r->hrs_index());
   }
 
   // This is a fast test on whether a reference points into the
@@ -827,17 +854,13 @@
   // param is for use with parallel roots processing, and should be
   // the "i" of the calling parallel worker thread's work(i) function.
   // In the sequential case this param will be ignored.
-  void g1_process_strong_roots(bool is_scavenging,
-                               ScanningOption so,
-                               OopClosure* scan_non_heap_roots,
-                               OopsInHeapRegionClosure* scan_rs,
-                               G1KlassScanClosure* scan_klasses,
-                               uint worker_i);
-
-  // Apply "blk" to all the weak roots of the system.  These include
-  // JNI weak roots, the code cache, system dictionary, symbol table,
-  // string table, and referents of reachable weak refs.
-  void g1_process_weak_roots(OopClosure* root_closure);
+  void g1_process_roots(OopClosure* scan_non_heap_roots,
+                        OopClosure* scan_non_heap_weak_roots,
+                        OopsInHeapRegionClosure* scan_rs,
+                        CLDClosure* scan_strong_clds,
+                        CLDClosure* scan_weak_clds,
+                        CodeBlobClosure* scan_strong_code,
+                        uint worker_i);
 
   // Notifies all the necessary spaces that the committed space has
   // been updated (either expanded or shrunk). It should be called
@@ -1030,7 +1053,7 @@
   // of G1CollectedHeap::_gc_time_stamp.
   unsigned int* _worker_cset_start_region_time_stamp;
 
-  enum G1H_process_strong_roots_tasks {
+  enum G1H_process_roots_tasks {
     G1H_PS_filter_satb_buffers,
     G1H_PS_refProcessor_oops_do,
     // Leave this one last.
@@ -1167,19 +1190,19 @@
   }
 
   // The total number of regions in the heap.
-  uint n_regions() { return _hrs.length(); }
+  uint n_regions() const { return _hrs.length(); }
 
   // The max number of regions in the heap.
-  uint max_regions() { return _hrs.max_length(); }
+  uint max_regions() const { return _hrs.max_length(); }
 
   // The number of regions that are completely free.
-  uint free_regions() { return _free_list.length(); }
+  uint free_regions() const { return _free_list.length(); }
 
   // The number of regions that are not completely free.
-  uint used_regions() { return n_regions() - free_regions(); }
+  uint used_regions() const { return n_regions() - free_regions(); }
 
   // The number of regions available for "regular" expansion.
-  uint expansion_regions() { return _expansion_regions; }
+  uint expansion_regions() const { return _expansion_regions; }
 
   // Factory method for HeapRegion instances. It will return NULL if
   // the allocation fails.
@@ -1190,6 +1213,30 @@
   void verify_dirty_young_list(HeapRegion* head) PRODUCT_RETURN;
   void verify_dirty_young_regions() PRODUCT_RETURN;
 
+#ifndef PRODUCT
+  // Make sure that the given bitmap has no marked objects in the
+  // range [from,limit). If it does, print an error message and return
+  // false. Otherwise, just return true. bitmap_name should be "prev"
+  // or "next".
+  bool verify_no_bits_over_tams(const char* bitmap_name, CMBitMapRO* bitmap,
+                                HeapWord* from, HeapWord* limit);
+
+  // Verify that the prev / next bitmap range [tams,end) for the given
+  // region has no marks. Return true if all is well, false if errors
+  // are detected.
+  bool verify_bitmaps(const char* caller, HeapRegion* hr);
+#endif // PRODUCT
+
+  // If G1VerifyBitmaps is set, verify that the marking bitmaps for
+  // the given region do not have any spurious marks. If errors are
+  // detected, print appropriate error messages and crash.
+  void check_bitmaps(const char* caller, HeapRegion* hr) PRODUCT_RETURN;
+
+  // If G1VerifyBitmaps is set, verify that the marking bitmaps do not
+  // have any spurious marks. If errors are detected, print
+  // appropriate error messages and crash.
+  void check_bitmaps(const char* caller) PRODUCT_RETURN;
+
   // verify_region_sets() performs verification over the region
   // lists. It will be compiled in the product code to be used when
   // necessary (i.e., during heap verification).
@@ -1268,9 +1315,61 @@
   virtual bool is_in(const void* p) const;
 
   // Return "TRUE" iff the given object address is within the collection
-  // set.
+  // set. Slow implementation.
   inline bool obj_in_cs(oop obj);
 
+  inline bool is_in_cset(oop obj);
+
+  inline bool is_in_cset_or_humongous(const oop obj);
+
+  enum in_cset_state_t {
+   InNeither,           // neither in collection set nor humongous
+   InCSet,              // region is in collection set only
+   IsHumongous          // region is a humongous start region
+  };
+ private:
+  // Instances of this class are used for quick tests on whether a reference points
+  // into the collection set or is a humongous object (points into a humongous
+  // object).
+  // Each of the array's elements denotes whether the corresponding region is in
+  // the collection set or a humongous region.
+  // We use this to quickly reclaim humongous objects: by making a humongous region
+  // succeed this test, we sort-of add it to the collection set. During the reference
+  // iteration closures, when we see a humongous region, we simply mark it as
+  // referenced, i.e. live.
+  class G1FastCSetBiasedMappedArray : public G1BiasedMappedArray<char> {
+   protected:
+    char default_value() const { return G1CollectedHeap::InNeither; }
+   public:
+    void set_humongous(uintptr_t index) {
+      assert(get_by_index(index) != InCSet, "Should not overwrite InCSet values");
+      set_by_index(index, G1CollectedHeap::IsHumongous);
+    }
+
+    void clear_humongous(uintptr_t index) {
+      set_by_index(index, G1CollectedHeap::InNeither);
+    }
+
+    void set_in_cset(uintptr_t index) {
+      assert(get_by_index(index) != G1CollectedHeap::IsHumongous, "Should not overwrite IsHumongous value");
+      set_by_index(index, G1CollectedHeap::InCSet);
+    }
+
+    bool is_in_cset_or_humongous(HeapWord* addr) const { return get_by_address(addr) != G1CollectedHeap::InNeither; }
+    bool is_in_cset(HeapWord* addr) const { return get_by_address(addr) == G1CollectedHeap::InCSet; }
+    G1CollectedHeap::in_cset_state_t at(HeapWord* addr) const { return (G1CollectedHeap::in_cset_state_t)get_by_address(addr); }
+    void clear() { G1BiasedMappedArray<char>::clear(); }
+  };
+
+  // This array is used for a quick test on whether a reference points into
+  // the collection set or not. Each of the array's elements denotes whether the
+  // corresponding region is in the collection set or not.
+  G1FastCSetBiasedMappedArray _in_cset_fast_test;
+
+ public:
+
+  inline in_cset_state_t in_cset_state(const oop obj);
+
   // Return "TRUE" iff the given object address is in the reserved
   // region of g1.
   bool is_in_g1_reserved(const void* p) const {
@@ -1305,9 +1404,6 @@
   // "cl.do_oop" on each.
   virtual void oop_iterate(ExtendedOopClosure* cl);
 
-  // Same as above, restricted to a memory region.
-  void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
-
   // Iterate over all objects, calling "cl.do_object" on each.
   virtual void object_iterate(ObjectClosure* cl);
 
@@ -1325,6 +1421,10 @@
   // Return the region with the given index. It assumes the index is valid.
   inline HeapRegion* region_at(uint index) const;
 
+  // Calculate the region index of the given address. Given address must be
+  // within the heap.
+  inline uint addr_to_region(HeapWord* addr) const;
+
   // Divide the heap region sequence into "chunks" of some size (the number
   // of regions divided by the number of parallel threads times some
   // overpartition factor, currently 4).  Assumes that this will be called
@@ -1377,8 +1477,7 @@
   // As above but starting from region r
   void collection_set_iterate_from(HeapRegion* r, HeapRegionClosure *blk);
 
-  // Returns the first (lowest address) compactible space in the heap.
-  virtual CompactibleSpace* first_compactible_space();
+  HeapRegion* next_compaction_region(const HeapRegion* from) const;
 
   // A CollectedHeap will contain some number of spaces.  This finds the
   // space containing a given address, or else returns NULL.
@@ -1601,10 +1700,6 @@
   // Free up superfluous code root memory.
   void purge_code_root_memory();
 
-  // During an initial mark pause, mark all the code roots that
-  // point into regions *not* in the collection set.
-  void mark_strong_code_roots(uint worker_id);
-
   // Rebuild the stong code root lists for each region
   // after a full GC
   void rebuild_strong_code_roots();
@@ -1613,6 +1708,9 @@
   // in symbol table, possibly in parallel.
   void unlink_string_and_symbol_table(BoolObjectClosure* is_alive, bool unlink_strings = true, bool unlink_symbols = true);
 
+  // Parallel phase of unloading/cleaning after G1 concurrent mark.
+  void parallel_cleaning(BoolObjectClosure* is_alive, bool process_strings, bool process_symbols, bool class_unloading_occurred);
+
   // Redirty logged cards in the refinement queue.
   void redirty_logged_cards();
   // Verification
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -40,6 +40,13 @@
 // Return the region with the given index. It assumes the index is valid.
 inline HeapRegion* G1CollectedHeap::region_at(uint index) const { return _hrs.at(index); }
 
+inline uint G1CollectedHeap::addr_to_region(HeapWord* addr) const {
+  assert(is_in_reserved(addr),
+         err_msg("Cannot calculate region index for address "PTR_FORMAT" that is outside of the heap ["PTR_FORMAT", "PTR_FORMAT")",
+                 p2i(addr), p2i(_reserved.start()), p2i(_reserved.end())));
+  return (uint)(pointer_delta(addr, _reserved.start(), sizeof(uint8_t)) >> HeapRegion::LogOfHRGrainBytes);
+}
+
 template <class T>
 inline HeapRegion*
 G1CollectedHeap::heap_region_containing(const T addr) const {
@@ -172,12 +179,11 @@
   return _cm->nextMarkBitMap()->isMarked((HeapWord *)obj);
 }
 
-
 // This is a fast test on whether a reference points into the
 // collection set or not. Assume that the reference
 // points into the heap.
-inline bool G1CollectedHeap::in_cset_fast_test(oop obj) {
-  bool ret = _in_cset_fast_test.get_by_address((HeapWord*)obj);
+inline bool G1CollectedHeap::is_in_cset(oop obj) {
+  bool ret = _in_cset_fast_test.is_in_cset((HeapWord*)obj);
   // let's make sure the result is consistent with what the slower
   // test returns
   assert( ret || !obj_in_cs(obj), "sanity");
@@ -185,6 +191,18 @@
   return ret;
 }
 
+bool G1CollectedHeap::is_in_cset_or_humongous(const oop obj) {
+  return _in_cset_fast_test.is_in_cset_or_humongous((HeapWord*)obj);
+}
+
+G1CollectedHeap::in_cset_state_t G1CollectedHeap::in_cset_state(const oop obj) {
+  return _in_cset_fast_test.at((HeapWord*)obj);
+}
+
+void G1CollectedHeap::register_humongous_region_with_in_cset_fast_test(uint index) {
+  _in_cset_fast_test.set_humongous(index);
+}
+
 #ifndef PRODUCT
 // Support for G1EvacuationFailureALot
 
@@ -290,4 +308,22 @@
   else return is_obj_ill(obj, hr);
 }
 
+inline void G1CollectedHeap::set_humongous_is_live(oop obj) {
+  uint region = addr_to_region((HeapWord*)obj);
+  // We not only set the "live" flag in the humongous_is_live table, but also
+  // reset the entry in the _in_cset_fast_test table so that subsequent references
+  // to the same humongous object do not go into the slow path again.
+  // This is racy, as multiple threads may at the same time enter here, but this
+  // is benign.
+  // During collection we only ever set the "live" flag, and only ever clear the
+  // entry in the in_cset_fast_table.
+  // We only ever evaluate the contents of these tables (in the VM thread) after
+  // having synchronized the worker threads with the VM thread, or in the same
+  // thread (i.e. within the VM thread).
+  if (!_humongous_is_live.is_live(region)) {
+    _humongous_is_live.set_live(region);
+    _in_cset_fast_test.clear_humongous(region);
+  }
+}
+
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1COLLECTEDHEAP_INLINE_HPP
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1046,7 +1046,7 @@
 
   bool new_in_marking_window = _in_marking_window;
   bool new_in_marking_window_im = false;
-  if (during_initial_mark_pause()) {
+  if (last_pause_included_initial_mark) {
     new_in_marking_window = true;
     new_in_marking_window_im = true;
   }
--- a/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1EvacFailure.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -71,6 +71,9 @@
   bool _during_initial_mark;
   bool _during_conc_mark;
   uint _worker_id;
+  HeapWord* _end_of_last_gap;
+  HeapWord* _last_gap_threshold;
+  HeapWord* _last_obj_threshold;
 
 public:
   RemoveSelfForwardPtrObjClosure(G1CollectedHeap* g1, ConcurrentMark* cm,
@@ -83,7 +86,10 @@
     _update_rset_cl(update_rset_cl),
     _during_initial_mark(during_initial_mark),
     _during_conc_mark(during_conc_mark),
-    _worker_id(worker_id) { }
+    _worker_id(worker_id),
+    _end_of_last_gap(hr->bottom()),
+    _last_gap_threshold(hr->bottom()),
+    _last_obj_threshold(hr->bottom()) { }
 
   size_t marked_bytes() { return _marked_bytes; }
 
@@ -107,7 +113,12 @@
     HeapWord* obj_addr = (HeapWord*) obj;
     assert(_hr->is_in(obj_addr), "sanity");
     size_t obj_size = obj->size();
-    _hr->update_bot_for_object(obj_addr, obj_size);
+    HeapWord* obj_end = obj_addr + obj_size;
+
+    if (_end_of_last_gap != obj_addr) {
+      // there was a gap before obj_addr
+      _last_gap_threshold = _hr->cross_threshold(_end_of_last_gap, obj_addr);
+    }
 
     if (obj->is_forwarded() && obj->forwardee() == obj) {
       // The object failed to move.
@@ -115,7 +126,9 @@
       // We consider all objects that we find self-forwarded to be
       // live. What we'll do is that we'll update the prev marking
       // info so that they are all under PTAMS and explicitly marked.
-      _cm->markPrev(obj);
+      if (!_cm->isPrevMarked(obj)) {
+        _cm->markPrev(obj);
+      }
       if (_during_initial_mark) {
         // For the next marking info we'll only mark the
         // self-forwarded objects explicitly if we are during
@@ -145,13 +158,18 @@
       // remembered set entries missing given that we skipped cards on
       // the collection set. So, we'll recreate such entries now.
       obj->oop_iterate(_update_rset_cl);
-      assert(_cm->isPrevMarked(obj), "Should be marked!");
     } else {
+
       // The object has been either evacuated or is dead. Fill it with a
       // dummy object.
-      MemRegion mr((HeapWord*) obj, obj_size);
+      MemRegion mr(obj_addr, obj_size);
       CollectedHeap::fill_with_object(mr);
+
+      // must nuke all dead objects which we skipped when iterating over the region
+      _cm->clearRangePrevBitmap(MemRegion(_end_of_last_gap, obj_end));
     }
+    _end_of_last_gap = obj_end;
+    _last_obj_threshold = _hr->cross_threshold(obj_addr, obj_end);
   }
 };
 
@@ -182,15 +200,9 @@
                                             during_conc_mark,
                                             _worker_id);
 
-        MemRegion mr(hr->bottom(), hr->end());
-        // We'll recreate the prev marking info so we'll first clear
-        // the prev bitmap range for this region. We never mark any
-        // CSet objects explicitly so the next bitmap range should be
-        // cleared anyway.
-        _cm->clearRangePrevBitmap(mr);
-
         hr->note_self_forwarding_removal_start(during_initial_mark,
                                                during_conc_mark);
+        _g1h->check_bitmaps("Self-Forwarding Ptr Removal", hr);
 
         // In the common case (i.e. when there is no evacuation
         // failure) we make sure that the following is done when
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -166,7 +166,6 @@
   _last_update_rs_processed_buffers(_max_gc_threads, "%d"),
   _last_scan_rs_times_ms(_max_gc_threads, "%.1lf"),
   _last_strong_code_root_scan_times_ms(_max_gc_threads, "%.1lf"),
-  _last_strong_code_root_mark_times_ms(_max_gc_threads, "%.1lf"),
   _last_obj_copy_times_ms(_max_gc_threads, "%.1lf"),
   _last_termination_times_ms(_max_gc_threads, "%.1lf"),
   _last_termination_attempts(_max_gc_threads, SIZE_FORMAT),
@@ -193,7 +192,6 @@
   _last_update_rs_processed_buffers.reset();
   _last_scan_rs_times_ms.reset();
   _last_strong_code_root_scan_times_ms.reset();
-  _last_strong_code_root_mark_times_ms.reset();
   _last_obj_copy_times_ms.reset();
   _last_termination_times_ms.reset();
   _last_termination_attempts.reset();
@@ -214,7 +212,6 @@
   _last_update_rs_processed_buffers.verify();
   _last_scan_rs_times_ms.verify();
   _last_strong_code_root_scan_times_ms.verify();
-  _last_strong_code_root_mark_times_ms.verify();
   _last_obj_copy_times_ms.verify();
   _last_termination_times_ms.verify();
   _last_termination_attempts.verify();
@@ -229,7 +226,6 @@
                                _last_update_rs_times_ms.get(i) +
                                _last_scan_rs_times_ms.get(i) +
                                _last_strong_code_root_scan_times_ms.get(i) +
-                               _last_strong_code_root_mark_times_ms.get(i) +
                                _last_obj_copy_times_ms.get(i) +
                                _last_termination_times_ms.get(i);
 
@@ -240,8 +236,10 @@
   _last_gc_worker_times_ms.verify();
   _last_gc_worker_other_times_ms.verify();
 
-  _last_redirty_logged_cards_time_ms.verify();
-  _last_redirty_logged_cards_processed_cards.verify();
+  if (G1DeferredRSUpdate) {
+    _last_redirty_logged_cards_time_ms.verify();
+    _last_redirty_logged_cards_processed_cards.verify();
+  }
 }
 
 void G1GCPhaseTimes::note_string_dedup_fixup_start() {
@@ -258,6 +256,10 @@
   LineBuffer(level).append_and_print_cr("[%s: %.1lf ms]", str, value);
 }
 
+void G1GCPhaseTimes::print_stats(int level, const char* str, size_t value) {
+  LineBuffer(level).append_and_print_cr("[%s: "SIZE_FORMAT"]", str, value);
+}
+
 void G1GCPhaseTimes::print_stats(int level, const char* str, double value, uint workers) {
   LineBuffer(level).append_and_print_cr("[%s: %.1lf ms, GC Workers: " UINT32_FORMAT "]", str, value, workers);
 }
@@ -301,9 +303,6 @@
     if (_last_satb_filtering_times_ms.sum() > 0.0) {
       _last_satb_filtering_times_ms.print(2, "SATB Filtering (ms)");
     }
-    if (_last_strong_code_root_mark_times_ms.sum() > 0.0) {
-     _last_strong_code_root_mark_times_ms.print(2, "Code Root Marking (ms)");
-    }
     _last_update_rs_times_ms.print(2, "Update RS (ms)");
       _last_update_rs_processed_buffers.print(3, "Processed Buffers");
     _last_scan_rs_times_ms.print(2, "Scan RS (ms)");
@@ -321,9 +320,6 @@
     if (_last_satb_filtering_times_ms.sum() > 0.0) {
       _last_satb_filtering_times_ms.print(1, "SATB Filtering (ms)");
     }
-    if (_last_strong_code_root_mark_times_ms.sum() > 0.0) {
-      _last_strong_code_root_mark_times_ms.print(1, "Code Root Marking (ms)");
-    }
     _last_update_rs_times_ms.print(1, "Update RS (ms)");
       _last_update_rs_processed_buffers.print(2, "Processed Buffers");
     _last_scan_rs_times_ms.print(1, "Scan RS (ms)");
@@ -366,6 +362,14 @@
       _last_redirty_logged_cards_processed_cards.print(3, "Redirtied Cards");
     }
   }
+  if (G1ReclaimDeadHumongousObjectsAtYoungGC) {
+    print_stats(2, "Humongous Reclaim", _cur_fast_reclaim_humongous_time_ms);
+    if (G1Log::finest()) {
+      print_stats(3, "Humongous Total", _cur_fast_reclaim_humongous_total);
+      print_stats(3, "Humongous Candidate", _cur_fast_reclaim_humongous_candidates);
+      print_stats(3, "Humongous Reclaimed", _cur_fast_reclaim_humongous_reclaimed);
+    }
+  }
   print_stats(2, "Free CSet",
     (_recorded_young_free_cset_time_ms +
     _recorded_non_young_free_cset_time_ms));
--- a/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1GCPhaseTimes.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -120,7 +120,6 @@
   WorkerDataArray<int>    _last_update_rs_processed_buffers;
   WorkerDataArray<double> _last_scan_rs_times_ms;
   WorkerDataArray<double> _last_strong_code_root_scan_times_ms;
-  WorkerDataArray<double> _last_strong_code_root_mark_times_ms;
   WorkerDataArray<double> _last_obj_copy_times_ms;
   WorkerDataArray<double> _last_termination_times_ms;
   WorkerDataArray<size_t> _last_termination_attempts;
@@ -158,11 +157,17 @@
   double _recorded_young_free_cset_time_ms;
   double _recorded_non_young_free_cset_time_ms;
 
+  double _cur_fast_reclaim_humongous_time_ms;
+  size_t _cur_fast_reclaim_humongous_total;
+  size_t _cur_fast_reclaim_humongous_candidates;
+  size_t _cur_fast_reclaim_humongous_reclaimed;
+
   double _cur_verify_before_time_ms;
   double _cur_verify_after_time_ms;
 
   // Helper methods for detailed logging
   void print_stats(int level, const char* str, double value);
+  void print_stats(int level, const char* str, size_t value);
   void print_stats(int level, const char* str, double value, uint workers);
 
  public:
@@ -199,10 +204,6 @@
     _last_strong_code_root_scan_times_ms.set(worker_i, ms);
   }
 
-  void record_strong_code_root_mark_time(uint worker_i, double ms) {
-    _last_strong_code_root_mark_times_ms.set(worker_i, ms);
-  }
-
   void record_obj_copy_time(uint worker_i, double ms) {
     _last_obj_copy_times_ms.set(worker_i, ms);
   }
@@ -287,6 +288,16 @@
     _recorded_non_young_free_cset_time_ms = time_ms;
   }
 
+  void record_fast_reclaim_humongous_stats(size_t total, size_t candidates) {
+    _cur_fast_reclaim_humongous_total = total;
+    _cur_fast_reclaim_humongous_candidates = candidates;
+  }
+
+  void record_fast_reclaim_humongous_time_ms(double value, size_t reclaimed) {
+    _cur_fast_reclaim_humongous_time_ms = value;
+    _cur_fast_reclaim_humongous_reclaimed = reclaimed;
+  }
+
   void record_young_cset_choice_time_ms(double time_ms) {
     _recorded_young_cset_choice_time_ms = time_ms;
   }
@@ -353,6 +364,10 @@
     return _recorded_non_young_free_cset_time_ms;
   }
 
+  double fast_reclaim_humongous_time_ms() {
+    return _cur_fast_reclaim_humongous_time_ms;
+  }
+
   double average_last_update_rs_time() {
     return _last_update_rs_times_ms.average();
   }
@@ -369,10 +384,6 @@
     return _last_strong_code_root_scan_times_ms.average();
   }
 
-  double average_last_strong_code_root_mark_time(){
-    return _last_strong_code_root_mark_times_ms.average();
-  }
-
   double average_last_obj_copy_time() {
     return _last_obj_copy_times_ms.average();
   }
--- a/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1MarkSweep.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -128,15 +128,15 @@
 
   SharedHeap* sh = SharedHeap::heap();
 
-  // Need cleared claim bits for the strong roots processing
+  // Need cleared claim bits for the roots processing
   ClassLoaderDataGraph::clear_claimed_marks();
 
-  sh->process_strong_roots(true,  // activate StrongRootsScope
-                           false, // not scavenging.
-                           SharedHeap::SO_SystemClasses,
+  MarkingCodeBlobClosure follow_code_closure(&GenMarkSweep::follow_root_closure, !CodeBlobToOopClosure::FixRelocations);
+  sh->process_strong_roots(true,   // activate StrongRootsScope
+                           SharedHeap::SO_None,
                            &GenMarkSweep::follow_root_closure,
-                           &GenMarkSweep::follow_code_root_closure,
-                           &GenMarkSweep::follow_klass_closure);
+                           &GenMarkSweep::follow_cld_closure,
+                           &follow_code_closure);
 
   // Process reference objects found during marking
   ReferenceProcessor* rp = GenMarkSweep::ref_processor();
@@ -200,6 +200,23 @@
   CompactPoint _cp;
   HeapRegionSetCount _humongous_regions_removed;
 
+  bool is_cp_initialized() const {
+    return _cp.space != NULL;
+  }
+
+  void prepare_for_compaction(HeapRegion* hr, HeapWord* end) {
+    // If this is the first live region that we came across which we can compact,
+    // initialize the CompactPoint.
+    if (!is_cp_initialized()) {
+      _cp.space = hr;
+      _cp.threshold = hr->initialize_threshold();
+    }
+    hr->prepare_for_compaction(&_cp);
+    // Also clear the part of the card table that will be unused after
+    // compaction.
+    _mrbs->clear(MemRegion(hr->compaction_top(), end));
+  }
+
   void free_humongous_region(HeapRegion* hr) {
     HeapWord* end = hr->end();
     FreeRegionList dummy_free_list("Dummy Free List for G1MarkSweep");
@@ -211,18 +228,15 @@
     _humongous_regions_removed.increment(1u, hr->capacity());
 
     _g1h->free_humongous_region(hr, &dummy_free_list, false /* par */);
-    hr->prepare_for_compaction(&_cp);
-    // Also clear the part of the card table that will be unused after
-    // compaction.
-    _mrbs->clear(MemRegion(hr->compaction_top(), end));
+    prepare_for_compaction(hr, end);
     dummy_free_list.remove_all();
   }
 
 public:
-  G1PrepareCompactClosure(CompactibleSpace* cs)
+  G1PrepareCompactClosure()
   : _g1h(G1CollectedHeap::heap()),
     _mrbs(_g1h->g1_barrier_set()),
-    _cp(NULL, cs, cs->initialize_threshold()),
+    _cp(NULL),
     _humongous_regions_removed() { }
 
   void update_sets() {
@@ -245,10 +259,7 @@
         assert(hr->continuesHumongous(), "Invalid humongous.");
       }
     } else {
-      hr->prepare_for_compaction(&_cp);
-      // Also clear the part of the card table that will be unused after
-      // compaction.
-      _mrbs->clear(MemRegion(hr->compaction_top(), hr->end()));
+      prepare_for_compaction(hr, hr->end());
     }
     return false;
   }
@@ -266,14 +277,7 @@
   GCTraceTime tm("phase 2", G1Log::fine() && Verbose, true, gc_timer(), gc_tracer()->gc_id());
   GenMarkSweep::trace("2");
 
-  // find the first region
-  HeapRegion* r = g1h->region_at(0);
-  CompactibleSpace* sp = r;
-  if (r->isHumongous() && oop(r->bottom())->is_gc_marked()) {
-    sp = r->next_compaction_space();
-  }
-
-  G1PrepareCompactClosure blk(sp);
+  G1PrepareCompactClosure blk;
   g1h->heap_region_iterate(&blk);
   blk.update_sets();
 }
@@ -305,22 +309,22 @@
 
   SharedHeap* sh = SharedHeap::heap();
 
-  // Need cleared claim bits for the strong roots processing
+  // Need cleared claim bits for the roots processing
   ClassLoaderDataGraph::clear_claimed_marks();
 
-  sh->process_strong_roots(true,  // activate StrongRootsScope
-                           false, // not scavenging.
-                           SharedHeap::SO_AllClasses,
-                           &GenMarkSweep::adjust_pointer_closure,
-                           NULL,  // do not touch code cache here
-                           &GenMarkSweep::adjust_klass_closure);
+  CodeBlobToOopClosure adjust_code_closure(&GenMarkSweep::adjust_pointer_closure, CodeBlobToOopClosure::FixRelocations);
+  sh->process_all_roots(true,  // activate StrongRootsScope
+                        SharedHeap::SO_AllCodeCache,
+                        &GenMarkSweep::adjust_pointer_closure,
+                        &GenMarkSweep::adjust_cld_closure,
+                        &adjust_code_closure);
 
   assert(GenMarkSweep::ref_processor() == g1h->ref_processor_stw(), "Sanity");
   g1h->ref_processor_stw()->weak_oops_do(&GenMarkSweep::adjust_pointer_closure);
 
   // Now adjust pointers in remaining weak roots.  (All of which should
   // have been cleared if they pointed to non-surviving objects.)
-  g1h->g1_process_weak_roots(&GenMarkSweep::adjust_pointer_closure);
+  sh->process_weak_roots(&GenMarkSweep::adjust_pointer_closure);
 
   if (G1StringDedup::is_enabled()) {
     G1StringDedup::oops_do(&GenMarkSweep::adjust_pointer_closure);
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -25,6 +25,8 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_G1OOPCLOSURES_HPP
 
+#include "memory/iterator.hpp"
+
 class HeapRegion;
 class G1CollectedHeap;
 class G1RemSet;
@@ -106,7 +108,7 @@
   template <class T> void do_klass_barrier(T* p, oop new_obj);
 };
 
-template <G1Barrier barrier, bool do_mark_object>
+template <G1Barrier barrier, G1Mark do_mark_object>
 class G1ParCopyClosure : public G1ParCopyHelper {
 private:
   template <class T> void do_oop_work(T* p);
@@ -121,19 +123,19 @@
   template <class T> void do_oop_nv(T* p) { do_oop_work(p); }
   virtual void do_oop(oop* p)       { do_oop_nv(p); }
   virtual void do_oop(narrowOop* p) { do_oop_nv(p); }
+
+  G1CollectedHeap*      g1()  { return _g1; };
+  G1ParScanThreadState* pss() { return _par_scan_state; }
+  ReferenceProcessor*   rp()  { return _ref_processor; };
 };
 
-typedef G1ParCopyClosure<G1BarrierNone, false> G1ParScanExtRootClosure;
-typedef G1ParCopyClosure<G1BarrierKlass, false> G1ParScanMetadataClosure;
-
-
-typedef G1ParCopyClosure<G1BarrierNone, true> G1ParScanAndMarkExtRootClosure;
-typedef G1ParCopyClosure<G1BarrierKlass, true> G1ParScanAndMarkMetadataClosure;
-
+typedef G1ParCopyClosure<G1BarrierNone,  G1MarkNone>             G1ParScanExtRootClosure;
+typedef G1ParCopyClosure<G1BarrierNone,  G1MarkFromRoot>         G1ParScanAndMarkExtRootClosure;
+typedef G1ParCopyClosure<G1BarrierNone,  G1MarkPromotedFromRoot> G1ParScanAndMarkWeakExtRootClosure;
 // We use a separate closure to handle references during evacuation
 // failure processing.
 
-typedef G1ParCopyClosure<G1BarrierEvac, false> G1ParScanHeapEvacFailureClosure;
+typedef G1ParCopyClosure<G1BarrierEvac, G1MarkNone> G1ParScanHeapEvacFailureClosure;
 
 class FilterIntoCSClosure: public ExtendedOopClosure {
   G1CollectedHeap* _g1;
@@ -164,10 +166,11 @@
 };
 
 // Closure for iterating over object fields during concurrent marking
-class G1CMOopClosure : public ExtendedOopClosure {
+class G1CMOopClosure : public MetadataAwareOopClosure {
+protected:
+  ConcurrentMark*    _cm;
 private:
   G1CollectedHeap*   _g1h;
-  ConcurrentMark*    _cm;
   CMTask*            _task;
 public:
   G1CMOopClosure(G1CollectedHeap* g1h, ConcurrentMark* cm, CMTask* task);
@@ -177,7 +180,7 @@
 };
 
 // Closure to scan the root regions during concurrent marking
-class G1RootRegionScanClosure : public ExtendedOopClosure {
+class G1RootRegionScanClosure : public MetadataAwareOopClosure {
 private:
   G1CollectedHeap* _g1h;
   ConcurrentMark*  _cm;
--- a/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1OopClosures.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -32,6 +32,7 @@
 #include "gc_implementation/g1/g1RemSet.hpp"
 #include "gc_implementation/g1/g1RemSet.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
+#include "memory/iterator.inline.hpp"
 #include "runtime/prefetch.inline.hpp"
 
 /*
@@ -43,7 +44,7 @@
 inline void FilterIntoCSClosure::do_oop_nv(T* p) {
   T heap_oop = oopDesc::load_heap_oop(p);
   if (!oopDesc::is_null(heap_oop) &&
-      _g1->obj_in_cs(oopDesc::decode_heap_oop_not_null(heap_oop))) {
+      _g1->is_in_cset_or_humongous(oopDesc::decode_heap_oop_not_null(heap_oop))) {
     _oc->do_oop(p);
   }
 }
@@ -66,7 +67,8 @@
 
   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    if (_g1->in_cset_fast_test(obj)) {
+    G1CollectedHeap::in_cset_state_t state = _g1->in_cset_state(obj);
+    if (state == G1CollectedHeap::InCSet) {
       // We're not going to even bother checking whether the object is
       // already forwarded or not, as this usually causes an immediate
       // stall. We'll try to prefetch the object (for write, given that
@@ -85,6 +87,9 @@
 
       _par_scan_state->push_on_queue(p);
     } else {
+      if (state == G1CollectedHeap::IsHumongous) {
+        _g1->set_humongous_is_live(obj);
+      }
       _par_scan_state->update_rs(_from, p, _worker_id);
     }
   }
@@ -96,22 +101,20 @@
 
   if (!oopDesc::is_null(heap_oop)) {
     oop obj = oopDesc::decode_heap_oop_not_null(heap_oop);
-    if (_g1->in_cset_fast_test(obj)) {
+    if (_g1->is_in_cset_or_humongous(obj)) {
       Prefetch::write(obj->mark_addr(), 0);
       Prefetch::read(obj->mark_addr(), (HeapWordSize*2));
 
       // Place on the references queue
       _par_scan_state->push_on_queue(p);
+    } else {
+      assert(!_g1->obj_in_cs(obj), "checking");
     }
   }
 }
 
 template <class T>
 inline void G1CMOopClosure::do_oop_nv(T* p) {
-  assert(_g1h->is_in_g1_reserved((HeapWord*) p), "invariant");
-  assert(!_g1h->is_on_master_free_list(
-                    _g1h->heap_region_containing((HeapWord*) p)), "invariant");
-
   oop obj = oopDesc::load_decode_heap_oop(p);
   if (_cm->verbose_high()) {
     gclog_or_tty->print_cr("[%u] we're looking at location "
--- a/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -288,7 +288,12 @@
 }
 
 HeapWord* G1ParScanThreadState::allocate(GCAllocPurpose purpose, size_t word_sz) {
-  HeapWord* obj = alloc_buffer(purpose)->allocate(word_sz);
+  HeapWord* obj = NULL;
+  if (purpose == GCAllocForSurvived) {
+    obj = alloc_buffer(GCAllocForSurvived)->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
+  } else {
+    obj = alloc_buffer(GCAllocForTenured)->allocate(word_sz);
+  }
   if (obj != NULL) {
     return obj;
   }
--- a/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -52,15 +52,20 @@
   // set, due to (benign) races in the claim mechanism during RSet scanning more
   // than one thread might claim the same card. So the same card may be
   // processed multiple times. So redo this check.
-  if (_g1h->in_cset_fast_test(obj)) {
+  G1CollectedHeap::in_cset_state_t in_cset_state = _g1h->in_cset_state(obj);
+  if (in_cset_state == G1CollectedHeap::InCSet) {
     oop forwardee;
     if (obj->is_forwarded()) {
       forwardee = obj->forwardee();
     } else {
       forwardee = copy_to_survivor_space(obj);
     }
-    assert(forwardee != NULL, "forwardee should not be NULL");
     oopDesc::encode_store_heap_oop(p, forwardee);
+  } else if (in_cset_state == G1CollectedHeap::IsHumongous) {
+    _g1h->set_humongous_is_live(obj);
+  } else {
+    assert(in_cset_state == G1CollectedHeap::InNeither,
+           err_msg("In_cset_state must be InNeither here, but is %d", in_cset_state));
   }
 
   assert(obj != NULL, "Must be");
--- a/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -23,7 +23,6 @@
  */
 
 #include "precompiled.hpp"
-#include "gc_implementation/g1/bufferingOopClosure.hpp"
 #include "gc_implementation/g1/concurrentG1Refine.hpp"
 #include "gc_implementation/g1/concurrentG1RefineThread.hpp"
 #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
--- a/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1RemSet.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -65,6 +65,17 @@
   }
 }
 
+void G1SATBCardTableModRefBS::write_ref_array_pre(oop* dst, int count, bool dest_uninitialized) {
+  if (!dest_uninitialized) {
+    write_ref_array_pre_work(dst, count);
+  }
+}
+void G1SATBCardTableModRefBS::write_ref_array_pre(narrowOop* dst, int count, bool dest_uninitialized) {
+  if (!dest_uninitialized) {
+    write_ref_array_pre_work(dst, count);
+  }
+}
+
 bool G1SATBCardTableModRefBS::mark_card_deferred(size_t card_index) {
   jbyte val = _byte_map[card_index];
   // It's already processed
--- a/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1SATBCardTableModRefBS.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -86,16 +86,8 @@
   }
 
   template <class T> void write_ref_array_pre_work(T* dst, int count);
-  virtual void write_ref_array_pre(oop* dst, int count, bool dest_uninitialized) {
-    if (!dest_uninitialized) {
-      write_ref_array_pre_work(dst, count);
-    }
-  }
-  virtual void write_ref_array_pre(narrowOop* dst, int count, bool dest_uninitialized) {
-    if (!dest_uninitialized) {
-      write_ref_array_pre_work(dst, count);
-    }
-  }
+  virtual void write_ref_array_pre(oop* dst, int count, bool dest_uninitialized);
+  virtual void write_ref_array_pre(narrowOop* dst, int count, bool dest_uninitialized);
 
 /*
    Claimed and deferred bits are used together in G1 during the evacuation
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -289,6 +289,13 @@
           "The amount of code root chunks that should be kept at most "     \
           "as percentage of already allocated.")                            \
                                                                             \
+  experimental(bool, G1ReclaimDeadHumongousObjectsAtYoungGC, true,          \
+          "Try to reclaim dead large objects at every young GC.")           \
+                                                                            \
+  experimental(bool, G1TraceReclaimDeadHumongousObjectsAtYoungGC, false,    \
+          "Print some information about large object liveness "             \
+          "at every young GC.")                                             \
+                                                                            \
   experimental(uintx, G1OldCSetRegionThresholdPercent, 10,                  \
           "An upper bound for the number of old CSet regions expressed "    \
           "as a percentage of the heap size.")                              \
@@ -325,11 +332,14 @@
           "evacuation pauses")                                              \
                                                                             \
   diagnostic(bool, G1VerifyRSetsDuringFullGC, false,                        \
-             "If true, perform verification of each heap region's "         \
-             "remembered set when verifying the heap during a full GC.")    \
+          "If true, perform verification of each heap region's "            \
+          "remembered set when verifying the heap during a full GC.")       \
                                                                             \
   diagnostic(bool, G1VerifyHeapRegionCodeRoots, false,                      \
-             "Verify the code root lists attached to each heap region.")
+          "Verify the code root lists attached to each heap region.")       \
+                                                                            \
+  develop(bool, G1VerifyBitmaps, false,                                     \
+          "Verifies the consistency of the marking bitmaps")
 
 G1_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG, DECLARE_MANAGEABLE_FLAG, DECLARE_PRODUCT_RW_FLAG)
 
--- a/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_specialized_oop_closures.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -30,14 +30,21 @@
 // non-virtually, using a mechanism defined in this file.  Extend these
 // macros in the obvious way to add specializations for new closures.
 
-// Forward declarations.
 enum G1Barrier {
   G1BarrierNone,
   G1BarrierEvac,
   G1BarrierKlass
 };
 
-template<G1Barrier barrier, bool do_mark_object>
+enum G1Mark {
+  G1MarkNone,
+  G1MarkFromRoot,
+  G1MarkPromotedFromRoot
+};
+
+// Forward declarations.
+
+template<G1Barrier barrier, G1Mark do_mark_object>
 class G1ParCopyClosure;
 
 class G1ParScanClosure;
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -30,6 +30,7 @@
 #include "gc_implementation/g1/heapRegion.inline.hpp"
 #include "gc_implementation/g1/heapRegionRemSet.hpp"
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
+#include "gc_implementation/shared/liveRange.hpp"
 #include "memory/genOopClosures.inline.hpp"
 #include "memory/iterator.hpp"
 #include "memory/space.inline.hpp"
@@ -48,7 +49,7 @@
                                  HeapRegion* hr, ExtendedOopClosure* cl,
                                  CardTableModRefBS::PrecisionStyle precision,
                                  FilterKind fk) :
-  ContiguousSpaceDCTOC(hr, cl, precision, NULL),
+  DirtyCardToOopClosure(hr, cl, precision, NULL),
   _hr(hr), _fk(fk), _g1(g1) { }
 
 FilterOutOfRegionClosure::FilterOutOfRegionClosure(HeapRegion* r,
@@ -60,7 +61,7 @@
                                HeapRegion* hr,
                                HeapWord* cur, HeapWord* top) {
   oop cur_oop = oop(cur);
-  int oop_size = cur_oop->size();
+  size_t oop_size = hr->block_size(cur);
   HeapWord* next_obj = cur + oop_size;
   while (next_obj < top) {
     // Keep filtering the remembered set.
@@ -71,25 +72,24 @@
     }
     cur = next_obj;
     cur_oop = oop(cur);
-    oop_size = cur_oop->size();
+    oop_size = hr->block_size(cur);
     next_obj = cur + oop_size;
   }
   return cur;
 }
 
-void HeapRegionDCTOC::walk_mem_region_with_cl(MemRegion mr,
-                                              HeapWord* bottom,
-                                              HeapWord* top,
-                                              ExtendedOopClosure* cl) {
+void HeapRegionDCTOC::walk_mem_region(MemRegion mr,
+                                      HeapWord* bottom,
+                                      HeapWord* top) {
   G1CollectedHeap* g1h = _g1;
-  int oop_size;
+  size_t oop_size;
   ExtendedOopClosure* cl2 = NULL;
 
-  FilterIntoCSClosure intoCSFilt(this, g1h, cl);
-  FilterOutOfRegionClosure outOfRegionFilt(_hr, cl);
+  FilterIntoCSClosure intoCSFilt(this, g1h, _cl);
+  FilterOutOfRegionClosure outOfRegionFilt(_hr, _cl);
 
   switch (_fk) {
-  case NoFilterKind:          cl2 = cl; break;
+  case NoFilterKind:          cl2 = _cl; break;
   case IntoCSFilterKind:      cl2 = &intoCSFilt; break;
   case OutOfRegionFilterKind: cl2 = &outOfRegionFilt; break;
   default:                    ShouldNotReachHere();
@@ -102,7 +102,7 @@
   if (!g1h->is_obj_dead(oop(bottom), _hr)) {
     oop_size = oop(bottom)->oop_iterate(cl2, mr);
   } else {
-    oop_size = oop(bottom)->size();
+    oop_size = _hr->block_size(bottom);
   }
 
   bottom += oop_size;
@@ -111,17 +111,17 @@
     // We replicate the loop below for several kinds of possible filters.
     switch (_fk) {
     case NoFilterKind:
-      bottom = walk_mem_region_loop(cl, g1h, _hr, bottom, top);
+      bottom = walk_mem_region_loop(_cl, g1h, _hr, bottom, top);
       break;
 
     case IntoCSFilterKind: {
-      FilterIntoCSClosure filt(this, g1h, cl);
+      FilterIntoCSClosure filt(this, g1h, _cl);
       bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
       break;
     }
 
     case OutOfRegionFilterKind: {
-      FilterOutOfRegionClosure filt(_hr, cl);
+      FilterOutOfRegionClosure filt(_hr, _cl);
       bottom = walk_mem_region_loop(&filt, g1h, _hr, bottom, top);
       break;
     }
@@ -374,50 +374,13 @@
   // region.
   hr_clear(false /*par*/, false /*clear_space*/);
   set_top(bottom());
-  set_saved_mark();
+  record_top_and_timestamp();
 
   assert(HeapRegionRemSet::num_par_rem_sets() > 0, "Invariant.");
 }
 
 CompactibleSpace* HeapRegion::next_compaction_space() const {
-  // We're not using an iterator given that it will wrap around when
-  // it reaches the last region and this is not what we want here.
-  G1CollectedHeap* g1h = G1CollectedHeap::heap();
-  uint index = hrs_index() + 1;
-  while (index < g1h->n_regions()) {
-    HeapRegion* hr = g1h->region_at(index);
-    if (!hr->isHumongous()) {
-      return hr;
-    }
-    index += 1;
-  }
-  return NULL;
-}
-
-void HeapRegion::save_marks() {
-  set_saved_mark();
-}
-
-void HeapRegion::oops_in_mr_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  HeapWord* p = mr.start();
-  HeapWord* e = mr.end();
-  oop obj;
-  while (p < e) {
-    obj = oop(p);
-    p += obj->oop_iterate(cl);
-  }
-  assert(p == e, "bad memregion: doesn't end on obj boundary");
-}
-
-#define HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN(OopClosureType, nv_suffix) \
-void HeapRegion::oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl) { \
-  ContiguousSpace::oop_since_save_marks_iterate##nv_suffix(cl);              \
-}
-SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DEFN)
-
-
-void HeapRegion::oop_before_save_marks_iterate(ExtendedOopClosure* cl) {
-  oops_in_mr_iterate(MemRegion(bottom(), saved_mark_word()), cl);
+  return G1CollectedHeap::heap()->next_compaction_region(this);
 }
 
 void HeapRegion::note_self_forwarding_removal_start(bool during_initial_mark,
@@ -425,7 +388,6 @@
   // We always recreate the prev marking info and we'll explicitly
   // mark all objects we find to be self-forwarded on the prev
   // bitmap. So all objects need to be below PTAMS.
-  _prev_top_at_mark_start = top();
   _prev_marked_bytes = 0;
 
   if (during_initial_mark) {
@@ -449,6 +411,7 @@
   assert(0 <= marked_bytes && marked_bytes <= used(),
          err_msg("marked: "SIZE_FORMAT" used: "SIZE_FORMAT,
                  marked_bytes, used()));
+  _prev_top_at_mark_start = top();
   _prev_marked_bytes = marked_bytes;
 }
 
@@ -479,7 +442,7 @@
     if (cl->abort()) return cur;
     // The check above must occur before the operation below, since an
     // abort might invalidate the "size" operation.
-    cur += obj->size();
+    cur += block_size(cur);
   }
   return NULL;
 }
@@ -551,7 +514,7 @@
       return cur;
     }
     // Otherwise...
-    next = (cur + obj->size());
+    next = cur + block_size(cur);
   }
 
   // If we finish the above loop...We have a parseable object that
@@ -559,10 +522,9 @@
   // inside or spans the entire region.
 
   assert(obj == oop(cur), "sanity");
-  assert(cur <= start &&
-         obj->klass_or_null() != NULL &&
-         (cur + obj->size()) > start,
-         "Loop postcondition");
+  assert(cur <= start, "Loop postcondition");
+  assert(obj->klass_or_null() != NULL, "Loop postcondition");
+  assert((cur + block_size(cur)) > start, "Loop postcondition");
 
   if (!g1h->is_obj_dead(obj)) {
     obj->oop_iterate(cl, mr);
@@ -576,7 +538,7 @@
     };
 
     // Otherwise:
-    next = (cur + obj->size());
+    next = cur + block_size(cur);
 
     if (!g1h->is_obj_dead(obj)) {
       if (next < end || !obj->is_objArray()) {
@@ -931,10 +893,11 @@
   size_t object_num = 0;
   while (p < top()) {
     oop obj = oop(p);
-    size_t obj_size = obj->size();
+    size_t obj_size = block_size(p);
     object_num += 1;
 
-    if (is_humongous != g1->isHumongous(obj_size)) {
+    if (is_humongous != g1->isHumongous(obj_size) &&
+        !g1->is_obj_dead(obj, this)) { // Dead objects may have bigger block_size since they span several objects.
       gclog_or_tty->print_cr("obj "PTR_FORMAT" is of %shumongous size ("
                              SIZE_FORMAT" words) in a %shumongous region",
                              p, g1->isHumongous(obj_size) ? "" : "non-",
@@ -945,7 +908,9 @@
 
     // If it returns false, verify_for_object() will output the
     // appropriate messasge.
-    if (do_bot_verify && !_offsets.verify_for_object(p, obj_size)) {
+    if (do_bot_verify &&
+        !g1->is_obj_dead(obj, this) &&
+        !_offsets.verify_for_object(p, obj_size)) {
       *failures = true;
       return;
     }
@@ -953,7 +918,10 @@
     if (!g1->is_obj_dead_cond(obj, this, vo)) {
       if (obj->is_oop()) {
         Klass* klass = obj->klass();
-        if (!klass->is_metaspace_object()) {
+        bool is_metaspace_object = Metaspace::contains(klass) ||
+                                   (vo == VerifyOption_G1UsePrevMarking &&
+                                   ClassLoaderDataGraph::unload_list_contains(klass));
+        if (!is_metaspace_object) {
           gclog_or_tty->print_cr("klass "PTR_FORMAT" of object "PTR_FORMAT" "
                                  "not metadata", klass, (void *)obj);
           *failures = true;
@@ -1067,7 +1035,9 @@
 // away eventually.
 
 void G1OffsetTableContigSpace::clear(bool mangle_space) {
-  ContiguousSpace::clear(mangle_space);
+  set_top(bottom());
+  set_saved_mark_word(bottom());
+  CompactibleSpace::clear(mangle_space);
   _offsets.zero_bottom_entry();
   _offsets.initialize_threshold();
 }
@@ -1105,10 +1075,10 @@
   if (_gc_time_stamp < g1h->get_gc_time_stamp())
     return top();
   else
-    return ContiguousSpace::saved_mark_word();
+    return Space::saved_mark_word();
 }
 
-void G1OffsetTableContigSpace::set_saved_mark() {
+void G1OffsetTableContigSpace::record_top_and_timestamp() {
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   unsigned curr_gc_time_stamp = g1h->get_gc_time_stamp();
 
@@ -1120,7 +1090,7 @@
     // of region. If it does so after _gc_time_stamp = ..., then it
     // will pick up the right saved_mark_word() as the high water mark
     // of the region. Either way, the behaviour will be correct.
-    ContiguousSpace::set_saved_mark();
+    Space::set_saved_mark_word(top());
     OrderAccess::storestore();
     _gc_time_stamp = curr_gc_time_stamp;
     // No need to do another barrier to flush the writes above. If
@@ -1131,6 +1101,26 @@
   }
 }
 
+void G1OffsetTableContigSpace::safe_object_iterate(ObjectClosure* blk) {
+  object_iterate(blk);
+}
+
+void G1OffsetTableContigSpace::object_iterate(ObjectClosure* blk) {
+  HeapWord* p = bottom();
+  while (p < top()) {
+    if (block_is_obj(p)) {
+      blk->do_object(oop(p));
+    }
+    p += block_size(p);
+  }
+}
+
+#define block_is_always_obj(q) true
+void G1OffsetTableContigSpace::prepare_for_compaction(CompactPoint* cp) {
+  SCAN_AND_FORWARD(cp, top, block_is_always_obj, block_size);
+}
+#undef block_is_always_obj
+
 G1OffsetTableContigSpace::
 G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray,
                          MemRegion mr) :
@@ -1140,7 +1130,8 @@
 {
   _offsets.set_space(this);
   // false ==> we'll do the clearing if there's clearing to be done.
-  ContiguousSpace::initialize(mr, false, SpaceDecorator::Mangle);
+  CompactibleSpace::initialize(mr, false, SpaceDecorator::Mangle);
+  _top = bottom();
   _offsets.zero_bottom_entry();
   _offsets.initialize_threshold();
 }
--- a/src/share/vm/gc_implementation/g1/heapRegion.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -25,7 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_HPP
 
-#include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
+#include "gc_implementation/g1/g1BlockOffsetTable.hpp"
 #include "gc_implementation/g1/g1_specialized_oop_closures.hpp"
 #include "gc_implementation/g1/survRateGroup.hpp"
 #include "gc_implementation/shared/ageTable.hpp"
@@ -46,8 +46,6 @@
 // The solution is to remove this method from the definition
 // of a Space.
 
-class CompactibleSpace;
-class ContiguousSpace;
 class HeapRegionRemSet;
 class HeapRegionRemSetIterator;
 class HeapRegion;
@@ -71,7 +69,7 @@
 // in the concurrent marker used by G1 to filter remembered
 // sets.
 
-class HeapRegionDCTOC : public ContiguousSpaceDCTOC {
+class HeapRegionDCTOC : public DirtyCardToOopClosure {
 public:
   // Specification of possible DirtyCardToOopClosure filtering.
   enum FilterKind {
@@ -85,39 +83,13 @@
   FilterKind _fk;
   G1CollectedHeap* _g1;
 
-  void walk_mem_region_with_cl(MemRegion mr,
-                               HeapWord* bottom, HeapWord* top,
-                               ExtendedOopClosure* cl);
-
-  // We don't specialize this for FilteringClosure; filtering is handled by
-  // the "FilterKind" mechanism.  But we provide this to avoid a compiler
-  // warning.
-  void walk_mem_region_with_cl(MemRegion mr,
-                               HeapWord* bottom, HeapWord* top,
-                               FilteringClosure* cl) {
-    HeapRegionDCTOC::walk_mem_region_with_cl(mr, bottom, top,
-                                             (ExtendedOopClosure*)cl);
-  }
-
-  // Get the actual top of the area on which the closure will
-  // operate, given where the top is assumed to be (the end of the
-  // memory region passed to do_MemRegion) and where the object
-  // at the top is assumed to start. For example, an object may
-  // start at the top but actually extend past the assumed top,
-  // in which case the top becomes the end of the object.
-  HeapWord* get_actual_top(HeapWord* top, HeapWord* top_obj) {
-    return ContiguousSpaceDCTOC::get_actual_top(top, top_obj);
-  }
-
   // Walk the given memory region from bottom to (actual) top
   // looking for objects and applying the oop closure (_cl) to
   // them. The base implementation of this treats the area as
   // blocks, where a block may or may not be an object. Sub-
   // classes should override this to provide more accurate
   // or possibly more efficient walking.
-  void walk_mem_region(MemRegion mr, HeapWord* bottom, HeapWord* top) {
-    Filtering_DCTOC::walk_mem_region(mr, bottom, top);
-  }
+  void walk_mem_region(MemRegion mr, HeapWord* bottom, HeapWord* top);
 
 public:
   HeapRegionDCTOC(G1CollectedHeap* g1,
@@ -151,9 +123,9 @@
 // the regions anyway) and at the end of a Full GC. The current scheme
 // that uses sequential unsigned ints will fail only if we have 4b
 // evacuation pauses between two cleanups, which is _highly_ unlikely.
-
-class G1OffsetTableContigSpace: public ContiguousSpace {
+class G1OffsetTableContigSpace: public CompactibleSpace {
   friend class VMStructs;
+  HeapWord* _top;
  protected:
   G1BlockOffsetArrayContigSpace _offsets;
   Mutex _par_alloc_lock;
@@ -170,11 +142,32 @@
   G1OffsetTableContigSpace(G1BlockOffsetSharedArray* sharedOffsetArray,
                            MemRegion mr);
 
+  void set_top(HeapWord* value) { _top = value; }
+  HeapWord* top() const { return _top; }
+
+ protected:
+  HeapWord** top_addr() { return &_top; }
+  // Allocation helpers (return NULL if full).
+  inline HeapWord* allocate_impl(size_t word_size, HeapWord* end_value);
+  inline HeapWord* par_allocate_impl(size_t word_size, HeapWord* end_value);
+
+ public:
+  void reset_after_compaction() { set_top(compaction_top()); }
+
+  size_t used() const { return byte_size(bottom(), top()); }
+  size_t free() const { return byte_size(top(), end()); }
+  bool is_free_block(const HeapWord* p) const { return p >= top(); }
+
+  MemRegion used_region() const { return MemRegion(bottom(), top()); }
+
+  void object_iterate(ObjectClosure* blk);
+  void safe_object_iterate(ObjectClosure* blk);
+
   void set_bottom(HeapWord* value);
   void set_end(HeapWord* value);
 
   virtual HeapWord* saved_mark_word() const;
-  virtual void set_saved_mark();
+  void record_top_and_timestamp();
   void reset_gc_time_stamp() { _gc_time_stamp = 0; }
   unsigned get_gc_time_stamp() { return _gc_time_stamp; }
 
@@ -194,6 +187,8 @@
   HeapWord* block_start(const void* p);
   HeapWord* block_start_const(const void* p) const;
 
+  void prepare_for_compaction(CompactPoint* cp);
+
   // Add offset table update.
   virtual HeapWord* allocate(size_t word_size);
   HeapWord* par_allocate(size_t word_size);
@@ -228,10 +223,6 @@
     ContinuesHumongous
   };
 
-  // Requires that the region "mr" be dense with objects, and begin and end
-  // with an object.
-  void oops_in_mr_iterate(MemRegion mr, ExtendedOopClosure* cl);
-
   // The remembered set for this region.
   // (Might want to make this "inline" later, to avoid some alloc failure
   // issues.)
@@ -256,11 +247,9 @@
   bool _evacuation_failed;
 
   // A heap region may be a member one of a number of special subsets, each
-  // represented as linked lists through the field below.  Currently, these
-  // sets include:
+  // represented as linked lists through the field below.  Currently, there
+  // is only one set:
   //   The collection set.
-  //   The set of allocation regions used in a collection pause.
-  //   Spaces that may contain gray objects.
   HeapRegion* _next_in_special_set;
 
   // next region in the young "generation" region set
@@ -379,14 +368,15 @@
     ParMarkRootClaimValue      = 9
   };
 
-  inline HeapWord* par_allocate_no_bot_updates(size_t word_size) {
-    assert(is_young(), "we can only skip BOT updates on young regions");
-    return ContiguousSpace::par_allocate(word_size);
-  }
-  inline HeapWord* allocate_no_bot_updates(size_t word_size) {
-    assert(is_young(), "we can only skip BOT updates on young regions");
-    return ContiguousSpace::allocate(word_size);
-  }
+  // All allocated blocks are occupied by objects in a HeapRegion
+  bool block_is_obj(const HeapWord* p) const;
+
+  // Returns the object size for all valid block starts
+  // and the amount of unallocated words if called on top()
+  size_t block_size(const HeapWord* p) const;
+
+  inline HeapWord* par_allocate_no_bot_updates(size_t word_size);
+  inline HeapWord* allocate_no_bot_updates(size_t word_size);
 
   // If this region is a member of a HeapRegionSeq, the index in that
   // sequence, otherwise -1.
@@ -595,9 +585,6 @@
 
   HeapWord* orig_end() { return _orig_end; }
 
-  // Allows logical separation between objects allocated before and after.
-  void save_marks();
-
   // Reset HR stuff to default values.
   void hr_clear(bool par, bool clear_space, bool locked = false);
   void par_clear();
@@ -606,10 +593,6 @@
   HeapWord* prev_top_at_mark_start() const { return _prev_top_at_mark_start; }
   HeapWord* next_top_at_mark_start() const { return _next_top_at_mark_start; }
 
-  // Apply "cl->do_oop" to (the addresses of) all reference fields in objects
-  // allocated in the current region before the last call to "save_mark".
-  void oop_before_save_marks_iterate(ExtendedOopClosure* cl);
-
   // Note the start or end of marking. This tells the heap region
   // that the collector is about to start or has finished (concurrently)
   // marking the heap.
@@ -795,10 +778,6 @@
     _predicted_bytes_to_copy = bytes;
   }
 
-#define HeapRegion_OOP_SINCE_SAVE_MARKS_DECL(OopClosureType, nv_suffix)  \
-  virtual void oop_since_save_marks_iterate##nv_suffix(OopClosureType* cl);
-  SPECIALIZED_SINCE_SAVE_MARKS_CLOSURES(HeapRegion_OOP_SINCE_SAVE_MARKS_DECL)
-
   virtual CompactibleSpace* next_compaction_space() const;
 
   virtual void reset_after_compaction();
--- a/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -25,8 +25,49 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_INLINE_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGION_INLINE_HPP
 
+#include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp"
+#include "gc_implementation/g1/g1CollectedHeap.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#include "memory/space.hpp"
+#include "runtime/atomic.inline.hpp"
+
+// This version requires locking.
+inline HeapWord* G1OffsetTableContigSpace::allocate_impl(size_t size,
+                                                HeapWord* const end_value) {
+  HeapWord* obj = top();
+  if (pointer_delta(end_value, obj) >= size) {
+    HeapWord* new_top = obj + size;
+    set_top(new_top);
+    assert(is_aligned(obj) && is_aligned(new_top), "checking alignment");
+    return obj;
+  } else {
+    return NULL;
+  }
+}
+
+// This version is lock-free.
+inline HeapWord* G1OffsetTableContigSpace::par_allocate_impl(size_t size,
+                                                    HeapWord* const end_value) {
+  do {
+    HeapWord* obj = top();
+    if (pointer_delta(end_value, obj) >= size) {
+      HeapWord* new_top = obj + size;
+      HeapWord* result = (HeapWord*)Atomic::cmpxchg_ptr(new_top, top_addr(), obj);
+      // result can be one of two:
+      //  the old top value: the exchange succeeded
+      //  otherwise: the new value of the top is returned.
+      if (result == obj) {
+        assert(is_aligned(obj) && is_aligned(new_top), "checking alignment");
+        return obj;
+      }
+    } else {
+      return NULL;
+    }
+  } while (true);
+}
+
 inline HeapWord* G1OffsetTableContigSpace::allocate(size_t size) {
-  HeapWord* res = ContiguousSpace::allocate(size);
+  HeapWord* res = allocate_impl(size, end());
   if (res != NULL) {
     _offsets.alloc_block(res, size);
   }
@@ -38,12 +79,7 @@
 // this is used for larger LAB allocations only.
 inline HeapWord* G1OffsetTableContigSpace::par_allocate(size_t size) {
   MutexLocker x(&_par_alloc_lock);
-  // Given that we take the lock no need to use par_allocate() here.
-  HeapWord* res = ContiguousSpace::allocate(size);
-  if (res != NULL) {
-    _offsets.alloc_block(res, size);
-  }
-  return res;
+  return allocate(size);
 }
 
 inline HeapWord* G1OffsetTableContigSpace::block_start(const void* p) {
@@ -55,6 +91,52 @@
   return _offsets.block_start_const(p);
 }
 
+inline bool
+HeapRegion::block_is_obj(const HeapWord* p) const {
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  if (ClassUnloadingWithConcurrentMark) {
+    return !g1h->is_obj_dead(oop(p), this);
+  }
+  return p < top();
+}
+
+inline size_t
+HeapRegion::block_size(const HeapWord *addr) const {
+  if (addr == top()) {
+    return pointer_delta(end(), addr);
+  }
+
+  if (block_is_obj(addr)) {
+    return oop(addr)->size();
+  }
+
+  assert(ClassUnloadingWithConcurrentMark,
+      err_msg("All blocks should be objects if G1 Class Unloading isn't used. "
+              "HR: ["PTR_FORMAT", "PTR_FORMAT", "PTR_FORMAT") "
+              "addr: " PTR_FORMAT,
+              p2i(bottom()), p2i(top()), p2i(end()), p2i(addr)));
+
+  // Old regions' dead objects may have dead classes
+  // We need to find the next live object in some other
+  // manner than getting the oop size
+  G1CollectedHeap* g1h = G1CollectedHeap::heap();
+  HeapWord* next = g1h->concurrent_mark()->prevMarkBitMap()->
+      getNextMarkedWordAddress(addr, prev_top_at_mark_start());
+
+  assert(next > addr, "must get the next live object");
+  return pointer_delta(next, addr);
+}
+
+inline HeapWord* HeapRegion::par_allocate_no_bot_updates(size_t word_size) {
+  assert(is_young(), "we can only skip BOT updates on young regions");
+  return par_allocate_impl(word_size, end());
+}
+
+inline HeapWord* HeapRegion::allocate_no_bot_updates(size_t word_size) {
+  assert(is_young(), "we can only skip BOT updates on young regions");
+  return allocate_impl(word_size, end());
+}
+
 inline void HeapRegion::note_start_of_marking() {
   _next_marked_bytes = 0;
   _next_top_at_mark_start = top();
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -694,6 +694,9 @@
   clear_fcc();
 }
 
+bool OtherRegionsTable::is_empty() const {
+  return occ_sparse() == 0 && occ_coarse() == 0 && _first_all_fine_prts == NULL;
+}
 
 size_t OtherRegionsTable::occupied() const {
   size_t sum = occ_fine();
@@ -929,7 +932,10 @@
 
 void HeapRegionRemSet::remove_strong_code_root(nmethod* nm) {
   assert(nm != NULL, "sanity");
-  _code_roots.remove(nm);
+  assert_locked_or_safepoint(CodeCache_lock);
+
+  _code_roots.remove_lock_free(nm);
+
   // Check that there were no duplicates
   guarantee(!_code_roots.contains(nm), "duplicate entry found");
 }
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -185,6 +185,9 @@
   // objects.
   void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
 
+  // Returns whether this remembered set (and all sub-sets) contain no entries.
+  bool is_empty() const;
+
   size_t occupied() const;
   size_t occ_fine() const;
   size_t occ_coarse() const;
@@ -269,6 +272,10 @@
     return _other_regions.hr();
   }
 
+  bool is_empty() const {
+    return (strong_code_roots_list_length() == 0) && _other_regions.is_empty();
+  }
+
   size_t occupied() {
     MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
     return occupied_locked();
@@ -375,7 +382,7 @@
   void strong_code_roots_do(CodeBlobClosure* blk) const;
 
   // Returns the number of elements in the strong code roots list
-  size_t strong_code_roots_list_length() {
+  size_t strong_code_roots_list_length() const {
     return _code_roots.length();
   }
 
--- a/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionSet.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -119,7 +119,7 @@
 public:
   const char* name() { return _name; }
 
-  uint length() { return _count.length(); }
+  uint length() const { return _count.length(); }
 
   bool is_empty() { return _count.length() == 0; }
 
--- a/src/share/vm/gc_implementation/g1/satbQueue.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/satbQueue.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -285,37 +285,6 @@
   _par_closures[i] = par_closure;
 }
 
-void SATBMarkQueueSet::iterate_closure_all_threads() {
-  for(JavaThread* t = Threads::first(); t; t = t->next()) {
-    t->satb_mark_queue().apply_closure_and_empty(_closure);
-  }
-  shared_satb_queue()->apply_closure_and_empty(_closure);
-}
-
-void SATBMarkQueueSet::par_iterate_closure_all_threads(uint worker) {
-  SharedHeap* sh = SharedHeap::heap();
-  int parity = sh->strong_roots_parity();
-
-  for(JavaThread* t = Threads::first(); t; t = t->next()) {
-    if (t->claim_oops_do(true, parity)) {
-      t->satb_mark_queue().apply_closure_and_empty(_par_closures[worker]);
-    }
-  }
-
-  // We also need to claim the VMThread so that its parity is updated
-  // otherwise the next call to Thread::possibly_parallel_oops_do inside
-  // a StrongRootsScope might skip the VMThread because it has a stale
-  // parity that matches the parity set by the StrongRootsScope
-  //
-  // Whichever worker succeeds in claiming the VMThread gets to do
-  // the shared queue.
-
-  VMThread* vmt = VMThread::vm_thread();
-  if (vmt->claim_oops_do(true, parity)) {
-    shared_satb_queue()->apply_closure_and_empty(_par_closures[worker]);
-  }
-}
-
 bool SATBMarkQueueSet::apply_closure_to_completed_buffer_work(bool par,
                                                               uint worker) {
   BufferNode* nd = NULL;
--- a/src/share/vm/gc_implementation/g1/satbQueue.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/satbQueue.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -33,7 +33,9 @@
 
 // A ptrQueue whose elements are "oops", pointers to object heads.
 class ObjPtrQueue: public PtrQueue {
+  friend class Threads;
   friend class SATBMarkQueueSet;
+  friend class G1RemarkThreadsClosure;
 
 private:
   // Filter out unwanted entries from the buffer.
@@ -119,13 +121,6 @@
   // closures, one for each parallel GC thread.
   void set_par_closure(int i, ObjectClosure* closure);
 
-  // Apply the registered closure to all entries on each
-  // currently-active buffer and then empty the buffer. It should only
-  // be called serially and at a safepoint.
-  void iterate_closure_all_threads();
-  // Parallel version of the above.
-  void par_iterate_closure_all_threads(uint worker);
-
   // If there exists some completed buffer, pop it, then apply the
   // registered closure to all its elements, and return true.  If no
   // completed buffers exist, return false.
--- a/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/g1/vmStructs_g1.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -34,6 +34,8 @@
   static_field(HeapRegion, GrainBytes,        size_t)                         \
   static_field(HeapRegion, LogOfHRGrainBytes, int)                            \
                                                                               \
+  nonstatic_field(G1OffsetTableContigSpace, _top,       HeapWord*)            \
+                                                                              \
   nonstatic_field(G1HeapRegionTable, _base,             address)              \
   nonstatic_field(G1HeapRegionTable, _length,           size_t)               \
   nonstatic_field(G1HeapRegionTable, _biased_base,      address)              \
@@ -69,7 +71,8 @@
                                                                               \
   declare_type(G1CollectedHeap, SharedHeap)                                   \
                                                                               \
-  declare_type(HeapRegion, ContiguousSpace)                                   \
+  declare_type(G1OffsetTableContigSpace, CompactibleSpace)                    \
+  declare_type(HeapRegion, G1OffsetTableContigSpace)                          \
   declare_toplevel_type(HeapRegionSeq)                                        \
   declare_toplevel_type(HeapRegionSetBase)                                    \
   declare_toplevel_type(HeapRegionSetCount)                                   \
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -28,12 +28,12 @@
 #include "gc_implementation/parNew/parOopClosures.inline.hpp"
 #include "gc_implementation/shared/adaptiveSizePolicy.hpp"
 #include "gc_implementation/shared/ageTable.hpp"
-#include "gc_implementation/shared/parGCAllocBuffer.hpp"
+#include "gc_implementation/shared/copyFailedInfo.hpp"
 #include "gc_implementation/shared/gcHeapSummary.hpp"
 #include "gc_implementation/shared/gcTimer.hpp"
 #include "gc_implementation/shared/gcTrace.hpp"
 #include "gc_implementation/shared/gcTraceTime.hpp"
-#include "gc_implementation/shared/copyFailedInfo.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.inline.hpp"
 #include "gc_implementation/shared/spaceDecorator.hpp"
 #include "memory/defNewGeneration.inline.hpp"
 #include "memory/genCollectedHeap.hpp"
@@ -251,7 +251,7 @@
         plab->set_word_size(buf_size);
         plab->set_buf(buf_space);
         record_survivor_plab(buf_space, buf_size);
-        obj = plab->allocate(word_sz);
+        obj = plab->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
         // Note that we cannot compare buf_size < word_sz below
         // because of AlignmentReserve (see ParGCAllocBuffer::allocate()).
         assert(obj != NULL || plab->words_remaining() < word_sz,
@@ -613,20 +613,21 @@
 
   KlassScanClosure klass_scan_closure(&par_scan_state.to_space_root_closure(),
                                       gch->rem_set()->klass_rem_set());
-
-  int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
+  CLDToKlassAndOopClosure cld_scan_closure(&klass_scan_closure,
+                                           &par_scan_state.to_space_root_closure(),
+                                           false);
 
   par_scan_state.start_strong_roots();
-  gch->gen_process_strong_roots(_gen->level(),
-                                true,  // Process younger gens, if any,
-                                       // as strong roots.
-                                false, // no scope; this is parallel code
-                                true,  // is scavenging
-                                SharedHeap::ScanningOption(so),
-                                &par_scan_state.to_space_root_closure(),
-                                true,   // walk *all* scavengable nmethods
-                                &par_scan_state.older_gen_closure(),
-                                &klass_scan_closure);
+  gch->gen_process_roots(_gen->level(),
+                         true,  // Process younger gens, if any,
+                                // as strong roots.
+                         false, // no scope; this is parallel code
+                         SharedHeap::SO_ScavengeCodeCache,
+                         GenCollectedHeap::StrongAndWeakRoots,
+                         &par_scan_state.to_space_root_closure(),
+                         &par_scan_state.older_gen_closure(),
+                         &cld_scan_closure);
+
   par_scan_state.end_strong_roots();
 
   // "evacuate followers".
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -69,7 +69,7 @@
   ParScanWithoutBarrierClosure         _to_space_closure; // scan_without_gc_barrier
   ParScanWithBarrierClosure            _old_gen_closure; // scan_with_gc_barrier
   ParRootScanWithoutBarrierClosure     _to_space_root_closure; // scan_root_without_gc_barrier
-  // One of these two will be passed to process_strong_roots, which will
+  // One of these two will be passed to process_roots, which will
   // set its generation.  The first is for two-gen configs where the
   // old gen collects the perm gen; the second is for arbitrary configs.
   // The second isn't used right now (it used to be used for the train, an
@@ -168,7 +168,7 @@
   HeapWord* alloc_in_to_space_slow(size_t word_sz);
 
   HeapWord* alloc_in_to_space(size_t word_sz) {
-    HeapWord* obj = to_space_alloc_buffer()->allocate(word_sz);
+    HeapWord* obj = to_space_alloc_buffer()->allocate_aligned(word_sz, SurvivorAlignmentInBytes);
     if (obj != NULL) return obj;
     else return alloc_in_to_space_slow(word_sz);
   }
--- a/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/pcTasks.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -59,7 +59,7 @@
 
   PSParallelCompact::MarkAndPushClosure mark_and_push_closure(cm);
   CLDToOopClosure mark_and_push_from_clds(&mark_and_push_closure, true);
-  CodeBlobToOopClosure mark_and_push_in_blobs(&mark_and_push_closure, /*do_marking=*/ true);
+  MarkingCodeBlobClosure mark_and_push_in_blobs(&mark_and_push_closure, !CodeBlobToOopClosure::FixRelocations);
 
   if (_java_thread != NULL)
     _java_thread->oops_do(
@@ -100,7 +100,7 @@
     case threads:
     {
       ResourceMark rm;
-      CodeBlobToOopClosure each_active_code_blob(&mark_and_push_closure, /*do_marking=*/ true);
+      MarkingCodeBlobClosure each_active_code_blob(&mark_and_push_closure, !CodeBlobToOopClosure::FixRelocations);
       CLDToOopClosure mark_and_push_from_cld(&mark_and_push_closure);
       Threads::oops_do(&mark_and_push_closure, &mark_and_push_from_cld, &each_active_code_blob);
     }
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -528,14 +528,14 @@
     Universe::oops_do(mark_and_push_closure());
     JNIHandles::oops_do(mark_and_push_closure());   // Global (strong) JNI handles
     CLDToOopClosure mark_and_push_from_cld(mark_and_push_closure());
-    CodeBlobToOopClosure each_active_code_blob(mark_and_push_closure(), /*do_marking=*/ true);
+    MarkingCodeBlobClosure each_active_code_blob(mark_and_push_closure(), !CodeBlobToOopClosure::FixRelocations);
     Threads::oops_do(mark_and_push_closure(), &mark_and_push_from_cld, &each_active_code_blob);
     ObjectSynchronizer::oops_do(mark_and_push_closure());
     FlatProfiler::oops_do(mark_and_push_closure());
     Management::oops_do(mark_and_push_closure());
     JvmtiExport::oops_do(mark_and_push_closure());
     SystemDictionary::always_strong_oops_do(mark_and_push_closure());
-    ClassLoaderDataGraph::always_strong_oops_do(mark_and_push_closure(), follow_klass_closure(), true);
+    ClassLoaderDataGraph::always_strong_cld_do(follow_cld_closure());
     // Do not treat nmethods as strong roots for mark/sweep, since we can unload them.
     //CodeCache::scavenge_root_nmethods_do(CodeBlobToOopClosure(mark_and_push_closure()));
   }
@@ -625,16 +625,16 @@
   FlatProfiler::oops_do(adjust_pointer_closure());
   Management::oops_do(adjust_pointer_closure());
   JvmtiExport::oops_do(adjust_pointer_closure());
-  // SO_AllClasses
   SystemDictionary::oops_do(adjust_pointer_closure());
-  ClassLoaderDataGraph::oops_do(adjust_pointer_closure(), adjust_klass_closure(), true);
+  ClassLoaderDataGraph::cld_do(adjust_cld_closure());
 
   // Now adjust pointers in remaining weak roots.  (All of which should
   // have been cleared if they pointed to non-surviving objects.)
   // Global (weak) JNI handles
   JNIHandles::weak_oops_do(&always_true, adjust_pointer_closure());
 
-  CodeCache::oops_do(adjust_pointer_closure());
+  CodeBlobToOopClosure adjust_from_blobs(adjust_pointer_closure(), CodeBlobToOopClosure::FixRelocations);
+  CodeCache::blobs_do(&adjust_from_blobs);
   StringTable::oops_do(adjust_pointer_closure());
   ref_processor()->weak_oops_do(adjust_pointer_closure());
   PSScavenge::reference_processor()->weak_oops_do(adjust_pointer_closure());
--- a/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psMarkSweep.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -40,11 +40,11 @@
   static CollectorCounters*  _counters;
 
   // Closure accessors
-  static OopClosure* mark_and_push_closure() { return &MarkSweep::mark_and_push_closure; }
-  static KlassClosure* follow_klass_closure() { return &MarkSweep::follow_klass_closure; }
-  static VoidClosure* follow_stack_closure() { return (VoidClosure*)&MarkSweep::follow_stack_closure; }
-  static OopClosure* adjust_pointer_closure() { return (OopClosure*)&MarkSweep::adjust_pointer_closure; }
-  static KlassClosure* adjust_klass_closure() { return &MarkSweep::adjust_klass_closure; }
+  static OopClosure* mark_and_push_closure()   { return &MarkSweep::mark_and_push_closure; }
+  static VoidClosure* follow_stack_closure()   { return (VoidClosure*)&MarkSweep::follow_stack_closure; }
+  static CLDClosure* follow_cld_closure()      { return &MarkSweep::follow_cld_closure; }
+  static OopClosure* adjust_pointer_closure()  { return (OopClosure*)&MarkSweep::adjust_pointer_closure; }
+  static CLDClosure* adjust_cld_closure()      { return &MarkSweep::adjust_cld_closure; }
   static BoolObjectClosure* is_alive_closure() { return (BoolObjectClosure*)&MarkSweep::is_alive; }
 
  debug_only(public:)  // Used for PSParallelCompact debugging
--- a/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psParallelCompact.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -2465,7 +2465,6 @@
   FlatProfiler::oops_do(adjust_pointer_closure());
   Management::oops_do(adjust_pointer_closure());
   JvmtiExport::oops_do(adjust_pointer_closure());
-  // SO_AllClasses
   SystemDictionary::oops_do(adjust_pointer_closure());
   ClassLoaderDataGraph::oops_do(adjust_pointer_closure(), adjust_klass_closure(), true);
 
@@ -2474,7 +2473,8 @@
   // Global (weak) JNI handles
   JNIHandles::weak_oops_do(&always_true, adjust_pointer_closure());
 
-  CodeCache::oops_do(adjust_pointer_closure());
+  CodeBlobToOopClosure adjust_from_blobs(adjust_pointer_closure(), CodeBlobToOopClosure::FixRelocations);
+  CodeCache::blobs_do(&adjust_from_blobs);
   StringTable::oops_do(adjust_pointer_closure());
   ref_processor()->weak_oops_do(adjust_pointer_closure());
   // Roots were visited so references into the young gen in roots
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -26,6 +26,7 @@
 #define SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_HPP
 
 #include "gc_implementation/parallelScavenge/objectStartArray.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
 #include "memory/allocation.hpp"
 
 //
@@ -94,23 +95,9 @@
   PSYoungPromotionLAB() { }
 
   // Not MT safe
-  HeapWord* allocate(size_t size) {
-    // Can't assert this, when young fills, we keep the LAB around, but flushed.
-    // assert(_state != flushed, "Sanity");
-    HeapWord* obj = top();
-    HeapWord* new_top = obj + size;
-    // The 'new_top>obj' check is needed to detect overflow of obj+size.
-    if (new_top > obj && new_top <= end()) {
-      set_top(new_top);
-      assert(is_object_aligned((intptr_t)obj) && is_object_aligned((intptr_t)new_top),
-             "checking alignment");
-      return obj;
-    }
+  inline HeapWord* allocate(size_t size);
 
-    return NULL;
-  }
-
-  debug_only(virtual bool lab_is_valid(MemRegion lab));
+  debug_only(virtual bool lab_is_valid(MemRegion lab);)
 };
 
 class PSOldPromotionLAB : public PSPromotionLAB {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionLAB.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_INLINE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_INLINE_HPP
+
+#include "gc_implementation/parallelScavenge/psPromotionLAB.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+
+HeapWord* PSYoungPromotionLAB::allocate(size_t size) {
+  // Can't assert this, when young fills, we keep the LAB around, but flushed.
+  // assert(_state != flushed, "Sanity");
+  HeapWord* obj = CollectedHeap::align_allocation_or_fail(top(), end(), SurvivorAlignmentInBytes);
+  if (obj == NULL) {
+    return NULL;
+  }
+
+  HeapWord* new_top = obj + size;
+  // The 'new_top>obj' check is needed to detect overflow of obj+size.
+  if (new_top > obj && new_top <= end()) {
+    set_top(new_top);
+    assert(is_ptr_aligned(obj, SurvivorAlignmentInBytes) && is_object_aligned((intptr_t)new_top),
+           "checking alignment");
+    return obj;
+  } else {
+    set_top(obj);
+    return NULL;
+  }
+}
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_PARALLELSCAVENGE_PSPROMOTIONLAB_INLINE_HPP
--- a/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -27,6 +27,7 @@
 
 #include "gc_implementation/parallelScavenge/psOldGen.hpp"
 #include "gc_implementation/parallelScavenge/psPromotionManager.hpp"
+#include "gc_implementation/parallelScavenge/psPromotionLAB.inline.hpp"
 #include "gc_implementation/parallelScavenge/psScavenge.hpp"
 #include "oops/oop.psgc.inline.hpp"
 
--- a/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/parallelScavenge/psTasks.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -65,7 +65,7 @@
     case threads:
     {
       ResourceMark rm;
-      CLDToOopClosure* cld_closure = NULL; // Not needed. All CLDs are already visited.
+      CLDClosure* cld_closure = NULL; // Not needed. All CLDs are already visited.
       Threads::oops_do(&roots_closure, cld_closure, NULL);
     }
     break;
@@ -100,7 +100,7 @@
 
     case code_cache:
       {
-        CodeBlobToOopClosure each_scavengable_code_blob(&roots_to_old_closure, /*do_marking=*/ true);
+        MarkingCodeBlobClosure each_scavengable_code_blob(&roots_to_old_closure, CodeBlobToOopClosure::FixRelocations);
         CodeCache::scavenge_root_nmethods_do(&each_scavengable_code_blob);
       }
       break;
@@ -122,8 +122,8 @@
 
   PSPromotionManager* pm = PSPromotionManager::gc_thread_promotion_manager(which);
   PSScavengeRootsClosure roots_closure(pm);
-  CLDToOopClosure* roots_from_clds = NULL;  // Not needed. All CLDs are already visited.
-  CodeBlobToOopClosure roots_in_blobs(&roots_closure, /*do_marking=*/ true);
+  CLDClosure* roots_from_clds = NULL;  // Not needed. All CLDs are already visited.
+  MarkingCodeBlobClosure roots_in_blobs(&roots_closure, CodeBlobToOopClosure::FixRelocations);
 
   if (_java_thread != NULL)
     _java_thread->oops_do(&roots_closure, roots_from_clds, &roots_in_blobs);
--- a/src/share/vm/gc_implementation/shared/markSweep.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/markSweep.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -49,27 +49,19 @@
 SerialOldTracer*        MarkSweep::_gc_tracer       = NULL;
 
 MarkSweep::FollowRootClosure  MarkSweep::follow_root_closure;
-CodeBlobToOopClosure MarkSweep::follow_code_root_closure(&MarkSweep::follow_root_closure, /*do_marking=*/ true);
 
 void MarkSweep::FollowRootClosure::do_oop(oop* p)       { follow_root(p); }
 void MarkSweep::FollowRootClosure::do_oop(narrowOop* p) { follow_root(p); }
 
 MarkSweep::MarkAndPushClosure MarkSweep::mark_and_push_closure;
-MarkSweep::FollowKlassClosure MarkSweep::follow_klass_closure;
-MarkSweep::AdjustKlassClosure MarkSweep::adjust_klass_closure;
+CLDToOopClosure               MarkSweep::follow_cld_closure(&mark_and_push_closure);
+CLDToOopClosure               MarkSweep::adjust_cld_closure(&adjust_pointer_closure);
 
 void MarkSweep::MarkAndPushClosure::do_oop(oop* p)       { mark_and_push(p); }
 void MarkSweep::MarkAndPushClosure::do_oop(narrowOop* p) { mark_and_push(p); }
 
-void MarkSweep::FollowKlassClosure::do_klass(Klass* klass) {
-  klass->oops_do(&MarkSweep::mark_and_push_closure);
-}
-void MarkSweep::AdjustKlassClosure::do_klass(Klass* klass) {
-  klass->oops_do(&MarkSweep::adjust_pointer_closure);
-}
-
 void MarkSweep::follow_class_loader(ClassLoaderData* cld) {
-  cld->oops_do(&MarkSweep::mark_and_push_closure, &MarkSweep::follow_klass_closure, true);
+  MarkSweep::follow_cld_closure.do_cld(cld);
 }
 
 void MarkSweep::follow_stack() {
--- a/src/share/vm/gc_implementation/shared/markSweep.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/markSweep.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -65,17 +65,6 @@
     virtual void do_oop(narrowOop* p);
   };
 
-  // The one and only place to start following the classes.
-  // Should only be applied to the ClassLoaderData klasses list.
-  class FollowKlassClosure : public KlassClosure {
-   public:
-    void do_klass(Klass* klass);
-  };
-  class AdjustKlassClosure : public KlassClosure {
-   public:
-    void do_klass(Klass* klass);
-  };
-
   class FollowStackClosure: public VoidClosure {
    public:
     virtual void do_void();
@@ -143,12 +132,11 @@
   // Public closures
   static IsAliveClosure       is_alive;
   static FollowRootClosure    follow_root_closure;
-  static CodeBlobToOopClosure follow_code_root_closure; // => follow_root_closure
   static MarkAndPushClosure   mark_and_push_closure;
-  static FollowKlassClosure   follow_klass_closure;
   static FollowStackClosure   follow_stack_closure;
+  static CLDToOopClosure      follow_cld_closure;
   static AdjustPointerClosure adjust_pointer_closure;
-  static AdjustKlassClosure   adjust_klass_closure;
+  static CLDToOopClosure      adjust_cld_closure;
 
   // Accessors
   static uint total_invocations() { return _total_invocations; }
--- a/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -24,7 +24,7 @@
 
 #ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
-
+#include "gc_interface/collectedHeap.hpp"
 #include "memory/allocation.hpp"
 #include "memory/blockOffsetTable.hpp"
 #include "memory/threadLocalAllocBuffer.hpp"
@@ -84,6 +84,9 @@
     }
   }
 
+  // Allocate the object aligned to "alignment_in_bytes".
+  HeapWord* allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes);
+
   // Undo the last allocation in the buffer, which is required to be of the
   // "obj" of the given "word_sz".
   void undo_allocation(HeapWord* obj, size_t word_sz) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_GC_IMPLEMENTATION_SHARED_PARGCALLOCBUFFER_INLINE_HPP
+#define SHARE_VM_GC_IMPLEMENTATION_SHARED_PARGCALLOCBUFFER_INLINE_HPP
+
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+
+HeapWord* ParGCAllocBuffer::allocate_aligned(size_t word_sz, unsigned short alignment_in_bytes) {
+
+  HeapWord* res = CollectedHeap::align_allocation_or_fail(_top, _end, alignment_in_bytes);
+  if (res == NULL) {
+    return NULL;
+  }
+
+  // Set _top so that allocate(), which expects _top to be correctly set,
+  // can be used below.
+  _top = res;
+  return allocate(word_sz);
+}
+
+#endif // SHARE_VM_GC_IMPLEMENTATION_SHARED_PARGCALLOCBUFFER_INLINE_HPP
--- a/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/vmGCOperations.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2014, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -209,6 +209,45 @@
   gch->do_full_collection(gch->must_clear_all_soft_refs(), _max_level);
 }
 
+// Returns true iff concurrent GCs unloads metadata.
+bool VM_CollectForMetadataAllocation::initiate_concurrent_GC() {
+#if INCLUDE_ALL_GCS
+  if (UseConcMarkSweepGC && CMSClassUnloadingEnabled) {
+    MetaspaceGC::set_should_concurrent_collect(true);
+    return true;
+  }
+
+  if (UseG1GC && ClassUnloadingWithConcurrentMark) {
+    G1CollectedHeap* g1h = G1CollectedHeap::heap();
+    g1h->g1_policy()->set_initiate_conc_mark_if_possible();
+
+    GCCauseSetter x(g1h, _gc_cause);
+
+    // At this point we are supposed to start a concurrent cycle. We
+    // will do so if one is not already in progress.
+    bool should_start = g1h->g1_policy()->force_initial_mark_if_outside_cycle(_gc_cause);
+
+    if (should_start) {
+      double pause_target = g1h->g1_policy()->max_pause_time_ms();
+      g1h->do_collection_pause_at_safepoint(pause_target);
+    }
+    return true;
+  }
+#endif
+
+  return false;
+}
+
+static void log_metaspace_alloc_failure_for_concurrent_GC() {
+  if (Verbose && PrintGCDetails) {
+    if (UseConcMarkSweepGC) {
+      gclog_or_tty->print_cr("\nCMS full GC for Metaspace");
+    } else if (UseG1GC) {
+      gclog_or_tty->print_cr("\nG1 full GC for Metaspace");
+    }
+  }
+}
+
 void VM_CollectForMetadataAllocation::doit() {
   SvcGCMarker sgcm(SvcGCMarker::FULL);
 
@@ -220,54 +259,57 @@
   // a GC that freed space for the allocation.
   if (!MetadataAllocationFailALot) {
     _result = _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
-  }
-
-  if (_result == NULL) {
-    if (UseConcMarkSweepGC) {
-      if (CMSClassUnloadingEnabled) {
-        MetaspaceGC::set_should_concurrent_collect(true);
-      }
-      // For CMS expand since the collection is going to be concurrent.
-      _result =
-        _loader_data->metaspace_non_null()->expand_and_allocate(_size, _mdtype);
-    }
-    if (_result == NULL) {
-      // Don't clear the soft refs yet.
-      if (Verbose && PrintGCDetails && UseConcMarkSweepGC) {
-        gclog_or_tty->print_cr("\nCMS full GC for Metaspace");
-      }
-      heap->collect_as_vm_thread(GCCause::_metadata_GC_threshold);
-      // After a GC try to allocate without expanding.  Could fail
-      // and expansion will be tried below.
-      _result =
-        _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
-    }
-    if (_result == NULL) {
-      // If still failing, allow the Metaspace to expand.
-      // See delta_capacity_until_GC() for explanation of the
-      // amount of the expansion.
-      // This should work unless there really is no more space
-      // or a MaxMetaspaceSize has been specified on the command line.
-      _result =
-        _loader_data->metaspace_non_null()->expand_and_allocate(_size, _mdtype);
-      if (_result == NULL) {
-        // If expansion failed, do a last-ditch collection and try allocating
-        // again.  A last-ditch collection will clear softrefs.  This
-        // behavior is similar to the last-ditch collection done for perm
-        // gen when it was full and a collection for failed allocation
-        // did not free perm gen space.
-        heap->collect_as_vm_thread(GCCause::_last_ditch_collection);
-        _result =
-          _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
-      }
-    }
-    if (Verbose && PrintGCDetails && _result == NULL) {
-      gclog_or_tty->print_cr("\nAfter Metaspace GC failed to allocate size "
-                             SIZE_FORMAT, _size);
+    if (_result != NULL) {
+      return;
     }
   }
 
-  if (_result == NULL && GC_locker::is_active_and_needs_gc()) {
+  if (initiate_concurrent_GC()) {
+    // For CMS and G1 expand since the collection is going to be concurrent.
+    _result = _loader_data->metaspace_non_null()->expand_and_allocate(_size, _mdtype);
+    if (_result != NULL) {
+      return;
+    }
+
+    log_metaspace_alloc_failure_for_concurrent_GC();
+  }
+
+  // Don't clear the soft refs yet.
+  heap->collect_as_vm_thread(GCCause::_metadata_GC_threshold);
+  // After a GC try to allocate without expanding.  Could fail
+  // and expansion will be tried below.
+  _result = _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
+  if (_result != NULL) {
+    return;
+  }
+
+  // If still failing, allow the Metaspace to expand.
+  // See delta_capacity_until_GC() for explanation of the
+  // amount of the expansion.
+  // This should work unless there really is no more space
+  // or a MaxMetaspaceSize has been specified on the command line.
+  _result = _loader_data->metaspace_non_null()->expand_and_allocate(_size, _mdtype);
+  if (_result != NULL) {
+    return;
+  }
+
+  // If expansion failed, do a last-ditch collection and try allocating
+  // again.  A last-ditch collection will clear softrefs.  This
+  // behavior is similar to the last-ditch collection done for perm
+  // gen when it was full and a collection for failed allocation
+  // did not free perm gen space.
+  heap->collect_as_vm_thread(GCCause::_last_ditch_collection);
+  _result = _loader_data->metaspace_non_null()->allocate(_size, _mdtype);
+  if (_result != NULL) {
+    return;
+  }
+
+  if (Verbose && PrintGCDetails) {
+    gclog_or_tty->print_cr("\nAfter Metaspace GC failed to allocate size "
+                           SIZE_FORMAT, _size);
+  }
+
+  if (GC_locker::is_active_and_needs_gc()) {
     set_gc_locked();
   }
 }
--- a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -217,6 +217,8 @@
   virtual VMOp_Type type() const { return VMOp_CollectForMetadataAllocation; }
   virtual void doit();
   MetaWord* result() const       { return _result; }
+
+  bool initiate_concurrent_GC();
 };
 
 class SvcGCMarker : public StackObj {
--- a/src/share/vm/gc_interface/collectedHeap.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_interface/collectedHeap.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -351,6 +351,12 @@
     fill_with_object(start, pointer_delta(end, start), zap);
   }
 
+  // Return the address "addr" aligned by "alignment_in_bytes" if such
+  // an address is below "end".  Return NULL otherwise.
+  inline static HeapWord* align_allocation_or_fail(HeapWord* addr,
+                                                   HeapWord* end,
+                                                   unsigned short alignment_in_bytes);
+
   // Some heaps may offer a contiguous region for shared non-blocking
   // allocation, via inlined code (by exporting the address of the top and
   // end fields defining the extent of the contiguous allocation region.)
--- a/src/share/vm/gc_interface/collectedHeap.inline.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/gc_interface/collectedHeap.inline.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -241,6 +241,44 @@
   oop_iterate(&no_header_cl);
 }
 
+
+inline HeapWord* CollectedHeap::align_allocation_or_fail(HeapWord* addr,
+                                                         HeapWord* end,
+                                                         unsigned short alignment_in_bytes) {
+  if (alignment_in_bytes <= ObjectAlignmentInBytes) {
+    return addr;
+  }
+
+  assert(is_ptr_aligned(addr, HeapWordSize),
+    err_msg("Address " PTR_FORMAT " is not properly aligned.", p2i(addr)));
+  assert(is_size_aligned(alignment_in_bytes, HeapWordSize),
+    err_msg("Alignment size %u is incorrect.", alignment_in_bytes));
+
+  HeapWord* new_addr = (HeapWord*) align_pointer_up(addr, alignment_in_bytes);
+  size_t padding = pointer_delta(new_addr, addr);
+
+  if (padding == 0) {
+    return addr;
+  }
+
+  if (padding < CollectedHeap::min_fill_size()) {
+    padding += alignment_in_bytes / HeapWordSize;
+    assert(padding >= CollectedHeap::min_fill_size(),
+      err_msg("alignment_in_bytes %u is expect to be larger "
+      "than the minimum object size", alignment_in_bytes));
+    new_addr = addr + padding;
+  }
+
+  assert(new_addr > addr, err_msg("Unexpected arithmetic overflow "
+    PTR_FORMAT " not greater than " PTR_FORMAT, p2i(new_addr), p2i(addr)));
+  if(new_addr < end) {
+    CollectedHeap::fill_with_object(addr, padding);
+    return new_addr;
+  } else {
+    return NULL;
+  }
+}
+
 #ifndef PRODUCT
 
 inline bool
--- a/src/share/vm/memory/cardTableModRefBS.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/cardTableModRefBS.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -429,7 +429,7 @@
                                                                  OopsInGenClosure* cl,
                                                                  CardTableRS* ct) {
   if (!mr.is_empty()) {
-    // Caller (process_strong_roots()) claims that all GC threads
+    // Caller (process_roots()) claims that all GC threads
     // execute this call.  With UseDynamicNumberOfGCThreads now all
     // active GC threads execute this call.  The number of active GC
     // threads needs to be passed to par_non_clean_card_iterate_work()
@@ -438,7 +438,7 @@
     // This is an example of where n_par_threads() is used instead
     // of workers()->active_workers().  n_par_threads can be set to 0 to
     // turn off parallelism.  For example when this code is called as
-    // part of verification and SharedHeap::process_strong_roots() is being
+    // part of verification and SharedHeap::process_roots() is being
     // used, then n_par_threads() may have been set to 0.  active_workers
     // is not overloaded with the meaning that it is a switch to disable
     // parallelism and so keeps the meaning of the number of
--- a/src/share/vm/memory/defNewGeneration.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/defNewGeneration.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -613,6 +613,9 @@
 
   KlassScanClosure klass_scan_closure(&fsc_with_no_gc_barrier,
                                       gch->rem_set()->klass_rem_set());
+  CLDToKlassAndOopClosure cld_scan_closure(&klass_scan_closure,
+                                           &fsc_with_no_gc_barrier,
+                                           false);
 
   set_promo_failure_scan_stack_closure(&fsc_with_no_gc_barrier);
   FastEvacuateFollowersClosure evacuate_followers(gch, _level, this,
@@ -622,18 +625,15 @@
   assert(gch->no_allocs_since_save_marks(0),
          "save marks have not been newly set.");
 
-  int so = SharedHeap::SO_AllClasses | SharedHeap::SO_Strings | SharedHeap::SO_CodeCache;
-
-  gch->gen_process_strong_roots(_level,
-                                true,  // Process younger gens, if any,
-                                       // as strong roots.
-                                true,  // activate StrongRootsScope
-                                true,  // is scavenging
-                                SharedHeap::ScanningOption(so),
-                                &fsc_with_no_gc_barrier,
-                                true,   // walk *all* scavengable nmethods
-                                &fsc_with_gc_barrier,
-                                &klass_scan_closure);
+  gch->gen_process_roots(_level,
+                         true,  // Process younger gens, if any,
+                                // as strong roots.
+                         true,  // activate StrongRootsScope
+                         SharedHeap::SO_ScavengeCodeCache,
+                         GenCollectedHeap::StrongAndWeakRoots,
+                         &fsc_with_no_gc_barrier,
+                         &fsc_with_gc_barrier,
+                         &cld_scan_closure);
 
   // "evacuate followers".
   evacuate_followers.do_void();
@@ -789,7 +789,7 @@
 
   // Try allocating obj in to-space (unless too old)
   if (old->age() < tenuring_threshold()) {
-    obj = (oop) to()->allocate(s);
+    obj = (oop) to()->allocate_aligned(s);
   }
 
   // Otherwise try allocating obj tenured
--- a/src/share/vm/memory/genCollectedHeap.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/genCollectedHeap.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -61,8 +61,8 @@
 GenCollectedHeap* GenCollectedHeap::_gch;
 NOT_PRODUCT(size_t GenCollectedHeap::_skip_header_HeapWords = 0;)
 
-// The set of potentially parallel tasks in strong root scanning.
-enum GCH_process_strong_roots_tasks {
+// The set of potentially parallel tasks in root scanning.
+enum GCH_strong_roots_tasks {
   // We probably want to parallelize both of these internally, but for now...
   GCH_PS_younger_gens,
   // Leave this one last.
@@ -72,11 +72,11 @@
 GenCollectedHeap::GenCollectedHeap(GenCollectorPolicy *policy) :
   SharedHeap(policy),
   _gen_policy(policy),
-  _gen_process_strong_tasks(new SubTasksDone(GCH_PS_NumElements)),
+  _gen_process_roots_tasks(new SubTasksDone(GCH_PS_NumElements)),
   _full_collections_completed(0)
 {
-  if (_gen_process_strong_tasks == NULL ||
-      !_gen_process_strong_tasks->valid()) {
+  if (_gen_process_roots_tasks == NULL ||
+      !_gen_process_roots_tasks->valid()) {
     vm_exit_during_initialization("Failed necessary allocation.");
   }
   assert(policy != NULL, "Sanity check");
@@ -590,33 +590,29 @@
 
 void GenCollectedHeap::set_par_threads(uint t) {
   SharedHeap::set_par_threads(t);
-  _gen_process_strong_tasks->set_n_threads(t);
+  _gen_process_roots_tasks->set_n_threads(t);
 }
 
 void GenCollectedHeap::
-gen_process_strong_roots(int level,
-                         bool younger_gens_as_roots,
-                         bool activate_scope,
-                         bool is_scavenging,
-                         SharedHeap::ScanningOption so,
-                         OopsInGenClosure* not_older_gens,
-                         bool do_code_roots,
-                         OopsInGenClosure* older_gens,
-                         KlassClosure* klass_closure) {
-  // General strong roots.
+gen_process_roots(int level,
+                  bool younger_gens_as_roots,
+                  bool activate_scope,
+                  SharedHeap::ScanningOption so,
+                  OopsInGenClosure* not_older_gens,
+                  OopsInGenClosure* weak_roots,
+                  OopsInGenClosure* older_gens,
+                  CLDClosure* cld_closure,
+                  CLDClosure* weak_cld_closure,
+                  CodeBlobClosure* code_closure) {
 
-  if (!do_code_roots) {
-    SharedHeap::process_strong_roots(activate_scope, is_scavenging, so,
-                                     not_older_gens, NULL, klass_closure);
-  } else {
-    bool do_code_marking = (activate_scope || nmethod::oops_do_marking_is_active());
-    CodeBlobToOopClosure code_roots(not_older_gens, /*do_marking=*/ do_code_marking);
-    SharedHeap::process_strong_roots(activate_scope, is_scavenging, so,
-                                     not_older_gens, &code_roots, klass_closure);
-  }
+  // General roots.
+  SharedHeap::process_roots(activate_scope, so,
+                            not_older_gens, weak_roots,
+                            cld_closure, weak_cld_closure,
+                            code_closure);
 
   if (younger_gens_as_roots) {
-    if (!_gen_process_strong_tasks->is_task_claimed(GCH_PS_younger_gens)) {
+    if (!_gen_process_roots_tasks->is_task_claimed(GCH_PS_younger_gens)) {
       for (int i = 0; i < level; i++) {
         not_older_gens->set_generation(_gens[i]);
         _gens[i]->oop_iterate(not_older_gens);
@@ -632,12 +628,42 @@
     older_gens->reset_generation();
   }
 
-  _gen_process_strong_tasks->all_tasks_completed();
+  _gen_process_roots_tasks->all_tasks_completed();
 }
 
-void GenCollectedHeap::gen_process_weak_roots(OopClosure* root_closure,
-                                              CodeBlobClosure* code_roots) {
-  SharedHeap::process_weak_roots(root_closure, code_roots);
+void GenCollectedHeap::
+gen_process_roots(int level,
+                  bool younger_gens_as_roots,
+                  bool activate_scope,
+                  SharedHeap::ScanningOption so,
+                  bool only_strong_roots,
+                  OopsInGenClosure* not_older_gens,
+                  OopsInGenClosure* older_gens,
+                  CLDClosure* cld_closure) {
+
+  const bool is_adjust_phase = !only_strong_roots && !younger_gens_as_roots;
+
+  bool is_moving_collection = false;
+  if (level == 0 || is_adjust_phase) {
+    // young collections are always moving
+    is_moving_collection = true;
+  }
+
+  MarkingCodeBlobClosure mark_code_closure(not_older_gens, is_moving_collection);
+  CodeBlobClosure* code_closure = &mark_code_closure;
+
+  gen_process_roots(level,
+                    younger_gens_as_roots,
+                    activate_scope, so,
+                    not_older_gens, only_strong_roots ? NULL : not_older_gens,
+                    older_gens,
+                    cld_closure, only_strong_roots ? NULL : cld_closure,
+                    code_closure);
+
+}
+
+void GenCollectedHeap::gen_process_weak_roots(OopClosure* root_closure) {
+  SharedHeap::process_weak_roots(root_closure);
   // "Local" "weak" refs
   for (int i = 0; i < _n_gens; i++) {
     _gens[i]->ref_processor()->weak_oops_do(root_closure);
@@ -856,12 +882,6 @@
   }
 }
 
-void GenCollectedHeap::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  for (int i = 0; i < _n_gens; i++) {
-    _gens[i]->oop_iterate(mr, cl);
-  }
-}
-
 void GenCollectedHeap::object_iterate(ObjectClosure* cl) {
   for (int i = 0; i < _n_gens; i++) {
     _gens[i]->object_iterate(cl);
@@ -1079,7 +1099,7 @@
   guarantee(_n_gens = 2, "Wrong number of generations");
   Generation* old_gen = _gens[1];
   // Start by compacting into same gen.
-  CompactPoint cp(old_gen, NULL, NULL);
+  CompactPoint cp(old_gen);
   old_gen->prepare_for_compaction(&cp);
   Generation* young_gen = _gens[0];
   young_gen->prepare_for_compaction(&cp);
--- a/src/share/vm/memory/genCollectedHeap.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/genCollectedHeap.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -78,9 +78,9 @@
   unsigned int _full_collections_completed;
 
   // Data structure for claiming the (potentially) parallel tasks in
-  // (gen-specific) strong roots processing.
-  SubTasksDone* _gen_process_strong_tasks;
-  SubTasksDone* gen_process_strong_tasks() { return _gen_process_strong_tasks; }
+  // (gen-specific) roots processing.
+  SubTasksDone* _gen_process_roots_tasks;
+  SubTasksDone* gen_process_roots_tasks() { return _gen_process_roots_tasks; }
 
   // In block contents verification, the number of header words to skip
   NOT_PRODUCT(static size_t _skip_header_HeapWords;)
@@ -220,7 +220,6 @@
 
   // Iteration functions.
   void oop_iterate(ExtendedOopClosure* cl);
-  void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
   void object_iterate(ObjectClosure* cl);
   void safe_object_iterate(ObjectClosure* cl);
   Space* space_containing(const void* addr) const;
@@ -412,26 +411,35 @@
   // The "so" argument determines which of the roots
   // the closure is applied to:
   // "SO_None" does none;
-  // "SO_AllClasses" applies the closure to all entries in the SystemDictionary;
-  // "SO_SystemClasses" to all the "system" classes and loaders;
-  // "SO_Strings" applies the closure to all entries in the StringTable.
-  void gen_process_strong_roots(int level,
-                                bool younger_gens_as_roots,
-                                // The remaining arguments are in an order
-                                // consistent with SharedHeap::process_strong_roots:
-                                bool activate_scope,
-                                bool is_scavenging,
-                                SharedHeap::ScanningOption so,
-                                OopsInGenClosure* not_older_gens,
-                                bool do_code_roots,
-                                OopsInGenClosure* older_gens,
-                                KlassClosure* klass_closure);
+ private:
+  void gen_process_roots(int level,
+                         bool younger_gens_as_roots,
+                         bool activate_scope,
+                         SharedHeap::ScanningOption so,
+                         OopsInGenClosure* not_older_gens,
+                         OopsInGenClosure* weak_roots,
+                         OopsInGenClosure* older_gens,
+                         CLDClosure* cld_closure,
+                         CLDClosure* weak_cld_closure,
+                         CodeBlobClosure* code_closure);
 
-  // Apply "blk" to all the weak roots of the system.  These include
-  // JNI weak roots, the code cache, system dictionary, symbol table,
-  // string table, and referents of reachable weak refs.
-  void gen_process_weak_roots(OopClosure* root_closure,
-                              CodeBlobClosure* code_roots);
+ public:
+  static const bool StrongAndWeakRoots = false;
+  static const bool StrongRootsOnly    = true;
+
+  void gen_process_roots(int level,
+                         bool younger_gens_as_roots,
+                         bool activate_scope,
+                         SharedHeap::ScanningOption so,
+                         bool only_strong_roots,
+                         OopsInGenClosure* not_older_gens,
+                         OopsInGenClosure* older_gens,
+                         CLDClosure* cld_closure);
+
+  // Apply "root_closure" to all the weak roots of the system.
+  // These include JNI weak roots, string table,
+  // and referents of reachable weak refs.
+  void gen_process_weak_roots(OopClosure* root_closure);
 
   // Set the saved marks of generations, if that makes sense.
   // In particular, if any generation might iterate over the oops
--- a/src/share/vm/memory/genMarkSweep.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/genMarkSweep.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -207,15 +207,14 @@
   // Need new claim bits before marking starts.
   ClassLoaderDataGraph::clear_claimed_marks();
 
-  gch->gen_process_strong_roots(level,
-                                false, // Younger gens are not roots.
-                                true,  // activate StrongRootsScope
-                                false, // not scavenging
-                                SharedHeap::SO_SystemClasses,
-                                &follow_root_closure,
-                                true,   // walk code active on stacks
-                                &follow_root_closure,
-                                &follow_klass_closure);
+  gch->gen_process_roots(level,
+                         false, // Younger gens are not roots.
+                         true,  // activate StrongRootsScope
+                         SharedHeap::SO_None,
+                         GenCollectedHeap::StrongRootsOnly,
+                         &follow_root_closure,
+                         &follow_root_closure,
+                         &follow_cld_closure);
 
   // Process reference objects found during marking
   {
@@ -293,22 +292,16 @@
   // are run.
   adjust_pointer_closure.set_orig_generation(gch->get_gen(level));
 
-  gch->gen_process_strong_roots(level,
-                                false, // Younger gens are not roots.
-                                true,  // activate StrongRootsScope
-                                false, // not scavenging
-                                SharedHeap::SO_AllClasses,
-                                &adjust_pointer_closure,
-                                false, // do not walk code
-                                &adjust_pointer_closure,
-                                &adjust_klass_closure);
+  gch->gen_process_roots(level,
+                         false, // Younger gens are not roots.
+                         true,  // activate StrongRootsScope
+                         SharedHeap::SO_AllCodeCache,
+                         GenCollectedHeap::StrongAndWeakRoots,
+                         &adjust_pointer_closure,
+                         &adjust_pointer_closure,
+                         &adjust_cld_closure);
 
-  // Now adjust pointers in remaining weak roots.  (All of which should
-  // have been cleared if they pointed to non-surviving objects.)
-  CodeBlobToOopClosure adjust_code_pointer_closure(&adjust_pointer_closure,
-                                                   /*do_marking=*/ false);
-  gch->gen_process_weak_roots(&adjust_pointer_closure,
-                              &adjust_code_pointer_closure);
+  gch->gen_process_weak_roots(&adjust_pointer_closure);
 
   adjust_marks();
   GenAdjustPointersClosure blk;
--- a/src/share/vm/memory/generation.cpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/generation.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -297,22 +297,16 @@
 
 class GenerationOopIterateClosure : public SpaceClosure {
  public:
-  ExtendedOopClosure* cl;
-  MemRegion mr;
+  ExtendedOopClosure* _cl;
   virtual void do_space(Space* s) {
-    s->oop_iterate(mr, cl);
+    s->oop_iterate(_cl);
   }
-  GenerationOopIterateClosure(ExtendedOopClosure* _cl, MemRegion _mr) :
-    cl(_cl), mr(_mr) {}
+  GenerationOopIterateClosure(ExtendedOopClosure* cl) :
+    _cl(cl) {}
 };
 
 void Generation::oop_iterate(ExtendedOopClosure* cl) {
-  GenerationOopIterateClosure blk(cl, _reserved);
-  space_iterate(&blk);
-}
-
-void Generation::oop_iterate(MemRegion mr, ExtendedOopClosure* cl) {
-  GenerationOopIterateClosure blk(cl, mr);
+  GenerationOopIterateClosure blk(cl);
   space_iterate(&blk);
 }
 
--- a/src/share/vm/memory/generation.hpp	Wed Aug 13 14:49:46 2014 -0700
+++ b/src/share/vm/memory/generation.hpp	Thu Aug 14 12:55:30 2014 -0700
@@ -543,10 +543,6 @@
   // generation, calling "cl.do_oop" on each.
   virtual void oop_iterate(ExtendedOopClosure* cl);
 
-  // Same as above, restricted to the intersection of a memory region and
-  // the generation.
-  virtual void oop_iterate(MemRegion mr, ExtendedOopClosure* cl);
-
   // Iterate over all objects in the generation, calling "cl.do_object" on
   // each.
   virtual void object_iterate(ObjectClosure* cl);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/guardedMemory.cpp	Thu Aug 14 12:55:30 2014 -0700
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#include "precompiled.hpp"
+#include "memory/allocation.hpp"
+#include "memory/allocation.inline.hpp"
+#include "memory/guardedMemory.hpp"
+#include "runtime/os.hpp"
+
+void* GuardedMemory::wrap_copy(const void* ptr, const size_t len, const void* tag) {
+  size_t total_sz = GuardedMemory::get_total_size(len);
+  void* outerp = os::malloc(total_sz, mtInternal);
+  if (outerp != NULL) {
+    GuardedMemory guarded(outerp, len, tag);
+    void* innerp = guarded.get_user_ptr();
+    memcpy(innerp, ptr, len);
+    return innerp;
+  }
+  return NULL; // OOM
+}
+
+bool GuardedMemory::free_copy(void* p) {
+  if (p == NULL) {
+    return true;
+  }
+  GuardedMemory guarded((u_char*)p);
+  bool verify_ok = guarded.verify_guards();
+
+  /* always attempt to free, pass problem on to any nested memchecker */
+  os::free(guarded.release_for_freeing());
+
+  return verify_ok;
+}
+
+void GuardedMemory::print_on(outputStream* st) const {
+  if (_base_addr == NULL) {
+    st->print_cr("GuardedMemory(" PTR_FORMAT ") not associated to any memory", p2i(this));
+    return;
+  }
+  st->print_cr("GuardedMemory(" PTR_FORMAT ") base_addr=" PTR_FORMAT
+      " tag=" PTR_FORMAT " user_size=" SIZE_FORMAT " user_data=" PTR_FORMAT,
+      p2i(this), p2i(_base_addr), p2i(get_tag()), get_user_size(), p2i(get_user_ptr()));
+
+  Guard* guard = get_head_guard();
+  st->print_cr("  Header guard @" PTR_FORMAT " is %s", p2i(guard), (guard->verify() ? "OK" : "BROKEN"));
+  guard = get_tail_guard();
+  st->print_cr("  Trailer guard @" PTR_FORMAT " is %s", p2i(guard), (guard->verify() ? "OK" : "BROKEN"));
+
+  u_char udata = *get_user_ptr();
+  switch (udata) {
+  case uninitBlockPad:
+    st->print_cr("  User data appears unused");
+    break;
+  case freeBlockPad:
+    st->print_cr("  User data appears to have been freed");
+    break;
+  default:
+    st->print_cr("  User data appears to be in use");
+    break;
+  }
+}
+
+// test code...
+
+#ifndef PRODUCT
+
+static void guarded_memory_test_check(void* p, size_t sz, void* tag) {
+  assert(p != NULL, "NULL pointer given to check");
+  u_char* c = (u_char*) p;
+  GuardedMemory guarded(c);
+  assert(guarded.get_tag() == tag, "Tag is not the same as supplied");
+  assert(guarded.get_user_ptr() == c, "User pointer is not the same as supplied");
+  assert(guarded.get_user_size() == sz, "User size is not the same as supplied");
+  assert(guarded.verify_guards(), "Guard broken");
+}
+
+void GuardedMemory::test_guarded_memory() {
+  // Test the basic characteristics...
+  size_t total_sz = GuardedMemory::get_total_size(1);
+  assert(total_sz > 1 && total_sz >= (sizeof(GuardHeader) + 1 + sizeof(Guard)), "Unexpected size");
+  u_char* basep = (u_char*) os::malloc(total_sz, mtInternal);
+
+  GuardedMemory guarded(basep, 1, (void*)0xf000f000);
+
+  assert(*basep == badResourceValue, "Expected guard in the form of badResourceValue");
+  u_char* userp = guarded.get_user_ptr();
+  assert(*userp == uninitBlockPad, "Expected uninitialized data in the form of uninitBlockPad");
+  guarded_memory_test_check(userp, 1, (void*)0xf000f000);
+
+  void* freep = guarded.release_for_freeing();
+  assert((u_char*)freep == basep, "Expected the same pointer guard was ");
+  assert(*userp == freeBlockPad, "Expected user data to be free block padded");
+  assert(!guarded.verify_guards(), "Expected failed");
+  os::free(freep);
+
+  // Test a number of odd sizes...
+  size_t sz = 0;
+  do {
+    void* p = os::malloc(GuardedMemory::get_total_size(sz), mtInternal);
+    void* up = guarded.wrap_with_guards(p, sz, (void*)1);
+    memset(up, 0, sz);
+    guarded_memory_test_check(up, sz, (void*)1);
+    os::free(guarded.release_for_freeing());
+    sz = (sz << 4) + 1;
+  } while (sz < (256 * 1024));
+
+  // Test buffer overrun into head...
+  basep = (u_char*) os::malloc(GuardedMemory::get_total_size(1), mtInternal);
+  guarded.wrap_with_guards(basep, 1);
+  *basep = 0;
+  assert(!guarded.verify_guards(), "Expected failure");
+  os::free(basep);
+
+  // Test buffer overrun into tail with a number of odd sizes...
+  sz = 1;
+  do {
+    void* p = os::malloc(GuardedMemory::get_total_size(sz), mtInternal);
+    void* up = guarded.wrap_with_guards(p, sz, (void*)1);
+    memset(up, 0, sz + 1); // Buffer-overwrite (within guard)
+    assert(!guarded.verify_guards(), "Guard was not broken as expected");
+    os::free(guarded.release_for_freeing());
+    sz = (sz << 4) + 1;
+  } while (sz < (256 * 1024));
+