changeset 3811:ec204374e626

Merge
author kamg
date Fri, 02 Nov 2012 16:09:50 -0700
parents 4735d2c84362 ca8168203393
children 9cc901118f6b
files src/share/vm/classfile/vmSymbols.hpp src/share/vm/oops/method.cpp src/share/vm/runtime/globals.hpp test/runtime/7158800/BadUtf8.java test/runtime/7158800/InternTest.java test/runtime/7158800/Test7158800.sh test/runtime/7158800/badstrings.txt
diffstat 94 files changed, 6634 insertions(+), 33399 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Thu Oct 11 12:25:42 2012 -0400
+++ b/.hgtags	Fri Nov 02 16:09:50 2012 -0700
@@ -287,3 +287,7 @@
 b261523fe66c40a02968f0aa7e73602491bb3386 hs25-b05
 4547dc71db765276e027b0c2780b724bae0a07d3 jdk8-b61
 d0337c31c8be7716369b4e7c3bd5f352983c6a06 hs25-b06
+dccd40de8db1fa96f186e6179907818d75320440 jdk8-b62
+dc16fe422c535ecd4e9f80fb814a1bb9704da6f5 hs25-b07
+acabb5c282f59be7e3238920b2ea06b684ab68f7 jdk8-b63
+8cb93eadfb6dcab88d91b8e2cd3e0e07d0ac4048 hs25-b08
--- a/make/Makefile	Thu Oct 11 12:25:42 2012 -0400
+++ b/make/Makefile	Fri Nov 02 16:09:50 2012 -0700
@@ -453,14 +453,30 @@
     ifeq ($(JVM_VARIANT_ZEROSHARK), true)
         $(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(SHARK_DIR)/%.$(LIBRARY_SUFFIX)
 		$(install-file)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo):	$(SHARK_DIR)/%.debuginfo
+		$(install-file)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.diz:		$(SHARK_DIR)/%.diz
+		$(install-file)
         $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX):       $(SHARK_DIR)/%.$(LIBRARY_SUFFIX)
 		$(install-file)
+        $(EXPORT_SERVER_DIR)/%.debuginfo:		$(SHARK_DIR)/%.debuginfo
+		$(install-file)
+        $(EXPORT_SERVER_DIR)/%.diz:			$(SHARK_DIR)/%.diz
+		$(install-file)
     endif
     ifeq ($(JVM_VARIANT_ZERO), true)
         $(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(ZERO_DIR)/%.$(LIBRARY_SUFFIX)
 		$(install-file)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo:		$(ZERO_DIR)/%.debuginfo
+		$(install-file)
+        $(EXPORT_JRE_LIB_ARCH_DIR)/%.diz:		$(ZERO_DIR)/%.diz
+		$(install-file)
         $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX):       $(ZERO_DIR)/%.$(LIBRARY_SUFFIX)
 		$(install-file)
+        $(EXPORT_SERVER_DIR)/%.debuginfo:		$(ZERO_DIR)/%.debuginfo
+		$(install-file)
+        $(EXPORT_SERVER_DIR)/%.diz:			$(ZERO_DIR)/%.diz
+		$(install-file)
     endif
     ifeq ($(JVM_VARIANT_MINIMAL1), true)
         $(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX):	$(MINIMAL1_DIR)/%.$(LIBRARY_SUFFIX)
--- a/make/excludeSrc.make	Thu Oct 11 12:25:42 2012 -0400
+++ b/make/excludeSrc.make	Fri Nov 02 16:09:50 2012 -0700
@@ -79,10 +79,10 @@
       CXXFLAGS += -DSERIALGC
       CFLAGS += -DSERIALGC
       Src_Files_EXCLUDE += \
-	binaryTreeDictionary.cpp cmsAdaptiveSizePolicy.cpp cmsCollectorPolicy.cpp \
+	cmsAdaptiveSizePolicy.cpp cmsCollectorPolicy.cpp \
 	cmsGCAdaptivePolicyCounters.cpp cmsLockVerifier.cpp cmsPermGen.cpp compactibleFreeListSpace.cpp \
-	concurrentMarkSweepGeneration.cpp concurrentMarkSweepThread.cpp freeBlockDictionary.cpp \
-	freeChunk.cpp freeList.cpp promotionInfo.cpp vmCMSOperations.cpp collectionSetChooser.cpp \
+	concurrentMarkSweepGeneration.cpp concurrentMarkSweepThread.cpp \
+	freeChunk.cpp adaptiveFreeList.cpp promotionInfo.cpp vmCMSOperations.cpp collectionSetChooser.cpp \
 	concurrentG1Refine.cpp concurrentG1RefineThread.cpp concurrentMark.cpp concurrentMarkThread.cpp \
 	dirtyCardQueue.cpp g1AllocRegion.cpp g1BlockOffsetTable.cpp g1CollectedHeap.cpp g1GCPhaseTimes.cpp \
 	g1CollectorPolicy.cpp g1ErgoVerbose.cpp g1_globals.cpp g1HRPrinter.cpp g1MarkSweep.cpp \
--- a/make/hotspot_version	Thu Oct 11 12:25:42 2012 -0400
+++ b/make/hotspot_version	Fri Nov 02 16:09:50 2012 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=25
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=07
+HS_BUILD_NUMBER=09
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -1007,6 +1007,67 @@
   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 }
 
+void Assembler::aesdec(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_aes(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xde);
+  emit_operand(dst, src);
+}
+
+void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_aes(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xde);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::aesdeclast(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_aes(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xdf);
+  emit_operand(dst, src);
+}
+
+void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_aes(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xdf);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::aesenc(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_aes(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xdc);
+  emit_operand(dst, src);
+}
+
+void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_aes(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xdc);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::aesenclast(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_aes(), "");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xdd);
+  emit_operand(dst, src);
+}
+
+void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_aes(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0xdd);
+  emit_byte(0xC0 | encode);
+}
+
+
 void Assembler::andl(Address dst, int32_t imm32) {
   InstructionMark im(this);
   prefix(dst);
@@ -2307,6 +2368,22 @@
   a_byte(p);
 }
 
+void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_ssse3(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0x00);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::pshufb(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_ssse3(), "");
+  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0x00);
+  emit_operand(dst, src);
+}
+
 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -8067,6 +8144,15 @@
   LP64_ONLY(movq(dst, src)) NOT_LP64(movl(dst, src));
 }
 
+void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::movdqu(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::movdqu(dst, Address(rscratch1, 0));
+  }
+}
+
 void MacroAssembler::movsd(XMMRegister dst, AddressLiteral src) {
   if (reachable(src)) {
     Assembler::movsd(dst, as_Address(src));
@@ -8357,6 +8443,17 @@
   }
 }
 
+void MacroAssembler::pshufb(XMMRegister dst, AddressLiteral src) {
+  // Used in sign-bit flipping with aligned address.
+  assert((UseAVX > 0) || (((intptr_t)src.target() & 15) == 0), "SSE mode requires address alignment 16 bytes");
+  if (reachable(src)) {
+    Assembler::pshufb(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::pshufb(dst, Address(rscratch1, 0));
+  }
+}
+
 // AVX 3-operands instructions
 
 void MacroAssembler::vaddsd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -875,6 +875,17 @@
   void addss(XMMRegister dst, Address src);
   void addss(XMMRegister dst, XMMRegister src);
 
+  // AES instructions
+  void aesdec(XMMRegister dst, Address src);
+  void aesdec(XMMRegister dst, XMMRegister src);
+  void aesdeclast(XMMRegister dst, Address src);
+  void aesdeclast(XMMRegister dst, XMMRegister src);
+  void aesenc(XMMRegister dst, Address src);
+  void aesenc(XMMRegister dst, XMMRegister src);
+  void aesenclast(XMMRegister dst, Address src);
+  void aesenclast(XMMRegister dst, XMMRegister src);
+
+
   void andl(Address  dst, int32_t imm32);
   void andl(Register dst, int32_t imm32);
   void andl(Register dst, Address src);
@@ -1424,6 +1435,10 @@
   void prefetcht2(Address src);
   void prefetchw(Address src);
 
+  // Shuffle Bytes
+  void pshufb(XMMRegister dst, XMMRegister src);
+  void pshufb(XMMRegister dst, Address src);
+
   // Shuffle Packed Doublewords
   void pshufd(XMMRegister dst, XMMRegister src, int mode);
   void pshufd(XMMRegister dst, Address src,     int mode);
@@ -2611,6 +2626,12 @@
   void divss(XMMRegister dst, Address src)        { Assembler::divss(dst, src); }
   void divss(XMMRegister dst, AddressLiteral src);
 
+  // Move Unaligned Double Quadword
+  void movdqu(Address     dst, XMMRegister src)   { Assembler::movdqu(dst, src); }
+  void movdqu(XMMRegister dst, Address src)       { Assembler::movdqu(dst, src); }
+  void movdqu(XMMRegister dst, XMMRegister src)   { Assembler::movdqu(dst, src); }
+  void movdqu(XMMRegister dst, AddressLiteral src);
+
   void movsd(XMMRegister dst, XMMRegister src) { Assembler::movsd(dst, src); }
   void movsd(Address dst, XMMRegister src)     { Assembler::movsd(dst, src); }
   void movsd(XMMRegister dst, Address src)     { Assembler::movsd(dst, src); }
@@ -2658,6 +2679,10 @@
   void xorps(XMMRegister dst, Address src)     { Assembler::xorps(dst, src); }
   void xorps(XMMRegister dst, AddressLiteral src);
 
+  // Shuffle Bytes
+  void pshufb(XMMRegister dst, XMMRegister src) { Assembler::pshufb(dst, src); }
+  void pshufb(XMMRegister dst, Address src)     { Assembler::pshufb(dst, src); }
+  void pshufb(XMMRegister dst, AddressLiteral src);
   // AVX 3-operands instructions
 
   void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vaddsd(dst, nds, src); }
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -2137,6 +2137,529 @@
     }
   }
 
+  // AES intrinsic stubs
+  enum {AESBlockSize = 16};
+
+  address generate_key_shuffle_mask() {
+    __ align(16);
+    StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
+    address start = __ pc();
+    __ emit_data(0x00010203, relocInfo::none, 0 );
+    __ emit_data(0x04050607, relocInfo::none, 0 );
+    __ emit_data(0x08090a0b, relocInfo::none, 0 );
+    __ emit_data(0x0c0d0e0f, relocInfo::none, 0 );
+    return start;
+  }
+
+  // Utility routine for loading a 128-bit key word in little endian format
+  // can optionally specify that the shuffle mask is already in an xmmregister
+  void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+    __ movdqu(xmmdst, Address(key, offset));
+    if (xmm_shuf_mask != NULL) {
+      __ pshufb(xmmdst, xmm_shuf_mask);
+    } else {
+      __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    }
+  }
+
+  // aesenc using specified key+offset
+  // can optionally specify that the shuffle mask is already in an xmmregister
+  void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+    load_key(xmmtmp, key, offset, xmm_shuf_mask);
+    __ aesenc(xmmdst, xmmtmp);
+  }
+
+  // aesdec using specified key+offset
+  // can optionally specify that the shuffle mask is already in an xmmregister
+  void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+    load_key(xmmtmp, key, offset, xmm_shuf_mask);
+    __ aesdec(xmmdst, xmmtmp);
+  }
+
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_encryptBlock() {
+    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+    Label L_doLast;
+    address start = __ pc();
+
+    const Register from        = rsi;      // source array address
+    const Register to          = rdx;      // destination array address
+    const Register key         = rcx;      // key array address
+    const Register keylen      = rax;
+    const Address  from_param(rbp, 8+0);
+    const Address  to_param  (rbp, 8+4);
+    const Address  key_param (rbp, 8+8);
+
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_temp   = xmm1;
+    const XMMRegister xmm_key_shuf_mask = xmm2;
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    __ push(rsi);
+    __ movptr(from , from_param);
+    __ movptr(to   , to_param);
+    __ movptr(key  , key_param);
+
+    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    // keylen = # of 32-bit words, convert to 128-bit words
+    __ shrl(keylen, 2);
+    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
+
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input
+
+    // For encryption, the java expanded key ordering is just what we need
+
+    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
+    __ pxor(xmm_result, xmm_temp);
+    for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
+      aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
+    }
+    load_key  (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
+    __ cmpl(keylen, 0);
+    __ jcc(Assembler::equal, L_doLast);
+    __ aesenc(xmm_result, xmm_temp);                   // only in 192 and 256 bit keys
+    aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
+    __ subl(keylen, 2);
+    __ jcc(Assembler::equal, L_doLast);
+    __ aesenc(xmm_result, xmm_temp);                   // only in 256 bit keys
+    aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+
+    __ BIND(L_doLast);
+    __ aesenclast(xmm_result, xmm_temp);
+    __ movdqu(Address(to, 0), xmm_result);        // store the result
+    __ xorptr(rax, rax); // return 0
+    __ pop(rsi);
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_decryptBlock() {
+    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
+    Label L_doLast;
+    address start = __ pc();
+
+    const Register from        = rsi;      // source array address
+    const Register to          = rdx;      // destination array address
+    const Register key         = rcx;      // key array address
+    const Register keylen      = rax;
+    const Address  from_param(rbp, 8+0);
+    const Address  to_param  (rbp, 8+4);
+    const Address  key_param (rbp, 8+8);
+
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_temp   = xmm1;
+    const XMMRegister xmm_key_shuf_mask = xmm2;
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    __ push(rsi);
+    __ movptr(from , from_param);
+    __ movptr(to   , to_param);
+    __ movptr(key  , key_param);
+
+    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    // keylen = # of 32-bit words, convert to 128-bit words
+    __ shrl(keylen, 2);
+    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
+
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    __ movdqu(xmm_result, Address(from, 0));
+
+    // for decryption java expanded key ordering is rotated one position from what we want
+    // so we start from 0x10 here and hit 0x00 last
+    // we don't know if the key is aligned, hence not using load-execute form
+    load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
+    __ pxor  (xmm_result, xmm_temp);
+    for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
+      aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
+    }
+    __ cmpl(keylen, 0);
+    __ jcc(Assembler::equal, L_doLast);
+    // only in 192 and 256 bit keys
+    aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
+    aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
+    __ subl(keylen, 2);
+    __ jcc(Assembler::equal, L_doLast);
+    // only in 256 bit keys
+    aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
+    aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+
+    __ BIND(L_doLast);
+    // for decryption the aesdeclast operation is always on key+0x00
+    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
+    __ aesdeclast(xmm_result, xmm_temp);
+
+    __ movdqu(Address(to, 0), xmm_result);  // store the result
+
+    __ xorptr(rax, rax); // return 0
+    __ pop(rsi);
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
+  void handleSOERegisters(bool saving) {
+    const int saveFrameSizeInBytes = 4 * wordSize;
+    const Address saved_rbx     (rbp, -3 * wordSize);
+    const Address saved_rsi     (rbp, -2 * wordSize);
+    const Address saved_rdi     (rbp, -1 * wordSize);
+
+    if (saving) {
+      __ subptr(rsp, saveFrameSizeInBytes);
+      __ movptr(saved_rsi, rsi);
+      __ movptr(saved_rdi, rdi);
+      __ movptr(saved_rbx, rbx);
+    } else {
+      // restoring
+      __ movptr(rsi, saved_rsi);
+      __ movptr(rdi, saved_rdi);
+      __ movptr(rbx, saved_rbx);
+    }
+  }
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+  address generate_cipherBlockChaining_encryptAESCrypt() {
+    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+    address start = __ pc();
+
+    Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
+    const Register from        = rsi;      // source array address
+    const Register to          = rdx;      // destination array address
+    const Register key         = rcx;      // key array address
+    const Register rvec        = rdi;      // r byte array initialized from initvector array address
+                                           // and left with the results of the last encryption block
+    const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
+    const Register pos         = rax;
+
+    // xmm register assignments for the loops below
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_temp   = xmm1;
+    // first 6 keys preloaded into xmm2-xmm7
+    const int XMM_REG_NUM_KEY_FIRST = 2;
+    const int XMM_REG_NUM_KEY_LAST  = 7;
+    const XMMRegister xmm_key0   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    handleSOERegisters(true /*saving*/);
+
+    // load registers from incoming parameters
+    const Address  from_param(rbp, 8+0);
+    const Address  to_param  (rbp, 8+4);
+    const Address  key_param (rbp, 8+8);
+    const Address  rvec_param (rbp, 8+12);
+    const Address  len_param  (rbp, 8+16);
+    __ movptr(from , from_param);
+    __ movptr(to   , to_param);
+    __ movptr(key  , key_param);
+    __ movptr(rvec , rvec_param);
+    __ movptr(len_reg , len_param);
+
+    const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    // load up xmm regs 2 thru 7 with keys 0-5
+    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+      offset += 0x10;
+    }
+
+    __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec
+
+    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
+    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    __ cmpl(rax, 44);
+    __ jcc(Assembler::notEqual, L_key_192_256);
+
+    // 128 bit code follows here
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loopTop_128);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
+
+    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = 0x60; key_offset <= 0x90; key_offset += 0x10) {
+      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0xa0);
+    __ aesenclast(xmm_result, xmm_temp);
+
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_128);
+
+    __ BIND(L_exit);
+    __ movdqu(Address(rvec, 0), xmm_result);     // final value of r stored in rvec of CipherBlockChaining object
+
+    handleSOERegisters(false /*restoring*/);
+    __ movl(rax, 0);                             // return 0 (why?)
+    __ leave();                                  // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+  __ BIND(L_key_192_256);
+  // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    __ cmpl(rax, 52);
+    __ jcc(Assembler::notEqual, L_key_256);
+
+    // 192-bit code follows here (could be changed to use more xmm registers)
+    __ movptr(pos, 0);
+  __ align(OptoLoopAlignment);
+  __ BIND(L_loopTop_192);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
+
+    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = 0x60; key_offset <= 0xb0; key_offset += 0x10) {
+      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0xc0);
+    __ aesenclast(xmm_result, xmm_temp);
+
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_192);
+    __ jmp(L_exit);
+
+  __ BIND(L_key_256);
+    // 256-bit code follows here (could be changed to use more xmm registers)
+    __ movptr(pos, 0);
+  __ align(OptoLoopAlignment);
+  __ BIND(L_loopTop_256);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);                                // xor with the current r vector
+
+    __ pxor  (xmm_result, xmm_key0);                                // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = 0x60; key_offset <= 0xd0; key_offset += 0x10) {
+      aes_enc_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0xe0);
+    __ aesenclast(xmm_result, xmm_temp);
+
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);   // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_256);
+    __ jmp(L_exit);
+
+    return start;
+  }
+
+
+  // CBC AES Decryption.
+  // In 32-bit stub, because of lack of registers we do not try to parallelize 4 blocks at a time.
+  //
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+
+  address generate_cipherBlockChaining_decryptAESCrypt() {
+    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+    address start = __ pc();
+
+    Label L_exit, L_key_192_256, L_key_256;
+    Label L_singleBlock_loopTop_128;
+    Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
+    const Register from        = rsi;      // source array address
+    const Register to          = rdx;      // destination array address
+    const Register key         = rcx;      // key array address
+    const Register rvec        = rdi;      // r byte array initialized from initvector array address
+                                           // and left with the results of the last encryption block
+    const Register len_reg     = rbx;      // src len (must be multiple of blocksize 16)
+    const Register pos         = rax;
+
+    // xmm register assignments for the loops below
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_temp   = xmm1;
+    // first 6 keys preloaded into xmm2-xmm7
+    const int XMM_REG_NUM_KEY_FIRST = 2;
+    const int XMM_REG_NUM_KEY_LAST  = 7;
+    const int FIRST_NON_REG_KEY_offset = 0x70;
+    const XMMRegister xmm_key_first   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    handleSOERegisters(true /*saving*/);
+
+    // load registers from incoming parameters
+    const Address  from_param(rbp, 8+0);
+    const Address  to_param  (rbp, 8+4);
+    const Address  key_param (rbp, 8+8);
+    const Address  rvec_param (rbp, 8+12);
+    const Address  len_param  (rbp, 8+16);
+    __ movptr(from , from_param);
+    __ movptr(to   , to_param);
+    __ movptr(key  , key_param);
+    __ movptr(rvec , rvec_param);
+    __ movptr(len_reg , len_param);
+
+    // the java expanded key ordering is rotated one position from what we want
+    // so we start from 0x10 here and hit 0x00 last
+    const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    // load up xmm regs 2 thru 6 with first 5 keys
+    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+      offset += 0x10;
+    }
+
+    // inside here, use the rvec register to point to previous block cipher
+    // with which we xor at the end of each newly decrypted block
+    const Register  prev_block_cipher_ptr = rvec;
+
+    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
+    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    __ cmpl(rax, 44);
+    __ jcc(Assembler::notEqual, L_key_192_256);
+
+
+    // 128-bit code follows here, parallelized
+    __ movptr(pos, 0);
+  __ align(OptoLoopAlignment);
+  __ BIND(L_singleBlock_loopTop_128);
+    __ cmpptr(len_reg, 0);           // any blocks left??
+    __ jcc(Assembler::equal, L_exit);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xa0; key_offset += 0x10) {   // 128-bit runs up to key offset a0
+      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
+    __ aesdeclast(xmm_result, xmm_temp);
+    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+    __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jmp(L_singleBlock_loopTop_128);
+
+
+    __ BIND(L_exit);
+    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+    __ movptr(rvec , rvec_param);                                     // restore this since used in loop
+    __ movdqu(Address(rvec, 0), xmm_temp);                            // final value of r stored in rvec of CipherBlockChaining object
+    handleSOERegisters(false /*restoring*/);
+    __ movl(rax, 0);                                                  // return 0 (why?)
+    __ leave();                                                       // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+
+    __ BIND(L_key_192_256);
+    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    __ cmpl(rax, 52);
+    __ jcc(Assembler::notEqual, L_key_256);
+
+    // 192-bit code follows here (could be optimized to use parallelism)
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_singleBlock_loopTop_192);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xc0; key_offset += 0x10) {   // 192-bit runs up to key offset c0
+      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
+    __ aesdeclast(xmm_result, xmm_temp);
+    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+    __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
+    __ jmp(L_exit);
+
+    __ BIND(L_key_256);
+    // 256-bit code follows here (could be optimized to use parallelism)
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_singleBlock_loopTop_256);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ pxor  (xmm_result, xmm_key_first);                             // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    for (int key_offset = FIRST_NON_REG_KEY_offset; key_offset <= 0xe0; key_offset += 0x10) {   // 256-bit runs up to key offset e0
+      aes_dec_key(xmm_result, xmm_temp, key, key_offset);
+    }
+    load_key(xmm_temp, key, 0x00);                                     // final key is stored in java expanded array at offset 0
+    __ aesdeclast(xmm_result, xmm_temp);
+    __ movdqu(xmm_temp, Address(prev_block_cipher_ptr, 0x00));
+    __ pxor  (xmm_result, xmm_temp);                                  // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ lea(prev_block_cipher_ptr, Address(from, pos, Address::times_1, 0));     // set up new ptr
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
+    __ jmp(L_exit);
+
+    return start;
+  }
+
+
  public:
   // Information about frame layout at time of blocking runtime call.
   // Note that we only have to preserve callee-saved registers since
@@ -2332,6 +2855,16 @@
     generate_arraycopy_stubs();
 
     generate_math_stubs();
+
+    // don't bother generating these AES intrinsic stubs unless global flag is set
+    if (UseAESIntrinsics) {
+      StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // might be needed by the others
+
+      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
+    }
   }
 
 
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -2941,6 +2941,548 @@
     }
   }
 
+  // AES intrinsic stubs
+  enum {AESBlockSize = 16};
+
+  address generate_key_shuffle_mask() {
+    __ align(16);
+    StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask");
+    address start = __ pc();
+    __ emit_data64( 0x0405060700010203, relocInfo::none );
+    __ emit_data64( 0x0c0d0e0f08090a0b, relocInfo::none );
+    return start;
+  }
+
+  // Utility routine for loading a 128-bit key word in little endian format
+  // can optionally specify that the shuffle mask is already in an xmmregister
+  void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+    __ movdqu(xmmdst, Address(key, offset));
+    if (xmm_shuf_mask != NULL) {
+      __ pshufb(xmmdst, xmm_shuf_mask);
+    } else {
+      __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    }
+  }
+
+  // aesenc using specified key+offset
+  // can optionally specify that the shuffle mask is already in an xmmregister
+  void aes_enc_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+    load_key(xmmtmp, key, offset, xmm_shuf_mask);
+    __ aesenc(xmmdst, xmmtmp);
+  }
+
+  // aesdec using specified key+offset
+  // can optionally specify that the shuffle mask is already in an xmmregister
+  void aes_dec_key(XMMRegister xmmdst, XMMRegister xmmtmp, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) {
+    load_key(xmmtmp, key, offset, xmm_shuf_mask);
+    __ aesdec(xmmdst, xmmtmp);
+  }
+
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_encryptBlock() {
+    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock");
+    Label L_doLast;
+    address start = __ pc();
+
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register keylen      = rax;
+
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_temp   = xmm1;
+    const XMMRegister xmm_key_shuf_mask = xmm2;
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    // keylen = # of 32-bit words, convert to 128-bit words
+    __ shrl(keylen, 2);
+    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
+
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    __ movdqu(xmm_result, Address(from, 0));  // get 16 bytes of input
+
+    // For encryption, the java expanded key ordering is just what we need
+    // we don't know if the key is aligned, hence not using load-execute form
+
+    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
+    __ pxor(xmm_result, xmm_temp);
+    for (int offset = 0x10; offset <= 0x90; offset += 0x10) {
+      aes_enc_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
+    }
+    load_key  (xmm_temp, key, 0xa0, xmm_key_shuf_mask);
+    __ cmpl(keylen, 0);
+    __ jcc(Assembler::equal, L_doLast);
+    __ aesenc(xmm_result, xmm_temp);                   // only in 192 and 256 bit keys
+    aes_enc_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
+    load_key(xmm_temp, key, 0xc0, xmm_key_shuf_mask);
+    __ subl(keylen, 2);
+    __ jcc(Assembler::equal, L_doLast);
+    __ aesenc(xmm_result, xmm_temp);                   // only in 256 bit keys
+    aes_enc_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
+    load_key(xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+
+    __ BIND(L_doLast);
+    __ aesenclast(xmm_result, xmm_temp);
+    __ movdqu(Address(to, 0), xmm_result);        // store the result
+    __ xorptr(rax, rax); // return 0
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //
+  address generate_aescrypt_decryptBlock() {
+    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "aescrypt_decryptBlock");
+    Label L_doLast;
+    address start = __ pc();
+
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register keylen      = rax;
+
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_temp   = xmm1;
+    const XMMRegister xmm_key_shuf_mask = xmm2;
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    // keylen = # of 32-bit words, convert to 128-bit words
+    __ shrl(keylen, 2);
+    __ subl(keylen, 11);   // every key has at least 11 128-bit words, some have more
+
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    __ movdqu(xmm_result, Address(from, 0));
+
+    // for decryption java expanded key ordering is rotated one position from what we want
+    // so we start from 0x10 here and hit 0x00 last
+    // we don't know if the key is aligned, hence not using load-execute form
+    load_key(xmm_temp, key, 0x10, xmm_key_shuf_mask);
+    __ pxor  (xmm_result, xmm_temp);
+    for (int offset = 0x20; offset <= 0xa0; offset += 0x10) {
+      aes_dec_key(xmm_result, xmm_temp, key, offset, xmm_key_shuf_mask);
+    }
+    __ cmpl(keylen, 0);
+    __ jcc(Assembler::equal, L_doLast);
+    // only in 192 and 256 bit keys
+    aes_dec_key(xmm_result, xmm_temp, key, 0xb0, xmm_key_shuf_mask);
+    aes_dec_key(xmm_result, xmm_temp, key, 0xc0, xmm_key_shuf_mask);
+    __ subl(keylen, 2);
+    __ jcc(Assembler::equal, L_doLast);
+    // only in 256 bit keys
+    aes_dec_key(xmm_result, xmm_temp, key, 0xd0, xmm_key_shuf_mask);
+    aes_dec_key(xmm_result, xmm_temp, key, 0xe0, xmm_key_shuf_mask);
+
+    __ BIND(L_doLast);
+    // for decryption the aesdeclast operation is always on key+0x00
+    load_key(xmm_temp, key, 0x00, xmm_key_shuf_mask);
+    __ aesdeclast(xmm_result, xmm_temp);
+
+    __ movdqu(Address(to, 0), xmm_result);  // store the result
+
+    __ xorptr(rax, rax); // return 0
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+  }
+
+
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+  address generate_cipherBlockChaining_encryptAESCrypt() {
+    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt");
+    address start = __ pc();
+
+    Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256;
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register rvec        = c_rarg3;  // r byte array initialized from initvector array address
+                                           // and left with the results of the last encryption block
+#ifndef _WIN64
+    const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
+#else
+    const Address  len_mem(rsp, 6 * wordSize);  // length is on stack on Win64
+    const Register len_reg     = r10;      // pick the first volatile windows register
+#endif
+    const Register pos         = rax;
+
+    // xmm register assignments for the loops below
+    const XMMRegister xmm_result = xmm0;
+    const XMMRegister xmm_temp   = xmm1;
+    // keys 0-10 preloaded into xmm2-xmm12
+    const int XMM_REG_NUM_KEY_FIRST = 2;
+    const int XMM_REG_NUM_KEY_LAST  = 12;
+    const XMMRegister xmm_key0   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+    const XMMRegister xmm_key10  = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef _WIN64
+    // on win64, fill len_reg from stack position
+    __ movl(len_reg, len_mem);
+    // save the xmm registers which must be preserved 6-12
+    __ subptr(rsp, -rsp_after_call_off * wordSize);
+    for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+      __ movdqu(xmm_save(i), as_XMMRegister(i));
+    }
+#endif
+
+    const XMMRegister xmm_key_shuf_mask = xmm_temp;  // used temporarily to swap key bytes up front
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    // load up xmm regs 2 thru 12 with key 0x00 - 0xa0
+    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+      offset += 0x10;
+    }
+
+    __ movdqu(xmm_result, Address(rvec, 0x00));   // initialize xmm_result with r vec
+
+    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
+    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    __ cmpl(rax, 44);
+    __ jcc(Assembler::notEqual, L_key_192_256);
+
+    // 128 bit code follows here
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loopTop_128);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);               // xor with the current r vector
+
+    __ pxor  (xmm_result, xmm_key0);               // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    __ aesenclast(xmm_result, xmm_key10);
+
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_128);
+
+    __ BIND(L_exit);
+    __ movdqu(Address(rvec, 0), xmm_result);     // final value of r stored in rvec of CipherBlockChaining object
+
+#ifdef _WIN64
+    // restore xmm regs belonging to calling function
+    for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+      __ movdqu(as_XMMRegister(i), xmm_save(i));
+    }
+#endif
+    __ movl(rax, 0); // return 0 (why?)
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    __ BIND(L_key_192_256);
+    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    __ cmpl(rax, 52);
+    __ jcc(Assembler::notEqual, L_key_256);
+
+    // 192-bit code follows here (could be changed to use more xmm registers)
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loopTop_192);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);               // xor with the current r vector
+
+    __ pxor  (xmm_result, xmm_key0);               // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    aes_enc_key(xmm_result, xmm_temp, key, 0xb0);
+    load_key(xmm_temp, key, 0xc0);
+    __ aesenclast(xmm_result, xmm_temp);
+
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_192);
+    __ jmp(L_exit);
+
+    __ BIND(L_key_256);
+    // 256-bit code follows here (could be changed to use more xmm registers)
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_loopTop_256);
+    __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of input
+    __ pxor  (xmm_result, xmm_temp);               // xor with the current r vector
+
+    __ pxor  (xmm_result, xmm_key0);               // do the aes rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      __ aesenc(xmm_result, as_XMMRegister(rnum));
+    }
+    aes_enc_key(xmm_result, xmm_temp, key, 0xb0);
+    aes_enc_key(xmm_result, xmm_temp, key, 0xc0);
+    aes_enc_key(xmm_result, xmm_temp, key, 0xd0);
+    load_key(xmm_temp, key, 0xe0);
+    __ aesenclast(xmm_result, xmm_temp);
+
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual, L_loopTop_256);
+    __ jmp(L_exit);
+
+    return start;
+  }
+
+
+
+  // This is a version of CBC/AES Decrypt which does 4 blocks in a loop at a time
+  // to hide instruction latency
+  //
+  // Arguments:
+  //
+  // Inputs:
+  //   c_rarg0   - source byte array address
+  //   c_rarg1   - destination byte array address
+  //   c_rarg2   - K (key) in little endian int array
+  //   c_rarg3   - r vector byte array address
+  //   c_rarg4   - input length
+  //
+
+  address generate_cipherBlockChaining_decryptAESCrypt_Parallel() {
+    assert(UseAES && (UseAVX > 0), "need AES instructions and misaligned SSE support");
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");
+    address start = __ pc();
+
+    Label L_exit, L_key_192_256, L_key_256;
+    Label L_singleBlock_loopTop_128, L_multiBlock_loopTop_128;
+    Label L_singleBlock_loopTop_192, L_singleBlock_loopTop_256;
+    const Register from        = c_rarg0;  // source array address
+    const Register to          = c_rarg1;  // destination array address
+    const Register key         = c_rarg2;  // key array address
+    const Register rvec        = c_rarg3;  // r byte array initialized from initvector array address
+                                           // and left with the results of the last encryption block
+#ifndef _WIN64
+    const Register len_reg     = c_rarg4;  // src len (must be multiple of blocksize 16)
+#else
+    const Address  len_mem(rsp, 6 * wordSize);  // length is on stack on Win64
+    const Register len_reg     = r10;      // pick the first volatile windows register
+#endif
+    const Register pos         = rax;
+
+    // xmm register assignments for the loops below
+    const XMMRegister xmm_result = xmm0;
+    // keys 0-10 preloaded into xmm2-xmm12
+    const int XMM_REG_NUM_KEY_FIRST = 5;
+    const int XMM_REG_NUM_KEY_LAST  = 15;
+    const XMMRegister xmm_key_first   = as_XMMRegister(XMM_REG_NUM_KEY_FIRST);
+    const XMMRegister xmm_key_last  = as_XMMRegister(XMM_REG_NUM_KEY_LAST);
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef _WIN64
+    // on win64, fill len_reg from stack position
+    __ movl(len_reg, len_mem);
+    // save the xmm registers which must be preserved 6-15
+    __ subptr(rsp, -rsp_after_call_off * wordSize);
+    for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+      __ movdqu(xmm_save(i), as_XMMRegister(i));
+    }
+#endif
+    // the java expanded key ordering is rotated one position from what we want
+    // so we start from 0x10 here and hit 0x00 last
+    const XMMRegister xmm_key_shuf_mask = xmm1;  // used temporarily to swap key bytes up front
+    __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()));
+    // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00
+    for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum <= XMM_REG_NUM_KEY_LAST; rnum++) {
+      if (rnum == XMM_REG_NUM_KEY_LAST) offset = 0x00;
+      load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask);
+      offset += 0x10;
+    }
+
+    const XMMRegister xmm_prev_block_cipher = xmm1;  // holds cipher of previous block
+    // registers holding the four results in the parallelized loop
+    const XMMRegister xmm_result0 = xmm0;
+    const XMMRegister xmm_result1 = xmm2;
+    const XMMRegister xmm_result2 = xmm3;
+    const XMMRegister xmm_result3 = xmm4;
+
+    __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00));   // initialize with initial rvec
+
+    // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256))
+    __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
+    __ cmpl(rax, 44);
+    __ jcc(Assembler::notEqual, L_key_192_256);
+
+
+    // 128-bit code follows here, parallelized
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_multiBlock_loopTop_128);
+    __ cmpptr(len_reg, 4*AESBlockSize);           // see if at least 4 blocks left
+    __ jcc(Assembler::less, L_singleBlock_loopTop_128);
+
+    __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0*AESBlockSize));   // get next 4 blocks into xmmresult registers
+    __ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1*AESBlockSize));
+    __ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2*AESBlockSize));
+    __ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3*AESBlockSize));
+
+#define DoFour(opc, src_reg)                    \
+    __ opc(xmm_result0, src_reg);               \
+    __ opc(xmm_result1, src_reg);               \
+    __ opc(xmm_result2, src_reg);               \
+    __ opc(xmm_result3, src_reg);
+
+    DoFour(pxor, xmm_key_first);
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+      DoFour(aesdec, as_XMMRegister(rnum));
+    }
+    DoFour(aesdeclast, xmm_key_last);
+    // for each result, xor with the r vector of previous cipher block
+    __ pxor(xmm_result0, xmm_prev_block_cipher);
+    __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0*AESBlockSize));
+    __ pxor(xmm_result1, xmm_prev_block_cipher);
+    __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1*AESBlockSize));
+    __ pxor(xmm_result2, xmm_prev_block_cipher);
+    __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2*AESBlockSize));
+    __ pxor(xmm_result3, xmm_prev_block_cipher);
+    __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3*AESBlockSize));   // this will carry over to next set of blocks
+
+    __ movdqu(Address(to, pos, Address::times_1, 0*AESBlockSize), xmm_result0);     // store 4 results into the next 64 bytes of output
+    __ movdqu(Address(to, pos, Address::times_1, 1*AESBlockSize), xmm_result1);
+    __ movdqu(Address(to, pos, Address::times_1, 2*AESBlockSize), xmm_result2);
+    __ movdqu(Address(to, pos, Address::times_1, 3*AESBlockSize), xmm_result3);
+
+    __ addptr(pos, 4*AESBlockSize);
+    __ subptr(len_reg, 4*AESBlockSize);
+    __ jmp(L_multiBlock_loopTop_128);
+
+    // registers used in the non-parallelized loops
+    const XMMRegister xmm_prev_block_cipher_save = xmm2;
+    const XMMRegister xmm_temp   = xmm3;
+
+    __ align(OptoLoopAlignment);
+    __ BIND(L_singleBlock_loopTop_128);
+    __ cmpptr(len_reg, 0);           // any blocks left??
+    __ jcc(Assembler::equal, L_exit);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ movdqa(xmm_prev_block_cipher_save, xmm_result);              // save for next r vector
+    __ pxor  (xmm_result, xmm_key_first);               // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum  <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    __ aesdeclast(xmm_result, xmm_key_last);
+    __ pxor  (xmm_result, xmm_prev_block_cipher);               // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);              // set up next r vector with cipher input from this block
+
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jmp(L_singleBlock_loopTop_128);
+
+
+    __ BIND(L_exit);
+    __ movdqu(Address(rvec, 0), xmm_prev_block_cipher);     // final value of r stored in rvec of CipherBlockChaining object
+#ifdef _WIN64
+    // restore regs belonging to calling function
+    for (int i = 6; i <= XMM_REG_NUM_KEY_LAST; i++) {
+      __ movdqu(as_XMMRegister(i), xmm_save(i));
+    }
+#endif
+    __ movl(rax, 0); // return 0 (why?)
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+
+    __ BIND(L_key_192_256);
+    // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256)
+    __ cmpl(rax, 52);
+    __ jcc(Assembler::notEqual, L_key_256);
+
+    // 192-bit code follows here (could be optimized to use parallelism)
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_singleBlock_loopTop_192);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ movdqa(xmm_prev_block_cipher_save, xmm_result);              // save for next r vector
+    __ pxor  (xmm_result, xmm_key_first);               // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    aes_dec_key(xmm_result, xmm_temp, key, 0xb0);     // 192-bit key goes up to c0
+    aes_dec_key(xmm_result, xmm_temp, key, 0xc0);
+    __ aesdeclast(xmm_result, xmm_key_last);                    // xmm15 always came from key+0
+    __ pxor  (xmm_result, xmm_prev_block_cipher);               // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);              // set up next r vector with cipher input from this block
+
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_192);
+    __ jmp(L_exit);
+
+    __ BIND(L_key_256);
+    // 256-bit code follows here (could be optimized to use parallelism)
+    __ movptr(pos, 0);
+    __ align(OptoLoopAlignment);
+    __ BIND(L_singleBlock_loopTop_256);
+    __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0));   // get next 16 bytes of cipher input
+    __ movdqa(xmm_prev_block_cipher_save, xmm_result);              // save for next r vector
+    __ pxor  (xmm_result, xmm_key_first);               // do the aes dec rounds
+    for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_LAST - 1; rnum++) {
+      __ aesdec(xmm_result, as_XMMRegister(rnum));
+    }
+    aes_dec_key(xmm_result, xmm_temp, key, 0xb0);     // 256-bit key goes up to e0
+    aes_dec_key(xmm_result, xmm_temp, key, 0xc0);
+    aes_dec_key(xmm_result, xmm_temp, key, 0xd0);
+    aes_dec_key(xmm_result, xmm_temp, key, 0xe0);
+    __ aesdeclast(xmm_result, xmm_key_last);             // xmm15 came from key+0
+    __ pxor  (xmm_result, xmm_prev_block_cipher);               // xor with the current r vector
+    __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result);     // store into the next 16 bytes of output
+    // no need to store r to memory until we exit
+    __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save);              // set up next r vector with cipher input from this block
+
+    __ addptr(pos, AESBlockSize);
+    __ subptr(len_reg, AESBlockSize);
+    __ jcc(Assembler::notEqual,L_singleBlock_loopTop_256);
+    __ jmp(L_exit);
+
+    return start;
+  }
+
+
+
 #undef __
 #define __ masm->
 
@@ -3135,6 +3677,16 @@
     generate_arraycopy_stubs();
 
     generate_math_stubs();
+
+    // don't bother generating these AES intrinsic stubs unless global flag is set
+    if (UseAESIntrinsics) {
+      StubRoutines::x86::_key_shuffle_mask_addr = generate_key_shuffle_mask();  // needed by the others
+
+      StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
+      StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
+      StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
+      StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
+    }
   }
 
  public:
--- a/src/cpu/x86/vm/stubRoutines_x86_32.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/x86/vm/stubRoutines_x86_32.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -44,3 +44,4 @@
 
 address StubRoutines::x86::_verify_mxcsr_entry         = NULL;
 address StubRoutines::x86::_verify_fpu_cntrl_wrd_entry = NULL;
+address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
--- a/src/cpu/x86/vm/stubRoutines_x86_32.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/x86/vm/stubRoutines_x86_32.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -41,10 +41,14 @@
  private:
   static address _verify_mxcsr_entry;
   static address _verify_fpu_cntrl_wrd_entry;
+  // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
+  static address _key_shuffle_mask_addr;
 
  public:
   static address verify_mxcsr_entry()                        { return _verify_mxcsr_entry; }
   static address verify_fpu_cntrl_wrd_entry()                { return _verify_fpu_cntrl_wrd_entry; }
+  static address key_shuffle_mask_addr()                     { return _key_shuffle_mask_addr; }
+
 };
 
   static bool    returns_to_call_stub(address return_pc)     { return return_pc == _call_stub_return_address; }
--- a/src/cpu/x86/vm/stubRoutines_x86_64.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/x86/vm/stubRoutines_x86_64.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -56,3 +56,4 @@
 address StubRoutines::x86::_double_sign_mask = NULL;
 address StubRoutines::x86::_double_sign_flip = NULL;
 address StubRoutines::x86::_mxcsr_std = NULL;
+address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
--- a/src/cpu/x86/vm/stubRoutines_x86_64.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/x86/vm/stubRoutines_x86_64.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -54,6 +54,8 @@
   static address _double_sign_mask;
   static address _double_sign_flip;
   static address _mxcsr_std;
+  // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers
+  static address _key_shuffle_mask_addr;
 
  public:
 
@@ -116,6 +118,9 @@
   {
     return _mxcsr_std;
   }
+
+  static address key_shuffle_mask_addr()                     { return _key_shuffle_mask_addr; }
+
 };
 
 #endif // CPU_X86_VM_STUBROUTINES_X86_64_HPP
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -419,13 +419,16 @@
   if (UseAVX < 1)
     _cpuFeatures &= ~CPU_AVX;
 
+  if (!UseAES && !FLAG_IS_DEFAULT(UseAES))
+    _cpuFeatures &= ~CPU_AES;
+
   if (logical_processors_per_package() == 1) {
     // HT processor could be installed on a system which doesn't support HT.
     _cpuFeatures &= ~CPU_HT;
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -441,6 +444,7 @@
                (supports_popcnt() ? ", popcnt" : ""),
                (supports_avx()    ? ", avx" : ""),
                (supports_avx2()   ? ", avx2" : ""),
+               (supports_aes()    ? ", aes" : ""),
                (supports_mmx_ext() ? ", mmxext" : ""),
                (supports_3dnow_prefetch() ? ", 3dnowpref" : ""),
                (supports_lzcnt()   ? ", lzcnt": ""),
@@ -472,6 +476,29 @@
   if (!supports_avx ()) // Drop to 0 if no AVX  support
     UseAVX = 0;
 
+  // Use AES instructions if available.
+  if (supports_aes()) {
+    if (FLAG_IS_DEFAULT(UseAES)) {
+      UseAES = true;
+    }
+  } else if (UseAES) {
+    if (!FLAG_IS_DEFAULT(UseAES))
+      warning("AES instructions not available on this CPU");
+    FLAG_SET_DEFAULT(UseAES, false);
+  }
+
+  // The AES intrinsic stubs require AES instruction support (of course)
+  // but also require AVX mode for misaligned SSE access
+  if (UseAES && (UseAVX > 0)) {
+    if (FLAG_IS_DEFAULT(UseAESIntrinsics)) {
+      UseAESIntrinsics = true;
+    }
+  } else if (UseAESIntrinsics) {
+    if (!FLAG_IS_DEFAULT(UseAESIntrinsics))
+      warning("AES intrinsics not available on this CPU");
+    FLAG_SET_DEFAULT(UseAESIntrinsics, false);
+  }
+
 #ifdef COMPILER2
   if (UseFPUForSpilling) {
     if (UseSSE < 2) {
@@ -714,6 +741,9 @@
     if (UseAVX > 0) {
       tty->print("  UseAVX=%d",UseAVX);
     }
+    if (UseAES) {
+      tty->print("  UseAES=1");
+    }
     tty->cr();
     tty->print("Allocation");
     if (AllocatePrefetchStyle <= 0 || UseSSE == 0 && !supports_3dnow_prefetch()) {
--- a/src/cpu/x86/vm/vm_version_x86.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/x86/vm/vm_version_x86.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -78,7 +78,9 @@
                sse4_2   : 1,
                         : 2,
                popcnt   : 1,
-                        : 3,
+                        : 1,
+               aes      : 1,
+                        : 1,
                osxsave  : 1,
                avx      : 1,
                         : 3;
@@ -244,7 +246,8 @@
     CPU_TSC    = (1 << 15),
     CPU_TSCINV = (1 << 16),
     CPU_AVX    = (1 << 17),
-    CPU_AVX2   = (1 << 18)
+    CPU_AVX2   = (1 << 18),
+    CPU_AES    = (1 << 19)
   } cpuFeatureFlags;
 
   enum {
@@ -420,6 +423,8 @@
       result |= CPU_TSC;
     if (_cpuid_info.ext_cpuid7_edx.bits.tsc_invariance != 0)
       result |= CPU_TSCINV;
+    if (_cpuid_info.std_cpuid1_ecx.bits.aes != 0)
+      result |= CPU_AES;
 
     // AMD features.
     if (is_amd()) {
@@ -544,6 +549,7 @@
   static bool supports_avx()      { return (_cpuFeatures & CPU_AVX) != 0; }
   static bool supports_avx2()     { return (_cpuFeatures & CPU_AVX2) != 0; }
   static bool supports_tsc()      { return (_cpuFeatures & CPU_TSC)    != 0; }
+  static bool supports_aes()      { return (_cpuFeatures & CPU_AES) != 0; }
 
   // Intel features
   static bool is_intel_family_core() { return is_intel() &&
--- a/src/cpu/x86/vm/x86.ad	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/x86/vm/x86.ad	Fri Nov 02 16:09:50 2012 -0700
@@ -4102,9 +4102,158 @@
 
 // ----------------------- LogicalRightShift -----------------------------------
 
-// Shorts/Chars vector logical right shift produces incorrect Java result
+// Shorts vector logical right shift produces incorrect Java result
 // for negative data because java code convert short value into int with
-// sign extension before a shift.
+// sign extension before a shift. But char vectors are fine since chars are
+// unsigned values.
+
+instruct vsrl2S(vecS dst, vecS shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS dst shift));
+  format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_imm(vecS dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS dst shift));
+  format %{ "psrlw   $dst,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg(vecS dst, vecS src, vecS shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S(vecD dst, vecS shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS dst shift));
+  format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS dst shift));
+  format %{ "psrlw   $dst,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg(vecD dst, vecD src, vecS shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S(vecX dst, vecS shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS dst shift));
+  format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    __ psrlw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS dst shift));
+  format %{ "psrlw   $dst,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    __ psrlw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_reg(vecX dst, vecX src, vecS shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg(vecY dst, vecY src, vecS shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (URShiftVS src shift));
+  format %{ "vpsrlw  $dst,$src,$shift\t! logical right shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrlw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
 
 // Integers vector logical right shift
 instruct vsrl2I(vecD dst, vecS shift) %{
--- a/src/cpu/zero/vm/cppInterpreterGenerator_zero.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/zero/vm/cppInterpreterGenerator_zero.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -31,12 +31,17 @@
     return _masm;
   }
 
- protected:
-  address generate_entry(address entry_point) {
-    ZeroEntry *entry = (ZeroEntry *) assembler()->pc();
-    assembler()->advance(sizeof(ZeroEntry));
+ public:
+  static address generate_entry_impl(MacroAssembler* masm, address entry_point) {
+    ZeroEntry *entry = (ZeroEntry *) masm->pc();
+    masm->advance(sizeof(ZeroEntry));
     entry->set_entry_point(entry_point);
     return (address) entry;
   }
 
+ protected:
+  address generate_entry(address entry_point) {
+        return generate_entry_impl(assembler(), entry_point);
+  }
+
 #endif // CPU_ZERO_VM_CPPINTERPRETERGENERATOR_ZERO_HPP
--- a/src/cpu/zero/vm/cppInterpreter_zero.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/zero/vm/cppInterpreter_zero.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -180,25 +180,6 @@
         method, istate->osr_entry(), istate->osr_buf(), THREAD);
       return;
     }
-    else if (istate->msg() == BytecodeInterpreter::call_method_handle) {
-      oop method_handle = istate->callee();
-
-      // Trim back the stack to put the parameters at the top
-      stack->set_sp(istate->stack() + 1);
-
-      // Make the call
-      process_method_handle(method_handle, THREAD);
-      fixup_after_potential_safepoint();
-
-      // Convert the result
-      istate->set_stack(stack->sp() - 1);
-
-      // Restore the stack
-      stack->set_sp(istate->stack_limit() + 1);
-
-      // Resume the interpreter
-      istate->set_msg(BytecodeInterpreter::method_resume);
-    }
     else {
       ShouldNotReachHere();
     }
@@ -535,35 +516,35 @@
   if (entry->is_volatile()) {
     switch (entry->flag_state()) {
     case ctos:
-      SET_LOCALS_INT(object->char_field_acquire(entry->f2()), 0);
+      SET_LOCALS_INT(object->char_field_acquire(entry->f2_as_index()), 0);
       break;
 
     case btos:
-      SET_LOCALS_INT(object->byte_field_acquire(entry->f2()), 0);
+      SET_LOCALS_INT(object->byte_field_acquire(entry->f2_as_index()), 0);
       break;
 
     case stos:
-      SET_LOCALS_INT(object->short_field_acquire(entry->f2()), 0);
+      SET_LOCALS_INT(object->short_field_acquire(entry->f2_as_index()), 0);
       break;
 
     case itos:
-      SET_LOCALS_INT(object->int_field_acquire(entry->f2()), 0);
+      SET_LOCALS_INT(object->int_field_acquire(entry->f2_as_index()), 0);
       break;
 
     case ltos:
-      SET_LOCALS_LONG(object->long_field_acquire(entry->f2()), 0);
+      SET_LOCALS_LONG(object->long_field_acquire(entry->f2_as_index()), 0);
       break;
 
     case ftos:
-      SET_LOCALS_FLOAT(object->float_field_acquire(entry->f2()), 0);
+      SET_LOCALS_FLOAT(object->float_field_acquire(entry->f2_as_index()), 0);
       break;
 
     case dtos:
-      SET_LOCALS_DOUBLE(object->double_field_acquire(entry->f2()), 0);
+      SET_LOCALS_DOUBLE(object->double_field_acquire(entry->f2_as_index()), 0);
       break;
 
     case atos:
-      SET_LOCALS_OBJECT(object->obj_field_acquire(entry->f2()), 0);
+      SET_LOCALS_OBJECT(object->obj_field_acquire(entry->f2_as_index()), 0);
       break;
 
     default:
@@ -573,35 +554,35 @@
   else {
     switch (entry->flag_state()) {
     case ctos:
-      SET_LOCALS_INT(object->char_field(entry->f2()), 0);
+      SET_LOCALS_INT(object->char_field(entry->f2_as_index()), 0);
       break;
 
     case btos:
-      SET_LOCALS_INT(object->byte_field(entry->f2()), 0);
+      SET_LOCALS_INT(object->byte_field(entry->f2_as_index()), 0);
       break;
 
     case stos:
-      SET_LOCALS_INT(object->short_field(entry->f2()), 0);
+      SET_LOCALS_INT(object->short_field(entry->f2_as_index()), 0);
       break;
 
     case itos:
-      SET_LOCALS_INT(object->int_field(entry->f2()), 0);
+      SET_LOCALS_INT(object->int_field(entry->f2_as_index()), 0);
       break;
 
     case ltos:
-      SET_LOCALS_LONG(object->long_field(entry->f2()), 0);
+      SET_LOCALS_LONG(object->long_field(entry->f2_as_index()), 0);
       break;
 
     case ftos:
-      SET_LOCALS_FLOAT(object->float_field(entry->f2()), 0);
+      SET_LOCALS_FLOAT(object->float_field(entry->f2_as_index()), 0);
       break;
 
     case dtos:
-      SET_LOCALS_DOUBLE(object->double_field(entry->f2()), 0);
+      SET_LOCALS_DOUBLE(object->double_field(entry->f2_as_index()), 0);
       break;
 
     case atos:
-      SET_LOCALS_OBJECT(object->obj_field(entry->f2()), 0);
+      SET_LOCALS_OBJECT(object->obj_field(entry->f2_as_index()), 0);
       break;
 
     default:
@@ -629,516 +610,6 @@
   return 0;
 }
 
-int CppInterpreter::method_handle_entry(Method* method,
-                                        intptr_t UNUSED, TRAPS) {
-  JavaThread *thread = (JavaThread *) THREAD;
-  ZeroStack *stack = thread->zero_stack();
-  int argument_slots = method->size_of_parameters();
-  int result_slots = type2size[result_type_of(method)];
-  intptr_t *vmslots = stack->sp();
-  intptr_t *unwind_sp = vmslots + argument_slots;
-
-  // Find the MethodType
-  address p = (address) method;
-  for (jint* pc = method->method_type_offsets_chain(); (*pc) != -1; pc++) {
-    p = *(address*)(p + (*pc));
-  }
-  oop method_type = (oop) p;
-
-  // The MethodHandle is in the slot after the arguments
-  int num_vmslots = argument_slots - 1;
-  oop method_handle = VMSLOTS_OBJECT(num_vmslots);
-
-  // InvokeGeneric requires some extra shuffling
-  oop mhtype = java_lang_invoke_MethodHandle::type(method_handle);
-  bool is_exact = mhtype == method_type;
-  if (!is_exact) {
-    if (true || // FIXME
-        method->intrinsic_id() == vmIntrinsics::_invokeExact) {
-      CALL_VM_NOCHECK_NOFIX(
-        SharedRuntime::throw_WrongMethodTypeException(
-          thread, method_type, mhtype));
-      // NB all oops trashed!
-      assert(HAS_PENDING_EXCEPTION, "should do");
-      stack->set_sp(unwind_sp);
-      return 0;
-    }
-    assert(method->intrinsic_id() == vmIntrinsics::_invokeGeneric, "should be");
-
-    // Load up an adapter from the calling type
-    // NB the x86 code for this (in methodHandles_x86.cpp, search for
-    // "genericInvoker") is really really odd.  I'm hoping it's trying
-    // to accomodate odd VM/class library combinations I can ignore.
-    oop adapter = NULL; //FIXME: load the adapter from the CP cache
-    IF (adapter == NULL) {
-      CALL_VM_NOCHECK_NOFIX(
-        SharedRuntime::throw_WrongMethodTypeException(
-          thread, method_type, mhtype));
-      // NB all oops trashed!
-      assert(HAS_PENDING_EXCEPTION, "should do");
-      stack->set_sp(unwind_sp);
-      return 0;
-    }
-
-    // Adapters are shared among form-families of method-type.  The
-    // type being called is passed as a trusted first argument so that
-    // the adapter knows the actual types of its arguments and return
-    // values.
-    insert_vmslots(num_vmslots + 1, 1, THREAD);
-    if (HAS_PENDING_EXCEPTION) {
-      // NB all oops trashed!
-      stack->set_sp(unwind_sp);
-      return 0;
-    }
-
-    vmslots = stack->sp();
-    num_vmslots++;
-    SET_VMSLOTS_OBJECT(method_type, num_vmslots);
-
-    method_handle = adapter;
-  }
-
-  // Start processing
-  process_method_handle(method_handle, THREAD);
-  if (HAS_PENDING_EXCEPTION)
-    result_slots = 0;
-
-  // If this is an invokeExact then the eventual callee will not
-  // have unwound the method handle argument so we have to do it.
-  // If a result is being returned the it will be above the method
-  // handle argument we're unwinding.
-  if (is_exact) {
-    intptr_t result[2];
-    for (int i = 0; i < result_slots; i++)
-      result[i] = stack->pop();
-    stack->pop();
-    for (int i = result_slots - 1; i >= 0; i--)
-      stack->push(result[i]);
-  }
-
-  // Check
-  assert(stack->sp() == unwind_sp - result_slots, "should be");
-
-  // No deoptimized frames on the stack
-  return 0;
-}
-
-void CppInterpreter::process_method_handle(oop method_handle, TRAPS) {
-  JavaThread *thread = (JavaThread *) THREAD;
-  ZeroStack *stack = thread->zero_stack();
-  intptr_t *vmslots = stack->sp();
-
-  bool direct_to_method = false;
-  BasicType src_rtype = T_ILLEGAL;
-  BasicType dst_rtype = T_ILLEGAL;
-
-  MethodHandleEntry *entry =
-    java_lang_invoke_MethodHandle::vmentry(method_handle);
-  MethodHandles::EntryKind entry_kind =
-    (MethodHandles::EntryKind) (((intptr_t) entry) & 0xffffffff);
-
-  Method* method = NULL;
-  switch (entry_kind) {
-  case MethodHandles::_invokestatic_mh:
-    direct_to_method = true;
-    break;
-
-  case MethodHandles::_invokespecial_mh:
-  case MethodHandles::_invokevirtual_mh:
-  case MethodHandles::_invokeinterface_mh:
-    {
-      oop receiver =
-        VMSLOTS_OBJECT(
-          java_lang_invoke_MethodHandle::vmslots(method_handle) - 1);
-      if (receiver == NULL) {
-          stack->set_sp(calculate_unwind_sp(stack, method_handle));
-          CALL_VM_NOCHECK_NOFIX(
-            throw_exception(
-              thread, vmSymbols::java_lang_NullPointerException()));
-          // NB all oops trashed!
-          assert(HAS_PENDING_EXCEPTION, "should do");
-          return;
-      }
-      if (entry_kind != MethodHandles::_invokespecial_mh) {
-        intptr_t index = java_lang_invoke_DirectMethodHandle::vmindex(method_handle);
-        InstanceKlass* rcvrKlass =
-          (InstanceKlass *) receiver->klass();
-        if (entry_kind == MethodHandles::_invokevirtual_mh) {
-          method = (Method*) rcvrKlass->start_of_vtable()[index];
-        }
-        else {
-          oop iclass = java_lang_invoke_MethodHandle::next_target(method_handle);
-          itableOffsetEntry* ki =
-            (itableOffsetEntry *) rcvrKlass->start_of_itable();
-          int i, length = rcvrKlass->itable_length();
-          for (i = 0; i < length; i++, ki++ ) {
-            if (ki->interface_klass() == iclass)
-              break;
-          }
-          if (i == length) {
-            stack->set_sp(calculate_unwind_sp(stack, method_handle));
-            CALL_VM_NOCHECK_NOFIX(
-              throw_exception(
-                thread, vmSymbols::java_lang_IncompatibleClassChangeError()));
-            // NB all oops trashed!
-            assert(HAS_PENDING_EXCEPTION, "should do");
-            return;
-          }
-          itableMethodEntry* im = ki->first_method_entry(receiver->klass());
-          method = im[index].method();
-          if (method == NULL) {
-            stack->set_sp(calculate_unwind_sp(stack, method_handle));
-            CALL_VM_NOCHECK_NOFIX(
-              throw_exception(
-                thread, vmSymbols::java_lang_AbstractMethodError()));
-            // NB all oops trashed!
-            assert(HAS_PENDING_EXCEPTION, "should do");
-            return;
-          }
-        }
-      }
-    }
-    direct_to_method = true;
-    break;
-
-  case MethodHandles::_bound_ref_direct_mh:
-  case MethodHandles::_bound_int_direct_mh:
-  case MethodHandles::_bound_long_direct_mh:
-    direct_to_method = true;
-    // fall through
-  case MethodHandles::_bound_ref_mh:
-  case MethodHandles::_bound_int_mh:
-  case MethodHandles::_bound_long_mh:
-    {
-      BasicType arg_type  = T_ILLEGAL;
-      int       arg_mask  = -1;
-      int       arg_slots = -1;
-      MethodHandles::get_ek_bound_mh_info(
-        entry_kind, arg_type, arg_mask, arg_slots);
-      int arg_slot =
-        java_lang_invoke_BoundMethodHandle::vmargslot(method_handle);
-
-      // Create the new slot(s)
-      intptr_t *unwind_sp = calculate_unwind_sp(stack, method_handle);
-      insert_vmslots(arg_slot, arg_slots, THREAD);
-      if (HAS_PENDING_EXCEPTION) {
-        // all oops trashed
-        stack->set_sp(unwind_sp);
-        return;
-      }
-      vmslots = stack->sp();
-
-      // Store bound argument into new stack slot
-      oop arg = java_lang_invoke_BoundMethodHandle::argument(method_handle);
-      if (arg_type == T_OBJECT) {
-        assert(arg_slots == 1, "should be");
-        SET_VMSLOTS_OBJECT(arg, arg_slot);
-      }
-      else {
-        jvalue arg_value;
-        arg_type = java_lang_boxing_object::get_value(arg, &arg_value);
-        switch (arg_type) {
-        case T_BOOLEAN:
-          SET_VMSLOTS_INT(arg_value.z, arg_slot);
-          break;
-        case T_CHAR:
-          SET_VMSLOTS_INT(arg_value.c, arg_slot);
-          break;
-        case T_BYTE:
-          SET_VMSLOTS_INT(arg_value.b, arg_slot);
-          break;
-        case T_SHORT:
-          SET_VMSLOTS_INT(arg_value.s, arg_slot);
-          break;
-        case T_INT:
-          SET_VMSLOTS_INT(arg_value.i, arg_slot);
-          break;
-        case T_FLOAT:
-          SET_VMSLOTS_FLOAT(arg_value.f, arg_slot);
-          break;
-        case T_LONG:
-          SET_VMSLOTS_LONG(arg_value.j, arg_slot + 1);
-          break;
-        case T_DOUBLE:
-          SET_VMSLOTS_DOUBLE(arg_value.d, arg_slot + 1);
-          break;
-        default:
-          tty->print_cr("unhandled type %s", type2name(arg_type));
-          ShouldNotReachHere();
-        }
-      }
-    }
-    break;
-
-  case MethodHandles::_adapter_retype_only:
-  case MethodHandles::_adapter_retype_raw:
-    src_rtype = result_type_of_handle(
-      java_lang_invoke_MethodHandle::next_target(method_handle));
-    dst_rtype = result_type_of_handle(method_handle);
-    break;
-
-  case MethodHandles::_adapter_check_cast:
-    {
-      int arg_slot =
-        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
-      oop arg = VMSLOTS_OBJECT(arg_slot);
-      if (arg != NULL) {
-        Klass* objKlassOop = arg->klass();
-        Klass* klassOf = java_lang_Class::as_Klass(
-          java_lang_invoke_AdapterMethodHandle::argument(method_handle));
-
-        if (objKlassOop != klassOf &&
-            !objKlassOop->is_subtype_of(klassOf)) {
-          ResourceMark rm(THREAD);
-          const char* objName = Klass::cast(objKlassOop)->external_name();
-          const char* klassName = Klass::cast(klassOf)->external_name();
-          char* message = SharedRuntime::generate_class_cast_message(
-            objName, klassName);
-
-          stack->set_sp(calculate_unwind_sp(stack, method_handle));
-          CALL_VM_NOCHECK_NOFIX(
-            throw_exception(
-              thread, vmSymbols::java_lang_ClassCastException(), message));
-          // NB all oops trashed!
-          assert(HAS_PENDING_EXCEPTION, "should do");
-          return;
-        }
-      }
-    }
-    break;
-
-  case MethodHandles::_adapter_dup_args:
-    {
-      int arg_slot =
-        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
-      int conv =
-        java_lang_invoke_AdapterMethodHandle::conversion(method_handle);
-      int num_slots = -MethodHandles::adapter_conversion_stack_move(conv);
-      assert(num_slots > 0, "should be");
-
-      // Create the new slot(s)
-      intptr_t *unwind_sp = calculate_unwind_sp(stack, method_handle);
-      stack->overflow_check(num_slots, THREAD);
-      if (HAS_PENDING_EXCEPTION) {
-        // all oops trashed
-        stack->set_sp(unwind_sp);
-        return;
-      }
-
-      // Duplicate the arguments
-      for (int i = num_slots - 1; i >= 0; i--)
-        stack->push(*VMSLOTS_SLOT(arg_slot + i));
-
-      vmslots = stack->sp(); // unused, but let the compiler figure that out
-    }
-    break;
-
-  case MethodHandles::_adapter_drop_args:
-    {
-      int arg_slot =
-        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
-      int conv =
-        java_lang_invoke_AdapterMethodHandle::conversion(method_handle);
-      int num_slots = MethodHandles::adapter_conversion_stack_move(conv);
-      assert(num_slots > 0, "should be");
-
-      remove_vmslots(arg_slot, num_slots, THREAD); // doesn't trap
-      vmslots = stack->sp(); // unused, but let the compiler figure that out
-    }
-    break;
-
-  case MethodHandles::_adapter_opt_swap_1:
-  case MethodHandles::_adapter_opt_swap_2:
-  case MethodHandles::_adapter_opt_rot_1_up:
-  case MethodHandles::_adapter_opt_rot_1_down:
-  case MethodHandles::_adapter_opt_rot_2_up:
-  case MethodHandles::_adapter_opt_rot_2_down:
-    {
-      int arg1 =
-        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
-      int conv =
-        java_lang_invoke_AdapterMethodHandle::conversion(method_handle);
-      int arg2 = MethodHandles::adapter_conversion_vminfo(conv);
-
-      int swap_bytes = 0, rotate = 0;
-      MethodHandles::get_ek_adapter_opt_swap_rot_info(
-        entry_kind, swap_bytes, rotate);
-      int swap_slots = swap_bytes >> LogBytesPerWord;
-
-      intptr_t tmp;
-      switch (rotate) {
-      case 0: // swap
-        for (int i = 0; i < swap_slots; i++) {
-          tmp = *VMSLOTS_SLOT(arg1 + i);
-          SET_VMSLOTS_SLOT(VMSLOTS_SLOT(arg2 + i), arg1 + i);
-          SET_VMSLOTS_SLOT(&tmp, arg2 + i);
-        }
-        break;
-
-      case 1: // up
-        assert(arg1 - swap_slots > arg2, "should be");
-
-        tmp = *VMSLOTS_SLOT(arg1);
-        for (int i = arg1 - swap_slots; i >= arg2; i--)
-          SET_VMSLOTS_SLOT(VMSLOTS_SLOT(i), i + swap_slots);
-        SET_VMSLOTS_SLOT(&tmp, arg2);
-
-        break;
-
-      case -1: // down
-        assert(arg2 - swap_slots > arg1, "should be");
-
-        tmp = *VMSLOTS_SLOT(arg1);
-        for (int i = arg1 + swap_slots; i <= arg2; i++)
-          SET_VMSLOTS_SLOT(VMSLOTS_SLOT(i), i - swap_slots);
-        SET_VMSLOTS_SLOT(&tmp, arg2);
-        break;
-
-      default:
-        ShouldNotReachHere();
-      }
-    }
-    break;
-
-  case MethodHandles::_adapter_opt_i2l:
-    {
-      int arg_slot =
-        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
-      int arg = VMSLOTS_INT(arg_slot);
-      intptr_t *unwind_sp = calculate_unwind_sp(stack, method_handle);
-      insert_vmslots(arg_slot, 1, THREAD);
-      if (HAS_PENDING_EXCEPTION) {
-        // all oops trashed
-        stack->set_sp(unwind_sp);
-        return;
-      }
-      vmslots = stack->sp();
-      arg_slot++;
-      SET_VMSLOTS_LONG(arg, arg_slot);
-    }
-    break;
-
-  case MethodHandles::_adapter_opt_unboxi:
-  case MethodHandles::_adapter_opt_unboxl:
-    {
-      int arg_slot =
-        java_lang_invoke_AdapterMethodHandle::vmargslot(method_handle);
-      oop arg = VMSLOTS_OBJECT(arg_slot);
-      jvalue arg_value;
-      if (arg == NULL) {
-        // queue a nullpointer exception for the caller
-        stack->set_sp(calculate_unwind_sp(stack, method_handle));
-        CALL_VM_NOCHECK_NOFIX(
-          throw_exception(
-            thread, vmSymbols::java_lang_NullPointerException()));
-        // NB all oops trashed!
-        assert(HAS_PENDING_EXCEPTION, "should do");
-        return;
-      }
-      BasicType arg_type = java_lang_boxing_object::get_value(arg, &arg_value);
-      if (arg_type == T_LONG || arg_type == T_DOUBLE) {
-        intptr_t *unwind_sp = calculate_unwind_sp(stack, method_handle);
-        insert_vmslots(arg_slot, 1, THREAD);
-        if (HAS_PENDING_EXCEPTION) {
-          // all oops trashed
-          stack->set_sp(unwind_sp);
-          return;
-        }
-        vmslots = stack->sp();
-        arg_slot++;
-      }
-      switch (arg_type) {
-      case T_BOOLEAN:
-        SET_VMSLOTS_INT(arg_value.z, arg_slot);
-        break;
-      case T_CHAR:
-        SET_VMSLOTS_INT(arg_value.c, arg_slot);
-        break;
-      case T_BYTE:
-        SET_VMSLOTS_INT(arg_value.b, arg_slot);
-        break;
-      case T_SHORT:
-        SET_VMSLOTS_INT(arg_value.s, arg_slot);
-        break;
-      case T_INT:
-        SET_VMSLOTS_INT(arg_value.i, arg_slot);
-        break;
-      case T_FLOAT:
-        SET_VMSLOTS_FLOAT(arg_value.f, arg_slot);
-        break;
-      case T_LONG:
-        SET_VMSLOTS_LONG(arg_value.j, arg_slot);
-        break;
-      case T_DOUBLE:
-        SET_VMSLOTS_DOUBLE(arg_value.d, arg_slot);
-        break;
-      default:
-        tty->print_cr("unhandled type %s", type2name(arg_type));
-        ShouldNotReachHere();
-      }
-    }
-    break;
-
-  default:
-    tty->print_cr("unhandled entry_kind %s",
-                  MethodHandles::entry_name(entry_kind));
-    ShouldNotReachHere();
-  }
-
-  // Continue along the chain
-  if (direct_to_method) {
-    if (method == NULL) {
-      method =
-        (Method*) java_lang_invoke_MethodHandle::vmtarget(method_handle);
-    }
-    address entry_point = method->from_interpreted_entry();
-    Interpreter::invoke_method(method, entry_point, THREAD);
-  }
-  else {
-    process_method_handle(
-      java_lang_invoke_MethodHandle::next_target(method_handle), THREAD);
-  }
-  // NB all oops now trashed
-
-  // Adapt the result type, if necessary
-  if (src_rtype != dst_rtype && !HAS_PENDING_EXCEPTION) {
-    switch (dst_rtype) {
-    case T_VOID:
-      for (int i = 0; i < type2size[src_rtype]; i++)
-        stack->pop();
-      return;
-
-    case T_INT:
-      switch (src_rtype) {
-      case T_VOID:
-        stack->overflow_check(1, CHECK);
-        stack->push(0);
-        return;
-
-      case T_BOOLEAN:
-      case T_CHAR:
-      case T_BYTE:
-      case T_SHORT:
-        return;
-      }
-      // INT results sometimes need narrowing
-    case T_BOOLEAN:
-    case T_CHAR:
-    case T_BYTE:
-    case T_SHORT:
-      switch (src_rtype) {
-      case T_INT:
-        return;
-      }
-    }
-
-    tty->print_cr("unhandled conversion:");
-    tty->print_cr("src_rtype = %s", type2name(src_rtype));
-    tty->print_cr("dst_rtype = %s", type2name(dst_rtype));
-    ShouldNotReachHere();
-  }
-}
-
 // The new slots will be inserted before slot insert_before.
 // Slots < insert_before will have the same slot number after the insert.
 // Slots >= insert_before will become old_slot + num_slots.
@@ -1380,10 +851,6 @@
     entry_point = ((InterpreterGenerator*) this)->generate_abstract_entry();
     break;
 
-  case Interpreter::method_handle:
-    entry_point = ((InterpreterGenerator*) this)->generate_method_handle_entry();
-    break;
-
   case Interpreter::java_lang_math_sin:
   case Interpreter::java_lang_math_cos:
   case Interpreter::java_lang_math_tan:
@@ -1391,6 +858,8 @@
   case Interpreter::java_lang_math_log:
   case Interpreter::java_lang_math_log10:
   case Interpreter::java_lang_math_sqrt:
+  case Interpreter::java_lang_math_pow:
+  case Interpreter::java_lang_math_exp:
     entry_point = ((InterpreterGenerator*) this)->generate_math_entry(kind);
     break;
 
--- a/src/cpu/zero/vm/cppInterpreter_zero.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/zero/vm/cppInterpreter_zero.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -36,7 +36,6 @@
   static int native_entry(Method* method, intptr_t UNUSED, TRAPS);
   static int accessor_entry(Method* method, intptr_t UNUSED, TRAPS);
   static int empty_entry(Method* method, intptr_t UNUSED, TRAPS);
-  static int method_handle_entry(Method* method, intptr_t UNUSED, TRAPS);
 
  public:
   // Main loop of normal_entry
@@ -44,7 +43,6 @@
 
  private:
   // Helpers for method_handle_entry
-  static void process_method_handle(oop method_handle, TRAPS);
   static void insert_vmslots(int insert_before, int num_slots, TRAPS);
   static void remove_vmslots(int first_slot, int num_slots, TRAPS);
   static BasicType result_type_of_handle(oop method_handle);
--- a/src/cpu/zero/vm/frame_zero.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/zero/vm/frame_zero.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -351,7 +351,7 @@
   switch (offset) {
   case pc_off:
     strncpy(fieldbuf, "pc", buflen);
-    if (method()->is_oop()) {
+    if (method()->is_method()) {
       nmethod *code = method()->code();
       if (code && code->pc_desc_at(pc())) {
         SimpleScopeDesc ssd(code, pc());
@@ -367,7 +367,7 @@
 
   case method_off:
     strncpy(fieldbuf, "method", buflen);
-    if (method()->is_oop()) {
+    if (method()->is_method()) {
       method()->name_and_sig_as_C_string(valuebuf, buflen);
     }
     return;
@@ -378,7 +378,7 @@
   }
 
   // Variable part
-  if (method()->is_oop()) {
+  if (method()->is_method()) {
     identify_vp_word(frame_index, addr_of_word(offset),
                      addr_of_word(header_words + 1),
                      unextended_sp() + method()->max_stack(),
@@ -430,4 +430,3 @@
   // unused... but returns fp() to minimize changes introduced by 7087445
   return fp();
 }
-
--- a/src/cpu/zero/vm/frame_zero.inline.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/zero/vm/frame_zero.inline.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -36,6 +36,8 @@
   _deopt_state = unknown;
 }
 
+inline address  frame::sender_pc()           const { ShouldNotCallThis();  }
+
 inline frame::frame(ZeroFrame* zf, intptr_t* sp) {
   _zeroframe = zf;
   _sp = sp;
--- a/src/cpu/zero/vm/icBuffer_zero.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/zero/vm/icBuffer_zero.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -40,7 +40,7 @@
 }
 
 void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin,
-                                                Metadata* cached_oop,
+                                                void* cached_oop,
                                                 address entry_point) {
   // NB ic_stub_code_size() must return the size of the code we generate
   ShouldNotCallThis();
@@ -51,7 +51,6 @@
   ShouldNotCallThis();
 }
 
-Metadata* InlineCacheBuffer::ic_buffer_cached_oop(address code_begin) {
-  // NB ic_stub_code_size() must return the size of the code we generate
+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
   ShouldNotCallThis();
 }
--- a/src/cpu/zero/vm/methodHandles_zero.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/zero/vm/methodHandles_zero.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -24,26 +24,159 @@
  */
 
 #include "precompiled.hpp"
+#include "interpreter/interpreterGenerator.hpp"
 #include "interpreter/interpreter.hpp"
 #include "memory/allocation.inline.hpp"
 #include "prims/methodHandles.hpp"
 
-int MethodHandles::adapter_conversion_ops_supported_mask() {
-  return ((1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_ONLY)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_RAW)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_CHECK_CAST)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_PRIM)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_REF_TO_PRIM)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_SWAP_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_ROT_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DUP_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS)
-         //|(1<<java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS) //BUG!
-         );
-  // FIXME: MethodHandlesTest gets a crash if we enable OP_SPREAD_ARGS.
+void MethodHandles::invoke_target(Method* method, TRAPS) {
+
+  JavaThread *thread = (JavaThread *) THREAD;
+  ZeroStack *stack = thread->zero_stack();
+  InterpreterFrame *frame = thread->top_zero_frame()->as_interpreter_frame();
+  interpreterState istate = frame->interpreter_state();
+
+  // Trim back the stack to put the parameters at the top
+  stack->set_sp(istate->stack() + 1);
+
+  Interpreter::invoke_method(method, method->from_interpreted_entry(), THREAD);
+
+  // Convert the result
+  istate->set_stack(stack->sp() - 1);
+
 }
 
-void MethodHandles::generate_method_handle_stub(MacroAssembler*          masm,
-                                                MethodHandles::EntryKind ek) {
-  init_entry(ek, (MethodHandleEntry *) ek);
+oop MethodHandles::popFromStack(TRAPS) {
+
+  JavaThread *thread = (JavaThread *) THREAD;
+  InterpreterFrame *frame = thread->top_zero_frame()->as_interpreter_frame();
+  interpreterState istate = frame->interpreter_state();
+  intptr_t* topOfStack = istate->stack();
+
+  oop top = STACK_OBJECT(-1);
+  MORE_STACK(-1);
+  istate->set_stack(topOfStack);
+
+  return top;
+
 }
+
+int MethodHandles::method_handle_entry_invokeBasic(Method* method, intptr_t UNUSED, TRAPS) {
+
+  JavaThread *thread = (JavaThread *) THREAD;
+  InterpreterFrame *frame = thread->top_zero_frame()->as_interpreter_frame();
+  interpreterState istate = frame->interpreter_state();
+  intptr_t* topOfStack = istate->stack();
+
+  // 'this' is a MethodHandle. We resolve the target method by accessing this.form.vmentry.vmtarget.
+  int numArgs = method->size_of_parameters();
+  oop lform1 = java_lang_invoke_MethodHandle::form(STACK_OBJECT(-numArgs)); // this.form
+  oop vmEntry1 = java_lang_invoke_LambdaForm::vmentry(lform1);
+  Method* vmtarget = (Method*) java_lang_invoke_MemberName::vmtarget(vmEntry1);
+
+  invoke_target(vmtarget, THREAD);
+
+  // No deoptimized frames on the stack
+  return 0;
+}
+
+int MethodHandles::method_handle_entry_linkToStaticOrSpecial(Method* method, intptr_t UNUSED, TRAPS) {
+
+  // Pop appendix argument from stack. This is a MemberName which we resolve to the
+  // target method.
+  oop vmentry = popFromStack(THREAD);
+
+  Method* vmtarget = (Method*) java_lang_invoke_MemberName::vmtarget(vmentry);
+
+  invoke_target(vmtarget, THREAD);
+
+  return 0;
+}
+
+int MethodHandles::method_handle_entry_linkToInterface(Method* method, intptr_t UNUSED, TRAPS) {
+  JavaThread *thread = (JavaThread *) THREAD;
+  InterpreterFrame *frame = thread->top_zero_frame()->as_interpreter_frame();
+  interpreterState istate = frame->interpreter_state();
+
+  // Pop appendix argument from stack. This is a MemberName which we resolve to the
+  // target method.
+  oop vmentry = popFromStack(THREAD);
+  intptr_t* topOfStack = istate->stack();
+
+  // Resolve target method by looking up in the receiver object's itable.
+  Klass* clazz = java_lang_Class::as_Klass(java_lang_invoke_MemberName::clazz(vmentry));
+  intptr_t vmindex = java_lang_invoke_MemberName::vmindex(vmentry);
+  Method* target = (Method*) java_lang_invoke_MemberName::vmtarget(vmentry);
+
+  int numArgs = target->size_of_parameters();
+  oop recv = STACK_OBJECT(-numArgs);
+
+  InstanceKlass* klass_part = InstanceKlass::cast(recv->klass());
+  itableOffsetEntry* ki = (itableOffsetEntry*) klass_part->start_of_itable();
+  int i;
+  for ( i = 0 ; i < klass_part->itable_length() ; i++, ki++ ) {
+    if (ki->interface_klass() == clazz) break;
+  }
+
+  itableMethodEntry* im = ki->first_method_entry(recv->klass());
+  Method* vmtarget = im[vmindex].method();
+
+  invoke_target(vmtarget, THREAD);
+
+  return 0;
+}
+
+int MethodHandles::method_handle_entry_linkToVirtual(Method* method, intptr_t UNUSED, TRAPS) {
+  JavaThread *thread = (JavaThread *) THREAD;
+
+  InterpreterFrame *frame = thread->top_zero_frame()->as_interpreter_frame();
+  interpreterState istate = frame->interpreter_state();
+
+  // Pop appendix argument from stack. This is a MemberName which we resolve to the
+  // target method.
+  oop vmentry = popFromStack(THREAD);
+  intptr_t* topOfStack = istate->stack();
+
+  // Resolve target method by looking up in the receiver object's vtable.
+  intptr_t vmindex = java_lang_invoke_MemberName::vmindex(vmentry);
+  Method* target = (Method*) java_lang_invoke_MemberName::vmtarget(vmentry);
+  int numArgs = target->size_of_parameters();
+  oop recv = STACK_OBJECT(-numArgs);
+  Klass* clazz = recv->klass();
+  Klass* klass_part = InstanceKlass::cast(clazz);
+  klassVtable* vtable = klass_part->vtable();
+  Method* vmtarget = vtable->method_at(vmindex);
+
+  invoke_target(vmtarget, THREAD);
+
+  return 0;
+}
+
+int MethodHandles::method_handle_entry_invalid(Method* method, intptr_t UNUSED, TRAPS) {
+  ShouldNotReachHere();
+  return 0;
+}
+
+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* masm,
+                                                                vmIntrinsics::ID iid) {
+  switch (iid) {
+  case vmIntrinsics::_invokeGeneric:
+  case vmIntrinsics::_compiledLambdaForm:
+    // Perhaps surprisingly, the symbolic references visible to Java are not directly used.
+    // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod.
+    // They all allow an appendix argument.
+    return InterpreterGenerator::generate_entry_impl(masm, (address) MethodHandles::method_handle_entry_invalid);
+  case vmIntrinsics::_invokeBasic:
+    return InterpreterGenerator::generate_entry_impl(masm, (address) MethodHandles::method_handle_entry_invokeBasic);
+  case vmIntrinsics::_linkToStatic:
+  case vmIntrinsics::_linkToSpecial:
+    return InterpreterGenerator::generate_entry_impl(masm, (address) MethodHandles::method_handle_entry_linkToStaticOrSpecial);
+  case vmIntrinsics::_linkToInterface:
+    return InterpreterGenerator::generate_entry_impl(masm, (address) MethodHandles::method_handle_entry_linkToInterface);
+  case vmIntrinsics::_linkToVirtual:
+    return InterpreterGenerator::generate_entry_impl(masm, (address) MethodHandles::method_handle_entry_linkToVirtual);
+  default:
+    ShouldNotReachHere();
+    return NULL;
+  }
+}
--- a/src/cpu/zero/vm/methodHandles_zero.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/zero/vm/methodHandles_zero.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -26,6 +26,14 @@
 
 // Adapters
 enum /* platform_dependent_constants */ {
-  adapter_code_size = 0
+  adapter_code_size = sizeof(ZeroEntry) * (Interpreter::method_handle_invoke_LAST - Interpreter::method_handle_invoke_FIRST + 1)
 };
 
+private:
+  static oop popFromStack(TRAPS);
+  static void invoke_target(Method* method, TRAPS);
+  static int method_handle_entry_invokeBasic(Method* method, intptr_t UNUSED, TRAPS);
+  static int method_handle_entry_linkToStaticOrSpecial(Method* method, intptr_t UNUSED, TRAPS);
+  static int method_handle_entry_linkToVirtual(Method* method, intptr_t UNUSED, TRAPS);
+  static int method_handle_entry_linkToInterface(Method* method, intptr_t UNUSED, TRAPS);
+  static int method_handle_entry_invalid(Method* method, intptr_t UNUSED, TRAPS);
--- a/src/cpu/zero/vm/register_zero.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/zero/vm/register_zero.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -114,5 +114,8 @@
 };
 
 CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1));
+#ifndef DONT_USE_REGISTER_DEFINES
+#define noreg ((Register)(noreg_RegisterEnumValue))
+#endif
 
 #endif // CPU_ZERO_VM_REGISTER_ZERO_HPP
--- a/src/cpu/zero/vm/relocInfo_zero.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/zero/vm/relocInfo_zero.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -77,3 +77,7 @@
                                                        CodeBuffer*       dst) {
   ShouldNotCallThis();
 }
+
+void metadata_Relocation::pd_fix_value(address x) {
+  ShouldNotCallThis();
+}
--- a/src/cpu/zero/vm/sharedRuntime_zero.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/cpu/zero/vm/sharedRuntime_zero.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -35,6 +35,7 @@
 #include "runtime/sharedRuntime.hpp"
 #include "runtime/vframeArray.hpp"
 #include "vmreg_zero.inline.hpp"
+
 #ifdef COMPILER1
 #include "c1/c1_Runtime1.hpp"
 #endif
@@ -47,6 +48,12 @@
 #endif
 
 
+
+static address zero_null_code_stub() {
+  address start = ShouldNotCallThisStub();
+  return start;
+}
+
 int SharedRuntime::java_calling_convention(const BasicType *sig_bt,
                                            VMRegPair *regs,
                                            int total_args_passed,
@@ -63,16 +70,14 @@
                         AdapterFingerPrint *fingerprint) {
   return AdapterHandlerLibrary::new_entry(
     fingerprint,
-    ShouldNotCallThisStub(),
-    ShouldNotCallThisStub(),
-    ShouldNotCallThisStub());
+    CAST_FROM_FN_PTR(address,zero_null_code_stub),
+    CAST_FROM_FN_PTR(address,zero_null_code_stub),
+    CAST_FROM_FN_PTR(address,zero_null_code_stub));
 }
 
 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler *masm,
                                                 methodHandle method,
                                                 int compile_id,
-                                                int total_args_passed,
-                                                int max_arg,
                                                 BasicType *sig_bt,
                                                 VMRegPair *regs,
                                                 BasicType ret_type) {
@@ -96,19 +101,20 @@
   ShouldNotCallThis();
 }
 
+JRT_LEAF(void, zero_stub())
+  ShouldNotCallThis();
+JRT_END
+
 static RuntimeStub* generate_empty_runtime_stub(const char* name) {
-  CodeBuffer buffer(name, 0, 0);
-  return RuntimeStub::new_runtime_stub(name, &buffer, 0, 0, NULL, false);
+  return CAST_FROM_FN_PTR(RuntimeStub*,zero_stub);
 }
 
 static SafepointBlob* generate_empty_safepoint_blob() {
-  CodeBuffer buffer("handler_blob", 0, 0);
-  return SafepointBlob::create(&buffer, NULL, 0);
+  return CAST_FROM_FN_PTR(SafepointBlob*,zero_stub);
 }
 
 static DeoptimizationBlob* generate_empty_deopt_blob() {
-  CodeBuffer buffer("handler_blob", 0, 0);
-  return DeoptimizationBlob::create(&buffer, NULL, 0, 0, 0, 0);
+  return CAST_FROM_FN_PTR(DeoptimizationBlob*,zero_stub);
 }
 
 
@@ -116,7 +122,7 @@
   _deopt_blob = generate_empty_deopt_blob();
 }
 
-SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, bool cause_return) {
+SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) {
   return generate_empty_safepoint_blob();
 }
 
@@ -124,6 +130,7 @@
   return generate_empty_runtime_stub("resolve_blob");
 }
 
+
 int SharedRuntime::c_calling_convention(const BasicType *sig_bt,
                                          VMRegPair *regs,
                                          int total_args_passed) {
--- a/src/share/vm/asm/codeBuffer.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/asm/codeBuffer.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -758,7 +758,7 @@
     }
   }
 
-  if (dest->blob() == NULL) {
+  if (dest->blob() == NULL && dest_filled != NULL) {
     // Destination is a final resting place, not just another buffer.
     // Normalize uninitialized bytes in the final padding.
     Copy::fill_to_bytes(dest_filled, dest_end - dest_filled,
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -1844,17 +1844,12 @@
         code == Bytecodes::_invokevirtual && target->is_final_method() ||
         code == Bytecodes::_invokedynamic) {
       ciMethod* inline_target = (cha_monomorphic_target != NULL) ? cha_monomorphic_target : target;
-      bool success = false;
-      if (target->is_method_handle_intrinsic()) {
-        // method handle invokes
-        success = try_method_handle_inline(target);
-      } else {
-        // static binding => check if callee is ok
-        success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), code, better_receiver);
-      }
+      // static binding => check if callee is ok
+      bool success = try_inline(inline_target, (cha_monomorphic_target != NULL) || (exact_target != NULL), code, better_receiver);
+
       CHECK_BAILOUT();
-
       clear_inline_bailout();
+
       if (success) {
         // Register dependence if JVMTI has either breakpoint
         // setting or hotswapping of methods capabilities since they may
@@ -3201,6 +3196,11 @@
     return false;
   }
 
+  // method handle invokes
+  if (callee->is_method_handle_intrinsic()) {
+    return try_method_handle_inline(callee);
+  }
+
   // handle intrinsics
   if (callee->intrinsic_id() != vmIntrinsics::_none) {
     if (try_inline_intrinsics(callee)) {
@@ -3885,10 +3885,14 @@
       ValueType* type = state()->stack_at(args_base)->type();
       if (type->is_constant()) {
         ciMethod* target = type->as_ObjectType()->constant_value()->as_method_handle()->get_vmtarget();
-        guarantee(!target->is_method_handle_intrinsic(), "should not happen");  // XXX remove
-        Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
-        if (try_inline(target, /*holder_known*/ true, bc)) {
-          return true;
+        // We don't do CHA here so only inline static and statically bindable methods.
+        if (target->is_static() || target->can_be_statically_bound()) {
+          Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
+          if (try_inline(target, /*holder_known*/ true, bc)) {
+            return true;
+          }
+        } else {
+          print_inlining(target, "not static or statically bindable", /*success*/ false);
         }
       } else {
         print_inlining(callee, "receiver not constant", /*success*/ false);
@@ -3941,9 +3945,14 @@
             }
             j += t->size();  // long and double take two slots
           }
-          Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
-          if (try_inline(target, /*holder_known*/ true, bc)) {
-            return true;
+          // We don't do CHA here so only inline static and statically bindable methods.
+          if (target->is_static() || target->can_be_statically_bound()) {
+            Bytecodes::Code bc = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokevirtual;
+            if (try_inline(target, /*holder_known*/ true, bc)) {
+              return true;
+            }
+          } else {
+            print_inlining(target, "not static or statically bindable", /*success*/ false);
           }
         }
       } else {
--- a/src/share/vm/classfile/vmSymbols.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/classfile/vmSymbols.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -110,6 +110,7 @@
   template(sun_jkernel_DownloadManager,               "sun/jkernel/DownloadManager")              \
   template(getBootClassPathEntryForClass_name,        "getBootClassPathEntryForClass")            \
   template(sun_misc_PostVMInitHook,                   "sun/misc/PostVMInitHook")                  \
+  template(sun_misc_Launcher_ExtClassLoader,          "sun/misc/Launcher$ExtClassLoader")         \
                                                                                                   \
   /* Java runtime version access */                                                               \
   template(sun_misc_Version,                          "sun/misc/Version")                         \
@@ -725,6 +726,21 @@
   /* java/lang/ref/Reference */                                                                                         \
   do_intrinsic(_Reference_get,            java_lang_ref_Reference, get_name,    void_object_signature, F_R)             \
                                                                                                                         \
+  /* support for com.sum.crypto.provider.AESCrypt and some of its callers */                                            \
+  do_class(com_sun_crypto_provider_aescrypt,      "com/sun/crypto/provider/AESCrypt")                                   \
+  do_intrinsic(_aescrypt_encryptBlock, com_sun_crypto_provider_aescrypt, encryptBlock_name, byteArray_int_byteArray_int_signature, F_R)   \
+  do_intrinsic(_aescrypt_decryptBlock, com_sun_crypto_provider_aescrypt, decryptBlock_name, byteArray_int_byteArray_int_signature, F_R)   \
+   do_name(     encryptBlock_name,                                 "encryptBlock")                                      \
+   do_name(     decryptBlock_name,                                 "decryptBlock")                                      \
+   do_signature(byteArray_int_byteArray_int_signature,             "([BI[BI)V")                                         \
+                                                                                                                        \
+  do_class(com_sun_crypto_provider_cipherBlockChaining,            "com/sun/crypto/provider/CipherBlockChaining")       \
+   do_intrinsic(_cipherBlockChaining_encryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, encrypt_name, byteArray_int_int_byteArray_int_signature, F_R)   \
+   do_intrinsic(_cipherBlockChaining_decryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, decrypt_name, byteArray_int_int_byteArray_int_signature, F_R)   \
+   do_name(     encrypt_name,                                      "encrypt")                                           \
+   do_name(     decrypt_name,                                      "decrypt")                                           \
+   do_signature(byteArray_int_int_byteArray_int_signature,         "([BII[BI)V")                                        \
+                                                                                                                        \
   /* support for sun.misc.Unsafe */                                                                                     \
   do_class(sun_misc_Unsafe,               "sun/misc/Unsafe")                                                            \
                                                                                                                         \
--- a/src/share/vm/compiler/compilerOracle.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/compiler/compilerOracle.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -574,7 +574,7 @@
   char token[1024];
   int  pos = 0;
   int  c = getc(stream);
-  while(c != EOF) {
+  while(c != EOF && pos < (int)(sizeof(token)-1)) {
     if (c == '\n') {
       token[pos++] = '\0';
       parse_from_line(token);
@@ -595,7 +595,7 @@
   int  pos = 0;
   const char* sp = str;
   int  c = *sp++;
-  while (c != '\0') {
+  while (c != '\0' && pos < (int)(sizeof(token)-1)) {
     if (c == '\n') {
       token[pos++] = '\0';
       parse_line(token);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp"
+#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
+#include "memory/freeBlockDictionary.hpp"
+#include "memory/sharedHeap.hpp"
+#include "runtime/globals.hpp"
+#include "runtime/mutex.hpp"
+#include "runtime/vmThread.hpp"
+
+template <>
+void AdaptiveFreeList<FreeChunk>::print_on(outputStream* st, const char* c) const {
+  if (c != NULL) {
+    st->print("%16s", c);
+  } else {
+    st->print(SIZE_FORMAT_W(16), size());
+  }
+  st->print("\t"
+           SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t"
+           SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\n",
+           bfr_surp(),             surplus(),             desired(),             prev_sweep(),           before_sweep(),
+           count(),               coal_births(),          coal_deaths(),          split_births(),         split_deaths());
+}
+
+template <class Chunk>
+AdaptiveFreeList<Chunk>::AdaptiveFreeList() : FreeList<Chunk>(), _hint(0) {
+  init_statistics();
+}
+
+template <class Chunk>
+AdaptiveFreeList<Chunk>::AdaptiveFreeList(Chunk* fc) : FreeList<Chunk>(fc), _hint(0) {
+  init_statistics();
+#ifndef PRODUCT
+  _allocation_stats.set_returned_bytes(size() * HeapWordSize);
+#endif
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::initialize() {
+  FreeList<Chunk>::initialize();
+  set_hint(0);
+  init_statistics(true /* split_birth */);
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::reset(size_t hint) {
+  FreeList<Chunk>::reset();
+  set_hint(hint);
+}
+
+#ifndef PRODUCT
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::assert_proper_lock_protection_work() const {
+  assert(protecting_lock() != NULL, "Don't call this directly");
+  assert(ParallelGCThreads > 0, "Don't call this directly");
+  Thread* thr = Thread::current();
+  if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) {
+    // assert that we are holding the freelist lock
+  } else if (thr->is_GC_task_thread()) {
+    assert(protecting_lock()->owned_by_self(), "FreeList RACE DETECTED");
+  } else if (thr->is_Java_thread()) {
+    assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing");
+  } else {
+    ShouldNotReachHere();  // unaccounted thread type?
+  }
+}
+#endif
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::init_statistics(bool split_birth) {
+  _allocation_stats.initialize(split_birth);
+}
+
+template <class Chunk>
+size_t AdaptiveFreeList<Chunk>::get_better_size() {
+
+  // A candidate chunk has been found.  If it is already under
+  // populated and there is a hinT, REturn the hint().  Else
+  // return the size of this chunk.
+  if (surplus() <= 0) {
+    if (hint() != 0) {
+      return hint();
+    } else {
+      return size();
+    }
+  } else {
+    // This list has a surplus so use it.
+    return size();
+  }
+}
+
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::return_chunk_at_head(Chunk* chunk) {
+  assert_proper_lock_protection();
+  return_chunk_at_head(chunk, true);
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::return_chunk_at_head(Chunk* chunk, bool record_return) {
+  FreeList<Chunk>::return_chunk_at_head(chunk, record_return);
+#ifdef ASSERT
+  if (record_return) {
+    increment_returned_bytes_by(size()*HeapWordSize);
+  }
+#endif
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::return_chunk_at_tail(Chunk* chunk) {
+  return_chunk_at_tail(chunk, true);
+}
+
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::return_chunk_at_tail(Chunk* chunk, bool record_return) {
+  FreeList<Chunk>::return_chunk_at_tail(chunk, record_return);
+#ifdef ASSERT
+  if (record_return) {
+    increment_returned_bytes_by(size()*HeapWordSize);
+  }
+#endif
+}
+
+#ifndef PRODUCT
+template <class Chunk>
+void AdaptiveFreeList<Chunk>::verify_stats() const {
+  // The +1 of the LH comparand is to allow some "looseness" in
+  // checking: we usually call this interface when adding a block
+  // and we'll subsequently update the stats; we cannot update the
+  // stats beforehand because in the case of the large-block BT
+  // dictionary for example, this might be the first block and
+  // in that case there would be no place that we could record
+  // the stats (which are kept in the block itself).
+  assert((_allocation_stats.prev_sweep() + _allocation_stats.split_births()
+          + _allocation_stats.coal_births() + 1)   // Total Production Stock + 1
+         >= (_allocation_stats.split_deaths() + _allocation_stats.coal_deaths()
+             + (ssize_t)count()),                // Total Current Stock + depletion
+         err_msg("FreeList " PTR_FORMAT " of size " SIZE_FORMAT
+                 " violates Conservation Principle: "
+                 "prev_sweep(" SIZE_FORMAT ")"
+                 " + split_births(" SIZE_FORMAT ")"
+                 " + coal_births(" SIZE_FORMAT ") + 1 >= "
+                 " split_deaths(" SIZE_FORMAT ")"
+                 " coal_deaths(" SIZE_FORMAT ")"
+                 " + count(" SSIZE_FORMAT ")",
+                 this, size(), _allocation_stats.prev_sweep(), _allocation_stats.split_births(),
+                 _allocation_stats.split_births(), _allocation_stats.split_deaths(),
+                 _allocation_stats.coal_deaths(), count()));
+}
+#endif
+
+// Needs to be after the definitions have been seen.
+template class AdaptiveFreeList<FreeChunk>;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP
+#define SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP
+
+#include "memory/freeList.hpp"
+#include "gc_implementation/shared/allocationStats.hpp"
+
+class CompactibleFreeListSpace;
+
+// A class for maintaining a free list of Chunk's.  The FreeList
+// maintains a the structure of the list (head, tail, etc.) plus
+// statistics for allocations from the list.  The links between items
+// are not part of FreeList.  The statistics are
+// used to make decisions about coalescing Chunk's when they
+// are swept during collection.
+//
+// See the corresponding .cpp file for a description of the specifics
+// for that implementation.
+
+class Mutex;
+
+template <class Chunk>
+class AdaptiveFreeList : public FreeList<Chunk> {
+  friend class CompactibleFreeListSpace;
+  friend class VMStructs;
+  // friend class PrintTreeCensusClosure<Chunk, FreeList_t>;
+
+  size_t        _hint;          // next larger size list with a positive surplus
+
+  AllocationStats _allocation_stats; // allocation-related statistics
+
+ public:
+
+  AdaptiveFreeList();
+  AdaptiveFreeList(Chunk* fc);
+
+  using FreeList<Chunk>::assert_proper_lock_protection;
+#ifdef ASSERT
+  using FreeList<Chunk>::protecting_lock;
+#endif
+  using FreeList<Chunk>::count;
+  using FreeList<Chunk>::size;
+  using FreeList<Chunk>::verify_chunk_in_free_list;
+  using FreeList<Chunk>::getFirstNChunksFromList;
+  using FreeList<Chunk>::print_on;
+  void return_chunk_at_head(Chunk* fc, bool record_return);
+  void return_chunk_at_head(Chunk* fc);
+  void return_chunk_at_tail(Chunk* fc, bool record_return);
+  void return_chunk_at_tail(Chunk* fc);
+  using FreeList<Chunk>::return_chunk_at_tail;
+  using FreeList<Chunk>::remove_chunk;
+  using FreeList<Chunk>::prepend;
+  using FreeList<Chunk>::print_labels_on;
+  using FreeList<Chunk>::get_chunk_at_head;
+
+  // Initialize.
+  void initialize();
+
+  // Reset the head, tail, hint, and count of a free list.
+  void reset(size_t hint);
+
+  void assert_proper_lock_protection_work() const PRODUCT_RETURN;
+
+  void print_on(outputStream* st, const char* c = NULL) const;
+
+  size_t hint() const {
+    return _hint;
+  }
+  void set_hint(size_t v) {
+    assert_proper_lock_protection();
+    assert(v == 0 || size() < v, "Bad hint");
+    _hint = v;
+  }
+
+  size_t get_better_size();
+
+  // Accessors for statistics
+  void init_statistics(bool split_birth = false);
+
+  AllocationStats* allocation_stats() {
+    assert_proper_lock_protection();
+    return &_allocation_stats;
+  }
+
+  ssize_t desired() const {
+    return _allocation_stats.desired();
+  }
+  void set_desired(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_desired(v);
+  }
+  void compute_desired(float inter_sweep_current,
+                       float inter_sweep_estimate,
+                       float intra_sweep_estimate) {
+    assert_proper_lock_protection();
+    _allocation_stats.compute_desired(count(),
+                                      inter_sweep_current,
+                                      inter_sweep_estimate,
+                                      intra_sweep_estimate);
+  }
+  ssize_t coal_desired() const {
+    return _allocation_stats.coal_desired();
+  }
+  void set_coal_desired(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_coal_desired(v);
+  }
+
+  ssize_t surplus() const {
+    return _allocation_stats.surplus();
+  }
+  void set_surplus(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_surplus(v);
+  }
+  void increment_surplus() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_surplus();
+  }
+  void decrement_surplus() {
+    assert_proper_lock_protection();
+    _allocation_stats.decrement_surplus();
+  }
+
+  ssize_t bfr_surp() const {
+    return _allocation_stats.bfr_surp();
+  }
+  void set_bfr_surp(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_bfr_surp(v);
+  }
+  ssize_t prev_sweep() const {
+    return _allocation_stats.prev_sweep();
+  }
+  void set_prev_sweep(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_prev_sweep(v);
+  }
+  ssize_t before_sweep() const {
+    return _allocation_stats.before_sweep();
+  }
+  void set_before_sweep(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_before_sweep(v);
+  }
+
+  ssize_t coal_births() const {
+    return _allocation_stats.coal_births();
+  }
+  void set_coal_births(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_coal_births(v);
+  }
+  void increment_coal_births() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_coal_births();
+  }
+
+  ssize_t coal_deaths() const {
+    return _allocation_stats.coal_deaths();
+  }
+  void set_coal_deaths(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_coal_deaths(v);
+  }
+  void increment_coal_deaths() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_coal_deaths();
+  }
+
+  ssize_t split_births() const {
+    return _allocation_stats.split_births();
+  }
+  void set_split_births(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_split_births(v);
+  }
+  void increment_split_births() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_split_births();
+  }
+
+  ssize_t split_deaths() const {
+    return _allocation_stats.split_deaths();
+  }
+  void set_split_deaths(ssize_t v) {
+    assert_proper_lock_protection();
+    _allocation_stats.set_split_deaths(v);
+  }
+  void increment_split_deaths() {
+    assert_proper_lock_protection();
+    _allocation_stats.increment_split_deaths();
+  }
+
+#ifndef PRODUCT
+  // For debugging.  The "_returned_bytes" in all the lists are summed
+  // and compared with the total number of bytes swept during a
+  // collection.
+  size_t returned_bytes() const { return _allocation_stats.returned_bytes(); }
+  void set_returned_bytes(size_t v) { _allocation_stats.set_returned_bytes(v); }
+  void increment_returned_bytes_by(size_t v) {
+    _allocation_stats.set_returned_bytes(_allocation_stats.returned_bytes() + v);
+  }
+  // Stats verification
+  void verify_stats() const;
+#endif  // NOT PRODUCT
+};
+
+#endif // SHARE_VM_MEMORY_ADAPTIVEFREELIST_HPP
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -91,7 +91,7 @@
   _collector(NULL)
 {
   assert(sizeof(FreeChunk) / BytesPerWord <= MinChunkSize,
-    "FreeChunk is larger than expected");
+         "FreeChunk is larger than expected");
   _bt.set_space(this);
   initialize(mr, SpaceDecorator::Clear, SpaceDecorator::Mangle);
   // We have all of "mr", all of which we place in the dictionary
@@ -101,14 +101,14 @@
   // implementation, namely, the simple binary tree (splaying
   // temporarily disabled).
   switch (dictionaryChoice) {
+    case FreeBlockDictionary<FreeChunk>::dictionaryBinaryTree:
+      _dictionary = new BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>(mr);
+      break;
     case FreeBlockDictionary<FreeChunk>::dictionarySplayTree:
     case FreeBlockDictionary<FreeChunk>::dictionarySkipList:
     default:
       warning("dictionaryChoice: selected option not understood; using"
               " default BinaryTreeDictionary implementation instead.");
-    case FreeBlockDictionary<FreeChunk>::dictionaryBinaryTree:
-      _dictionary = new BinaryTreeDictionary<FreeChunk>(mr, use_adaptive_freelists);
-      break;
   }
   assert(_dictionary != NULL, "CMS dictionary initialization");
   // The indexed free lists are initially all empty and are lazily
@@ -453,7 +453,7 @@
   reportIndexedFreeListStatistics();
   gclog_or_tty->print_cr("Layout of Indexed Freelists");
   gclog_or_tty->print_cr("---------------------------");
-  FreeList<FreeChunk>::print_labels_on(st, "size");
+  AdaptiveFreeList<FreeChunk>::print_labels_on(st, "size");
   for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
     _indexedFreeList[i].print_on(gclog_or_tty);
     for (FreeChunk* fc = _indexedFreeList[i].head(); fc != NULL;
@@ -1319,7 +1319,7 @@
   size_t currSize = numWords + MinChunkSize;
   assert(currSize % MinObjAlignment == 0, "currSize should be aligned");
   for (i = currSize; i < IndexSetSize; i += IndexSetStride) {
-    FreeList<FreeChunk>* fl = &_indexedFreeList[i];
+    AdaptiveFreeList<FreeChunk>* fl = &_indexedFreeList[i];
     if (fl->head()) {
       ret = getFromListGreater(fl, numWords);
       assert(ret == NULL || ret->is_free(), "Should be returning a free chunk");
@@ -1702,7 +1702,9 @@
   _dictionary->return_chunk(chunk);
 #ifndef PRODUCT
   if (CMSCollector::abstract_state() != CMSCollector::Sweeping) {
-    TreeChunk<FreeChunk>::as_TreeChunk(chunk)->list()->verify_stats();
+    TreeChunk<FreeChunk, AdaptiveFreeList>* tc = TreeChunk<FreeChunk, AdaptiveFreeList>::as_TreeChunk(chunk);
+    TreeList<FreeChunk, AdaptiveFreeList>* tl = tc->list();
+    tl->verify_stats();
   }
 #endif // PRODUCT
 }
@@ -1745,7 +1747,7 @@
   {
     MutexLockerEx x(lock, Mutex::_no_safepoint_check_flag);
     ec = dictionary()->find_largest_dict();  // get largest block
-    if (ec != NULL && ec->end() == chunk) {
+    if (ec != NULL && ec->end() == (uintptr_t*) chunk) {
       // It's a coterminal block - we can coalesce.
       size_t old_size = ec->size();
       coalDeath(old_size);
@@ -1850,11 +1852,11 @@
      the excess is >= MIN_CHUNK. */
   size_t start = align_object_size(numWords + MinChunkSize);
   if (start < IndexSetSize) {
-    FreeList<FreeChunk>* it   = _indexedFreeList;
+    AdaptiveFreeList<FreeChunk>* it   = _indexedFreeList;
     size_t    hint = _indexedFreeList[start].hint();
     while (hint < IndexSetSize) {
       assert(hint % MinObjAlignment == 0, "hint should be aligned");
-      FreeList<FreeChunk> *fl = &_indexedFreeList[hint];
+      AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[hint];
       if (fl->surplus() > 0 && fl->head() != NULL) {
         // Found a list with surplus, reset original hint
         // and split out a free chunk which is returned.
@@ -1873,7 +1875,7 @@
 }
 
 /* Requires fl->size >= numWords + MinChunkSize */
-FreeChunk* CompactibleFreeListSpace::getFromListGreater(FreeList<FreeChunk>* fl,
+FreeChunk* CompactibleFreeListSpace::getFromListGreater(AdaptiveFreeList<FreeChunk>* fl,
   size_t numWords) {
   FreeChunk *curr = fl->head();
   size_t oldNumWords = curr->size();
@@ -2155,7 +2157,7 @@
   assert_locked();
   size_t i;
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-    FreeList<FreeChunk>* fl = &_indexedFreeList[i];
+    AdaptiveFreeList<FreeChunk>* fl    = &_indexedFreeList[i];
     if (PrintFLSStatistics > 1) {
       gclog_or_tty->print("size[%d] : ", i);
     }
@@ -2174,7 +2176,7 @@
   assert_locked();
   size_t i;
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-    FreeList<FreeChunk> *fl = &_indexedFreeList[i];
+    AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[i];
     fl->set_surplus(fl->count() -
                     (ssize_t)((double)fl->desired() * CMSSmallSplitSurplusPercent));
   }
@@ -2185,7 +2187,7 @@
   size_t i;
   size_t h = IndexSetSize;
   for (i = IndexSetSize - 1; i != 0; i -= IndexSetStride) {
-    FreeList<FreeChunk> *fl = &_indexedFreeList[i];
+    AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[i];
     fl->set_hint(h);
     if (fl->surplus() > 0) {
       h = i;
@@ -2197,7 +2199,7 @@
   assert_locked();
   size_t i;
   for (i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-    FreeList<FreeChunk> *fl = &_indexedFreeList[i];
+    AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[i];
     fl->set_prev_sweep(fl->count());
     fl->set_coal_births(0);
     fl->set_coal_deaths(0);
@@ -2224,7 +2226,7 @@
 
 bool CompactibleFreeListSpace::coalOverPopulated(size_t size) {
   if (size < SmallForDictionary) {
-    FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+    AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
     return (fl->coal_desired() < 0) ||
            ((int)fl->count() > fl->coal_desired());
   } else {
@@ -2234,14 +2236,14 @@
 
 void CompactibleFreeListSpace::smallCoalBirth(size_t size) {
   assert(size < SmallForDictionary, "Size too large for indexed list");
-  FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+  AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
   fl->increment_coal_births();
   fl->increment_surplus();
 }
 
 void CompactibleFreeListSpace::smallCoalDeath(size_t size) {
   assert(size < SmallForDictionary, "Size too large for indexed list");
-  FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+  AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
   fl->increment_coal_deaths();
   fl->decrement_surplus();
 }
@@ -2250,7 +2252,7 @@
   if (size  < SmallForDictionary) {
     smallCoalBirth(size);
   } else {
-    dictionary()->dict_census_udpate(size,
+    dictionary()->dict_census_update(size,
                                    false /* split */,
                                    true /* birth */);
   }
@@ -2260,7 +2262,7 @@
   if(size  < SmallForDictionary) {
     smallCoalDeath(size);
   } else {
-    dictionary()->dict_census_udpate(size,
+    dictionary()->dict_census_update(size,
                                    false /* split */,
                                    false /* birth */);
   }
@@ -2268,14 +2270,14 @@
 
 void CompactibleFreeListSpace::smallSplitBirth(size_t size) {
   assert(size < SmallForDictionary, "Size too large for indexed list");
-  FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+  AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
   fl->increment_split_births();
   fl->increment_surplus();
 }
 
 void CompactibleFreeListSpace::smallSplitDeath(size_t size) {
   assert(size < SmallForDictionary, "Size too large for indexed list");
-  FreeList<FreeChunk> *fl = &_indexedFreeList[size];
+  AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[size];
   fl->increment_split_deaths();
   fl->decrement_surplus();
 }
@@ -2284,7 +2286,7 @@
   if (size  < SmallForDictionary) {
     smallSplitBirth(size);
   } else {
-    dictionary()->dict_census_udpate(size,
+    dictionary()->dict_census_update(size,
                                    true /* split */,
                                    true /* birth */);
   }
@@ -2294,7 +2296,7 @@
   if (size  < SmallForDictionary) {
     smallSplitDeath(size);
   } else {
-    dictionary()->dict_census_udpate(size,
+    dictionary()->dict_census_update(size,
                                    true /* split */,
                                    false /* birth */);
   }
@@ -2517,10 +2519,10 @@
 
 #ifndef PRODUCT
 void CompactibleFreeListSpace::check_free_list_consistency() const {
-  assert(_dictionary->min_size() <= IndexSetSize,
+  assert((TreeChunk<FreeChunk, AdaptiveFreeList>::min_size() <= IndexSetSize),
     "Some sizes can't be allocated without recourse to"
     " linear allocation buffers");
-  assert(BinaryTreeDictionary<FreeChunk>::min_tree_chunk_size*HeapWordSize == sizeof(TreeChunk<FreeChunk>),
+  assert((TreeChunk<FreeChunk, AdaptiveFreeList>::min_size()*HeapWordSize == sizeof(TreeChunk<FreeChunk, AdaptiveFreeList>)),
     "else MIN_TREE_CHUNK_SIZE is wrong");
   assert(IndexSetStart != 0, "IndexSetStart not initialized");
   assert(IndexSetStride != 0, "IndexSetStride not initialized");
@@ -2529,15 +2531,15 @@
 
 void CompactibleFreeListSpace::printFLCensus(size_t sweep_count) const {
   assert_lock_strong(&_freelistLock);
-  FreeList<FreeChunk> total;
+  AdaptiveFreeList<FreeChunk> total;
   gclog_or_tty->print("end sweep# " SIZE_FORMAT "\n", sweep_count);
-  FreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
+  AdaptiveFreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
   size_t total_free = 0;
   for (size_t i = IndexSetStart; i < IndexSetSize; i += IndexSetStride) {
-    const FreeList<FreeChunk> *fl = &_indexedFreeList[i];
+    const AdaptiveFreeList<FreeChunk> *fl = &_indexedFreeList[i];
     total_free += fl->count() * fl->size();
     if (i % (40*IndexSetStride) == 0) {
-      FreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
+      AdaptiveFreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
     }
     fl->print_on(gclog_or_tty);
     total.set_bfr_surp(    total.bfr_surp()     + fl->bfr_surp()    );
@@ -2620,7 +2622,7 @@
     res = _cfls->getChunkFromDictionaryExact(word_sz);
     if (res == NULL) return NULL;
   } else {
-    FreeList<FreeChunk>* fl = &_indexedFreeList[word_sz];
+    AdaptiveFreeList<FreeChunk>* fl = &_indexedFreeList[word_sz];
     if (fl->count() == 0) {
       // Attempt to refill this local free list.
       get_from_global_pool(word_sz, fl);
@@ -2640,7 +2642,7 @@
 
 // Get a chunk of blocks of the right size and update related
 // book-keeping stats
-void CFLS_LAB::get_from_global_pool(size_t word_sz, FreeList<FreeChunk>* fl) {
+void CFLS_LAB::get_from_global_pool(size_t word_sz, AdaptiveFreeList<FreeChunk>* fl) {
   // Get the #blocks we want to claim
   size_t n_blks = (size_t)_blocks_to_claim[word_sz].average();
   assert(n_blks > 0, "Error");
@@ -2722,7 +2724,7 @@
         if (num_retire > 0) {
           _cfls->_indexedFreeList[i].prepend(&_indexedFreeList[i]);
           // Reset this list.
-          _indexedFreeList[i] = FreeList<FreeChunk>();
+          _indexedFreeList[i] = AdaptiveFreeList<FreeChunk>();
           _indexedFreeList[i].set_size(i);
         }
       }
@@ -2736,7 +2738,7 @@
   }
 }
 
-void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList<FreeChunk>* fl) {
+void CompactibleFreeListSpace:: par_get_chunk_of_blocks(size_t word_sz, size_t n, AdaptiveFreeList<FreeChunk>* fl) {
   assert(fl->count() == 0, "Precondition.");
   assert(word_sz < CompactibleFreeListSpace::IndexSetSize,
          "Precondition");
@@ -2752,12 +2754,12 @@
          (cur_sz < CompactibleFreeListSpace::IndexSetSize) &&
          (CMSSplitIndexedFreeListBlocks || k <= 1);
          k++, cur_sz = k * word_sz) {
-      FreeList<FreeChunk> fl_for_cur_sz;  // Empty.
+      AdaptiveFreeList<FreeChunk> fl_for_cur_sz;  // Empty.
       fl_for_cur_sz.set_size(cur_sz);
       {
         MutexLockerEx x(_indexedFreeListParLocks[cur_sz],
                         Mutex::_no_safepoint_check_flag);
-        FreeList<FreeChunk>* gfl = &_indexedFreeList[cur_sz];
+        AdaptiveFreeList<FreeChunk>* gfl = &_indexedFreeList[cur_sz];
         if (gfl->count() != 0) {
           // nn is the number of chunks of size cur_sz that
           // we'd need to split k-ways each, in order to create
@@ -2832,12 +2834,11 @@
     MutexLockerEx x(parDictionaryAllocLock(),
                     Mutex::_no_safepoint_check_flag);
     while (n > 0) {
-      fc = dictionary()->get_chunk(MAX2(n * word_sz,
-                                  _dictionary->min_size()),
+      fc = dictionary()->get_chunk(MAX2(n * word_sz, _dictionary->min_size()),
                                   FreeBlockDictionary<FreeChunk>::atLeast);
       if (fc != NULL) {
         _bt.allocated((HeapWord*)fc, fc->size(), true /* reducing */);  // update _unallocated_blk
-        dictionary()->dict_census_udpate(fc->size(),
+        dictionary()->dict_census_update(fc->size(),
                                        true /*split*/,
                                        false /*birth*/);
         break;
@@ -2890,7 +2891,7 @@
       fc->set_size(prefix_size);
       if (rem >= IndexSetSize) {
         returnChunkToDictionary(rem_fc);
-        dictionary()->dict_census_udpate(rem, true /*split*/, true /*birth*/);
+        dictionary()->dict_census_update(rem, true /*split*/, true /*birth*/);
         rem_fc = NULL;
       }
       // Otherwise, return it to the small list below.
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/compactibleFreeListSpace.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -25,6 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_COMPACTIBLEFREELISTSPACE_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_COMPACTIBLEFREELISTSPACE_HPP
 
+#include "gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp"
 #include "gc_implementation/concurrentMarkSweep/promotionInfo.hpp"
 #include "memory/binaryTreeDictionary.hpp"
 #include "memory/blockOffsetTable.inline.hpp"
@@ -38,6 +39,7 @@
 class CompactibleFreeListSpace;
 class BlkClosure;
 class BlkClosureCareful;
+class FreeChunk;
 class UpwardsObjectClosure;
 class ObjectClosureCareful;
 class Klass;
@@ -131,7 +133,7 @@
   FreeBlockDictionary<FreeChunk>::DictionaryChoice _dictionaryChoice;
   FreeBlockDictionary<FreeChunk>* _dictionary;    // ptr to dictionary for large size blocks
 
-  FreeList<FreeChunk> _indexedFreeList[IndexSetSize];
+  AdaptiveFreeList<FreeChunk> _indexedFreeList[IndexSetSize];
                                        // indexed array for small size blocks
   // allocation stategy
   bool       _fitStrategy;      // Use best fit strategy.
@@ -168,7 +170,7 @@
   // If the count of "fl" is negative, it's absolute value indicates a
   // number of free chunks that had been previously "borrowed" from global
   // list of size "word_sz", and must now be decremented.
-  void par_get_chunk_of_blocks(size_t word_sz, size_t n, FreeList<FreeChunk>* fl);
+  void par_get_chunk_of_blocks(size_t word_sz, size_t n, AdaptiveFreeList<FreeChunk>* fl);
 
   // Allocation helper functions
   // Allocate using a strategy that takes from the indexed free lists
@@ -214,7 +216,7 @@
   // and return it.  The split off remainder is returned to
   // the free lists.  The old name for getFromListGreater
   // was lookInListGreater.
-  FreeChunk* getFromListGreater(FreeList<FreeChunk>* fl, size_t numWords);
+  FreeChunk* getFromListGreater(AdaptiveFreeList<FreeChunk>* fl, size_t numWords);
   // Get a chunk in the indexed free list or dictionary,
   // by considering a larger chunk and splitting it.
   FreeChunk* getChunkFromGreater(size_t numWords);
@@ -621,7 +623,7 @@
   CompactibleFreeListSpace* _cfls;
 
   // Our local free lists.
-  FreeList<FreeChunk> _indexedFreeList[CompactibleFreeListSpace::IndexSetSize];
+  AdaptiveFreeList<FreeChunk> _indexedFreeList[CompactibleFreeListSpace::IndexSetSize];
 
   // Initialized from a command-line arg.
 
@@ -634,7 +636,7 @@
   size_t        _num_blocks        [CompactibleFreeListSpace::IndexSetSize];
 
   // Internal work method
-  void get_from_global_pool(size_t word_sz, FreeList<FreeChunk>* fl);
+  void get_from_global_pool(size_t word_sz, AdaptiveFreeList<FreeChunk>* fl);
 
 public:
   CFLS_LAB(CompactibleFreeListSpace* cfls);
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -9143,7 +9143,7 @@
     size_t shrinkable_size_in_bytes = chunk_at_end->size();
     size_t aligned_shrinkable_size_in_bytes =
       align_size_down(shrinkable_size_in_bytes, os::vm_page_size());
-    assert(unallocated_start <= chunk_at_end->end(),
+    assert(unallocated_start <= (HeapWord*) chunk_at_end->end(),
       "Inconsistent chunk at end of space");
     size_t bytes = MIN2(desired_bytes, aligned_shrinkable_size_in_bytes);
     size_t word_size_before = heap_word_size(_virtual_space.committed_size());
@@ -9210,7 +9210,7 @@
 
     assert(_cmsSpace->unallocated_block() <= _cmsSpace->end(),
       "Inconsistency at end of space");
-    assert(chunk_at_end->end() == _cmsSpace->end(),
+    assert(chunk_at_end->end() == (uintptr_t*) _cmsSpace->end(),
       "Shrinking is inconsistent");
     return;
   }
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/freeChunk.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -133,7 +133,7 @@
   }
 
   // Return the address past the end of this chunk
-  HeapWord* end() const { return ((HeapWord*) this) + size(); }
+  uintptr_t* end() const { return ((uintptr_t*) this) + size(); }
 
   // debugging
   void verify()             const PRODUCT_RETURN;
--- a/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/gc_implementation/concurrentMarkSweep/vmStructs_cms.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -25,6 +25,8 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_VMSTRUCTS_CMS_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_CONCURRENTMARKSWEEP_VMSTRUCTS_CMS_HPP
 
+typedef BinaryTreeDictionary<FreeChunk, AdaptiveFreeList> AFLBinaryTreeDictionary;
+
 #define VM_STRUCTS_CMS(nonstatic_field, \
                    volatile_nonstatic_field, \
                    static_field) \
@@ -38,14 +40,8 @@
   nonstatic_field(CMSCollector,                _markBitMap,                                   CMSBitMap)                             \
   nonstatic_field(ConcurrentMarkSweepGeneration, _cmsSpace,                                   CompactibleFreeListSpace*)             \
      static_field(ConcurrentMarkSweepThread,   _collector,                                    CMSCollector*)                         \
-  volatile_nonstatic_field(FreeChunk,          _size,                                         size_t)                                \
-  nonstatic_field(FreeChunk,                   _next,                                         FreeChunk*)                            \
-  nonstatic_field(FreeChunk,                   _prev,                                         FreeChunk*)                            \
   nonstatic_field(LinearAllocBlock,            _word_size,                                    size_t)                                \
-  nonstatic_field(FreeList<FreeChunk>,         _size,                                         size_t)                                \
-  nonstatic_field(FreeList<FreeChunk>,         _count,                                        ssize_t)                               \
-  nonstatic_field(BinaryTreeDictionary<FreeChunk>,_total_size,                                 size_t)                                \
-  nonstatic_field(CompactibleFreeListSpace,    _dictionary,                                   FreeBlockDictionary<FreeChunk>*)       \
+  nonstatic_field(AFLBinaryTreeDictionary,     _total_size,                                   size_t)                                \
   nonstatic_field(CompactibleFreeListSpace,    _indexedFreeList[0],                           FreeList<FreeChunk>)                   \
   nonstatic_field(CompactibleFreeListSpace,    _smallLinearAllocBlock,                        LinearAllocBlock)
 
@@ -60,19 +56,17 @@
   declare_toplevel_type(CMSCollector)                                     \
   declare_toplevel_type(CMSBitMap)                                        \
   declare_toplevel_type(FreeChunk)                                        \
+  declare_toplevel_type(Metablock)                                        \
   declare_toplevel_type(ConcurrentMarkSweepThread*)                       \
   declare_toplevel_type(ConcurrentMarkSweepGeneration*)                   \
   declare_toplevel_type(SurrogateLockerThread*)                           \
   declare_toplevel_type(CompactibleFreeListSpace*)                        \
   declare_toplevel_type(CMSCollector*)                                    \
-  declare_toplevel_type(FreeChunk*)                                       \
-  declare_toplevel_type(BinaryTreeDictionary<FreeChunk>*)                 \
-  declare_toplevel_type(FreeBlockDictionary<FreeChunk>*)                  \
-  declare_toplevel_type(FreeList<FreeChunk>*)                             \
-  declare_toplevel_type(FreeList<FreeChunk>)                              \
+  declare_toplevel_type(AFLBinaryTreeDictionary*)                         \
   declare_toplevel_type(LinearAllocBlock)                                 \
   declare_toplevel_type(FreeBlockDictionary<FreeChunk>)                   \
-            declare_type(BinaryTreeDictionary<FreeChunk>, FreeBlockDictionary<FreeChunk>)
+  declare_type(AFLBinaryTreeDictionary, FreeBlockDictionary<FreeChunk>)   \
+            declare_type(AFLBinaryTreeDictionary, FreeBlockDictionary<FreeChunk>) \
 
 #define VM_INT_CONSTANTS_CMS(declare_constant)                            \
   declare_constant(Generation::ConcurrentMarkSweep)                       \
--- a/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/gc_implementation/shared/vmGCOperations.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -191,7 +191,7 @@
 class VM_CollectForMetadataAllocation: public VM_GC_Operation {
  private:
   MetaWord*                _result;
-  size_t      _size;                       // size of object to be allocated
+  size_t                   _size;     // size of object to be allocated
   Metaspace::MetadataType  _mdtype;
   ClassLoaderData*         _loader_data;
  public:
--- a/src/share/vm/interpreter/abstractInterpreter.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/interpreter/abstractInterpreter.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -320,6 +320,7 @@
   void bang_stack_shadow_pages(bool native_call);
 
   void generate_all();
+  void initialize_method_handle_entries();
 
  public:
   AbstractInterpreterGenerator(StubQueue* _code);
--- a/src/share/vm/interpreter/bytecodeInterpreter.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/interpreter/bytecodeInterpreter.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -235,10 +235,6 @@
 #endif
 #endif
 
-// JavaStack Implementation
-#define MORE_STACK(count)  \
-    (topOfStack -= ((count) * Interpreter::stackElementWords))
-
 
 #define UPDATE_PC(opsize) {pc += opsize; }
 /*
@@ -575,7 +571,7 @@
 
 /* 0xE0 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
 /* 0xE4 */ &&opc_default,     &&opc_fast_aldc,      &&opc_fast_aldc_w,  &&opc_return_register_finalizer,
-/* 0xE8 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
+/* 0xE8 */ &&opc_invokehandle,&&opc_default,        &&opc_default,      &&opc_default,
 /* 0xEC */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
 
 /* 0xF0 */ &&opc_default,     &&opc_default,        &&opc_default,      &&opc_default,
@@ -1773,7 +1769,7 @@
 
           oop obj;
           if ((Bytecodes::Code)opcode == Bytecodes::_getstatic) {
-            Klass* k = (Klass*) cache->f1();
+            Klass* k = cache->f1_as_klass();
             obj = k->java_mirror();
             MORE_STACK(1);  // Assume single slot push
           } else {
@@ -1885,7 +1881,7 @@
             --count;
           }
           if ((Bytecodes::Code)opcode == Bytecodes::_putstatic) {
-            Klass* k = (Klass*) cache->f1();
+            Klass* k = cache->f1_as_klass();
             obj = k->java_mirror();
           } else {
             --count;
@@ -2190,6 +2186,7 @@
       }
 
       CASE(_invokedynamic): {
+
         if (!EnableInvokeDynamic) {
           // We should not encounter this bytecode if !EnableInvokeDynamic.
           // The verifier will stop it.  However, if we get past the verifier,
@@ -2199,30 +2196,68 @@
           ShouldNotReachHere();
         }
 
-        int index = Bytes::get_native_u4(pc+1);
+        u4 index = Bytes::get_native_u4(pc+1);
+        ConstantPoolCacheEntry* cache = cp->constant_pool()->invokedynamic_cp_cache_entry_at(index);
 
         // We are resolved if the resolved_references field contains a non-null object (CallSite, etc.)
         // This kind of CP cache entry does not need to match the flags byte, because
         // there is a 1-1 relation between bytecode type and CP entry type.
-        ConstantPool* constants = METHOD->constants();
-        oop result = constants->resolved_references()->obj_at(index);
-        if (result == NULL) {
+        if (! cache->is_resolved((Bytecodes::Code) opcode)) {
           CALL_VM(InterpreterRuntime::resolve_invokedynamic(THREAD),
                   handle_exception);
-          result = THREAD->vm_result();
+          cache = cp->constant_pool()->invokedynamic_cp_cache_entry_at(index);
         }
 
-        VERIFY_OOP(result);
-        oop method_handle = java_lang_invoke_CallSite::target(result);
-        CHECK_NULL(method_handle);
-
-        istate->set_msg(call_method_handle);
-        istate->set_callee((Method*) method_handle);
+        Method* method = cache->f1_as_method();
+        VERIFY_OOP(method);
+
+        if (cache->has_appendix()) {
+          ConstantPool* constants = METHOD->constants();
+          SET_STACK_OBJECT(cache->appendix_if_resolved(constants), 0);
+          MORE_STACK(1);
+        }
+
+        istate->set_msg(call_method);
+        istate->set_callee(method);
+        istate->set_callee_entry_point(method->from_interpreted_entry());
         istate->set_bcp_advance(5);
 
         UPDATE_PC_AND_RETURN(0); // I'll be back...
       }
 
+      CASE(_invokehandle): {
+
+        if (!EnableInvokeDynamic) {
+          ShouldNotReachHere();
+        }
+
+        u2 index = Bytes::get_native_u2(pc+1);
+        ConstantPoolCacheEntry* cache = cp->entry_at(index);
+
+        if (! cache->is_resolved((Bytecodes::Code) opcode)) {
+          CALL_VM(InterpreterRuntime::resolve_invokehandle(THREAD),
+                  handle_exception);
+          cache = cp->entry_at(index);
+        }
+
+        Method* method = cache->f1_as_method();
+
+        VERIFY_OOP(method);
+
+        if (cache->has_appendix()) {
+          ConstantPool* constants = METHOD->constants();
+          SET_STACK_OBJECT(cache->appendix_if_resolved(constants), 0);
+          MORE_STACK(1);
+        }
+
+        istate->set_msg(call_method);
+        istate->set_callee(method);
+        istate->set_callee_entry_point(method->from_interpreted_entry());
+        istate->set_bcp_advance(3);
+
+        UPDATE_PC_AND_RETURN(0); // I'll be back...
+      }
+
       CASE(_invokeinterface): {
         u2 index = Bytes::get_native_u2(pc+1);
 
--- a/src/share/vm/interpreter/bytecodeInterpreter.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/interpreter/bytecodeInterpreter.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -50,6 +50,10 @@
 
 #ifdef CC_INTERP
 
+// JavaStack Implementation
+#define MORE_STACK(count)  \
+    (topOfStack -= ((count) * Interpreter::stackElementWords))
+
 // CVM definitions find hotspot equivalents...
 
 union VMJavaVal64 {
@@ -107,7 +111,6 @@
          rethrow_exception,         // unwinding and throwing exception
          // requests to frame manager from C++ interpreter
          call_method,               // request for new frame from interpreter, manager responds with method_entry
-         call_method_handle,        // like the above, except the callee is a method handle
          return_from_method,        // request from interpreter to unwind, manager responds with method_continue
          more_monitors,             // need a new monitor
          throwing_exception,        // unwind stack and rethrow
--- a/src/share/vm/interpreter/cppInterpreter.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/interpreter/cppInterpreter.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -117,7 +117,6 @@
     method_entry(empty);
     method_entry(accessor);
     method_entry(abstract);
-    method_entry(method_handle);
     method_entry(java_lang_math_sin   );
     method_entry(java_lang_math_cos   );
     method_entry(java_lang_math_tan   );
@@ -125,7 +124,12 @@
     method_entry(java_lang_math_sqrt  );
     method_entry(java_lang_math_log   );
     method_entry(java_lang_math_log10 );
+    method_entry(java_lang_math_pow );
+    method_entry(java_lang_math_exp );
     method_entry(java_lang_ref_reference_get);
+
+    initialize_method_handle_entries();
+
     Interpreter::_native_entry_begin = Interpreter::code()->code_end();
     method_entry(native);
     method_entry(native_synchronized);
--- a/src/share/vm/interpreter/interpreter.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/interpreter/interpreter.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -464,3 +464,11 @@
     }
   }
 }
+
+void AbstractInterpreterGenerator::initialize_method_handle_entries() {
+  // method handle entry kinds are generated later in MethodHandlesAdapterGenerator::generate:
+  for (int i = Interpreter::method_handle_invoke_FIRST; i <= Interpreter::method_handle_invoke_LAST; i++) {
+    Interpreter::MethodKind kind = (Interpreter::MethodKind) i;
+    Interpreter::_entry_table[kind] = Interpreter::_entry_table[Interpreter::abstract];
+  }
+}
--- a/src/share/vm/interpreter/templateInterpreter.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/interpreter/templateInterpreter.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -373,11 +373,7 @@
   method_entry(java_lang_math_pow  )
   method_entry(java_lang_ref_reference_get)
 
-  // method handle entry kinds are generated later in MethodHandlesAdapterGenerator::generate:
-  for (int i = Interpreter::method_handle_invoke_FIRST; i <= Interpreter::method_handle_invoke_LAST; i++) {
-    Interpreter::MethodKind kind = (Interpreter::MethodKind) i;
-    Interpreter::_entry_table[kind] = Interpreter::_entry_table[Interpreter::abstract];
-  }
+  initialize_method_handle_entries();
 
   // all native method kinds (must be one contiguous block)
   Interpreter::_native_entry_begin = Interpreter::code()->code_end();
--- a/src/share/vm/memory/binaryTreeDictionary.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/memory/binaryTreeDictionary.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -25,9 +25,15 @@
 #include "precompiled.hpp"
 #include "gc_implementation/shared/allocationStats.hpp"
 #include "memory/binaryTreeDictionary.hpp"
+#include "memory/freeList.hpp"
+#include "memory/freeBlockDictionary.hpp"
+#include "memory/metablock.hpp"
+#include "memory/metachunk.hpp"
 #include "runtime/globals.hpp"
 #include "utilities/ostream.hpp"
 #ifndef SERIALGC
+#include "gc_implementation/concurrentMarkSweep/adaptiveFreeList.hpp"
+#include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
 #include "gc_implementation/shared/spaceDecorator.hpp"
 #include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
 #endif // SERIALGC
@@ -37,15 +43,18 @@
 // This is currently used in the Concurrent Mark&Sweep implementation.
 ////////////////////////////////////////////////////////////////////////////////
 
-template <class Chunk>
-TreeChunk<Chunk>* TreeChunk<Chunk>::as_TreeChunk(Chunk* fc) {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t TreeChunk<Chunk_t, FreeList_t>::_min_tree_chunk_size = sizeof(TreeChunk<Chunk_t,  FreeList_t>)/HeapWordSize;
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>* TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(Chunk_t* fc) {
   // Do some assertion checking here.
-  return (TreeChunk<Chunk>*) fc;
+  return (TreeChunk<Chunk_t, FreeList_t>*) fc;
 }
 
-template <class Chunk>
-void TreeChunk<Chunk>::verify_tree_chunk_list() const {
-  TreeChunk<Chunk>* nextTC = (TreeChunk<Chunk>*)next();
+template <class Chunk_t, template <class> class FreeList_t>
+void TreeChunk<Chunk_t, FreeList_t>::verify_tree_chunk_list() const {
+  TreeChunk<Chunk_t, FreeList_t>* nextTC = (TreeChunk<Chunk_t, FreeList_t>*)next();
   if (prev() != NULL) { // interior list node shouldn'r have tree fields
     guarantee(embedded_list()->parent() == NULL && embedded_list()->left() == NULL &&
               embedded_list()->right()  == NULL, "should be clear");
@@ -57,53 +66,113 @@
   }
 }
 
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>::TreeList() {}
 
-template <class Chunk>
-TreeList<Chunk>* TreeList<Chunk>::as_TreeList(TreeChunk<Chunk>* tc) {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>*
+TreeList<Chunk_t, FreeList_t>::as_TreeList(TreeChunk<Chunk_t,FreeList_t>* tc) {
   // This first free chunk in the list will be the tree list.
-  assert(tc->size() >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "Chunk is too small for a TreeChunk");
-  TreeList<Chunk>* tl = tc->embedded_list();
+  assert((tc->size() >= (TreeChunk<Chunk_t, FreeList_t>::min_size())),
+    "Chunk is too small for a TreeChunk");
+  TreeList<Chunk_t, FreeList_t>* tl = tc->embedded_list();
+  tl->initialize();
   tc->set_list(tl);
-#ifdef ASSERT
-  tl->set_protecting_lock(NULL);
-#endif
-  tl->set_hint(0);
   tl->set_size(tc->size());
   tl->link_head(tc);
   tl->link_tail(tc);
   tl->set_count(1);
-  tl->init_statistics(true /* split_birth */);
-  tl->set_parent(NULL);
-  tl->set_left(NULL);
-  tl->set_right(NULL);
+
   return tl;
 }
 
-template <class Chunk>
-TreeList<Chunk>* TreeList<Chunk>::as_TreeList(HeapWord* addr, size_t size) {
-  TreeChunk<Chunk>* tc = (TreeChunk<Chunk>*) addr;
-  assert(size >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "Chunk is too small for a TreeChunk");
-  // The space in the heap will have been mangled initially but
-  // is not remangled when a free chunk is returned to the free list
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>*
+get_chunk(size_t size, enum FreeBlockDictionary<Chunk_t>::Dither dither) {
+  FreeBlockDictionary<Chunk_t>::verify_par_locked();
+  Chunk_t* res = get_chunk_from_tree(size, dither);
+  assert(res == NULL || res->is_free(),
+         "Should be returning a free chunk");
+  assert(dither != FreeBlockDictionary<Chunk_t>::exactly ||
+         res->size() == size, "Not correct size");
+  return res;
+}
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>*
+TreeList<Chunk_t, FreeList_t>::as_TreeList(HeapWord* addr, size_t size) {
+  TreeChunk<Chunk_t, FreeList_t>* tc = (TreeChunk<Chunk_t, FreeList_t>*) addr;
+  assert((size >= TreeChunk<Chunk_t, FreeList_t>::min_size()),
+    "Chunk is too small for a TreeChunk");
+  // The space will have been mangled initially but
+  // is not remangled when a Chunk_t is returned to the free list
   // (since it is used to maintain the chunk on the free list).
-  assert((ZapUnusedHeapArea &&
-          SpaceMangler::is_mangled((HeapWord*) tc->size_addr()) &&
-          SpaceMangler::is_mangled((HeapWord*) tc->prev_addr()) &&
-          SpaceMangler::is_mangled((HeapWord*) tc->next_addr())) ||
-          (tc->size() == 0 && tc->prev() == NULL && tc->next() == NULL),
-    "Space should be clear or mangled");
+  tc->assert_is_mangled();
   tc->set_size(size);
   tc->link_prev(NULL);
   tc->link_next(NULL);
-  TreeList<Chunk>* tl = TreeList<Chunk>::as_TreeList(tc);
+  TreeList<Chunk_t, FreeList_t>* tl = TreeList<Chunk_t, FreeList_t>::as_TreeList(tc);
   return tl;
 }
 
-template <class Chunk>
-TreeList<Chunk>* TreeList<Chunk>::remove_chunk_replace_if_needed(TreeChunk<Chunk>* tc) {
 
-  TreeList<Chunk>* retTL = this;
-  Chunk* list = head();
+#ifndef SERIALGC
+// Specialize for AdaptiveFreeList which tries to avoid
+// splitting a chunk of a size that is under populated in favor of
+// an over populated size.  The general get_better_list() just returns
+// the current list.
+template <>
+TreeList<FreeChunk, AdaptiveFreeList>*
+TreeList<FreeChunk, AdaptiveFreeList>::get_better_list(
+  BinaryTreeDictionary<FreeChunk, ::AdaptiveFreeList>* dictionary) {
+  // A candidate chunk has been found.  If it is already under
+  // populated, get a chunk associated with the hint for this
+  // chunk.
+
+  TreeList<FreeChunk, ::AdaptiveFreeList>* curTL = this;
+  if (surplus() <= 0) {
+    /* Use the hint to find a size with a surplus, and reset the hint. */
+    TreeList<FreeChunk, ::AdaptiveFreeList>* hintTL = this;
+    while (hintTL->hint() != 0) {
+      assert(hintTL->hint() > hintTL->size(),
+        "hint points in the wrong direction");
+      hintTL = dictionary->find_list(hintTL->hint());
+      assert(curTL != hintTL, "Infinite loop");
+      if (hintTL == NULL ||
+          hintTL == curTL /* Should not happen but protect against it */ ) {
+        // No useful hint.  Set the hint to NULL and go on.
+        curTL->set_hint(0);
+        break;
+      }
+      assert(hintTL->size() > curTL->size(), "hint is inconsistent");
+      if (hintTL->surplus() > 0) {
+        // The hint led to a list that has a surplus.  Use it.
+        // Set the hint for the candidate to an overpopulated
+        // size.
+        curTL->set_hint(hintTL->size());
+        // Change the candidate.
+        curTL = hintTL;
+        break;
+      }
+    }
+  }
+  return curTL;
+}
+#endif // SERIALGC
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>*
+TreeList<Chunk_t, FreeList_t>::get_better_list(
+  BinaryTreeDictionary<Chunk_t, FreeList_t>* dictionary) {
+  return this;
+}
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>* TreeList<Chunk_t, FreeList_t>::remove_chunk_replace_if_needed(TreeChunk<Chunk_t, FreeList_t>* tc) {
+
+  TreeList<Chunk_t, FreeList_t>* retTL = this;
+  Chunk_t* list = head();
   assert(!list || list != list->next(), "Chunk on list twice");
   assert(tc != NULL, "Chunk being removed is NULL");
   assert(parent() == NULL || this == parent()->left() ||
@@ -112,13 +181,13 @@
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
 
-  Chunk* prevFC = tc->prev();
-  TreeChunk<Chunk>* nextTC = TreeChunk<Chunk>::as_TreeChunk(tc->next());
+  Chunk_t* prevFC = tc->prev();
+  TreeChunk<Chunk_t, FreeList_t>* nextTC = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(tc->next());
   assert(list != NULL, "should have at least the target chunk");
 
   // Is this the first item on the list?
   if (tc == list) {
-    // The "getChunk..." functions for a TreeList<Chunk> will not return the
+    // The "getChunk..." functions for a TreeList<Chunk_t, FreeList_t> will not return the
     // first chunk in the list unless it is the last chunk in the list
     // because the first chunk is also acting as the tree node.
     // When coalescing happens, however, the first chunk in the a tree
@@ -127,8 +196,8 @@
     // allocated when the sweeper yields (giving up the free list lock)
     // to allow mutator activity.  If this chunk is the first in the
     // list and is not the last in the list, do the work to copy the
-    // TreeList<Chunk> from the first chunk to the next chunk and update all
-    // the TreeList<Chunk> pointers in the chunks in the list.
+    // TreeList<Chunk_t, FreeList_t> from the first chunk to the next chunk and update all
+    // the TreeList<Chunk_t, FreeList_t> pointers in the chunks in the list.
     if (nextTC == NULL) {
       assert(prevFC == NULL, "Not last chunk in the list");
       set_tail(NULL);
@@ -141,11 +210,11 @@
       // This can be slow for a long list.  Consider having
       // an option that does not allow the first chunk on the
       // list to be coalesced.
-      for (TreeChunk<Chunk>* curTC = nextTC; curTC != NULL;
-          curTC = TreeChunk<Chunk>::as_TreeChunk(curTC->next())) {
+      for (TreeChunk<Chunk_t, FreeList_t>* curTC = nextTC; curTC != NULL;
+          curTC = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(curTC->next())) {
         curTC->set_list(retTL);
       }
-      // Fix the parent to point to the new TreeList<Chunk>.
+      // Fix the parent to point to the new TreeList<Chunk_t, FreeList_t>.
       if (retTL->parent() != NULL) {
         if (this == retTL->parent()->left()) {
           retTL->parent()->set_left(retTL);
@@ -176,9 +245,9 @@
     prevFC->link_after(nextTC);
   }
 
-  // Below this point the embeded TreeList<Chunk> being used for the
+  // Below this point the embeded TreeList<Chunk_t, FreeList_t> being used for the
   // tree node may have changed. Don't use "this"
-  // TreeList<Chunk>*.
+  // TreeList<Chunk_t, FreeList_t>*.
   // chunk should still be a free chunk (bit set in _prev)
   assert(!retTL->head() || retTL->size() == retTL->head()->size(),
     "Wrong sized chunk in list");
@@ -188,7 +257,7 @@
     tc->set_list(NULL);
     bool prev_found = false;
     bool next_found = false;
-    for (Chunk* curFC = retTL->head();
+    for (Chunk_t* curFC = retTL->head();
          curFC != NULL; curFC = curFC->next()) {
       assert(curFC != tc, "Chunk is still in list");
       if (curFC == prevFC) {
@@ -215,8 +284,8 @@
   return retTL;
 }
 
-template <class Chunk>
-void TreeList<Chunk>::return_chunk_at_tail(TreeChunk<Chunk>* chunk) {
+template <class Chunk_t, template <class> class FreeList_t>
+void TreeList<Chunk_t, FreeList_t>::return_chunk_at_tail(TreeChunk<Chunk_t, FreeList_t>* chunk) {
   assert(chunk != NULL, "returning NULL chunk");
   assert(chunk->list() == this, "list should be set for chunk");
   assert(tail() != NULL, "The tree list is embedded in the first chunk");
@@ -225,12 +294,12 @@
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
 
-  Chunk* fc = tail();
+  Chunk_t* fc = tail();
   fc->link_after(chunk);
   link_tail(chunk);
 
   assert(!tail() || size() == tail()->size(), "Wrong sized chunk in list");
-  increment_count();
+  FreeList_t<Chunk_t>::increment_count();
   debug_only(increment_returned_bytes_by(chunk->size()*sizeof(HeapWord));)
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
@@ -238,10 +307,10 @@
 
 // Add this chunk at the head of the list.  "At the head of the list"
 // is defined to be after the chunk pointer to by head().  This is
-// because the TreeList<Chunk> is embedded in the first TreeChunk<Chunk> in the
-// list.  See the definition of TreeChunk<Chunk>.
-template <class Chunk>
-void TreeList<Chunk>::return_chunk_at_head(TreeChunk<Chunk>* chunk) {
+// because the TreeList<Chunk_t, FreeList_t> is embedded in the first TreeChunk<Chunk_t, FreeList_t> in the
+// list.  See the definition of TreeChunk<Chunk_t, FreeList_t>.
+template <class Chunk_t, template <class> class FreeList_t>
+void TreeList<Chunk_t, FreeList_t>::return_chunk_at_head(TreeChunk<Chunk_t, FreeList_t>* chunk) {
   assert(chunk->list() == this, "list should be set for chunk");
   assert(head() != NULL, "The tree list is embedded in the first chunk");
   assert(chunk != NULL, "returning NULL chunk");
@@ -249,7 +318,7 @@
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
 
-  Chunk* fc = head()->next();
+  Chunk_t* fc = head()->next();
   if (fc != NULL) {
     chunk->link_after(fc);
   } else {
@@ -258,28 +327,38 @@
   }
   head()->link_after(chunk);
   assert(!head() || size() == head()->size(), "Wrong sized chunk in list");
-  increment_count();
+  FreeList_t<Chunk_t>::increment_count();
   debug_only(increment_returned_bytes_by(chunk->size()*sizeof(HeapWord));)
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
 }
 
-template <class Chunk>
-TreeChunk<Chunk>* TreeList<Chunk>::head_as_TreeChunk() {
-  assert(head() == NULL || TreeChunk<Chunk>::as_TreeChunk(head())->list() == this,
-    "Wrong type of chunk?");
-  return TreeChunk<Chunk>::as_TreeChunk(head());
+template <class Chunk_t, template <class> class FreeList_t>
+void TreeChunk<Chunk_t, FreeList_t>::assert_is_mangled() const {
+  assert((ZapUnusedHeapArea &&
+          SpaceMangler::is_mangled((HeapWord*) Chunk_t::size_addr()) &&
+          SpaceMangler::is_mangled((HeapWord*) Chunk_t::prev_addr()) &&
+          SpaceMangler::is_mangled((HeapWord*) Chunk_t::next_addr())) ||
+          (size() == 0 && prev() == NULL && next() == NULL),
+    "Space should be clear or mangled");
 }
 
-template <class Chunk>
-TreeChunk<Chunk>* TreeList<Chunk>::first_available() {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>* TreeList<Chunk_t, FreeList_t>::head_as_TreeChunk() {
+  assert(head() == NULL || (TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(head())->list() == this),
+    "Wrong type of chunk?");
+  return TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(head());
+}
+
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>* TreeList<Chunk_t, FreeList_t>::first_available() {
   assert(head() != NULL, "The head of the list cannot be NULL");
-  Chunk* fc = head()->next();
-  TreeChunk<Chunk>* retTC;
+  Chunk_t* fc = head()->next();
+  TreeChunk<Chunk_t, FreeList_t>* retTC;
   if (fc == NULL) {
     retTC = head_as_TreeChunk();
   } else {
-    retTC = TreeChunk<Chunk>::as_TreeChunk(fc);
+    retTC = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(fc);
   }
   assert(retTC->list() == this, "Wrong type of chunk.");
   return retTC;
@@ -288,41 +367,32 @@
 // Returns the block with the largest heap address amongst
 // those in the list for this size; potentially slow and expensive,
 // use with caution!
-template <class Chunk>
-TreeChunk<Chunk>* TreeList<Chunk>::largest_address() {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>* TreeList<Chunk_t, FreeList_t>::largest_address() {
   assert(head() != NULL, "The head of the list cannot be NULL");
-  Chunk* fc = head()->next();
-  TreeChunk<Chunk>* retTC;
+  Chunk_t* fc = head()->next();
+  TreeChunk<Chunk_t, FreeList_t>* retTC;
   if (fc == NULL) {
     retTC = head_as_TreeChunk();
   } else {
     // walk down the list and return the one with the highest
     // heap address among chunks of this size.
-    Chunk* last = fc;
+    Chunk_t* last = fc;
     while (fc->next() != NULL) {
       if ((HeapWord*)last < (HeapWord*)fc) {
         last = fc;
       }
       fc = fc->next();
     }
-    retTC = TreeChunk<Chunk>::as_TreeChunk(last);
+    retTC = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(last);
   }
   assert(retTC->list() == this, "Wrong type of chunk.");
   return retTC;
 }
 
-template <class Chunk>
-BinaryTreeDictionary<Chunk>::BinaryTreeDictionary(bool adaptive_freelists, bool splay) :
-  _splay(splay), _adaptive_freelists(adaptive_freelists),
-  _total_size(0), _total_free_blocks(0), _root(0) {}
-
-template <class Chunk>
-BinaryTreeDictionary<Chunk>::BinaryTreeDictionary(MemRegion mr,
-                                           bool adaptive_freelists,
-                                           bool splay):
-  _adaptive_freelists(adaptive_freelists), _splay(splay)
-{
-  assert(mr.word_size() >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "minimum chunk size");
+template <class Chunk_t, template <class> class FreeList_t>
+BinaryTreeDictionary<Chunk_t, FreeList_t>::BinaryTreeDictionary(MemRegion mr) {
+  assert((mr.byte_size() > min_size()), "minimum chunk size");
 
   reset(mr);
   assert(root()->left() == NULL, "reset check failed");
@@ -333,52 +403,48 @@
   assert(total_free_blocks() == 1, "reset check failed");
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::inc_total_size(size_t inc) {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::inc_total_size(size_t inc) {
   _total_size = _total_size + inc;
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::dec_total_size(size_t dec) {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::dec_total_size(size_t dec) {
   _total_size = _total_size - dec;
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::reset(MemRegion mr) {
-  assert(mr.word_size() >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "minimum chunk size");
-  set_root(TreeList<Chunk>::as_TreeList(mr.start(), mr.word_size()));
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::reset(MemRegion mr) {
+  assert((mr.byte_size() > min_size()), "minimum chunk size");
+  set_root(TreeList<Chunk_t, FreeList_t>::as_TreeList(mr.start(), mr.word_size()));
   set_total_size(mr.word_size());
   set_total_free_blocks(1);
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::reset(HeapWord* addr, size_t byte_size) {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::reset(HeapWord* addr, size_t byte_size) {
   MemRegion mr(addr, heap_word_size(byte_size));
   reset(mr);
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::reset() {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::reset() {
   set_root(NULL);
   set_total_size(0);
   set_total_free_blocks(0);
 }
 
 // Get a free block of size at least size from tree, or NULL.
-// If a splay step is requested, the removal algorithm (only) incorporates
-// a splay step as follows:
-// . the search proceeds down the tree looking for a possible
-//   match. At the (closest) matching location, an appropriate splay step is applied
-//   (zig, zig-zig or zig-zag). A chunk of the appropriate size is then returned
-//   if available, and if it's the last chunk, the node is deleted. A deteleted
-//   node is replaced in place by its tree successor.
-template <class Chunk>
-TreeChunk<Chunk>*
-BinaryTreeDictionary<Chunk>::get_chunk_from_tree(size_t size, enum FreeBlockDictionary<Chunk>::Dither dither, bool splay)
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>*
+BinaryTreeDictionary<Chunk_t, FreeList_t>::get_chunk_from_tree(
+                              size_t size,
+                              enum FreeBlockDictionary<Chunk_t>::Dither dither)
 {
-  TreeList<Chunk> *curTL, *prevTL;
-  TreeChunk<Chunk>* retTC = NULL;
-  assert(size >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "minimum chunk size");
+  TreeList<Chunk_t, FreeList_t> *curTL, *prevTL;
+  TreeChunk<Chunk_t, FreeList_t>* retTC = NULL;
+
+  assert((size >= min_size()), "minimum chunk size");
   if (FLSVerifyDictionary) {
     verify_tree();
   }
@@ -398,7 +464,7 @@
   }
   if (curTL == NULL) { // couldn't find exact match
 
-    if (dither == FreeBlockDictionary<Chunk>::exactly) return NULL;
+    if (dither == FreeBlockDictionary<Chunk_t>::exactly) return NULL;
 
     // try and find the next larger size by walking back up the search path
     for (curTL = prevTL; curTL != NULL;) {
@@ -410,46 +476,9 @@
   }
   if (curTL != NULL) {
     assert(curTL->size() >= size, "size inconsistency");
-    if (adaptive_freelists()) {
 
-      // A candidate chunk has been found.  If it is already under
-      // populated, get a chunk associated with the hint for this
-      // chunk.
-      if (curTL->surplus() <= 0) {
-        /* Use the hint to find a size with a surplus, and reset the hint. */
-        TreeList<Chunk>* hintTL = curTL;
-        while (hintTL->hint() != 0) {
-          assert(hintTL->hint() == 0 || hintTL->hint() > hintTL->size(),
-            "hint points in the wrong direction");
-          hintTL = find_list(hintTL->hint());
-          assert(curTL != hintTL, "Infinite loop");
-          if (hintTL == NULL ||
-              hintTL == curTL /* Should not happen but protect against it */ ) {
-            // No useful hint.  Set the hint to NULL and go on.
-            curTL->set_hint(0);
-            break;
-          }
-          assert(hintTL->size() > size, "hint is inconsistent");
-          if (hintTL->surplus() > 0) {
-            // The hint led to a list that has a surplus.  Use it.
-            // Set the hint for the candidate to an overpopulated
-            // size.
-            curTL->set_hint(hintTL->size());
-            // Change the candidate.
-            curTL = hintTL;
-            break;
-          }
-          // The evm code reset the hint of the candidate as
-          // at an interim point.  Why?  Seems like this leaves
-          // the hint pointing to a list that didn't work.
-          // curTL->set_hint(hintTL->size());
-        }
-      }
-    }
-    // don't waste time splaying if chunk's singleton
-    if (splay && curTL->head()->next() != NULL) {
-      semi_splay_step(curTL);
-    }
+    curTL = curTL->get_better_list(this);
+
     retTC = curTL->first_available();
     assert((retTC != NULL) && (curTL->count() > 0),
       "A list in the binary tree should not be NULL");
@@ -465,9 +494,9 @@
   return retTC;
 }
 
-template <class Chunk>
-TreeList<Chunk>* BinaryTreeDictionary<Chunk>::find_list(size_t size) const {
-  TreeList<Chunk>* curTL;
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>* BinaryTreeDictionary<Chunk_t, FreeList_t>::find_list(size_t size) const {
+  TreeList<Chunk_t, FreeList_t>* curTL;
   for (curTL = root(); curTL != NULL;) {
     if (curTL->size() == size) {        // exact match
       break;
@@ -484,10 +513,10 @@
 }
 
 
-template <class Chunk>
-bool BinaryTreeDictionary<Chunk>::verify_chunk_in_free_list(Chunk* tc) const {
+template <class Chunk_t, template <class> class FreeList_t>
+bool BinaryTreeDictionary<Chunk_t, FreeList_t>::verify_chunk_in_free_list(Chunk_t* tc) const {
   size_t size = tc->size();
-  TreeList<Chunk>* tl = find_list(size);
+  TreeList<Chunk_t, FreeList_t>* tl = find_list(size);
   if (tl == NULL) {
     return false;
   } else {
@@ -495,9 +524,9 @@
   }
 }
 
-template <class Chunk>
-Chunk* BinaryTreeDictionary<Chunk>::find_largest_dict() const {
-  TreeList<Chunk> *curTL = root();
+template <class Chunk_t, template <class> class FreeList_t>
+Chunk_t* BinaryTreeDictionary<Chunk_t, FreeList_t>::find_largest_dict() const {
+  TreeList<Chunk_t, FreeList_t> *curTL = root();
   if (curTL != NULL) {
     while(curTL->right() != NULL) curTL = curTL->right();
     return curTL->largest_address();
@@ -510,15 +539,15 @@
 // chunk in a list on a tree node, just unlink it.
 // If it is the last chunk in the list (the next link is NULL),
 // remove the node and repair the tree.
-template <class Chunk>
-TreeChunk<Chunk>*
-BinaryTreeDictionary<Chunk>::remove_chunk_from_tree(TreeChunk<Chunk>* tc) {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeChunk<Chunk_t, FreeList_t>*
+BinaryTreeDictionary<Chunk_t, FreeList_t>::remove_chunk_from_tree(TreeChunk<Chunk_t, FreeList_t>* tc) {
   assert(tc != NULL, "Should not call with a NULL chunk");
   assert(tc->is_free(), "Header is not marked correctly");
 
-  TreeList<Chunk> *newTL, *parentTL;
-  TreeChunk<Chunk>* retTC;
-  TreeList<Chunk>* tl = tc->list();
+  TreeList<Chunk_t, FreeList_t> *newTL, *parentTL;
+  TreeChunk<Chunk_t, FreeList_t>* retTC;
+  TreeList<Chunk_t, FreeList_t>* tl = tc->list();
   debug_only(
     bool removing_only_chunk = false;
     if (tl == _root) {
@@ -538,8 +567,8 @@
 
   retTC = tc;
   // Removing this chunk can have the side effect of changing the node
-  // (TreeList<Chunk>*) in the tree.  If the node is the root, update it.
-  TreeList<Chunk>* replacementTL = tl->remove_chunk_replace_if_needed(tc);
+  // (TreeList<Chunk_t, FreeList_t>*) in the tree.  If the node is the root, update it.
+  TreeList<Chunk_t, FreeList_t>* replacementTL = tl->remove_chunk_replace_if_needed(tc);
   assert(tc->is_free(), "Chunk should still be free");
   assert(replacementTL->parent() == NULL ||
          replacementTL == replacementTL->parent()->left() ||
@@ -549,17 +578,18 @@
     assert(replacementTL->parent() == NULL, "Incorrectly replacing root");
     set_root(replacementTL);
   }
-  debug_only(
+#ifdef ASSERT
     if (tl != replacementTL) {
       assert(replacementTL->head() != NULL,
         "If the tree list was replaced, it should not be a NULL list");
-      TreeList<Chunk>* rhl = replacementTL->head_as_TreeChunk()->list();
-      TreeList<Chunk>* rtl = TreeChunk<Chunk>::as_TreeChunk(replacementTL->tail())->list();
+      TreeList<Chunk_t, FreeList_t>* rhl = replacementTL->head_as_TreeChunk()->list();
+      TreeList<Chunk_t, FreeList_t>* rtl =
+        TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(replacementTL->tail())->list();
       assert(rhl == replacementTL, "Broken head");
       assert(rtl == replacementTL, "Broken tail");
       assert(replacementTL->size() == tc->size(),  "Broken size");
     }
-  )
+#endif
 
   // Does the tree need to be repaired?
   if (replacementTL->count() == 0) {
@@ -574,7 +604,7 @@
     } else if (replacementTL->right() == NULL) {
       // right is NULL
       newTL = replacementTL->left();
-      debug_only(replacementTL->clearLeft();)
+      debug_only(replacementTL->clear_left();)
     } else {  // we have both children, so, by patriarchal convention,
               // my replacement is least node in right sub-tree
       complicated_splice = true;
@@ -623,7 +653,7 @@
       newTL->set_right(replacementTL->right());
       debug_only(
         replacementTL->clear_right();
-        replacementTL->clearLeft();
+        replacementTL->clear_left();
       )
     }
     assert(replacementTL->right() == NULL &&
@@ -644,21 +674,21 @@
     verify_tree();
   }
   assert(!removing_only_chunk || _root == NULL, "root should be NULL");
-  return TreeChunk<Chunk>::as_TreeChunk(retTC);
+  return TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(retTC);
 }
 
 // Remove the leftmost node (lm) in the tree and return it.
 // If lm has a right child, link it to the left node of
 // the parent of lm.
-template <class Chunk>
-TreeList<Chunk>* BinaryTreeDictionary<Chunk>::remove_tree_minimum(TreeList<Chunk>* tl) {
+template <class Chunk_t, template <class> class FreeList_t>
+TreeList<Chunk_t, FreeList_t>* BinaryTreeDictionary<Chunk_t, FreeList_t>::remove_tree_minimum(TreeList<Chunk_t, FreeList_t>* tl) {
   assert(tl != NULL && tl->parent() != NULL, "really need a proper sub-tree");
   // locate the subtree minimum by walking down left branches
-  TreeList<Chunk>* curTL = tl;
+  TreeList<Chunk_t, FreeList_t>* curTL = tl;
   for (; curTL->left() != NULL; curTL = curTL->left());
   // obviously curTL now has at most one child, a right child
   if (curTL != root()) {  // Should this test just be removed?
-    TreeList<Chunk>* parentTL = curTL->parent();
+    TreeList<Chunk_t, FreeList_t>* parentTL = curTL->parent();
     if (parentTL->left() == curTL) { // curTL is a left child
       parentTL->set_left(curTL->right());
     } else {
@@ -685,31 +715,14 @@
   return curTL;
 }
 
-// Based on a simplification of the algorithm by Sleator and Tarjan (JACM 1985).
-// The simplifications are the following:
-// . we splay only when we delete (not when we insert)
-// . we apply a single spay step per deletion/access
-// By doing such partial splaying, we reduce the amount of restructuring,
-// while getting a reasonably efficient search tree (we think).
-// [Measurements will be needed to (in)validate this expectation.]
-
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::semi_splay_step(TreeList<Chunk>* tc) {
-  // apply a semi-splay step at the given node:
-  // . if root, norting needs to be done
-  // . if child of root, splay once
-  // . else zig-zig or sig-zag depending on path from grandparent
-  if (root() == tc) return;
-  warning("*** Splaying not yet implemented; "
-          "tree operations may be inefficient ***");
-}
-
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::insert_chunk_in_tree(Chunk* fc) {
-  TreeList<Chunk> *curTL, *prevTL;
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::insert_chunk_in_tree(Chunk_t* fc) {
+  TreeList<Chunk_t, FreeList_t> *curTL, *prevTL;
   size_t size = fc->size();
 
-  assert(size >= BinaryTreeDictionary<Chunk>::min_tree_chunk_size, "too small to be a TreeList<Chunk>");
+  assert((size >= min_size()),
+    err_msg(SIZE_FORMAT " is too small to be a TreeChunk<Chunk_t, FreeList_t> " SIZE_FORMAT,
+      size, min_size()));
   if (FLSVerifyDictionary) {
     verify_tree();
   }
@@ -729,9 +742,9 @@
       curTL = curTL->right();
     }
   }
-  TreeChunk<Chunk>* tc = TreeChunk<Chunk>::as_TreeChunk(fc);
+  TreeChunk<Chunk_t, FreeList_t>* tc = TreeChunk<Chunk_t, FreeList_t>::as_TreeChunk(fc);
   // This chunk is being returned to the binary tree.  Its embedded
-  // TreeList<Chunk> should be unused at this point.
+  // TreeList<Chunk_t, FreeList_t> should be unused at this point.
   tc->initialize();
   if (curTL != NULL) {          // exact match
     tc->set_list(curTL);
@@ -739,8 +752,8 @@
   } else {                     // need a new node in tree
     tc->clear_next();
     tc->link_prev(NULL);
-    TreeList<Chunk>* newTL = TreeList<Chunk>::as_TreeList(tc);
-    assert(((TreeChunk<Chunk>*)tc)->list() == newTL,
+    TreeList<Chunk_t, FreeList_t>* newTL = TreeList<Chunk_t, FreeList_t>::as_TreeList(tc);
+    assert(((TreeChunk<Chunk_t, FreeList_t>*)tc)->list() == newTL,
       "List was not initialized correctly");
     if (prevTL == NULL) {      // we are the only tree node
       assert(root() == NULL, "control point invariant");
@@ -768,30 +781,30 @@
   }
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::max_chunk_size() const {
-  FreeBlockDictionary<Chunk>::verify_par_locked();
-  TreeList<Chunk>* tc = root();
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::max_chunk_size() const {
+  FreeBlockDictionary<Chunk_t>::verify_par_locked();
+  TreeList<Chunk_t, FreeList_t>* tc = root();
   if (tc == NULL) return 0;
   for (; tc->right() != NULL; tc = tc->right());
   return tc->size();
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_list_length(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_list_length(TreeList<Chunk_t, FreeList_t>* tl) const {
   size_t res;
   res = tl->count();
 #ifdef ASSERT
   size_t cnt;
-  Chunk* tc = tl->head();
+  Chunk_t* tc = tl->head();
   for (cnt = 0; tc != NULL; tc = tc->next(), cnt++);
   assert(res == cnt, "The count is not being maintained correctly");
 #endif
   return res;
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_size_in_tree(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_size_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const {
   if (tl == NULL)
     return 0;
   return (tl->size() * total_list_length(tl)) +
@@ -799,8 +812,8 @@
          total_size_in_tree(tl->right());
 }
 
-template <class Chunk>
-double BinaryTreeDictionary<Chunk>::sum_of_squared_block_sizes(TreeList<Chunk>* const tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+double BinaryTreeDictionary<Chunk_t, FreeList_t>::sum_of_squared_block_sizes(TreeList<Chunk_t, FreeList_t>* const tl) const {
   if (tl == NULL) {
     return 0.0;
   }
@@ -811,8 +824,8 @@
   return curr;
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_free_blocks_in_tree(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_free_blocks_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const {
   if (tl == NULL)
     return 0;
   return total_list_length(tl) +
@@ -820,28 +833,28 @@
          total_free_blocks_in_tree(tl->right());
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::num_free_blocks() const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::num_free_blocks() const {
   assert(total_free_blocks_in_tree(root()) == total_free_blocks(),
          "_total_free_blocks inconsistency");
   return total_free_blocks();
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::tree_height_helper(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::tree_height_helper(TreeList<Chunk_t, FreeList_t>* tl) const {
   if (tl == NULL)
     return 0;
   return 1 + MAX2(tree_height_helper(tl->left()),
                   tree_height_helper(tl->right()));
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::treeHeight() const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::tree_height() const {
   return tree_height_helper(root());
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_nodes_helper(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_nodes_helper(TreeList<Chunk_t, FreeList_t>* tl) const {
   if (tl == NULL) {
     return 0;
   }
@@ -849,14 +862,18 @@
     total_nodes_helper(tl->right());
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_nodes_in_tree(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_nodes_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const {
   return total_nodes_helper(root());
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::dict_census_udpate(size_t size, bool split, bool birth){
-  TreeList<Chunk>* nd = find_list(size);
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::dict_census_update(size_t size, bool split, bool birth){}
+
+#ifndef SERIALGC
+template <>
+void BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>::dict_census_update(size_t size, bool split, bool birth){
+  TreeList<FreeChunk, AdaptiveFreeList>* nd = find_list(size);
   if (nd) {
     if (split) {
       if (birth) {
@@ -882,16 +899,26 @@
   //   This is a birth associated with a LinAB.  The chunk
   //     for the LinAB is not in the dictionary.
 }
+#endif // SERIALGC
 
-template <class Chunk>
-bool BinaryTreeDictionary<Chunk>::coal_dict_over_populated(size_t size) {
+template <class Chunk_t, template <class> class FreeList_t>
+bool BinaryTreeDictionary<Chunk_t, FreeList_t>::coal_dict_over_populated(size_t size) {
+  // For the general type of freelists, encourage coalescing by
+  // returning true.
+  return true;
+}
+
+#ifndef SERIALGC
+template <>
+bool BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>::coal_dict_over_populated(size_t size) {
   if (FLSAlwaysCoalesceLarge) return true;
 
-  TreeList<Chunk>* list_of_size = find_list(size);
+  TreeList<FreeChunk, AdaptiveFreeList>* list_of_size = find_list(size);
   // None of requested size implies overpopulated.
   return list_of_size == NULL || list_of_size->coal_desired() <= 0 ||
          list_of_size->count() > list_of_size->coal_desired();
 }
+#endif  // SERIALGC
 
 // Closures for walking the binary tree.
 //   do_list() walks the free list in a node applying the closure
@@ -899,19 +926,18 @@
 //   do_tree() walks the nodes in the binary tree applying do_list()
 //     to each list at each node.
 
-template <class Chunk>
+template <class Chunk_t, template <class> class FreeList_t>
 class TreeCensusClosure : public StackObj {
  protected:
-  virtual void do_list(FreeList<Chunk>* fl) = 0;
+  virtual void do_list(FreeList_t<Chunk_t>* fl) = 0;
  public:
-  virtual void do_tree(TreeList<Chunk>* tl) = 0;
+  virtual void do_tree(TreeList<Chunk_t, FreeList_t>* tl) = 0;
 };
 
-template <class Chunk>
-class AscendTreeCensusClosure : public TreeCensusClosure<Chunk> {
-  using TreeCensusClosure<Chunk>::do_list;
+template <class Chunk_t, template <class> class FreeList_t>
+class AscendTreeCensusClosure : public TreeCensusClosure<Chunk_t, FreeList_t> {
  public:
-  void do_tree(TreeList<Chunk>* tl) {
+  void do_tree(TreeList<Chunk_t, FreeList_t>* tl) {
     if (tl != NULL) {
       do_tree(tl->left());
       do_list(tl);
@@ -920,11 +946,10 @@
   }
 };
 
-template <class Chunk>
-class DescendTreeCensusClosure : public TreeCensusClosure<Chunk> {
-  using TreeCensusClosure<Chunk>::do_list;
+template <class Chunk_t, template <class> class FreeList_t>
+class DescendTreeCensusClosure : public TreeCensusClosure<Chunk_t, FreeList_t> {
  public:
-  void do_tree(TreeList<Chunk>* tl) {
+  void do_tree(TreeList<Chunk_t, FreeList_t>* tl) {
     if (tl != NULL) {
       do_tree(tl->right());
       do_list(tl);
@@ -935,8 +960,8 @@
 
 // For each list in the tree, calculate the desired, desired
 // coalesce, count before sweep, and surplus before sweep.
-template <class Chunk>
-class BeginSweepClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class BeginSweepClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
   double _percentage;
   float _inter_sweep_current;
   float _inter_sweep_estimate;
@@ -951,32 +976,36 @@
    _inter_sweep_estimate(inter_sweep_estimate),
    _intra_sweep_estimate(intra_sweep_estimate) { }
 
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList<Chunk_t>* fl) {}
+
+#ifndef SERIALGC
+  void do_list(AdaptiveFreeList<Chunk_t>* fl) {
     double coalSurplusPercent = _percentage;
     fl->compute_desired(_inter_sweep_current, _inter_sweep_estimate, _intra_sweep_estimate);
     fl->set_coal_desired((ssize_t)((double)fl->desired() * coalSurplusPercent));
     fl->set_before_sweep(fl->count());
     fl->set_bfr_surp(fl->surplus());
   }
+#endif // SERIALGC
 };
 
 // Used to search the tree until a condition is met.
 // Similar to TreeCensusClosure but searches the
 // tree and returns promptly when found.
 
-template <class Chunk>
+template <class Chunk_t, template <class> class FreeList_t>
 class TreeSearchClosure : public StackObj {
  protected:
-  virtual bool do_list(FreeList<Chunk>* fl) = 0;
+  virtual bool do_list(FreeList_t<Chunk_t>* fl) = 0;
  public:
-  virtual bool do_tree(TreeList<Chunk>* tl) = 0;
+  virtual bool do_tree(TreeList<Chunk_t, FreeList_t>* tl) = 0;
 };
 
 #if 0 //  Don't need this yet but here for symmetry.
-template <class Chunk>
-class AscendTreeSearchClosure : public TreeSearchClosure {
+template <class Chunk_t, template <class> class FreeList_t>
+class AscendTreeSearchClosure : public TreeSearchClosure<Chunk_t> {
  public:
-  bool do_tree(TreeList<Chunk>* tl) {
+  bool do_tree(TreeList<Chunk_t, FreeList_t>* tl) {
     if (tl != NULL) {
       if (do_tree(tl->left())) return true;
       if (do_list(tl)) return true;
@@ -987,11 +1016,10 @@
 };
 #endif
 
-template <class Chunk>
-class DescendTreeSearchClosure : public TreeSearchClosure<Chunk> {
-  using TreeSearchClosure<Chunk>::do_list;
+template <class Chunk_t, template <class> class FreeList_t>
+class DescendTreeSearchClosure : public TreeSearchClosure<Chunk_t, FreeList_t> {
  public:
-  bool do_tree(TreeList<Chunk>* tl) {
+  bool do_tree(TreeList<Chunk_t, FreeList_t>* tl) {
     if (tl != NULL) {
       if (do_tree(tl->right())) return true;
       if (do_list(tl)) return true;
@@ -1003,17 +1031,17 @@
 
 // Searches the tree for a chunk that ends at the
 // specified address.
-template <class Chunk>
-class EndTreeSearchClosure : public DescendTreeSearchClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class EndTreeSearchClosure : public DescendTreeSearchClosure<Chunk_t, FreeList_t> {
   HeapWord* _target;
-  Chunk* _found;
+  Chunk_t* _found;
 
  public:
   EndTreeSearchClosure(HeapWord* target) : _target(target), _found(NULL) {}
-  bool do_list(FreeList<Chunk>* fl) {
-    Chunk* item = fl->head();
+  bool do_list(FreeList_t<Chunk_t>* fl) {
+    Chunk_t* item = fl->head();
     while (item != NULL) {
-      if (item->end() == _target) {
+      if (item->end() == (uintptr_t*) _target) {
         _found = item;
         return true;
       }
@@ -1021,22 +1049,22 @@
     }
     return false;
   }
-  Chunk* found() { return _found; }
+  Chunk_t* found() { return _found; }
 };
 
-template <class Chunk>
-Chunk* BinaryTreeDictionary<Chunk>::find_chunk_ends_at(HeapWord* target) const {
-  EndTreeSearchClosure<Chunk> etsc(target);
+template <class Chunk_t, template <class> class FreeList_t>
+Chunk_t* BinaryTreeDictionary<Chunk_t, FreeList_t>::find_chunk_ends_at(HeapWord* target) const {
+  EndTreeSearchClosure<Chunk_t, FreeList_t> etsc(target);
   bool found_target = etsc.do_tree(root());
   assert(found_target || etsc.found() == NULL, "Consistency check");
   assert(!found_target || etsc.found() != NULL, "Consistency check");
   return etsc.found();
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::begin_sweep_dict_census(double coalSurplusPercent,
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::begin_sweep_dict_census(double coalSurplusPercent,
   float inter_sweep_current, float inter_sweep_estimate, float intra_sweep_estimate) {
-  BeginSweepClosure<Chunk> bsc(coalSurplusPercent, inter_sweep_current,
+  BeginSweepClosure<Chunk_t, FreeList_t> bsc(coalSurplusPercent, inter_sweep_current,
                                             inter_sweep_estimate,
                                             intra_sweep_estimate);
   bsc.do_tree(root());
@@ -1045,84 +1073,91 @@
 // Closures and methods for calculating total bytes returned to the
 // free lists in the tree.
 #ifndef PRODUCT
-template <class Chunk>
-class InitializeDictReturnedBytesClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class InitializeDictReturnedBytesClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
    public:
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList_t<Chunk_t>* fl) {
     fl->set_returned_bytes(0);
   }
 };
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::initialize_dict_returned_bytes() {
-  InitializeDictReturnedBytesClosure<Chunk> idrb;
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::initialize_dict_returned_bytes() {
+  InitializeDictReturnedBytesClosure<Chunk_t, FreeList_t> idrb;
   idrb.do_tree(root());
 }
 
-template <class Chunk>
-class ReturnedBytesClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class ReturnedBytesClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
   size_t _dict_returned_bytes;
  public:
   ReturnedBytesClosure() { _dict_returned_bytes = 0; }
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList_t<Chunk_t>* fl) {
     _dict_returned_bytes += fl->returned_bytes();
   }
   size_t dict_returned_bytes() { return _dict_returned_bytes; }
 };
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::sum_dict_returned_bytes() {
-  ReturnedBytesClosure<Chunk> rbc;
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::sum_dict_returned_bytes() {
+  ReturnedBytesClosure<Chunk_t, FreeList_t> rbc;
   rbc.do_tree(root());
 
   return rbc.dict_returned_bytes();
 }
 
 // Count the number of entries in the tree.
-template <class Chunk>
-class treeCountClosure : public DescendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class treeCountClosure : public DescendTreeCensusClosure<Chunk_t, FreeList_t> {
  public:
   uint count;
   treeCountClosure(uint c) { count = c; }
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList_t<Chunk_t>* fl) {
     count++;
   }
 };
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::total_count() {
-  treeCountClosure<Chunk> ctc(0);
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::total_count() {
+  treeCountClosure<Chunk_t, FreeList_t> ctc(0);
   ctc.do_tree(root());
   return ctc.count;
 }
 #endif // PRODUCT
 
 // Calculate surpluses for the lists in the tree.
-template <class Chunk>
-class setTreeSurplusClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class setTreeSurplusClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
   double percentage;
  public:
   setTreeSurplusClosure(double v) { percentage = v; }
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList<Chunk_t>* fl) {}
+
+#ifndef SERIALGC
+  void do_list(AdaptiveFreeList<Chunk_t>* fl) {
     double splitSurplusPercent = percentage;
     fl->set_surplus(fl->count() -
                    (ssize_t)((double)fl->desired() * splitSurplusPercent));
   }
+#endif // SERIALGC
 };
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::set_tree_surplus(double splitSurplusPercent) {
-  setTreeSurplusClosure<Chunk> sts(splitSurplusPercent);
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::set_tree_surplus(double splitSurplusPercent) {
+  setTreeSurplusClosure<Chunk_t, FreeList_t> sts(splitSurplusPercent);
   sts.do_tree(root());
 }
 
 // Set hints for the lists in the tree.
-template <class Chunk>
-class setTreeHintsClosure : public DescendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class setTreeHintsClosure : public DescendTreeCensusClosure<Chunk_t, FreeList_t> {
   size_t hint;
  public:
   setTreeHintsClosure(size_t v) { hint = v; }
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList<Chunk_t>* fl) {}
+
+#ifndef SERIALGC
+  void do_list(AdaptiveFreeList<Chunk_t>* fl) {
     fl->set_hint(hint);
     assert(fl->hint() == 0 || fl->hint() > fl->size(),
       "Current hint is inconsistent");
@@ -1130,35 +1165,40 @@
       hint = fl->size();
     }
   }
+#endif // SERIALGC
 };
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::set_tree_hints(void) {
-  setTreeHintsClosure<Chunk> sth(0);
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::set_tree_hints(void) {
+  setTreeHintsClosure<Chunk_t, FreeList_t> sth(0);
   sth.do_tree(root());
 }
 
 // Save count before previous sweep and splits and coalesces.
-template <class Chunk>
-class clearTreeCensusClosure : public AscendTreeCensusClosure<Chunk> {
-  void do_list(FreeList<Chunk>* fl) {
+template <class Chunk_t, template <class> class FreeList_t>
+class clearTreeCensusClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
+  void do_list(FreeList<Chunk_t>* fl) {}
+
+#ifndef SERIALGC
+  void do_list(AdaptiveFreeList<Chunk_t>* fl) {
     fl->set_prev_sweep(fl->count());
     fl->set_coal_births(0);
     fl->set_coal_deaths(0);
     fl->set_split_births(0);
     fl->set_split_deaths(0);
   }
+#endif  // SERIALGC
 };
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::clear_tree_census(void) {
-  clearTreeCensusClosure<Chunk> ctc;
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::clear_tree_census(void) {
+  clearTreeCensusClosure<Chunk_t, FreeList_t> ctc;
   ctc.do_tree(root());
 }
 
 // Do reporting and post sweep clean up.
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::end_sweep_dict_census(double splitSurplusPercent) {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::end_sweep_dict_census(double splitSurplusPercent) {
   // Does walking the tree 3 times hurt?
   set_tree_surplus(splitSurplusPercent);
   set_tree_hints();
@@ -1169,9 +1209,9 @@
 }
 
 // Print summary statistics
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::report_statistics() const {
-  FreeBlockDictionary<Chunk>::verify_par_locked();
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::report_statistics() const {
+  FreeBlockDictionary<Chunk_t>::verify_par_locked();
   gclog_or_tty->print("Statistics for BinaryTreeDictionary:\n"
          "------------------------------------\n");
   size_t total_size = total_chunk_size(debug_only(NULL));
@@ -1182,36 +1222,47 @@
   if (free_blocks > 0) {
     gclog_or_tty->print("Av.  Block  Size: %d\n", total_size/free_blocks);
   }
-  gclog_or_tty->print("Tree      Height: %d\n", treeHeight());
+  gclog_or_tty->print("Tree      Height: %d\n", tree_height());
 }
 
 // Print census information - counts, births, deaths, etc.
 // for each list in the tree.  Also print some summary
 // information.
-template <class Chunk>
-class PrintTreeCensusClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class PrintTreeCensusClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
   int _print_line;
   size_t _total_free;
-  FreeList<Chunk> _total;
+  FreeList_t<Chunk_t> _total;
 
  public:
   PrintTreeCensusClosure() {
     _print_line = 0;
     _total_free = 0;
   }
-  FreeList<Chunk>* total() { return &_total; }
+  FreeList_t<Chunk_t>* total() { return &_total; }
   size_t total_free() { return _total_free; }
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList<Chunk_t>* fl) {
     if (++_print_line >= 40) {
-      FreeList<Chunk>::print_labels_on(gclog_or_tty, "size");
+      FreeList_t<Chunk_t>::print_labels_on(gclog_or_tty, "size");
       _print_line = 0;
     }
     fl->print_on(gclog_or_tty);
     _total_free +=            fl->count()            * fl->size()        ;
     total()->set_count(      total()->count()       + fl->count()      );
-    total()->set_bfr_surp(    total()->bfr_surp()     + fl->bfr_surp()    );
+  }
+
+#ifndef SERIALGC
+  void do_list(AdaptiveFreeList<Chunk_t>* fl) {
+    if (++_print_line >= 40) {
+      FreeList_t<Chunk_t>::print_labels_on(gclog_or_tty, "size");
+      _print_line = 0;
+    }
+    fl->print_on(gclog_or_tty);
+    _total_free +=           fl->count()             * fl->size()        ;
+    total()->set_count(      total()->count()        + fl->count()      );
+    total()->set_bfr_surp(   total()->bfr_surp()     + fl->bfr_surp()    );
     total()->set_surplus(    total()->split_deaths() + fl->surplus()    );
-    total()->set_desired(    total()->desired()     + fl->desired()    );
+    total()->set_desired(    total()->desired()      + fl->desired()    );
     total()->set_prev_sweep(  total()->prev_sweep()   + fl->prev_sweep()  );
     total()->set_before_sweep(total()->before_sweep() + fl->before_sweep());
     total()->set_coal_births( total()->coal_births()  + fl->coal_births() );
@@ -1219,18 +1270,32 @@
     total()->set_split_births(total()->split_births() + fl->split_births());
     total()->set_split_deaths(total()->split_deaths() + fl->split_deaths());
   }
+#endif  // SERIALGC
 };
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::print_dict_census(void) const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::print_dict_census(void) const {
 
   gclog_or_tty->print("\nBinaryTree\n");
-  FreeList<Chunk>::print_labels_on(gclog_or_tty, "size");
-  PrintTreeCensusClosure<Chunk> ptc;
+  FreeList_t<Chunk_t>::print_labels_on(gclog_or_tty, "size");
+  PrintTreeCensusClosure<Chunk_t, FreeList_t> ptc;
   ptc.do_tree(root());
 
-  FreeList<Chunk>* total = ptc.total();
-  FreeList<Chunk>::print_labels_on(gclog_or_tty, " ");
+  FreeList_t<Chunk_t>* total = ptc.total();
+  FreeList_t<Chunk_t>::print_labels_on(gclog_or_tty, " ");
+}
+
+#ifndef SERIALGC
+template <>
+void BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>::print_dict_census(void) const {
+
+  gclog_or_tty->print("\nBinaryTree\n");
+  AdaptiveFreeList<FreeChunk>::print_labels_on(gclog_or_tty, "size");
+  PrintTreeCensusClosure<FreeChunk, AdaptiveFreeList> ptc;
+  ptc.do_tree(root());
+
+  AdaptiveFreeList<FreeChunk>* total = ptc.total();
+  AdaptiveFreeList<FreeChunk>::print_labels_on(gclog_or_tty, " ");
   total->print_on(gclog_or_tty, "TOTAL\t");
   gclog_or_tty->print(
               "total_free(words): " SIZE_FORMAT_W(16)
@@ -1242,9 +1307,10 @@
              (double)(total->desired() - total->count())
              /(total->desired() != 0 ? (double)total->desired() : 1.0));
 }
+#endif  // SERIALGC
 
-template <class Chunk>
-class PrintFreeListsClosure : public AscendTreeCensusClosure<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class PrintFreeListsClosure : public AscendTreeCensusClosure<Chunk_t, FreeList_t> {
   outputStream* _st;
   int _print_line;
 
@@ -1253,14 +1319,14 @@
     _st = st;
     _print_line = 0;
   }
-  void do_list(FreeList<Chunk>* fl) {
+  void do_list(FreeList_t<Chunk_t>* fl) {
     if (++_print_line >= 40) {
-      FreeList<Chunk>::print_labels_on(_st, "size");
+      FreeList_t<Chunk_t>::print_labels_on(_st, "size");
       _print_line = 0;
     }
     fl->print_on(gclog_or_tty);
     size_t sz = fl->size();
-    for (Chunk* fc = fl->head(); fc != NULL;
+    for (Chunk_t* fc = fl->head(); fc != NULL;
          fc = fc->next()) {
       _st->print_cr("\t[" PTR_FORMAT "," PTR_FORMAT ")  %s",
                     fc, (HeapWord*)fc + sz,
@@ -1269,11 +1335,11 @@
   }
 };
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::print_free_lists(outputStream* st) const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::print_free_lists(outputStream* st) const {
 
-  FreeList<Chunk>::print_labels_on(st, "size");
-  PrintFreeListsClosure<Chunk> pflc(st);
+  FreeList_t<Chunk_t>::print_labels_on(st, "size");
+  PrintFreeListsClosure<Chunk_t, FreeList_t> pflc(st);
   pflc.do_tree(root());
 }
 
@@ -1281,18 +1347,18 @@
 // . _root has no parent
 // . parent and child point to each other
 // . each node's key correctly related to that of its child(ren)
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::verify_tree() const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::verify_tree() const {
   guarantee(root() == NULL || total_free_blocks() == 0 ||
     total_size() != 0, "_total_size should't be 0?");
   guarantee(root() == NULL || root()->parent() == NULL, "_root shouldn't have parent");
   verify_tree_helper(root());
 }
 
-template <class Chunk>
-size_t BinaryTreeDictionary<Chunk>::verify_prev_free_ptrs(TreeList<Chunk>* tl) {
+template <class Chunk_t, template <class> class FreeList_t>
+size_t BinaryTreeDictionary<Chunk_t, FreeList_t>::verify_prev_free_ptrs(TreeList<Chunk_t, FreeList_t>* tl) {
   size_t ct = 0;
-  for (Chunk* curFC = tl->head(); curFC != NULL; curFC = curFC->next()) {
+  for (Chunk_t* curFC = tl->head(); curFC != NULL; curFC = curFC->next()) {
     ct++;
     assert(curFC->prev() == NULL || curFC->prev()->is_free(),
       "Chunk should be free");
@@ -1303,8 +1369,8 @@
 // Note: this helper is recursive rather than iterative, so use with
 // caution on very deep trees; and watch out for stack overflow errors;
 // In general, to be used only for debugging.
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::verify_tree_helper(TreeList<Chunk>* tl) const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::verify_tree_helper(TreeList<Chunk_t, FreeList_t>* tl) const {
   if (tl == NULL)
     return;
   guarantee(tl->size() != 0, "A list must has a size");
@@ -1332,15 +1398,25 @@
   verify_tree_helper(tl->right());
 }
 
-template <class Chunk>
-void BinaryTreeDictionary<Chunk>::verify() const {
+template <class Chunk_t, template <class> class FreeList_t>
+void BinaryTreeDictionary<Chunk_t, FreeList_t>::verify() const {
   verify_tree();
   guarantee(total_size() == total_size_in_tree(root()), "Total Size inconsistency");
 }
 
+template class TreeList<Metablock, FreeList>;
+template class BinaryTreeDictionary<Metablock, FreeList>;
+template class TreeChunk<Metablock, FreeList>;
+
+template class TreeList<Metachunk, FreeList>;
+template class BinaryTreeDictionary<Metachunk, FreeList>;
+template class TreeChunk<Metachunk, FreeList>;
+
+
 #ifndef SERIALGC
 // Explicitly instantiate these types for FreeChunk.
-template class BinaryTreeDictionary<FreeChunk>;
-template class TreeChunk<FreeChunk>;
-template class TreeList<FreeChunk>;
+template class TreeList<FreeChunk, AdaptiveFreeList>;
+template class BinaryTreeDictionary<FreeChunk, AdaptiveFreeList>;
+template class TreeChunk<FreeChunk, AdaptiveFreeList>;
+
 #endif // SERIALGC
--- a/src/share/vm/memory/binaryTreeDictionary.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/memory/binaryTreeDictionary.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -37,77 +37,78 @@
 // A TreeList is a FreeList which can be used to maintain a
 // binary tree of free lists.
 
-template <class Chunk> class TreeChunk;
-template <class Chunk> class BinaryTreeDictionary;
-template <class Chunk> class AscendTreeCensusClosure;
-template <class Chunk> class DescendTreeCensusClosure;
-template <class Chunk> class DescendTreeSearchClosure;
+template <class Chunk_t, template <class> class FreeList_t> class TreeChunk;
+template <class Chunk_t, template <class> class FreeList_t> class BinaryTreeDictionary;
+template <class Chunk_t, template <class> class FreeList_t> class AscendTreeCensusClosure;
+template <class Chunk_t, template <class> class FreeList_t> class DescendTreeCensusClosure;
+template <class Chunk_t, template <class> class FreeList_t> class DescendTreeSearchClosure;
 
-template <class Chunk>
-class TreeList: public FreeList<Chunk> {
-  friend class TreeChunk<Chunk>;
-  friend class BinaryTreeDictionary<Chunk>;
-  friend class AscendTreeCensusClosure<Chunk>;
-  friend class DescendTreeCensusClosure<Chunk>;
-  friend class DescendTreeSearchClosure<Chunk>;
+template <class Chunk_t, template <class> class FreeList_t>
+class TreeList : public FreeList_t<Chunk_t> {
+  friend class TreeChunk<Chunk_t, FreeList_t>;
+  friend class BinaryTreeDictionary<Chunk_t, FreeList_t>;
+  friend class AscendTreeCensusClosure<Chunk_t, FreeList_t>;
+  friend class DescendTreeCensusClosure<Chunk_t, FreeList_t>;
+  friend class DescendTreeSearchClosure<Chunk_t, FreeList_t>;
 
-  TreeList<Chunk>* _parent;
-  TreeList<Chunk>* _left;
-  TreeList<Chunk>* _right;
+  TreeList<Chunk_t, FreeList_t>* _parent;
+  TreeList<Chunk_t, FreeList_t>* _left;
+  TreeList<Chunk_t, FreeList_t>* _right;
 
  protected:
-  TreeList<Chunk>* parent() const { return _parent; }
-  TreeList<Chunk>* left()   const { return _left;   }
-  TreeList<Chunk>* right()  const { return _right;  }
 
-  // Explicitly import these names into our namespace to fix name lookup with templates
-  using FreeList<Chunk>::head;
-  using FreeList<Chunk>::set_head;
+  TreeList<Chunk_t, FreeList_t>* parent() const { return _parent; }
+  TreeList<Chunk_t, FreeList_t>* left()   const { return _left;   }
+  TreeList<Chunk_t, FreeList_t>* right()  const { return _right;  }
 
-  using FreeList<Chunk>::tail;
-  using FreeList<Chunk>::set_tail;
-  using FreeList<Chunk>::link_tail;
+  // Wrapper on call to base class, to get the template to compile.
+  Chunk_t* head() const { return FreeList_t<Chunk_t>::head(); }
+  Chunk_t* tail() const { return FreeList_t<Chunk_t>::tail(); }
+  void set_head(Chunk_t* head) { FreeList_t<Chunk_t>::set_head(head); }
+  void set_tail(Chunk_t* tail) { FreeList_t<Chunk_t>::set_tail(tail); }
 
-  using FreeList<Chunk>::increment_count;
-  NOT_PRODUCT(using FreeList<Chunk>::increment_returned_bytes_by;)
-  using FreeList<Chunk>::verify_chunk_in_free_list;
-  using FreeList<Chunk>::size;
+  size_t size() const { return FreeList_t<Chunk_t>::size(); }
 
   // Accessors for links in tree.
 
-  void set_left(TreeList<Chunk>* tl) {
+  void set_left(TreeList<Chunk_t, FreeList_t>* tl) {
     _left   = tl;
     if (tl != NULL)
       tl->set_parent(this);
   }
-  void set_right(TreeList<Chunk>* tl) {
+  void set_right(TreeList<Chunk_t, FreeList_t>* tl) {
     _right  = tl;
     if (tl != NULL)
       tl->set_parent(this);
   }
-  void set_parent(TreeList<Chunk>* tl)  { _parent = tl;   }
+  void set_parent(TreeList<Chunk_t, FreeList_t>* tl)  { _parent = tl;   }
 
-  void clearLeft()               { _left = NULL;   }
+  void clear_left()               { _left = NULL;   }
   void clear_right()              { _right = NULL;  }
   void clear_parent()             { _parent = NULL; }
-  void initialize()              { clearLeft(); clear_right(), clear_parent(); }
+  void initialize()               { clear_left(); clear_right(), clear_parent(); FreeList_t<Chunk_t>::initialize(); }
 
   // For constructing a TreeList from a Tree chunk or
   // address and size.
-  static TreeList<Chunk>* as_TreeList(TreeChunk<Chunk>* tc);
-  static TreeList<Chunk>* as_TreeList(HeapWord* addr, size_t size);
+  TreeList();
+  static TreeList<Chunk_t, FreeList_t>*
+          as_TreeList(TreeChunk<Chunk_t, FreeList_t>* tc);
+  static TreeList<Chunk_t, FreeList_t>* as_TreeList(HeapWord* addr, size_t size);
 
   // Returns the head of the free list as a pointer to a TreeChunk.
-  TreeChunk<Chunk>* head_as_TreeChunk();
+  TreeChunk<Chunk_t, FreeList_t>* head_as_TreeChunk();
 
   // Returns the first available chunk in the free list as a pointer
   // to a TreeChunk.
-  TreeChunk<Chunk>* first_available();
+  TreeChunk<Chunk_t, FreeList_t>* first_available();
 
   // Returns the block with the largest heap address amongst
   // those in the list for this size; potentially slow and expensive,
   // use with caution!
-  TreeChunk<Chunk>* largest_address();
+  TreeChunk<Chunk_t, FreeList_t>* largest_address();
+
+  TreeList<Chunk_t, FreeList_t>* get_better_list(
+    BinaryTreeDictionary<Chunk_t, FreeList_t>* dictionary);
 
   // remove_chunk_replace_if_needed() removes the given "tc" from the TreeList.
   // If "tc" is the first chunk in the list, it is also the
@@ -115,10 +116,10 @@
   // returns the possibly replaced TreeList* for the node in
   // the tree.  It also updates the parent of the original
   // node to point to the new node.
-  TreeList<Chunk>* remove_chunk_replace_if_needed(TreeChunk<Chunk>* tc);
+  TreeList<Chunk_t, FreeList_t>* remove_chunk_replace_if_needed(TreeChunk<Chunk_t, FreeList_t>* tc);
   // See FreeList.
-  void return_chunk_at_head(TreeChunk<Chunk>* tc);
-  void return_chunk_at_tail(TreeChunk<Chunk>* tc);
+  void return_chunk_at_head(TreeChunk<Chunk_t, FreeList_t>* tc);
+  void return_chunk_at_tail(TreeChunk<Chunk_t, FreeList_t>* tc);
 };
 
 // A TreeChunk is a subclass of a Chunk that additionally
@@ -134,52 +135,54 @@
 // on the free list for a node in the tree and is only removed if
 // it is the last chunk on the free list.
 
-template <class Chunk>
-class TreeChunk : public Chunk {
-  friend class TreeList<Chunk>;
-  TreeList<Chunk>* _list;
-  TreeList<Chunk> _embedded_list;  // if non-null, this chunk is on _list
+template <class Chunk_t, template <class> class FreeList_t>
+class TreeChunk : public Chunk_t {
+  friend class TreeList<Chunk_t, FreeList_t>;
+  TreeList<Chunk_t, FreeList_t>* _list;
+  TreeList<Chunk_t, FreeList_t> _embedded_list;  // if non-null, this chunk is on _list
+
+  static size_t _min_tree_chunk_size;
+
  protected:
-  TreeList<Chunk>* embedded_list() const { return (TreeList<Chunk>*) &_embedded_list; }
-  void set_embedded_list(TreeList<Chunk>* v) { _embedded_list = *v; }
+  TreeList<Chunk_t, FreeList_t>* embedded_list() const { return (TreeList<Chunk_t, FreeList_t>*) &_embedded_list; }
+  void set_embedded_list(TreeList<Chunk_t, FreeList_t>* v) { _embedded_list = *v; }
  public:
-  TreeList<Chunk>* list() { return _list; }
-  void set_list(TreeList<Chunk>* v) { _list = v; }
-  static TreeChunk<Chunk>* as_TreeChunk(Chunk* fc);
+  TreeList<Chunk_t, FreeList_t>* list() { return _list; }
+  void set_list(TreeList<Chunk_t, FreeList_t>* v) { _list = v; }
+  static TreeChunk<Chunk_t, FreeList_t>* as_TreeChunk(Chunk_t* fc);
   // Initialize fields in a TreeChunk that should be
   // initialized when the TreeChunk is being added to
   // a free list in the tree.
   void initialize() { embedded_list()->initialize(); }
 
-  Chunk* next() const { return Chunk::next(); }
-  Chunk* prev() const { return Chunk::prev(); }
-  size_t size() const volatile { return Chunk::size(); }
+  Chunk_t* next() const { return Chunk_t::next(); }
+  Chunk_t* prev() const { return Chunk_t::prev(); }
+  size_t size() const volatile { return Chunk_t::size(); }
+
+  static size_t min_size() {
+    return _min_tree_chunk_size;
+  }
 
   // debugging
   void verify_tree_chunk_list() const;
+  void assert_is_mangled() const;
 };
 
 
-template <class Chunk>
-class BinaryTreeDictionary: public FreeBlockDictionary<Chunk> {
+template <class Chunk_t, template <class> class FreeList_t>
+class BinaryTreeDictionary: public FreeBlockDictionary<Chunk_t> {
   friend class VMStructs;
-  bool       _splay;
-  bool       _adaptive_freelists;
   size_t     _total_size;
   size_t     _total_free_blocks;
-  TreeList<Chunk>* _root;
+  TreeList<Chunk_t, FreeList_t>* _root;
 
   // private accessors
-  bool splay() const { return _splay; }
-  void set_splay(bool v) { _splay = v; }
   void set_total_size(size_t v) { _total_size = v; }
   virtual void inc_total_size(size_t v);
   virtual void dec_total_size(size_t v);
-  size_t total_free_blocks() const { return _total_free_blocks; }
   void set_total_free_blocks(size_t v) { _total_free_blocks = v; }
-  TreeList<Chunk>* root() const { return _root; }
-  void set_root(TreeList<Chunk>* v) { _root = v; }
-  bool adaptive_freelists() { return _adaptive_freelists; }
+  TreeList<Chunk_t, FreeList_t>* root() const { return _root; }
+  void set_root(TreeList<Chunk_t, FreeList_t>* v) { _root = v; }
 
   // This field is added and can be set to point to the
   // the Mutex used to synchronize access to the
@@ -191,54 +194,55 @@
   // return it.  If the chunk
   // is the last chunk of that size, remove the node for that size
   // from the tree.
-  TreeChunk<Chunk>* get_chunk_from_tree(size_t size, enum FreeBlockDictionary<Chunk>::Dither dither, bool splay);
+  TreeChunk<Chunk_t, FreeList_t>* get_chunk_from_tree(size_t size, enum FreeBlockDictionary<Chunk_t>::Dither dither);
+  // Remove this chunk from the tree.  If the removal results
+  // in an empty list in the tree, remove the empty list.
+  TreeChunk<Chunk_t, FreeList_t>* remove_chunk_from_tree(TreeChunk<Chunk_t, FreeList_t>* tc);
+  // Remove the node in the trees starting at tl that has the
+  // minimum value and return it.  Repair the tree as needed.
+  TreeList<Chunk_t, FreeList_t>* remove_tree_minimum(TreeList<Chunk_t, FreeList_t>* tl);
+  // Add this free chunk to the tree.
+  void       insert_chunk_in_tree(Chunk_t* freeChunk);
+ public:
+
   // Return a list of the specified size or NULL from the tree.
   // The list is not removed from the tree.
-  TreeList<Chunk>* find_list (size_t size) const;
-  // Remove this chunk from the tree.  If the removal results
-  // in an empty list in the tree, remove the empty list.
-  TreeChunk<Chunk>* remove_chunk_from_tree(TreeChunk<Chunk>* tc);
-  // Remove the node in the trees starting at tl that has the
-  // minimum value and return it.  Repair the tree as needed.
-  TreeList<Chunk>* remove_tree_minimum(TreeList<Chunk>* tl);
-  void       semi_splay_step(TreeList<Chunk>* tl);
-  // Add this free chunk to the tree.
-  void       insert_chunk_in_tree(Chunk* freeChunk);
- public:
-
-  static const size_t min_tree_chunk_size  = sizeof(TreeChunk<Chunk>)/HeapWordSize;
+  TreeList<Chunk_t, FreeList_t>* find_list (size_t size) const;
 
   void       verify_tree() const;
   // verify that the given chunk is in the tree.
-  bool       verify_chunk_in_free_list(Chunk* tc) const;
+  bool       verify_chunk_in_free_list(Chunk_t* tc) const;
  private:
-  void          verify_tree_helper(TreeList<Chunk>* tl) const;
-  static size_t verify_prev_free_ptrs(TreeList<Chunk>* tl);
+  void          verify_tree_helper(TreeList<Chunk_t, FreeList_t>* tl) const;
+  static size_t verify_prev_free_ptrs(TreeList<Chunk_t, FreeList_t>* tl);
 
   // Returns the total number of chunks in the list.
-  size_t     total_list_length(TreeList<Chunk>* tl) const;
+  size_t     total_list_length(TreeList<Chunk_t, FreeList_t>* tl) const;
   // Returns the total number of words in the chunks in the tree
   // starting at "tl".
-  size_t     total_size_in_tree(TreeList<Chunk>* tl) const;
+  size_t     total_size_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const;
   // Returns the sum of the square of the size of each block
   // in the tree starting at "tl".
-  double     sum_of_squared_block_sizes(TreeList<Chunk>* const tl) const;
+  double     sum_of_squared_block_sizes(TreeList<Chunk_t, FreeList_t>* const tl) const;
   // Returns the total number of free blocks in the tree starting
   // at "tl".
-  size_t     total_free_blocks_in_tree(TreeList<Chunk>* tl) const;
-  size_t     num_free_blocks() const;
-  size_t     treeHeight() const;
-  size_t     tree_height_helper(TreeList<Chunk>* tl) const;
-  size_t     total_nodes_in_tree(TreeList<Chunk>* tl) const;
-  size_t     total_nodes_helper(TreeList<Chunk>* tl) const;
+  size_t     total_free_blocks_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const;
+  size_t     num_free_blocks()  const;
+  size_t     tree_height() const;
+  size_t     tree_height_helper(TreeList<Chunk_t, FreeList_t>* tl) const;
+  size_t     total_nodes_in_tree(TreeList<Chunk_t, FreeList_t>* tl) const;
+  size_t     total_nodes_helper(TreeList<Chunk_t, FreeList_t>* tl) const;
 
  public:
   // Constructor
-  BinaryTreeDictionary(bool adaptive_freelists, bool splay = false);
-  BinaryTreeDictionary(MemRegion mr, bool adaptive_freelists, bool splay = false);
+  BinaryTreeDictionary() :
+    _total_size(0), _total_free_blocks(0), _root(0) {}
+
+  BinaryTreeDictionary(MemRegion mr);
 
   // Public accessors
   size_t total_size() const { return _total_size; }
+  size_t total_free_blocks() const { return _total_free_blocks; }
 
   // Reset the dictionary to the initial conditions with
   // a single free chunk.
@@ -249,23 +253,24 @@
 
   // Return a chunk of size "size" or greater from
   // the tree.
-  // want a better dynamic splay strategy for the future.
-  Chunk* get_chunk(size_t size, enum FreeBlockDictionary<Chunk>::Dither dither) {
-    FreeBlockDictionary<Chunk>::verify_par_locked();
-    Chunk* res = get_chunk_from_tree(size, dither, splay());
+  Chunk_t* get_chunk(size_t size, enum FreeBlockDictionary<Chunk_t>::Dither dither) {
+    FreeBlockDictionary<Chunk_t>::verify_par_locked();
+    Chunk_t* res = get_chunk_from_tree(size, dither);
     assert(res == NULL || res->is_free(),
            "Should be returning a free chunk");
+    assert(dither != FreeBlockDictionary<Chunk_t>::exactly ||
+           res == NULL || res->size() == size, "Not correct size");
     return res;
   }
 
-  void return_chunk(Chunk* chunk) {
-    FreeBlockDictionary<Chunk>::verify_par_locked();
+  void return_chunk(Chunk_t* chunk) {
+    FreeBlockDictionary<Chunk_t>::verify_par_locked();
     insert_chunk_in_tree(chunk);
   }
 
-  void remove_chunk(Chunk* chunk) {
-    FreeBlockDictionary<Chunk>::verify_par_locked();
-    remove_chunk_from_tree((TreeChunk<Chunk>*)chunk);
+  void remove_chunk(Chunk_t* chunk) {
+    FreeBlockDictionary<Chunk_t>::verify_par_locked();
+    remove_chunk_from_tree((TreeChunk<Chunk_t, FreeList_t>*)chunk);
     assert(chunk->is_free(), "Should still be a free chunk");
   }
 
@@ -281,19 +286,19 @@
   }
 
   size_t     min_size() const {
-    return min_tree_chunk_size;
+    return TreeChunk<Chunk_t, FreeList_t>::min_size();
   }
 
   double     sum_of_squared_block_sizes() const {
     return sum_of_squared_block_sizes(root());
   }
 
-  Chunk* find_chunk_ends_at(HeapWord* target) const;
+  Chunk_t* find_chunk_ends_at(HeapWord* target) const;
 
   // Find the list with size "size" in the binary tree and update
   // the statistics in the list according to "split" (chunk was
   // split or coalesce) and "birth" (chunk was added or removed).
-  void       dict_census_udpate(size_t size, bool split, bool birth);
+  void       dict_census_update(size_t size, bool split, bool birth);
   // Return true if the dictionary is overpopulated (more chunks of
   // this size than desired) for size "size".
   bool       coal_dict_over_populated(size_t size);
@@ -307,7 +312,7 @@
   // statistics for the sweep.
   void       end_sweep_dict_census(double splitSurplusPercent);
   // Return the largest free chunk in the tree.
-  Chunk* find_largest_dict() const;
+  Chunk_t* find_largest_dict() const;
   // Accessors for statistics
   void       set_tree_surplus(double splitSurplusPercent);
   void       set_tree_hints(void);
--- a/src/share/vm/memory/collectorPolicy.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/memory/collectorPolicy.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -742,6 +742,8 @@
   uint gc_count = 0;
   uint full_gc_count = 0;
 
+  assert(!Heap_lock->owned_by_self(), "Should not be holding the Heap_lock");
+
   do {
     MetaWord* result = NULL;
     if (GC_locker::is_active_and_needs_gc()) {
@@ -756,7 +758,6 @@
       }
       JavaThread* jthr = JavaThread::current();
       if (!jthr->in_critical()) {
-        MutexUnlocker mul(Heap_lock);
         // Wait for JNI critical section to be exited
         GC_locker::stall_until_clear();
         // The GC invoked by the last thread leaving the critical
--- a/src/share/vm/memory/freeBlockDictionary.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/memory/freeBlockDictionary.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -27,6 +27,8 @@
 #include "gc_implementation/concurrentMarkSweep/freeChunk.hpp"
 #endif // SERIALGC
 #include "memory/freeBlockDictionary.hpp"
+#include "memory/metablock.hpp"
+#include "memory/metachunk.hpp"
 #ifdef TARGET_OS_FAMILY_linux
 # include "thread_linux.inline.hpp"
 #endif
@@ -62,6 +64,9 @@
 }
 #endif
 
+template class FreeBlockDictionary<Metablock>;
+template class FreeBlockDictionary<Metachunk>;
+
 #ifndef SERIALGC
 // Explicitly instantiate for FreeChunk
 template class FreeBlockDictionary<FreeChunk>;
--- a/src/share/vm/memory/freeBlockDictionary.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/memory/freeBlockDictionary.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -66,7 +66,7 @@
   virtual void       reset(HeapWord* addr, size_t size) = 0;
   virtual void       reset() = 0;
 
-  virtual void       dict_census_udpate(size_t size, bool split, bool birth) = 0;
+  virtual void       dict_census_update(size_t size, bool split, bool birth) = 0;
   virtual bool       coal_dict_over_populated(size_t size) = 0;
   virtual void       begin_sweep_dict_census(double coalSurplusPercent,
                        float inter_sweep_current, float inter_sweep_estimate,
--- a/src/share/vm/memory/freeList.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/memory/freeList.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -25,6 +25,8 @@
 #include "precompiled.hpp"
 #include "memory/freeBlockDictionary.hpp"
 #include "memory/freeList.hpp"
+#include "memory/metablock.hpp"
+#include "memory/metachunk.hpp"
 #include "memory/sharedHeap.hpp"
 #include "runtime/globals.hpp"
 #include "runtime/mutex.hpp"
@@ -49,8 +51,6 @@
 {
   _size         = 0;
   _count        = 0;
-  _hint         = 0;
-  init_statistics();
 }
 
 template <class Chunk>
@@ -62,34 +62,50 @@
 {
   _size         = fc->size();
   _count        = 1;
-  _hint         = 0;
-  init_statistics();
-#ifndef PRODUCT
-  _allocation_stats.set_returned_bytes(size() * HeapWordSize);
-#endif
 }
 
 template <class Chunk>
-void FreeList<Chunk>::reset(size_t hint) {
+void FreeList<Chunk>::link_head(Chunk* v) {
+  assert_proper_lock_protection();
+  set_head(v);
+  // If this method is not used (just set the head instead),
+  // this check can be avoided.
+  if (v != NULL) {
+    v->link_prev(NULL);
+  }
+}
+
+
+
+template <class Chunk>
+void FreeList<Chunk>::reset() {
+  // Don't set the _size to 0 because this method is
+  // used with a existing list that has a size but which has
+  // been emptied.
+  // Don't clear the _protecting_lock of an existing list.
   set_count(0);
   set_head(NULL);
   set_tail(NULL);
-  set_hint(hint);
 }
 
 template <class Chunk>
-void FreeList<Chunk>::init_statistics(bool split_birth) {
-  _allocation_stats.initialize(split_birth);
+void FreeList<Chunk>::initialize() {
+#ifdef ASSERT
+  // Needed early because it might be checked in other initializing code.
+  set_protecting_lock(NULL);
+#endif
+  reset();
+  set_size(0);
 }
 
-template <class Chunk>
-Chunk* FreeList<Chunk>::get_chunk_at_head() {
+template <class Chunk_t>
+Chunk_t* FreeList<Chunk_t>::get_chunk_at_head() {
   assert_proper_lock_protection();
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
-  Chunk* fc = head();
+  Chunk_t* fc = head();
   if (fc != NULL) {
-    Chunk* nextFC = fc->next();
+    Chunk_t* nextFC = fc->next();
     if (nextFC != NULL) {
       // The chunk fc being removed has a "next".  Set the "next" to the
       // "prev" of fc.
@@ -197,11 +213,6 @@
     link_tail(chunk);
   }
   increment_count(); // of # of chunks in list
-  DEBUG_ONLY(
-    if (record_return) {
-      increment_returned_bytes_by(size()*HeapWordSize);
-    }
-  )
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
   assert(head() == NULL || head()->size() == size(), "wrong item on list");
@@ -233,11 +244,6 @@
   }
   link_tail(chunk);
   increment_count();  // of # of chunks in list
-  DEBUG_ONLY(
-    if (record_return) {
-      increment_returned_bytes_by(size()*HeapWordSize);
-    }
-  )
   assert(head() == NULL || head()->prev() == NULL, "list invariant");
   assert(tail() == NULL || tail()->next() == NULL, "list invariant");
   assert(head() == NULL || head()->size() == size(), "wrong item on list");
@@ -273,7 +279,7 @@
   }
 }
 
-// verify_chunk_in_free_list() is used to verify that an item is in this free list.
+// verify_chunk_in_free_lists() is used to verify that an item is in this free list.
 // It is used as a debugging aid.
 template <class Chunk>
 bool FreeList<Chunk>::verify_chunk_in_free_list(Chunk* fc) const {
@@ -294,40 +300,14 @@
 
 #ifndef PRODUCT
 template <class Chunk>
-void FreeList<Chunk>::verify_stats() const {
-  // The +1 of the LH comparand is to allow some "looseness" in
-  // checking: we usually call this interface when adding a block
-  // and we'll subsequently update the stats; we cannot update the
-  // stats beforehand because in the case of the large-block BT
-  // dictionary for example, this might be the first block and
-  // in that case there would be no place that we could record
-  // the stats (which are kept in the block itself).
-  assert((_allocation_stats.prev_sweep() + _allocation_stats.split_births()
-          + _allocation_stats.coal_births() + 1)   // Total Production Stock + 1
-         >= (_allocation_stats.split_deaths() + _allocation_stats.coal_deaths()
-             + (ssize_t)count()),                // Total Current Stock + depletion
-         err_msg("FreeList " PTR_FORMAT " of size " SIZE_FORMAT
-                 " violates Conservation Principle: "
-                 "prev_sweep(" SIZE_FORMAT ")"
-                 " + split_births(" SIZE_FORMAT ")"
-                 " + coal_births(" SIZE_FORMAT ") + 1 >= "
-                 " split_deaths(" SIZE_FORMAT ")"
-                 " coal_deaths(" SIZE_FORMAT ")"
-                 " + count(" SSIZE_FORMAT ")",
-                 this, _size, _allocation_stats.prev_sweep(), _allocation_stats.split_births(),
-                 _allocation_stats.split_births(), _allocation_stats.split_deaths(),
-                 _allocation_stats.coal_deaths(), count()));
-}
-
-template <class Chunk>
 void FreeList<Chunk>::assert_proper_lock_protection_work() const {
-  assert(_protecting_lock != NULL, "Don't call this directly");
+  assert(protecting_lock() != NULL, "Don't call this directly");
   assert(ParallelGCThreads > 0, "Don't call this directly");
   Thread* thr = Thread::current();
   if (thr->is_VM_thread() || thr->is_ConcurrentGC_thread()) {
     // assert that we are holding the freelist lock
   } else if (thr->is_GC_task_thread()) {
-    assert(_protecting_lock->owned_by_self(), "FreeList RACE DETECTED");
+    assert(protecting_lock()->owned_by_self(), "FreeList RACE DETECTED");
   } else if (thr->is_Java_thread()) {
     assert(!SafepointSynchronize::is_at_safepoint(), "Should not be executing");
   } else {
@@ -350,21 +330,17 @@
 // to the call is a non-null string, it is printed in the first column;
 // otherwise, if the argument is null (the default), then the size of the
 // (free list) block is printed in the first column.
-template <class Chunk>
-void FreeList<Chunk>::print_on(outputStream* st, const char* c) const {
+template <class Chunk_t>
+void FreeList<Chunk_t>::print_on(outputStream* st, const char* c) const {
   if (c != NULL) {
     st->print("%16s", c);
   } else {
     st->print(SIZE_FORMAT_W(16), size());
   }
-  st->print("\t"
-           SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t"
-           SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\t" SSIZE_FORMAT_W(14) "\n",
-           bfr_surp(),             surplus(),             desired(),             prev_sweep(),           before_sweep(),
-           count(),               coal_births(),          coal_deaths(),          split_births(),         split_deaths());
 }
 
+template class FreeList<Metablock>;
+template class FreeList<Metachunk>;
 #ifndef SERIALGC
-// Needs to be after the definitions have been seen.
 template class FreeList<FreeChunk>;
 #endif // SERIALGC
--- a/src/share/vm/memory/freeList.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/memory/freeList.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -40,23 +40,19 @@
 // for that implementation.
 
 class Mutex;
-template <class Chunk> class TreeList;
-template <class Chunk> class PrintTreeCensusClosure;
 
-template <class Chunk>
+template <class Chunk_t>
 class FreeList VALUE_OBJ_CLASS_SPEC {
   friend class CompactibleFreeListSpace;
   friend class VMStructs;
-  friend class PrintTreeCensusClosure<Chunk>;
 
  private:
-  Chunk*        _head;          // Head of list of free chunks
-  Chunk*        _tail;          // Tail of list of free chunks
+  Chunk_t*      _head;          // Head of list of free chunks
+  Chunk_t*      _tail;          // Tail of list of free chunks
   size_t        _size;          // Size in Heap words of each chunk
   ssize_t       _count;         // Number of entries in list
-  size_t        _hint;          // next larger size list with a positive surplus
 
-  AllocationStats _allocation_stats; // allocation-related statistics
+ protected:
 
 #ifdef ASSERT
   Mutex*        _protecting_lock;
@@ -71,10 +67,6 @@
 #endif
   }
 
-  // Initialize the allocation statistics.
- protected:
-  void init_statistics(bool split_birth = false);
-  void set_count(ssize_t v) { _count = v;}
   void increment_count()    {
     _count++;
   }
@@ -89,52 +81,48 @@
   // Construct a list without any entries.
   FreeList();
   // Construct a list with "fc" as the first (and lone) entry in the list.
-  FreeList(Chunk* fc);
+  FreeList(Chunk_t* fc);
 
-  // Reset the head, tail, hint, and count of a free list.
-  void reset(size_t hint);
+  // Do initialization
+  void initialize();
+
+  // Reset the head, tail, and count of a free list.
+  void reset();
 
   // Declare the current free list to be protected by the given lock.
 #ifdef ASSERT
-  void set_protecting_lock(Mutex* protecting_lock) {
-    _protecting_lock = protecting_lock;
+  Mutex* protecting_lock() const { return _protecting_lock; }
+  void set_protecting_lock(Mutex* v) {
+    _protecting_lock = v;
   }
 #endif
 
   // Accessors.
-  Chunk* head() const {
+  Chunk_t* head() const {
     assert_proper_lock_protection();
     return _head;
   }
-  void set_head(Chunk* v) {
+  void set_head(Chunk_t* v) {
     assert_proper_lock_protection();
     _head = v;
     assert(!_head || _head->size() == _size, "bad chunk size");
   }
   // Set the head of the list and set the prev field of non-null
   // values to NULL.
-  void link_head(Chunk* v) {
-    assert_proper_lock_protection();
-    set_head(v);
-    // If this method is not used (just set the head instead),
-    // this check can be avoided.
-    if (v != NULL) {
-      v->link_prev(NULL);
-    }
-  }
+  void link_head(Chunk_t* v);
 
-  Chunk* tail() const {
+  Chunk_t* tail() const {
     assert_proper_lock_protection();
     return _tail;
   }
-  void set_tail(Chunk* v) {
+  void set_tail(Chunk_t* v) {
     assert_proper_lock_protection();
     _tail = v;
     assert(!_tail || _tail->size() == _size, "bad chunk size");
   }
   // Set the tail of the list and set the next field of non-null
   // values to NULL.
-  void link_tail(Chunk* v) {
+  void link_tail(Chunk_t* v) {
     assert_proper_lock_protection();
     set_tail(v);
     if (v != NULL) {
@@ -152,174 +140,45 @@
     assert_proper_lock_protection();
     _size = v;
   }
-  ssize_t count() const {
-    return _count;
-  }
-  size_t hint() const {
-    return _hint;
-  }
-  void set_hint(size_t v) {
-    assert_proper_lock_protection();
-    assert(v == 0 || _size < v, "Bad hint"); _hint = v;
-  }
+  ssize_t count() const { return _count; }
+  void set_count(ssize_t v) { _count = v;}
 
-  // Accessors for statistics
-  AllocationStats* allocation_stats() {
-    assert_proper_lock_protection();
-    return &_allocation_stats;
-  }
+  size_t get_better_size() { return size(); }
 
-  ssize_t desired() const {
-    return _allocation_stats.desired();
-  }
-  void set_desired(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_desired(v);
-  }
-  void compute_desired(float inter_sweep_current,
-                       float inter_sweep_estimate,
-                       float intra_sweep_estimate) {
-    assert_proper_lock_protection();
-    _allocation_stats.compute_desired(_count,
-                                      inter_sweep_current,
-                                      inter_sweep_estimate,
-                                      intra_sweep_estimate);
-  }
-  ssize_t coal_desired() const {
-    return _allocation_stats.coal_desired();
-  }
-  void set_coal_desired(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_coal_desired(v);
-  }
-
-  ssize_t surplus() const {
-    return _allocation_stats.surplus();
-  }
-  void set_surplus(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_surplus(v);
-  }
-  void increment_surplus() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_surplus();
-  }
-  void decrement_surplus() {
-    assert_proper_lock_protection();
-    _allocation_stats.decrement_surplus();
-  }
-
-  ssize_t bfr_surp() const {
-    return _allocation_stats.bfr_surp();
-  }
-  void set_bfr_surp(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_bfr_surp(v);
-  }
-  ssize_t prev_sweep() const {
-    return _allocation_stats.prev_sweep();
-  }
-  void set_prev_sweep(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_prev_sweep(v);
-  }
-  ssize_t before_sweep() const {
-    return _allocation_stats.before_sweep();
-  }
-  void set_before_sweep(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_before_sweep(v);
-  }
-
-  ssize_t coal_births() const {
-    return _allocation_stats.coal_births();
-  }
-  void set_coal_births(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_coal_births(v);
-  }
-  void increment_coal_births() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_coal_births();
-  }
-
-  ssize_t coal_deaths() const {
-    return _allocation_stats.coal_deaths();
-  }
-  void set_coal_deaths(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_coal_deaths(v);
-  }
-  void increment_coal_deaths() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_coal_deaths();
-  }
-
-  ssize_t split_births() const {
-    return _allocation_stats.split_births();
-  }
-  void set_split_births(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_split_births(v);
-  }
-  void increment_split_births() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_split_births();
-  }
-
-  ssize_t split_deaths() const {
-    return _allocation_stats.split_deaths();
-  }
-  void set_split_deaths(ssize_t v) {
-    assert_proper_lock_protection();
-    _allocation_stats.set_split_deaths(v);
-  }
-  void increment_split_deaths() {
-    assert_proper_lock_protection();
-    _allocation_stats.increment_split_deaths();
-  }
-
-  NOT_PRODUCT(
-    // For debugging.  The "_returned_bytes" in all the lists are summed
-    // and compared with the total number of bytes swept during a
-    // collection.
-    size_t returned_bytes() const { return _allocation_stats.returned_bytes(); }
-    void set_returned_bytes(size_t v) { _allocation_stats.set_returned_bytes(v); }
-    void increment_returned_bytes_by(size_t v) {
-      _allocation_stats.set_returned_bytes(_allocation_stats.returned_bytes() + v);
-    }
-  )
+  size_t returned_bytes() const { ShouldNotReachHere(); return 0; }
+  void set_returned_bytes(size_t v) {}
+  void increment_returned_bytes_by(size_t v) {}
 
   // Unlink head of list and return it.  Returns NULL if
   // the list is empty.
-  Chunk* get_chunk_at_head();
+  Chunk_t* get_chunk_at_head();
 
   // Remove the first "n" or "count", whichever is smaller, chunks from the
   // list, setting "fl", which is required to be empty, to point to them.
-  void getFirstNChunksFromList(size_t n, FreeList<Chunk>* fl);
+  void getFirstNChunksFromList(size_t n, FreeList<Chunk_t>* fl);
 
   // Unlink this chunk from it's free list
-  void remove_chunk(Chunk* fc);
+  void remove_chunk(Chunk_t* fc);
 
   // Add this chunk to this free list.
-  void return_chunk_at_head(Chunk* fc);
-  void return_chunk_at_tail(Chunk* fc);
+  void return_chunk_at_head(Chunk_t* fc);
+  void return_chunk_at_tail(Chunk_t* fc);
 
   // Similar to returnChunk* but also records some diagnostic
   // information.
-  void return_chunk_at_head(Chunk* fc, bool record_return);
-  void return_chunk_at_tail(Chunk* fc, bool record_return);
+  void return_chunk_at_head(Chunk_t* fc, bool record_return);
+  void return_chunk_at_tail(Chunk_t* fc, bool record_return);
 
   // Prepend "fl" (whose size is required to be the same as that of "this")
   // to the front of "this" list.
-  void prepend(FreeList<Chunk>* fl);
+  void prepend(FreeList<Chunk_t>* fl);
 
   // Verify that the chunk is in the list.
   // found.  Return NULL if "fc" is not found.
-  bool verify_chunk_in_free_list(Chunk* fc) const;
+  bool verify_chunk_in_free_list(Chunk_t* fc) const;
 
   // Stats verification
-  void verify_stats() const PRODUCT_RETURN;
+//  void verify_stats() const { ShouldNotReachHere(); };
 
   // Printing support
   static void print_labels_on(outputStream* st, const char* c);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/metablock.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#ifndef SHARE_VM_MEMORY_METABLOCK_HPP
+#define SHARE_VM_MEMORY_METABLOCK_HPP
+
+// Metablock are the unit of allocation from a Chunk.  It is initialized
+// with the size of the requested allocation.  That size is overwritten
+// once the allocation returns.
+//
+// A Metablock may be reused by its SpaceManager but are never moved between
+// SpaceManagers.  There is no explicit link to the Metachunk
+// from which it was allocated.  Metablock may be deallocated and
+// put on a freelist but the space is never freed, rather
+// the Metachunk it is a part of will be deallocated when it's
+// associated class loader is collected.
+
+class Metablock VALUE_OBJ_CLASS_SPEC {
+  friend class VMStructs;
+ private:
+  // Used to align the allocation (see below).
+  union block_t {
+    void* _data[3];
+    struct header_t {
+      size_t _word_size;
+      Metablock* _next;
+      Metablock* _prev;
+    } _header;
+  } _block;
+  static size_t _min_block_byte_size;
+  static size_t _overhead;
+
+  typedef union block_t Block;
+  typedef struct header_t Header;
+  const Block* block() const { return &_block; }
+  const Block::header_t* header() const { return &(block()->_header); }
+ public:
+
+  static Metablock* initialize(MetaWord* p, size_t word_size);
+
+  // This places the body of the block at a 2 word boundary
+  // because every block starts on a 2 word boundary.  Work out
+  // how to make the body on a 2 word boundary if the block
+  // starts on a arbitrary boundary.  JJJ
+
+  size_t word_size() const  { return header()->_word_size; }
+  void set_word_size(size_t v) { _block._header._word_size = v; }
+  size_t size() const volatile { return _block._header._word_size; }
+  void set_size(size_t v) { _block._header._word_size = v; }
+  Metablock* next() const { return header()->_next; }
+  void set_next(Metablock* v) { _block._header._next = v; }
+  Metablock* prev() const { return header()->_prev; }
+  void set_prev(Metablock* v) { _block._header._prev = v; }
+
+  static size_t min_block_byte_size() { return _min_block_byte_size; }
+  static size_t overhead() { return _overhead; }
+
+  bool is_free()                 { return header()->_word_size != 0; }
+  void clear_next()              { set_next(NULL); }
+  void link_prev(Metablock* ptr) { set_prev(ptr); }
+  uintptr_t* end()              { return ((uintptr_t*) this) + size(); }
+  bool cantCoalesce() const     { return false; }
+  void link_next(Metablock* ptr) { set_next(ptr); }
+  void link_after(Metablock* ptr){
+    link_next(ptr);
+    if (ptr != NULL) ptr->link_prev(this);
+  }
+
+  // Should not be needed in a free list of Metablocks
+  void markNotFree()            { ShouldNotReachHere(); }
+
+  // Debug support
+#ifdef ASSERT
+  void* prev_addr() const { return (void*)&_block._header._prev; }
+  void* next_addr() const { return (void*)&_block._header._next; }
+  void* size_addr() const { return (void*)&_block._header._word_size; }
+#endif
+  bool verify_chunk_in_free_list(Metablock* tc) const { return true; }
+  bool verify_par_locked() { return true; }
+
+  void assert_is_mangled() const {/* Don't check "\*/}
+};
+#endif // SHARE_VM_MEMORY_METABLOCK_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/memory/metachunk.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -0,0 +1,133 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+#ifndef SHARE_VM_MEMORY_METACHUNK_HPP
+#define SHARE_VM_MEMORY_METACHUNK_HPP
+
+//  Metachunk - Quantum of allocation from a Virtualspace
+//    Metachunks are reused (when freed are put on a global freelist) and
+//    have no permanent association to a SpaceManager.
+
+//            +--------------+ <- end
+//            |              |          --+       ---+
+//            |              |            | free     |
+//            |              |            |          |
+//            |              |            |          | capacity
+//            |              |            |          |
+//            |              | <- top   --+          |
+//            |              |           ---+        |
+//            |              |              | used   |
+//            |              |              |        |
+//            |              |              |        |
+//            +--------------+ <- bottom ---+     ---+
+
+class Metachunk VALUE_OBJ_CLASS_SPEC {
+  // link to support lists of chunks
+  Metachunk* _next;
+  Metachunk* _prev;
+
+  MetaWord* _bottom;
+  MetaWord* _end;
+  MetaWord* _top;
+  size_t _word_size;
+  // Used in a guarantee() so included in the Product builds
+  // even through it is only for debugging.
+  bool _is_free;
+
+  // Metachunks are allocated out of a MetadataVirtualSpace and
+  // and use some of its space to describe itself (plus alignment
+  // considerations).  Metadata is allocated in the rest of the chunk.
+  // This size is the overhead of maintaining the Metachunk within
+  // the space.
+  static size_t _overhead;
+
+  void set_bottom(MetaWord* v) { _bottom = v; }
+  void set_end(MetaWord* v) { _end = v; }
+  void set_top(MetaWord* v) { _top = v; }
+  void set_word_size(size_t v) { _word_size = v; }
+ public:
+#ifdef ASSERT
+  Metachunk() : _bottom(NULL), _end(NULL), _top(NULL), _is_free(false) {}
+#else
+  Metachunk() : _bottom(NULL), _end(NULL), _top(NULL) {}
+#endif
+
+  // Used to add a Metachunk to a list of Metachunks
+  void set_next(Metachunk* v) { _next = v; assert(v != this, "Boom");}
+  void set_prev(Metachunk* v) { _prev = v; assert(v != this, "Boom");}
+
+  MetaWord* allocate(size_t word_size);
+  static Metachunk* initialize(MetaWord* ptr, size_t word_size);
+
+  // Accessors
+  Metachunk* next() const { return _next; }
+  Metachunk* prev() const { return _prev; }
+  MetaWord* bottom() const { return _bottom; }
+  MetaWord* end() const { return _end; }
+  MetaWord* top() const { return _top; }
+  size_t word_size() const { return _word_size; }
+  size_t size() const volatile { return _word_size; }
+  void set_size(size_t v) { _word_size = v; }
+  bool is_free() { return _is_free; }
+  void set_is_free(bool v) { _is_free = v; }
+  static size_t overhead() { return _overhead; }
+  void clear_next()              { set_next(NULL); }
+  void link_prev(Metachunk* ptr) { set_prev(ptr); }
+  uintptr_t* end()              { return ((uintptr_t*) this) + size(); }
+  bool cantCoalesce() const     { return false; }
+  void link_next(Metachunk* ptr) { set_next(ptr); }
+  void link_after(Metachunk* ptr){
+    link_next(ptr);
+    if (ptr != NULL) ptr->link_prev(this);
+  }
+
+  // Reset top to bottom so chunk can be reused.
+  void reset_empty() { _top = (_bottom + _overhead); }
+  bool is_empty() { return _top == (_bottom + _overhead); }
+
+  // used (has been allocated)
+  // free (available for future allocations)
+  // capacity (total size of chunk)
+  size_t used_word_size();
+  size_t free_word_size();
+  size_t capacity_word_size();
+
+  // Debug support
+#ifdef ASSERT
+  void* prev_addr() const { return (void*)&_prev; }
+  void* next_addr() const { return (void*)&_next; }
+  void* size_addr() const { return (void*)&_word_size; }
+#endif
+  bool verify_chunk_in_free_list(Metachunk* tc) const { return true; }
+  bool verify_par_locked() { return true; }
+
+  void assert_is_mangled() const {/* Don't check "\*/}
+
+#ifdef ASSERT
+  void mangle();
+#endif // ASSERT
+
+  void print_on(outputStream* st) const;
+  void verify();
+};
+#endif  // SHARE_VM_MEMORY_METACHUNK_HPP
--- a/src/share/vm/memory/metaspace.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/memory/metaspace.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -24,9 +24,12 @@
 #include "precompiled.hpp"
 #include "gc_interface/collectedHeap.hpp"
 #include "memory/binaryTreeDictionary.hpp"
+#include "memory/freeList.hpp"
 #include "memory/collectorPolicy.hpp"
 #include "memory/filemap.hpp"
 #include "memory/freeList.hpp"
+#include "memory/metablock.hpp"
+#include "memory/metachunk.hpp"
 #include "memory/metaspace.hpp"
 #include "memory/metaspaceShared.hpp"
 #include "memory/resourceArea.hpp"
@@ -37,15 +40,8 @@
 #include "utilities/copy.hpp"
 #include "utilities/debug.hpp"
 
-// Define this macro to deallocate Metablock.  If not defined,
-// blocks are not yet deallocated and are only mangled.
-#undef DEALLOCATE_BLOCKS
-
-// Easily recognizable patterns
-// These patterns can be the same in 32bit or 64bit since
-// they only have to be easily recognizable.
-const void* metaspace_allocation_leader = (void*) 0X11111111;
-const void* metaspace_allocation_trailer = (void*) 0X77777777;
+typedef BinaryTreeDictionary<Metablock, FreeList> BlockTreeDictionary;
+typedef BinaryTreeDictionary<Metachunk, FreeList> ChunkTreeDictionary;
 
 // Parameters for stress mode testing
 const uint metadata_deallocate_a_lot_block = 10;
@@ -53,7 +49,6 @@
 size_t const allocation_from_dictionary_limit = 64 * K;
 const size_t metadata_chunk_initialize = 0xf7f7f7f7;
 const size_t metadata_deallocate = 0xf5f5f5f5;
-const size_t metadata_space_manager_allocate = 0xf3f3f3f3;
 
 MetaWord* last_allocated = 0;
 
@@ -62,11 +57,12 @@
   SmallIndex = 0,
   MediumIndex = 1,
   HumongousIndex = 2,
-  NumberOfFreeLists = 3
+  NumberOfFreeLists = 2,
+  NumberOfInUseLists = 3
 };
 
 static ChunkIndex next_chunk_index(ChunkIndex i) {
-  assert(i < NumberOfFreeLists, "Out of bound");
+  assert(i < NumberOfInUseLists, "Out of bound");
   return (ChunkIndex) (i+1);
 }
 
@@ -100,164 +96,13 @@
 // the Chunk after the header for the Chunk) where as Metachunks
 // point to space in a VirtualSpace.  To replace Metachunks with
 // Chunks, change Chunks so that they can be allocated out of a VirtualSpace.
-//
-
-// Metablock are the unit of allocation from a Chunk.  It contains
-// the size of the requested allocation in a debug build.
-// Also in a debug build it has a marker before and after the
-// body of the block. The address of the body is the address returned
-// by the allocation.
-//
-// Layout in a debug build.  In a product build only the body is present.
-//
-//     +-----------+-----------+------------+     +-----------+
-//     | word size | leader    | body       | ... | trailer   |
-//     +-----------+-----------+------------+     +-----------+
-//
-// A Metablock may be reused by its SpaceManager but are never moved between
-// SpaceManagers.  There is no explicit link to the Metachunk
-// from which it was allocated.  Metablock are not deallocated, rather
-// the Metachunk it is a part of will be deallocated when it's
-// associated class loader is collected.
-//
-// When the word size of a block is passed in to the deallocation
-// call the word size no longer needs to be part of a Metablock.
-
-class Metablock {
-  friend class VMStructs;
- private:
-  // Used to align the allocation (see below) and for debugging.
+size_t Metablock::_min_block_byte_size = sizeof(Metablock);
 #ifdef ASSERT
-  struct {
-    size_t _word_size;
-    void*  _leader;
-  } _header;
-  void* _data[1];
+  size_t Metablock::_overhead =
+    Chunk::aligned_overhead_size(sizeof(Metablock)) / BytesPerWord;
+#else
+  size_t Metablock::_overhead = 0;
 #endif
-  static size_t _overhead;
-
-#ifdef ASSERT
-  void set_word_size(size_t v) { _header._word_size = v; }
-  void* leader() { return _header._leader; }
-  void* trailer() {
-    jlong index = (jlong) _header._word_size - sizeof(_header)/BytesPerWord - 1;
-    assert(index > 0, err_msg("Bad indexling of trailer %d", index));
-    void** ptr = &_data[index];
-    return *ptr;
-  }
-  void set_leader(void* v) { _header._leader = v; }
-  void set_trailer(void* v) {
-    void** ptr = &_data[_header._word_size - sizeof(_header)/BytesPerWord - 1];
-    *ptr = v;
-  }
- public:
-  size_t word_size() { return _header._word_size; }
-#endif
- public:
-
-  static Metablock* initialize(MetaWord* p, size_t word_size);
-
-  // This places the body of the block at a 2 word boundary
-  // because every block starts on a 2 word boundary.  Work out
-  // how to make the body on a 2 word boundary if the block
-  // starts on a arbitrary boundary.  JJJ
-
-#ifdef ASSERT
-  MetaWord* data() { return (MetaWord*) &_data[0]; }
-#else
-  MetaWord* data() { return (MetaWord*) this; }
-#endif
-  static Metablock* metablock_from_data(MetaWord* p) {
-#ifdef ASSERT
-    size_t word_offset = offset_of(Metablock, _data)/BytesPerWord;
-    Metablock* result = (Metablock*) (p - word_offset);
-    return result;
-#else
-    return (Metablock*) p;
-#endif
-  }
-
-  static size_t overhead() { return _overhead; }
-  void verify();
-};
-
-//  Metachunk - Quantum of allocation from a Virtualspace
-//    Metachunks are reused (when freed are put on a global freelist) and
-//    have no permanent association to a SpaceManager.
-
-//            +--------------+ <- end
-//            |              |          --+       ---+
-//            |              |            | free     |
-//            |              |            |          |
-//            |              |            |          | capacity
-//            |              |            |          |
-//            |              | <- top   --+          |
-//            |              |           ---+        |
-//            |              |              | used   |
-//            |              |              |        |
-//            |              |              |        |
-//            +--------------+ <- bottom ---+     ---+
-
-class Metachunk VALUE_OBJ_CLASS_SPEC {
-  // link to support lists of chunks
-  Metachunk* _next;
-
-  MetaWord* _bottom;
-  MetaWord* _end;
-  MetaWord* _top;
-  size_t _word_size;
-
-  // Metachunks are allocated out of a MetadataVirtualSpace and
-  // and use some of its space to describe itself (plus alignment
-  // considerations).  Metadata is allocated in the rest of the chunk.
-  // This size is the overhead of maintaining the Metachunk within
-  // the space.
-  static size_t _overhead;
-
-  void set_bottom(MetaWord* v) { _bottom = v; }
-  void set_end(MetaWord* v) { _end = v; }
-  void set_top(MetaWord* v) { _top = v; }
-  void set_word_size(size_t v) { _word_size = v; }
- public:
-
-  // Used to add a Metachunk to a list of Metachunks
-  void set_next(Metachunk* v) { _next = v; assert(v != this, "Boom");}
-
-  Metablock* allocate(size_t word_size);
-  static Metachunk* initialize(MetaWord* ptr, size_t word_size);
-
-  // Accessors
-  Metachunk* next() const { return _next; }
-  MetaWord* bottom() const { return _bottom; }
-  MetaWord* end() const { return _end; }
-  MetaWord* top() const { return _top; }
-  size_t word_size() const { return _word_size; }
-  static size_t overhead() { return _overhead; }
-
-  // Reset top to bottom so chunk can be reused.
-  void reset_empty() { _top = (_bottom + _overhead); }
-  bool is_empty() { return _top == (_bottom + _overhead); }
-
-  // used (has been allocated)
-  // free (available for future allocations)
-  // capacity (total size of chunk)
-  size_t used_word_size();
-  size_t free_word_size();
-  size_t capacity_word_size();
-
-#ifdef ASSERT
-  void mangle() {
-    // Mangle the payload of the chunk and not the links that
-    // maintain list of chunks.
-    HeapWord* start = (HeapWord*)(bottom() + overhead());
-    size_t word_size = capacity_word_size() - overhead();
-    Copy::fill_to_words(start, word_size, metadata_chunk_initialize);
-  }
-#endif // ASSERT
-
-  void print_on(outputStream* st) const;
-  void verify();
-};
 
 
 // Pointer to list of Metachunks.
@@ -292,7 +137,10 @@
   //   SmallChunk
   //   MediumChunk
   //   HumongousChunk
-  ChunkList _free_chunks[3];
+  ChunkList _free_chunks[NumberOfFreeLists];
+
+  //   HumongousChunk
+  ChunkTreeDictionary _humongous_dictionary;
 
   // ChunkManager in all lists of this type
   size_t _free_chunks_total;
@@ -337,7 +185,9 @@
   }
   ChunkList* free_medium_chunks() { return &_free_chunks[1]; }
   ChunkList* free_small_chunks() { return &_free_chunks[0]; }
-  ChunkList* free_humongous_chunks() { return &_free_chunks[2]; }
+  ChunkTreeDictionary* humongous_dictionary() {
+    return &_humongous_dictionary;
+  }
 
   ChunkList* free_chunks(ChunkIndex index);
 
@@ -356,41 +206,35 @@
 
   void locked_print_free_chunks(outputStream* st);
   void locked_print_sum_free_chunks(outputStream* st);
+
+  void print_on(outputStream* st);
 };
 
 
 // Used to manage the free list of Metablocks (a block corresponds
 // to the allocation of a quantum of metadata).
 class BlockFreelist VALUE_OBJ_CLASS_SPEC {
-#ifdef DEALLOCATE_BLOCKS
-  BinaryTreeDictionary<Metablock>* _dictionary;
-#endif
-  static Metablock* initialize_free_chunk(Metablock* block, size_t word_size);
-
-#ifdef DEALLOCATE_BLOCKS
+  BlockTreeDictionary* _dictionary;
+  static Metablock* initialize_free_chunk(MetaWord* p, size_t word_size);
+
   // Accessors
-  BinaryTreeDictionary<Metablock>* dictionary() const { return _dictionary; }
-#endif
+  BlockTreeDictionary* dictionary() const { return _dictionary; }
 
  public:
   BlockFreelist();
   ~BlockFreelist();
 
   // Get and return a block to the free list
-  Metablock* get_block(size_t word_size);
-  void return_block(Metablock* block, size_t word_size);
-
-  size_t totalSize() {
-#ifdef DEALLOCATE_BLOCKS
-    if (dictionary() == NULL) {
-      return 0;
-    } else {
-      return dictionary()->totalSize();
-    }
-#else
+  MetaWord* get_block(size_t word_size);
+  void return_block(MetaWord* p, size_t word_size);
+
+  size_t total_size() {
+  if (dictionary() == NULL) {
     return 0;
-#endif
+  } else {
+    return dictionary()->total_size();
   }
+}
 
   void print_on(outputStream* st) const;
 };
@@ -600,7 +444,6 @@
   };
 };
 
-
 class Metadebug : AllStatic {
   // Debugging support for Metaspaces
   static int _deallocate_block_a_lot_count;
@@ -655,7 +498,7 @@
   // List of chunks in use by this SpaceManager.  Allocations
   // are done from the current chunk.  The list is used for deallocating
   // chunks when the SpaceManager is freed.
-  Metachunk* _chunks_in_use[NumberOfFreeLists];
+  Metachunk* _chunks_in_use[NumberOfInUseLists];
   Metachunk* _current_chunk;
 
   // Virtual space where allocation comes from.
@@ -700,24 +543,6 @@
   // Add chunk to the list of chunks in use
   void add_chunk(Metachunk* v, bool make_current);
 
-  // Debugging support
-  void verify_chunks_in_use_index(ChunkIndex index, Metachunk* v) {
-    switch (index) {
-    case 0:
-      assert(v->word_size() == SmallChunk, "Not a SmallChunk");
-      break;
-    case 1:
-      assert(v->word_size() == MediumChunk, "Not a MediumChunk");
-      break;
-    case 2:
-      assert(v->word_size() > MediumChunk, "Not a HumongousChunk");
-      break;
-    default:
-      assert(false, "Wrong list.");
-    }
-  }
-
- protected:
   Mutex* lock() const { return _lock; }
 
  public:
@@ -751,10 +576,10 @@
   MetaWord* allocate(size_t word_size);
 
   // Helper for allocations
-  Metablock* allocate_work(size_t word_size);
+  MetaWord* allocate_work(size_t word_size);
 
   // Returns a block to the per manager freelist
-  void deallocate(MetaWord* p);
+  void deallocate(MetaWord* p, size_t word_size);
 
   // Based on the allocation size and a minimum chunk size,
   // returned chunk size (for expanding space for chunk allocation).
@@ -763,7 +588,7 @@
   // Called when an allocation from the current chunk fails.
   // Gets a new chunk (may require getting a new virtual space),
   // and allocates from that chunk.
-  Metablock* grow_and_allocate(size_t word_size);
+  MetaWord* grow_and_allocate(size_t word_size);
 
   // debugging support.
 
@@ -780,6 +605,8 @@
 
 uint const SpaceManager::_small_chunk_limit = 4;
 
+
+
 const char* SpaceManager::_expand_lock_name =
   "SpaceManager chunk allocation lock";
 const int SpaceManager::_expand_lock_rank = Monitor::leaf - 1;
@@ -788,39 +615,26 @@
             SpaceManager::_expand_lock_name,
             Mutex::_allow_vm_block_flag);
 
-#ifdef ASSERT
-size_t Metablock::_overhead =
-  Chunk::aligned_overhead_size(sizeof(Metablock)) / BytesPerWord;
-#else
-size_t Metablock::_overhead = 0;
-#endif
 size_t Metachunk::_overhead =
   Chunk::aligned_overhead_size(sizeof(Metachunk)) / BytesPerWord;
 
 // New blocks returned by the Metaspace are zero initialized.
 // We should fix the constructors to not assume this instead.
 Metablock* Metablock::initialize(MetaWord* p, size_t word_size) {
+  if (p == NULL) {
+    return NULL;
+  }
+
   Metablock* result = (Metablock*) p;
 
   // Clear the memory
   Copy::fill_to_aligned_words((HeapWord*)result, word_size);
 #ifdef ASSERT
   result->set_word_size(word_size);
-  // Check after work size is set.
-  result->set_leader((void*) metaspace_allocation_leader);
-  result->set_trailer((void*) metaspace_allocation_trailer);
 #endif
   return result;
 }
 
-void Metablock::verify() {
-#ifdef ASSERT
-  assert(leader() == metaspace_allocation_leader &&
-         trailer() == metaspace_allocation_trailer,
-         "block has been corrupted");
-#endif
-}
-
 // Metachunk methods
 
 Metachunk* Metachunk::initialize(MetaWord* ptr, size_t word_size) {
@@ -843,18 +657,13 @@
 }
 
 
-Metablock* Metachunk::allocate(size_t word_size) {
-  Metablock* result = NULL;
+MetaWord* Metachunk::allocate(size_t word_size) {
+  MetaWord* result = NULL;
   // If available, bump the pointer to allocate.
   if (free_word_size() >= word_size) {
-    result = Metablock::initialize(_top, word_size);
+    result = _top;
     _top = _top + word_size;
   }
-#ifdef ASSERT
-  assert(result == NULL ||
-         result->word_size() == word_size,
-         "Block size is not set correctly");
-#endif
   return result;
 }
 
@@ -878,103 +687,85 @@
                bottom(), top(), end(), word_size());
 }
 
+#ifdef ASSERT
+void Metachunk::mangle() {
+  // Mangle the payload of the chunk and not the links that
+  // maintain list of chunks.
+  HeapWord* start = (HeapWord*)(bottom() + overhead());
+  size_t word_size = capacity_word_size() - overhead();
+  Copy::fill_to_words(start, word_size, metadata_chunk_initialize);
+}
+#endif // ASSERT
 
 void Metachunk::verify() {
 #ifdef ASSERT
   // Cannot walk through the blocks unless the blocks have
   // headers with sizes.
-  MetaWord* curr = bottom() + overhead();
-  while (curr < top()) {
-    Metablock* block = (Metablock*) curr;
-    size_t word_size = block->word_size();
-    block->verify();
-    curr = curr + word_size;
-  }
+  assert(_bottom <= _top &&
+         _top <= _end,
+         "Chunk has been smashed");
+  assert(SpaceManager::is_humongous(_word_size) ||
+         _word_size == SpaceManager::MediumChunk ||
+         _word_size == SpaceManager::SmallChunk,
+         "Chunk size is wrong");
 #endif
   return;
 }
 
 // BlockFreelist methods
 
-#ifdef DEALLOCATE_BLOCKS
 BlockFreelist::BlockFreelist() : _dictionary(NULL) {}
-#else
-BlockFreelist::BlockFreelist() {}
-#endif
 
 BlockFreelist::~BlockFreelist() {
-#ifdef DEALLOCATE_BLOCKS
   if (_dictionary != NULL) {
     if (Verbose && TraceMetadataChunkAllocation) {
       _dictionary->print_free_lists(gclog_or_tty);
     }
     delete _dictionary;
   }
-#endif
 }
 
-Metablock* BlockFreelist::initialize_free_chunk(Metablock* block, size_t word_size) {
-#ifdef DEALLOCATE_BLOCKS
-#ifdef ASSERT
-  assert(word_size = block->word_size(), "Wrong chunk size");
-#endif
-  Metablock* result = block;
-  result->setSize(word_size);
-  result->linkPrev(NULL);
-  result->linkNext(NULL);
-
-  return result;
-#else
-  ShouldNotReachHere();
+Metablock* BlockFreelist::initialize_free_chunk(MetaWord* p, size_t word_size) {
+  Metablock* block = (Metablock*) p;
+  block->set_word_size(word_size);
+  block->set_prev(NULL);
+  block->set_next(NULL);
+
   return block;
-#endif
 }
 
-void BlockFreelist::return_block(Metablock* block, size_t word_size) {
-#ifdef ASSERT
-  assert(word_size = block->word_size(), "Block size is wrong");;
-#endif
-  Metablock* free_chunk = initialize_free_chunk(block, word_size);
-#ifdef DEALLOCATE_BLOCKS
+void BlockFreelist::return_block(MetaWord* p, size_t word_size) {
+  Metablock* free_chunk = initialize_free_chunk(p, word_size);
   if (dictionary() == NULL) {
-   _dictionary = new BinaryTreeDictionary<Metablock>(false /* adaptive_freelists */);
+   _dictionary = new BlockTreeDictionary();
   }
-  dictionary()->returnChunk(free_chunk);
-#endif
+  dictionary()->return_chunk(free_chunk);
 }
 
-Metablock* BlockFreelist::get_block(size_t word_size) {
-#ifdef DEALLOCATE_BLOCKS
+MetaWord* BlockFreelist::get_block(size_t word_size) {
   if (dictionary() == NULL) {
     return NULL;
   }
 
-  Metablock* free_chunk =
-    dictionary()->getChunk(word_size, FreeBlockDictionary<Metablock>::exactly);
-#else
-  Metablock* free_chunk = NULL;
-#endif
-  if (free_chunk == NULL) {
+  if (word_size < TreeChunk<Metablock, FreeList>::min_size()) {
+    // Dark matter.  Too small for dictionary.
     return NULL;
   }
-  assert(free_chunk->word_size() == word_size, "Size of chunk is incorrect");
-  Metablock* block = Metablock::initialize((MetaWord*) free_chunk, word_size);
-#ifdef ASSERT
-  assert(block->word_size() == word_size, "Block size is not set correctly");
-#endif
-
-  return block;
+
+  Metablock* free_block =
+    dictionary()->get_chunk(word_size, FreeBlockDictionary<Metablock>::exactly);
+  if (free_block == NULL) {
+    return NULL;
+  }
+
+  return (MetaWord*) free_block;
 }
 
 void BlockFreelist::print_on(outputStream* st) const {
-#ifdef DEALLOCATE_BLOCKS
   if (dictionary() == NULL) {
     return;
   }
   dictionary()->print_free_lists(st);
-#else
-  return;
-#endif
 }
 
 // VirtualSpaceNode methods
@@ -1597,14 +1388,11 @@
         Metadebug::deallocate_block_a_lot_count() % MetaDataDeallocateALotInterval == 0 ) {
     Metadebug::set_deallocate_block_a_lot_count(0);
     for (uint i = 0; i < metadata_deallocate_a_lot_block; i++) {
-      Metablock* dummy_block = sm->allocate_work(raw_word_size);
+      MetaWord* dummy_block = sm->allocate_work(raw_word_size);
       if (dummy_block == 0) {
         break;
       }
-#ifdef ASSERT
-      assert(dummy_block->word_size() == raw_word_size, "Block size is not set correctly");
-#endif
-      sm->deallocate(dummy_block->data());
+      sm->deallocate(dummy_block, raw_word_size);
     }
   } else {
     Metadebug::inc_deallocate_block_a_lot_count();
@@ -1784,8 +1572,8 @@
 }
 
 void ChunkManager::locked_verify() {
+  locked_verify_free_chunks_count();
   locked_verify_free_chunks_total();
-  locked_verify_free_chunks_count();
 }
 
 void ChunkManager::locked_print_free_chunks(outputStream* st) {
@@ -1803,7 +1591,6 @@
   return &_free_chunks[index];
 }
 
-
 // These methods that sum the free chunk lists are used in printing
 // methods that are used in product builds.
 size_t ChunkManager::sum_free_chunks() {
@@ -1818,6 +1605,7 @@
 
     result = result + list->sum_list_capacity();
   }
+  result = result + humongous_dictionary()->total_size();
   return result;
 }
 
@@ -1831,6 +1619,7 @@
     }
     count = count + list->sum_list_count();
   }
+  count = count + humongous_dictionary()->total_free_blocks();
   return count;
 }
 
@@ -1875,23 +1664,24 @@
   assert_lock_strong(SpaceManager::expand_lock());
 
   locked_verify();
-  ChunkList* free_list = find_free_chunks_list(word_size);
-  assert(free_list != NULL, "Sanity check");
-
-  Metachunk* chunk = free_list->head();
-  debug_only(Metachunk* debug_head = chunk;)
-
-  if (chunk == NULL) {
-    return NULL;
-  }
-
-  Metachunk* prev_chunk = chunk;
-  if (chunk->word_size() == word_size) {
-    // Chunk is being removed from the chunks free list.
-    dec_free_chunks_total(chunk->capacity_word_size());
+
+  Metachunk* chunk = NULL;
+  if (!SpaceManager::is_humongous(word_size)) {
+    ChunkList* free_list = find_free_chunks_list(word_size);
+    assert(free_list != NULL, "Sanity check");
+
+    chunk = free_list->head();
+    debug_only(Metachunk* debug_head = chunk;)
+
+    if (chunk == NULL) {
+      return NULL;
+    }
+
     // Remove the chunk as the head of the list.
     free_list->set_head(chunk->next());
     chunk->set_next(NULL);
+    // Chunk has been removed from the chunks free list.
+    dec_free_chunks_total(chunk->capacity_word_size());
 
     if (TraceMetadataChunkAllocation && Verbose) {
       tty->print_cr("ChunkManager::free_chunks_get: free_list "
@@ -1899,79 +1689,24 @@
                     free_list, chunk, chunk->word_size());
     }
   } else {
-    assert(SpaceManager::is_humongous(word_size),
-      "Should only need to check humongous");
-    // This code to find the best fit is just for purposes of
-    // investigating the loss due to fragmentation on a humongous
-    // chunk.  It will be replace by a binaryTreeDictionary for
-    // the humongous chunks.
-    uint count = 0;
-    Metachunk* best_fit = NULL;
-    Metachunk* best_fit_prev = NULL;
-    while (chunk != NULL) {
-      count++;
-      if (chunk->word_size() < word_size) {
-        prev_chunk = chunk;
-        chunk = chunk->next();
-      } else if (chunk->word_size() == word_size) {
-        break;
-      } else {
-        if (best_fit == NULL ||
-            best_fit->word_size() > chunk->word_size()) {
-          best_fit_prev = prev_chunk;
-          best_fit = chunk;
-        }
-        prev_chunk = chunk;
-        chunk = chunk->next();
+    chunk = humongous_dictionary()->get_chunk(
+      word_size,
+      FreeBlockDictionary<Metachunk>::atLeast);
+
+    if (chunk != NULL) {
+      if (TraceMetadataHumongousAllocation) {
+        size_t waste = chunk->word_size() - word_size;
+        tty->print_cr("Free list allocate humongous chunk size " SIZE_FORMAT
+                      " for requested size " SIZE_FORMAT
+                      " waste " SIZE_FORMAT,
+                      chunk->word_size(), word_size, waste);
       }
+      // Chunk is being removed from the chunks free list.
+      dec_free_chunks_total(chunk->capacity_word_size());
+#ifdef ASSERT
+      chunk->set_is_free(false);
+#endif
     }
-      if (chunk == NULL) {
-        prev_chunk = best_fit_prev;
-        chunk = best_fit;
-      }
-      if (chunk != NULL) {
-        if (TraceMetadataHumongousAllocation) {
-          size_t waste = chunk->word_size() - word_size;
-          tty->print_cr("Free list allocate humongous chunk size " SIZE_FORMAT
-                        " for requested size " SIZE_FORMAT
-                        " waste " SIZE_FORMAT
-                        " found at " SIZE_FORMAT " of " SIZE_FORMAT,
-                        chunk->word_size(), word_size, waste,
-                        count, free_list->sum_list_count());
-        }
-        // Chunk is being removed from the chunks free list.
-        dec_free_chunks_total(chunk->capacity_word_size());
-        // Remove the chunk if it is at the head of the list.
-        if (chunk == free_list->head()) {
-          free_list->set_head(chunk->next());
-
-          if (TraceMetadataHumongousAllocation) {
-            tty->print_cr("ChunkManager::free_chunks_get: humongous free_list "
-                          PTR_FORMAT " chunk " PTR_FORMAT " size " SIZE_FORMAT
-                          " new head " PTR_FORMAT,
-                          free_list, chunk, chunk->word_size(),
-                          free_list->head());
-          }
-        } else {
-          // Remove a chunk in the interior of the list
-          prev_chunk->set_next(chunk->next());
-
-          if (TraceMetadataHumongousAllocation) {
-            tty->print_cr("ChunkManager::free_chunks_get: humongous free_list "
-                          PTR_FORMAT " chunk " PTR_FORMAT " size " SIZE_FORMAT
-                          PTR_FORMAT "  prev " PTR_FORMAT " next " PTR_FORMAT,
-                          free_list, chunk, chunk->word_size(),
-                          prev_chunk, chunk->next());
-          }
-        }
-        chunk->set_next(NULL);
-      } else {
-        if (TraceMetadataHumongousAllocation) {
-          tty->print_cr("ChunkManager::free_chunks_get: New humongous chunk of size "
-                        SIZE_FORMAT,
-                        word_size);
-        }
-      }
   }
   locked_verify();
   return chunk;
@@ -2000,12 +1735,18 @@
   return chunk;
 }
 
+void ChunkManager::print_on(outputStream* out) {
+  if (PrintFLSStatistics != 0) {
+    humongous_dictionary()->report_statistics();
+  }
+}
+
 // SpaceManager methods
 
 size_t SpaceManager::sum_free_in_chunks_in_use() const {
   MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
   size_t free = 0;
-  for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+  for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
     Metachunk* chunk = chunks_in_use(i);
     while (chunk != NULL) {
       free += chunk->free_word_size();
@@ -2018,11 +1759,12 @@
 size_t SpaceManager::sum_waste_in_chunks_in_use() const {
   MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
   size_t result = 0;
-  for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
-   // Count the free space in all the chunk but not the
-   // current chunk from which allocations are still being done.
+  for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
+
+
    result += sum_waste_in_chunks_in_use(i);
   }
+
   return result;
 }
 
@@ -2033,10 +1775,10 @@
   // Count the free space in all the chunk but not the
   // current chunk from which allocations are still being done.
   if (chunk != NULL) {
-    while (chunk != NULL) {
-      if (chunk != current_chunk()) {
-        result += chunk->free_word_size();
-      }
+    Metachunk* prev = chunk;
+    while (chunk != NULL && chunk != current_chunk()) {
+      result += chunk->free_word_size();
+      prev = chunk;
       chunk = chunk->next();
       count++;
     }
@@ -2047,7 +1789,7 @@
 size_t SpaceManager::sum_capacity_in_chunks_in_use() const {
   MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
   size_t sum = 0;
-  for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+  for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
     Metachunk* chunk = chunks_in_use(i);
     while (chunk != NULL) {
       // Just changed this sum += chunk->capacity_word_size();
@@ -2061,9 +1803,10 @@
 
 size_t SpaceManager::sum_count_in_chunks_in_use() {
   size_t count = 0;
-  for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+  for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
     count = count + sum_count_in_chunks_in_use(i);
   }
+
   return count;
 }
 
@@ -2081,7 +1824,7 @@
 size_t SpaceManager::sum_used_in_chunks_in_use() const {
   MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
   size_t used = 0;
-  for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+  for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
     Metachunk* chunk = chunks_in_use(i);
     while (chunk != NULL) {
       used += chunk->used_word_size();
@@ -2139,15 +1882,13 @@
     gclog_or_tty->print_cr("  word_size " PTR_FORMAT, word_size);
     gclog_or_tty->print_cr("  chunk_word_size " PTR_FORMAT,
                            chunk_word_size);
-    gclog_or_tty->print_cr("    block overhead " PTR_FORMAT
-                           " chunk overhead " PTR_FORMAT,
-                           Metablock::overhead(),
+    gclog_or_tty->print_cr("    chunk overhead " PTR_FORMAT,
                            Metachunk::overhead());
   }
   return chunk_word_size;
 }
 
-Metablock* SpaceManager::grow_and_allocate(size_t word_size) {
+MetaWord* SpaceManager::grow_and_allocate(size_t word_size) {
   assert(vs_list()->current_virtual_space() != NULL,
          "Should have been set");
   assert(current_chunk() == NULL ||
@@ -2180,7 +1921,7 @@
 void SpaceManager::print_on(outputStream* st) const {
 
   for (ChunkIndex i = SmallIndex;
-       i < NumberOfFreeLists ;
+       i < NumberOfInUseLists ;
        i = next_chunk_index(i) ) {
     st->print_cr("  chunks_in_use " PTR_FORMAT " chunk size " PTR_FORMAT,
                  chunks_in_use(i),
@@ -2191,8 +1932,11 @@
                sum_waste_in_chunks_in_use(SmallIndex),
                sum_waste_in_chunks_in_use(MediumIndex),
                sum_waste_in_chunks_in_use(HumongousIndex));
-  // Nothing in them yet
-  // block_freelists()->print_on(st);
+  // block free lists
+  if (block_freelists() != NULL) {
+    st->print_cr("total in block free lists " SIZE_FORMAT,
+      block_freelists()->total_size());
+  }
 }
 
 SpaceManager::SpaceManager(Mutex* lock, VirtualSpaceList* vs_list) :
@@ -2200,7 +1944,7 @@
   _allocation_total(0),
   _lock(lock) {
   Metadebug::init_allocation_fail_alot_count();
-  for (ChunkIndex i = SmallIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+  for (ChunkIndex i = SmallIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
     _chunks_in_use[i] = NULL;
   }
   _current_chunk = NULL;
@@ -2262,22 +2006,24 @@
   // Humongous chunks are never the current chunk.
   Metachunk* humongous_chunks = chunks_in_use(HumongousIndex);
 
-  if (humongous_chunks != NULL) {
-    chunk_manager->free_humongous_chunks()->add_at_head(humongous_chunks);
-    set_chunks_in_use(HumongousIndex, NULL);
+  while (humongous_chunks != NULL) {
+#ifdef ASSERT
+    humongous_chunks->set_is_free(true);
+#endif
+    Metachunk* next_humongous_chunks = humongous_chunks->next();
+    chunk_manager->humongous_dictionary()->return_chunk(humongous_chunks);
+    humongous_chunks = next_humongous_chunks;
   }
+  set_chunks_in_use(HumongousIndex, NULL);
   chunk_manager->locked_verify();
 }
 
-void SpaceManager::deallocate(MetaWord* p) {
+void SpaceManager::deallocate(MetaWord* p, size_t word_size) {
   assert_lock_strong(_lock);
-  ShouldNotReachHere();  // Where is this needed.
-#ifdef DEALLOCATE_BLOCKS
-  Metablock* block = Metablock::metablock_from_data(p);
-  // This is expense but kept it until integration JJJ
-  assert(contains((address)block), "Block does not belong to this metaspace");
-  block_freelists()->return_block(block, word_size);
-#endif
+  size_t min_size = TreeChunk<Metablock, FreeList>::min_size();
+  assert(word_size >= min_size,
+    err_msg("Should not deallocate dark matter " SIZE_FORMAT, word_size));
+  block_freelists()->return_block(p, word_size);
 }
 
 // Adds a chunk to the list of chunks in use.
@@ -2366,50 +2112,40 @@
 MetaWord* SpaceManager::allocate(size_t word_size) {
   MutexLockerEx cl(lock(), Mutex::_no_safepoint_check_flag);
 
-  size_t block_overhead = Metablock::overhead();
   // If only the dictionary is going to be used (i.e., no
   // indexed free list), then there is a minimum size requirement.
   // MinChunkSize is a placeholder for the real minimum size JJJ
-  size_t byte_size_with_overhead = (word_size + block_overhead) * BytesPerWord;
-#ifdef DEALLOCATE_BLOCKS
-  size_t raw_bytes_size = MAX2(ARENA_ALIGN(byte_size_with_overhead),
-                               MinChunkSize * BytesPerWord);
-#else
-  size_t raw_bytes_size = ARENA_ALIGN(byte_size_with_overhead);
-#endif
+  size_t byte_size = word_size * BytesPerWord;
+
+  size_t byte_size_with_overhead = byte_size + Metablock::overhead();
+
+  size_t raw_bytes_size = MAX2(byte_size_with_overhead,
+                               Metablock::min_block_byte_size());
+  raw_bytes_size = ARENA_ALIGN(raw_bytes_size);
   size_t raw_word_size = raw_bytes_size / BytesPerWord;
   assert(raw_word_size * BytesPerWord == raw_bytes_size, "Size problem");
 
   BlockFreelist* fl =  block_freelists();
-  Metablock* block = NULL;
+  MetaWord* p = NULL;
   // Allocation from the dictionary is expensive in the sense that
   // the dictionary has to be searched for a size.  Don't allocate
   // from the dictionary until it starts to get fat.  Is this
   // a reasonable policy?  Maybe an skinny dictionary is fast enough
   // for allocations.  Do some profiling.  JJJ
-  if (fl->totalSize() > allocation_from_dictionary_limit) {
-    block = fl->get_block(raw_word_size);
+  if (fl->total_size() > allocation_from_dictionary_limit) {
+    p = fl->get_block(raw_word_size);
   }
-  if (block == NULL) {
-    block = allocate_work(raw_word_size);
-    if (block == NULL) {
-      return NULL;
-    }
+  if (p == NULL) {
+    p = allocate_work(raw_word_size);
   }
   Metadebug::deallocate_block_a_lot(this, raw_word_size);
 
-  // Push the allocation past the word containing the size and leader.
-#ifdef ASSERT
-  MetaWord* result =  block->data();
-  return result;
-#else
-  return (MetaWord*) block;
-#endif
+  return p;
 }
 
 // Returns the address of spaced allocated for "word_size".
 // This methods does not know about blocks (Metablocks)
-Metablock* SpaceManager::allocate_work(size_t word_size) {
+MetaWord* SpaceManager::allocate_work(size_t word_size) {
   assert_lock_strong(_lock);
 #ifdef ASSERT
   if (Metadebug::test_metadata_failure()) {
@@ -2417,7 +2153,7 @@
   }
 #endif
   // Is there space in the current chunk?
-  Metablock* result = NULL;
+  MetaWord* result = NULL;
 
   // For DumpSharedSpaces, only allocate out of the current chunk which is
   // never null because we gave it the size we wanted.   Caller reports out
@@ -2436,8 +2172,8 @@
   }
   if (result > 0) {
     inc_allocation_total(word_size);
-    assert(result != (Metablock*) chunks_in_use(MediumIndex), "Head of the list is being allocated");
-    assert(result->word_size() == word_size, "Size not set correctly");
+    assert(result != (MetaWord*) chunks_in_use(MediumIndex),
+           "Head of the list is being allocated");
   }
 
   return result;
@@ -2447,13 +2183,13 @@
   // If there are blocks in the dictionary, then
   // verfication of chunks does not work since
   // being in the dictionary alters a chunk.
-  if (block_freelists()->totalSize() == 0) {
+  if (block_freelists()->total_size() == 0) {
     // Skip the small chunks because their next link points to
     // medium chunks.  This is because the small chunk is the
     // current chunk (for allocations) until it is full and the
     // the addition of the next chunk does not NULL the next
     // like of the small chunk.
-    for (ChunkIndex i = MediumIndex; i < NumberOfFreeLists; i = next_chunk_index(i)) {
+    for (ChunkIndex i = MediumIndex; i < NumberOfInUseLists; i = next_chunk_index(i)) {
       Metachunk* curr = chunks_in_use(i);
       while (curr != NULL) {
         curr->verify();
@@ -2492,7 +2228,7 @@
 
   // Add up statistics for all chunks in this SpaceManager.
   for (ChunkIndex index = SmallIndex;
-       index < NumberOfFreeLists;
+       index < NumberOfInUseLists;
        index = next_chunk_index(index)) {
     for (Metachunk* curr = chunks_in_use(index);
          curr != NULL;
@@ -2521,7 +2257,7 @@
 #ifdef ASSERT
 void SpaceManager::mangle_freed_chunks() {
   for (ChunkIndex index = SmallIndex;
-       index < NumberOfFreeLists;
+       index < NumberOfInUseLists;
        index = next_chunk_index(index)) {
     for (Metachunk* curr = chunks_in_use(index);
          curr != NULL;
@@ -2833,13 +2569,12 @@
   }
 }
 
-
 MetaWord* Metaspace::allocate(size_t word_size, MetadataType mdtype) {
   // DumpSharedSpaces doesn't use class metadata area (yet)
   if (mdtype == ClassType && !DumpSharedSpaces) {
-    return class_vsm()->allocate(word_size);
+    return  class_vsm()->allocate(word_size);
   } else {
-    return vsm()->allocate(word_size);
+    return  vsm()->allocate(word_size);
   }
 }
 
@@ -2853,6 +2588,7 @@
     gclog_or_tty->print_cr("Increase capacity to GC from " SIZE_FORMAT
       " to " SIZE_FORMAT, before_inc, MetaspaceGC::capacity_until_GC());
   }
+
   result = allocate(word_size, mdtype);
 
   return result;
@@ -2889,37 +2625,39 @@
 void Metaspace::deallocate(MetaWord* ptr, size_t word_size, bool is_class) {
   if (SafepointSynchronize::is_at_safepoint()) {
     assert(Thread::current()->is_VM_thread(), "should be the VM thread");
-    // Don't take lock
-#ifdef DEALLOCATE_BLOCKS
+    // Don't take Heap_lock
+    MutexLocker ml(vsm()->lock());
+    if (word_size < TreeChunk<Metablock, FreeList>::min_size()) {
+      // Dark matter.  Too small for dictionary.
+#ifdef ASSERT
+      Copy::fill_to_words((HeapWord*)ptr, word_size, 0xf5f5f5f5);
+#endif
+      return;
+    }
     if (is_class) {
-      class_vsm()->deallocate(ptr);
+       class_vsm()->deallocate(ptr, word_size);
     } else {
-      vsm()->deallocate(ptr);
+      vsm()->deallocate(ptr, word_size);
     }
-#else
-#ifdef ASSERT
-    Copy::fill_to_words((HeapWord*)ptr, word_size, metadata_deallocate);
-#endif
-#endif
-
   } else {
     MutexLocker ml(vsm()->lock());
 
-#ifdef DEALLOCATE_BLOCKS
+    if (word_size < TreeChunk<Metablock, FreeList>::min_size()) {
+      // Dark matter.  Too small for dictionary.
+#ifdef ASSERT
+      Copy::fill_to_words((HeapWord*)ptr, word_size, 0xf5f5f5f5);
+#endif
+      return;
+    }
     if (is_class) {
-      class_vsm()->deallocate(ptr);
+      class_vsm()->deallocate(ptr, word_size);
     } else {
-      vsm()->deallocate(ptr);
+      vsm()->deallocate(ptr, word_size);
     }
-#else
-#ifdef ASSERT
-    Copy::fill_to_words((HeapWord*)ptr, word_size, metadata_deallocate);
-#endif
-#endif
   }
 }
 
-MetaWord* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size,
+Metablock* Metaspace::allocate(ClassLoaderData* loader_data, size_t word_size,
                               bool read_only, MetadataType mdtype, TRAPS) {
   if (HAS_PENDING_EXCEPTION) {
     assert(false, "Should not allocate with exception pending");
@@ -2943,7 +2681,7 @@
     if (result == NULL) {
       report_out_of_shared_space(read_only ? SharedReadOnly : SharedReadWrite);
     }
-    return result;
+    return Metablock::initialize(result, word_size);
   }
 
   result = loader_data->metaspace_non_null()->allocate(word_size, mdtype);
@@ -2951,7 +2689,7 @@
   if (result == NULL) {
     // Try to clean out some memory and retry.
     result =
-    Universe::heap()->collector_policy()->satisfy_failed_metadata_allocation(
+      Universe::heap()->collector_policy()->satisfy_failed_metadata_allocation(
         loader_data, word_size, mdtype);
 
     // If result is still null, we are out of memory.
@@ -2967,7 +2705,7 @@
       THROW_OOP_0(Universe::out_of_memory_error_perm_gen());
     }
   }
-  return result;
+  return Metablock::initialize(result, word_size);
 }
 
 void Metaspace::print_on(outputStream* out) const {
--- a/src/share/vm/memory/metaspace.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/memory/metaspace.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -57,12 +57,10 @@
 //
 
 class ClassLoaderData;
+class Metablock;
 class MetaWord;
 class Mutex;
 class outputStream;
-class FreeChunk;
-template <class Chunk_t> class FreeList;
-template <class Chunk_t> class BinaryTreeDictionary;
 class SpaceManager;
 
 // Metaspaces each have a  SpaceManager and allocations
@@ -128,7 +126,7 @@
   size_t capacity_words(MetadataType mdtype) const;
   size_t waste_words(MetadataType mdtype) const;
 
-  static MetaWord* allocate(ClassLoaderData* loader_data, size_t size,
+  static Metablock* allocate(ClassLoaderData* loader_data, size_t size,
                             bool read_only, MetadataType mdtype, TRAPS);
   void deallocate(MetaWord* ptr, size_t byte_size, bool is_class);
 
--- a/src/share/vm/oops/method.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/oops/method.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -1159,8 +1159,12 @@
 vmSymbols::SID Method::klass_id_for_intrinsics(Klass* holder) {
   // if loader is not the default loader (i.e., != NULL), we can't know the intrinsics
   // because we are not loading from core libraries
-  if (InstanceKlass::cast(holder)->class_loader() != NULL)
+  // exception: the AES intrinsics come from lib/ext/sunjce_provider.jar
+  // which does not use the class default class loader so we check for its loader here
+  if ((InstanceKlass::cast(holder)->class_loader() != NULL) &&
+       InstanceKlass::cast(holder)->class_loader()->klass()->name() != vmSymbols::sun_misc_Launcher_ExtClassLoader()) {
     return vmSymbols::NO_SID;   // regardless of name, no intrinsics here
+  }
 
   // see if the klass name is well-known:
   Symbol* klass_name = InstanceKlass::cast(holder)->name();
--- a/src/share/vm/opto/c2_globals.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/c2_globals.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -439,6 +439,9 @@
   product(bool, DoEscapeAnalysis, true,                                     \
           "Perform escape analysis")                                        \
                                                                             \
+  develop(bool, ExitEscapeAnalysisOnTimeout, true,                          \
+          "Exit or throw assert in EA when it reaches time limit")          \
+                                                                            \
   notproduct(bool, PrintEscapeAnalysis, false,                              \
           "Print the results of escape analysis")                           \
                                                                             \
--- a/src/share/vm/opto/callGenerator.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/callGenerator.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -670,6 +670,129 @@
 }
 
 
+//------------------------PredictedIntrinsicGenerator------------------------------
+// Internal class which handles all predicted Intrinsic calls.
+class PredictedIntrinsicGenerator : public CallGenerator {
+  CallGenerator* _intrinsic;
+  CallGenerator* _cg;
+
+public:
+  PredictedIntrinsicGenerator(CallGenerator* intrinsic,
+                              CallGenerator* cg)
+    : CallGenerator(cg->method())
+  {
+    _intrinsic = intrinsic;
+    _cg        = cg;
+  }
+
+  virtual bool      is_virtual()   const    { return true; }
+  virtual bool      is_inlined()   const    { return true; }
+  virtual bool      is_intrinsic() const    { return true; }
+
+  virtual JVMState* generate(JVMState* jvms);
+};
+
+
+CallGenerator* CallGenerator::for_predicted_intrinsic(CallGenerator* intrinsic,
+                                                      CallGenerator* cg) {
+  return new PredictedIntrinsicGenerator(intrinsic, cg);
+}
+
+
+JVMState* PredictedIntrinsicGenerator::generate(JVMState* jvms) {
+  GraphKit kit(jvms);
+  PhaseGVN& gvn = kit.gvn();
+
+  CompileLog* log = kit.C->log();
+  if (log != NULL) {
+    log->elem("predicted_intrinsic bci='%d' method='%d'",
+              jvms->bci(), log->identify(method()));
+  }
+
+  Node* slow_ctl = _intrinsic->generate_predicate(kit.sync_jvms());
+  if (kit.failing())
+    return NULL;  // might happen because of NodeCountInliningCutoff
+
+  SafePointNode* slow_map = NULL;
+  JVMState* slow_jvms;
+  if (slow_ctl != NULL) {
+    PreserveJVMState pjvms(&kit);
+    kit.set_control(slow_ctl);
+    if (!kit.stopped()) {
+      slow_jvms = _cg->generate(kit.sync_jvms());
+      if (kit.failing())
+        return NULL;  // might happen because of NodeCountInliningCutoff
+      assert(slow_jvms != NULL, "must be");
+      kit.add_exception_states_from(slow_jvms);
+      kit.set_map(slow_jvms->map());
+      if (!kit.stopped())
+        slow_map = kit.stop();
+    }
+  }
+
+  if (kit.stopped()) {
+    // Predicate is always false.
+    kit.set_jvms(slow_jvms);
+    return kit.transfer_exceptions_into_jvms();
+  }
+
+  // Generate intrinsic code:
+  JVMState* new_jvms = _intrinsic->generate(kit.sync_jvms());
+  if (new_jvms == NULL) {
+    // Intrinsic failed, so use slow code or make a direct call.
+    if (slow_map == NULL) {
+      CallGenerator* cg = CallGenerator::for_direct_call(method());
+      new_jvms = cg->generate(kit.sync_jvms());
+    } else {
+      kit.set_jvms(slow_jvms);
+      return kit.transfer_exceptions_into_jvms();
+    }
+  }
+  kit.add_exception_states_from(new_jvms);
+  kit.set_jvms(new_jvms);
+
+  // Need to merge slow and fast?
+  if (slow_map == NULL) {
+    // The fast path is the only path remaining.
+    return kit.transfer_exceptions_into_jvms();
+  }
+
+  if (kit.stopped()) {
+    // Intrinsic method threw an exception, so it's just the slow path after all.
+    kit.set_jvms(slow_jvms);
+    return kit.transfer_exceptions_into_jvms();
+  }
+
+  // Finish the diamond.
+  kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
+  RegionNode* region = new (kit.C) RegionNode(3);
+  region->init_req(1, kit.control());
+  region->init_req(2, slow_map->control());
+  kit.set_control(gvn.transform(region));
+  Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
+  iophi->set_req(2, slow_map->i_o());
+  kit.set_i_o(gvn.transform(iophi));
+  kit.merge_memory(slow_map->merged_memory(), region, 2);
+  uint tos = kit.jvms()->stkoff() + kit.sp();
+  uint limit = slow_map->req();
+  for (uint i = TypeFunc::Parms; i < limit; i++) {
+    // Skip unused stack slots; fast forward to monoff();
+    if (i == tos) {
+      i = kit.jvms()->monoff();
+      if( i >= limit ) break;
+    }
+    Node* m = kit.map()->in(i);
+    Node* n = slow_map->in(i);
+    if (m != n) {
+      const Type* t = gvn.type(m)->meet(gvn.type(n));
+      Node* phi = PhiNode::make(region, m, t);
+      phi->set_req(2, n);
+      kit.map()->set_req(i, gvn.transform(phi));
+    }
+  }
+  return kit.transfer_exceptions_into_jvms();
+}
+
 //-------------------------UncommonTrapCallGenerator-----------------------------
 // Internal class which handles all out-of-line calls checking receiver type.
 class UncommonTrapCallGenerator : public CallGenerator {
--- a/src/share/vm/opto/callGenerator.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/callGenerator.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -143,6 +143,9 @@
   // Registry for intrinsics:
   static CallGenerator* for_intrinsic(ciMethod* m);
   static void register_intrinsic(ciMethod* m, CallGenerator* cg);
+  static CallGenerator* for_predicted_intrinsic(CallGenerator* intrinsic,
+                                                CallGenerator* cg);
+  virtual Node* generate_predicate(JVMState* jvms) { return NULL; };
 
   static void print_inlining(ciMethod* callee, int inline_level, int bci, const char* msg) {
     if (PrintInlining)
--- a/src/share/vm/opto/compile.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/compile.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -3047,9 +3047,9 @@
   case T_LONG:
   case T_DOUBLE:  return (_v._value.j == other._v._value.j);
   case T_OBJECT:
-  case T_METADATA: return (_v._metadata == other._v._metadata);
   case T_ADDRESS: return (_v._value.l == other._v._value.l);
   case T_VOID:    return (_v._value.l == other._v._value.l);  // jump-table entries
+  case T_METADATA: return (_v._metadata == other._v._metadata);
   default: ShouldNotReachHere();
   }
   return false;
--- a/src/share/vm/opto/compile.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/compile.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -149,7 +149,7 @@
   private:
     BasicType _type;
     union {
-    jvalue    _value;
+      jvalue    _value;
       Metadata* _metadata;
     } _v;
     int       _offset;         // offset of this constant (in bytes) relative to the constant table base.
--- a/src/share/vm/opto/doCall.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/doCall.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -107,7 +107,17 @@
   // intrinsics handle strict f.p. correctly.
   if (allow_inline && allow_intrinsics) {
     CallGenerator* cg = find_intrinsic(callee, call_is_virtual);
-    if (cg != NULL)  return cg;
+    if (cg != NULL) {
+      if (cg->is_predicted()) {
+        // Code without intrinsic but, hopefully, inlined.
+        CallGenerator* inline_cg = this->call_generator(callee,
+              vtable_index, call_is_virtual, jvms, allow_inline, prof_factor, false);
+        if (inline_cg != NULL) {
+          cg = CallGenerator::for_predicted_intrinsic(cg, inline_cg);
+        }
+      }
+      return cg;
+    }
   }
 
   // Do method handle calls.
--- a/src/share/vm/opto/escape.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/escape.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -893,12 +893,16 @@
                                        arg_has_oops && (i > TypeFunc::Parms);
 #ifdef ASSERT
           if (!(is_arraycopy ||
-                call->as_CallLeaf()->_name != NULL &&
-                (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre")  == 0 ||
-                 strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ))
-          ) {
+                (call->as_CallLeaf()->_name != NULL &&
+                 (strcmp(call->as_CallLeaf()->_name, "g1_wb_pre")  == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "g1_wb_post") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "aescrypt_encryptBlock") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "aescrypt_decryptBlock") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_encryptAESCrypt") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "cipherBlockChaining_decryptAESCrypt") == 0)
+                  ))) {
             call->dump();
-            assert(false, "EA: unexpected CallLeaf");
+            fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
           }
 #endif
           // Always process arraycopy's destination object since
@@ -1080,7 +1084,7 @@
       C->log()->text("%s", (iterations >= CG_BUILD_ITER_LIMIT) ? "iterations" : "time");
       C->log()->end_elem(" limit'");
     }
-    assert(false, err_msg_res("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d",
+    assert(ExitEscapeAnalysisOnTimeout, err_msg_res("infinite EA connection graph build (%f sec, %d iterations) with %d nodes and worklist size %d",
            time.seconds(), iterations, nodes_size(), ptnodes_worklist.length()));
     // Possible infinite build_connection_graph loop,
     // bailout (no changes to ideal graph were made).
--- a/src/share/vm/opto/library_call.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/library_call.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -44,18 +44,22 @@
  public:
  private:
   bool             _is_virtual;
+  bool             _is_predicted;
   vmIntrinsics::ID _intrinsic_id;
 
  public:
-  LibraryIntrinsic(ciMethod* m, bool is_virtual, vmIntrinsics::ID id)
+  LibraryIntrinsic(ciMethod* m, bool is_virtual, bool is_predicted, vmIntrinsics::ID id)
     : InlineCallGenerator(m),
       _is_virtual(is_virtual),
+      _is_predicted(is_predicted),
       _intrinsic_id(id)
   {
   }
   virtual bool is_intrinsic() const { return true; }
   virtual bool is_virtual()   const { return _is_virtual; }
+  virtual bool is_predicted()   const { return _is_predicted; }
   virtual JVMState* generate(JVMState* jvms);
+  virtual Node* generate_predicate(JVMState* jvms);
   vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
 };
 
@@ -83,6 +87,7 @@
   int               arg_size()  const    { return callee()->arg_size(); }
 
   bool try_to_inline();
+  Node* try_to_predicate();
 
   // Helper functions to inline natives
   void push_result(RegionNode* region, PhiNode* value);
@@ -148,6 +153,7 @@
   CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
     return generate_method_call(method_id, true, false);
   }
+  Node * load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString, bool is_exact, bool is_static);
 
   Node* make_string_method_node(int opcode, Node* str1_start, Node* cnt1, Node* str2_start, Node* cnt2);
   Node* make_string_method_node(int opcode, Node* str1, Node* str2);
@@ -253,6 +259,10 @@
   bool inline_reverseBytes(vmIntrinsics::ID id);
 
   bool inline_reference_get();
+  bool inline_aescrypt_Block(vmIntrinsics::ID id);
+  bool inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id);
+  Node* inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting);
+  Node* get_key_start_from_aescrypt_object(Node* aescrypt_object);
 };
 
 
@@ -306,6 +316,8 @@
     }
   }
 
+  bool is_predicted = false;
+
   switch (id) {
   case vmIntrinsics::_compareTo:
     if (!SpecialStringCompareTo)  return NULL;
@@ -413,6 +425,18 @@
     break;
 #endif
 
+  case vmIntrinsics::_aescrypt_encryptBlock:
+  case vmIntrinsics::_aescrypt_decryptBlock:
+    if (!UseAESIntrinsics) return NULL;
+    break;
+
+  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+    if (!UseAESIntrinsics) return NULL;
+    // these two require the predicated logic
+    is_predicted = true;
+    break;
+
  default:
     assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
     assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
@@ -444,7 +468,7 @@
     if (!InlineUnsafeOps)  return NULL;
   }
 
-  return new LibraryIntrinsic(m, is_virtual, (vmIntrinsics::ID) id);
+  return new LibraryIntrinsic(m, is_virtual, is_predicted, (vmIntrinsics::ID) id);
 }
 
 //----------------------register_library_intrinsics-----------------------
@@ -496,6 +520,47 @@
   return NULL;
 }
 
+Node* LibraryIntrinsic::generate_predicate(JVMState* jvms) {
+  LibraryCallKit kit(jvms, this);
+  Compile* C = kit.C;
+  int nodes = C->unique();
+#ifndef PRODUCT
+  assert(is_predicted(), "sanity");
+  if ((PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) && Verbose) {
+    char buf[1000];
+    const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf));
+    tty->print_cr("Predicate for intrinsic %s", str);
+  }
+#endif
+
+  Node* slow_ctl = kit.try_to_predicate();
+  if (!kit.failing()) {
+    if (C->log()) {
+      C->log()->elem("predicate_intrinsic id='%s'%s nodes='%d'",
+                     vmIntrinsics::name_at(intrinsic_id()),
+                     (is_virtual() ? " virtual='1'" : ""),
+                     C->unique() - nodes);
+    }
+    return slow_ctl; // Could be NULL if the check folds.
+  }
+
+  // The intrinsic bailed out
+  if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
+    if (jvms->has_method()) {
+      // Not a root compile.
+      const char* msg = "failed to generate predicate for intrinsic";
+      CompileTask::print_inlining(kit.callee(), jvms->depth() - 1, kit.bci(), msg);
+    } else {
+      // Root compile
+      tty->print("Did not generate predicate for intrinsic %s%s at bci:%d in",
+               vmIntrinsics::name_at(intrinsic_id()),
+               (is_virtual() ? " (virtual)" : ""), kit.bci());
+    }
+  }
+  C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
+  return NULL;
+}
+
 bool LibraryCallKit::try_to_inline() {
   // Handle symbolic names for otherwise undistinguished boolean switches:
   const bool is_store       = true;
@@ -767,6 +832,14 @@
   case vmIntrinsics::_Reference_get:
     return inline_reference_get();
 
+  case vmIntrinsics::_aescrypt_encryptBlock:
+  case vmIntrinsics::_aescrypt_decryptBlock:
+    return inline_aescrypt_Block(intrinsic_id());
+
+  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+    return inline_cipherBlockChaining_AESCrypt(intrinsic_id());
+
   default:
     // If you get here, it may be that someone has added a new intrinsic
     // to the list in vmSymbols.hpp without implementing it here.
@@ -780,6 +853,36 @@
   }
 }
 
+Node* LibraryCallKit::try_to_predicate() {
+  if (!jvms()->has_method()) {
+    // Root JVMState has a null method.
+    assert(map()->memory()->Opcode() == Op_Parm, "");
+    // Insert the memory aliasing node
+    set_all_memory(reset_memory());
+  }
+  assert(merged_memory(), "");
+
+  switch (intrinsic_id()) {
+  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+    return inline_cipherBlockChaining_AESCrypt_predicate(false);
+  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+    return inline_cipherBlockChaining_AESCrypt_predicate(true);
+
+  default:
+    // If you get here, it may be that someone has added a new intrinsic
+    // to the list in vmSymbols.hpp without implementing it here.
+#ifndef PRODUCT
+    if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) {
+      tty->print_cr("*** Warning: Unimplemented predicate for intrinsic %s(%d)",
+                    vmIntrinsics::name_at(intrinsic_id()), intrinsic_id());
+    }
+#endif
+    Node* slow_ctl = control();
+    set_control(top()); // No fast path instrinsic
+    return slow_ctl;
+  }
+}
+
 //------------------------------push_result------------------------------
 // Helper function for finishing intrinsics.
 void LibraryCallKit::push_result(RegionNode* region, PhiNode* value) {
@@ -3830,7 +3933,7 @@
                      vtable_index*vtableEntry::size()) * wordSize +
                      vtableEntry::method_offset_in_bytes();
   Node* entry_addr  = basic_plus_adr(obj_klass, entry_offset);
-  Node* target_call = make_load(NULL, entry_addr, TypeInstPtr::NOTNULL, T_OBJECT);
+  Node* target_call = make_load(NULL, entry_addr, TypePtr::NOTNULL, T_ADDRESS);
 
   // Compare the target method with the expected method (e.g., Object.hashCode).
   const TypePtr* native_call_addr = TypeMetadataPtr::make(method);
@@ -5613,3 +5716,265 @@
   push(result);
   return true;
 }
+
+
+Node * LibraryCallKit::load_field_from_object(Node * fromObj, const char * fieldName, const char * fieldTypeString,
+                                              bool is_exact=true, bool is_static=false) {
+
+  const TypeInstPtr* tinst = _gvn.type(fromObj)->isa_instptr();
+  assert(tinst != NULL, "obj is null");
+  assert(tinst->klass()->is_loaded(), "obj is not loaded");
+  assert(!is_exact || tinst->klass_is_exact(), "klass not exact");
+
+  ciField* field = tinst->klass()->as_instance_klass()->get_field_by_name(ciSymbol::make(fieldName),
+                                                                          ciSymbol::make(fieldTypeString),
+                                                                          is_static);
+  if (field == NULL) return (Node *) NULL;
+  assert (field != NULL, "undefined field");
+
+  // Next code  copied from Parse::do_get_xxx():
+
+  // Compute address and memory type.
+  int offset  = field->offset_in_bytes();
+  bool is_vol = field->is_volatile();
+  ciType* field_klass = field->type();
+  assert(field_klass->is_loaded(), "should be loaded");
+  const TypePtr* adr_type = C->alias_type(field)->adr_type();
+  Node *adr = basic_plus_adr(fromObj, fromObj, offset);
+  BasicType bt = field->layout_type();
+
+  // Build the resultant type of the load
+  const Type *type = TypeOopPtr::make_from_klass(field_klass->as_klass());
+
+  // Build the load.
+  Node* loadedField = make_load(NULL, adr, type, bt, adr_type, is_vol);
+  return loadedField;
+}
+
+
+//------------------------------inline_aescrypt_Block-----------------------
+bool LibraryCallKit::inline_aescrypt_Block(vmIntrinsics::ID id) {
+  address stubAddr;
+  const char *stubName;
+  assert(UseAES, "need AES instruction support");
+
+  switch(id) {
+  case vmIntrinsics::_aescrypt_encryptBlock:
+    stubAddr = StubRoutines::aescrypt_encryptBlock();
+    stubName = "aescrypt_encryptBlock";
+    break;
+  case vmIntrinsics::_aescrypt_decryptBlock:
+    stubAddr = StubRoutines::aescrypt_decryptBlock();
+    stubName = "aescrypt_decryptBlock";
+    break;
+  }
+  if (stubAddr == NULL) return false;
+
+  // Restore the stack and pop off the arguments.
+  int nargs = 5;  // this + 2 oop/offset combos
+  assert(callee()->signature()->size() == nargs-1, "encryptBlock has 4 arguments");
+
+  Node *aescrypt_object  = argument(0);
+  Node *src         = argument(1);
+  Node *src_offset  = argument(2);
+  Node *dest        = argument(3);
+  Node *dest_offset = argument(4);
+
+  // (1) src and dest are arrays.
+  const Type* src_type = src->Value(&_gvn);
+  const Type* dest_type = dest->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  const TypeAryPtr* top_dest = dest_type->isa_aryptr();
+  assert (top_src  != NULL && top_src->klass()  != NULL &&  top_dest != NULL && top_dest->klass() != NULL, "args are strange");
+
+  // for the quick and dirty code we will skip all the checks.
+  // we are just trying to get the call to be generated.
+  Node* src_start  = src;
+  Node* dest_start = dest;
+  if (src_offset != NULL || dest_offset != NULL) {
+    assert(src_offset != NULL && dest_offset != NULL, "");
+    src_start  = array_element_address(src,  src_offset,  T_BYTE);
+    dest_start = array_element_address(dest, dest_offset, T_BYTE);
+  }
+
+  // now need to get the start of its expanded key array
+  // this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java
+  Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
+  if (k_start == NULL) return false;
+
+  // Call the stub.
+  make_runtime_call(RC_LEAF|RC_NO_FP, OptoRuntime::aescrypt_block_Type(),
+                    stubAddr, stubName, TypePtr::BOTTOM,
+                    src_start, dest_start, k_start);
+
+  return true;
+}
+
+//------------------------------inline_cipherBlockChaining_AESCrypt-----------------------
+bool LibraryCallKit::inline_cipherBlockChaining_AESCrypt(vmIntrinsics::ID id) {
+  address stubAddr;
+  const char *stubName;
+
+  assert(UseAES, "need AES instruction support");
+
+  switch(id) {
+  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+    stubAddr = StubRoutines::cipherBlockChaining_encryptAESCrypt();
+    stubName = "cipherBlockChaining_encryptAESCrypt";
+    break;
+  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+    stubAddr = StubRoutines::cipherBlockChaining_decryptAESCrypt();
+    stubName = "cipherBlockChaining_decryptAESCrypt";
+    break;
+  }
+  if (stubAddr == NULL) return false;
+
+
+  // Restore the stack and pop off the arguments.
+  int nargs = 6;  // this + oop/offset + len + oop/offset
+  assert(callee()->signature()->size() == nargs-1, "wrong number of arguments");
+  Node *cipherBlockChaining_object  = argument(0);
+  Node *src         = argument(1);
+  Node *src_offset  = argument(2);
+  Node *len         = argument(3);
+  Node *dest        = argument(4);
+  Node *dest_offset = argument(5);
+
+  // (1) src and dest are arrays.
+  const Type* src_type = src->Value(&_gvn);
+  const Type* dest_type = dest->Value(&_gvn);
+  const TypeAryPtr* top_src = src_type->isa_aryptr();
+  const TypeAryPtr* top_dest = dest_type->isa_aryptr();
+  assert (top_src  != NULL && top_src->klass()  != NULL
+          &&  top_dest != NULL && top_dest->klass() != NULL, "args are strange");
+
+  // checks are the responsibility of the caller
+  Node* src_start  = src;
+  Node* dest_start = dest;
+  if (src_offset != NULL || dest_offset != NULL) {
+    assert(src_offset != NULL && dest_offset != NULL, "");
+    src_start  = array_element_address(src,  src_offset,  T_BYTE);
+    dest_start = array_element_address(dest, dest_offset, T_BYTE);
+  }
+
+  // if we are in this set of code, we "know" the embeddedCipher is an AESCrypt object
+  // (because of the predicated logic executed earlier).
+  // so we cast it here safely.
+  // this requires a newer class file that has this array as littleEndian ints, otherwise we revert to java
+
+  Node* embeddedCipherObj = load_field_from_object(cipherBlockChaining_object, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
+  if (embeddedCipherObj == NULL) return false;
+
+  // cast it to what we know it will be at runtime
+  const TypeInstPtr* tinst = _gvn.type(cipherBlockChaining_object)->isa_instptr();
+  assert(tinst != NULL, "CBC obj is null");
+  assert(tinst->klass()->is_loaded(), "CBC obj is not loaded");
+  ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
+  if (!klass_AESCrypt->is_loaded()) return false;
+
+  ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
+  const TypeKlassPtr* aklass = TypeKlassPtr::make(instklass_AESCrypt);
+  const TypeOopPtr* xtype = aklass->as_instance_type();
+  Node* aescrypt_object = new(C) CheckCastPPNode(control(), embeddedCipherObj, xtype);
+  aescrypt_object = _gvn.transform(aescrypt_object);
+
+  // we need to get the start of the aescrypt_object's expanded key array
+  Node* k_start = get_key_start_from_aescrypt_object(aescrypt_object);
+  if (k_start == NULL) return false;
+
+  // similarly, get the start address of the r vector
+  Node* objRvec = load_field_from_object(cipherBlockChaining_object, "r", "[B", /*is_exact*/ false);
+  if (objRvec == NULL) return false;
+  Node* r_start = array_element_address(objRvec, intcon(0), T_BYTE);
+
+  // Call the stub, passing src_start, dest_start, k_start, r_start and src_len
+  make_runtime_call(RC_LEAF|RC_NO_FP,
+                    OptoRuntime::cipherBlockChaining_aescrypt_Type(),
+                    stubAddr, stubName, TypePtr::BOTTOM,
+                    src_start, dest_start, k_start, r_start, len);
+
+  // return is void so no result needs to be pushed
+
+  return true;
+}
+
+//------------------------------get_key_start_from_aescrypt_object-----------------------
+Node * LibraryCallKit::get_key_start_from_aescrypt_object(Node *aescrypt_object) {
+  Node* objAESCryptKey = load_field_from_object(aescrypt_object, "K", "[I", /*is_exact*/ false);
+  assert (objAESCryptKey != NULL, "wrong version of com.sun.crypto.provider.AESCrypt");
+  if (objAESCryptKey == NULL) return (Node *) NULL;
+
+  // now have the array, need to get the start address of the K array
+  Node* k_start = array_element_address(objAESCryptKey, intcon(0), T_INT);
+  return k_start;
+}
+
+//----------------------------inline_cipherBlockChaining_AESCrypt_predicate----------------------------
+// Return node representing slow path of predicate check.
+// the pseudo code we want to emulate with this predicate is:
+// for encryption:
+//    if (embeddedCipherObj instanceof AESCrypt) do_intrinsic, else do_javapath
+// for decryption:
+//    if ((embeddedCipherObj instanceof AESCrypt) && (cipher!=plain)) do_intrinsic, else do_javapath
+//    note cipher==plain is more conservative than the original java code but that's OK
+//
+Node* LibraryCallKit::inline_cipherBlockChaining_AESCrypt_predicate(bool decrypting) {
+  // First, check receiver for NULL since it is virtual method.
+  int nargs = arg_size();
+  Node* objCBC = argument(0);
+  _sp += nargs;
+  objCBC = do_null_check(objCBC, T_OBJECT);
+  _sp -= nargs;
+
+  if (stopped()) return NULL; // Always NULL
+
+  // Load embeddedCipher field of CipherBlockChaining object.
+  Node* embeddedCipherObj = load_field_from_object(objCBC, "embeddedCipher", "Lcom/sun/crypto/provider/SymmetricCipher;", /*is_exact*/ false);
+
+  // get AESCrypt klass for instanceOf check
+  // AESCrypt might not be loaded yet if some other SymmetricCipher got us to this compile point
+  // will have same classloader as CipherBlockChaining object
+  const TypeInstPtr* tinst = _gvn.type(objCBC)->isa_instptr();
+  assert(tinst != NULL, "CBCobj is null");
+  assert(tinst->klass()->is_loaded(), "CBCobj is not loaded");
+
+  // we want to do an instanceof comparison against the AESCrypt class
+  ciKlass* klass_AESCrypt = tinst->klass()->as_instance_klass()->find_klass(ciSymbol::make("com/sun/crypto/provider/AESCrypt"));
+  if (!klass_AESCrypt->is_loaded()) {
+    // if AESCrypt is not even loaded, we never take the intrinsic fast path
+    Node* ctrl = control();
+    set_control(top()); // no regular fast path
+    return ctrl;
+  }
+  ciInstanceKlass* instklass_AESCrypt = klass_AESCrypt->as_instance_klass();
+
+  _sp += nargs;          // gen_instanceof might do an uncommon trap
+  Node* instof = gen_instanceof(embeddedCipherObj, makecon(TypeKlassPtr::make(instklass_AESCrypt)));
+  _sp -= nargs;
+  Node* cmp_instof  = _gvn.transform(new (C) CmpINode(instof, intcon(1)));
+  Node* bool_instof  = _gvn.transform(new (C) BoolNode(cmp_instof, BoolTest::ne));
+
+  Node* instof_false = generate_guard(bool_instof, NULL, PROB_MIN);
+
+  // for encryption, we are done
+  if (!decrypting)
+    return instof_false;  // even if it is NULL
+
+  // for decryption, we need to add a further check to avoid
+  // taking the intrinsic path when cipher and plain are the same
+  // see the original java code for why.
+  RegionNode* region = new(C) RegionNode(3);
+  region->init_req(1, instof_false);
+  Node* src = argument(1);
+  Node *dest = argument(4);
+  Node* cmp_src_dest = _gvn.transform(new (C) CmpPNode(src, dest));
+  Node* bool_src_dest = _gvn.transform(new (C) BoolNode(cmp_src_dest, BoolTest::eq));
+  Node* src_dest_conjoint = generate_guard(bool_src_dest, NULL, PROB_MIN);
+  region->init_req(2, src_dest_conjoint);
+
+  record_for_igvn(region);
+  return _gvn.transform(region);
+
+}
+
+
--- a/src/share/vm/opto/loopTransform.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/loopTransform.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -2715,6 +2715,8 @@
   result_mem = new (C) ProjNode(call,TypeFunc::Memory);
   _igvn.register_new_node_with_optimizer(result_mem);
 
+/* Disable following optimization until proper fix (add missing checks).
+
   // If this fill is tightly coupled to an allocation and overwrites
   // the whole body, allow it to take over the zeroing.
   AllocateNode* alloc = AllocateNode::Ideal_allocation(base, this);
@@ -2738,6 +2740,7 @@
 #endif
     }
   }
+*/
 
   // Redirect the old control and memory edges that are outside the loop.
   Node* exit = head->loopexit()->proj_out(0);
--- a/src/share/vm/opto/mulnode.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/mulnode.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -479,24 +479,27 @@
     return new (phase->C) AndINode(load,phase->intcon(mask&0xFFFF));
 
   // Masking bits off of a Short?  Loading a Character does some masking
-  if (lop == Op_LoadS && (mask & 0xFFFF0000) == 0 ) {
-    Node *ldus = new (phase->C) LoadUSNode(load->in(MemNode::Control),
-                                              load->in(MemNode::Memory),
-                                              load->in(MemNode::Address),
-                                              load->adr_type());
-    ldus = phase->transform(ldus);
-    return new (phase->C) AndINode(ldus, phase->intcon(mask & 0xFFFF));
-  }
+  if (can_reshape &&
+      load->outcnt() == 1 && load->unique_out() == this) {
+    if (lop == Op_LoadS && (mask & 0xFFFF0000) == 0 ) {
+      Node *ldus = new (phase->C) LoadUSNode(load->in(MemNode::Control),
+                                             load->in(MemNode::Memory),
+                                             load->in(MemNode::Address),
+                                             load->adr_type());
+      ldus = phase->transform(ldus);
+      return new (phase->C) AndINode(ldus, phase->intcon(mask & 0xFFFF));
+    }
 
-  // Masking sign bits off of a Byte?  Do an unsigned byte load plus
-  // an and.
-  if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) {
-    Node* ldub = new (phase->C) LoadUBNode(load->in(MemNode::Control),
-                                              load->in(MemNode::Memory),
-                                              load->in(MemNode::Address),
-                                              load->adr_type());
-    ldub = phase->transform(ldub);
-    return new (phase->C) AndINode(ldub, phase->intcon(mask));
+    // Masking sign bits off of a Byte?  Do an unsigned byte load plus
+    // an and.
+    if (lop == Op_LoadB && (mask & 0xFFFFFF00) == 0) {
+      Node* ldub = new (phase->C) LoadUBNode(load->in(MemNode::Control),
+                                             load->in(MemNode::Memory),
+                                             load->in(MemNode::Address),
+                                             load->adr_type());
+      ldub = phase->transform(ldub);
+      return new (phase->C) AndINode(ldub, phase->intcon(mask));
+    }
   }
 
   // Masking off sign bits?  Dont make them!
@@ -923,7 +926,9 @@
       set_req(2, phase->intcon(0));
       return this;
     }
-    else if( ld->Opcode() == Op_LoadUS )
+    else if( can_reshape &&
+             ld->Opcode() == Op_LoadUS &&
+             ld->outcnt() == 1 && ld->unique_out() == shl)
       // Replace zero-extension-load with sign-extension-load
       return new (phase->C) LoadSNode( ld->in(MemNode::Control),
                                 ld->in(MemNode::Memory),
--- a/src/share/vm/opto/runtime.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/runtime.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -811,6 +811,48 @@
   return TypeFunc::make(domain, range);
 }
 
+// for aescrypt encrypt/decrypt operations, just three pointers returning void (length is constant)
+const TypeFunc* OptoRuntime::aescrypt_block_Type() {
+  // create input type (domain)
+  int num_args      = 3;
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypePtr::NOTNULL;    // src
+  fields[argp++] = TypePtr::NOTNULL;    // dest
+  fields[argp++] = TypePtr::NOTNULL;    // k array
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // no result type needed
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = NULL; // void
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+  return TypeFunc::make(domain, range);
+}
+
+// for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning void
+const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() {
+  // create input type (domain)
+  int num_args      = 5;
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypePtr::NOTNULL;    // src
+  fields[argp++] = TypePtr::NOTNULL;    // dest
+  fields[argp++] = TypePtr::NOTNULL;    // k array
+  fields[argp++] = TypePtr::NOTNULL;    // r array
+  fields[argp++] = TypeInt::INT;        // src len
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // no result type needed
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = NULL; // void
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+  return TypeFunc::make(domain, range);
+}
+
 //------------- Interpreter state access for on stack replacement
 const TypeFunc* OptoRuntime::osr_end_Type() {
   // create input type (domain)
--- a/src/share/vm/opto/runtime.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/runtime.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -280,6 +280,9 @@
 
   static const TypeFunc* array_fill_Type();
 
+  static const TypeFunc* aescrypt_block_Type();
+  static const TypeFunc* cipherBlockChaining_aescrypt_Type();
+
   // leaf on stack replacement interpreter accessor types
   static const TypeFunc* osr_end_Type();
 
--- a/src/share/vm/opto/superword.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/superword.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -1776,16 +1776,15 @@
     set_velt_type(n, container_type(n));
   }
 
-  // Propagate narrowed type backwards through operations
+  // Propagate integer narrowed type backwards through operations
   // that don't depend on higher order bits
   for (int i = _block.length() - 1; i >= 0; i--) {
     Node* n = _block.at(i);
     // Only integer types need be examined
-    const Type* vt = velt_type(n);
-    if (vt->basic_type() == T_INT) {
+    const Type* vtn = velt_type(n);
+    if (vtn->basic_type() == T_INT) {
       uint start, end;
       VectorNode::vector_operands(n, &start, &end);
-      const Type* vt = velt_type(n);
 
       for (uint j = start; j < end; j++) {
         Node* in  = n->in(j);
@@ -1801,6 +1800,24 @@
             }
           }
           if (same_type) {
+            // For right shifts of small integer types (bool, byte, char, short)
+            // we need precise information about sign-ness. Only Load nodes have
+            // this information because Store nodes are the same for signed and
+            // unsigned values. And any arithmetic operation after a load may
+            // expand a value to signed Int so such right shifts can't be used
+            // because vector elements do not have upper bits of Int.
+            const Type* vt = vtn;
+            if (VectorNode::is_shift(in)) {
+              Node* load = in->in(1);
+              if (load->is_Load() && in_bb(load) && (velt_type(load)->basic_type() == T_INT)) {
+                vt = velt_type(load);
+              } else if (in->Opcode() != Op_LShiftI) {
+                // Widen type to Int to avoid creation of right shift vector
+                // (align + data_size(s1) check in stmts_can_pack() will fail).
+                // Note, left shifts work regardless type.
+                vt = TypeInt::INT;
+              }
+            }
             set_velt_type(in, vt);
           }
         }
@@ -1841,7 +1858,20 @@
 // Smallest type containing range of values
 const Type* SuperWord::container_type(Node* n) {
   if (n->is_Mem()) {
-    return Type::get_const_basic_type(n->as_Mem()->memory_type());
+    BasicType bt = n->as_Mem()->memory_type();
+    if (n->is_Store() && (bt == T_CHAR)) {
+      // Use T_SHORT type instead of T_CHAR for stored values because any
+      // preceding arithmetic operation extends values to signed Int.
+      bt = T_SHORT;
+    }
+    if (n->Opcode() == Op_LoadUB) {
+      // Adjust type for unsigned byte loads, it is important for right shifts.
+      // T_BOOLEAN is used because there is no basic type representing type
+      // TypeInt::UBYTE. Use of T_BOOLEAN for vectors is fine because only
+      // size (one byte) and sign is important.
+      bt = T_BOOLEAN;
+    }
+    return Type::get_const_basic_type(bt);
   }
   const Type* t = _igvn.type(n);
   if (t->basic_type() == T_INT) {
--- a/src/share/vm/opto/type.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/type.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -61,7 +61,7 @@
   { Bad,             T_ILLEGAL,    "tuple:",        false, Node::NotAMachineReg, relocInfo::none          },  // Tuple
   { Bad,             T_ARRAY,      "array:",        false, Node::NotAMachineReg, relocInfo::none          },  // Array
 
-#if defined(IA32) || defined(AMD64)
+#ifndef SPARC
   { Bad,             T_ILLEGAL,    "vectors:",      false, Op_VecS,              relocInfo::none          },  // VectorS
   { Bad,             T_ILLEGAL,    "vectord:",      false, Op_VecD,              relocInfo::none          },  // VectorD
   { Bad,             T_ILLEGAL,    "vectorx:",      false, Op_VecX,              relocInfo::none          },  // VectorX
--- a/src/share/vm/opto/vectornode.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/opto/vectornode.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -29,8 +29,7 @@
 //------------------------------VectorNode--------------------------------------
 
 // Return the vector operator for the specified scalar operation
-// and vector length.  Also used to check if the code generator
-// supports the vector operation.
+// and vector length.
 int VectorNode::opcode(int sopc, BasicType bt) {
   switch (sopc) {
   case Op_AddI:
@@ -75,7 +74,7 @@
     case T_BYTE:   return 0;   // Unimplemented
     case T_CHAR:
     case T_SHORT:  return Op_MulVS;
-    case T_INT:    return Matcher::match_rule_supported(Op_MulVI) ? Op_MulVI : 0; // SSE4_1
+    case T_INT:    return Op_MulVI;
     }
     ShouldNotReachHere();
   case Op_MulF:
@@ -104,9 +103,9 @@
     return Op_LShiftVL;
   case Op_RShiftI:
     switch (bt) {
-    case T_BOOLEAN:
+    case T_BOOLEAN:return Op_URShiftVB; // boolean is unsigned value
+    case T_CHAR:   return Op_URShiftVS; // char is unsigned value
     case T_BYTE:   return Op_RShiftVB;
-    case T_CHAR:
     case T_SHORT:  return Op_RShiftVS;
     case T_INT:    return Op_RShiftVI;
     }
@@ -116,10 +115,14 @@
     return Op_RShiftVL;
   case Op_URShiftI:
     switch (bt) {
-    case T_BOOLEAN:
-    case T_BYTE:   return Op_URShiftVB;
-    case T_CHAR:
-    case T_SHORT:  return Op_URShiftVS;
+    case T_BOOLEAN:return Op_URShiftVB;
+    case T_CHAR:   return Op_URShiftVS;
+    case T_BYTE:
+    case T_SHORT:  return 0; // Vector logical right shift for signed short
+                             // values produces incorrect Java result for
+                             // negative data because java code should convert
+                             // a short value into int value with sign
+                             // extension before a shift.
     case T_INT:    return Op_URShiftVI;
     }
     ShouldNotReachHere();
@@ -157,12 +160,14 @@
   return 0; // Unimplemented
 }
 
+// Also used to check if the code generator
+// supports the vector operation.
 bool VectorNode::implemented(int opc, uint vlen, BasicType bt) {
   if (is_java_primitive(bt) &&
       (vlen > 1) && is_power_of_2(vlen) &&
       Matcher::vector_size_supported(bt, vlen)) {
     int vopc = VectorNode::opcode(opc, bt);
-    return vopc > 0 && Matcher::has_match_rule(vopc);
+    return vopc > 0 && Matcher::match_rule_supported(vopc);
   }
   return false;
 }
--- a/src/share/vm/prims/unsafe.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/prims/unsafe.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -124,6 +124,8 @@
       assert((void*)p->obj_field_addr<oop>((jint)byte_offset) == ptr_plus_disp,
              "raw [ptr+disp] must be consistent with oop::field_base");
     }
+    jlong p_size = HeapWordSize * (jlong)(p->size());
+    assert(byte_offset < p_size, err_msg("Unsafe access: offset " INT64_FORMAT " > object's size " INT64_FORMAT, byte_offset, p_size));
   }
 #endif
   if (sizeof(char*) == sizeof(jint))    // (this constant folds!)
--- a/src/share/vm/runtime/arguments.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/runtime/arguments.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -878,7 +878,7 @@
   bool result         = true;
 
   int c = getc(stream);
-  while(c != EOF) {
+  while(c != EOF && pos < (int)(sizeof(token)-1)) {
     if (in_white_space) {
       if (in_comment) {
         if (c == '\n') in_comment = false;
--- a/src/share/vm/runtime/globals.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/runtime/globals.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -533,6 +533,9 @@
   product(intx, UseSSE, 99,                                                 \
           "Highest supported SSE instructions set on x86/x64")              \
                                                                             \
+  product(bool, UseAES, false,                                               \
+          "Control whether AES instructions can be used on x86/x64")        \
+                                                                            \
   product(uintx, LargePageSizeInBytes, 0,                                   \
           "Large page size (0 to let VM choose the page size")              \
                                                                             \
@@ -635,6 +638,9 @@
   product(bool, UseSSE42Intrinsics, false,                                  \
           "SSE4.2 versions of intrinsics")                                  \
                                                                             \
+  product(bool, UseAESIntrinsics, false,                                    \
+          "use intrinsics for AES versions of crypto")                      \
+                                                                            \
   develop(bool, TraceCallFixup, false,                                      \
           "traces all call fixups")                                         \
                                                                             \
--- a/src/share/vm/runtime/stubRoutines.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/runtime/stubRoutines.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -120,6 +120,10 @@
 address StubRoutines::_arrayof_jshort_fill;
 address StubRoutines::_arrayof_jint_fill;
 
+address StubRoutines::_aescrypt_encryptBlock               = NULL;
+address StubRoutines::_aescrypt_decryptBlock               = NULL;
+address StubRoutines::_cipherBlockChaining_encryptAESCrypt = NULL;
+address StubRoutines::_cipherBlockChaining_decryptAESCrypt = NULL;
 
 double (* StubRoutines::_intrinsic_log   )(double) = NULL;
 double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
--- a/src/share/vm/runtime/stubRoutines.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/runtime/stubRoutines.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -199,6 +199,11 @@
   // zero heap space aligned to jlong (8 bytes)
   static address _zero_aligned_words;
 
+  static address _aescrypt_encryptBlock;
+  static address _aescrypt_decryptBlock;
+  static address _cipherBlockChaining_encryptAESCrypt;
+  static address _cipherBlockChaining_decryptAESCrypt;
+
   // These are versions of the java.lang.Math methods which perform
   // the same operations as the intrinsic version.  They are used for
   // constant folding in the compiler to ensure equivalence.  If the
@@ -330,6 +335,11 @@
   static address arrayof_jshort_fill() { return _arrayof_jshort_fill; }
   static address arrayof_jint_fill()   { return _arrayof_jint_fill; }
 
+  static address aescrypt_encryptBlock()                { return _aescrypt_encryptBlock; }
+  static address aescrypt_decryptBlock()                { return _aescrypt_decryptBlock; }
+  static address cipherBlockChaining_encryptAESCrypt()  { return _cipherBlockChaining_encryptAESCrypt; }
+  static address cipherBlockChaining_decryptAESCrypt()  { return _cipherBlockChaining_decryptAESCrypt; }
+
   static address select_fill_function(BasicType t, bool aligned, const char* &name);
 
   static address zero_aligned_words()   { return _zero_aligned_words; }
--- a/src/share/vm/runtime/vmStructs.cpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/runtime/vmStructs.cpp	Fri Nov 02 16:09:50 2012 -0700
@@ -59,6 +59,7 @@
 #include "memory/generation.hpp"
 #include "memory/generationSpec.hpp"
 #include "memory/heap.hpp"
+#include "memory/metablock.hpp"
 #include "memory/space.hpp"
 #include "memory/tenuredGeneration.hpp"
 #include "memory/universe.hpp"
@@ -249,6 +250,7 @@
 typedef Hashtable<Klass*, mtClass>            KlassHashtable;
 typedef HashtableEntry<Klass*, mtClass>       KlassHashtableEntry;
 typedef TwoOopHashtable<Symbol*, mtClass>     SymbolTwoOopHashtable;
+typedef BinaryTreeDictionary<Metablock, FreeList> MetablockTreeDictionary;
 
 //--------------------------------------------------------------------------------
 // VM_STRUCTS
@@ -1237,7 +1239,15 @@
   nonstatic_field(AccessFlags,                 _flags,                                       jint)                                   \
   nonstatic_field(elapsedTimer,                _counter,                                     jlong)                                  \
   nonstatic_field(elapsedTimer,                _active,                                      bool)                                   \
-  nonstatic_field(InvocationCounter,           _counter,                                     unsigned int)
+  nonstatic_field(InvocationCounter,           _counter,                                     unsigned int)                           \
+  volatile_nonstatic_field(FreeChunk,          _size,                                        size_t)                                 \
+  nonstatic_field(FreeChunk,                   _next,                                        FreeChunk*)                             \
+  nonstatic_field(FreeChunk,                   _prev,                                        FreeChunk*)                             \
+  nonstatic_field(FreeList<FreeChunk>,         _size,                                        size_t)                                 \
+  nonstatic_field(FreeList<Metablock>,         _size,                                        size_t)                                 \
+  nonstatic_field(FreeList<FreeChunk>,         _count,                                       ssize_t)                                \
+  nonstatic_field(FreeList<Metablock>,         _count,                                       ssize_t)                                \
+  nonstatic_field(MetablockTreeDictionary,     _total_size,                                  size_t)
 
   /* NOTE that we do not use the last_entry() macro here; it is used  */
   /* in vmStructs_<os>_<cpu>.hpp's VM_STRUCTS_OS_CPU macro (and must  */
@@ -2080,7 +2090,24 @@
   declare_toplevel_type(Universe)                                         \
   declare_toplevel_type(vframeArray)                                      \
   declare_toplevel_type(vframeArrayElement)                               \
-  declare_toplevel_type(Annotations*)
+  declare_toplevel_type(Annotations*)                                     \
+                                                                          \
+  /***************/                                                       \
+  /* Miscellaneous types */                                               \
+  /***************/                                                       \
+                                                                          \
+  /* freelist */                                                          \
+  declare_toplevel_type(FreeChunk*)                                       \
+  declare_toplevel_type(Metablock*)                                       \
+  declare_toplevel_type(FreeBlockDictionary<FreeChunk>*)                  \
+  declare_toplevel_type(FreeList<FreeChunk>*)                             \
+  declare_toplevel_type(FreeList<FreeChunk>)                              \
+  declare_toplevel_type(FreeBlockDictionary<Metablock>*)                  \
+  declare_toplevel_type(FreeList<Metablock>*)                             \
+  declare_toplevel_type(FreeList<Metablock>)                              \
+  declare_toplevel_type(MetablockTreeDictionary*)                         \
+  declare_type(MetablockTreeDictionary, FreeBlockDictionary<Metablock>)   \
+              declare_type(MetablockTreeDictionary, FreeBlockDictionary<Metablock>)
 
 
   /* NOTE that we do not use the last_entry() macro here; it is used  */
@@ -2447,7 +2474,7 @@
   /* frame              */                                                \
   /**********************/                                                \
                                                                           \
-  X86_ONLY(declare_constant(frame::entry_frame_call_wrapper_offset))      \
+  NOT_ZERO(X86_ONLY(declare_constant(frame::entry_frame_call_wrapper_offset)))      \
   declare_constant(frame::pc_return_offset)                               \
                                                                           \
   /*************/                                                         \
--- a/src/share/vm/utilities/macros.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/utilities/macros.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -282,6 +282,22 @@
 #define NOT_WIN64(code) code
 #endif
 
+#if defined(ZERO)
+#define ZERO_ONLY(code) code
+#define NOT_ZERO(code)
+#else
+#define ZERO_ONLY(code)
+#define NOT_ZERO(code) code
+#endif
+
+#if defined(SHARK)
+#define SHARK_ONLY(code) code
+#define NOT_SHARK(code)
+#else
+#define SHARK_ONLY(code)
+#define NOT_SHARK(code) code
+#endif
+
 #if defined(IA32) || defined(AMD64)
 #define X86
 #define X86_ONLY(code) code
--- a/src/share/vm/utilities/taskqueue.hpp	Thu Oct 11 12:25:42 2012 -0400
+++ b/src/share/vm/utilities/taskqueue.hpp	Fri Nov 02 16:09:50 2012 -0700
@@ -496,9 +496,7 @@
     }
   }
 
-  bool steal_1_random(uint queue_num, int* seed, E& t);
   bool steal_best_of_2(uint queue_num, int* seed, E& t);
-  bool steal_best_of_all(uint queue_num, int* seed, E& t);
 
   void register_queue(uint i, T* q);
 
@@ -538,46 +536,6 @@
 }
 
 template<class T, MEMFLAGS F> bool
-GenericTaskQueueSet<T, F>::steal_best_of_all(uint queue_num, int* seed, E& t) {
-  if (_n > 2) {
-    int best_k;
-    uint best_sz = 0;
-    for (uint k = 0; k < _n; k++) {
-      if (k == queue_num) continue;
-      uint sz = _queues[k]->size();
-      if (sz > best_sz) {
-        best_sz = sz;
-        best_k = k;
-      }
-    }
-    return best_sz > 0 && _queues[best_k]->pop_global(t);
-  } else if (_n == 2) {
-    // Just try the other one.
-    int k = (queue_num + 1) % 2;
-    return _queues[k]->pop_global(t);
-  } else {
-    assert(_n == 1, "can't be zero.");
-    return false;
-  }
-}
-
-template<class T, MEMFLAGS F> bool
-GenericTaskQueueSet<T, F>::steal_1_random(uint queue_num, int* seed, E& t) {
-  if (_n > 2) {
-    uint k = queue_num;
-    while (k == queue_num) k = TaskQueueSetSuper::randomParkAndMiller(seed) % _n;
-    return _queues[2]->pop_global(t);
-  } else if (_n == 2) {
-    // Just try the other one.
-    int k = (queue_num + 1) % 2;
-    return _queues[k]->pop_global(t);
-  } else {
-    assert(_n == 1, "can't be zero.");
-    return false;
-  }
-}
-
-template<class T, MEMFLAGS F> bool
 GenericTaskQueueSet<T, F>::steal_best_of_2(uint queue_num, int* seed, E& t) {
   if (_n > 2) {
     uint k1 = queue_num;
--- a/test/compiler/6340864/TestByteVect.java	Thu Oct 11 12:25:42 2012 -0400
+++ b/test/compiler/6340864/TestByteVect.java	Fri Nov 02 16:09:50 2012 -0700
@@ -33,7 +33,7 @@
 public class TestByteVect {
   private static final int ARRLEN = 997;
   private static final int ITERS  = 11000;
-  private static final int ADD_INIT = 0;
+  private static final int ADD_INIT = 63;
   private static final int BIT_MASK = 0xB7;
   private static final int VALUE = 3;
   private static final int SHIFT = 8;
@@ -76,6 +76,7 @@
       test_subc(a0, a1);
       test_subv(a0, a1, (byte)VALUE);
       test_suba(a0, a1, a2);
+
       test_mulc(a0, a1);
       test_mulv(a0, a1, (byte)VALUE);
       test_mula(a0, a1, a2);
@@ -88,6 +89,7 @@
       test_divc_n(a0, a1);
       test_divv(a0, a1, (byte)-VALUE);
       test_diva(a0, a1, a3);
+
       test_andc(a0, a1);
       test_andv(a0, a1, (byte)BIT_MASK);
       test_anda(a0, a1, a4);
@@ -97,30 +99,49 @@
       test_xorc(a0, a1);
       test_xorv(a0, a1, (byte)BIT_MASK);
       test_xora(a0, a1, a4);
+
       test_sllc(a0, a1);
       test_sllv(a0, a1, VALUE);
       test_srlc(a0, a1);
       test_srlv(a0, a1, VALUE);
       test_srac(a0, a1);
       test_srav(a0, a1, VALUE);
+
       test_sllc_n(a0, a1);
       test_sllv(a0, a1, -VALUE);
       test_srlc_n(a0, a1);
       test_srlv(a0, a1, -VALUE);
       test_srac_n(a0, a1);
       test_srav(a0, a1, -VALUE);
+
       test_sllc_o(a0, a1);
       test_sllv(a0, a1, SHIFT);
       test_srlc_o(a0, a1);
       test_srlv(a0, a1, SHIFT);
       test_srac_o(a0, a1);
       test_srav(a0, a1, SHIFT);
+
       test_sllc_on(a0, a1);
       test_sllv(a0, a1, -SHIFT);
       test_srlc_on(a0, a1);
       test_srlv(a0, a1, -SHIFT);
       test_srac_on(a0, a1);
       test_srav(a0, a1, -SHIFT);
+
+      test_sllc_add(a0, a1);
+      test_sllv_add(a0, a1, ADD_INIT);
+      test_srlc_add(a0, a1);
+      test_srlv_add(a0, a1, ADD_INIT);
+      test_srac_add(a0, a1);
+      test_srav_add(a0, a1, ADD_INIT);
+
+      test_sllc_and(a0, a1);
+      test_sllv_and(a0, a1, BIT_MASK);
+      test_srlc_and(a0, a1);
+      test_srlv_and(a0, a1, BIT_MASK);
+      test_srac_and(a0, a1);
+      test_srav_and(a0, a1, BIT_MASK);
+
       test_pack2(p2, a1);
       test_unpack2(a0, p2);
       test_pack2_swap(p2, a1);
@@ -369,6 +390,60 @@
         errn += verify("test_srav_on: ", i, a0[i], (byte)((byte)(ADD_INIT+i)>>(-SHIFT)));
       }
 
+      test_sllc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+      test_sllv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+
+      test_srlc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+      test_srlv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+
+      test_srac_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+      test_srav_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_add: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+
+      test_sllc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+      test_sllv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+
+      test_srlc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+      test_srlv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+
+      test_srac_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+      test_srav_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_and: ", i, a0[i], (byte)(((byte)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+
       test_pack2(p2, a1);
       for (int i=0; i<ARRLEN/2; i++) {
         errn += verify("test_pack2: ", i, p2[i], (short)(((short)(ADD_INIT+2*i) & 0xFF) | ((short)(ADD_INIT+2*i+1) << 8)));
@@ -805,6 +880,84 @@
 
     start = System.currentTimeMillis();
     for (int i=0; i<ITERS; i++) {
+      test_sllc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
       test_pack2(p2, a1);
     }
     end = System.currentTimeMillis();
@@ -1036,6 +1189,26 @@
       a0[i] = (byte)(a1[i]<<b);
     }
   }
+  static void test_sllc_add(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] + ADD_INIT)<<VALUE);
+    }
+  }
+  static void test_sllv_add(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] + b)<<VALUE);
+    }
+  }
+  static void test_sllc_and(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] & BIT_MASK)<<VALUE);
+    }
+  }
+  static void test_sllv_and(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] & b)<<VALUE);
+    }
+  }
 
   static void test_srlc(byte[] a0, byte[] a1) {
     for (int i = 0; i < a0.length; i+=1) {
@@ -1062,6 +1235,26 @@
       a0[i] = (byte)(a1[i]>>>b);
     }
   }
+  static void test_srlc_add(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] + ADD_INIT)>>>VALUE);
+    }
+  }
+  static void test_srlv_add(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] + b)>>>VALUE);
+    }
+  }
+  static void test_srlc_and(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] & BIT_MASK)>>>VALUE);
+    }
+  }
+  static void test_srlv_and(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] & b)>>>VALUE);
+    }
+  }
 
   static void test_srac(byte[] a0, byte[] a1) {
     for (int i = 0; i < a0.length; i+=1) {
@@ -1088,6 +1281,26 @@
       a0[i] = (byte)(a1[i]>>b);
     }
   }
+  static void test_srac_add(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] + ADD_INIT)>>VALUE);
+    }
+  }
+  static void test_srav_add(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] + b)>>VALUE);
+    }
+  }
+  static void test_srac_and(byte[] a0, byte[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] & BIT_MASK)>>VALUE);
+    }
+  }
+  static void test_srav_and(byte[] a0, byte[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (byte)((a1[i] & b)>>VALUE);
+    }
+  }
 
   static void test_pack2(short[] p2, byte[] a1) {
     if (p2.length*2 > a1.length) return;
--- a/test/compiler/6340864/TestIntVect.java	Thu Oct 11 12:25:42 2012 -0400
+++ b/test/compiler/6340864/TestIntVect.java	Fri Nov 02 16:09:50 2012 -0700
@@ -74,6 +74,7 @@
       test_subc(a0, a1);
       test_subv(a0, a1, (int)VALUE);
       test_suba(a0, a1, a2);
+
       test_mulc(a0, a1);
       test_mulv(a0, a1, (int)VALUE);
       test_mula(a0, a1, a2);
@@ -86,6 +87,7 @@
       test_divc_n(a0, a1);
       test_divv(a0, a1, (int)-VALUE);
       test_diva(a0, a1, a3);
+
       test_andc(a0, a1);
       test_andv(a0, a1, (int)BIT_MASK);
       test_anda(a0, a1, a4);
@@ -95,30 +97,49 @@
       test_xorc(a0, a1);
       test_xorv(a0, a1, (int)BIT_MASK);
       test_xora(a0, a1, a4);
+
       test_sllc(a0, a1);
       test_sllv(a0, a1, VALUE);
       test_srlc(a0, a1);
       test_srlv(a0, a1, VALUE);
       test_srac(a0, a1);
       test_srav(a0, a1, VALUE);
+
       test_sllc_n(a0, a1);
       test_sllv(a0, a1, -VALUE);
       test_srlc_n(a0, a1);
       test_srlv(a0, a1, -VALUE);
       test_srac_n(a0, a1);
       test_srav(a0, a1, -VALUE);
+
       test_sllc_o(a0, a1);
       test_sllv(a0, a1, SHIFT);
       test_srlc_o(a0, a1);
       test_srlv(a0, a1, SHIFT);
       test_srac_o(a0, a1);
       test_srav(a0, a1, SHIFT);
+
       test_sllc_on(a0, a1);
       test_sllv(a0, a1, -SHIFT);
       test_srlc_on(a0, a1);
       test_srlv(a0, a1, -SHIFT);
       test_srac_on(a0, a1);
       test_srav(a0, a1, -SHIFT);
+
+      test_sllc_add(a0, a1);
+      test_sllv_add(a0, a1, ADD_INIT);
+      test_srlc_add(a0, a1);
+      test_srlv_add(a0, a1, ADD_INIT);
+      test_srac_add(a0, a1);
+      test_srav_add(a0, a1, ADD_INIT);
+
+      test_sllc_and(a0, a1);
+      test_sllv_and(a0, a1, BIT_MASK);
+      test_srlc_and(a0, a1);
+      test_srlv_and(a0, a1, BIT_MASK);
+      test_srac_and(a0, a1);
+      test_srav_and(a0, a1, BIT_MASK);
+
       test_pack2(p2, a1);
       test_unpack2(a0, p2);
       test_pack2_swap(p2, a1);
@@ -359,6 +380,60 @@
         errn += verify("test_srav_on: ", i, a0[i], (int)((int)(ADD_INIT+i)>>(-SHIFT)));
       }
 
+      test_sllc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+      test_sllv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+
+      test_srlc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+      test_srlv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+
+      test_srac_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+      test_srav_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_add: ", i, a0[i], (int)(((int)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+
+      test_sllc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+      test_sllv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+
+      test_srlc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+      test_srlv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+
+      test_srac_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+      test_srav_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_and: ", i, a0[i], (int)(((int)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+
       test_pack2(p2, a1);
       for (int i=0; i<ARRLEN/2; i++) {
         errn += verify("test_pack2: ", i, p2[i], ((long)(ADD_INIT+2*i) & 0xFFFFFFFFl) | ((long)(ADD_INIT+2*i+1) << 32));
@@ -727,6 +802,84 @@
 
     start = System.currentTimeMillis();
     for (int i=0; i<ITERS; i++) {
+      test_sllc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
       test_pack2(p2, a1);
     }
     end = System.currentTimeMillis();
@@ -908,6 +1061,26 @@
       a0[i] = (int)(a1[i]<<b);
     }
   }
+  static void test_sllc_add(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] + ADD_INIT)<<VALUE);
+    }
+  }
+  static void test_sllv_add(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] + b)<<VALUE);
+    }
+  }
+  static void test_sllc_and(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] & BIT_MASK)<<VALUE);
+    }
+  }
+  static void test_sllv_and(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] & b)<<VALUE);
+    }
+  }
 
   static void test_srlc(int[] a0, int[] a1) {
     for (int i = 0; i < a0.length; i+=1) {
@@ -934,6 +1107,26 @@
       a0[i] = (int)(a1[i]>>>b);
     }
   }
+  static void test_srlc_add(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] + ADD_INIT)>>>VALUE);
+    }
+  }
+  static void test_srlv_add(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] + b)>>>VALUE);
+    }
+  }
+  static void test_srlc_and(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] & BIT_MASK)>>>VALUE);
+    }
+  }
+  static void test_srlv_and(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] & b)>>>VALUE);
+    }
+  }
 
   static void test_srac(int[] a0, int[] a1) {
     for (int i = 0; i < a0.length; i+=1) {
@@ -960,6 +1153,26 @@
       a0[i] = (int)(a1[i]>>b);
     }
   }
+  static void test_srac_add(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] + ADD_INIT)>>VALUE);
+    }
+  }
+  static void test_srav_add(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] + b)>>VALUE);
+    }
+  }
+  static void test_srac_and(int[] a0, int[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] & BIT_MASK)>>VALUE);
+    }
+  }
+  static void test_srav_and(int[] a0, int[] a1, int b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (int)((a1[i] & b)>>VALUE);
+    }
+  }
 
   static void test_pack2(long[] p2, int[] a1) {
     if (p2.length*2 > a1.length) return;
--- a/test/compiler/6340864/TestLongVect.java	Thu Oct 11 12:25:42 2012 -0400
+++ b/test/compiler/6340864/TestLongVect.java	Fri Nov 02 16:09:50 2012 -0700
@@ -73,6 +73,7 @@
       test_subc(a0, a1);
       test_subv(a0, a1, (long)VALUE);
       test_suba(a0, a1, a2);
+
       test_mulc(a0, a1);
       test_mulv(a0, a1, (long)VALUE);
       test_mula(a0, a1, a2);
@@ -85,6 +86,7 @@
       test_divc_n(a0, a1);
       test_divv(a0, a1, (long)-VALUE);
       test_diva(a0, a1, a3);
+
       test_andc(a0, a1);
       test_andv(a0, a1, (long)BIT_MASK);
       test_anda(a0, a1, a4);
@@ -94,30 +96,48 @@
       test_xorc(a0, a1);
       test_xorv(a0, a1, (long)BIT_MASK);
       test_xora(a0, a1, a4);
+
       test_sllc(a0, a1);
       test_sllv(a0, a1, VALUE);
       test_srlc(a0, a1);
       test_srlv(a0, a1, VALUE);
       test_srac(a0, a1);
       test_srav(a0, a1, VALUE);
+
       test_sllc_n(a0, a1);
       test_sllv(a0, a1, -VALUE);
       test_srlc_n(a0, a1);
       test_srlv(a0, a1, -VALUE);
       test_srac_n(a0, a1);
       test_srav(a0, a1, -VALUE);
+
       test_sllc_o(a0, a1);
       test_sllv(a0, a1, SHIFT);
       test_srlc_o(a0, a1);
       test_srlv(a0, a1, SHIFT);
       test_srac_o(a0, a1);
       test_srav(a0, a1, SHIFT);
+
       test_sllc_on(a0, a1);
       test_sllv(a0, a1, -SHIFT);
       test_srlc_on(a0, a1);
       test_srlv(a0, a1, -SHIFT);
       test_srac_on(a0, a1);
       test_srav(a0, a1, -SHIFT);
+
+      test_sllc_add(a0, a1);
+      test_sllv_add(a0, a1, ADD_INIT);
+      test_srlc_add(a0, a1);
+      test_srlv_add(a0, a1, ADD_INIT);
+      test_srac_add(a0, a1);
+      test_srav_add(a0, a1, ADD_INIT);
+
+      test_sllc_and(a0, a1);
+      test_sllv_and(a0, a1, BIT_MASK);
+      test_srlc_and(a0, a1);
+      test_srlv_and(a0, a1, BIT_MASK);
+      test_srac_and(a0, a1);
+      test_srav_and(a0, a1, BIT_MASK);
     }
     // Test and verify results
     System.out.println("Verification");
@@ -354,6 +374,60 @@
         errn += verify("test_srav_on: ", i, a0[i], (long)((long)(ADD_INIT+i)>>(-SHIFT)));
       }
 
+      test_sllc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+      test_sllv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)<<VALUE));
+      }
+
+      test_srlc_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+      test_srlv_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>>VALUE));
+      }
+
+      test_srac_add(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+      test_srav_add(a0, a1, ADD_INIT);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_add: ", i, a0[i], (long)(((long)(ADD_INIT+i) + ADD_INIT)>>VALUE));
+      }
+
+      test_sllc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllc_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+      test_sllv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_sllv_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)<<VALUE));
+      }
+
+      test_srlc_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlc_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+      test_srlv_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srlv_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>>VALUE));
+      }
+
+      test_srac_and(a0, a1);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srac_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+      test_srav_and(a0, a1, BIT_MASK);
+      for (int i=0; i<ARRLEN; i++) {
+        errn += verify("test_srav_and: ", i, a0[i], (long)(((long)(ADD_INIT+i) & BIT_MASK)>>VALUE));
+      }
+
     }
 
     if (errn > 0)
@@ -696,6 +770,84 @@
     end = System.currentTimeMillis();
     System.out.println("test_srav_on: " + (end - start));
 
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_add(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_add: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_add(a0, a1, ADD_INIT);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_add: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_sllv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_sllv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlc_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlc_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srlv_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srlv_and: " + (end - start));
+
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srac_and(a0, a1);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srac_and: " + (end - start));
+    start = System.currentTimeMillis();
+    for (int i=0; i<ITERS; i++) {
+      test_srav_and(a0, a1, BIT_MASK);
+    }
+    end = System.currentTimeMillis();
+    System.out.println("test_srav_and: " + (end - start));
+
     return errn;
   }
 
@@ -854,6 +1006,26 @@
       a0[i] = (long)(a1[i]<<b);
     }
   }
+  static void test_sllc_add(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] + ADD_INIT)<<VALUE);
+    }
+  }
+  static void test_sllv_add(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] + b)<<VALUE);
+    }
+  }
+  static void test_sllc_and(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] & BIT_MASK)<<VALUE);
+    }
+  }
+  static void test_sllv_and(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] & b)<<VALUE);
+    }
+  }
 
   static void test_srlc(long[] a0, long[] a1) {
     for (int i = 0; i < a0.length; i+=1) {
@@ -880,6 +1052,26 @@
       a0[i] = (long)(a1[i]>>>b);
     }
   }
+  static void test_srlc_add(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] + ADD_INIT)>>>VALUE);
+    }
+  }
+  static void test_srlv_add(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] + b)>>>VALUE);
+    }
+  }
+  static void test_srlc_and(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] & BIT_MASK)>>>VALUE);
+    }
+  }
+  static void test_srlv_and(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] & b)>>>VALUE);
+    }
+  }
 
   static void test_srac(long[] a0, long[] a1) {
     for (int i = 0; i < a0.length; i+=1) {
@@ -906,6 +1098,26 @@
       a0[i] = (long)(a1[i]>>b);
     }
   }
+  static void test_srac_add(long[] a0, long[] a1) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] + ADD_INIT)>>VALUE);
+    }
+  }
+  static void test_srav_add(long[] a0, long[] a1, long b) {
+    for (int i = 0; i < a0.length; i+=1) {
+      a0[i] = (long)((a1[i] + b)>>VALUE);
+    }