changeset 10508:76582e8dc9e6 jdk-9+112

Merge
author lana
date Wed, 23 Mar 2016 21:44:35 -0700
parents cf67bfa444b1 958cf9910c0f
children 57b3f05dc144 c569f8d89269 9b03bbd4cc3f
files test/compiler/compilercontrol/jcmd/StressAddSequentiallyTest.java
diffstat 164 files changed, 4593 insertions(+), 2208 deletions(-) [+]
line wrap: on
line diff
--- a/make/lib/Lib-jdk.hotspot.agent.gmk	Wed Mar 23 19:33:34 2016 -0700
+++ b/make/lib/Lib-jdk.hotspot.agent.gmk	Wed Mar 23 21:44:35 2016 -0700
@@ -91,7 +91,7 @@
   ifeq ($(OPENJDK_TARGET_CPU), x86_64)
     SA_CXXFLAGS += -DWIN64
   else
-    SA_CXXFLAGS += -RTC1 -ZI
+    SA_CXXFLAGS += -RTC1
     SA_LDFLAGS += -SAFESEH
   endif
 endif
--- a/src/cpu/aarch64/vm/aarch64.ad	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/aarch64/vm/aarch64.ad	Wed Mar 23 21:44:35 2016 -0700
@@ -3425,9 +3425,6 @@
 // false => size gets scaled to BytesPerLong, ok.
 const bool Matcher::init_array_count_is_in_bytes = false;
 
-// Threshold size for cleararray.
-const int Matcher::init_array_short_size = 18 * BytesPerLong;
-
 // Use conditional move (CMOVL)
 const int Matcher::long_cmove_cost() {
   // long cmoves are no more expensive than int cmoves
@@ -4135,14 +4132,14 @@
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               &Assembler::ldxr, &MacroAssembler::cmp, &Assembler::stlxr);
+               Assembler::xword, /*acquire*/ false, /*release*/ true);
   %}
 
   enc_class aarch64_enc_cmpxchgw(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               &Assembler::ldxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
+               Assembler::word, /*acquire*/ false, /*release*/ true);
   %}
 
 
@@ -4154,14 +4151,14 @@
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               &Assembler::ldaxr, &MacroAssembler::cmp, &Assembler::stlxr);
+               Assembler::xword, /*acquire*/ true, /*release*/ true);
   %}
 
   enc_class aarch64_enc_cmpxchgw_acq(memory mem, iRegINoSp oldval, iRegINoSp newval) %{
     MacroAssembler _masm(&cbuf);
     guarantee($mem$$index == -1 && $mem$$disp == 0, "impossible encoding");
     __ cmpxchg($mem$$base$$Register, $oldval$$Register, $newval$$Register,
-               &Assembler::ldaxrw, &MacroAssembler::cmpw, &Assembler::stlxrw);
+               Assembler::word, /*acquire*/ true, /*release*/ true);
   %}
 
 
@@ -4679,8 +4676,14 @@
 
     // Compare object markOop with mark and if equal exchange scratch1
     // with object markOop.
-    {
+    if (UseLSE) {
+      __ mov(tmp, disp_hdr);
+      __ casal(Assembler::xword, tmp, box, oop);
+      __ cmp(tmp, disp_hdr);
+      __ br(Assembler::EQ, cont);
+    } else {
       Label retry_load;
+      __ prfm(Address(oop), PSTL1STRM);
       __ bind(retry_load);
       __ ldaxr(tmp, oop);
       __ cmp(tmp, disp_hdr);
@@ -4729,8 +4732,13 @@
       __ add(tmp, disp_hdr, (ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value));
       __ mov(disp_hdr, zr);
 
-      {
+      if (UseLSE) {
+        __ mov(rscratch1, disp_hdr);
+        __ casal(Assembler::xword, rscratch1, rthread, tmp);
+        __ cmp(rscratch1, disp_hdr);
+      } else {
         Label retry_load, fail;
+        __ prfm(Address(tmp), PSTL1STRM);
         __ bind(retry_load);
         __ ldaxr(rscratch1, tmp);
         __ cmp(disp_hdr, rscratch1);
@@ -4818,8 +4826,13 @@
     // see the stack address of the basicLock in the markOop of the
     // object.
 
-      {
+      if (UseLSE) {
+        __ mov(tmp, box);
+        __ casl(Assembler::xword, tmp, disp_hdr, oop);
+        __ cmp(tmp, box);
+      } else {
         Label retry_load;
+        __ prfm(Address(oop), PSTL1STRM);
         __ bind(retry_load);
         __ ldxr(tmp, oop);
         __ cmp(box, tmp);
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -972,7 +972,7 @@
 
   // System
   void system(int op0, int op1, int CRn, int CRm, int op2,
-              Register rt = (Register)0b11111)
+              Register rt = dummy_reg)
   {
     starti;
     f(0b11010101000, 31, 21);
@@ -1082,7 +1082,7 @@
 
 #define INSN(NAME, opc)                         \
   void NAME() {                 \
-    branch_reg((Register)0b11111, opc);         \
+    branch_reg(dummy_reg, opc);         \
   }
 
   INSN(eret, 0b0100);
@@ -1094,10 +1094,22 @@
   enum operand_size { byte, halfword, word, xword };
 
   void load_store_exclusive(Register Rs, Register Rt1, Register Rt2,
-    Register Rn, enum operand_size sz, int op, int o0) {
+    Register Rn, enum operand_size sz, int op, bool ordered) {
     starti;
     f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
-    rf(Rs, 16), f(o0, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0);
+    rf(Rs, 16), f(ordered, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0);
+  }
+
+  void load_exclusive(Register dst, Register addr,
+                      enum operand_size sz, bool ordered) {
+    load_store_exclusive(dummy_reg, dst, dummy_reg, addr,
+                         sz, 0b010, ordered);
+  }
+
+  void store_exclusive(Register status, Register new_val, Register addr,
+                       enum operand_size sz, bool ordered) {
+    load_store_exclusive(status, new_val, dummy_reg, addr,
+                         sz, 0b000, ordered);
   }
 
 #define INSN4(NAME, sz, op, o0) /* Four registers */                    \
@@ -1109,19 +1121,19 @@
 #define INSN3(NAME, sz, op, o0) /* Three registers */                   \
   void NAME(Register Rs, Register Rt, Register Rn) {                    \
     guarantee(Rs != Rn && Rs != Rt, "unpredictable instruction");       \
-    load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0);    \
+    load_store_exclusive(Rs, Rt, dummy_reg, Rn, sz, op, o0); \
   }
 
 #define INSN2(NAME, sz, op, o0) /* Two registers */                     \
   void NAME(Register Rt, Register Rn) {                                 \
-    load_store_exclusive((Register)0b11111, Rt, (Register)0b11111,      \
+    load_store_exclusive(dummy_reg, Rt, dummy_reg, \
                          Rn, sz, op, o0);                               \
   }
 
 #define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
   void NAME(Register Rt1, Register Rt2, Register Rn) {                  \
     guarantee(Rt1 != Rt2, "unpredictable instruction");                 \
-    load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0);  \
+    load_store_exclusive(dummy_reg, Rt1, Rt2, Rn, sz, op, o0);          \
   }
 
   // bytes
@@ -1169,6 +1181,46 @@
 #undef INSN4
 #undef INSN_FOO
 
+  // 8.1 Compare and swap extensions
+  void lse_cas(Register Rs, Register Rt, Register Rn,
+                        enum operand_size sz, bool a, bool r, bool not_pair) {
+    starti;
+    if (! not_pair) { // Pair
+      assert(sz == word || sz == xword, "invalid size");
+      /* The size bit is in bit 30, not 31 */
+      sz = (operand_size)(sz == word ? 0b00:0b01);
+    }
+    f(sz, 31, 30), f(0b001000, 29, 24), f(1, 23), f(a, 22), f(1, 21);
+    rf(Rs, 16), f(r, 15), f(0b11111, 14, 10), rf(Rn, 5), rf(Rt, 0);
+  }
+
+  // CAS
+#define INSN(NAME, a, r)                                                \
+  void NAME(operand_size sz, Register Rs, Register Rt, Register Rn) {   \
+    assert(Rs != Rn && Rs != Rt, "unpredictable instruction");          \
+    lse_cas(Rs, Rt, Rn, sz, a, r, true);                                \
+  }
+  INSN(cas,    false, false)
+  INSN(casa,   true,  false)
+  INSN(casl,   false, true)
+  INSN(casal,  true,  true)
+#undef INSN
+
+  // CASP
+#define INSN(NAME, a, r)                                                \
+  void NAME(operand_size sz, Register Rs, Register Rs1,                 \
+            Register Rt, Register Rt1, Register Rn) {                   \
+    assert((Rs->encoding() & 1) == 0 && (Rt->encoding() & 1) == 0 &&    \
+           Rs->successor() == Rs1 && Rt->successor() == Rt1 &&          \
+           Rs != Rn && Rs1 != Rn && Rs != Rt, "invalid registers");     \
+    lse_cas(Rs, Rt, Rn, sz, a, r, false);                               \
+  }
+  INSN(casp,    false, false)
+  INSN(caspa,   true,  false)
+  INSN(caspl,   false, true)
+  INSN(caspal,  true,  true)
+#undef INSN
+
   // Load register (literal)
 #define INSN(NAME, opc, V)                                              \
   void NAME(Register Rt, address dest) {                                \
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1556,38 +1556,54 @@
 }
 
 void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) {
-  Label retry_load, nope;
-  // flush and load exclusive from the memory location
-  // and fail if it is not what we expect
-  __ bind(retry_load);
-  __ ldaxrw(rscratch1, addr);
-  __ cmpw(rscratch1, cmpval);
-  __ cset(rscratch1, Assembler::NE);
-  __ br(Assembler::NE, nope);
-  // if we store+flush with no intervening write rscratch1 wil be zero
-  __ stlxrw(rscratch1, newval, addr);
-  // retry so we only ever return after a load fails to compare
-  // ensures we don't return a stale value after a failed write.
-  __ cbnzw(rscratch1, retry_load);
-  __ bind(nope);
+  if (UseLSE) {
+    __ mov(rscratch1, cmpval);
+    __ casal(Assembler::word, rscratch1, newval, addr);
+    __ cmpw(rscratch1, cmpval);
+    __ cset(rscratch1, Assembler::NE);
+  } else {
+    Label retry_load, nope;
+    // flush and load exclusive from the memory location
+    // and fail if it is not what we expect
+    __ prfm(Address(addr), PSTL1STRM);
+    __ bind(retry_load);
+    __ ldaxrw(rscratch1, addr);
+    __ cmpw(rscratch1, cmpval);
+    __ cset(rscratch1, Assembler::NE);
+    __ br(Assembler::NE, nope);
+    // if we store+flush with no intervening write rscratch1 wil be zero
+    __ stlxrw(rscratch1, newval, addr);
+    // retry so we only ever return after a load fails to compare
+    // ensures we don't return a stale value after a failed write.
+    __ cbnzw(rscratch1, retry_load);
+    __ bind(nope);
+  }
   __ membar(__ AnyAny);
 }
 
 void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) {
-  Label retry_load, nope;
-  // flush and load exclusive from the memory location
-  // and fail if it is not what we expect
-  __ bind(retry_load);
-  __ ldaxr(rscratch1, addr);
-  __ cmp(rscratch1, cmpval);
-  __ cset(rscratch1, Assembler::NE);
-  __ br(Assembler::NE, nope);
-  // if we store+flush with no intervening write rscratch1 wil be zero
-  __ stlxr(rscratch1, newval, addr);
-  // retry so we only ever return after a load fails to compare
-  // ensures we don't return a stale value after a failed write.
-  __ cbnz(rscratch1, retry_load);
-  __ bind(nope);
+  if (UseLSE) {
+    __ mov(rscratch1, cmpval);
+    __ casal(Assembler::xword, rscratch1, newval, addr);
+    __ cmp(rscratch1, cmpval);
+    __ cset(rscratch1, Assembler::NE);
+  } else {
+    Label retry_load, nope;
+    // flush and load exclusive from the memory location
+    // and fail if it is not what we expect
+    __ prfm(Address(addr), PSTL1STRM);
+    __ bind(retry_load);
+    __ ldaxr(rscratch1, addr);
+    __ cmp(rscratch1, cmpval);
+    __ cset(rscratch1, Assembler::NE);
+    __ br(Assembler::NE, nope);
+    // if we store+flush with no intervening write rscratch1 wil be zero
+    __ stlxr(rscratch1, newval, addr);
+    // retry so we only ever return after a load fails to compare
+    // ensures we don't return a stale value after a failed write.
+    __ cbnz(rscratch1, retry_load);
+    __ bind(nope);
+  }
   __ membar(__ AnyAny);
 }
 
@@ -3156,6 +3172,7 @@
       }
       Label again;
       __ lea(tmp, addr);
+      __ prfm(Address(tmp), PSTL1STRM);
       __ bind(again);
       (_masm->*lda)(dst, tmp);
       (_masm->*add)(rscratch1, dst, inc);
@@ -3175,6 +3192,7 @@
       assert_different_registers(obj, addr.base(), tmp, rscratch2, dst);
       Label again;
       __ lea(tmp, addr);
+      __ prfm(Address(tmp), PSTL1STRM);
       __ bind(again);
       (_masm->*lda)(dst, tmp);
       (_masm->*stl)(rscratch2, obj, tmp);
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/aarch64/vm/globals_aarch64.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -76,6 +76,8 @@
 // avoid biased locking while we are bootstrapping the aarch64 build
 define_pd_global(bool, UseBiasedLocking, false);
 
+define_pd_global(intx, InitArrayShortSize, 18*BytesPerLong);
+
 #if defined(COMPILER1) || defined(COMPILER2)
 define_pd_global(intx, InlineSmallCode,          1000);
 #endif
@@ -101,9 +103,13 @@
                                                                         \
   product(bool, UseCRC32, false,                                        \
           "Use CRC32 instructions for CRC32 computation")               \
+                                                                        \
+  product(bool, UseLSE, false,                                          \
+          "Use LSE instructions")                                       \
 
 // Don't attempt to use Neon on builtin sim until builtin sim supports it
 #define UseCRC32 false
+#define UseSIMDForMemoryOps    false
 
 #else
 #define UseBuiltinSim           false
@@ -121,6 +127,10 @@
           "Use Neon for CRC32 computation")                             \
   product(bool, UseCRC32, false,                                        \
           "Use CRC32 instructions for CRC32 computation")               \
+  product(bool, UseSIMDForMemoryOps, false,                             \
+          "Use SIMD instructions in generated memory move code")        \
+  product(bool, UseLSE, false,                                          \
+          "Use LSE instructions")                                       \
   product(bool, TraceTraps, false, "Trace all traps the signal handler")
 
 #endif
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1638,6 +1638,7 @@
 
 void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) {
   Label retry_load;
+  prfm(Address(counter_addr), PSTL1STRM);
   bind(retry_load);
   // flush and load exclusive from the memory location
   ldxrw(tmp, counter_addr);
@@ -2070,25 +2071,32 @@
   // oldv holds comparison value
   // newv holds value to write in exchange
   // addr identifies memory word to compare against/update
-  // tmp returns 0/1 for success/failure
-  Label retry_load, nope;
-
-  bind(retry_load);
-  // flush and load exclusive from the memory location
-  // and fail if it is not what we expect
-  ldaxr(tmp, addr);
-  cmp(tmp, oldv);
-  br(Assembler::NE, nope);
-  // if we store+flush with no intervening write tmp wil be zero
-  stlxr(tmp, newv, addr);
-  cbzw(tmp, succeed);
-  // retry so we only ever return after a load fails to compare
-  // ensures we don't return a stale value after a failed write.
-  b(retry_load);
-  // if the memory word differs we return it in oldv and signal a fail
-  bind(nope);
-  membar(AnyAny);
-  mov(oldv, tmp);
+  if (UseLSE) {
+    mov(tmp, oldv);
+    casal(Assembler::xword, oldv, newv, addr);
+    cmp(tmp, oldv);
+    br(Assembler::EQ, succeed);
+    membar(AnyAny);
+  } else {
+    Label retry_load, nope;
+    prfm(Address(addr), PSTL1STRM);
+    bind(retry_load);
+    // flush and load exclusive from the memory location
+    // and fail if it is not what we expect
+    ldaxr(tmp, addr);
+    cmp(tmp, oldv);
+    br(Assembler::NE, nope);
+    // if we store+flush with no intervening write tmp wil be zero
+    stlxr(tmp, newv, addr);
+    cbzw(tmp, succeed);
+    // retry so we only ever return after a load fails to compare
+    // ensures we don't return a stale value after a failed write.
+    b(retry_load);
+    // if the memory word differs we return it in oldv and signal a fail
+    bind(nope);
+    membar(AnyAny);
+    mov(oldv, tmp);
+  }
   if (fail)
     b(*fail);
 }
@@ -2099,28 +2107,64 @@
   // newv holds value to write in exchange
   // addr identifies memory word to compare against/update
   // tmp returns 0/1 for success/failure
-  Label retry_load, nope;
-
-  bind(retry_load);
-  // flush and load exclusive from the memory location
-  // and fail if it is not what we expect
-  ldaxrw(tmp, addr);
-  cmp(tmp, oldv);
-  br(Assembler::NE, nope);
-  // if we store+flush with no intervening write tmp wil be zero
-  stlxrw(tmp, newv, addr);
-  cbzw(tmp, succeed);
-  // retry so we only ever return after a load fails to compare
-  // ensures we don't return a stale value after a failed write.
-  b(retry_load);
-  // if the memory word differs we return it in oldv and signal a fail
-  bind(nope);
-  membar(AnyAny);
-  mov(oldv, tmp);
+  if (UseLSE) {
+    mov(tmp, oldv);
+    casal(Assembler::word, oldv, newv, addr);
+    cmp(tmp, oldv);
+    br(Assembler::EQ, succeed);
+    membar(AnyAny);
+  } else {
+    Label retry_load, nope;
+    prfm(Address(addr), PSTL1STRM);
+    bind(retry_load);
+    // flush and load exclusive from the memory location
+    // and fail if it is not what we expect
+    ldaxrw(tmp, addr);
+    cmp(tmp, oldv);
+    br(Assembler::NE, nope);
+    // if we store+flush with no intervening write tmp wil be zero
+    stlxrw(tmp, newv, addr);
+    cbzw(tmp, succeed);
+    // retry so we only ever return after a load fails to compare
+    // ensures we don't return a stale value after a failed write.
+    b(retry_load);
+    // if the memory word differs we return it in oldv and signal a fail
+    bind(nope);
+    membar(AnyAny);
+    mov(oldv, tmp);
+  }
   if (fail)
     b(*fail);
 }
 
+// A generic CAS; success or failure is in the EQ flag.
+void MacroAssembler::cmpxchg(Register addr, Register expected,
+                             Register new_val,
+                             enum operand_size size,
+                             bool acquire, bool release,
+                             Register tmp) {
+  if (UseLSE) {
+    mov(tmp, expected);
+    lse_cas(tmp, new_val, addr, size, acquire, release, /*not_pair*/ true);
+    cmp(tmp, expected);
+  } else {
+    BLOCK_COMMENT("cmpxchg {");
+    Label retry_load, done;
+    prfm(Address(addr), PSTL1STRM);
+    bind(retry_load);
+    load_exclusive(tmp, addr, size, acquire);
+    if (size == xword)
+      cmp(tmp, expected);
+    else
+      cmpw(tmp, expected);
+    br(Assembler::NE, done);
+    store_exclusive(tmp, new_val, addr, size, release);
+    cbnzw(tmp, retry_load);
+    bind(done);
+    BLOCK_COMMENT("} cmpxchg");
+  }
+}
+
 static bool different(Register a, RegisterOrConstant b, Register c) {
   if (b.is_constant())
     return a != c;
@@ -2135,6 +2179,7 @@
     result = different(prev, incr, addr) ? prev : rscratch2;            \
                                                                         \
   Label retry_load;                                                     \
+  prfm(Address(addr), PSTL1STRM);                                       \
   bind(retry_load);                                                     \
   LDXR(result, addr);                                                   \
   OP(rscratch1, result, incr);                                          \
@@ -2157,6 +2202,7 @@
     result = different(prev, newv, addr) ? prev : rscratch2;            \
                                                                         \
   Label retry_load;                                                     \
+  prfm(Address(addr), PSTL1STRM);                                       \
   bind(retry_load);                                                     \
   LDXR(result, addr);                                                   \
   STXR(rscratch1, newv, addr);                                          \
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -971,21 +971,10 @@
   }
 
   // A generic CAS; success or failure is in the EQ flag.
-  template <typename T1, typename T2>
   void cmpxchg(Register addr, Register expected, Register new_val,
-               T1 load_insn,
-               void (MacroAssembler::*cmp_insn)(Register, Register),
-               T2 store_insn,
-               Register tmp = rscratch1) {
-    Label retry_load, done;
-    bind(retry_load);
-    (this->*load_insn)(tmp, addr);
-    (this->*cmp_insn)(tmp, expected);
-    br(Assembler::NE, done);
-    (this->*store_insn)(tmp, new_val, addr);
-    cbnzw(tmp, retry_load);
-    bind(done);
-  }
+               enum operand_size size,
+               bool acquire, bool release,
+               Register tmp = rscratch1);
 
   // Calls
 
--- a/src/cpu/aarch64/vm/register_aarch64.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/aarch64/vm/register_aarch64.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -107,6 +107,9 @@
 CONSTANT_REGISTER_DECLARATION(Register, zr,  (32));
 CONSTANT_REGISTER_DECLARATION(Register, sp,  (33));
 
+// Used as a filler in instructions where a register field is unused.
+const Register dummy_reg = r31_sp;
+
 // Use FloatRegister as shortcut
 class FloatRegisterImpl;
 typedef FloatRegisterImpl* FloatRegister;
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -729,7 +729,7 @@
   //
   // count is a count of words.
   //
-  // Precondition: count >= 2
+  // Precondition: count >= 8
   //
   // Postconditions:
   //
@@ -741,6 +741,7 @@
   void generate_copy_longs(Label &start, Register s, Register d, Register count,
                            copy_direction direction) {
     int unit = wordSize * direction;
+    int bias = (UseSIMDForMemoryOps ? 4:2) * wordSize;
 
     int offset;
     const Register t0 = r3, t1 = r4, t2 = r5, t3 = r6,
@@ -750,7 +751,7 @@
     assert_different_registers(rscratch1, t0, t1, t2, t3, t4, t5, t6, t7);
     assert_different_registers(s, d, count, rscratch1);
 
-    Label again, large, small;
+    Label again, drain;
     const char *stub_name;
     if (direction == copy_forwards)
       stub_name = "foward_copy_longs";
@@ -759,57 +760,35 @@
     StubCodeMark mark(this, "StubRoutines", stub_name);
     __ align(CodeEntryAlignment);
     __ bind(start);
-    __ cmp(count, 8);
-    __ br(Assembler::LO, small);
     if (direction == copy_forwards) {
-      __ sub(s, s, 2 * wordSize);
-      __ sub(d, d, 2 * wordSize);
+      __ sub(s, s, bias);
+      __ sub(d, d, bias);
     }
+
+#ifdef ASSERT
+    // Make sure we are never given < 8 words
+    {
+      Label L;
+      __ cmp(count, 8);
+      __ br(Assembler::GE, L);
+      __ stop("genrate_copy_longs called with < 8 words");
+      __ bind(L);
+    }
+#endif
+
+    // Fill 8 registers
+    if (UseSIMDForMemoryOps) {
+      __ ldpq(v0, v1, Address(s, 4 * unit));
+      __ ldpq(v2, v3, Address(__ pre(s, 8 * unit)));
+    } else {
+      __ ldp(t0, t1, Address(s, 2 * unit));
+      __ ldp(t2, t3, Address(s, 4 * unit));
+      __ ldp(t4, t5, Address(s, 6 * unit));
+      __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+    }
+
     __ subs(count, count, 16);
-    __ br(Assembler::GE, large);
-
-    // 8 <= count < 16 words.  Copy 8.
-    __ ldp(t0, t1, Address(s, 2 * unit));
-    __ ldp(t2, t3, Address(s, 4 * unit));
-    __ ldp(t4, t5, Address(s, 6 * unit));
-    __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
-
-    __ stp(t0, t1, Address(d, 2 * unit));
-    __ stp(t2, t3, Address(d, 4 * unit));
-    __ stp(t4, t5, Address(d, 6 * unit));
-    __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
-
-    if (direction == copy_forwards) {
-      __ add(s, s, 2 * wordSize);
-      __ add(d, d, 2 * wordSize);
-    }
-
-    {
-      Label L1, L2;
-      __ bind(small);
-      __ tbz(count, exact_log2(4), L1);
-      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
-      __ ldp(t2, t3, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
-      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
-      __ stp(t2, t3, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
-      __ bind(L1);
-
-      __ tbz(count, 1, L2);
-      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
-      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
-      __ bind(L2);
-    }
-
-    __ ret(lr);
-
-    __ align(CodeEntryAlignment);
-    __ bind(large);
-
-    // Fill 8 registers
-    __ ldp(t0, t1, Address(s, 2 * unit));
-    __ ldp(t2, t3, Address(s, 4 * unit));
-    __ ldp(t4, t5, Address(s, 6 * unit));
-    __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+    __ br(Assembler::LO, drain);
 
     int prefetch = PrefetchCopyIntervalInBytes;
     bool use_stride = false;
@@ -824,38 +803,56 @@
     if (PrefetchCopyIntervalInBytes > 0)
       __ prfm(use_stride ? Address(s, stride) : Address(s, prefetch), PLDL1KEEP);
 
-    __ stp(t0, t1, Address(d, 2 * unit));
-    __ ldp(t0, t1, Address(s, 2 * unit));
-    __ stp(t2, t3, Address(d, 4 * unit));
-    __ ldp(t2, t3, Address(s, 4 * unit));
-    __ stp(t4, t5, Address(d, 6 * unit));
-    __ ldp(t4, t5, Address(s, 6 * unit));
-    __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
-    __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+    if (UseSIMDForMemoryOps) {
+      __ stpq(v0, v1, Address(d, 4 * unit));
+      __ ldpq(v0, v1, Address(s, 4 * unit));
+      __ stpq(v2, v3, Address(__ pre(d, 8 * unit)));
+      __ ldpq(v2, v3, Address(__ pre(s, 8 * unit)));
+    } else {
+      __ stp(t0, t1, Address(d, 2 * unit));
+      __ ldp(t0, t1, Address(s, 2 * unit));
+      __ stp(t2, t3, Address(d, 4 * unit));
+      __ ldp(t2, t3, Address(s, 4 * unit));
+      __ stp(t4, t5, Address(d, 6 * unit));
+      __ ldp(t4, t5, Address(s, 6 * unit));
+      __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
+      __ ldp(t6, t7, Address(__ pre(s, 8 * unit)));
+    }
 
     __ subs(count, count, 8);
     __ br(Assembler::HS, again);
 
     // Drain
-    __ stp(t0, t1, Address(d, 2 * unit));
-    __ stp(t2, t3, Address(d, 4 * unit));
-    __ stp(t4, t5, Address(d, 6 * unit));
-    __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
-
-    if (direction == copy_forwards) {
-      __ add(s, s, 2 * wordSize);
-      __ add(d, d, 2 * wordSize);
+    __ bind(drain);
+    if (UseSIMDForMemoryOps) {
+      __ stpq(v0, v1, Address(d, 4 * unit));
+      __ stpq(v2, v3, Address(__ pre(d, 8 * unit)));
+    } else {
+      __ stp(t0, t1, Address(d, 2 * unit));
+      __ stp(t2, t3, Address(d, 4 * unit));
+      __ stp(t4, t5, Address(d, 6 * unit));
+      __ stp(t6, t7, Address(__ pre(d, 8 * unit)));
     }
 
     {
       Label L1, L2;
       __ tbz(count, exact_log2(4), L1);
-      __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
-      __ ldp(t2, t3, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
-      __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
-      __ stp(t2, t3, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
+      if (UseSIMDForMemoryOps) {
+        __ ldpq(v0, v1, Address(__ pre(s, 4 * unit)));
+        __ stpq(v0, v1, Address(__ pre(d, 4 * unit)));
+      } else {
+        __ ldp(t0, t1, Address(s, 2 * unit));
+        __ ldp(t2, t3, Address(__ pre(s, 4 * unit)));
+        __ stp(t0, t1, Address(d, 2 * unit));
+        __ stp(t2, t3, Address(__ pre(d, 4 * unit)));
+      }
       __ bind(L1);
 
+      if (direction == copy_forwards) {
+        __ add(s, s, bias);
+        __ add(d, d, bias);
+      }
+
       __ tbz(count, 1, L2);
       __ ldp(t0, t1, Address(__ adjust(s, 2 * unit, direction == copy_backwards)));
       __ stp(t0, t1, Address(__ adjust(d, 2 * unit, direction == copy_backwards)));
@@ -931,16 +928,135 @@
     int granularity = uabs(step);
     const Register t0 = r3, t1 = r4;
 
+    // <= 96 bytes do inline. Direction doesn't matter because we always
+    // load all the data before writing anything
+    Label copy4, copy8, copy16, copy32, copy80, copy128, copy_big, finish;
+    const Register t2 = r5, t3 = r6, t4 = r7, t5 = r8;
+    const Register t6 = r9, t7 = r10, t8 = r11, t9 = r12;
+    const Register send = r17, dend = r18;
+
+    if (PrefetchCopyIntervalInBytes > 0)
+      __ prfm(Address(s, 0), PLDL1KEEP);
+    __ cmp(count, (UseSIMDForMemoryOps ? 96:80)/granularity);
+    __ br(Assembler::HI, copy_big);
+
+    __ lea(send, Address(s, count, Address::lsl(exact_log2(granularity))));
+    __ lea(dend, Address(d, count, Address::lsl(exact_log2(granularity))));
+
+    __ cmp(count, 16/granularity);
+    __ br(Assembler::LS, copy16);
+
+    __ cmp(count, 64/granularity);
+    __ br(Assembler::HI, copy80);
+
+    __ cmp(count, 32/granularity);
+    __ br(Assembler::LS, copy32);
+
+    // 33..64 bytes
+    if (UseSIMDForMemoryOps) {
+      __ ldpq(v0, v1, Address(s, 0));
+      __ ldpq(v2, v3, Address(send, -32));
+      __ stpq(v0, v1, Address(d, 0));
+      __ stpq(v2, v3, Address(dend, -32));
+    } else {
+      __ ldp(t0, t1, Address(s, 0));
+      __ ldp(t2, t3, Address(s, 16));
+      __ ldp(t4, t5, Address(send, -32));
+      __ ldp(t6, t7, Address(send, -16));
+
+      __ stp(t0, t1, Address(d, 0));
+      __ stp(t2, t3, Address(d, 16));
+      __ stp(t4, t5, Address(dend, -32));
+      __ stp(t6, t7, Address(dend, -16));
+    }
+    __ b(finish);
+
+    // 17..32 bytes
+    __ bind(copy32);
+    __ ldp(t0, t1, Address(s, 0));
+    __ ldp(t2, t3, Address(send, -16));
+    __ stp(t0, t1, Address(d, 0));
+    __ stp(t2, t3, Address(dend, -16));
+    __ b(finish);
+
+    // 65..80/96 bytes
+    // (96 bytes if SIMD because we do 32 byes per instruction)
+    __ bind(copy80);
+    if (UseSIMDForMemoryOps) {
+      __ ldpq(v0, v1, Address(s, 0));
+      __ ldpq(v2, v3, Address(s, 32));
+      __ ldpq(v4, v5, Address(send, -32));
+      __ stpq(v0, v1, Address(d, 0));
+      __ stpq(v2, v3, Address(d, 32));
+      __ stpq(v4, v5, Address(dend, -32));
+    } else {
+      __ ldp(t0, t1, Address(s, 0));
+      __ ldp(t2, t3, Address(s, 16));
+      __ ldp(t4, t5, Address(s, 32));
+      __ ldp(t6, t7, Address(s, 48));
+      __ ldp(t8, t9, Address(send, -16));
+
+      __ stp(t0, t1, Address(d, 0));
+      __ stp(t2, t3, Address(d, 16));
+      __ stp(t4, t5, Address(d, 32));
+      __ stp(t6, t7, Address(d, 48));
+      __ stp(t8, t9, Address(dend, -16));
+    }
+    __ b(finish);
+
+    // 0..16 bytes
+    __ bind(copy16);
+    __ cmp(count, 8/granularity);
+    __ br(Assembler::LO, copy8);
+
+    // 8..16 bytes
+    __ ldr(t0, Address(s, 0));
+    __ ldr(t1, Address(send, -8));
+    __ str(t0, Address(d, 0));
+    __ str(t1, Address(dend, -8));
+    __ b(finish);
+
+    if (granularity < 8) {
+      // 4..7 bytes
+      __ bind(copy8);
+      __ tbz(count, 2 - exact_log2(granularity), copy4);
+      __ ldrw(t0, Address(s, 0));
+      __ ldrw(t1, Address(send, -4));
+      __ strw(t0, Address(d, 0));
+      __ strw(t1, Address(dend, -4));
+      __ b(finish);
+      if (granularity < 4) {
+        // 0..3 bytes
+        __ bind(copy4);
+        __ cbz(count, finish); // get rid of 0 case
+        if (granularity == 2) {
+          __ ldrh(t0, Address(s, 0));
+          __ strh(t0, Address(d, 0));
+        } else { // granularity == 1
+          // Now 1..3 bytes. Handle the 1 and 2 byte case by copying
+          // the first and last byte.
+          // Handle the 3 byte case by loading and storing base + count/2
+          // (count == 1 (s+0)->(d+0), count == 2,3 (s+1) -> (d+1))
+          // This does means in the 1 byte case we load/store the same
+          // byte 3 times.
+          __ lsr(count, count, 1);
+          __ ldrb(t0, Address(s, 0));
+          __ ldrb(t1, Address(send, -1));
+          __ ldrb(t2, Address(s, count));
+          __ strb(t0, Address(d, 0));
+          __ strb(t1, Address(dend, -1));
+          __ strb(t2, Address(d, count));
+        }
+        __ b(finish);
+      }
+    }
+
+    __ bind(copy_big);
     if (is_backwards) {
       __ lea(s, Address(s, count, Address::lsl(exact_log2(-step))));
       __ lea(d, Address(d, count, Address::lsl(exact_log2(-step))));
     }
 
-    Label tail;
-
-    __ cmp(count, 16/granularity);
-    __ br(Assembler::LO, tail);
-
     // Now we've got the small case out of the way we can align the
     // source address on a 2-word boundary.
 
@@ -986,8 +1102,6 @@
 #endif
     }
 
-    __ cmp(count, 16/granularity);
-    __ br(Assembler::LT, tail);
     __ bind(aligned);
 
     // s is now 2-word-aligned.
@@ -1001,9 +1115,11 @@
       __ bl(copy_b);
 
     // And the tail.
-
-    __ bind(tail);
     copy_memory_small(s, d, count, tmp, step);
+
+    if (granularity >= 8) __ bind(copy8);
+    if (granularity >= 4) __ bind(copy4);
+    __ bind(finish);
   }
 
 
--- a/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1984,6 +1984,7 @@
   __ push(rscratch3);
   Label L;
   __ mov(rscratch2, (address) &BytecodeCounter::_counter_value);
+  __ prfm(Address(rscratch2), PSTL1STRM);
   __ bind(L);
   __ ldxr(rscratch1, rscratch2);
   __ add(rscratch1, rscratch1, 1);
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -61,6 +61,10 @@
 #define HWCAP_CRC32 (1<<7)
 #endif
 
+#ifndef HWCAP_ATOMICS
+#define HWCAP_ATOMICS (1<<8)
+#endif
+
 int VM_Version::_cpu;
 int VM_Version::_model;
 int VM_Version::_model2;
@@ -172,6 +176,7 @@
   if (auxv & HWCAP_AES)   strcat(buf, ", aes");
   if (auxv & HWCAP_SHA1)  strcat(buf, ", sha1");
   if (auxv & HWCAP_SHA2)  strcat(buf, ", sha256");
+  if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse");
 
   _features_string = os::strdup(buf);
 
@@ -191,6 +196,15 @@
     FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
   }
 
+  if (auxv & HWCAP_ATOMICS) {
+    if (FLAG_IS_DEFAULT(UseLSE))
+      FLAG_SET_DEFAULT(UseLSE, true);
+  } else {
+    if (UseLSE) {
+      warning("UseLSE specified, but not supported on this CPU");
+    }
+  }
+
   if (auxv & HWCAP_AES) {
     UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
     UseAESIntrinsics =
--- a/src/cpu/ppc/vm/globalDefinitions_ppc.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/ppc/vm/globalDefinitions_ppc.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -47,7 +47,7 @@
 // The expected size in bytes of a cache line, used to pad data structures.
 #define DEFAULT_CACHE_LINE_SIZE 128
 
-#if defined(COMPILER2) && defined(AIX)
+#if defined(COMPILER2) && (defined(AIX) || defined(linux))
 // Include Transactional Memory lock eliding optimization
 #define INCLUDE_RTM_OPT 1
 #endif
--- a/src/cpu/ppc/vm/globals_ppc.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/ppc/vm/globals_ppc.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -76,6 +76,8 @@
 
 define_pd_global(bool, CompactStrings, true);
 
+define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
+
 // Platform dependent flag handling: flags only defined on this platform.
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint)  \
                                                                             \
--- a/src/cpu/ppc/vm/ppc.ad	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/ppc/vm/ppc.ad	Wed Mar 23 21:44:35 2016 -0700
@@ -2137,8 +2137,6 @@
   return decode;
 }
 */
-// Threshold size for cleararray.
-const int Matcher::init_array_short_size = 8 * BytesPerLong;
 
 // false => size gets scaled to BytesPerLong, ok.
 const bool Matcher::init_array_count_is_in_bytes = false;
--- a/src/cpu/ppc/vm/vm_version_ppc.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/ppc/vm/vm_version_ppc.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -255,7 +255,16 @@
     }
 #endif
 #ifdef linux
-    // TODO: check kernel version (we currently have too old versions only)
+    // At least Linux kernel 4.2, as the problematic behavior of syscalls
+    // being called in the middle of a transaction has been addressed.
+    // Please, refer to commit b4b56f9ecab40f3b4ef53e130c9f6663be491894
+    // in Linux kernel source tree: https://goo.gl/Kc5i7A
+    if (os::Linux::os_version_is_known()) {
+      if (os::Linux::os_version() >= 0x040200)
+        os_too_old = false;
+    } else {
+      vm_exit_during_initialization("RTM can not be enabled: kernel version is unknown.");
+    }
 #endif
     if (os_too_old) {
       vm_exit_during_initialization("RTM is not supported on this OS version.");
--- a/src/cpu/sparc/vm/globals_sparc.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/sparc/vm/globals_sparc.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -90,6 +90,8 @@
 
 define_pd_global(bool, CompactStrings, true);
 
+define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
+
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
                                                                             \
   product(intx, UseVIS, 99,                                                 \
--- a/src/cpu/sparc/vm/sparc.ad	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Wed Mar 23 21:44:35 2016 -0700
@@ -1980,9 +1980,6 @@
 // No scaling for the parameter the ClearArray node.
 const bool Matcher::init_array_count_is_in_bytes = true;
 
-// Threshold size for cleararray.
-const int Matcher::init_array_short_size = 8 * BytesPerLong;
-
 // No additional cost for CMOVL.
 const int Matcher::long_cmove_cost() { return 0; }
 
--- a/src/cpu/x86/vm/assembler_x86.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -777,6 +777,7 @@
     case 0x6E: // movd
     case 0x7E: // movd
     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
+    case 0xFE: // paddd
       debug_only(has_disp32 = true);
       break;
 
@@ -926,6 +927,7 @@
     ip++; // skip P2, move to opcode
     // To find the end of instruction (which == end_pc_operand).
     switch (0xFF & *ip) {
+    case 0x22: // pinsrd r, r/a, #8
     case 0x61: // pcmpestri r, r/a, #8
     case 0x70: // pshufd r, r/a, #8
     case 0x73: // psrldq r, #8
@@ -3953,6 +3955,83 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
+  assert(VM_Version::supports_ssse3(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8((unsigned char)0x0F);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
+}
+
+void Assembler::pblendw(XMMRegister dst, XMMRegister src, int imm8) {
+  assert(VM_Version::supports_sse4_1(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8((unsigned char)0x0E);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
+}
+
+void Assembler::sha1rnds4(XMMRegister dst, XMMRegister src, int imm8) {
+  assert(VM_Version::supports_sha(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8((unsigned char)0xCC);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8((unsigned char)imm8);
+}
+
+void Assembler::sha1nexte(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_sha(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  emit_int8((unsigned char)0xC8);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::sha1msg1(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_sha(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  emit_int8((unsigned char)0xC9);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::sha1msg2(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_sha(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  emit_int8((unsigned char)0xCA);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// xmm0 is implicit additional source to this instruction.
+void Assembler::sha256rnds2(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_sha(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  emit_int8((unsigned char)0xCB);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::sha256msg1(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_sha(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  emit_int8((unsigned char)0xCC);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::sha256msg2(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_sha(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F_38, &attributes);
+  emit_int8((unsigned char)0xCD);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+
 void Assembler::shll(Register dst, int imm8) {
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
@@ -4931,6 +5010,15 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::paddd(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xFE);
+  emit_operand(dst, src);
+}
+
 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
@@ -5611,8 +5699,9 @@
 }
 
 
-void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_avx(), "");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
@@ -5621,11 +5710,12 @@
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
   // 0x01 - insert into upper 128 bits
-  emit_int8(0x01);
-}
-
-void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
+  emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -5633,26 +5723,29 @@
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 256 bits
   // 0x01 - insert into upper 256 bits
-  emit_int8(value & 0x01);
-}
-
-void Assembler::vinsertf64x4h(XMMRegister dst, Address src, int value) {
+  emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
   assert(dst != xnoreg, "sanity");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
   InstructionMark im(this);
   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1A);
   emit_operand(dst, src);
   // 0x00 - insert into lower 256 bits
-  // 0x01 - insert into upper 128 bits
-  emit_int8(value & 0x01);
-}
-
-void Assembler::vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
+  // 0x01 - insert into upper 256 bits
+  emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
+  assert(imm8 <= 0x03, "imm8: %u", imm8);
   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -5662,57 +5755,64 @@
   // 0x01 - insert into q1 128 bits (128..255)
   // 0x02 - insert into q2 128 bits (256..383)
   // 0x03 - insert into q3 128 bits (384..511)
-  emit_int8(value & 0x3);
-}
-
-void Assembler::vinsertf32x4h(XMMRegister dst, Address src, int value) {
+  emit_int8(imm8 & 0x03);
+}
+
+void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
   assert(VM_Version::supports_avx(), "");
   assert(dst != xnoreg, "sanity");
+  assert(imm8 <= 0x03, "imm8: %u", imm8);
   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   InstructionMark im(this);
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_operand(dst, src);
   // 0x00 - insert into q0 128 bits (0..127)
   // 0x01 - insert into q1 128 bits (128..255)
   // 0x02 - insert into q2 128 bits (256..383)
   // 0x03 - insert into q3 128 bits (384..511)
-  emit_int8(value & 0x3);
-}
-
-void Assembler::vinsertf128h(XMMRegister dst, Address src) {
+  emit_int8(imm8 & 0x03);
+}
+
+void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
   assert(VM_Version::supports_avx(), "");
   assert(dst != xnoreg, "sanity");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   InstructionMark im(this);
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_operand(dst, src);
+  // 0x00 - insert into lower 128 bits
   // 0x01 - insert into upper 128 bits
-  emit_int8(0x01);
-}
-
-void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
+  emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_avx(), "");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
-  // 0x00 - insert into lower 128 bits
-  // 0x01 - insert into upper 128 bits
-  emit_int8(0x01);
-}
-
-void Assembler::vextractf128h(Address dst, XMMRegister src) {
+  // 0x00 - extract from lower 128 bits
+  // 0x01 - extract from upper 128 bits
+  emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextractf128(Address dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_avx(), "");
   assert(src != xnoreg, "sanity");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
   InstructionMark im(this);
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
@@ -5720,12 +5820,14 @@
   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_operand(src, dst);
+  // 0x00 - extract from lower 128 bits
   // 0x01 - extract from upper 128 bits
-  emit_int8(0x01);
-}
-
-void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_avx2(), "");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
@@ -5734,11 +5836,12 @@
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
   // 0x01 - insert into upper 128 bits
-  emit_int8(0x01);
-}
-
-void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value) {
+  emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   int encode = vex_prefix_and_encode(dst->encoding(), nds_enc, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -5746,39 +5849,44 @@
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 256 bits
   // 0x01 - insert into upper 256 bits
-  emit_int8(value & 0x01);
-}
-
-void Assembler::vinserti128h(XMMRegister dst, Address src) {
+  emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
   assert(VM_Version::supports_avx2(), "");
   assert(dst != xnoreg, "sanity");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
   InstructionMark im(this);
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   // swap src<->dst for encoding
-  vex_prefix(src, dst->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  vex_prefix(src, nds_enc, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x38);
   emit_operand(dst, src);
+  // 0x00 - insert into lower 128 bits
   // 0x01 - insert into upper 128 bits
-  emit_int8(0x01);
-}
-
-void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
+  emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_avx(), "");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_int8((unsigned char)(0xC0 | encode));
-  // 0x00 - insert into lower 128 bits
-  // 0x01 - insert into upper 128 bits
-  emit_int8(0x01);
-}
-
-void Assembler::vextracti128h(Address dst, XMMRegister src) {
+  // 0x00 - extract from lower 128 bits
+  // 0x01 - extract from upper 128 bits
+  emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_avx2(), "");
   assert(src != xnoreg, "sanity");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
   InstructionMark im(this);
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
@@ -5786,47 +5894,53 @@
   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_operand(src, dst);
+  // 0x00 - extract from lower 128 bits
   // 0x01 - extract from upper 128 bits
-  emit_int8(0x01);
-}
-
-void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src, int value) {
+  emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x3B);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - extract from lower 256 bits
   // 0x01 - extract from upper 256 bits
-  emit_int8(value & 0x01);
-}
-
-void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
+  emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
+  assert(imm8 <= 0x03, "imm8: %u", imm8);
   InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_int8((unsigned char)(0xC0 | encode));
+  // 0x00 - extract from bits 127:0
   // 0x01 - extract from bits 255:128
   // 0x02 - extract from bits 383:256
   // 0x03 - extract from bits 511:384
-  emit_int8(value & 0x3);
-}
-
-void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src, int value) {
+  emit_int8(imm8 & 0x03);
+}
+
+void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1B);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - extract from lower 256 bits
   // 0x01 - extract from upper 256 bits
-  emit_int8(value & 0x1);
-}
-
-void Assembler::vextractf64x4h(Address dst, XMMRegister src, int value) {
+  emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextractf64x4(Address dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
   assert(src != xnoreg, "sanity");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
   InstructionMark im(this);
   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */  EVEX_64bit);
@@ -5835,11 +5949,12 @@
   emit_operand(src, dst);
   // 0x00 - extract from lower 256 bits
   // 0x01 - extract from upper 256 bits
-  emit_int8(value & 0x01);
-}
-
-void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
+  emit_int8(imm8 & 0x01);
+}
+
+void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_avx(), "");
+  assert(imm8 <= 0x03, "imm8: %u", imm8);
   int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -5849,12 +5964,13 @@
   // 0x01 - extract from bits 255:128
   // 0x02 - extract from bits 383:256
   // 0x03 - extract from bits 511:384
-  emit_int8(value & 0x3);
-}
-
-void Assembler::vextractf32x4h(Address dst, XMMRegister src, int value) {
+  emit_int8(imm8 & 0x03);
+}
+
+void Assembler::vextractf32x4(Address dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
   assert(src != xnoreg, "sanity");
+  assert(imm8 <= 0x03, "imm8: %u", imm8);
   InstructionMark im(this);
   InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
@@ -5865,19 +5981,21 @@
   // 0x01 - extract from bits 255:128
   // 0x02 - extract from bits 383:256
   // 0x03 - extract from bits 511:384
-  emit_int8(value & 0x3);
-}
-
-void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
+  emit_int8(imm8 & 0x03);
+}
+
+void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
+  assert(imm8 <= 0x03, "imm8: %u", imm8);
   InstructionAttr attributes(AVX_512bit, /* vex_w */ !_legacy_mode_dq, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
+  // 0x00 - extract from bits 127:0
   // 0x01 - extract from bits 255:128
   // 0x02 - extract from bits 383:256
   // 0x03 - extract from bits 511:384
-  emit_int8(value & 0x3);
+  emit_int8(imm8 & 0x03);
 }
 
 // duplicate 4-bytes integer data from src into 8 locations in dest
--- a/src/cpu/x86/vm/assembler_x86.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1672,6 +1672,18 @@
 
   void setb(Condition cc, Register dst);
 
+  void palignr(XMMRegister dst, XMMRegister src, int imm8);
+  void pblendw(XMMRegister dst, XMMRegister src, int imm8);
+
+  void sha1rnds4(XMMRegister dst, XMMRegister src, int imm8);
+  void sha1nexte(XMMRegister dst, XMMRegister src);
+  void sha1msg1(XMMRegister dst, XMMRegister src);
+  void sha1msg2(XMMRegister dst, XMMRegister src);
+  // xmm0 is implicit additional source to the following instruction.
+  void sha256rnds2(XMMRegister dst, XMMRegister src);
+  void sha256msg1(XMMRegister dst, XMMRegister src);
+  void sha256msg2(XMMRegister dst, XMMRegister src);
+
   void shldl(Register dst, Register src);
   void shldl(Register dst, Register src, int8_t imm8);
 
@@ -1868,6 +1880,7 @@
   void paddb(XMMRegister dst, XMMRegister src);
   void paddw(XMMRegister dst, XMMRegister src);
   void paddd(XMMRegister dst, XMMRegister src);
+  void paddd(XMMRegister dst, Address src);
   void paddq(XMMRegister dst, XMMRegister src);
   void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
   void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -1958,33 +1971,31 @@
   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
   void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
-  // Copy low 128bit into high 128bit of YMM registers.
-  void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
-  void vextractf128h(XMMRegister dst, XMMRegister src);
-  void vextracti128h(XMMRegister dst, XMMRegister src);
-
-  // Load/store high 128bit of YMM registers which does not destroy other half.
-  void vinsertf128h(XMMRegister dst, Address src);
-  void vinserti128h(XMMRegister dst, Address src);
-  void vextractf128h(Address dst, XMMRegister src);
-  void vextracti128h(Address dst, XMMRegister src);
-
-  // Copy low 256bit into high 256bit of ZMM registers.
-  void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
-  void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
-  void vextracti64x4h(XMMRegister dst, XMMRegister src, int value);
-  void vextractf64x4h(XMMRegister dst, XMMRegister src, int value);
-  void vextractf64x4h(Address dst, XMMRegister src, int value);
-  void vinsertf64x4h(XMMRegister dst, Address src, int value);
-
-  // Copy targeted 128bit segments of the ZMM registers
-  void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
-  void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
-  void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);
-  void vextractf32x4h(Address dst, XMMRegister src, int value);
-  void vinsertf32x4h(XMMRegister dst, XMMRegister nds, XMMRegister src, int value);
-  void vinsertf32x4h(XMMRegister dst, Address src, int value);
+  // 128bit copy from/to 256bit (YMM) vector registers
+  void vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
+  void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
+  void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8);
+  void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8);
+  void vinsertf128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
+  void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
+  void vextractf128(Address dst, XMMRegister src, uint8_t imm8);
+  void vextracti128(Address dst, XMMRegister src, uint8_t imm8);
+
+  // 256bit copy from/to 512bit (ZMM) vector registers
+  void vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
+  void vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
+  void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
+  void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
+  void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8);
+  void vinsertf64x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
+
+  // 128bit copy from/to 256bit (YMM) or 512bit (ZMM) vector registers
+  void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
+  void vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
+  void vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
+  void vextractf32x4(Address dst, XMMRegister src, uint8_t imm8);
+  void vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8);
+  void vinsertf32x4(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8);
 
   // duplicate 4-bytes integer data from src into 8 locations in dest
   void vpbroadcastd(XMMRegister dst, XMMRegister src);
--- a/src/cpu/x86/vm/globals_x86.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/globals_x86.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -97,6 +97,8 @@
 
 define_pd_global(bool, PreserveFramePointer, false);
 
+define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong);
+
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
                                                                             \
   develop(bool, IEEEPrecision, true,                                        \
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -3445,7 +3445,7 @@
 
 void MacroAssembler::movdqu(Address dst, XMMRegister src) {
   if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
-    Assembler::vextractf32x4h(dst, src, 0);
+    Assembler::vextractf32x4(dst, src, 0);
   } else {
     Assembler::movdqu(dst, src);
   }
@@ -3453,7 +3453,7 @@
 
 void MacroAssembler::movdqu(XMMRegister dst, Address src) {
   if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
-    Assembler::vinsertf32x4h(dst, src, 0);
+    Assembler::vinsertf32x4(dst, dst, src, 0);
   } else {
     Assembler::movdqu(dst, src);
   }
@@ -3478,7 +3478,7 @@
 
 void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
   if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
-    Assembler::vextractf64x4h(dst, src, 0);
+    vextractf64x4_low(dst, src);
   } else {
     Assembler::vmovdqu(dst, src);
   }
@@ -3486,7 +3486,7 @@
 
 void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
   if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
-    Assembler::vinsertf64x4h(dst, src, 0);
+    vinsertf64x4_low(dst, src);
   } else {
     Assembler::vmovdqu(dst, src);
   }
@@ -5649,14 +5649,14 @@
         // Save upper half of ZMM registers
         subptr(rsp, 32*num_xmm_regs);
         for (int n = 0; n < num_xmm_regs; n++) {
-          vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
+          vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
         }
       }
       assert(UseAVX > 0, "256 bit vectors are supported only with AVX");
       // Save upper half of YMM registers
       subptr(rsp, 16*num_xmm_regs);
       for (int n = 0; n < num_xmm_regs; n++) {
-        vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
+        vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
       }
     }
 #endif
@@ -5665,7 +5665,7 @@
 #ifdef _LP64
     if (VM_Version::supports_evex()) {
       for (int n = 0; n < num_xmm_regs; n++) {
-        vextractf32x4h(Address(rsp, n*16), as_XMMRegister(n), 0);
+        vextractf32x4(Address(rsp, n*16), as_XMMRegister(n), 0);
       }
     } else {
       for (int n = 0; n < num_xmm_regs; n++) {
@@ -5753,7 +5753,7 @@
 #ifdef _LP64
   if (VM_Version::supports_evex()) {
     for (int n = 0; n < num_xmm_regs; n++) {
-      vinsertf32x4h(as_XMMRegister(n), Address(rsp, n*16), 0);
+      vinsertf32x4(as_XMMRegister(n), as_XMMRegister(n), Address(rsp, n*16), 0);
     }
   } else {
     for (int n = 0; n < num_xmm_regs; n++) {
@@ -5771,12 +5771,12 @@
     if (MaxVectorSize > 16) {
       // Restore upper half of YMM registers.
       for (int n = 0; n < num_xmm_regs; n++) {
-        vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
+        vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
       }
       addptr(rsp, 16*num_xmm_regs);
       if(UseAVX > 2) {
         for (int n = 0; n < num_xmm_regs; n++) {
-          vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
+          vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
         }
         addptr(rsp, 32*num_xmm_regs);
       }
@@ -7198,21 +7198,50 @@
 
 }
 
-void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp) {
+void MacroAssembler::clear_mem(Register base, Register cnt, Register tmp, bool is_large) {
   // cnt - number of qwords (8-byte words).
   // base - start address, qword aligned.
+  // is_large - if optimizers know cnt is larger than InitArrayShortSize
   assert(base==rdi, "base register must be edi for rep stos");
   assert(tmp==rax,   "tmp register must be eax for rep stos");
   assert(cnt==rcx,   "cnt register must be ecx for rep stos");
+  assert(InitArrayShortSize % BytesPerLong == 0,
+    "InitArrayShortSize should be the multiple of BytesPerLong");
+
+  Label DONE;
 
   xorptr(tmp, tmp);
+
+  if (!is_large) {
+    Label LOOP, LONG;
+    cmpptr(cnt, InitArrayShortSize/BytesPerLong);
+    jccb(Assembler::greater, LONG);
+
+    NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
+
+    decrement(cnt);
+    jccb(Assembler::negative, DONE); // Zero length
+
+    // Use individual pointer-sized stores for small counts:
+    BIND(LOOP);
+    movptr(Address(base, cnt, Address::times_ptr), tmp);
+    decrement(cnt);
+    jccb(Assembler::greaterEqual, LOOP);
+    jmpb(DONE);
+
+    BIND(LONG);
+  }
+
+  // Use longer rep-prefixed ops for non-small counts:
   if (UseFastStosb) {
-    shlptr(cnt,3); // convert to number of bytes
+    shlptr(cnt, 3); // convert to number of bytes
     rep_stosb();
   } else {
-    NOT_LP64(shlptr(cnt,1);) // convert to number of dwords for 32-bit VM
+    NOT_LP64(shlptr(cnt, 1);) // convert to number of 32-bit words for 32-bit VM
     rep_stos();
   }
+
+  BIND(DONE);
 }
 
 #ifdef COMPILER2
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -48,7 +48,6 @@
   // This is the base routine called by the different versions of call_VM_leaf. The interpreter
   // may customize this version by overriding it for its purposes (e.g., to save/restore
   // additional registers when doing a VM call).
-#define COMMA ,
 
   virtual void call_VM_leaf_base(
     address entry_point,               // the entry point
@@ -903,35 +902,66 @@
   void ldmxcsr(Address src) { Assembler::ldmxcsr(src); }
   void ldmxcsr(AddressLiteral src);
 
+  void fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
+                 XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
+                 Register buf, Register state, Register ofs, Register limit, Register rsp,
+                 bool multi_block);
+
+#ifdef _LP64
+  void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
+                   XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
+                   Register buf, Register state, Register ofs, Register limit, Register rsp,
+                   bool multi_block, XMMRegister shuf_mask);
+#else
+  void fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
+                   XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
+                   Register buf, Register state, Register ofs, Register limit, Register rsp,
+                   bool multi_block);
+#endif
+
   void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
                 Register rax, Register rcx, Register rdx, Register tmp);
 
+#ifdef _LP64
   void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
-                Register rax, Register rcx, Register rdx, Register tmp1 LP64_ONLY(COMMA Register tmp2));
+                Register rax, Register rcx, Register rdx, Register tmp1, Register tmp2);
 
   void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
                 XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
-                Register rdx NOT_LP64(COMMA  Register tmp) LP64_ONLY(COMMA  Register tmp1)
-                LP64_ONLY(COMMA  Register tmp2) LP64_ONLY(COMMA  Register tmp3) LP64_ONLY(COMMA  Register tmp4));
+                Register rdx, Register tmp1, Register tmp2, Register tmp3, Register tmp4);
 
   void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
-                Register rax, Register rbx LP64_ONLY(COMMA  Register rcx), Register rdx
-                LP64_ONLY(COMMA Register tmp1) LP64_ONLY(COMMA Register tmp2)
-                LP64_ONLY(COMMA Register tmp3) LP64_ONLY(COMMA Register tmp4));
+                Register rax, Register rbx, Register rcx, Register rdx, Register tmp1, Register tmp2,
+                Register tmp3, Register tmp4);
 
   void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
-                Register rax, Register rcx, Register rdx NOT_LP64(COMMA Register tmp)
-                LP64_ONLY(COMMA Register r8) LP64_ONLY(COMMA Register r9)
-                LP64_ONLY(COMMA Register r10) LP64_ONLY(COMMA Register r11));
+                Register rax, Register rcx, Register rdx, Register tmp1,
+                Register tmp2, Register tmp3, Register tmp4);
+#else
+  void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
+                XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
+                Register rax, Register rcx, Register rdx, Register tmp1);
 
-#ifndef _LP64
+  void fast_pow(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4,
+                XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register rax, Register rcx,
+                Register rdx, Register tmp);
+
+  void fast_sin(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
+                XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
+                Register rax, Register rbx, Register rdx);
+
+  void fast_cos(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
+                XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
+                Register rax, Register rcx, Register rdx, Register tmp);
+
   void libm_sincos_huge(XMMRegister xmm0, XMMRegister xmm1, Register eax, Register ecx,
                         Register edx, Register ebx, Register esi, Register edi,
                         Register ebp, Register esp);
+
   void libm_reduce_pi04l(Register eax, Register ecx, Register edx, Register ebx,
                          Register esi, Register edi, Register ebp, Register esp);
 #endif
@@ -1185,14 +1215,131 @@
   void vpxor(XMMRegister dst, XMMRegister src) { Assembler::vpxor(dst, dst, src, true); }
   void vpxor(XMMRegister dst, Address src) { Assembler::vpxor(dst, dst, src, true); }
 
-  // Move packed integer values from low 128 bit to hign 128 bit in 256 bit vector.
-  void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-    if (UseAVX > 1) // vinserti128h is available only in AVX2
-      Assembler::vinserti128h(dst, nds, src);
-    else
-      Assembler::vinsertf128h(dst, nds, src);
+  void vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
+    if (UseAVX > 1) { // vinserti128 is available only in AVX2
+      Assembler::vinserti128(dst, nds, src, imm8);
+    } else {
+      Assembler::vinsertf128(dst, nds, src, imm8);
+    }
   }
 
+  void vinserti128(XMMRegister dst, XMMRegister nds, Address src, uint8_t imm8) {
+    if (UseAVX > 1) { // vinserti128 is available only in AVX2
+      Assembler::vinserti128(dst, nds, src, imm8);
+    } else {
+      Assembler::vinsertf128(dst, nds, src, imm8);
+    }
+  }
+
+  void vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
+    if (UseAVX > 1) { // vextracti128 is available only in AVX2
+      Assembler::vextracti128(dst, src, imm8);
+    } else {
+      Assembler::vextractf128(dst, src, imm8);
+    }
+  }
+
+  void vextracti128(Address dst, XMMRegister src, uint8_t imm8) {
+    if (UseAVX > 1) { // vextracti128 is available only in AVX2
+      Assembler::vextracti128(dst, src, imm8);
+    } else {
+      Assembler::vextractf128(dst, src, imm8);
+    }
+  }
+
+  // 128bit copy to/from high 128 bits of 256bit (YMM) vector registers
+  void vinserti128_high(XMMRegister dst, XMMRegister src) {
+    vinserti128(dst, dst, src, 1);
+  }
+  void vinserti128_high(XMMRegister dst, Address src) {
+    vinserti128(dst, dst, src, 1);
+  }
+  void vextracti128_high(XMMRegister dst, XMMRegister src) {
+    vextracti128(dst, src, 1);
+  }
+  void vextracti128_high(Address dst, XMMRegister src) {
+    vextracti128(dst, src, 1);
+  }
+  void vinsertf128_high(XMMRegister dst, XMMRegister src) {
+    vinsertf128(dst, dst, src, 1);
+  }
+  void vinsertf128_high(XMMRegister dst, Address src) {
+    vinsertf128(dst, dst, src, 1);
+  }
+  void vextractf128_high(XMMRegister dst, XMMRegister src) {
+    vextractf128(dst, src, 1);
+  }
+  void vextractf128_high(Address dst, XMMRegister src) {
+    vextractf128(dst, src, 1);
+  }
+
+  // 256bit copy to/from high 256 bits of 512bit (ZMM) vector registers
+  void vinserti64x4_high(XMMRegister dst, XMMRegister src) {
+    vinserti64x4(dst, dst, src, 1);
+  }
+  void vinsertf64x4_high(XMMRegister dst, XMMRegister src) {
+    vinsertf64x4(dst, dst, src, 1);
+  }
+  void vextracti64x4_high(XMMRegister dst, XMMRegister src) {
+    vextracti64x4(dst, src, 1);
+  }
+  void vextractf64x4_high(XMMRegister dst, XMMRegister src) {
+    vextractf64x4(dst, src, 1);
+  }
+  void vextractf64x4_high(Address dst, XMMRegister src) {
+    vextractf64x4(dst, src, 1);
+  }
+  void vinsertf64x4_high(XMMRegister dst, Address src) {
+    vinsertf64x4(dst, dst, src, 1);
+  }
+
+  // 128bit copy to/from low 128 bits of 256bit (YMM) vector registers
+  void vinserti128_low(XMMRegister dst, XMMRegister src) {
+    vinserti128(dst, dst, src, 0);
+  }
+  void vinserti128_low(XMMRegister dst, Address src) {
+    vinserti128(dst, dst, src, 0);
+  }
+  void vextracti128_low(XMMRegister dst, XMMRegister src) {
+    vextracti128(dst, src, 0);
+  }
+  void vextracti128_low(Address dst, XMMRegister src) {
+    vextracti128(dst, src, 0);
+  }
+  void vinsertf128_low(XMMRegister dst, XMMRegister src) {
+    vinsertf128(dst, dst, src, 0);
+  }
+  void vinsertf128_low(XMMRegister dst, Address src) {
+    vinsertf128(dst, dst, src, 0);
+  }
+  void vextractf128_low(XMMRegister dst, XMMRegister src) {
+    vextractf128(dst, src, 0);
+  }
+  void vextractf128_low(Address dst, XMMRegister src) {
+    vextractf128(dst, src, 0);
+  }
+
+  // 256bit copy to/from low 256 bits of 512bit (ZMM) vector registers
+  void vinserti64x4_low(XMMRegister dst, XMMRegister src) {
+    vinserti64x4(dst, dst, src, 0);
+  }
+  void vinsertf64x4_low(XMMRegister dst, XMMRegister src) {
+    vinsertf64x4(dst, dst, src, 0);
+  }
+  void vextracti64x4_low(XMMRegister dst, XMMRegister src) {
+    vextracti64x4(dst, src, 0);
+  }
+  void vextractf64x4_low(XMMRegister dst, XMMRegister src) {
+    vextractf64x4(dst, src, 0);
+  }
+  void vextractf64x4_low(Address dst, XMMRegister src) {
+    vextractf64x4(dst, src, 0);
+  }
+  void vinsertf64x4_low(XMMRegister dst, Address src) {
+    vinsertf64x4(dst, dst, src, 0);
+  }
+
+
   // Carry-Less Multiplication Quadword
   void vpclmulldq(XMMRegister dst, XMMRegister nds, XMMRegister src) {
     // 0x00 - multiply lower 64 bits [0:63]
@@ -1284,8 +1431,9 @@
   // C2 compiled method's prolog code.
   void verified_entry(int framesize, int stack_bang_size, bool fp_mode_24b);
 
-  // clear memory of size 'cnt' qwords, starting at 'base'.
-  void clear_mem(Register base, Register cnt, Register rtmp);
+  // clear memory of size 'cnt' qwords, starting at 'base';
+  // if 'is_large' is set, do not try to produce short loop
+  void clear_mem(Register base, Register cnt, Register rtmp, bool is_large);
 
 #ifdef COMPILER2
   void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/macroAssembler_x86_sha.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -0,0 +1,495 @@
+/*
+* Copyright (c) 2016, Intel Corporation.
+*
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+*
+* This code is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 only, as
+* published by the Free Software Foundation.
+*
+* This code is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+* version 2 for more details (a copy is included in the LICENSE file that
+* accompanied this code).
+*
+* You should have received a copy of the GNU General Public License version
+* 2 along with this work; if not, write to the Free Software Foundation,
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+* or visit www.oracle.com if you need additional information or have any
+* questions.
+*
+*/
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "macroAssembler_x86.hpp"
+
+// ofs and limit are used for multi-block byte array.
+// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
+void MacroAssembler::fast_sha1(XMMRegister abcd, XMMRegister e0, XMMRegister e1, XMMRegister msg0,
+  XMMRegister msg1, XMMRegister msg2, XMMRegister msg3, XMMRegister shuf_mask,
+  Register buf, Register state, Register ofs, Register limit, Register rsp, bool multi_block) {
+
+  Label start, done_hash, loop0;
+
+  address upper_word_mask = StubRoutines::x86::upper_word_mask_addr();
+  address shuffle_byte_flip_mask = StubRoutines::x86::shuffle_byte_flip_mask_addr();
+
+  bind(start);
+  movdqu(abcd, Address(state, 0));
+  pinsrd(e0, Address(state, 16), 3);
+  movdqu(shuf_mask, ExternalAddress(upper_word_mask)); // 0xFFFFFFFF000000000000000000000000
+  pand(e0, shuf_mask);
+  pshufd(abcd, abcd, 0x1B);
+  movdqu(shuf_mask, ExternalAddress(shuffle_byte_flip_mask)); //0x000102030405060708090a0b0c0d0e0f
+
+  bind(loop0);
+  // Save hash values for addition after rounds
+  movdqu(Address(rsp, 0), e0);
+  movdqu(Address(rsp, 16), abcd);
+
+
+  // Rounds 0 - 3
+  movdqu(msg0, Address(buf, 0));
+  pshufb(msg0, shuf_mask);
+  paddd(e0, msg0);
+  movdqa(e1, abcd);
+  sha1rnds4(abcd, e0, 0);
+
+  // Rounds 4 - 7
+  movdqu(msg1, Address(buf, 16));
+  pshufb(msg1, shuf_mask);
+  sha1nexte(e1, msg1);
+  movdqa(e0, abcd);
+  sha1rnds4(abcd, e1, 0);
+  sha1msg1(msg0, msg1);
+
+  // Rounds 8 - 11
+  movdqu(msg2, Address(buf, 32));
+  pshufb(msg2, shuf_mask);
+  sha1nexte(e0, msg2);
+  movdqa(e1, abcd);
+  sha1rnds4(abcd, e0, 0);
+  sha1msg1(msg1, msg2);
+  pxor(msg0, msg2);
+
+  // Rounds 12 - 15
+  movdqu(msg3, Address(buf, 48));
+  pshufb(msg3, shuf_mask);
+  sha1nexte(e1, msg3);
+  movdqa(e0, abcd);
+  sha1msg2(msg0, msg3);
+  sha1rnds4(abcd, e1, 0);
+  sha1msg1(msg2, msg3);
+  pxor(msg1, msg3);
+
+  // Rounds 16 - 19
+  sha1nexte(e0, msg0);
+  movdqa(e1, abcd);
+  sha1msg2(msg1, msg0);
+  sha1rnds4(abcd, e0, 0);
+  sha1msg1(msg3, msg0);
+  pxor(msg2, msg0);
+
+  // Rounds 20 - 23
+  sha1nexte(e1, msg1);
+  movdqa(e0, abcd);
+  sha1msg2(msg2, msg1);
+  sha1rnds4(abcd, e1, 1);
+  sha1msg1(msg0, msg1);
+  pxor(msg3, msg1);
+
+  // Rounds 24 - 27
+  sha1nexte(e0, msg2);
+  movdqa(e1, abcd);
+  sha1msg2(msg3, msg2);
+  sha1rnds4(abcd, e0, 1);
+  sha1msg1(msg1, msg2);
+  pxor(msg0, msg2);
+
+  // Rounds 28 - 31
+  sha1nexte(e1, msg3);
+  movdqa(e0, abcd);
+  sha1msg2(msg0, msg3);
+  sha1rnds4(abcd, e1, 1);
+  sha1msg1(msg2, msg3);
+  pxor(msg1, msg3);
+
+  // Rounds 32 - 35
+  sha1nexte(e0, msg0);
+  movdqa(e1, abcd);
+  sha1msg2(msg1, msg0);
+  sha1rnds4(abcd, e0, 1);
+  sha1msg1(msg3, msg0);
+  pxor(msg2, msg0);
+
+  // Rounds 36 - 39
+  sha1nexte(e1, msg1);
+  movdqa(e0, abcd);
+  sha1msg2(msg2, msg1);
+  sha1rnds4(abcd, e1, 1);
+  sha1msg1(msg0, msg1);
+  pxor(msg3, msg1);
+
+  // Rounds 40 - 43
+  sha1nexte(e0, msg2);
+  movdqa(e1, abcd);
+  sha1msg2(msg3, msg2);
+  sha1rnds4(abcd, e0, 2);
+  sha1msg1(msg1, msg2);
+  pxor(msg0, msg2);
+
+  // Rounds 44 - 47
+  sha1nexte(e1, msg3);
+  movdqa(e0, abcd);
+  sha1msg2(msg0, msg3);
+  sha1rnds4(abcd, e1, 2);
+  sha1msg1(msg2, msg3);
+  pxor(msg1, msg3);
+
+  // Rounds 48 - 51
+  sha1nexte(e0, msg0);
+  movdqa(e1, abcd);
+  sha1msg2(msg1, msg0);
+  sha1rnds4(abcd, e0, 2);
+  sha1msg1(msg3, msg0);
+  pxor(msg2, msg0);
+
+  // Rounds 52 - 55
+  sha1nexte(e1, msg1);
+  movdqa(e0, abcd);
+  sha1msg2(msg2, msg1);
+  sha1rnds4(abcd, e1, 2);
+  sha1msg1(msg0, msg1);
+  pxor(msg3, msg1);
+
+  // Rounds 56 - 59
+  sha1nexte(e0, msg2);
+  movdqa(e1, abcd);
+  sha1msg2(msg3, msg2);
+  sha1rnds4(abcd, e0, 2);
+  sha1msg1(msg1, msg2);
+  pxor(msg0, msg2);
+
+  // Rounds 60 - 63
+  sha1nexte(e1, msg3);
+  movdqa(e0, abcd);
+  sha1msg2(msg0, msg3);
+  sha1rnds4(abcd, e1, 3);
+  sha1msg1(msg2, msg3);
+  pxor(msg1, msg3);
+
+  // Rounds 64 - 67
+  sha1nexte(e0, msg0);
+  movdqa(e1, abcd);
+  sha1msg2(msg1, msg0);
+  sha1rnds4(abcd, e0, 3);
+  sha1msg1(msg3, msg0);
+  pxor(msg2, msg0);
+
+  // Rounds 68 - 71
+  sha1nexte(e1, msg1);
+  movdqa(e0, abcd);
+  sha1msg2(msg2, msg1);
+  sha1rnds4(abcd, e1, 3);
+  pxor(msg3, msg1);
+
+  // Rounds 72 - 75
+  sha1nexte(e0, msg2);
+  movdqa(e1, abcd);
+  sha1msg2(msg3, msg2);
+  sha1rnds4(abcd, e0, 3);
+
+  // Rounds 76 - 79
+  sha1nexte(e1, msg3);
+  movdqa(e0, abcd);
+  sha1rnds4(abcd, e1, 3);
+
+  // add current hash values with previously saved
+  movdqu(msg0, Address(rsp, 0));
+  sha1nexte(e0, msg0);
+  movdqu(msg0, Address(rsp, 16));
+  paddd(abcd, msg0);
+
+  if (multi_block) {
+    // increment data pointer and loop if more to process
+    addptr(buf, 64);
+    addptr(ofs, 64);
+    cmpptr(ofs, limit);
+    jcc(Assembler::belowEqual, loop0);
+    movptr(rax, ofs); //return ofs
+  }
+  // write hash values back in the correct order
+  pshufd(abcd, abcd, 0x1b);
+  movdqu(Address(state, 0), abcd);
+  pextrd(Address(state, 16), e0, 3);
+
+  bind(done_hash);
+
+}
+
+// xmm0 (msg) is used as an implicit argument to sh256rnds2
+// and state0 and state1 can never use xmm0 register.
+// ofs and limit are used for multi-block byte array.
+// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
+#ifdef _LP64
+void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
+  XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
+  Register buf, Register state, Register ofs, Register limit, Register rsp,
+  bool multi_block, XMMRegister shuf_mask) {
+#else
+void MacroAssembler::fast_sha256(XMMRegister msg, XMMRegister state0, XMMRegister state1, XMMRegister msgtmp0,
+  XMMRegister msgtmp1, XMMRegister msgtmp2, XMMRegister msgtmp3, XMMRegister msgtmp4,
+  Register buf, Register state, Register ofs, Register limit, Register rsp,
+  bool multi_block) {
+#endif
+  Label start, done_hash, loop0;
+
+  address K256 = StubRoutines::x86::k256_addr();
+  address pshuffle_byte_flip_mask = StubRoutines::x86::pshuffle_byte_flip_mask_addr();
+
+  bind(start);
+  movdqu(state0, Address(state, 0));
+  movdqu(state1, Address(state, 16));
+
+  pshufd(state0, state0, 0xB1);
+  pshufd(state1, state1, 0x1B);
+  movdqa(msgtmp4, state0);
+  palignr(state0, state1, 8);
+  pblendw(state1, msgtmp4, 0xF0);
+
+#ifdef _LP64
+  movdqu(shuf_mask, ExternalAddress(pshuffle_byte_flip_mask));
+#endif
+  lea(rax, ExternalAddress(K256));
+
+  bind(loop0);
+  movdqu(Address(rsp, 0), state0);
+  movdqu(Address(rsp, 16), state1);
+
+  // Rounds 0-3
+  movdqu(msg, Address(buf, 0));
+#ifdef _LP64
+  pshufb(msg, shuf_mask);
+#else
+  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
+#endif
+  movdqa(msgtmp0, msg);
+  paddd(msg, Address(rax, 0));
+  sha256rnds2(state1, state0);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+
+  // Rounds 4-7
+  movdqu(msg, Address(buf, 16));
+#ifdef _LP64
+  pshufb(msg, shuf_mask);
+#else
+  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
+#endif
+  movdqa(msgtmp1, msg);
+  paddd(msg, Address(rax, 16));
+  sha256rnds2(state1, state0);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+  sha256msg1(msgtmp0, msgtmp1);
+
+  // Rounds 8-11
+  movdqu(msg, Address(buf, 32));
+#ifdef _LP64
+  pshufb(msg, shuf_mask);
+#else
+  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
+#endif
+  movdqa(msgtmp2, msg);
+  paddd(msg, Address(rax, 32));
+  sha256rnds2(state1, state0);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+  sha256msg1(msgtmp1, msgtmp2);
+
+  // Rounds 12-15
+  movdqu(msg, Address(buf, 48));
+#ifdef _LP64
+  pshufb(msg, shuf_mask);
+#else
+  pshufb(msg, ExternalAddress(pshuffle_byte_flip_mask));
+#endif
+  movdqa(msgtmp3, msg);
+  paddd(msg, Address(rax, 48));
+  sha256rnds2(state1, state0);
+  movdqa(msgtmp4, msgtmp3);
+  palignr(msgtmp4, msgtmp2, 4);
+  paddd(msgtmp0, msgtmp4);
+  sha256msg2(msgtmp0, msgtmp3);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+  sha256msg1(msgtmp2, msgtmp3);
+
+  // Rounds 16-19
+  movdqa(msg, msgtmp0);
+  paddd(msg, Address(rax, 64));
+  sha256rnds2(state1, state0);
+  movdqa(msgtmp4, msgtmp0);
+  palignr(msgtmp4, msgtmp3, 4);
+  paddd(msgtmp1, msgtmp4);
+  sha256msg2(msgtmp1, msgtmp0);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+  sha256msg1(msgtmp3, msgtmp0);
+
+  // Rounds 20-23
+  movdqa(msg, msgtmp1);
+  paddd(msg, Address(rax, 80));
+  sha256rnds2(state1, state0);
+  movdqa(msgtmp4, msgtmp1);
+  palignr(msgtmp4, msgtmp0, 4);
+  paddd(msgtmp2, msgtmp4);
+  sha256msg2(msgtmp2, msgtmp1);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+  sha256msg1(msgtmp0, msgtmp1);
+
+  // Rounds 24-27
+  movdqa(msg, msgtmp2);
+  paddd(msg, Address(rax, 96));
+  sha256rnds2(state1, state0);
+  movdqa(msgtmp4, msgtmp2);
+  palignr(msgtmp4, msgtmp1, 4);
+  paddd(msgtmp3, msgtmp4);
+  sha256msg2(msgtmp3, msgtmp2);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+  sha256msg1(msgtmp1, msgtmp2);
+
+  // Rounds 28-31
+  movdqa(msg, msgtmp3);
+  paddd(msg, Address(rax, 112));
+  sha256rnds2(state1, state0);
+  movdqa(msgtmp4, msgtmp3);
+  palignr(msgtmp4, msgtmp2, 4);
+  paddd(msgtmp0, msgtmp4);
+  sha256msg2(msgtmp0, msgtmp3);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+  sha256msg1(msgtmp2, msgtmp3);
+
+  // Rounds 32-35
+  movdqa(msg, msgtmp0);
+  paddd(msg, Address(rax, 128));
+  sha256rnds2(state1, state0);
+  movdqa(msgtmp4, msgtmp0);
+  palignr(msgtmp4, msgtmp3, 4);
+  paddd(msgtmp1, msgtmp4);
+  sha256msg2(msgtmp1, msgtmp0);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+  sha256msg1(msgtmp3, msgtmp0);
+
+  // Rounds 36-39
+  movdqa(msg, msgtmp1);
+  paddd(msg, Address(rax, 144));
+  sha256rnds2(state1, state0);
+  movdqa(msgtmp4, msgtmp1);
+  palignr(msgtmp4, msgtmp0, 4);
+  paddd(msgtmp2, msgtmp4);
+  sha256msg2(msgtmp2, msgtmp1);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+  sha256msg1(msgtmp0, msgtmp1);
+
+  // Rounds 40-43
+  movdqa(msg, msgtmp2);
+  paddd(msg, Address(rax, 160));
+  sha256rnds2(state1, state0);
+  movdqa(msgtmp4, msgtmp2);
+  palignr(msgtmp4, msgtmp1, 4);
+  paddd(msgtmp3, msgtmp4);
+  sha256msg2(msgtmp3, msgtmp2);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+  sha256msg1(msgtmp1, msgtmp2);
+
+  // Rounds 44-47
+  movdqa(msg, msgtmp3);
+  paddd(msg, Address(rax, 176));
+  sha256rnds2(state1, state0);
+  movdqa(msgtmp4, msgtmp3);
+  palignr(msgtmp4, msgtmp2, 4);
+  paddd(msgtmp0, msgtmp4);
+  sha256msg2(msgtmp0, msgtmp3);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+  sha256msg1(msgtmp2, msgtmp3);
+
+  // Rounds 48-51
+  movdqa(msg, msgtmp0);
+  paddd(msg, Address(rax, 192));
+  sha256rnds2(state1, state0);
+  movdqa(msgtmp4, msgtmp0);
+  palignr(msgtmp4, msgtmp3, 4);
+  paddd(msgtmp1, msgtmp4);
+  sha256msg2(msgtmp1, msgtmp0);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+  sha256msg1(msgtmp3, msgtmp0);
+
+  // Rounds 52-55
+  movdqa(msg, msgtmp1);
+  paddd(msg, Address(rax, 208));
+  sha256rnds2(state1, state0);
+  movdqa(msgtmp4, msgtmp1);
+  palignr(msgtmp4, msgtmp0, 4);
+  paddd(msgtmp2, msgtmp4);
+  sha256msg2(msgtmp2, msgtmp1);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+
+  // Rounds 56-59
+  movdqa(msg, msgtmp2);
+  paddd(msg, Address(rax, 224));
+  sha256rnds2(state1, state0);
+  movdqa(msgtmp4, msgtmp2);
+  palignr(msgtmp4, msgtmp1, 4);
+  paddd(msgtmp3, msgtmp4);
+  sha256msg2(msgtmp3, msgtmp2);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+
+  // Rounds 60-63
+  movdqa(msg, msgtmp3);
+  paddd(msg, Address(rax, 240));
+  sha256rnds2(state1, state0);
+  pshufd(msg, msg, 0x0E);
+  sha256rnds2(state0, state1);
+  movdqu(msg, Address(rsp, 0));
+  paddd(state0, msg);
+  movdqu(msg, Address(rsp, 16));
+  paddd(state1, msg);
+
+  if (multi_block) {
+    // increment data pointer and loop if more to process
+    addptr(buf, 64);
+    addptr(ofs, 64);
+    cmpptr(ofs, limit);
+    jcc(Assembler::belowEqual, loop0);
+    movptr(rax, ofs); //return ofs
+  }
+
+  pshufd(state0, state0, 0x1B);
+  pshufd(state1, state1, 0xB1);
+  movdqa(msgtmp4, state0);
+  pblendw(state0, state1, 0xF0);
+  palignr(state1, msgtmp4, 8);
+
+  movdqu(Address(state, 0), state0);
+  movdqu(Address(state, 16), state1);
+
+  bind(done_hash);
+
+}
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -208,13 +208,13 @@
     __ subptr(rsp, ymm_bytes);
     // Save upper half of YMM registers
     for (int n = 0; n < num_xmm_regs; n++) {
-      __ vextractf128h(Address(rsp, n*16), as_XMMRegister(n));
+      __ vextractf128_high(Address(rsp, n*16), as_XMMRegister(n));
     }
     if (UseAVX > 2) {
       __ subptr(rsp, zmm_bytes);
       // Save upper half of ZMM registers
       for (int n = 0; n < num_xmm_regs; n++) {
-        __ vextractf64x4h(Address(rsp, n*32), as_XMMRegister(n), 1);
+        __ vextractf64x4_high(Address(rsp, n*32), as_XMMRegister(n));
       }
     }
   }
@@ -304,13 +304,13 @@
     if (UseAVX > 2) {
       // Restore upper half of ZMM registers.
       for (int n = 0; n < num_xmm_regs; n++) {
-        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, n*32), 1);
+        __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, n*32));
       }
       __ addptr(rsp, zmm_bytes);
     }
     // Restore upper half of YMM registers.
     for (int n = 0; n < num_xmm_regs; n++) {
-      __ vinsertf128h(as_XMMRegister(n), Address(rsp, n*16));
+      __ vinsertf128_high(as_XMMRegister(n), Address(rsp, n*16));
     }
     __ addptr(rsp, ymm_bytes);
   }
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -179,13 +179,13 @@
     // Save upper half of YMM registers(0..15)
     int base_addr = XSAVE_AREA_YMM_BEGIN;
     for (int n = 0; n < 16; n++) {
-      __ vextractf128h(Address(rsp, base_addr+n*16), as_XMMRegister(n));
+      __ vextractf128_high(Address(rsp, base_addr+n*16), as_XMMRegister(n));
     }
     if (VM_Version::supports_evex()) {
       // Save upper half of ZMM registers(0..15)
       base_addr = XSAVE_AREA_ZMM_BEGIN;
       for (int n = 0; n < 16; n++) {
-        __ vextractf64x4h(Address(rsp, base_addr+n*32), as_XMMRegister(n), 1);
+        __ vextractf64x4_high(Address(rsp, base_addr+n*32), as_XMMRegister(n));
       }
       // Save full ZMM registers(16..num_xmm_regs)
       base_addr = XSAVE_AREA_UPPERBANK;
@@ -333,13 +333,13 @@
     // Restore upper half of YMM registers (0..15)
     int base_addr = XSAVE_AREA_YMM_BEGIN;
     for (int n = 0; n < 16; n++) {
-      __ vinsertf128h(as_XMMRegister(n), Address(rsp,  base_addr+n*16));
+      __ vinsertf128_high(as_XMMRegister(n), Address(rsp, base_addr+n*16));
     }
     if (VM_Version::supports_evex()) {
       // Restore upper half of ZMM registers (0..15)
       base_addr = XSAVE_AREA_ZMM_BEGIN;
       for (int n = 0; n < 16; n++) {
-        __ vinsertf64x4h(as_XMMRegister(n), Address(rsp, base_addr+n*32), 1);
+        __ vinsertf64x4_high(as_XMMRegister(n), Address(rsp, base_addr+n*32));
       }
       // Restore full ZMM registers(16..num_xmm_regs)
       base_addr = XSAVE_AREA_UPPERBANK;
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -3068,6 +3068,136 @@
     return start;
   }
 
+  address generate_upper_word_mask() {
+    __ align(64);
+    StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
+    address start = __ pc();
+    __ emit_data(0x00000000, relocInfo::none, 0);
+    __ emit_data(0x00000000, relocInfo::none, 0);
+    __ emit_data(0x00000000, relocInfo::none, 0);
+    __ emit_data(0xFFFFFFFF, relocInfo::none, 0);
+    return start;
+  }
+
+  address generate_shuffle_byte_flip_mask() {
+    __ align(64);
+    StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask");
+    address start = __ pc();
+    __ emit_data(0x0c0d0e0f, relocInfo::none, 0);
+    __ emit_data(0x08090a0b, relocInfo::none, 0);
+    __ emit_data(0x04050607, relocInfo::none, 0);
+    __ emit_data(0x00010203, relocInfo::none, 0);
+    return start;
+  }
+
+  // ofs and limit are use for multi-block byte array.
+  // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
+  address generate_sha1_implCompress(bool multi_block, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    Register buf   = rax;
+    Register state = rdx;
+    Register ofs   = rcx;
+    Register limit = rdi;
+
+    const Address  buf_param(rbp, 8 + 0);
+    const Address  state_param(rbp, 8 + 4);
+    const Address  ofs_param(rbp, 8 + 8);
+    const Address  limit_param(rbp, 8 + 12);
+
+    const XMMRegister abcd = xmm0;
+    const XMMRegister e0 = xmm1;
+    const XMMRegister e1 = xmm2;
+    const XMMRegister msg0 = xmm3;
+
+    const XMMRegister msg1 = xmm4;
+    const XMMRegister msg2 = xmm5;
+    const XMMRegister msg3 = xmm6;
+    const XMMRegister shuf_mask = xmm7;
+
+    __ enter();
+    __ subptr(rsp, 8 * wordSize);
+    if (multi_block) {
+      __ push(limit);
+    }
+    __ movptr(buf, buf_param);
+    __ movptr(state, state_param);
+    if (multi_block) {
+      __ movptr(ofs, ofs_param);
+      __ movptr(limit, limit_param);
+    }
+
+    __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
+      buf, state, ofs, limit, rsp, multi_block);
+
+    if (multi_block) {
+      __ pop(limit);
+    }
+    __ addptr(rsp, 8 * wordSize);
+    __ leave();
+    __ ret(0);
+    return start;
+  }
+
+  address generate_pshuffle_byte_flip_mask() {
+    __ align(64);
+    StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
+    address start = __ pc();
+    __ emit_data(0x00010203, relocInfo::none, 0);
+    __ emit_data(0x04050607, relocInfo::none, 0);
+    __ emit_data(0x08090a0b, relocInfo::none, 0);
+    __ emit_data(0x0c0d0e0f, relocInfo::none, 0);
+    return start;
+  }
+
+  // ofs and limit are use for multi-block byte array.
+  // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
+ address generate_sha256_implCompress(bool multi_block, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    Register buf = rbx;
+    Register state = rsi;
+    Register ofs = rdx;
+    Register limit = rcx;
+
+    const Address  buf_param(rbp, 8 + 0);
+    const Address  state_param(rbp, 8 + 4);
+    const Address  ofs_param(rbp, 8 + 8);
+    const Address  limit_param(rbp, 8 + 12);
+
+    const XMMRegister msg = xmm0;
+    const XMMRegister state0 = xmm1;
+    const XMMRegister state1 = xmm2;
+    const XMMRegister msgtmp0 = xmm3;
+
+    const XMMRegister msgtmp1 = xmm4;
+    const XMMRegister msgtmp2 = xmm5;
+    const XMMRegister msgtmp3 = xmm6;
+    const XMMRegister msgtmp4 = xmm7;
+
+    __ enter();
+    __ subptr(rsp, 8 * wordSize);
+    handleSOERegisters(true /*saving*/);
+    __ movptr(buf, buf_param);
+    __ movptr(state, state_param);
+    if (multi_block) {
+     __ movptr(ofs, ofs_param);
+     __ movptr(limit, limit_param);
+    }
+
+    __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
+      buf, state, ofs, limit, rsp, multi_block);
+
+    handleSOERegisters(false);
+    __ addptr(rsp, 8 * wordSize);
+    __ leave();
+    __ ret(0);
+    return start;
+  }
 
   // byte swap x86 long
   address generate_ghash_long_swap_mask() {
@@ -3772,6 +3902,19 @@
       StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
     }
 
+    if (UseSHA1Intrinsics) {
+      StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
+      StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
+      StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
+      StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
+    }
+    if (UseSHA256Intrinsics) {
+      StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
+      StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
+      StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
+      StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
+    }
+
     // Generate GHASH intrinsics code
     if (UseGHASHIntrinsics) {
       StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -275,7 +275,7 @@
     }
     if (VM_Version::supports_evex()) {
       for (int i = xmm_save_first; i <= last_reg; i++) {
-        __ vextractf32x4h(xmm_save(i), as_XMMRegister(i), 0);
+        __ vextractf32x4(xmm_save(i), as_XMMRegister(i), 0);
       }
     } else {
       for (int i = xmm_save_first; i <= last_reg; i++) {
@@ -393,7 +393,7 @@
     // emit the restores for xmm regs
     if (VM_Version::supports_evex()) {
       for (int i = xmm_save_first; i <= last_reg; i++) {
-        __ vinsertf32x4h(as_XMMRegister(i), xmm_save(i), 0);
+        __ vinsertf32x4(as_XMMRegister(i), as_XMMRegister(i), xmm_save(i), 0);
       }
     } else {
       for (int i = xmm_save_first; i <= last_reg; i++) {
@@ -3695,6 +3695,133 @@
     return start;
   }
 
+  address generate_upper_word_mask() {
+    __ align(64);
+    StubCodeMark mark(this, "StubRoutines", "upper_word_mask");
+    address start = __ pc();
+    __ emit_data64(0x0000000000000000, relocInfo::none);
+    __ emit_data64(0xFFFFFFFF00000000, relocInfo::none);
+    return start;
+  }
+
+  address generate_shuffle_byte_flip_mask() {
+    __ align(64);
+    StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask");
+    address start = __ pc();
+    __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none);
+    __ emit_data64(0x0001020304050607, relocInfo::none);
+    return start;
+  }
+
+  // ofs and limit are use for multi-block byte array.
+  // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
+  address generate_sha1_implCompress(bool multi_block, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    Register buf = c_rarg0;
+    Register state = c_rarg1;
+    Register ofs = c_rarg2;
+    Register limit = c_rarg3;
+
+    const XMMRegister abcd = xmm0;
+    const XMMRegister e0 = xmm1;
+    const XMMRegister e1 = xmm2;
+    const XMMRegister msg0 = xmm3;
+
+    const XMMRegister msg1 = xmm4;
+    const XMMRegister msg2 = xmm5;
+    const XMMRegister msg3 = xmm6;
+    const XMMRegister shuf_mask = xmm7;
+
+    __ enter();
+
+#ifdef _WIN64
+    // save the xmm registers which must be preserved 6-7
+    __ subptr(rsp, 4 * wordSize);
+    __ movdqu(Address(rsp, 0), xmm6);
+    __ movdqu(Address(rsp, 2 * wordSize), xmm7);
+#endif
+
+    __ subptr(rsp, 4 * wordSize);
+
+    __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask,
+      buf, state, ofs, limit, rsp, multi_block);
+
+    __ addptr(rsp, 4 * wordSize);
+#ifdef _WIN64
+    // restore xmm regs belonging to calling function
+    __ movdqu(xmm6, Address(rsp, 0));
+    __ movdqu(xmm7, Address(rsp, 2 * wordSize));
+    __ addptr(rsp, 4 * wordSize);
+#endif
+
+    __ leave();
+    __ ret(0);
+    return start;
+  }
+
+  address generate_pshuffle_byte_flip_mask() {
+    __ align(64);
+    StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");
+    address start = __ pc();
+    __ emit_data64(0x0405060700010203, relocInfo::none);
+    __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none);
+    return start;
+  }
+
+// ofs and limit are use for multi-block byte array.
+// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit)
+  address generate_sha256_implCompress(bool multi_block, const char *name) {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc();
+
+    Register buf = c_rarg0;
+    Register state = c_rarg1;
+    Register ofs = c_rarg2;
+    Register limit = c_rarg3;
+
+    const XMMRegister msg = xmm0;
+    const XMMRegister state0 = xmm1;
+    const XMMRegister state1 = xmm2;
+    const XMMRegister msgtmp0 = xmm3;
+
+    const XMMRegister msgtmp1 = xmm4;
+    const XMMRegister msgtmp2 = xmm5;
+    const XMMRegister msgtmp3 = xmm6;
+    const XMMRegister msgtmp4 = xmm7;
+
+    const XMMRegister shuf_mask = xmm8;
+
+    __ enter();
+#ifdef _WIN64
+    // save the xmm registers which must be preserved 6-7
+    __ subptr(rsp, 6 * wordSize);
+    __ movdqu(Address(rsp, 0), xmm6);
+    __ movdqu(Address(rsp, 2 * wordSize), xmm7);
+    __ movdqu(Address(rsp, 4 * wordSize), xmm8);
+#endif
+
+    __ subptr(rsp, 4 * wordSize);
+
+    __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4,
+      buf, state, ofs, limit, rsp, multi_block, shuf_mask);
+
+    __ addptr(rsp, 4 * wordSize);
+#ifdef _WIN64
+    // restore xmm regs belonging to calling function
+    __ movdqu(xmm6, Address(rsp, 0));
+    __ movdqu(xmm7, Address(rsp, 2 * wordSize));
+    __ movdqu(xmm8, Address(rsp, 4 * wordSize));
+    __ addptr(rsp, 6 * wordSize);
+#endif
+    __ leave();
+    __ ret(0);
+    return start;
+  }
+
   // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time
   // to hide instruction latency
   //
@@ -4974,6 +5101,19 @@
       StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel();
     }
 
+    if (UseSHA1Intrinsics) {
+      StubRoutines::x86::_upper_word_mask_addr = generate_upper_word_mask();
+      StubRoutines::x86::_shuffle_byte_flip_mask_addr = generate_shuffle_byte_flip_mask();
+      StubRoutines::_sha1_implCompress = generate_sha1_implCompress(false, "sha1_implCompress");
+      StubRoutines::_sha1_implCompressMB = generate_sha1_implCompress(true, "sha1_implCompressMB");
+    }
+    if (UseSHA256Intrinsics) {
+      StubRoutines::x86::_k256_adr = (address)StubRoutines::x86::_k256;
+      StubRoutines::x86::_pshuffle_byte_flip_mask_addr = generate_pshuffle_byte_flip_mask();
+      StubRoutines::_sha256_implCompress = generate_sha256_implCompress(false, "sha256_implCompress");
+      StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true, "sha256_implCompressMB");
+    }
+
     // Generate GHASH intrinsics code
     if (UseGHASHIntrinsics) {
       StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
--- a/src/cpu/x86/vm/stubRoutines_x86.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/stubRoutines_x86.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -29,6 +29,12 @@
 #include "runtime/thread.inline.hpp"
 #include "crc32c.h"
 
+#ifdef _MSC_VER
+#define ALIGNED_(x) __declspec(align(x))
+#else
+#define ALIGNED_(x) __attribute__ ((aligned(x)))
+#endif
+
 // Implementation of the platform-specific part of StubRoutines - for
 // a description of how to extend it, see the stubRoutines.hpp file.
 
@@ -37,6 +43,10 @@
 address StubRoutines::x86::_counter_shuffle_mask_addr = NULL;
 address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
 address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
+address StubRoutines::x86::_upper_word_mask_addr = NULL;
+address StubRoutines::x86::_shuffle_byte_flip_mask_addr = NULL;
+address StubRoutines::x86::_k256_adr = NULL;
+address StubRoutines::x86::_pshuffle_byte_flip_mask_addr = NULL;
 
 uint64_t StubRoutines::x86::_crc_by128_masks[] =
 {
@@ -236,3 +246,23 @@
     _crc32c_table = (juint*)pclmulqdq_table;
   }
 }
+
+ALIGNED_(64) juint StubRoutines::x86::_k256[] =
+{
+    0x428a2f98UL, 0x71374491UL, 0xb5c0fbcfUL, 0xe9b5dba5UL,
+    0x3956c25bUL, 0x59f111f1UL, 0x923f82a4UL, 0xab1c5ed5UL,
+    0xd807aa98UL, 0x12835b01UL, 0x243185beUL, 0x550c7dc3UL,
+    0x72be5d74UL, 0x80deb1feUL, 0x9bdc06a7UL, 0xc19bf174UL,
+    0xe49b69c1UL, 0xefbe4786UL, 0x0fc19dc6UL, 0x240ca1ccUL,
+    0x2de92c6fUL, 0x4a7484aaUL, 0x5cb0a9dcUL, 0x76f988daUL,
+    0x983e5152UL, 0xa831c66dUL, 0xb00327c8UL, 0xbf597fc7UL,
+    0xc6e00bf3UL, 0xd5a79147UL, 0x06ca6351UL, 0x14292967UL,
+    0x27b70a85UL, 0x2e1b2138UL, 0x4d2c6dfcUL, 0x53380d13UL,
+    0x650a7354UL, 0x766a0abbUL, 0x81c2c92eUL, 0x92722c85UL,
+    0xa2bfe8a1UL, 0xa81a664bUL, 0xc24b8b70UL, 0xc76c51a3UL,
+    0xd192e819UL, 0xd6990624UL, 0xf40e3585UL, 0x106aa070UL,
+    0x19a4c116UL, 0x1e376c08UL, 0x2748774cUL, 0x34b0bcb5UL,
+    0x391c0cb3UL, 0x4ed8aa4aUL, 0x5b9cca4fUL, 0x682e6ff3UL,
+    0x748f82eeUL, 0x78a5636fUL, 0x84c87814UL, 0x8cc70208UL,
+    0x90befffaUL, 0xa4506cebUL, 0xbef9a3f7UL, 0xc67178f2UL
+};
--- a/src/cpu/x86/vm/stubRoutines_x86.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/stubRoutines_x86.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -46,6 +46,17 @@
   static address _ghash_long_swap_mask_addr;
   static address _ghash_byte_swap_mask_addr;
 
+  // upper word mask for sha1
+  static address _upper_word_mask_addr;
+  // byte flip mask for sha1
+  static address _shuffle_byte_flip_mask_addr;
+
+  //k256 table for sha256
+  static juint _k256[];
+  static address _k256_adr;
+  // byte flip mask for sha256
+  static address _pshuffle_byte_flip_mask_addr;
+
  public:
   static address verify_mxcsr_entry()    { return _verify_mxcsr_entry; }
   static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
@@ -53,5 +64,9 @@
   static address crc_by128_masks_addr()  { return (address)_crc_by128_masks; }
   static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
   static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
+  static address upper_word_mask_addr() { return _upper_word_mask_addr; }
+  static address shuffle_byte_flip_mask_addr() { return _shuffle_byte_flip_mask_addr; }
+  static address k256_addr()      { return _k256_adr; }
+  static address pshuffle_byte_flip_mask_addr() { return _pshuffle_byte_flip_mask_addr; }
   static void generate_CRC32C_table(bool is_pclmulqdq_supported);
 #endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
--- a/src/cpu/x86/vm/vmStructs_x86.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/vmStructs_x86.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -68,10 +68,11 @@
   declare_constant(VM_Version::CPU_AVX512DQ)                        \
   declare_constant(VM_Version::CPU_AVX512PF)                        \
   declare_constant(VM_Version::CPU_AVX512ER)                        \
-  declare_constant(VM_Version::CPU_AVX512CD)                        \
-  declare_constant(VM_Version::CPU_AVX512BW)
+  declare_constant(VM_Version::CPU_AVX512CD)
 
 #define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
-  declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL)
+  declare_preprocessor_constant("VM_Version::CPU_AVX512BW", CPU_AVX512BW) \
+  declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL) \
+  declare_preprocessor_constant("VM_Version::CPU_SHA", CPU_SHA)
 
 #endif // CPU_X86_VM_VMSTRUCTS_X86_HPP
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -385,7 +385,7 @@
 
     __ movdl(xmm0, rcx);
     __ pshufd(xmm0, xmm0, 0x00);
-    __ vinsertf128h(xmm0, xmm0, xmm0);
+    __ vinsertf128_high(xmm0, xmm0);
     __ vmovdqu(xmm7, xmm0);
 #ifdef _LP64
     __ vmovdqu(xmm8, xmm0);
@@ -577,7 +577,7 @@
   }
 
   char buf[256];
-  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+  jio_snprintf(buf, sizeof(buf), "(%u cores per cpu, %u threads per core) family %d model %d stepping %d%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
                cores_per_cpu(), threads_per_core(),
                cpu_family(), _model, _stepping,
                (supports_cmov() ? ", cmov" : ""),
@@ -608,7 +608,8 @@
                (supports_bmi1() ? ", bmi1" : ""),
                (supports_bmi2() ? ", bmi2" : ""),
                (supports_adx() ? ", adx" : ""),
-               (supports_evex() ? ", evex" : ""));
+               (supports_evex() ? ", evex" : ""),
+               (supports_sha() ? ", sha" : ""));
   _features_string = os::strdup(buf);
 
   // UseSSE is set to the smaller of what hardware supports and what
@@ -730,17 +731,29 @@
     FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
   }
 
-  if (UseSHA) {
+  if (supports_sha()) {
+    if (FLAG_IS_DEFAULT(UseSHA)) {
+      UseSHA = true;
+    }
+  } else if (UseSHA) {
     warning("SHA instructions are not available on this CPU");
     FLAG_SET_DEFAULT(UseSHA, false);
   }
 
-  if (UseSHA1Intrinsics) {
+  if (UseSHA) {
+    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
+    }
+  } else if (UseSHA1Intrinsics) {
     warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
   }
 
-  if (UseSHA256Intrinsics) {
+  if (UseSHA) {
+    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
+    }
+  } else if (UseSHA256Intrinsics) {
     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
   }
@@ -750,6 +763,10 @@
     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
   }
 
+  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
+    FLAG_SET_DEFAULT(UseSHA, false);
+  }
+
   if (UseAdler32Intrinsics) {
     warning("Adler32Intrinsics not available on this CPU.");
     FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
--- a/src/cpu/x86/vm/vm_version_x86.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -221,7 +221,7 @@
                avx512pf : 1,
                avx512er : 1,
                avx512cd : 1,
-                        : 1,
+                    sha : 1,
                avx512bw : 1,
                avx512vl : 1;
     } bits;
@@ -282,11 +282,13 @@
     CPU_AVX512DQ = (1 << 27),
     CPU_AVX512PF = (1 << 28),
     CPU_AVX512ER = (1 << 29),
-    CPU_AVX512CD = (1 << 30),
-    CPU_AVX512BW = (1 << 31)
+    CPU_AVX512CD = (1 << 30)
+    // Keeping sign bit 31 unassigned.
   };
 
-#define CPU_AVX512VL UCONST64(0x100000000) // EVEX instructions with smaller vector length : enums are limited to 32bit
+#define CPU_AVX512BW ((uint64_t)UCONST64(0x100000000)) // enums are limited to 31 bit
+#define CPU_AVX512VL ((uint64_t)UCONST64(0x200000000)) // EVEX instructions with smaller vector length
+#define CPU_SHA ((uint64_t)UCONST64(0x400000000))      // SHA instructions
 
   enum Extended_Family {
     // AMD
@@ -516,6 +518,8 @@
          result |= CPU_ADX;
       if(_cpuid_info.sef_cpuid7_ebx.bits.bmi2 != 0)
         result |= CPU_BMI2;
+      if (_cpuid_info.sef_cpuid7_ebx.bits.sha != 0)
+        result |= CPU_SHA;
       if(_cpuid_info.ext_cpuid1_ecx.bits.lzcnt_intel != 0)
         result |= CPU_LZCNT;
       // for Intel, ecx.bits.misalignsse bit (bit 8) indicates support for prefetchw
@@ -721,6 +725,7 @@
   static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
   static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
   static bool supports_avxonly()    { return ((supports_avx2() || supports_avx()) && !supports_evex()); }
+  static bool supports_sha()        { return (_features & CPU_SHA) != 0; }
   // Intel features
   static bool is_intel_family_core() { return is_intel() &&
                                        extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
--- a/src/cpu/x86/vm/x86.ad	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/x86.ad	Wed Mar 23 21:44:35 2016 -0700
@@ -3179,13 +3179,13 @@
             "punpcklbw $dst,$dst\n\t"
             "pshuflw $dst,$dst,0x00\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
+            "vinserti128_high $dst,$dst\t! replicate32B" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3196,12 +3196,12 @@
   format %{ "punpcklbw $dst,$mem\n\t"
             "pshuflw $dst,$dst,0x00\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
+            "vinserti128_high $dst,$dst\t! replicate32B" %}
   ins_encode %{
     __ punpcklbw($dst$$XMMRegister, $mem$$Address);
     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3223,11 +3223,11 @@
   match(Set dst (ReplicateB con));
   format %{ "movq    $dst,[$constantaddress]\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %}
+            "vinserti128_high $dst,$dst\t! lreplicate32B($con)" %}
   ins_encode %{
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3298,12 +3298,12 @@
   format %{ "movd    $dst,$src\n\t"
             "pshuflw $dst,$dst,0x00\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
+            "vinserti128_high $dst,$dst\t! replicate16S" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3313,11 +3313,11 @@
   match(Set dst (ReplicateS (LoadS mem)));
   format %{ "pshuflw $dst,$mem,0x00\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
+            "vinserti128_high $dst,$dst\t! replicate16S" %}
   ins_encode %{
     __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3327,11 +3327,11 @@
   match(Set dst (ReplicateS con));
   format %{ "movq    $dst,[$constantaddress]\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
+            "vinserti128_high $dst,$dst\t! replicate16S($con)" %}
   ins_encode %{
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3363,11 +3363,11 @@
   match(Set dst (ReplicateI src));
   format %{ "movd    $dst,$src\n\t"
             "pshufd  $dst,$dst,0x00\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
+            "vinserti128_high $dst,$dst\t! replicate8I" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3376,10 +3376,10 @@
   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateI (LoadI mem)));
   format %{ "pshufd  $dst,$mem,0x00\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
+            "vinserti128_high $dst,$dst\t! replicate8I" %}
   ins_encode %{
     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3401,11 +3401,11 @@
   match(Set dst (ReplicateI con));
   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst" %}
+            "vinserti128_high $dst,$dst" %}
   ins_encode %{
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3430,11 +3430,11 @@
   match(Set dst (ReplicateL src));
   format %{ "movdq   $dst,$src\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+            "vinserti128_high $dst,$dst\t! replicate4L" %}
   ins_encode %{
     __ movdq($dst$$XMMRegister, $src$$Register);
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3447,13 +3447,13 @@
             "movdl   $tmp,$src.hi\n\t"
             "punpckldq $dst,$tmp\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+            "vinserti128_high $dst,$dst\t! replicate4L" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3464,11 +3464,11 @@
   match(Set dst (ReplicateL con));
   format %{ "movq    $dst,[$constantaddress]\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
+            "vinserti128_high $dst,$dst\t! replicate4L($con)" %}
   ins_encode %{
     __ movq($dst$$XMMRegister, $constantaddress($con));
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3478,11 +3478,11 @@
   match(Set dst (ReplicateL (LoadL mem)));
   format %{ "movq    $dst,$mem\n\t"
             "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+            "vinserti128_high $dst,$dst\t! replicate4L" %}
   ins_encode %{
     __ movq($dst$$XMMRegister, $mem$$Address);
     __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3511,10 +3511,10 @@
   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateF src));
   format %{ "pshufd  $dst,$src,0x00\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
+            "vinsertf128_high $dst,$dst\t! replicate8F" %}
   ins_encode %{
     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3523,10 +3523,10 @@
   predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateF (LoadF mem)));
   format %{ "pshufd  $dst,$mem,0x00\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
+            "vinsertf128_high $dst,$dst\t! replicate8F" %}
   ins_encode %{
     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3576,10 +3576,10 @@
   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateD src));
   format %{ "pshufd  $dst,$src,0x44\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
+            "vinsertf128_high $dst,$dst\t! replicate4D" %}
   ins_encode %{
     __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3588,10 +3588,10 @@
   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateD (LoadD mem)));
   format %{ "pshufd  $dst,$mem,0x44\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
+            "vinsertf128_high $dst,$dst\t! replicate4D" %}
   ins_encode %{
     __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4791,7 +4791,7 @@
   effect(TEMP tmp, TEMP tmp2);
   format %{ "vphaddd  $tmp,$src2,$src2\n\t"
             "vphaddd  $tmp,$tmp,$tmp2\n\t"
-            "vextracti128  $tmp2,$tmp\n\t"
+            "vextracti128_high  $tmp2,$tmp\n\t"
             "vpaddd   $tmp,$tmp,$tmp2\n\t"
             "movd     $tmp2,$src1\n\t"
             "vpaddd   $tmp2,$tmp2,$tmp\n\t"
@@ -4800,7 +4800,7 @@
     int vector_len = 1;
     __ vphaddd($tmp$$XMMRegister, $src2$$XMMRegister, $src2$$XMMRegister, vector_len);
     __ vphaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
-    __ vextracti128h($tmp2$$XMMRegister, $tmp$$XMMRegister);
+    __ vextracti128_high($tmp2$$XMMRegister, $tmp$$XMMRegister);
     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
     __ movdl($tmp2$$XMMRegister, $src1$$Register);
     __ vpaddd($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -4813,7 +4813,7 @@
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti128  $tmp,$src2\n\t"
+  format %{ "vextracti128_high  $tmp,$src2\n\t"
             "vpaddd  $tmp,$tmp,$src2\n\t"
             "pshufd  $tmp2,$tmp,0xE\n\t"
             "vpaddd  $tmp,$tmp,$tmp2\n\t"
@@ -4824,7 +4824,7 @@
             "movd    $dst,$tmp2\t! add reduction8I" %}
   ins_encode %{
     int vector_len = 0;
-    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
@@ -4841,9 +4841,9 @@
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vextracti64x4  $tmp3,$src2,0x1\n\t"
+  format %{ "vextracti64x4_high  $tmp3,$src2\n\t"
             "vpaddd  $tmp3,$tmp3,$src2\n\t"
-            "vextracti128   $tmp,$tmp3\n\t"
+            "vextracti128_high  $tmp,$tmp3\n\t"
             "vpaddd  $tmp,$tmp,$tmp3\n\t"
             "pshufd  $tmp2,$tmp,0xE\n\t"
             "vpaddd  $tmp,$tmp,$tmp2\n\t"
@@ -4853,9 +4853,9 @@
             "vpaddd  $tmp2,$tmp,$tmp2\n\t"
             "movd    $dst,$tmp2\t! mul reduction16I" %}
   ins_encode %{
-    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
+    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
     __ vpaddd($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
-    __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
+    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
     __ vpaddd($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
@@ -4892,7 +4892,7 @@
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti128  $tmp,$src2\n\t"
+  format %{ "vextracti128_high  $tmp,$src2\n\t"
             "vpaddq  $tmp2,$tmp,$src2\n\t"
             "pshufd  $tmp,$tmp2,0xE\n\t"
             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
@@ -4900,7 +4900,7 @@
             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
             "movdq   $dst,$tmp2\t! add reduction4L" %}
   ins_encode %{
-    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
     __ vpaddq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -4915,9 +4915,9 @@
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti64x4  $tmp2,$src2,0x1\n\t"
+  format %{ "vextracti64x4_high  $tmp2,$src2\n\t"
             "vpaddq  $tmp2,$tmp2,$src2\n\t"
-            "vextracti128   $tmp,$tmp2\n\t"
+            "vextracti128_high  $tmp,$tmp2\n\t"
             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
             "pshufd  $tmp,$tmp2,0xE\n\t"
             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
@@ -4925,9 +4925,9 @@
             "vpaddq  $tmp2,$tmp2,$tmp\n\t"
             "movdq   $dst,$tmp2\t! add reduction8L" %}
   ins_encode %{
-    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
+    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
-    __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
+    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vpaddq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5026,7 +5026,7 @@
             "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
             "vaddss  $dst,$dst,$tmp\n\t"
-            "vextractf128  $tmp2,$src2\n\t"
+            "vextractf128_high  $tmp2,$src2\n\t"
             "vaddss  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0x01\n\t"
             "vaddss  $dst,$dst,$tmp\n\t"
@@ -5042,7 +5042,7 @@
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5065,7 +5065,7 @@
             "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
             "vaddss  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
+            "vextractf32x4  $tmp2,$src2,0x1\n\t"
             "vaddss  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0x01\n\t"
             "vaddss  $dst,$dst,$tmp\n\t"
@@ -5073,7 +5073,7 @@
             "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$tmp2,0x03\n\t"
             "vaddss  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
+            "vextractf32x4  $tmp2,$src2,0x2\n\t"
             "vaddss  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0x01\n\t"
             "vaddss  $dst,$dst,$tmp\n\t"
@@ -5081,7 +5081,7 @@
             "vaddss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$tmp2,0x03\n\t"
             "vaddss  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
+            "vextractf32x4  $tmp2,$src2,0x3\n\t"
             "vaddss  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0x01\n\t"
             "vaddss  $dst,$dst,$tmp\n\t"
@@ -5097,7 +5097,7 @@
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5105,7 +5105,7 @@
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5113,7 +5113,7 @@
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vaddss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5162,7 +5162,7 @@
   format %{ "vaddsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
             "vaddsd  $dst,$dst,$tmp\n\t"
-            "vextractf32x4h  $tmp2,$src2, 0x1\n\t"
+            "vextractf32x4  $tmp2,$src2,0x1\n\t"
             "vaddsd  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0xE\n\t"
             "vaddsd  $dst,$dst,$tmp\t! add reduction4D" %}
@@ -5170,7 +5170,7 @@
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5185,15 +5185,15 @@
   format %{ "vaddsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
             "vaddsd  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
+            "vextractf32x4  $tmp2,$src2,0x1\n\t"
             "vaddsd  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0xE\n\t"
             "vaddsd  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
+            "vextractf32x4  $tmp2,$src2,0x2\n\t"
             "vaddsd  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0xE\n\t"
             "vaddsd  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
+            "vextractf32x4  $tmp2,$src2,0x3\n\t"
             "vaddsd  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0xE\n\t"
             "vaddsd  $dst,$dst,$tmp\t! add reduction8D" %}
@@ -5201,15 +5201,15 @@
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5307,7 +5307,7 @@
   predicate(UseAVX > 0);
   match(Set dst (MulReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti128  $tmp,$src2\n\t"
+  format %{ "vextracti128_high  $tmp,$src2\n\t"
             "vpmulld  $tmp,$tmp,$src2\n\t"
             "pshufd   $tmp2,$tmp,0xE\n\t"
             "vpmulld  $tmp,$tmp,$tmp2\n\t"
@@ -5318,7 +5318,7 @@
             "movd     $dst,$tmp2\t! mul reduction8I" %}
   ins_encode %{
     int vector_len = 0;
-    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, vector_len);
     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, vector_len);
@@ -5335,9 +5335,9 @@
   predicate(UseAVX > 2);
   match(Set dst (MulReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
-  format %{ "vextracti64x4  $tmp3,$src2,0x1\n\t"
+  format %{ "vextracti64x4_high  $tmp3,$src2\n\t"
             "vpmulld  $tmp3,$tmp3,$src2\n\t"
-            "vextracti128   $tmp,$tmp3\n\t"
+            "vextracti128_high  $tmp,$tmp3\n\t"
             "vpmulld  $tmp,$tmp,$src2\n\t"
             "pshufd   $tmp2,$tmp,0xE\n\t"
             "vpmulld  $tmp,$tmp,$tmp2\n\t"
@@ -5347,9 +5347,9 @@
             "vpmulld  $tmp2,$tmp,$tmp2\n\t"
             "movd     $dst,$tmp2\t! mul reduction16I" %}
   ins_encode %{
-    __ vextracti64x4h($tmp3$$XMMRegister, $src2$$XMMRegister, 1);
+    __ vextracti64x4_high($tmp3$$XMMRegister, $src2$$XMMRegister);
     __ vpmulld($tmp3$$XMMRegister, $tmp3$$XMMRegister, $src2$$XMMRegister, 1);
-    __ vextracti128h($tmp$$XMMRegister, $tmp3$$XMMRegister);
+    __ vextracti128_high($tmp$$XMMRegister, $tmp3$$XMMRegister);
     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp3$$XMMRegister, 0);
     __ pshufd($tmp2$$XMMRegister, $tmp$$XMMRegister, 0xE);
     __ vpmulld($tmp$$XMMRegister, $tmp$$XMMRegister, $tmp2$$XMMRegister, 0);
@@ -5386,7 +5386,7 @@
   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
   match(Set dst (MulReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti128  $tmp,$src2\n\t"
+  format %{ "vextracti128_high  $tmp,$src2\n\t"
             "vpmullq  $tmp2,$tmp,$src2\n\t"
             "pshufd   $tmp,$tmp2,0xE\n\t"
             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
@@ -5394,7 +5394,7 @@
             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
             "movdq    $dst,$tmp2\t! mul reduction4L" %}
   ins_encode %{
-    __ vextracti128h($tmp$$XMMRegister, $src2$$XMMRegister);
+    __ vextracti128_high($tmp$$XMMRegister, $src2$$XMMRegister);
     __ vpmullq($tmp2$$XMMRegister, $tmp$$XMMRegister, $src2$$XMMRegister, 0);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5409,9 +5409,9 @@
   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
   match(Set dst (MulReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
-  format %{ "vextracti64x4  $tmp2,$src2,0x1\n\t"
+  format %{ "vextracti64x4_high  $tmp2,$src2\n\t"
             "vpmullq  $tmp2,$tmp2,$src2\n\t"
-            "vextracti128   $tmp,$tmp2\n\t"
+            "vextracti128_high  $tmp,$tmp2\n\t"
             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
             "pshufd   $tmp,$tmp2,0xE\n\t"
             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
@@ -5419,9 +5419,9 @@
             "vpmullq  $tmp2,$tmp2,$tmp\n\t"
             "movdq    $dst,$tmp2\t! mul reduction8L" %}
   ins_encode %{
-    __ vextracti64x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 1);
+    __ vextracti64x4_high($tmp2$$XMMRegister, $src2$$XMMRegister);
     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $src2$$XMMRegister, 1);
-    __ vextracti128h($tmp$$XMMRegister, $tmp2$$XMMRegister);
+    __ vextracti128_high($tmp$$XMMRegister, $tmp2$$XMMRegister);
     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vpmullq($tmp2$$XMMRegister, $tmp2$$XMMRegister, $tmp$$XMMRegister, 0);
@@ -5520,7 +5520,7 @@
             "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
             "vmulss  $dst,$dst,$tmp\n\t"
-            "vextractf128  $tmp2,$src2\n\t"
+            "vextractf128_high  $tmp2,$src2\n\t"
             "vmulss  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0x01\n\t"
             "vmulss  $dst,$dst,$tmp\n\t"
@@ -5536,7 +5536,7 @@
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5559,7 +5559,7 @@
             "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$src2,0x03\n\t"
             "vmulss  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
+            "vextractf32x4  $tmp2,$src2,0x1\n\t"
             "vmulss  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0x01\n\t"
             "vmulss  $dst,$dst,$tmp\n\t"
@@ -5567,7 +5567,7 @@
             "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$tmp2,0x03\n\t"
             "vmulss  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
+            "vextractf32x4  $tmp2,$src2,0x2\n\t"
             "vmulss  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0x01\n\t"
             "vmulss  $dst,$dst,$tmp\n\t"
@@ -5575,7 +5575,7 @@
             "vmulss  $dst,$dst,$tmp\n\t"
             "pshufd  $tmp,$tmp2,0x03\n\t"
             "vmulss  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
+            "vextractf32x4  $tmp2,$src2,0x3\n\t"
             "vmulss  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0x01\n\t"
             "vmulss  $dst,$dst,$tmp\n\t"
@@ -5591,7 +5591,7 @@
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0x03);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5599,7 +5599,7 @@
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5607,7 +5607,7 @@
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x03);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0x01);
     __ vmulss($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5656,7 +5656,7 @@
   format %{ "vmulsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
             "vmulsd  $dst,$dst,$tmp\n\t"
-            "vextractf128  $tmp2,$src2\n\t"
+            "vextractf128_high  $tmp2,$src2\n\t"
             "vmulsd  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0xE\n\t"
             "vmulsd  $dst,$dst,$tmp\t! mul reduction4D" %}
@@ -5664,7 +5664,7 @@
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf128h($tmp2$$XMMRegister, $src2$$XMMRegister);
+    __ vextractf128_high($tmp2$$XMMRegister, $src2$$XMMRegister);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5679,15 +5679,15 @@
   format %{ "vmulsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
             "vmulsd  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp2,$src2, 0x1\n\t"
+            "vextractf32x4  $tmp2,$src2,0x1\n\t"
             "vmulsd  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
             "vmulsd  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp2,$src2, 0x2\n\t"
+            "vextractf32x4  $tmp2,$src2,0x2\n\t"
             "vmulsd  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0xE\n\t"
             "vmulsd  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp2,$src2, 0x3\n\t"
+            "vextractf32x4  $tmp2,$src2,0x3\n\t"
             "vmulsd  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0xE\n\t"
             "vmulsd  $dst,$dst,$tmp\t! mul reduction8D" %}
@@ -5695,15 +5695,15 @@
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x2);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4h($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
+    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x3);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vmulsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
--- a/src/cpu/x86/vm/x86_32.ad	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Wed Mar 23 21:44:35 2016 -0700
@@ -1420,9 +1420,6 @@
 // The ecx parameter to rep stos for the ClearArray node is in dwords.
 const bool Matcher::init_array_count_is_in_bytes = false;
 
-// Threshold size for cleararray.
-const int Matcher::init_array_short_size = 8 * BytesPerLong;
-
 // Needs 2 CMOV's for longs.
 const int Matcher::long_cmove_cost() { return 1; }
 
@@ -11369,27 +11366,54 @@
 // =======================================================================
 // fast clearing of an array
 instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
-  predicate(!UseFastStosb);
+  predicate(!((ClearArrayNode*)n)->is_large());
   match(Set dummy (ClearArray cnt base));
   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
-  format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
-            "SHL    ECX,1\t# Convert doublewords to words\n\t"
-            "REP STOS\t# store EAX into [EDI++] while ECX--" %}
-  ins_encode %{
-    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
-  predicate(UseFastStosb);
+
+  format %{ $$template
+    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
+    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
+    $$emit$$"JG     LARGE\n\t"
+    $$emit$$"SHL    ECX, 1\n\t"
+    $$emit$$"DEC    ECX\n\t"
+    $$emit$$"JS     DONE\t# Zero length\n\t"
+    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
+    $$emit$$"DEC    ECX\n\t"
+    $$emit$$"JGE    LOOP\n\t"
+    $$emit$$"JMP    DONE\n\t"
+    $$emit$$"# LARGE:\n\t"
+    if (UseFastStosb) {
+       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
+       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
+    } else {
+       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
+       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
+    }
+    $$emit$$"# DONE"
+  %}
+  ins_encode %{
+    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
+  predicate(((ClearArrayNode*)n)->is_large());
   match(Set dummy (ClearArray cnt base));
   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
-  format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
-            "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
-            "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
-  ins_encode %{
-    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
+  format %{ $$template
+    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
+    if (UseFastStosb) {
+       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
+       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
+    } else {
+       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
+       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
+    }
+    $$emit$$"# DONE"
+  %}
+  ins_encode %{
+    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
   %}
   ins_pipe( pipe_slow );
 %}
--- a/src/cpu/x86/vm/x86_64.ad	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Wed Mar 23 21:44:35 2016 -0700
@@ -1637,9 +1637,6 @@
 // The ecx parameter to rep stosq for the ClearArray node is in words.
 const bool Matcher::init_array_count_is_in_bytes = false;
 
-// Threshold size for cleararray.
-const int Matcher::init_array_short_size = 8 * BytesPerLong;
-
 // No additional cost for CMOVL.
 const int Matcher::long_cmove_cost() { return 0; }
 
@@ -10460,31 +10457,55 @@
 instruct rep_stos(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
                   rFlagsReg cr)
 %{
-  predicate(!UseFastStosb);
+  predicate(!((ClearArrayNode*)n)->is_large());
   match(Set dummy (ClearArray cnt base));
   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
 
-  format %{ "xorq    rax, rax\t# ClearArray:\n\t"
-            "rep     stosq\t# Store rax to *rdi++ while rcx--" %}
-  ins_encode %{
-    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
+  format %{ $$template
+    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
+    $$emit$$"cmp     InitArrayShortSize,rcx\n\t"
+    $$emit$$"jg      LARGE\n\t"
+    $$emit$$"dec     rcx\n\t"
+    $$emit$$"js      DONE\t# Zero length\n\t"
+    $$emit$$"mov     rax,(rdi,rcx,8)\t# LOOP\n\t"
+    $$emit$$"dec     rcx\n\t"
+    $$emit$$"jge     LOOP\n\t"
+    $$emit$$"jmp     DONE\n\t"
+    $$emit$$"# LARGE:\n\t"
+    if (UseFastStosb) {
+       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
+       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--\n\t"
+    } else {
+       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--\n\t"
+    }
+    $$emit$$"# DONE"
+  %}
+  ins_encode %{
+    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
   %}
   ins_pipe(pipe_slow);
 %}
 
-instruct rep_fast_stosb(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
-                        rFlagsReg cr)
-%{
-  predicate(UseFastStosb);
+instruct rep_stos_large(rcx_RegL cnt, rdi_RegP base, rax_RegI zero, Universe dummy,
+                  rFlagsReg cr)
+%{
+  predicate(((ClearArrayNode*)n)->is_large());
   match(Set dummy (ClearArray cnt base));
   effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
-  format %{ "xorq    rax, rax\t# ClearArray:\n\t"
-            "shlq    rcx,3\t# Convert doublewords to bytes\n\t"
-            "rep     stosb\t# Store rax to *rdi++ while rcx--" %}
-  ins_encode %{
-    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
-  %}
-  ins_pipe( pipe_slow );
+
+  format %{ $$template
+    $$emit$$"xorq    rax, rax\t# ClearArray:\n\t"
+    if (UseFastStosb) {
+       $$emit$$"shlq    rcx,3\t# Convert doublewords to bytes\n\t"
+       $$emit$$"rep     stosb\t# Store rax to *rdi++ while rcx--"
+    } else {
+       $$emit$$"rep     stosq\t# Store rax to *rdi++ while rcx--"
+    }
+  %}
+  ins_encode %{
+    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
+  %}
+  ins_pipe(pipe_slow);
 %}
 
 instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.amd64/src/jdk/vm/ci/amd64/AMD64.java	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.amd64/src/jdk/vm/ci/amd64/AMD64.java	Wed Mar 23 21:44:35 2016 -0700
@@ -203,7 +203,8 @@
         AVX512ER,
         AVX512CD,
         AVX512BW,
-        AVX512VL
+        AVX512VL,
+        SHA
     }
 
     private final EnumSet<CPUFeature> features;
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot.amd64/src/jdk/vm/ci/hotspot/amd64/AMD64HotSpotJVMCIBackendFactory.java	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot.amd64/src/jdk/vm/ci/hotspot/amd64/AMD64HotSpotJVMCIBackendFactory.java	Wed Mar 23 21:44:35 2016 -0700
@@ -122,6 +122,9 @@
         if ((config.vmVersionFeatures & config.amd64AVX512VL) != 0) {
             features.add(AMD64.CPUFeature.AVX512VL);
         }
+        if ((config.vmVersionFeatures & config.amd64SHA) != 0) {
+            features.add(AMD64.CPUFeature.SHA);
+        }
         return features;
     }
 
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotConstantReflectionProvider.java	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotConstantReflectionProvider.java	Wed Mar 23 21:44:35 2016 -0700
@@ -339,7 +339,7 @@
 
     public JavaConstant readStableFieldValue(ResolvedJavaField field, JavaConstant receiver, boolean isDefaultStable) {
         JavaConstant fieldValue = readNonStableFieldValue(field, receiver);
-        if (fieldValue.isNonNull()) {
+        if (fieldValue != null && fieldValue.isNonNull()) {
             JavaType declaredType = field.getType();
             if (declaredType.getComponentType() != null) {
                 int stableDimension = getArrayDimension(declaredType);
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotJVMCICompilerConfig.java	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotJVMCICompilerConfig.java	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 import jdk.vm.ci.code.CompilationRequest;
 import jdk.vm.ci.code.CompilationRequestResult;
 import jdk.vm.ci.common.JVMCIError;
+import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime.Option;
 import jdk.vm.ci.runtime.JVMCICompiler;
 import jdk.vm.ci.runtime.JVMCICompilerFactory;
 import jdk.vm.ci.runtime.JVMCIRuntime;
@@ -47,29 +48,33 @@
         }
     }
 
+    /**
+     * Factory of the selected system compiler.
+     */
     private static JVMCICompilerFactory compilerFactory;
 
     /**
-     * Selects the system compiler.
+     * Gets the selected system compiler factory.
      *
-     * Called from VM. This method has an object return type to allow it to be called with a VM
-     * utility function used to call other static initialization methods.
+     * @return the selected system compiler factory
      */
-    static Boolean selectCompiler(String compilerName) {
-        assert compilerFactory == null;
-        for (JVMCICompilerFactory factory : Services.load(JVMCICompilerFactory.class)) {
-            if (factory.getCompilerName().equals(compilerName)) {
-                compilerFactory = factory;
-                return Boolean.TRUE;
-            }
-        }
-
-        throw new JVMCIError("JVMCI compiler '%s' not found", compilerName);
-    }
-
     static JVMCICompilerFactory getCompilerFactory() {
         if (compilerFactory == null) {
-            compilerFactory = new DummyCompilerFactory();
+            JVMCICompilerFactory factory = null;
+            String compilerName = Option.Compiler.getString();
+            if (compilerName != null) {
+                for (JVMCICompilerFactory f : Services.load(JVMCICompilerFactory.class)) {
+                    if (f.getCompilerName().equals(compilerName)) {
+                        factory = f;
+                    }
+                }
+                if (factory == null) {
+                    throw new JVMCIError("JVMCI compiler '%s' not found", compilerName);
+                }
+            } else {
+                factory = new DummyCompilerFactory();
+            }
+            compilerFactory = factory;
         }
         return compilerFactory;
     }
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotJVMCIRuntime.java	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotJVMCIRuntime.java	Wed Mar 23 21:44:35 2016 -0700
@@ -91,6 +91,7 @@
      * A list of all supported JVMCI options.
      */
     public enum Option {
+        Compiler(String.class, null, "Selects the system compiler."),
         ImplicitStableValues(boolean.class, true, "Mark well-known stable fields as such."),
         // Note: The following one is not used (see InitTimer.ENABLED).
         InitTimer(boolean.class, false, "Specifies if initialization timing is enabled."),
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotMetaAccessProvider.java	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotMetaAccessProvider.java	Wed Mar 23 21:44:35 2016 -0700
@@ -41,7 +41,6 @@
 import jdk.vm.ci.meta.DeoptimizationReason;
 import jdk.vm.ci.meta.JavaConstant;
 import jdk.vm.ci.meta.JavaKind;
-import jdk.vm.ci.meta.JavaType;
 import jdk.vm.ci.meta.MetaAccessProvider;
 import jdk.vm.ci.meta.ResolvedJavaField;
 import jdk.vm.ci.meta.ResolvedJavaMethod;
@@ -111,23 +110,26 @@
     }
 
     public ResolvedJavaField lookupJavaField(Field reflectionField) {
-        String name = reflectionField.getName();
         Class<?> fieldHolder = reflectionField.getDeclaringClass();
-        Class<?> fieldType = reflectionField.getType();
-        // java.lang.reflect.Field's modifiers should be enough here since VM internal modifier bits
-        // are not used (yet).
-        final int modifiers = reflectionField.getModifiers();
-        final long offset = Modifier.isStatic(modifiers) ? UNSAFE.staticFieldOffset(reflectionField) : UNSAFE.objectFieldOffset(reflectionField);
 
         HotSpotResolvedObjectType holder = fromObjectClass(fieldHolder);
-        JavaType type = runtime.fromClass(fieldType);
+        if (Modifier.isStatic(reflectionField.getModifiers())) {
+            final long offset = UNSAFE.staticFieldOffset(reflectionField);
+            for (ResolvedJavaField field : holder.getStaticFields()) {
+                if (offset == ((HotSpotResolvedJavaField) field).offset()) {
+                    return field;
+                }
+            }
+        } else {
+            final long offset = UNSAFE.objectFieldOffset(reflectionField);
+            for (ResolvedJavaField field : holder.getInstanceFields(false)) {
+                if (offset == ((HotSpotResolvedJavaField) field).offset()) {
+                    return field;
+                }
+            }
+        }
 
-        if (offset != -1) {
-            HotSpotResolvedObjectType resolved = holder;
-            return resolved.createField(name, type, offset, modifiers);
-        } else {
-            throw new JVMCIError("unresolved field %s", reflectionField);
-        }
+        throw new JVMCIError("unresolved field %s", reflectionField);
     }
 
     private static int intMaskRight(int n) {
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java	Wed Mar 23 21:44:35 2016 -0700
@@ -945,6 +945,7 @@
     @HotSpotVMConstant(name = "VM_Version::CPU_AVX512CD", archs = {"amd64"}) @Stable public long amd64AVX512CD;
     @HotSpotVMConstant(name = "VM_Version::CPU_AVX512BW", archs = {"amd64"}) @Stable public long amd64AVX512BW;
     @HotSpotVMConstant(name = "VM_Version::CPU_AVX512VL", archs = {"amd64"}) @Stable public long amd64AVX512VL;
+    @HotSpotVMConstant(name = "VM_Version::CPU_SHA", archs = {"amd64"}) @Stable public long amd64SHA;
 
     // SPARC specific values
     @HotSpotVMConstant(name = "VM_Version::vis3_instructions_m", archs = {"sparc"}) @Stable public int sparcVis3Instructions;
--- a/src/os/linux/vm/os_linux.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/os/linux/vm/os_linux.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -144,6 +144,7 @@
 int os::Linux::_page_size = -1;
 const int os::Linux::_vm_default_page_size = (8 * K);
 bool os::Linux::_supports_fast_thread_cpu_time = false;
+uint32_t os::Linux::_os_version = 0;
 const char * os::Linux::_glibc_version = NULL;
 const char * os::Linux::_libpthread_version = NULL;
 pthread_condattr_t os::Linux::_condattr[1];
@@ -4356,6 +4357,48 @@
   return (tp.tv_sec * NANOSECS_PER_SEC) + tp.tv_nsec;
 }
 
+void os::Linux::initialize_os_info() {
+  assert(_os_version == 0, "OS info already initialized");
+
+  struct utsname _uname;
+
+  uint32_t major;
+  uint32_t minor;
+  uint32_t fix;
+
+  int rc;
+
+  // Kernel version is unknown if
+  // verification below fails.
+  _os_version = 0x01000000;
+
+  rc = uname(&_uname);
+  if (rc != -1) {
+
+    rc = sscanf(_uname.release,"%d.%d.%d", &major, &minor, &fix);
+    if (rc == 3) {
+
+      if (major < 256 && minor < 256 && fix < 256) {
+        // Kernel version format is as expected,
+        // set it overriding unknown state.
+        _os_version = (major << 16) |
+                      (minor << 8 ) |
+                      (fix   << 0 ) ;
+      }
+    }
+  }
+}
+
+uint32_t os::Linux::os_version() {
+  assert(_os_version != 0, "not initialized");
+  return _os_version & 0x00FFFFFF;
+}
+
+bool os::Linux::os_version_is_known() {
+  assert(_os_version != 0, "not initialized");
+  return _os_version & 0x01000000 ? false : true;
+}
+
 /////
 // glibc on Linux platform uses non-documented flag
 // to indicate, that some special sort of signal
@@ -4578,6 +4621,8 @@
 
   Linux::initialize_system_info();
 
+  Linux::initialize_os_info();
+
   // main_thread points to the aboriginal thread
   Linux::_main_thread = pthread_self();
 
--- a/src/os/linux/vm/os_linux.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/os/linux/vm/os_linux.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -56,6 +56,15 @@
 
   static GrowableArray<int>* _cpu_to_node;
 
+  // 0x00000000 = uninitialized,
+  // 0x01000000 = kernel version unknown,
+  // otherwise a 32-bit number:
+  // Ox00AABBCC
+  // AA, Major Version
+  // BB, Minor Version
+  // CC, Fix   Version
+  static uint32_t _os_version;
+
  protected:
 
   static julong _physical_memory;
@@ -198,6 +207,10 @@
 
   static jlong fast_thread_cpu_time(clockid_t clockid);
 
+  static void initialize_os_info();
+  static bool os_version_is_known();
+  static uint32_t os_version();
+
   // pthread_cond clock suppport
  private:
   static pthread_condattr_t _condattr[1];
--- a/src/os/posix/vm/os_posix.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/os/posix/vm/os_posix.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -336,13 +336,13 @@
   const char *start;
 
   if (lib_name != NULL) {
-    len = name_len = strlen(lib_name);
+    name_len = strlen(lib_name);
     if (is_absolute_path) {
       // Need to strip path, prefix and suffix
       if ((start = strrchr(lib_name, *os::file_separator())) != NULL) {
         lib_name = ++start;
       }
-      if (len <= (prefix_len + suffix_len)) {
+      if (strlen(lib_name) <= (prefix_len + suffix_len)) {
         return NULL;
       }
       lib_name += prefix_len;
--- a/src/os/windows/vm/os_windows.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/os/windows/vm/os_windows.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -951,11 +951,11 @@
     FILETIME wt;
     GetSystemTimeAsFileTime(&wt);
     jlong rtc_millis = windows_to_java_time(wt);
-    jlong user_millis = windows_to_java_time(user_time);
-    jlong system_millis = windows_to_java_time(kernel_time);
     *process_real_time = ((double) rtc_millis) / ((double) MILLIUNITS);
-    *process_user_time = ((double) user_millis) / ((double) MILLIUNITS);
-    *process_system_time = ((double) system_millis) / ((double) MILLIUNITS);
+    *process_user_time =
+      (double) jlong_from(user_time.dwHighDateTime, user_time.dwLowDateTime) / (10 * MICROUNITS);
+    *process_system_time =
+      (double) jlong_from(kernel_time.dwHighDateTime, kernel_time.dwLowDateTime) / (10 * MICROUNITS);
     return true;
   } else {
     return false;
--- a/src/os_cpu/linux_aarch64/vm/copy_linux_aarch64.inline.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/os_cpu/linux_aarch64/vm/copy_linux_aarch64.inline.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -26,44 +26,108 @@
 #ifndef OS_CPU_LINUX_AARCH64_VM_COPY_LINUX_AARCH64_INLINE_HPP
 #define OS_CPU_LINUX_AARCH64_VM_COPY_LINUX_AARCH64_INLINE_HPP
 
+#define COPY_SMALL(from, to, count)                                     \
+{                                                                       \
+        long tmp0, tmp1, tmp2, tmp3;                                    \
+        long tmp4, tmp5, tmp6, tmp7;                                    \
+  __asm volatile(                                                       \
+"       adr     %[t0], 0f;"                                             \
+"       add     %[t0], %[t0], %[cnt], lsl #5;"                          \
+"       br      %[t0];"                                                 \
+"       .align  5;"                                                     \
+"0:"                                                                    \
+"       b       1f;"                                                    \
+"       .align  5;"                                                     \
+"       ldr     %[t0], [%[s], #0];"                                     \
+"       str     %[t0], [%[d], #0];"                                     \
+"       b       1f;"                                                    \
+"       .align  5;"                                                     \
+"       ldp     %[t0], %[t1], [%[s], #0];"                              \
+"       stp     %[t0], %[t1], [%[d], #0];"                              \
+"       b       1f;"                                                    \
+"       .align  5;"                                                     \
+"       ldp     %[t0], %[t1], [%[s], #0];"                              \
+"       ldr     %[t2], [%[s], #16];"                                    \
+"       stp     %[t0], %[t1], [%[d], #0];"                              \
+"       str     %[t2], [%[d], #16];"                                    \
+"       b       1f;"                                                    \
+"       .align  5;"                                                     \
+"       ldp     %[t0], %[t1], [%[s], #0];"                              \
+"       ldp     %[t2], %[t3], [%[s], #16];"                             \
+"       stp     %[t0], %[t1], [%[d], #0];"                              \
+"       stp     %[t2], %[t3], [%[d], #16];"                             \
+"       b       1f;"                                                    \
+"       .align  5;"                                                     \
+"       ldp     %[t0], %[t1], [%[s], #0];"                              \
+"       ldp     %[t2], %[t3], [%[s], #16];"                             \
+"       ldr     %[t4], [%[s], #32];"                                    \
+"       stp     %[t0], %[t1], [%[d], #0];"                              \
+"       stp     %[t2], %[t3], [%[d], #16];"                             \
+"       str     %[t4], [%[d], #32];"                                    \
+"       b       1f;"                                                    \
+"       .align  5;"                                                     \
+"       ldp     %[t0], %[t1], [%[s], #0];"                              \
+"       ldp     %[t2], %[t3], [%[s], #16];"                             \
+"       ldp     %[t4], %[t5], [%[s], #32];"                             \
+"2:"                                                                    \
+"       stp     %[t0], %[t1], [%[d], #0];"                              \
+"       stp     %[t2], %[t3], [%[d], #16];"                             \
+"       stp     %[t4], %[t5], [%[d], #32];"                             \
+"       b       1f;"                                                    \
+"       .align  5;"                                                     \
+"       ldr     %[t6], [%[s], #0];"                                     \
+"       ldp     %[t0], %[t1], [%[s], #8];"                              \
+"       ldp     %[t2], %[t3], [%[s], #24];"                             \
+"       ldp     %[t4], %[t5], [%[s], #40];"                             \
+"       str     %[t6], [%[d]], #8;"                                     \
+"       b       2b;"                                                    \
+"       .align  5;"                                                     \
+"       ldp     %[t0], %[t1], [%[s], #0];"                              \
+"       ldp     %[t2], %[t3], [%[s], #16];"                             \
+"       ldp     %[t4], %[t5], [%[s], #32];"                             \
+"       ldp     %[t6], %[t7], [%[s], #48];"                             \
+"       stp     %[t0], %[t1], [%[d], #0];"                              \
+"       stp     %[t2], %[t3], [%[d], #16];"                             \
+"       stp     %[t4], %[t5], [%[d], #32];"                             \
+"       stp     %[t6], %[t7], [%[d], #48];"                             \
+"1:"                                                                    \
+                                                                        \
+  : [s]"+r"(from), [d]"+r"(to), [cnt]"+r"(count),                       \
+    [t0]"=&r"(tmp0), [t1]"=&r"(tmp1), [t2]"=&r"(tmp2), [t3]"=&r"(tmp3), \
+    [t4]"=&r"(tmp4), [t5]"=&r"(tmp5), [t6]"=&r"(tmp6), [t7]"=&r"(tmp7)  \
+  :                                                                     \
+  : "memory", "cc");                                                    \
+}
+
 static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
-  (void)memmove(to, from, count * HeapWordSize);
+  __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory");
+  if (__builtin_expect(count <= 8, 1)) {
+    COPY_SMALL(from, to, count);
+    return;
+  }
+  _Copy_conjoint_words(from, to, count);
 }
 
 static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
-  switch (count) {
-  case 8:  to[7] = from[7];
-  case 7:  to[6] = from[6];
-  case 6:  to[5] = from[5];
-  case 5:  to[4] = from[4];
-  case 4:  to[3] = from[3];
-  case 3:  to[2] = from[2];
-  case 2:  to[1] = from[1];
-  case 1:  to[0] = from[0];
-  case 0:  break;
-  default:
-    (void)memcpy(to, from, count * HeapWordSize);
-    break;
+  if (__builtin_constant_p(count)) {
+    memcpy(to, from, count * sizeof(HeapWord));
+    return;
   }
+  __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory");
+  if (__builtin_expect(count <= 8, 1)) {
+    COPY_SMALL(from, to, count);
+    return;
+  }
+  _Copy_disjoint_words(from, to, count);
 }
 
 static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) {
-  switch (count) {
-  case 8:  to[7] = from[7];
-  case 7:  to[6] = from[6];
-  case 6:  to[5] = from[5];
-  case 5:  to[4] = from[4];
-  case 4:  to[3] = from[3];
-  case 3:  to[2] = from[2];
-  case 2:  to[1] = from[1];
-  case 1:  to[0] = from[0];
-  case 0:  break;
-  default:
-    while (count-- > 0) {
-      *to++ = *from++;
-    }
-    break;
+  __asm volatile( "prfm pldl1strm, [%[s], #0];" :: [s]"r"(from) : "memory");
+  if (__builtin_expect(count <= 8, 1)) {
+    COPY_SMALL(from, to, count);
+    return;
   }
+  _Copy_disjoint_words(from, to, count);
 }
 
 static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/os_cpu/linux_aarch64/vm/copy_linux_aarch64.s	Wed Mar 23 21:44:35 2016 -0700
@@ -0,0 +1,236 @@
+/*
+ * Copyright (c) 2016, Linaro Ltd. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+        .global _Copy_conjoint_words
+        .global _Copy_disjoint_words
+
+s       .req    x0
+d       .req    x1
+count   .req    x2
+t0      .req    x3
+t1      .req    x4
+t2      .req    x5
+t3      .req    x6
+t4      .req    x7
+t5      .req    x8
+t6      .req    x9
+t7      .req    x10
+
+        .align  6
+_Copy_disjoint_words:
+        // Ensure 2 word aligned
+        tbz     s, #3, fwd_copy_aligned
+        ldr     t0, [s], #8
+        str     t0, [d], #8
+        sub     count, count, #1
+
+fwd_copy_aligned:
+        // Bias s & d so we only pre index on the last copy
+        sub     s, s, #16
+        sub     d, d, #16
+
+        ldp     t0, t1, [s, #16]
+        ldp     t2, t3, [s, #32]
+        ldp     t4, t5, [s, #48]
+        ldp     t6, t7, [s, #64]!
+
+        subs    count, count, #16
+        blo     fwd_copy_drain
+
+fwd_copy_again:
+        prfm    pldl1keep, [s, #256]
+        stp     t0, t1, [d, #16]
+        ldp     t0, t1, [s, #16]
+        stp     t2, t3, [d, #32]
+        ldp     t2, t3, [s, #32]
+        stp     t4, t5, [d, #48]
+        ldp     t4, t5, [s, #48]
+        stp     t6, t7, [d, #64]!
+        ldp     t6, t7, [s, #64]!
+        subs    count, count, #8
+        bhs     fwd_copy_again
+
+fwd_copy_drain:
+        stp     t0, t1, [d, #16]
+        stp     t2, t3, [d, #32]
+        stp     t4, t5, [d, #48]
+        stp     t6, t7, [d, #64]!
+
+        // count is now -8..-1 for 0..7 words to copy
+        adr     t0, 0f
+        add     t0, t0, count, lsl #5
+        br      t0
+
+        .align  5
+        ret                             // -8 == 0 words
+        .align  5
+        ldr     t0, [s, #16]            // -7 == 1 word
+        str     t0, [d, #16]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #16]        // -6 = 2 words
+        stp     t0, t1, [d, #16]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #16]        // -5 = 3 words
+        ldr     t2, [s, #32]
+        stp     t0, t1, [d, #16]
+        str     t2, [d, #32]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #16]        // -4 = 4 words
+        ldp     t2, t3, [s, #32]
+        stp     t0, t1, [d, #16]
+        stp     t2, t3, [d, #32]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #16]        // -3 = 5 words
+        ldp     t2, t3, [s, #32]
+        ldr     t4, [s, #48]
+        stp     t0, t1, [d, #16]
+        stp     t2, t3, [d, #32]
+        str     t4, [d, #48]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #16]        // -2 = 6 words
+        ldp     t2, t3, [s, #32]
+        ldp     t4, t5, [s, #48]
+        stp     t0, t1, [d, #16]
+        stp     t2, t3, [d, #32]
+        stp     t4, t5, [d, #48]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #16]        // -1 = 7 words
+        ldp     t2, t3, [s, #32]
+        ldp     t4, t5, [s, #48]
+        ldr     t6, [s, #64]
+        stp     t0, t1, [d, #16]
+        stp     t2, t3, [d, #32]
+        stp     t4, t5, [d, #48]
+        str     t6, [d, #64]
+        // Is always aligned here, code for 7 words is one instruction
+        // too large so it just falls through.
+        .align  5
+0:
+        ret
+
+        .align  6
+_Copy_conjoint_words:
+        sub     t0, d, s
+        cmp     t0, count, lsl #3
+        bhs     _Copy_disjoint_words
+
+        add     s, s, count, lsl #3
+        add     d, d, count, lsl #3
+
+        // Ensure 2 word aligned
+        tbz     s, #3, bwd_copy_aligned
+        ldr     t0, [s, #-8]!
+        str     t0, [d, #-8]!
+        sub     count, count, #1
+
+bwd_copy_aligned:
+        ldp     t0, t1, [s, #-16]
+        ldp     t2, t3, [s, #-32]
+        ldp     t4, t5, [s, #-48]
+        ldp     t6, t7, [s, #-64]!
+
+        subs    count, count, #16
+        blo     bwd_copy_drain
+
+bwd_copy_again:
+        prfm    pldl1keep, [s, #-256]
+        stp     t0, t1, [d, #-16]
+        ldp     t0, t1, [s, #-16]
+        stp     t2, t3, [d, #-32]
+        ldp     t2, t3, [s, #-32]
+        stp     t4, t5, [d, #-48]
+        ldp     t4, t5, [s, #-48]
+        stp     t6, t7, [d, #-64]!
+        ldp     t6, t7, [s, #-64]!
+        subs    count, count, #8
+        bhs     bwd_copy_again
+
+bwd_copy_drain:
+        stp     t0, t1, [d, #-16]
+        stp     t2, t3, [d, #-32]
+        stp     t4, t5, [d, #-48]
+        stp     t6, t7, [d, #-64]!
+
+        // count is now -8..-1 for 0..7 words to copy
+        adr     t0, 0f
+        add     t0, t0, count, lsl #5
+        br      t0
+
+        .align  5
+        ret                             // -8 == 0 words
+        .align  5
+        ldr     t0, [s, #-8]            // -7 == 1 word
+        str     t0, [d, #-8]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #-16]       // -6 = 2 words
+        stp     t0, t1, [d, #-16]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #-16]       // -5 = 3 words
+        ldr     t2, [s, #-24]
+        stp     t0, t1, [d, #-16]
+        str     t2, [d, #-24]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #-16]       // -4 = 4 words
+        ldp     t2, t3, [s, #-32]
+        stp     t0, t1, [d, #-16]
+        stp     t2, t3, [d, #-32]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #-16]       // -3 = 5 words
+        ldp     t2, t3, [s, #-32]
+        ldr     t4, [s, #-40]
+        stp     t0, t1, [d, #-16]
+        stp     t2, t3, [d, #-32]
+        str     t4, [d, #-40]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #-16]       // -2 = 6 words
+        ldp     t2, t3, [s, #-32]
+        ldp     t4, t5, [s, #-48]
+        stp     t0, t1, [d, #-16]
+        stp     t2, t3, [d, #-32]
+        stp     t4, t5, [d, #-48]
+        ret
+        .align  5
+        ldp     t0, t1, [s, #-16]       // -1 = 7 words
+        ldp     t2, t3, [s, #-32]
+        ldp     t4, t5, [s, #-48]
+        ldr     t6, [s, #-56]
+        stp     t0, t1, [d, #-16]
+        stp     t2, t3, [d, #-32]
+        stp     t4, t5, [d, #-48]
+        str     t6, [d, #-56]
+        // Is always aligned here, code for 7 words is one instruction
+        // too large so it just falls through.
+        .align  5
+0:
+        ret
--- a/src/share/vm/c1/c1_Canonicalizer.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/c1/c1_Canonicalizer.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -257,7 +257,38 @@
   }
 }
 
-void Canonicalizer::do_LoadIndexed    (LoadIndexed*     x) {}
+void Canonicalizer::do_LoadIndexed    (LoadIndexed*     x) {
+  StableArrayConstant* array = x->array()->type()->as_StableArrayConstant();
+  IntConstant* index = x->index()->type()->as_IntConstant();
+
+  assert(array == NULL || FoldStableValues, "not enabled");
+
+  // Constant fold loads from stable arrays.
+  if (array != NULL && index != NULL) {
+    jint idx = index->value();
+    if (idx < 0 || idx >= array->value()->length()) {
+      // Leave the load as is. The range check will handle it.
+      return;
+    }
+
+    ciConstant field_val = array->value()->element_value(idx);
+    if (!field_val.is_null_or_zero()) {
+      jint dimension = array->dimension();
+      assert(dimension <= array->value()->array_type()->dimension(), "inconsistent info");
+      ValueType* value = NULL;
+      if (dimension > 1) {
+        // Preserve information about the dimension for the element.
+        assert(field_val.as_object()->is_array(), "not an array");
+        value = new StableArrayConstant(field_val.as_object()->as_array(), dimension - 1);
+      } else {
+        assert(dimension == 1, "sanity");
+        value = as_ValueType(field_val);
+      }
+      set_canonical(new Constant(value));
+    }
+  }
+}
+
 void Canonicalizer::do_StoreIndexed   (StoreIndexed*    x) {
   // If a value is going to be stored into a field or array some of
   // the conversions emitted by javac are unneeded because the fields
@@ -471,7 +502,7 @@
     InstanceConstant* c = x->argument_at(0)->type()->as_InstanceConstant();
     if (c != NULL && !c->value()->is_null_object()) {
       // ciInstance::java_mirror_type() returns non-NULL only for Java mirrors
-      ciType* t = c->value()->as_instance()->java_mirror_type();
+      ciType* t = c->value()->java_mirror_type();
       if (t->is_klass()) {
         // substitute cls.isInstance(obj) of a constant Class into
         // an InstantOf instruction
@@ -487,6 +518,17 @@
     }
     break;
   }
+  case vmIntrinsics::_isPrimitive        : {
+    assert(x->number_of_arguments() == 1, "wrong type");
+
+    // Class.isPrimitive is known on constant classes:
+    InstanceConstant* c = x->argument_at(0)->type()->as_InstanceConstant();
+    if (c != NULL && !c->value()->is_null_object()) {
+      ciType* t = c->value()->java_mirror_type();
+      set_constant(t->is_primitive_type());
+    }
+    break;
+  }
   }
 }
 
--- a/src/share/vm/c1/c1_Compiler.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/c1/c1_Compiler.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -148,6 +148,7 @@
   case vmIntrinsics::_longBitsToDouble:
   case vmIntrinsics::_getClass:
   case vmIntrinsics::_isInstance:
+  case vmIntrinsics::_isPrimitive:
   case vmIntrinsics::_currentThread:
   case vmIntrinsics::_dabs:
   case vmIntrinsics::_dsqrt:
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1519,6 +1519,29 @@
   append(new Return(x));
 }
 
+Value GraphBuilder::make_constant(ciConstant field_value, ciField* field) {
+  BasicType field_type = field_value.basic_type();
+  ValueType* value = as_ValueType(field_value);
+
+  // Attach dimension info to stable arrays.
+  if (FoldStableValues &&
+      field->is_stable() && field_type == T_ARRAY && !field_value.is_null_or_zero()) {
+    ciArray* array = field_value.as_object()->as_array();
+    jint dimension = field->type()->as_array_klass()->dimension();
+    value = new StableArrayConstant(array, dimension);
+  }
+
+  switch (field_type) {
+    case T_ARRAY:
+    case T_OBJECT:
+      if (field_value.as_object()->should_be_constant()) {
+        return new Constant(value);
+      }
+      return NULL; // Not a constant.
+    default:
+      return new Constant(value);
+  }
+}
 
 void GraphBuilder::access_field(Bytecodes::Code code) {
   bool will_link;
@@ -1563,22 +1586,13 @@
   switch (code) {
     case Bytecodes::_getstatic: {
       // check for compile-time constants, i.e., initialized static final fields
-      Instruction* constant = NULL;
+      Value constant = NULL;
       if (field->is_constant() && !PatchALot) {
-        ciConstant field_val = field->constant_value();
-        BasicType field_type = field_val.basic_type();
-        switch (field_type) {
-        case T_ARRAY:
-        case T_OBJECT:
-          if (field_val.as_object()->should_be_constant()) {
-            constant = new Constant(as_ValueType(field_val));
-          }
-          break;
-
-        default:
-          constant = new Constant(as_ValueType(field_val));
-        }
+        ciConstant field_value = field->constant_value();
         // Stable static fields are checked for non-default values in ciField::initialize_from().
+        assert(!field->is_stable() || !field_value.is_null_or_zero(),
+               "stable static w/ default value shouldn't be a constant");
+        constant = make_constant(field_value, field);
       }
       if (constant != NULL) {
         push(type, append(constant));
@@ -1591,38 +1605,29 @@
       }
       break;
     }
-    case Bytecodes::_putstatic:
-      { Value val = pop(type);
-        if (state_before == NULL) {
-          state_before = copy_state_for_exception();
-        }
-        append(new StoreField(append(obj), offset, field, val, true, state_before, needs_patching));
+    case Bytecodes::_putstatic: {
+      Value val = pop(type);
+      if (state_before == NULL) {
+        state_before = copy_state_for_exception();
       }
+      append(new StoreField(append(obj), offset, field, val, true, state_before, needs_patching));
       break;
+    }
     case Bytecodes::_getfield: {
       // Check for compile-time constants, i.e., trusted final non-static fields.
-      Instruction* constant = NULL;
+      Value constant = NULL;
       obj = apop();
       ObjectType* obj_type = obj->type()->as_ObjectType();
       if (obj_type->is_constant() && !PatchALot) {
         ciObject* const_oop = obj_type->constant_value();
         if (!const_oop->is_null_object() && const_oop->is_loaded()) {
           if (field->is_constant()) {
-            ciConstant field_val = field->constant_value_of(const_oop);
-            BasicType field_type = field_val.basic_type();
-            switch (field_type) {
-            case T_ARRAY:
-            case T_OBJECT:
-              if (field_val.as_object()->should_be_constant()) {
-                constant = new Constant(as_ValueType(field_val));
-              }
-              break;
-            default:
-              constant = new Constant(as_ValueType(field_val));
-            }
-            if (FoldStableValues && field->is_stable() && field_val.is_null_or_zero()) {
+            ciConstant field_value = field->constant_value_of(const_oop);
+            if (FoldStableValues && field->is_stable() && field_value.is_null_or_zero()) {
               // Stable field with default value can't be constant.
               constant = NULL;
+            } else {
+              constant = make_constant(field_value, field);
             }
           } else {
             // For CallSite objects treat the target field as a compile time constant.
@@ -3942,7 +3947,7 @@
 
 
 bool GraphBuilder::try_method_handle_inline(ciMethod* callee) {
-  ValueStack* state_before = state()->copy_for_parsing();
+  ValueStack* state_before = copy_state_before();
   vmIntrinsics::ID iid = callee->intrinsic_id();
   switch (iid) {
   case vmIntrinsics::_invokeBasic:
@@ -4032,7 +4037,7 @@
     fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
     break;
   }
-  set_state(state_before);
+  set_state(state_before->copy_for_parsing());
   return false;
 }
 
--- a/src/share/vm/c1/c1_GraphBuilder.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/c1/c1_GraphBuilder.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -276,6 +276,7 @@
   void iterate_all_blocks(bool start_in_current_block_for_inlining = false);
   Dependencies* dependency_recorder() const; // = compilation()->dependencies()
   bool direct_compare(ciKlass* k);
+  Value make_constant(ciConstant value, ciField* field);
 
   void kill_all();
 
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1296,6 +1296,25 @@
   __ move_wide(new LIR_Address(temp, in_bytes(Klass::java_mirror_offset()), T_OBJECT), result);
 }
 
+// java.lang.Class::isPrimitive()
+void LIRGenerator::do_isPrimitive(Intrinsic* x) {
+  assert(x->number_of_arguments() == 1, "wrong type");
+
+  LIRItem rcvr(x->argument_at(0), this);
+  rcvr.load_item();
+  LIR_Opr temp = new_register(T_METADATA);
+  LIR_Opr result = rlock_result(x);
+
+  CodeEmitInfo* info = NULL;
+  if (x->needs_null_check()) {
+    info = state_for(x);
+  }
+
+  __ move(new LIR_Address(rcvr.result(), java_lang_Class::klass_offset_in_bytes(), T_ADDRESS), temp, info);
+  __ cmp(lir_cond_notEqual, temp, LIR_OprFact::intConst(0));
+  __ cmove(lir_cond_notEqual, LIR_OprFact::intConst(0), LIR_OprFact::intConst(1), result, T_BOOLEAN);
+}
+
 
 // Example: Thread.currentThread()
 void LIRGenerator::do_currentThread(Intrinsic* x) {
@@ -3098,6 +3117,7 @@
 
   case vmIntrinsics::_Object_init:    do_RegisterFinalizer(x); break;
   case vmIntrinsics::_isInstance:     do_isInstance(x);    break;
+  case vmIntrinsics::_isPrimitive:    do_isPrimitive(x);   break;
   case vmIntrinsics::_getClass:       do_getClass(x);      break;
   case vmIntrinsics::_currentThread:  do_currentThread(x); break;
 
--- a/src/share/vm/c1/c1_LIRGenerator.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/c1/c1_LIRGenerator.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -246,6 +246,7 @@
 
   void do_RegisterFinalizer(Intrinsic* x);
   void do_isInstance(Intrinsic* x);
+  void do_isPrimitive(Intrinsic* x);
   void do_getClass(Intrinsic* x);
   void do_currentThread(Intrinsic* x);
   void do_MathIntrinsic(Intrinsic* x);
--- a/src/share/vm/c1/c1_Runtime1.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -335,6 +335,7 @@
   NOT_PRODUCT(_new_instance_slowcase_cnt++;)
 
   assert(klass->is_klass(), "not a class");
+  Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
   instanceKlassHandle h(thread, klass);
   h->check_valid_for_instantiation(true, CHECK);
   // make sure klass is initialized
@@ -370,6 +371,7 @@
   //       anymore after new_objArray() and no GC can happen before.
   //       (This may have to change if this code changes!)
   assert(array_klass->is_klass(), "not a class");
+  Handle holder(THREAD, array_klass->klass_holder()); // keep the klass alive
   Klass* elem_klass = ObjArrayKlass::cast(array_klass)->element_klass();
   objArrayOop obj = oopFactory::new_objArray(elem_klass, length, CHECK);
   thread->set_vm_result(obj);
@@ -386,6 +388,7 @@
 
   assert(klass->is_klass(), "not a class");
   assert(rank >= 1, "rank must be nonzero");
+  Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
   oop obj = ArrayKlass::cast(klass)->multi_allocate(rank, dims, CHECK);
   thread->set_vm_result(obj);
 JRT_END
--- a/src/share/vm/c1/c1_ValueType.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/c1/c1_ValueType.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -45,6 +45,7 @@
 class     ObjectConstant;
 class     ArrayType;
 class       ArrayConstant;
+class         StableArrayConstant;
 class     InstanceType;
 class       InstanceConstant;
 class   MetadataType;
@@ -168,6 +169,7 @@
   virtual MethodConstant*   as_MethodConstant()  { return NULL; }
   virtual MethodDataConstant* as_MethodDataConstant() { return NULL; }
   virtual ArrayConstant*    as_ArrayConstant()   { return NULL; }
+  virtual StableArrayConstant* as_StableArrayConstant()   { return NULL; }
   virtual AddressConstant*  as_AddressConstant() { return NULL; }
 
   // type operations
@@ -355,6 +357,20 @@
   virtual ciType* exact_type() const;
 };
 
+class StableArrayConstant: public ArrayConstant {
+ private:
+  jint _dimension;
+
+ public:
+  StableArrayConstant(ciArray* value, jint dimension) : ArrayConstant(value) {
+    assert(dimension > 0, "not a stable array");
+    _dimension = dimension;
+  }
+
+  jint dimension() const                              { return _dimension; }
+
+  virtual StableArrayConstant* as_StableArrayConstant() { return this; }
+};
 
 class InstanceType: public ObjectType {
  public:
--- a/src/share/vm/ci/ciMethodData.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/ci/ciMethodData.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -81,7 +81,7 @@
 void ciMethodData::load_extra_data() {
   MethodData* mdo = get_MethodData();
 
-  MutexLocker(mdo->extra_data_lock());
+  MutexLocker ml(mdo->extra_data_lock());
 
   // speculative trap entries also hold a pointer to a Method so need to be translated
   DataLayout* dp_src  = mdo->extra_data_base();
@@ -103,16 +103,13 @@
 
     switch(tag) {
     case DataLayout::speculative_trap_data_tag: {
-      ciSpeculativeTrapData* data_dst = new ciSpeculativeTrapData(dp_dst);
-      SpeculativeTrapData* data_src = new SpeculativeTrapData(dp_src);
+      ciSpeculativeTrapData data_dst(dp_dst);
+      SpeculativeTrapData   data_src(dp_src);
 
-      data_dst->translate_from(data_src);
-
-#ifdef ASSERT
-      SpeculativeTrapData* data_src2 = new SpeculativeTrapData(dp_src);
-      assert(data_src2->method() == data_src->method() && data_src2->bci() == data_src->bci(), "entries changed while translating");
-#endif
-
+      { // During translation a safepoint can happen or VM lock can be taken (e.g., Compile_lock).
+        MutexUnlocker ml(mdo->extra_data_lock());
+        data_dst.translate_from(&data_src);
+      }
       break;
     }
     case DataLayout::bit_data_tag:
@@ -120,9 +117,11 @@
     case DataLayout::no_tag:
     case DataLayout::arg_info_data_tag:
       // An empty slot or ArgInfoData entry marks the end of the trap data
-      return;
+      {
+        return; // Need a block to avoid SS compiler bug
+      }
     default:
-      fatal("bad tag = %d", dp_dst->tag());
+      fatal("bad tag = %d", tag);
     }
   }
 }
--- a/src/share/vm/classfile/vmSymbols.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1060,14 +1060,15 @@
    do_name(     updateByteBuffer_A_name,                          "updateByteBuffer")                                   \
                                                                                                                         \
   /* support for Unsafe */                                                                                              \
-  do_class(sun_misc_Unsafe,                        "sun/misc/Unsafe")                                                   \
   do_class(jdk_internal_misc_Unsafe,               "jdk/internal/misc/Unsafe")                                          \
                                                                                                                         \
   do_intrinsic(_allocateInstance,         jdk_internal_misc_Unsafe,     allocateInstance_name, allocateInstance_signature, F_RN) \
    do_name(     allocateInstance_name,                                  "allocateInstance")                                      \
    do_signature(allocateInstance_signature,                             "(Ljava/lang/Class;)Ljava/lang/Object;")                 \
+  do_intrinsic(_allocateUninitializedArray, jdk_internal_misc_Unsafe,   allocateUninitializedArray_name, newArray_signature,  F_R) \
+   do_name(     allocateUninitializedArray_name,                        "allocateUninitializedArray0")                           \
   do_intrinsic(_copyMemory,               jdk_internal_misc_Unsafe,     copyMemory_name, copyMemory_signature,         F_RN)     \
-   do_name(     copyMemory_name,                                        "copyMemory")                                            \
+   do_name(     copyMemory_name,                                        "copyMemory0")                                           \
    do_signature(copyMemory_signature,                                   "(Ljava/lang/Object;JLjava/lang/Object;JJ)V")            \
   do_intrinsic(_loadFence,                jdk_internal_misc_Unsafe,     loadFence_name, loadFence_signature,           F_RN)     \
    do_name(     loadFence_name,                                         "loadFence")                                             \
--- a/src/share/vm/code/codeCache.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/code/codeCache.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -637,16 +637,19 @@
 }
 
 // Walk the list of methods which might contain non-perm oops.
-void CodeCache::scavenge_root_nmethods_do(CodeBlobClosure* f) {
+void CodeCache::scavenge_root_nmethods_do(CodeBlobToOopClosure* f) {
   assert_locked_or_safepoint(CodeCache_lock);
 
   if (UseG1GC) {
     return;
   }
 
+  const bool fix_relocations = f->fix_relocations();
   debug_only(mark_scavenge_root_nmethods());
 
-  for (nmethod* cur = scavenge_root_nmethods(); cur != NULL; cur = cur->scavenge_root_link()) {
+  nmethod* prev = NULL;
+  nmethod* cur = scavenge_root_nmethods();
+  while (cur != NULL) {
     debug_only(cur->clear_scavenge_root_marked());
     assert(cur->scavenge_root_not_marked(), "");
     assert(cur->on_scavenge_root_list(), "else shouldn't be on this list");
@@ -659,6 +662,18 @@
       // Perform cur->oops_do(f), maybe just once per nmethod.
       f->do_code_blob(cur);
     }
+    nmethod* const next = cur->scavenge_root_link();
+    // The scavengable nmethod list must contain all methods with scavengable
+    // oops. It is safe to include more nmethod on the list, but we do not
+    // expect any live non-scavengable nmethods on the list.
+    if (fix_relocations) {
+      if (!is_live || !cur->detect_scavenge_root_oops()) {
+        unlink_scavenge_root_nmethod(cur, prev);
+      } else {
+        prev = cur;
+      }
+    }
+    cur = next;
   }
 
   // Check for stray marks.
@@ -678,6 +693,24 @@
   print_trace("add_scavenge_root", nm);
 }
 
+void CodeCache::unlink_scavenge_root_nmethod(nmethod* nm, nmethod* prev) {
+  assert_locked_or_safepoint(CodeCache_lock);
+
+  assert((prev == NULL && scavenge_root_nmethods() == nm) ||
+         (prev != NULL && prev->scavenge_root_link() == nm), "precondition");
+
+  assert(!UseG1GC, "G1 does not use the scavenge_root_nmethods list");
+
+  print_trace("unlink_scavenge_root", nm);
+  if (prev == NULL) {
+    set_scavenge_root_nmethods(nm->scavenge_root_link());
+  } else {
+    prev->set_scavenge_root_link(nm->scavenge_root_link());
+  }
+  nm->set_scavenge_root_link(NULL);
+  nm->clear_on_scavenge_root_list();
+}
+
 void CodeCache::drop_scavenge_root_nmethod(nmethod* nm) {
   assert_locked_or_safepoint(CodeCache_lock);
 
@@ -686,20 +719,13 @@
   }
 
   print_trace("drop_scavenge_root", nm);
-  nmethod* last = NULL;
-  nmethod* cur = scavenge_root_nmethods();
-  while (cur != NULL) {
-    nmethod* next = cur->scavenge_root_link();
+  nmethod* prev = NULL;
+  for (nmethod* cur = scavenge_root_nmethods(); cur != NULL; cur = cur->scavenge_root_link()) {
     if (cur == nm) {
-      if (last != NULL)
-            last->set_scavenge_root_link(next);
-      else  set_scavenge_root_nmethods(next);
-      nm->set_scavenge_root_link(NULL);
-      nm->clear_on_scavenge_root_list();
+      unlink_scavenge_root_nmethod(cur, prev);
       return;
     }
-    last = cur;
-    cur = next;
+    prev = cur;
   }
   assert(false, "should have been on list");
 }
@@ -728,11 +754,7 @@
     } else {
       // Prune it from the list, so we don't have to look at it any more.
       print_trace("prune_scavenge_root", cur);
-      cur->set_scavenge_root_link(NULL);
-      cur->clear_on_scavenge_root_list();
-      if (last != NULL)
-            last->set_scavenge_root_link(next);
-      else  set_scavenge_root_nmethods(next);
+      unlink_scavenge_root_nmethod(cur, last);
     }
     cur = next;
   }
--- a/src/share/vm/code/codeCache.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/code/codeCache.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -116,6 +116,10 @@
   static int    allocated_segments();
   static size_t freelists_length();
 
+  static void set_scavenge_root_nmethods(nmethod* nm) { _scavenge_root_nmethods = nm; }
+  static void prune_scavenge_root_nmethods();
+  static void unlink_scavenge_root_nmethod(nmethod* nm, nmethod* prev);
+
  public:
   // Initialization
   static void initialize();
@@ -153,13 +157,17 @@
   // to "true" iff some code got unloaded.
   static void do_unloading(BoolObjectClosure* is_alive, bool unloading_occurred);
   static void asserted_non_scavengable_nmethods_do(CodeBlobClosure* f = NULL) PRODUCT_RETURN;
-  static void scavenge_root_nmethods_do(CodeBlobClosure* f);
+
+  // Apply f to every live code blob in scavengable nmethods. Prune nmethods
+  // from the list of scavengable nmethods if f->fix_relocations() and a nmethod
+  // no longer has scavengable oops.  If f->fix_relocations(), then f must copy
+  // objects to their new location immediately to avoid fixing nmethods on the
+  // basis of the old object locations.
+  static void scavenge_root_nmethods_do(CodeBlobToOopClosure* f);
 
   static nmethod* scavenge_root_nmethods()            { return _scavenge_root_nmethods; }
-  static void set_scavenge_root_nmethods(nmethod* nm) { _scavenge_root_nmethods = nm; }
   static void add_scavenge_root_nmethod(nmethod* nm);
   static void drop_scavenge_root_nmethod(nmethod* nm);
-  static void prune_scavenge_root_nmethods();
 
   // Printing/debugging
   static void print();                           // prints summary
--- a/src/share/vm/code/debugInfoRec.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/code/debugInfoRec.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -369,7 +369,6 @@
   assert(method == NULL ||
          (method->is_native() && bci == 0) ||
          (!method->is_native() && 0 <= bci && bci < method->code_size()) ||
-         (method->is_compiled_lambda_form() && bci == -99) ||  // this might happen in C1
          bci == -1, "illegal bci");
 
   // serialize the locals/expressions/monitors
--- a/src/share/vm/code/nmethod.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/code/nmethod.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1381,7 +1381,6 @@
   assert(_method == NULL, "Tautology");
 
   set_osr_link(NULL);
-  //set_scavenge_root_link(NULL); // done by prune_scavenge_root_nmethods
   NMethodSweeper::report_state_change(this);
 }
 
--- a/src/share/vm/compiler/compileBroker.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/compiler/compileBroker.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -838,12 +838,8 @@
                                         const methodHandle& hot_method,
                                         int hot_count,
                                         const char* comment,
+                                        bool blocking,
                                         Thread* thread) {
-  // do nothing if compiler thread(s) is not available
-  if (!_initialized) {
-    return;
-  }
-
   guarantee(!method->is_abstract(), "cannot compile abstract methods");
   assert(method->method_holder()->is_instance_klass(),
          "sanity check");
@@ -916,7 +912,6 @@
 
   // Outputs from the following MutexLocker block:
   CompileTask* task     = NULL;
-  bool         blocking = false;
   CompileQueue* queue  = compile_queue(comp_level);
 
   // Acquire our lock.
@@ -946,9 +941,6 @@
       return;
     }
 
-    // Should this thread wait for completion of the compile?
-    blocking = is_compile_blocking();
-
 #if INCLUDE_JVMCI
     if (UseJVMCICompiler) {
       if (blocking) {
@@ -1034,11 +1026,28 @@
   }
 }
 
-
 nmethod* CompileBroker::compile_method(const methodHandle& method, int osr_bci,
                                        int comp_level,
                                        const methodHandle& hot_method, int hot_count,
                                        const char* comment, Thread* THREAD) {
+  // do nothing if compilebroker is not available
+  if (!_initialized) {
+    return NULL;
+  }
+  AbstractCompiler *comp = CompileBroker::compiler(comp_level);
+  assert(comp != NULL, "Ensure we don't compile before compilebroker init");
+  DirectiveSet* directive = DirectivesStack::getMatchingDirective(method, comp);
+  nmethod* nm = CompileBroker::compile_method(method, osr_bci, comp_level, hot_method, hot_count, comment, directive, THREAD);
+  DirectivesStack::release(directive);
+  return nm;
+}
+
+nmethod* CompileBroker::compile_method(const methodHandle& method, int osr_bci,
+                                         int comp_level,
+                                         const methodHandle& hot_method, int hot_count,
+                                         const char* comment, DirectiveSet* directive,
+                                         Thread* THREAD) {
+
   // make sure arguments make sense
   assert(method->method_holder()->is_instance_klass(), "not an instance method");
   assert(osr_bci == InvocationEntryBci || (0 <= osr_bci && osr_bci < method->code_size()), "bci out of range");
@@ -1051,8 +1060,8 @@
   // lock, make sure that the compilation
   // isn't prohibited in a straightforward way.
   AbstractCompiler *comp = CompileBroker::compiler(comp_level);
-  if (comp == NULL || !comp->can_compile_method(method) ||
-      compilation_is_prohibited(method, osr_bci, comp_level)) {
+  if (!comp->can_compile_method(method) ||
+      compilation_is_prohibited(method, osr_bci, comp_level, directive->ExcludeOption)) {
     return NULL;
   }
 
@@ -1160,7 +1169,7 @@
       CompilationPolicy::policy()->delay_compilation(method());
       return NULL;
     }
-    compile_method_base(method, osr_bci, comp_level, hot_method, hot_count, comment, THREAD);
+    compile_method_base(method, osr_bci, comp_level, hot_method, hot_count, comment, !directive->BackgroundCompilationOption, THREAD);
   }
 
   // return requested nmethod
@@ -1217,7 +1226,7 @@
 // CompileBroker::compilation_is_prohibited
 //
 // See if this compilation is not allowed.
-bool CompileBroker::compilation_is_prohibited(const methodHandle& method, int osr_bci, int comp_level) {
+bool CompileBroker::compilation_is_prohibited(const methodHandle& method, int osr_bci, int comp_level, bool excluded) {
   bool is_native = method->is_native();
   // Some compilers may not support the compilation of natives.
   AbstractCompiler *comp = compiler(comp_level);
@@ -1235,11 +1244,6 @@
     return true;
   }
 
-  // Breaking the abstraction - directives are only used inside a compilation otherwise.
-  DirectiveSet* directive = DirectivesStack::getMatchingDirective(method, comp);
-  bool excluded = directive->ExcludeOption;
-  DirectivesStack::release(directive);
-
   // The method may be explicitly excluded by the user.
   double scale;
   if (excluded || (CompilerOracle::has_option_value(method, "CompileThresholdScaling", scale) && scale == 0)) {
@@ -1304,16 +1308,6 @@
   return assign_compile_id(method, osr_bci);
 }
 
-/**
- * Should the current thread block until this compilation request
- * has been fulfilled?
- */
-bool CompileBroker::is_compile_blocking() {
-  assert(!InstanceRefKlass::owns_pending_list_lock(JavaThread::current()), "possible deadlock");
-  return !BackgroundCompilation;
-}
-
-
 // ------------------------------------------------------------------
 // CompileBroker::preload_classes
 void CompileBroker::preload_classes(const methodHandle& method, TRAPS) {
--- a/src/share/vm/compiler/compileBroker.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/compiler/compileBroker.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -222,8 +222,7 @@
   static JavaThread* make_thread(const char* name, CompileQueue* queue, CompilerCounters* counters, AbstractCompiler* comp, bool compiler_thread, TRAPS);
   static void init_compiler_sweeper_threads(int c1_compiler_count, int c2_compiler_count);
   static bool compilation_is_complete  (const methodHandle& method, int osr_bci, int comp_level);
-  static bool compilation_is_prohibited(const methodHandle& method, int osr_bci, int comp_level);
-  static bool is_compile_blocking();
+  static bool compilation_is_prohibited(const methodHandle& method, int osr_bci, int comp_level, bool excluded);
   static void preload_classes          (const methodHandle& method, TRAPS);
 
   static CompileTask* create_compile_task(CompileQueue*       queue,
@@ -253,6 +252,7 @@
                                   const methodHandle& hot_method,
                                   int hot_count,
                                   const char* comment,
+                                  bool blocking,
                                   Thread* thread);
 
   static CompileQueue* compile_queue(int comp_level);
@@ -291,6 +291,15 @@
                                  int hot_count,
                                  const char* comment, Thread* thread);
 
+  static nmethod* compile_method(const methodHandle& method,
+                                   int osr_bci,
+                                   int comp_level,
+                                   const methodHandle& hot_method,
+                                   int hot_count,
+                                   const char* comment,
+                                   DirectiveSet* directive,
+                                   Thread* thread);
+
   // Acquire any needed locks and assign a compile id
   static uint assign_compile_id_unlocked(Thread* thread, const methodHandle& method, int osr_bci);
 
--- a/src/share/vm/compiler/compilerDirectives.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/compiler/compilerDirectives.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -472,9 +472,12 @@
   _depth++;
 }
 
-void DirectivesStack::pop() {
+void DirectivesStack::pop(int count) {
   MutexLockerEx locker(DirectivesStack_lock, Mutex::_no_safepoint_check_flag);
-  pop_inner();
+  assert(count > -1, "No negative values");
+  for (int i = 0; i < count; i++) {
+    pop_inner();
+  }
 }
 
 void DirectivesStack::pop_inner() {
--- a/src/share/vm/compiler/compilerDirectives.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/compiler/compilerDirectives.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,6 +42,7 @@
     cflags(PrintAssembly,           bool, PrintAssembly, PrintAssembly) \
     cflags(PrintInlining,           bool, PrintInlining, PrintInlining) \
     cflags(PrintNMethods,           bool, PrintNMethods, PrintNMethods) \
+    cflags(BackgroundCompilation,   bool, BackgroundCompilation, BackgroundCompilation) \
     cflags(ReplayInline,            bool, false, ReplayInline) \
     cflags(DumpReplay,              bool, false, DumpReplay) \
     cflags(DumpInline,              bool, false, DumpInline) \
@@ -87,7 +88,7 @@
   static DirectiveSet* getMatchingDirective(methodHandle mh, AbstractCompiler* comp);
   static DirectiveSet* getDefaultDirective(AbstractCompiler* comp);
   static void push(CompilerDirectives* directive);
-  static void pop();
+  static void pop(int count);
   static bool check_capacity(int request_size, outputStream* st);
   static void clear();
   static void print(outputStream* st);
--- a/src/share/vm/compiler/directivesParser.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/compiler/directivesParser.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,7 +55,7 @@
   assert(_tmp_depth == 0, "Consistency");
 }
 
-bool DirectivesParser::parse_string(const char* text, outputStream* st) {
+int DirectivesParser::parse_string(const char* text, outputStream* st) {
   DirectivesParser cd(text, st);
   if (cd.valid()) {
     return cd.install_directives();
@@ -63,7 +63,7 @@
     cd.clean_tmp();
     st->flush();
     st->print_cr("Parsing of compiler directives failed");
-    return false;
+    return -1;
   }
 }
 
@@ -97,17 +97,17 @@
       buffer[num_read] = '\0';
       // close file
       os::close(file_handle);
-      return parse_string(buffer, stream);
+      return parse_string(buffer, stream) > 0;
     }
   }
   return false;
 }
 
-bool DirectivesParser::install_directives() {
+int DirectivesParser::install_directives() {
   // Check limit
   if (!DirectivesStack::check_capacity(_tmp_depth, _st)) {
     clean_tmp();
-    return false;
+    return 0;
   }
 
   // Pop from internal temporary stack and push to compileBroker.
@@ -120,14 +120,14 @@
   }
   if (i == 0) {
     _st->print_cr("No directives in file");
-    return false;
+    return 0;
   } else {
     _st->print_cr("%i compiler directives added", i);
     if (CompilerDirectivesPrint) {
       // Print entire directives stack after new has been pushed.
       DirectivesStack::print(_st);
     }
-    return true;
+    return i;
   }
 }
 
--- a/src/share/vm/compiler/directivesParser.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/compiler/directivesParser.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,8 +51,8 @@
   static bool has_file();
   static bool parse_from_flag();
   static bool parse_from_file(const char* filename, outputStream* st);
-  static bool parse_string(const char* string, outputStream* st);
-  bool install_directives();
+  static int  parse_string(const char* string, outputStream* st);
+  int install_directives();
 
 private:
   DirectivesParser(const char* text, outputStream* st);
--- a/src/share/vm/gc/g1/g1CollectedHeap.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/gc/g1/g1CollectedHeap.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -2329,9 +2329,13 @@
     GCIdMarkAndRestore conc_gc_id_mark(_cmThread->gc_id());
     if (_cm->has_aborted()) {
       _gc_tracer_cm->report_concurrent_mode_failure();
+
+      // ConcurrentGCTimer will be ended as well.
+      _cm->register_concurrent_gc_end_and_stop_timer();
+    } else {
+      _gc_timer_cm->register_gc_end();
     }
 
-    _gc_timer_cm->register_gc_end();
     _gc_tracer_cm->report_gc_end(_gc_timer_cm->gc_end(), _gc_timer_cm->time_partitions());
 
     // Clear state variables to prepare for the next concurrent cycle.
--- a/src/share/vm/gc/g1/g1CollectorPolicy.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/gc/g1/g1CollectorPolicy.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -269,6 +269,8 @@
   _reserve_regions = 0;
 
   _cset_chooser = new CollectionSetChooser();
+
+  _ihop_control = create_ihop_control();
 }
 
 G1CollectorPolicy::~G1CollectorPolicy() {
@@ -469,8 +471,6 @@
   if (max_young_size != MaxNewSize) {
     FLAG_SET_ERGO(size_t, MaxNewSize, max_young_size);
   }
-
-  _ihop_control = create_ihop_control();
 }
 
 void G1CollectorPolicy::initialize_flags() {
@@ -565,6 +565,8 @@
   _reserve_regions = (uint) ceil(reserve_regions_d);
 
   _young_gen_sizer->heap_size_changed(new_number_of_regions);
+
+  _ihop_control->update_target_occupancy(new_number_of_regions * HeapRegion::GrainBytes);
 }
 
 uint G1CollectorPolicy::calculate_young_list_desired_min_length(
@@ -1234,13 +1236,11 @@
 G1IHOPControl* G1CollectorPolicy::create_ihop_control() const {
   if (G1UseAdaptiveIHOP) {
     return new G1AdaptiveIHOPControl(InitiatingHeapOccupancyPercent,
-                                     G1CollectedHeap::heap()->max_capacity(),
                                      &_predictor,
                                      G1ReservePercent,
                                      G1HeapWastePercent);
   } else {
-    return new G1StaticIHOPControl(InitiatingHeapOccupancyPercent,
-                                   G1CollectedHeap::heap()->max_capacity());
+    return new G1StaticIHOPControl(InitiatingHeapOccupancyPercent);
   }
 }
 
--- a/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/gc/g1/g1ConcurrentMark.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -441,7 +441,7 @@
   _has_aborted(false),
   _restart_for_overflow(false),
   _concurrent_marking_in_progress(false),
-  _concurrent_phase_started(false),
+  _concurrent_phase_status(ConcPhaseNotStarted),
 
   // _verbose_level set below
 
@@ -1008,16 +1008,43 @@
 }
 
 void G1ConcurrentMark::register_concurrent_phase_start(const char* title) {
-  assert(!_concurrent_phase_started, "Sanity");
-  _concurrent_phase_started = true;
+  uint old_val = 0;
+  do {
+    old_val = Atomic::cmpxchg(ConcPhaseStarted, &_concurrent_phase_status, ConcPhaseNotStarted);
+  } while (old_val != ConcPhaseNotStarted);
   _g1h->gc_timer_cm()->register_gc_concurrent_start(title);
 }
 
+void G1ConcurrentMark::register_concurrent_phase_end_common(bool end_timer) {
+  if (_concurrent_phase_status == ConcPhaseNotStarted) {
+    return;
+  }
+
+  uint old_val = Atomic::cmpxchg(ConcPhaseStopping, &_concurrent_phase_status, ConcPhaseStarted);
+  if (old_val == ConcPhaseStarted) {
+    _g1h->gc_timer_cm()->register_gc_concurrent_end();
+    // If 'end_timer' is true, we came here to end timer which needs concurrent phase ended.
+    // We need to end it before changing the status to 'ConcPhaseNotStarted' to prevent
+    // starting a new concurrent phase by 'ConcurrentMarkThread'.
+    if (end_timer) {
+      _g1h->gc_timer_cm()->register_gc_end();
+    }
+    old_val = Atomic::cmpxchg(ConcPhaseNotStarted, &_concurrent_phase_status, ConcPhaseStopping);
+    assert(old_val == ConcPhaseStopping, "Should not have changed since we entered this scope.");
+  } else {
+    do {
+      // Let other thread finish changing '_concurrent_phase_status' to 'ConcPhaseNotStarted'.
+      os::naked_short_sleep(1);
+    } while (_concurrent_phase_status != ConcPhaseNotStarted);
+  }
+}
+
 void G1ConcurrentMark::register_concurrent_phase_end() {
-  if (_concurrent_phase_started) {
-    _concurrent_phase_started = false;
-    _g1h->gc_timer_cm()->register_gc_concurrent_end();
-  }
+  register_concurrent_phase_end_common(false);
+}
+
+void G1ConcurrentMark::register_concurrent_gc_end_and_stop_timer() {
+  register_concurrent_phase_end_common(true);
 }
 
 void G1ConcurrentMark::markFromRoots() {
@@ -2605,9 +2632,6 @@
 
   _g1h->trace_heap_after_concurrent_cycle();
 
-  // Close any open concurrent phase timing
-  register_concurrent_phase_end();
-
   _g1h->register_concurrent_cycle_end();
 }
 
--- a/src/share/vm/gc/g1/g1ConcurrentMark.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/gc/g1/g1ConcurrentMark.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -352,8 +352,17 @@
   // time of remark.
   volatile bool           _concurrent_marking_in_progress;
 
-  // Keep track of whether we have started concurrent phase or not.
-  bool                    _concurrent_phase_started;
+  // There would be a race between ConcurrentMarkThread and VMThread(ConcurrentMark::abort())
+  // to call ConcurrentGCTimer::register_gc_concurrent_end().
+  // And this variable is used to keep track of concurrent phase.
+  volatile uint           _concurrent_phase_status;
+  // Concurrent phase is not yet started.
+  static const uint       ConcPhaseNotStarted = 0;
+  // Concurrent phase is started.
+  static const uint       ConcPhaseStarted = 1;
+  // Caller thread of ConcurrentGCTimer::register_gc_concurrent_end() is ending concurrent phase.
+  // So other thread should wait until the status to be changed to ConcPhaseNotStarted.
+  static const uint       ConcPhaseStopping = 2;
 
   // All of these times are in ms
   NumberSeq _init_times;
@@ -485,6 +494,9 @@
   // Set to true when initialization is complete
   bool _completed_initialization;
 
+  // end_timer, true to end gc timer after ending concurrent phase.
+  void register_concurrent_phase_end_common(bool end_timer);
+
 public:
   // Manipulation of the global mark stack.
   // The push and pop operations are used by tasks for transfers
@@ -520,6 +532,8 @@
 
   void register_concurrent_phase_start(const char* title);
   void register_concurrent_phase_end();
+  // Ends both concurrent phase and timer.
+  void register_concurrent_gc_end_and_stop_timer();
 
   void update_accum_task_vtime(int i, double vtime) {
     _accum_task_vtime[i] += vtime;
--- a/src/share/vm/gc/g1/g1IHOPControl.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/gc/g1/g1IHOPControl.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -29,15 +29,21 @@
 #include "gc/shared/gcTrace.hpp"
 #include "logging/log.hpp"
 
-G1IHOPControl::G1IHOPControl(double initial_ihop_percent, size_t target_occupancy) :
+G1IHOPControl::G1IHOPControl(double initial_ihop_percent) :
   _initial_ihop_percent(initial_ihop_percent),
-  _target_occupancy(target_occupancy),
+  _target_occupancy(0),
   _last_allocated_bytes(0),
   _last_allocation_time_s(0.0)
 {
   assert(_initial_ihop_percent >= 0.0 && _initial_ihop_percent <= 100.0, "Initial IHOP value must be between 0 and 100 but is %.3f", initial_ihop_percent);
 }
 
+void G1IHOPControl::update_target_occupancy(size_t new_target_occupancy) {
+  log_debug(gc, ihop)("Target occupancy update: old: " SIZE_FORMAT "B, new: " SIZE_FORMAT "B",
+                      _target_occupancy, new_target_occupancy);
+  _target_occupancy = new_target_occupancy;
+}
+
 void G1IHOPControl::update_allocation_info(double allocation_time_s, size_t allocated_bytes, size_t additional_buffer_size) {
   assert(allocation_time_s >= 0.0, "Allocation time must be positive but is %.3f", allocation_time_s);
 
@@ -46,6 +52,7 @@
 }
 
 void G1IHOPControl::print() {
+  assert(_target_occupancy > 0, "Target occupancy still not updated yet.");
   size_t cur_conc_mark_start_threshold = get_conc_mark_start_threshold();
   log_debug(gc, ihop)("Basic information (value update), threshold: " SIZE_FORMAT "B (%1.2f), target occupancy: " SIZE_FORMAT "B, current occupancy: " SIZE_FORMAT "B, "
                       "recent allocation size: " SIZE_FORMAT "B, recent allocation duration: %1.2fms, recent old gen allocation rate: %1.2fB/s, recent marking phase length: %1.2fms",
@@ -60,6 +67,7 @@
 }
 
 void G1IHOPControl::send_trace_event(G1NewTracer* tracer) {
+  assert(_target_occupancy > 0, "Target occupancy still not updated yet.");
   tracer->report_basic_ihop_statistics(get_conc_mark_start_threshold(),
                                        _target_occupancy,
                                        G1CollectedHeap::heap()->used(),
@@ -68,10 +76,9 @@
                                        last_marking_length_s());
 }
 
-G1StaticIHOPControl::G1StaticIHOPControl(double ihop_percent, size_t target_occupancy) :
-  G1IHOPControl(ihop_percent, target_occupancy),
+G1StaticIHOPControl::G1StaticIHOPControl(double ihop_percent) :
+  G1IHOPControl(ihop_percent),
   _last_marking_length_s(0.0) {
-  assert(_target_occupancy > 0, "Target occupancy must be larger than zero.");
 }
 
 #ifndef PRODUCT
@@ -85,7 +92,8 @@
 void G1StaticIHOPControl::test() {
   size_t const initial_ihop = 45;
 
-  G1StaticIHOPControl ctrl(initial_ihop, 100);
+  G1StaticIHOPControl ctrl(initial_ihop);
+  ctrl.update_target_occupancy(100);
 
   size_t threshold = ctrl.get_conc_mark_start_threshold();
   assert(threshold == initial_ihop,
@@ -115,11 +123,10 @@
 #endif
 
 G1AdaptiveIHOPControl::G1AdaptiveIHOPControl(double ihop_percent,
-                                             size_t initial_target_occupancy,
                                              G1Predictions const* predictor,
                                              size_t heap_reserve_percent,
                                              size_t heap_waste_percent) :
-  G1IHOPControl(ihop_percent, initial_target_occupancy),
+  G1IHOPControl(ihop_percent),
   _predictor(predictor),
   _marking_times_s(10, 0.95),
   _allocation_rate_s(10, 0.95),
@@ -130,6 +137,7 @@
 }
 
 size_t G1AdaptiveIHOPControl::actual_target_threshold() const {
+  guarantee(_target_occupancy > 0, "Target occupancy still not updated yet.");
   // The actual target threshold takes the heap reserve and the expected waste in
   // free space  into account.
   // _heap_reserve is that part of the total heap capacity that is reserved for
@@ -227,7 +235,8 @@
   // target_size - (young_size + alloc_amount/alloc_time * marking_time)
 
   G1Predictions pred(0.95);
-  G1AdaptiveIHOPControl ctrl(initial_threshold, target_size, &pred, 0, 0);
+  G1AdaptiveIHOPControl ctrl(initial_threshold, &pred, 0, 0);
+  ctrl.update_target_occupancy(target_size);
 
   // First "load".
   size_t const alloc_time1 = 2;
@@ -288,5 +297,6 @@
 
 void IHOP_test() {
   G1StaticIHOPControl::test();
+  G1AdaptiveIHOPControl::test();
 }
 #endif
--- a/src/share/vm/gc/g1/g1IHOPControl.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/gc/g1/g1IHOPControl.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -38,7 +38,8 @@
  protected:
   // The initial IHOP value relative to the target occupancy.
   double _initial_ihop_percent;
-  // The target maximum occupancy of the heap.
+  // The target maximum occupancy of the heap. The target occupancy is the number
+  // of bytes when marking should be finished and reclaim started.
   size_t _target_occupancy;
 
   // Most recent complete mutator allocation period in seconds.
@@ -46,10 +47,9 @@
   // Amount of bytes allocated during _last_allocation_time_s.
   size_t _last_allocated_bytes;
 
-  // Initialize an instance with the initial IHOP value in percent and the target
-  // occupancy. The target occupancy is the number of bytes when marking should
-  // be finished and reclaim started.
-  G1IHOPControl(double initial_ihop_percent, size_t target_occupancy);
+  // Initialize an instance with the initial IHOP value in percent. The target
+  // occupancy will be updated at the first heap expansion.
+  G1IHOPControl(double initial_ihop_percent);
 
   // Most recent time from the end of the initial mark to the start of the first
   // mixed gc.
@@ -60,6 +60,8 @@
   // Get the current non-young occupancy at which concurrent marking should start.
   virtual size_t get_conc_mark_start_threshold() = 0;
 
+  // Adjust target occupancy.
+  virtual void update_target_occupancy(size_t new_target_occupancy);
   // Update information about time during which allocations in the Java heap occurred,
   // how large these allocations were in bytes, and an additional buffer.
   // The allocations should contain any amount of space made unusable for further
@@ -86,9 +88,12 @@
  protected:
   double last_marking_length_s() const { return _last_marking_length_s; }
  public:
-  G1StaticIHOPControl(double ihop_percent, size_t target_occupancy);
+  G1StaticIHOPControl(double ihop_percent);
 
-  size_t get_conc_mark_start_threshold() { return (size_t) (_initial_ihop_percent * _target_occupancy / 100.0); }
+  size_t get_conc_mark_start_threshold() {
+    guarantee(_target_occupancy > 0, "Target occupancy must have been initialized.");
+    return (size_t) (_initial_ihop_percent * _target_occupancy / 100.0);
+  }
 
   virtual void update_marking_length(double marking_length_s) {
    assert(marking_length_s > 0.0, "Marking length must be larger than zero but is %.3f", marking_length_s);
@@ -132,7 +137,6 @@
   virtual double last_marking_length_s() const { return _marking_times_s.last(); }
  public:
   G1AdaptiveIHOPControl(double ihop_percent,
-                        size_t initial_target_occupancy,
                         G1Predictions const* predictor,
                         size_t heap_reserve_percent, // The percentage of total heap capacity that should not be tapped into.
                         size_t heap_waste_percent);  // The percentage of the free space in the heap that we think is not usable for allocation.
--- a/src/share/vm/gc/g1/g1OopClosures.inline.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/gc/g1/g1OopClosures.inline.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -171,14 +171,7 @@
 #ifdef ASSERT
   // can't do because of races
   // assert(obj == NULL || obj->is_oop(), "expected an oop");
-
-  // Do the safe subset of is_oop
-#ifdef CHECK_UNHANDLED_OOPS
-  oopDesc* o = obj.obj();
-#else
-  oopDesc* o = obj;
-#endif // CHECK_UNHANDLED_OOPS
-  assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned");
+  assert(check_obj_alignment(obj), "not oop aligned");
   assert(_g1->is_in_reserved(obj), "must be in heap");
 #endif // ASSERT
 
--- a/src/share/vm/gc/g1/g1RemSet.inline.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/gc/g1/g1RemSet.inline.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -44,14 +44,7 @@
 #ifdef ASSERT
   // can't do because of races
   // assert(obj == NULL || obj->is_oop(), "expected an oop");
-
-  // Do the safe subset of is_oop
-#ifdef CHECK_UNHANDLED_OOPS
-  oopDesc* o = obj.obj();
-#else
-  oopDesc* o = obj;
-#endif // CHECK_UNHANDLED_OOPS
-  assert((intptr_t)o % MinObjAlignmentInBytes == 0, "not oop aligned");
+  assert(check_obj_alignment(obj), "not oop aligned");
   assert(_g1->is_in_reserved(obj), "must be in heap");
 #endif // ASSERT
 
--- a/src/share/vm/gc/parallel/psCompactionManager.inline.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/gc/parallel/psCompactionManager.inline.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -125,14 +125,14 @@
   T* const beg = base + beg_index;
   T* const end = base + end_index;
 
+  if (end_index < len) {
+    cm->push_objarray(obj, end_index); // Push the continuation.
+  }
+
   // Push the non-NULL elements of the next stride on the marking stack.
   for (T* e = beg; e < end; e++) {
     cm->mark_and_push<T>(e);
   }
-
-  if (end_index < len) {
-    cm->push_objarray(obj, end_index); // Push the continuation.
-  }
 }
 
 inline void ParCompactionManager::follow_contents(objArrayOop obj, int index) {
--- a/src/share/vm/gc/parallel/psParallelCompact.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/gc/parallel/psParallelCompact.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1484,17 +1484,6 @@
 }
 
 void
-PSParallelCompact::clear_source_region(HeapWord* beg_addr, HeapWord* end_addr)
-{
-  RegionData* const beg_ptr = _summary_data.addr_to_region_ptr(beg_addr);
-  HeapWord* const end_aligned_up = _summary_data.region_align_up(end_addr);
-  RegionData* const end_ptr = _summary_data.addr_to_region_ptr(end_aligned_up);
-  for (RegionData* cur = beg_ptr; cur < end_ptr; ++cur) {
-    cur->set_source_region(0);
-  }
-}
-
-void
 PSParallelCompact::summarize_space(SpaceId id, bool maximum_compaction)
 {
   assert(id < last_space_id, "id out of range");
--- a/src/share/vm/gc/parallel/psParallelCompact.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/gc/parallel/psParallelCompact.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1065,9 +1065,6 @@
   // non-empty.
   static void fill_dense_prefix_end(SpaceId id);
 
-  // Clear the summary data source_region field for the specified addresses.
-  static void clear_source_region(HeapWord* beg_addr, HeapWord* end_addr);
-
   static void summarize_spaces_quick();
   static void summarize_space(SpaceId id, bool maximum_compaction);
   static void summary_phase(ParCompactionManager* cm, bool maximum_compaction);
--- a/src/share/vm/gc/parallel/psScavenge.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/gc/parallel/psScavenge.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -600,12 +600,6 @@
 
     NOT_PRODUCT(reference_processor()->verify_no_references_recorded());
 
-    {
-      GCTraceTime(Debug, gc, phases) tm("Prune Scavenge Root Methods", &_gc_timer);
-
-      CodeCache::prune_scavenge_root_nmethods();
-    }
-
     // Re-verify object start arrays
     if (VerifyObjectStartArray &&
         VerifyAfterGC) {
--- a/src/share/vm/gc/shared/genCollectedHeap.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/gc/shared/genCollectedHeap.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -561,7 +561,7 @@
                                      OopClosure* weak_roots,
                                      CLDClosure* strong_cld_closure,
                                      CLDClosure* weak_cld_closure,
-                                     CodeBlobClosure* code_roots) {
+                                     CodeBlobToOopClosure* code_roots) {
   // General roots.
   assert(Threads::thread_claim_parity() != 0, "must have called prologue code");
   assert(code_roots != NULL, "code root closure should always be set");
@@ -578,7 +578,7 @@
   // Don't process them if they will be processed during the ClassLoaderDataGraph phase.
   CLDClosure* roots_from_clds_p = (strong_cld_closure != weak_cld_closure) ? strong_cld_closure : NULL;
   // Only process code roots from thread stacks if we aren't visiting the entire CodeCache anyway
-  CodeBlobClosure* roots_from_code_p = (so & SO_AllCodeCache) ? NULL : code_roots;
+  CodeBlobToOopClosure* roots_from_code_p = (so & SO_AllCodeCache) ? NULL : code_roots;
 
   bool is_par = scope->n_threads() > 1;
   Threads::possibly_parallel_oops_do(is_par, strong_roots, roots_from_clds_p, roots_from_code_p);
--- a/src/share/vm/gc/shared/genCollectedHeap.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/gc/shared/genCollectedHeap.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -399,7 +399,7 @@
                      OopClosure* weak_roots,
                      CLDClosure* strong_cld_closure,
                      CLDClosure* weak_cld_closure,
-                     CodeBlobClosure* code_roots);
+                     CodeBlobToOopClosure* code_roots);
 
  public:
   static const bool StrongAndWeakRoots = false;
--- a/src/share/vm/jvmci/jvmciCompiler.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/jvmci/jvmciCompiler.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -162,6 +162,9 @@
     CLEAR_PENDING_EXCEPTION;
 
     java_lang_Throwable::java_printStackTrace(exception, THREAD);
+    if (HAS_PENDING_EXCEPTION) {
+      CLEAR_PENDING_EXCEPTION;
+    }
 
     env->set_failure("exception throw", false);
   } else {
--- a/src/share/vm/jvmci/jvmciRuntime.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/jvmci/jvmciRuntime.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -51,7 +51,6 @@
 jobject JVMCIRuntime::_HotSpotJVMCIRuntime_instance = NULL;
 bool JVMCIRuntime::_HotSpotJVMCIRuntime_initialized = false;
 bool JVMCIRuntime::_well_known_classes_initialized = false;
-const char* JVMCIRuntime::_compiler = NULL;
 int JVMCIRuntime::_trivial_prefixes_count = 0;
 char** JVMCIRuntime::_trivial_prefixes = NULL;
 bool JVMCIRuntime::_shutdown_called = false;
@@ -104,6 +103,7 @@
 JRT_BLOCK_ENTRY(void, JVMCIRuntime::new_instance(JavaThread* thread, Klass* klass))
   JRT_BLOCK;
   assert(klass->is_klass(), "not a class");
+  Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
   instanceKlassHandle h(thread, klass);
   h->check_valid_for_instantiation(true, CHECK);
   // make sure klass is initialized
@@ -129,6 +129,7 @@
     BasicType elt_type = TypeArrayKlass::cast(array_klass)->element_type();
     obj = oopFactory::new_typeArray(elt_type, length, CHECK);
   } else {
+    Handle holder(THREAD, array_klass->klass_holder()); // keep the klass alive
     Klass* elem_klass = ObjArrayKlass::cast(array_klass)->element_klass();
     obj = oopFactory::new_objArray(elem_klass, length, CHECK);
   }
@@ -172,6 +173,7 @@
 JRT_ENTRY(void, JVMCIRuntime::new_multi_array(JavaThread* thread, Klass* klass, int rank, jint* dims))
   assert(klass->is_klass(), "not a class");
   assert(rank >= 1, "rank must be nonzero");
+  Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
   oop obj = ArrayKlass::cast(klass)->multi_allocate(rank, dims, CHECK);
   thread->set_vm_result(obj);
 JRT_END
@@ -642,15 +644,6 @@
            "HotSpotJVMCIRuntime initialization should only be triggered through JVMCI initialization");
 #endif
 
-    if (_compiler != NULL) {
-      JavaCallArguments args;
-      oop compiler = java_lang_String::create_oop_from_str(_compiler, CHECK);
-      args.push_oop(compiler);
-      callStatic("jdk/vm/ci/hotspot/HotSpotJVMCICompilerConfig",
-                 "selectCompiler",
-                 "(Ljava/lang/String;)Ljava/lang/Boolean;", &args, CHECK);
-    }
-
     Handle result = callStatic("jdk/vm/ci/hotspot/HotSpotJVMCIRuntime",
                                "runtime",
                                "()Ljdk/vm/ci/hotspot/HotSpotJVMCIRuntime;", NULL, CHECK);
@@ -783,66 +776,6 @@
   }
 JVM_END
 
-/**
- * Closure for parsing a line from a *.properties file in jre/lib/jvmci/properties.
- * The line must match the regular expression "[^=]+=.*". That is one or more
- * characters other than '=' followed by '=' followed by zero or more characters.
- * Everything before the '=' is the property name and everything after '=' is the value.
- * Lines that start with '#' are treated as comments and ignored.
- * No special processing of whitespace or any escape characters is performed.
- * The last definition of a property "wins" (i.e., it overrides all earlier
- * definitions of the property).
- */
-class JVMCIPropertiesFileClosure : public ParseClosure {
-  SystemProperty** _plist;
-public:
-  JVMCIPropertiesFileClosure(SystemProperty** plist) : _plist(plist) {}
-  void do_line(char* line) {
-    if (line[0] == '#') {
-      // skip comment
-      return;
-    }
-    size_t len = strlen(line);
-    char* sep = strchr(line, '=');
-    if (sep == NULL) {
-      warn_and_abort("invalid format: could not find '=' character");
-      return;
-    }
-    if (sep == line) {
-      warn_and_abort("invalid format: name cannot be empty");
-      return;
-    }
-    *sep = '\0';
-    const char* name = line;
-    char* value = sep + 1;
-    Arguments::PropertyList_unique_add(_plist, name, value);
-  }
-};
-
-void JVMCIRuntime::init_system_properties(SystemProperty** plist) {
-  char jvmciDir[JVM_MAXPATHLEN];
-  const char* fileSep = os::file_separator();
-  jio_snprintf(jvmciDir, sizeof(jvmciDir), "%s%slib%sjvmci",
-               Arguments::get_java_home(), fileSep, fileSep, fileSep);
-  DIR* dir = os::opendir(jvmciDir);
-  if (dir != NULL) {
-    struct dirent *entry;
-    char *dbuf = NEW_C_HEAP_ARRAY(char, os::readdir_buf_size(jvmciDir), mtInternal);
-    JVMCIPropertiesFileClosure closure(plist);
-    const unsigned suffix_len = (unsigned)strlen(".properties");
-    while ((entry = os::readdir(dir, (dirent *) dbuf)) != NULL && !closure.is_aborted()) {
-      const char* name = entry->d_name;
-      if (strlen(name) > suffix_len && strcmp(name + strlen(name) - suffix_len, ".properties") == 0) {
-        char propertiesFilePath[JVM_MAXPATHLEN];
-        jio_snprintf(propertiesFilePath, sizeof(propertiesFilePath), "%s%s%s",jvmciDir, fileSep, name);
-        JVMCIRuntime::parse_lines(propertiesFilePath, &closure, false);
-      }
-    }
-    FREE_C_HEAP_ARRAY(char, dbuf);
-    os::closedir(dir);
-  }
-}
-
 #define CHECK_WARN_ABORT_(message) THREAD); \
   if (HAS_PENDING_EXCEPTION) { \
     warning(message); \
@@ -853,12 +786,6 @@
   } \
   (void)(0
 
-void JVMCIRuntime::save_compiler(const char* compiler) {
-  assert(compiler != NULL, "npe");
-  assert(_compiler == NULL, "cannot reassign JVMCI compiler");
-  _compiler = compiler;
-}
-
 void JVMCIRuntime::shutdown(TRAPS) {
   if (_HotSpotJVMCIRuntime_instance != NULL) {
     _shutdown_called = true;
@@ -884,69 +811,3 @@
   }
   return false;
 }
-
-void JVMCIRuntime::parse_lines(char* path, ParseClosure* closure, bool warnStatFailure) {
-  struct stat st;
-  if (::stat(path, &st) == 0 && (st.st_mode & S_IFREG) == S_IFREG) { // exists & is regular file
-    int file_handle = ::open(path, os::default_file_open_flags(), 0);
-    if (file_handle != -1) {
-      char* buffer = NEW_C_HEAP_ARRAY(char, st.st_size + 1, mtInternal);
-      int num_read;
-      num_read = (int) ::read(file_handle, (char*) buffer, st.st_size);
-      if (num_read == -1) {
-        warning("Error reading file %s due to %s", path, strerror(errno));
-      } else if (num_read != st.st_size) {
-        warning("Only read %d of " SIZE_FORMAT " bytes from %s", num_read, (size_t) st.st_size, path);
-      }
-      ::close(file_handle);
-      closure->set_filename(path);
-      if (num_read == st.st_size) {
-        buffer[num_read] = '\0';
-
-        char* line = buffer;
-        while (line - buffer < num_read && !closure->is_aborted()) {
-          // find line end (\r, \n or \r\n)
-          char* nextline = NULL;
-          char* cr = strchr(line, '\r');
-          char* lf = strchr(line, '\n');
-          if (cr != NULL && lf != NULL) {
-            char* min = MIN2(cr, lf);
-            *min = '\0';
-            if (lf == cr + 1) {
-              nextline = lf + 1;
-            } else {
-              nextline = min + 1;
-            }
-          } else if (cr != NULL) {
-            *cr = '\0';
-            nextline = cr + 1;
-          } else if (lf != NULL) {
-            *lf = '\0';
-            nextline = lf + 1;
-          }
-          // trim left
-          while (*line == ' ' || *line == '\t') line++;
-          char* end = line + strlen(line);
-          // trim right
-          while (end > line && (*(end -1) == ' ' || *(end -1) == '\t')) end--;
-          *end = '\0';
-          // skip comments and empty lines
-          if (*line != '#' && strlen(line) > 0) {
-            closure->parse_line(line);
-          }
-          if (nextline != NULL) {
-            line = nextline;
-          } else {
-            // File without newline at the end
-            break;
-          }
-        }
-      }
-      FREE_C_HEAP_ARRAY(char, buffer);
-    } else {
-      warning("Error opening file %s due to %s", path, strerror(errno));
-    }
-  } else if (warnStatFailure) {
-    warning("Could not stat file %s due to %s", path, strerror(errno));
-  }
-}
--- a/src/share/vm/jvmci/jvmciRuntime.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/jvmci/jvmciRuntime.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -70,7 +70,6 @@
   static jobject _HotSpotJVMCIRuntime_instance;
   static bool _HotSpotJVMCIRuntime_initialized;
   static bool _well_known_classes_initialized;
-  static const char* _compiler;
 
   static int _trivial_prefixes_count;
   static char** _trivial_prefixes;
@@ -85,19 +84,9 @@
   static Handle create_Service(const char* name, TRAPS);
 
  public:
-
-  /**
-   * Parses *.properties files in jre/lib/jvmci/ and adds the properties to plist.
-   */
-  static void init_system_properties(SystemProperty** plist);
-
-  /**
-   * Saves the value of the "jvmci.compiler" system property for processing
-   * when JVMCI is initialized.
-   */
-  static void save_compiler(const char* compiler);
-
-  static bool is_HotSpotJVMCIRuntime_initialized() { return _HotSpotJVMCIRuntime_initialized; }
+  static bool is_HotSpotJVMCIRuntime_initialized() {
+    return _HotSpotJVMCIRuntime_initialized;
+  }
 
   /**
    * Gets the singleton HotSpotJVMCIRuntime instance, initializing it if necessary
@@ -136,7 +125,6 @@
   }
 
   static bool treat_as_trivial(Method* method);
-  static void parse_lines(char* path, ParseClosure* closure, bool warnStatFailure);
 
   static BasicType kindToBasicType(Handle kind, TRAPS);
 
--- a/src/share/vm/jvmci/vmStructs_jvmci.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/jvmci/vmStructs_jvmci.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -639,11 +639,12 @@
   declare_constant(VM_Version::CPU_AVX512DQ)                        \
   declare_constant(VM_Version::CPU_AVX512PF)                        \
   declare_constant(VM_Version::CPU_AVX512ER)                        \
-  declare_constant(VM_Version::CPU_AVX512CD)                        \
-  declare_constant(VM_Version::CPU_AVX512BW)
+  declare_constant(VM_Version::CPU_AVX512CD)
 
 #define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
-  declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL)
+  declare_preprocessor_constant("VM_Version::CPU_AVX512BW", CPU_AVX512BW) \
+  declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL) \
+  declare_preprocessor_constant("VM_Version::CPU_SHA", CPU_SHA)
 
 #endif // TARGET_ARCH_x86
 
--- a/src/share/vm/logging/log.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/logging/log.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -107,18 +107,25 @@
     return LogTagSetMapping<T0, T1, T2, T3, T4>::tagset().is_level(level);
   }
 
+  ATTRIBUTE_PRINTF(2, 3)
+  static void write(LogLevelType level, const char* fmt, ...) {
+    va_list args;
+    va_start(args, fmt);
+    vwrite(level, fmt, args);
+    va_end(args);
+  };
+
   template <LogLevelType Level>
   ATTRIBUTE_PRINTF(1, 2)
   static void write(const char* fmt, ...) {
     va_list args;
     va_start(args, fmt);
-    vwrite<Level>(fmt, args);
+    vwrite(Level, fmt, args);
     va_end(args);
   };
 
-  template <LogLevelType Level>
-  ATTRIBUTE_PRINTF(1, 0)
-  static void vwrite(const char* fmt, va_list args) {
+  ATTRIBUTE_PRINTF(2, 0)
+  static void vwrite(LogLevelType level, const char* fmt, va_list args) {
     char buf[LogBufferSize];
     va_list saved_args;         // For re-format on buf overflow.
     va_copy(saved_args, args);
@@ -132,27 +139,26 @@
       prefix_len = LogPrefix<T0, T1, T2, T3, T4>::prefix(newbuf, newbuf_len);
       ret = os::log_vsnprintf(newbuf + prefix_len, newbuf_len - prefix_len, fmt, saved_args);
       assert(ret >= 0, "Log message buffer issue");
-      puts<Level>(newbuf);
+      puts(level, newbuf);
       FREE_C_HEAP_ARRAY(char, newbuf);
     } else {
-      puts<Level>(buf);
+      puts(level, buf);
     }
   }
 
-  template <LogLevelType Level>
-  static void puts(const char* string) {
-    LogTagSetMapping<T0, T1, T2, T3, T4>::tagset().log(Level, string);
+  static void puts(LogLevelType level, const char* string) {
+    LogTagSetMapping<T0, T1, T2, T3, T4>::tagset().log(level, string);
   }
 
 #define LOG_LEVEL(level, name) ATTRIBUTE_PRINTF(2, 0) \
   Log& v##name(const char* fmt, va_list args) { \
-    vwrite<LogLevel::level>(fmt, args); \
+    vwrite(LogLevel::level, fmt, args); \
     return *this; \
   } \
   Log& name(const char* fmt, ...) ATTRIBUTE_PRINTF(2, 3) { \
     va_list args; \
     va_start(args, fmt); \
-    vwrite<LogLevel::level>(fmt, args); \
+    vwrite(LogLevel::level, fmt, args); \
     va_end(args); \
     return *this; \
   } \
--- a/src/share/vm/memory/iterator.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/memory/iterator.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -285,9 +285,12 @@
  protected:
   void do_nmethod(nmethod* nm);
  public:
+  // If fix_relocations(), then cl must copy objects to their new location immediately to avoid
+  // patching nmethods with the old locations.
   CodeBlobToOopClosure(OopClosure* cl, bool fix_relocations) : _cl(cl), _fix_relocations(fix_relocations) {}
   virtual void do_code_blob(CodeBlob* cb);
 
+  bool fix_relocations() const { return _fix_relocations; }
   const static bool FixRelocations = true;
 };
 
--- a/src/share/vm/oops/method.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/oops/method.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1338,73 +1338,6 @@
   return vmSymbols::find_sid(klass_name);
 }
 
-static bool is_unsafe_alias(vmSymbols::SID name_id) {
-  // All 70 intrinsic candidate methods from sun.misc.Unsafe in 1.8.
-  // Some have the same method name but different signature, e.g.
-  // getByte(long), getByte(Object,long)
-  switch (name_id) {
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(allocateInstance_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(copyMemory_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(loadFence_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(storeFence_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(fullFence_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getObject_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getBoolean_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getByte_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getShort_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getChar_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getInt_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getLong_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getFloat_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getDouble_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putObject_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putBoolean_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putByte_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putShort_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putChar_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putInt_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putLong_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putFloat_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putDouble_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getObjectVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getBooleanVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getByteVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getShortVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getCharVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getIntVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getLongVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getFloatVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getDoubleVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putObjectVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putBooleanVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putByteVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putShortVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putCharVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putIntVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putLongVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putFloatVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putDoubleVolatile_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getAddress_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putAddress_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(compareAndSwapObject_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(compareAndSwapLong_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(compareAndSwapInt_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putOrderedObject_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putOrderedLong_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(putOrderedInt_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndAddInt_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndAddLong_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndSetInt_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndSetLong_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(getAndSetObject_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(park_name):
-    case vmSymbols::VM_SYMBOL_ENUM_NAME(unpark_name):
-      return true;
-  }
-
-  return false;
-}
-
 void Method::init_intrinsic_id() {
   assert(_intrinsic_id == vmIntrinsics::_none, "do this just once");
   const uintptr_t max_id_uint = right_n_bits((int)(sizeof(_intrinsic_id) * BitsPerByte));
@@ -1457,14 +1390,6 @@
     if (is_static() != MethodHandles::is_signature_polymorphic_static(id))
       id = vmIntrinsics::_none;
     break;
-
-  case vmSymbols::VM_SYMBOL_ENUM_NAME(sun_misc_Unsafe):
-    // Map sun.misc.Unsafe to jdk.internal.misc.Unsafe
-    if (!is_unsafe_alias(name_id))  break;
-    // pretend it is the corresponding method in the internal Unsafe class:
-    klass_id = vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_internal_misc_Unsafe);
-    id = vmIntrinsics::find_id(klass_id, name_id, sig_id, flags);
-    break;
   }
 
   if (id != vmIntrinsics::_none) {
--- a/src/share/vm/oops/oop.inline.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/oops/oop.inline.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -295,7 +295,7 @@
 // in inner GC loops so these are separated.
 
 inline bool check_obj_alignment(oop obj) {
-  return cast_from_oop<intptr_t>(obj) % MinObjAlignmentInBytes == 0;
+  return (cast_from_oop<intptr_t>(obj) & MinObjAlignmentInBytesMask) == 0;
 }
 
 oop oopDesc::decode_heap_oop_not_null(narrowOop v) {
--- a/src/share/vm/oops/symbol.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/oops/symbol.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -158,9 +158,21 @@
 }
 
 void Symbol::print_symbol_on(outputStream* st) const {
-  ResourceMark rm;
+  char *s;
   st = st ? st : tty;
-  st->print("%s", as_quoted_ascii());
+  {
+    // ResourceMark may not affect st->print(). If st is a string
+    // stream it could resize, using the same resource arena.
+    ResourceMark rm;
+    s = as_quoted_ascii();
+    s = os::strdup(s);
+  }
+  if (s == NULL) {
+    st->print("(null)");
+  } else {
+    st->print("%s", s);
+    os::free(s);
+  }
 }
 
 char* Symbol::as_quoted_ascii() const {
--- a/src/share/vm/opto/c2compiler.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/opto/c2compiler.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -498,6 +498,7 @@
   case vmIntrinsics::_currentTimeMillis:
   case vmIntrinsics::_nanoTime:
   case vmIntrinsics::_allocateInstance:
+  case vmIntrinsics::_allocateUninitializedArray:
   case vmIntrinsics::_newArray:
   case vmIntrinsics::_getLength:
   case vmIntrinsics::_copyOf:
--- a/src/share/vm/opto/callnode.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/opto/callnode.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1243,13 +1243,13 @@
 
 Node *SafePointNode::peek_monitor_box() const {
   int mon = jvms()->nof_monitors() - 1;
-  assert(mon >= 0, "most have a monitor");
+  assert(mon >= 0, "must have a monitor");
   return monitor_box(jvms(), mon);
 }
 
 Node *SafePointNode::peek_monitor_obj() const {
   int mon = jvms()->nof_monitors() - 1;
-  assert(mon >= 0, "most have a monitor");
+  assert(mon >= 0, "must have a monitor");
   return monitor_obj(jvms(), mon);
 }
 
--- a/src/share/vm/opto/cfgnode.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/opto/cfgnode.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1665,7 +1665,7 @@
 
   bool uncasted = false;
   Node* uin = unique_input(phase, false);
-  if (uin == NULL) {
+  if (uin == NULL && can_reshape) {
     uncasted = true;
     uin = unique_input(phase, true);
   }
@@ -1702,6 +1702,8 @@
     }
 
     if (uncasted) {
+      // Wait until after parsing for the type information to propagate from the casts
+      assert(can_reshape, "Invalid during parsing");
       const Type* phi_type = bottom_type();
       assert(phi_type->isa_int() || phi_type->isa_ptr(), "bad phi type");
       int opcode;
@@ -1720,8 +1722,9 @@
       Node* cast = ConstraintCastNode::make_cast(opcode, r, uin, phi_type, true);
       cast = phase->transform(cast);
       // set all inputs to the new cast so the Phi is removed by Identity
+      PhaseIterGVN* igvn = phase->is_IterGVN();
       for (uint i = 1; i < req(); i++) {
-        set_req(i, cast);
+        set_req_X(i, cast, igvn);
       }
       uin = cast;
     }
--- a/src/share/vm/opto/compile.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/opto/compile.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1118,7 +1118,11 @@
   bool           in_scratch_emit_size() const   { return _in_scratch_emit_size;     }
 
   enum ScratchBufferBlob {
+#if defined(PPC64)
+    MAX_inst_size       = 2048,
+#else
     MAX_inst_size       = 1024,
+#endif
     MAX_locs_size       = 128, // number of relocInfo elements
     MAX_const_size      = 128,
     MAX_stubs_size      = 128
--- a/src/share/vm/opto/library_call.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/opto/library_call.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -48,6 +48,7 @@
 #include "opto/runtime.hpp"
 #include "opto/subnode.hpp"
 #include "prims/nativeLookup.hpp"
+#include "prims/unsafe.hpp"
 #include "runtime/sharedRuntime.hpp"
 #ifdef TRACE_HAVE_INTRINSICS
 #include "trace/traceMacros.hpp"
@@ -248,6 +249,7 @@
   bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, AccessKind kind, bool is_unaligned);
   static bool klass_needs_init_guard(Node* kls);
   bool inline_unsafe_allocate();
+  bool inline_unsafe_newArray(bool uninitialized);
   bool inline_unsafe_copyMemory();
   bool inline_native_currentThread();
 
@@ -255,8 +257,6 @@
   bool inline_native_isInterrupted();
   bool inline_native_Class_query(vmIntrinsics::ID id);
   bool inline_native_subtype_check();
-
-  bool inline_native_newArray();
   bool inline_native_getLength();
   bool inline_array_copyOf(bool is_copyOfRange);
   bool inline_array_equals(StrIntrinsicNode::ArgEnc ae);
@@ -709,7 +709,6 @@
   case vmIntrinsics::_nanoTime:                 return inline_native_time_funcs(CAST_FROM_FN_PTR(address, os::javaTimeNanos), "nanoTime");
   case vmIntrinsics::_allocateInstance:         return inline_unsafe_allocate();
   case vmIntrinsics::_copyMemory:               return inline_unsafe_copyMemory();
-  case vmIntrinsics::_newArray:                 return inline_native_newArray();
   case vmIntrinsics::_getLength:                return inline_native_getLength();
   case vmIntrinsics::_copyOf:                   return inline_array_copyOf(false);
   case vmIntrinsics::_copyOfRange:              return inline_array_copyOf(true);
@@ -718,6 +717,9 @@
   case vmIntrinsics::_Objects_checkIndex:       return inline_objects_checkIndex();
   case vmIntrinsics::_clone:                    return inline_native_clone(intrinsic()->is_virtual());
 
+  case vmIntrinsics::_allocateUninitializedArray: return inline_unsafe_newArray(true);
+  case vmIntrinsics::_newArray:                   return inline_unsafe_newArray(false);
+
   case vmIntrinsics::_isAssignableFrom:         return inline_native_subtype_check();
 
   case vmIntrinsics::_isInstance:
@@ -2298,9 +2300,6 @@
 }
 
 
-// Interpret Unsafe.fieldOffset cookies correctly:
-extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
-
 const TypeOopPtr* LibraryCallKit::sharpen_unsafe_type(Compile::AliasType* alias_type, const TypePtr *adr_type, bool is_native_ptr) {
   // Attempt to infer a sharper value type from the offset and base type.
   ciKlass* sharpened_klass = NULL;
@@ -3777,9 +3776,17 @@
 
 //-----------------------inline_native_newArray--------------------------
 // private static native Object java.lang.reflect.newArray(Class<?> componentType, int length);
-bool LibraryCallKit::inline_native_newArray() {
-  Node* mirror    = argument(0);
-  Node* count_val = argument(1);
+// private        native Object Unsafe.allocateUninitializedArray0(Class<?> cls, int size);
+bool LibraryCallKit::inline_unsafe_newArray(bool uninitialized) {
+  Node* mirror;
+  Node* count_val;
+  if (uninitialized) {
+    mirror    = argument(1);
+    count_val = argument(2);
+  } else {
+    mirror    = argument(0);
+    count_val = argument(1);
+  }
 
   mirror = null_check(mirror);
   // If mirror or obj is dead, only null-path is taken.
@@ -3824,6 +3831,12 @@
     result_val->init_req(_normal_path, obj);
     result_io ->init_req(_normal_path, i_o());
     result_mem->init_req(_normal_path, reset_memory());
+
+    if (uninitialized) {
+      // Mark the allocation so that zeroing is skipped
+      AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(obj, &_gvn);
+      alloc->maybe_set_complete(&_gvn);
+    }
   }
 
   // Return the combined state.
@@ -4412,7 +4425,7 @@
 }
 
 //----------------------inline_unsafe_copyMemory-------------------------
-// public native void Unsafe.copyMemory(Object srcBase, long srcOffset, Object destBase, long destOffset, long bytes);
+// public native void Unsafe.copyMemory0(Object srcBase, long srcOffset, Object destBase, long destOffset, long bytes);
 bool LibraryCallKit::inline_unsafe_copyMemory() {
   if (callee()->is_static())  return false;  // caller must have the capability!
   null_check_receiver();  // null-check receiver
--- a/src/share/vm/opto/matcher.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/opto/matcher.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -399,10 +399,6 @@
   // Optional scaling for the parameter to the ClearArray/CopyArray node.
   static const bool init_array_count_is_in_bytes;
 
-  // Threshold small size (in bytes) for a ClearArray/CopyArray node.
-  // Anything this size or smaller may get converted to discrete scalar stores.
-  static const int init_array_short_size;
-
   // Some hardware needs 2 CMOV's for longs.
   static const int long_cmove_cost();
 
--- a/src/share/vm/opto/memnode.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/opto/memnode.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -2741,6 +2741,9 @@
 //------------------------------Idealize---------------------------------------
 // Clearing a short array is faster with stores
 Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){
+  // Already know this is a large node, do not try to ideal it
+  if (_is_large) return NULL;
+
   const int unit = BytesPerLong;
   const TypeX* t = phase->type(in(2))->isa_intptr_t();
   if (!t)  return NULL;
@@ -2753,8 +2756,11 @@
   // (see jck test stmt114.stmt11402.val).
   if (size <= 0 || size % unit != 0)  return NULL;
   intptr_t count = size / unit;
-  // Length too long; use fast hardware clear
-  if (size > Matcher::init_array_short_size)  return NULL;
+  // Length too long; communicate this to matchers and assemblers.
+  // Assemblers are responsible to produce fast hardware clears for it.
+  if (size > InitArrayShortSize) {
+    return new ClearArrayNode(in(0), in(1), in(2), in(3), true);
+  }
   Node *mem = in(1);
   if( phase->type(mem)==Type::TOP ) return NULL;
   Node *adr = in(3);
@@ -2852,7 +2858,7 @@
   // Bulk clear double-words
   Node* zsize = phase->transform(new SubXNode(zend, zbase) );
   Node* adr = phase->transform(new AddPNode(dest, dest, start_offset) );
-  mem = new ClearArrayNode(ctl, mem, zsize, adr);
+  mem = new ClearArrayNode(ctl, mem, zsize, adr, false);
   return phase->transform(mem);
 }
 
@@ -3901,7 +3907,7 @@
                                               zeroes_done, zeroes_needed,
                                               phase);
         zeroes_done = zeroes_needed;
-        if (zsize > Matcher::init_array_short_size && ++big_init_gaps > 2)
+        if (zsize > InitArrayShortSize && ++big_init_gaps > 2)
           do_zeroing = false;   // leave the hole, next time
       }
     }
--- a/src/share/vm/opto/memnode.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/opto/memnode.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1013,9 +1013,11 @@
 
 //------------------------------ClearArray-------------------------------------
 class ClearArrayNode: public Node {
+private:
+  bool _is_large;
 public:
-  ClearArrayNode( Node *ctrl, Node *arymem, Node *word_cnt, Node *base )
-    : Node(ctrl,arymem,word_cnt,base) {
+  ClearArrayNode( Node *ctrl, Node *arymem, Node *word_cnt, Node *base, bool is_large)
+    : Node(ctrl,arymem,word_cnt,base), _is_large(is_large) {
     init_class_id(Class_ClearArray);
   }
   virtual int         Opcode() const;
@@ -1026,6 +1028,7 @@
   virtual Node* Identity(PhaseGVN* phase);
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual uint match_edge(uint idx) const;
+  bool is_large() const { return _is_large; }
 
   // Clear the given area of an object or array.
   // The start offset must always be aligned mod BytesPerInt.
--- a/src/share/vm/opto/runtime.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/opto/runtime.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -220,22 +220,17 @@
 
   // These checks are cheap to make and support reflective allocation.
   int lh = klass->layout_helper();
-  if (Klass::layout_helper_needs_slow_path(lh)
-      || !InstanceKlass::cast(klass)->is_initialized()) {
-    KlassHandle kh(THREAD, klass);
-    kh->check_valid_for_instantiation(false, THREAD);
+  if (Klass::layout_helper_needs_slow_path(lh) || !InstanceKlass::cast(klass)->is_initialized()) {
+    Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
+    klass->check_valid_for_instantiation(false, THREAD);
     if (!HAS_PENDING_EXCEPTION) {
-      InstanceKlass::cast(kh())->initialize(THREAD);
-    }
-    if (!HAS_PENDING_EXCEPTION) {
-      klass = kh();
-    } else {
-      klass = NULL;
+      InstanceKlass::cast(klass)->initialize(THREAD);
     }
   }
 
-  if (klass != NULL) {
+  if (!HAS_PENDING_EXCEPTION) {
     // Scavenge and allocate an instance.
+    Handle holder(THREAD, klass->klass_holder()); // keep the klass alive
     oop result = InstanceKlass::cast(klass)->allocate_instance(THREAD);
     thread->set_vm_result(result);
 
@@ -275,6 +270,7 @@
     // Although the oopFactory likes to work with the elem_type,
     // the compiler prefers the array_type, since it must already have
     // that latter value in hand for the fast path.
+    Handle holder(THREAD, array_type->klass_holder()); // keep the array klass alive
     Klass* elem_type = ObjArrayKlass::cast(array_type)->element_klass();
     result = oopFactory::new_objArray(elem_type, len, THREAD);
   }
@@ -353,6 +349,7 @@
   jint dims[2];
   dims[0] = len1;
   dims[1] = len2;
+  Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
   oop obj = ArrayKlass::cast(elem_type)->multi_allocate(2, dims, THREAD);
   deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
   thread->set_vm_result(obj);
@@ -369,6 +366,7 @@
   dims[0] = len1;
   dims[1] = len2;
   dims[2] = len3;
+  Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
   oop obj = ArrayKlass::cast(elem_type)->multi_allocate(3, dims, THREAD);
   deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
   thread->set_vm_result(obj);
@@ -386,6 +384,7 @@
   dims[1] = len2;
   dims[2] = len3;
   dims[3] = len4;
+  Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
   oop obj = ArrayKlass::cast(elem_type)->multi_allocate(4, dims, THREAD);
   deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
   thread->set_vm_result(obj);
@@ -404,6 +403,7 @@
   dims[2] = len3;
   dims[3] = len4;
   dims[4] = len5;
+  Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
   oop obj = ArrayKlass::cast(elem_type)->multi_allocate(5, dims, THREAD);
   deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
   thread->set_vm_result(obj);
@@ -421,6 +421,7 @@
   jint *c_dims = NEW_RESOURCE_ARRAY(jint, len);
   Copy::conjoint_jints_atomic(j_dims, c_dims, len);
 
+  Handle holder(THREAD, elem_type->klass_holder()); // keep the klass alive
   oop obj = ArrayKlass::cast(elem_type)->multi_allocate(len, c_dims, THREAD);
   deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
   thread->set_vm_result(obj);
--- a/src/share/vm/prims/nativeLookup.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/prims/nativeLookup.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -35,6 +35,7 @@
 #include "oops/symbol.hpp"
 #include "prims/jvm_misc.hpp"
 #include "prims/nativeLookup.hpp"
+#include "prims/unsafe.hpp"
 #include "runtime/arguments.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/javaCalls.hpp"
@@ -109,8 +110,6 @@
 }
 
 extern "C" {
-  void JNICALL JVM_RegisterJDKInternalMiscUnsafeMethods(JNIEnv *env, jclass unsafecls);
-  void JNICALL JVM_RegisterSunMiscUnsafeMethods(JNIEnv *env, jclass unsafecls);
   void JNICALL JVM_RegisterMethodHandleMethods(JNIEnv *env, jclass unsafecls);
   void JNICALL JVM_RegisterPerfMethods(JNIEnv *env, jclass perfclass);
   void JNICALL JVM_RegisterWhiteBoxMethods(JNIEnv *env, jclass wbclass);
@@ -125,7 +124,6 @@
 
 static JNINativeMethod lookup_special_native_methods[] = {
   { CC"Java_jdk_internal_misc_Unsafe_registerNatives",             NULL, FN_PTR(JVM_RegisterJDKInternalMiscUnsafeMethods) },
-  { CC"Java_sun_misc_Unsafe_registerNatives",                      NULL, FN_PTR(JVM_RegisterSunMiscUnsafeMethods)         },
   { CC"Java_java_lang_invoke_MethodHandleNatives_registerNatives", NULL, FN_PTR(JVM_RegisterMethodHandleMethods) },
   { CC"Java_jdk_internal_perf_Perf_registerNatives",               NULL, FN_PTR(JVM_RegisterPerfMethods)         },
   { CC"Java_sun_hotspot_WhiteBox_registerNatives",                 NULL, FN_PTR(JVM_RegisterWhiteBoxMethods)     },
--- a/src/share/vm/prims/unsafe.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/prims/unsafe.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -30,6 +30,7 @@
 #include "oops/oop.inline.hpp"
 #include "prims/jni.h"
 #include "prims/jvm.h"
+#include "prims/unsafe.hpp"
 #include "runtime/atomic.inline.hpp"
 #include "runtime/globals.hpp"
 #include "runtime/interfaceSupport.hpp"
@@ -45,8 +46,8 @@
 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
 #endif // INCLUDE_ALL_GCS
 
-/*
- *      Implementation of class Unsafe
+/**
+ * Implementation of the jdk.internal.misc.Unsafe class
  */
 
 
@@ -56,25 +57,15 @@
 
 
 #define UNSAFE_ENTRY(result_type, header) \
-  JVM_ENTRY(result_type, header)
+  JVM_ENTRY(static result_type, header)
 
-// Can't use UNSAFE_LEAF because it has the signature of a straight
-// call into the runtime (just like JVM_LEAF, funny that) but it's
-// called like a Java Native and thus the wrapper built for it passes
-// arguments like a JNI call.  It expects those arguments to be popped
-// from the stack on Intel like all good JNI args are, and adjusts the
-// stack according.  Since the JVM_LEAF call expects no extra
-// arguments the stack isn't popped in the C code, is pushed by the
-// wrapper and we get sick.
-//#define UNSAFE_LEAF(result_type, header) \
-//  JVM_LEAF(result_type, header)
+#define UNSAFE_LEAF(result_type, header) \
+  JVM_LEAF(static result_type, header)
 
 #define UNSAFE_END JVM_END
 
-#define UnsafeWrapper(arg) /*nothing, for the present*/
 
-
-inline void* addr_from_java(jlong addr) {
+static inline void* addr_from_java(jlong addr) {
   // This assert fails in a variety of ways on 32-bit systems.
   // It is impossible to predict whether native code that converts
   // pointers to longs will sign-extend or zero-extend the addresses.
@@ -82,7 +73,7 @@
   return (void*)(uintptr_t)addr;
 }
 
-inline jlong addr_to_java(void* p) {
+static inline jlong addr_to_java(void* p) {
   assert(p == (void*)(uintptr_t)p, "must not be odd high bits");
   return (uintptr_t)p;
 }
@@ -98,24 +89,17 @@
 // through conversion functions when going between the VM and the Unsafe API.
 // The conversion functions just happen to be no-ops at present.
 
-inline jlong field_offset_to_byte_offset(jlong field_offset) {
+static inline jlong field_offset_to_byte_offset(jlong field_offset) {
   return field_offset;
 }
 
-inline jlong field_offset_from_byte_offset(jlong byte_offset) {
+static inline jlong field_offset_from_byte_offset(jlong byte_offset) {
   return byte_offset;
 }
 
-inline jint invocation_key_from_method_slot(jint slot) {
-  return slot;
-}
+static inline void* index_oop_from_field_offset_long(oop p, jlong field_offset) {
+  jlong byte_offset = field_offset_to_byte_offset(field_offset);
 
-inline jint invocation_key_to_method_slot(jint key) {
-  return key;
-}
-
-inline void* index_oop_from_field_offset_long(oop p, jlong field_offset) {
-  jlong byte_offset = field_offset_to_byte_offset(field_offset);
 #ifdef ASSERT
   if (p != NULL) {
     assert(byte_offset >= 0 && byte_offset <= (jlong)MAX_OBJECT_SIZE, "sane offset");
@@ -128,10 +112,12 @@
     assert(byte_offset < p_size, "Unsafe access: offset " INT64_FORMAT " > object's size " INT64_FORMAT, byte_offset, p_size);
   }
 #endif
-  if (sizeof(char*) == sizeof(jint))    // (this constant folds!)
+
+  if (sizeof(char*) == sizeof(jint)) {   // (this constant folds!)
     return (address)p + (jint) byte_offset;
-  else
+  } else {
     return (address)p +        byte_offset;
+  }
 }
 
 // Externally callable versions:
@@ -142,12 +128,6 @@
 jlong Unsafe_field_offset_from_byte_offset(jlong byte_offset) {
   return byte_offset;
 }
-jint Unsafe_invocation_key_from_method_slot(jint slot) {
-  return invocation_key_from_method_slot(slot);
-}
-jint Unsafe_invocation_key_to_method_slot(jint key) {
-  return invocation_key_to_method_slot(key);
-}
 
 
 ///// Data in the Java heap.
@@ -177,17 +157,19 @@
 // These functions allow a null base pointer with an arbitrary address.
 // But if the base pointer is non-null, the offset should make some sense.
 // That is, it should be in the range [0, MAX_OBJECT_SIZE].
-UNSAFE_ENTRY(jobject, Unsafe_GetObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset))
-  UnsafeWrapper("Unsafe_GetObject");
+UNSAFE_ENTRY(jobject, Unsafe_GetObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset)) {
   oop p = JNIHandles::resolve(obj);
   oop v;
+
   if (UseCompressedOops) {
     narrowOop n = *(narrowOop*)index_oop_from_field_offset_long(p, offset);
     v = oopDesc::decode_heap_oop(n);
   } else {
     v = *(oop*)index_oop_from_field_offset_long(p, offset);
   }
+
   jobject ret = JNIHandles::make_local(env, v);
+
 #if INCLUDE_ALL_GCS
   // We could be accessing the referent field in a reference
   // object. If G1 is enabled then we need to register non-null
@@ -212,67 +194,71 @@
     }
   }
 #endif // INCLUDE_ALL_GCS
+
   return ret;
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_SetObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h))
-  UnsafeWrapper("Unsafe_SetObject");
+UNSAFE_ENTRY(void, Unsafe_SetObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h)) {
   oop x = JNIHandles::resolve(x_h);
   oop p = JNIHandles::resolve(obj);
+
   if (UseCompressedOops) {
     oop_store((narrowOop*)index_oop_from_field_offset_long(p, offset), x);
   } else {
     oop_store((oop*)index_oop_from_field_offset_long(p, offset), x);
   }
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(jobject, Unsafe_GetObjectVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset))
-  UnsafeWrapper("Unsafe_GetObjectVolatile");
+UNSAFE_ENTRY(jobject, Unsafe_GetObjectVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset)) {
   oop p = JNIHandles::resolve(obj);
   void* addr = index_oop_from_field_offset_long(p, offset);
+
   volatile oop v;
+
   if (UseCompressedOops) {
     volatile narrowOop n = *(volatile narrowOop*) addr;
     (void)const_cast<oop&>(v = oopDesc::decode_heap_oop(n));
   } else {
     (void)const_cast<oop&>(v = *(volatile oop*) addr);
   }
+
   OrderAccess::acquire();
   return JNIHandles::make_local(env, v);
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_SetObjectVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h))
-  UnsafeWrapper("Unsafe_SetObjectVolatile");
+UNSAFE_ENTRY(void, Unsafe_SetObjectVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h)) {
   oop x = JNIHandles::resolve(x_h);
   oop p = JNIHandles::resolve(obj);
   void* addr = index_oop_from_field_offset_long(p, offset);
   OrderAccess::release();
+
   if (UseCompressedOops) {
     oop_store((narrowOop*)addr, x);
   } else {
     oop_store((oop*)addr, x);
   }
+
   OrderAccess::fence();
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(jobject, Unsafe_GetUncompressedObject(JNIEnv *env, jobject unsafe, jlong addr))
-  UnsafeWrapper("Unsafe_GetUncompressedObject");
+UNSAFE_ENTRY(jobject, Unsafe_GetUncompressedObject(JNIEnv *env, jobject unsafe, jlong addr)) {
   oop v = *(oop*) (address) addr;
+
   return JNIHandles::make_local(env, v);
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(jclass, Unsafe_GetJavaMirror(JNIEnv *env, jobject unsafe, jlong metaspace_klass))
-  UnsafeWrapper("Unsafe_GetJavaMirror");
+UNSAFE_ENTRY(jclass, Unsafe_GetJavaMirror(JNIEnv *env, jobject unsafe, jlong metaspace_klass)) {
   Klass* klass = (Klass*) (address) metaspace_klass;
+
   return (jclass) JNIHandles::make_local(klass->java_mirror());
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(jlong, Unsafe_GetKlassPointer(JNIEnv *env, jobject unsafe, jobject obj))
-  UnsafeWrapper("Unsafe_GetKlassPointer");
+UNSAFE_ENTRY(jlong, Unsafe_GetKlassPointer(JNIEnv *env, jobject unsafe, jobject obj)) {
   oop o = JNIHandles::resolve(obj);
   jlong klass = (jlong) (address) o->klass();
+
   return klass;
-UNSAFE_END
+} UNSAFE_END
 
 #ifndef SUPPORTS_NATIVE_CX8
 
@@ -303,83 +289,54 @@
 // the address of the field _after_ we have acquired the lock, else the object may have
 // been moved by the GC
 
-UNSAFE_ENTRY(jlong, Unsafe_GetLongVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset))
-  UnsafeWrapper("Unsafe_GetLongVolatile");
-  {
-    if (VM_Version::supports_cx8()) {
-      GET_FIELD_VOLATILE(obj, offset, jlong, v);
-      return v;
-    }
-    else {
-      Handle p (THREAD, JNIHandles::resolve(obj));
-      jlong* addr = (jlong*)(index_oop_from_field_offset_long(p(), offset));
-      MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag);
-      jlong value = Atomic::load(addr);
-      return value;
-    }
+UNSAFE_ENTRY(jlong, Unsafe_GetLongVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset)) {
+  if (VM_Version::supports_cx8()) {
+    GET_FIELD_VOLATILE(obj, offset, jlong, v);
+    return v;
+  } else {
+    Handle p (THREAD, JNIHandles::resolve(obj));
+    jlong* addr = (jlong*)(index_oop_from_field_offset_long(p(), offset));
+    MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag);
+    jlong value = Atomic::load(addr);
+    return value;
   }
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_SetLongVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong x))
-  UnsafeWrapper("Unsafe_SetLongVolatile");
-  {
-    if (VM_Version::supports_cx8()) {
-      SET_FIELD_VOLATILE(obj, offset, jlong, x);
-    }
-    else {
-      Handle p (THREAD, JNIHandles::resolve(obj));
-      jlong* addr = (jlong*)(index_oop_from_field_offset_long(p(), offset));
-      MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag);
-      Atomic::store(x, addr);
-    }
+UNSAFE_ENTRY(void, Unsafe_SetLongVolatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong x)) {
+  if (VM_Version::supports_cx8()) {
+    SET_FIELD_VOLATILE(obj, offset, jlong, x);
+  } else {
+    Handle p (THREAD, JNIHandles::resolve(obj));
+    jlong* addr = (jlong*)(index_oop_from_field_offset_long(p(), offset));
+    MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag);
+    Atomic::store(x, addr);
   }
-UNSAFE_END
+} UNSAFE_END
 
 #endif // not SUPPORTS_NATIVE_CX8
 
-UNSAFE_ENTRY(jboolean, Unsafe_isBigEndian0(JNIEnv *env, jobject unsafe))
-  UnsafeWrapper("Unsafe_IsBigEndian0");
-  {
+UNSAFE_LEAF(jboolean, Unsafe_isBigEndian0(JNIEnv *env, jobject unsafe)) {
 #ifdef VM_LITTLE_ENDIAN
-    return false;
+  return false;
 #else
-    return true;
+  return true;
 #endif
-  }
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(jint, Unsafe_unalignedAccess0(JNIEnv *env, jobject unsafe))
-  UnsafeWrapper("Unsafe_UnalignedAccess0");
-  {
-    return UseUnalignedAccesses;
-  }
-UNSAFE_END
+UNSAFE_LEAF(jint, Unsafe_unalignedAccess0(JNIEnv *env, jobject unsafe)) {
+  return UseUnalignedAccesses;
+} UNSAFE_END
 
-#define DEFINE_GETSETOOP(jboolean, Boolean) \
+#define DEFINE_GETSETOOP(java_type, Type)        \
  \
-UNSAFE_ENTRY(jboolean, Unsafe_Get##Boolean##140(JNIEnv *env, jobject unsafe, jobject obj, jint offset)) \
-  UnsafeWrapper("Unsafe_Get"#Boolean); \
-  if (obj == NULL)  THROW_0(vmSymbols::java_lang_NullPointerException()); \
-  GET_FIELD(obj, offset, jboolean, v); \
+UNSAFE_ENTRY(java_type, Unsafe_Get##Type(JNIEnv *env, jobject unsafe, jobject obj, jlong offset)) { \
+  GET_FIELD(obj, offset, java_type, v); \
   return v; \
-UNSAFE_END \
+} UNSAFE_END \
  \
-UNSAFE_ENTRY(void, Unsafe_Set##Boolean##140(JNIEnv *env, jobject unsafe, jobject obj, jint offset, jboolean x)) \
-  UnsafeWrapper("Unsafe_Set"#Boolean); \
-  if (obj == NULL)  THROW(vmSymbols::java_lang_NullPointerException()); \
-  SET_FIELD(obj, offset, jboolean, x); \
-UNSAFE_END \
- \
-UNSAFE_ENTRY(jboolean, Unsafe_Get##Boolean(JNIEnv *env, jobject unsafe, jobject obj, jlong offset)) \
-  UnsafeWrapper("Unsafe_Get"#Boolean); \
-  GET_FIELD(obj, offset, jboolean, v); \
-  return v; \
-UNSAFE_END \
- \
-UNSAFE_ENTRY(void, Unsafe_Set##Boolean(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jboolean x)) \
-  UnsafeWrapper("Unsafe_Set"#Boolean); \
-  SET_FIELD(obj, offset, jboolean, x); \
-UNSAFE_END \
+UNSAFE_ENTRY(void, Unsafe_Set##Type(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, java_type x)) { \
+  SET_FIELD(obj, offset, java_type, x); \
+} UNSAFE_END \
  \
 // END DEFINE_GETSETOOP.
 
@@ -394,18 +351,16 @@
 
 #undef DEFINE_GETSETOOP
 
-#define DEFINE_GETSETOOP_VOLATILE(jboolean, Boolean) \
+#define DEFINE_GETSETOOP_VOLATILE(java_type, Type) \
  \
-UNSAFE_ENTRY(jboolean, Unsafe_Get##Boolean##Volatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset)) \
-  UnsafeWrapper("Unsafe_Get"#Boolean); \
-  GET_FIELD_VOLATILE(obj, offset, jboolean, v); \
+UNSAFE_ENTRY(java_type, Unsafe_Get##Type##Volatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset)) { \
+  GET_FIELD_VOLATILE(obj, offset, java_type, v); \
   return v; \
-UNSAFE_END \
+} UNSAFE_END \
  \
-UNSAFE_ENTRY(void, Unsafe_Set##Boolean##Volatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jboolean x)) \
-  UnsafeWrapper("Unsafe_Set"#Boolean); \
-  SET_FIELD_VOLATILE(obj, offset, jboolean, x); \
-UNSAFE_END \
+UNSAFE_ENTRY(void, Unsafe_Set##Type##Volatile(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, java_type x)) { \
+  SET_FIELD_VOLATILE(obj, offset, java_type, x); \
+} UNSAFE_END \
  \
 // END DEFINE_GETSETOOP_VOLATILE.
 
@@ -425,59 +380,53 @@
 
 // The non-intrinsified versions of setOrdered just use setVolatile
 
-UNSAFE_ENTRY(void, Unsafe_SetOrderedInt(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jint x))
-  UnsafeWrapper("Unsafe_SetOrderedInt");
+UNSAFE_ENTRY(void, Unsafe_SetOrderedInt(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jint x)) {
   SET_FIELD_VOLATILE(obj, offset, jint, x);
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_SetOrderedObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h))
-  UnsafeWrapper("Unsafe_SetOrderedObject");
+UNSAFE_ENTRY(void, Unsafe_SetOrderedObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject x_h)) {
   oop x = JNIHandles::resolve(x_h);
   oop p = JNIHandles::resolve(obj);
   void* addr = index_oop_from_field_offset_long(p, offset);
   OrderAccess::release();
+
   if (UseCompressedOops) {
     oop_store((narrowOop*)addr, x);
   } else {
     oop_store((oop*)addr, x);
   }
+
   OrderAccess::fence();
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_SetOrderedLong(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong x))
-  UnsafeWrapper("Unsafe_SetOrderedLong");
+UNSAFE_ENTRY(void, Unsafe_SetOrderedLong(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong x)) {
 #ifdef SUPPORTS_NATIVE_CX8
   SET_FIELD_VOLATILE(obj, offset, jlong, x);
 #else
+
   // Keep old code for platforms which may not have atomic long (8 bytes) instructions
-  {
-    if (VM_Version::supports_cx8()) {
-      SET_FIELD_VOLATILE(obj, offset, jlong, x);
-    }
-    else {
-      Handle p (THREAD, JNIHandles::resolve(obj));
-      jlong* addr = (jlong*)(index_oop_from_field_offset_long(p(), offset));
-      MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag);
-      Atomic::store(x, addr);
-    }
+  if (VM_Version::supports_cx8()) {
+    SET_FIELD_VOLATILE(obj, offset, jlong, x);
+  } else {
+    Handle p(THREAD, JNIHandles::resolve(obj));
+    jlong* addr = (jlong*)(index_oop_from_field_offset_long(p(), offset));
+    MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag);
+    Atomic::store(x, addr);
   }
 #endif
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_LoadFence(JNIEnv *env, jobject unsafe))
-  UnsafeWrapper("Unsafe_LoadFence");
+UNSAFE_LEAF(void, Unsafe_LoadFence(JNIEnv *env, jobject unsafe)) {
   OrderAccess::acquire();
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_StoreFence(JNIEnv *env, jobject unsafe))
-  UnsafeWrapper("Unsafe_StoreFence");
+UNSAFE_LEAF(void, Unsafe_StoreFence(JNIEnv *env, jobject unsafe)) {
   OrderAccess::release();
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_FullFence(JNIEnv *env, jobject unsafe))
-  UnsafeWrapper("Unsafe_FullFence");
+UNSAFE_LEAF(void, Unsafe_FullFence(JNIEnv *env, jobject unsafe)) {
   OrderAccess::fence();
-UNSAFE_END
+} UNSAFE_END
 
 ////// Data in the C heap.
 
@@ -486,24 +435,22 @@
 //
 #define DEFINE_GETSETNATIVE(java_type, Type, native_type) \
  \
-UNSAFE_ENTRY(java_type, Unsafe_GetNative##Type(JNIEnv *env, jobject unsafe, jlong addr)) \
-  UnsafeWrapper("Unsafe_GetNative"#Type); \
+UNSAFE_ENTRY(java_type, Unsafe_GetNative##Type(JNIEnv *env, jobject unsafe, jlong addr)) { \
   void* p = addr_from_java(addr); \
   JavaThread* t = JavaThread::current(); \
   t->set_doing_unsafe_access(true); \
   java_type x = *(volatile native_type*)p; \
   t->set_doing_unsafe_access(false); \
   return x; \
-UNSAFE_END \
+} UNSAFE_END \
  \
-UNSAFE_ENTRY(void, Unsafe_SetNative##Type(JNIEnv *env, jobject unsafe, jlong addr, java_type x)) \
-  UnsafeWrapper("Unsafe_SetNative"#Type); \
+UNSAFE_ENTRY(void, Unsafe_SetNative##Type(JNIEnv *env, jobject unsafe, jlong addr, java_type x)) { \
   JavaThread* t = JavaThread::current(); \
   t->set_doing_unsafe_access(true); \
   void* p = addr_from_java(addr); \
   *(volatile native_type*)p = x; \
   t->set_doing_unsafe_access(false); \
-UNSAFE_END \
+} UNSAFE_END \
  \
 // END DEFINE_GETSETNATIVE.
 
@@ -517,8 +464,7 @@
 
 #undef DEFINE_GETSETNATIVE
 
-UNSAFE_ENTRY(jlong, Unsafe_GetNativeLong(JNIEnv *env, jobject unsafe, jlong addr))
-  UnsafeWrapper("Unsafe_GetNativeLong");
+UNSAFE_ENTRY(jlong, Unsafe_GetNativeLong(JNIEnv *env, jobject unsafe, jlong addr)) {
   JavaThread* t = JavaThread::current();
   // We do it this way to avoid problems with access to heap using 64
   // bit loads, as jlong in heap could be not 64-bit aligned, and on
@@ -526,7 +472,8 @@
   t->set_doing_unsafe_access(true);
   void* p = addr_from_java(addr);
   jlong x;
-  if (((intptr_t)p & 7) == 0) {
+
+  if (is_ptr_aligned(p, sizeof(jlong)) == 0) {
     // jlong is aligned, do a volatile access
     x = *(volatile jlong*)p;
   } else {
@@ -535,17 +482,19 @@
     acc.words[1] = ((volatile jint*)p)[1];
     x = acc.long_value;
   }
+
   t->set_doing_unsafe_access(false);
+
   return x;
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_SetNativeLong(JNIEnv *env, jobject unsafe, jlong addr, jlong x))
-  UnsafeWrapper("Unsafe_SetNativeLong");
+UNSAFE_ENTRY(void, Unsafe_SetNativeLong(JNIEnv *env, jobject unsafe, jlong addr, jlong x)) {
   JavaThread* t = JavaThread::current();
   // see comment for Unsafe_GetNativeLong
   t->set_doing_unsafe_access(true);
   void* p = addr_from_java(addr);
-  if (((intptr_t)p & 7) == 0) {
+
+  if (is_ptr_aligned(p, sizeof(jlong))) {
     // jlong is aligned, do a volatile access
     *(volatile jlong*)p = x;
   } else {
@@ -554,119 +503,81 @@
     ((volatile jint*)p)[0] = acc.words[0];
     ((volatile jint*)p)[1] = acc.words[1];
   }
+
   t->set_doing_unsafe_access(false);
-UNSAFE_END
+} UNSAFE_END
 
 
-UNSAFE_ENTRY(jlong, Unsafe_GetNativeAddress(JNIEnv *env, jobject unsafe, jlong addr))
-  UnsafeWrapper("Unsafe_GetNativeAddress");
+UNSAFE_LEAF(jlong, Unsafe_GetNativeAddress(JNIEnv *env, jobject unsafe, jlong addr)) {
   void* p = addr_from_java(addr);
+
   return addr_to_java(*(void**)p);
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_SetNativeAddress(JNIEnv *env, jobject unsafe, jlong addr, jlong x))
-  UnsafeWrapper("Unsafe_SetNativeAddress");
+UNSAFE_LEAF(void, Unsafe_SetNativeAddress(JNIEnv *env, jobject unsafe, jlong addr, jlong x)) {
   void* p = addr_from_java(addr);
   *(void**)p = addr_from_java(x);
-UNSAFE_END
+} UNSAFE_END
 
 
 ////// Allocation requests
 
-UNSAFE_ENTRY(jobject, Unsafe_AllocateInstance(JNIEnv *env, jobject unsafe, jclass cls))
-  UnsafeWrapper("Unsafe_AllocateInstance");
-  {
-    ThreadToNativeFromVM ttnfv(thread);
-    return env->AllocObject(cls);
-  }
-UNSAFE_END
+UNSAFE_ENTRY(jobject, Unsafe_AllocateInstance(JNIEnv *env, jobject unsafe, jclass cls)) {
+  ThreadToNativeFromVM ttnfv(thread);
+  return env->AllocObject(cls);
+} UNSAFE_END
 
-UNSAFE_ENTRY(jlong, Unsafe_AllocateMemory(JNIEnv *env, jobject unsafe, jlong size))
-  UnsafeWrapper("Unsafe_AllocateMemory");
+UNSAFE_ENTRY(jlong, Unsafe_AllocateMemory0(JNIEnv *env, jobject unsafe, jlong size)) {
   size_t sz = (size_t)size;
-  if (sz != (julong)size || size < 0) {
-    THROW_0(vmSymbols::java_lang_IllegalArgumentException());
-  }
-  if (sz == 0) {
-    return 0;
-  }
+
   sz = round_to(sz, HeapWordSize);
   void* x = os::malloc(sz, mtInternal);
-  if (x == NULL) {
-    THROW_0(vmSymbols::java_lang_OutOfMemoryError());
-  }
-  //Copy::fill_to_words((HeapWord*)x, sz / HeapWordSize);
+
   return addr_to_java(x);
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(jlong, Unsafe_ReallocateMemory(JNIEnv *env, jobject unsafe, jlong addr, jlong size))
-  UnsafeWrapper("Unsafe_ReallocateMemory");
+UNSAFE_ENTRY(jlong, Unsafe_ReallocateMemory0(JNIEnv *env, jobject unsafe, jlong addr, jlong size)) {
   void* p = addr_from_java(addr);
   size_t sz = (size_t)size;
-  if (sz != (julong)size || size < 0) {
-    THROW_0(vmSymbols::java_lang_IllegalArgumentException());
-  }
-  if (sz == 0) {
-    os::free(p);
-    return 0;
-  }
   sz = round_to(sz, HeapWordSize);
-  void* x = (p == NULL) ? os::malloc(sz, mtInternal) : os::realloc(p, sz, mtInternal);
-  if (x == NULL) {
-    THROW_0(vmSymbols::java_lang_OutOfMemoryError());
-  }
+
+  void* x = os::realloc(p, sz, mtInternal);
+
   return addr_to_java(x);
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_FreeMemory(JNIEnv *env, jobject unsafe, jlong addr))
-  UnsafeWrapper("Unsafe_FreeMemory");
+UNSAFE_ENTRY(void, Unsafe_FreeMemory0(JNIEnv *env, jobject unsafe, jlong addr)) {
   void* p = addr_from_java(addr);
-  if (p == NULL) {
-    return;
-  }
+
   os::free(p);
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_SetMemory(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong size, jbyte value))
-  UnsafeWrapper("Unsafe_SetMemory");
+UNSAFE_ENTRY(void, Unsafe_SetMemory0(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong size, jbyte value)) {
   size_t sz = (size_t)size;
-  if (sz != (julong)size || size < 0) {
-    THROW(vmSymbols::java_lang_IllegalArgumentException());
-  }
+
   oop base = JNIHandles::resolve(obj);
   void* p = index_oop_from_field_offset_long(base, offset);
+
   Copy::fill_to_memory_atomic(p, sz, value);
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_CopyMemory(JNIEnv *env, jobject unsafe, jobject srcObj, jlong srcOffset, jobject dstObj, jlong dstOffset, jlong size))
-  UnsafeWrapper("Unsafe_CopyMemory");
-  if (size == 0) {
-    return;
-  }
+UNSAFE_ENTRY(void, Unsafe_CopyMemory0(JNIEnv *env, jobject unsafe, jobject srcObj, jlong srcOffset, jobject dstObj, jlong dstOffset, jlong size)) {
   size_t sz = (size_t)size;
-  if (sz != (julong)size || size < 0) {
-    THROW(vmSymbols::java_lang_IllegalArgumentException());
-  }
+
   oop srcp = JNIHandles::resolve(srcObj);
   oop dstp = JNIHandles::resolve(dstObj);
-  if (dstp != NULL && !dstp->is_typeArray()) {
-    // NYI:  This works only for non-oop arrays at present.
-    // Generalizing it would be reasonable, but requires card marking.
-    // Also, autoboxing a Long from 0L in copyMemory(x,y, 0L,z, n) would be bad.
-    THROW(vmSymbols::java_lang_IllegalArgumentException());
-  }
+
   void* src = index_oop_from_field_offset_long(srcp, srcOffset);
   void* dst = index_oop_from_field_offset_long(dstp, dstOffset);
+
   Copy::conjoint_memory_atomic(src, dst, sz);
-UNSAFE_END
+} UNSAFE_END
 
 // This function is a leaf since if the source and destination are both in native memory
 // the copy may potentially be very large, and we don't want to disable GC if we can avoid it.
 // If either source or destination (or both) are on the heap, the function will enter VM using
 // JVM_ENTRY_FROM_LEAF
-JVM_LEAF(void, Unsafe_CopySwapMemory0(JNIEnv *env, jobject unsafe, jobject srcObj, jlong srcOffset, jobject dstObj, jlong dstOffset, jlong size, jlong elemSize)) {
-  UnsafeWrapper("Unsafe_CopySwapMemory0");
-
+UNSAFE_LEAF(void, Unsafe_CopySwapMemory0(JNIEnv *env, jobject unsafe, jobject srcObj, jlong srcOffset, jobject dstObj, jlong dstOffset, jlong size, jlong elemSize)) {
   size_t sz = (size_t)size;
   size_t esz = (size_t)elemSize;
 
@@ -689,32 +600,24 @@
       Copy::conjoint_swap(src, dst, sz, esz);
     } JVM_END
   }
-} JVM_END
+} UNSAFE_END
 
 ////// Random queries
 
-// See comment at file start about UNSAFE_LEAF
-//UNSAFE_LEAF(jint, Unsafe_AddressSize())
-UNSAFE_ENTRY(jint, Unsafe_AddressSize(JNIEnv *env, jobject unsafe))
-  UnsafeWrapper("Unsafe_AddressSize");
+UNSAFE_LEAF(jint, Unsafe_AddressSize0(JNIEnv *env, jobject unsafe)) {
   return sizeof(void*);
-UNSAFE_END
+} UNSAFE_END
 
-// See comment at file start about UNSAFE_LEAF
-//UNSAFE_LEAF(jint, Unsafe_PageSize())
-UNSAFE_ENTRY(jint, Unsafe_PageSize(JNIEnv *env, jobject unsafe))
-  UnsafeWrapper("Unsafe_PageSize");
+UNSAFE_LEAF(jint, Unsafe_PageSize()) {
   return os::vm_page_size();
-UNSAFE_END
+} UNSAFE_END
 
-jint find_field_offset(jobject field, int must_be_static, TRAPS) {
-  if (field == NULL) {
-    THROW_0(vmSymbols::java_lang_NullPointerException());
-  }
+static jint find_field_offset(jobject field, int must_be_static, TRAPS) {
+  assert(field != NULL, "field must not be NULL");
 
   oop reflected   = JNIHandles::resolve_non_null(field);
   oop mirror      = java_lang_reflect_Field::clazz(reflected);
-  Klass* k      = java_lang_Class::as_Klass(mirror);
+  Klass* k        = java_lang_Class::as_Klass(mirror);
   int slot        = java_lang_reflect_Field::slot(reflected);
   int modifiers   = java_lang_reflect_Field::modifiers(reflected);
 
@@ -729,18 +632,17 @@
   return field_offset_from_byte_offset(offset);
 }
 
-UNSAFE_ENTRY(jlong, Unsafe_ObjectFieldOffset(JNIEnv *env, jobject unsafe, jobject field))
-  UnsafeWrapper("Unsafe_ObjectFieldOffset");
+UNSAFE_ENTRY(jlong, Unsafe_ObjectFieldOffset0(JNIEnv *env, jobject unsafe, jobject field)) {
   return find_field_offset(field, 0, THREAD);
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(jlong, Unsafe_StaticFieldOffset(JNIEnv *env, jobject unsafe, jobject field))
-  UnsafeWrapper("Unsafe_StaticFieldOffset");
+UNSAFE_ENTRY(jlong, Unsafe_StaticFieldOffset0(JNIEnv *env, jobject unsafe, jobject field)) {
   return find_field_offset(field, 1, THREAD);
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(jobject, Unsafe_StaticFieldBaseFromField(JNIEnv *env, jobject unsafe, jobject field))
-  UnsafeWrapper("Unsafe_StaticFieldBase");
+UNSAFE_ENTRY(jobject, Unsafe_StaticFieldBase0(JNIEnv *env, jobject unsafe, jobject field)) {
+  assert(field != NULL, "field must not be NULL");
+
   // Note:  In this VM implementation, a field address is always a short
   // offset from the base of a a klass metaobject.  Thus, the full dynamic
   // range of the return type is never used.  However, some implementations
@@ -749,8 +651,6 @@
   // large.  In that last case, this function would return NULL, since
   // the address would operate alone, without any base pointer.
 
-  if (field == NULL)  THROW_0(vmSymbols::java_lang_NullPointerException());
-
   oop reflected   = JNIHandles::resolve_non_null(field);
   oop mirror      = java_lang_reflect_Field::clazz(reflected);
   int modifiers   = java_lang_reflect_Field::modifiers(reflected);
@@ -760,13 +660,11 @@
   }
 
   return JNIHandles::make_local(env, mirror);
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_EnsureClassInitialized(JNIEnv *env, jobject unsafe, jobject clazz)) {
-  UnsafeWrapper("Unsafe_EnsureClassInitialized");
-  if (clazz == NULL) {
-    THROW(vmSymbols::java_lang_NullPointerException());
-  }
+UNSAFE_ENTRY(void, Unsafe_EnsureClassInitialized0(JNIEnv *env, jobject unsafe, jobject clazz)) {
+  assert(clazz != NULL, "clazz must not be NULL");
+
   oop mirror = JNIHandles::resolve_non_null(clazz);
 
   Klass* klass = java_lang_Class::as_Klass(mirror);
@@ -777,26 +675,26 @@
 }
 UNSAFE_END
 
-UNSAFE_ENTRY(jboolean, Unsafe_ShouldBeInitialized(JNIEnv *env, jobject unsafe, jobject clazz)) {
-  UnsafeWrapper("Unsafe_ShouldBeInitialized");
-  if (clazz == NULL) {
-    THROW_(vmSymbols::java_lang_NullPointerException(), false);
-  }
+UNSAFE_ENTRY(jboolean, Unsafe_ShouldBeInitialized0(JNIEnv *env, jobject unsafe, jobject clazz)) {
+  assert(clazz != NULL, "clazz must not be NULL");
+
   oop mirror = JNIHandles::resolve_non_null(clazz);
   Klass* klass = java_lang_Class::as_Klass(mirror);
+
   if (klass != NULL && klass->should_be_initialized()) {
     return true;
   }
+
   return false;
 }
 UNSAFE_END
 
-static void getBaseAndScale(int& base, int& scale, jclass acls, TRAPS) {
-  if (acls == NULL) {
-    THROW(vmSymbols::java_lang_NullPointerException());
-  }
-  oop      mirror = JNIHandles::resolve_non_null(acls);
-  Klass* k      = java_lang_Class::as_Klass(mirror);
+static void getBaseAndScale(int& base, int& scale, jclass clazz, TRAPS) {
+  assert(clazz != NULL, "clazz must not be NULL");
+
+  oop mirror = JNIHandles::resolve_non_null(clazz);
+  Klass* k = java_lang_Class::as_Klass(mirror);
+
   if (k == NULL || !k->is_array_klass()) {
     THROW(vmSymbols::java_lang_InvalidClassException());
   } else if (k->is_objArray_klass()) {
@@ -812,18 +710,18 @@
   }
 }
 
-UNSAFE_ENTRY(jint, Unsafe_ArrayBaseOffset(JNIEnv *env, jobject unsafe, jclass acls))
-  UnsafeWrapper("Unsafe_ArrayBaseOffset");
+UNSAFE_ENTRY(jint, Unsafe_ArrayBaseOffset0(JNIEnv *env, jobject unsafe, jclass clazz)) {
   int base = 0, scale = 0;
-  getBaseAndScale(base, scale, acls, CHECK_0);
+  getBaseAndScale(base, scale, clazz, CHECK_0);
+
   return field_offset_from_byte_offset(base);
-UNSAFE_END
+} UNSAFE_END
 
 
-UNSAFE_ENTRY(jint, Unsafe_ArrayIndexScale(JNIEnv *env, jobject unsafe, jclass acls))
-  UnsafeWrapper("Unsafe_ArrayIndexScale");
+UNSAFE_ENTRY(jint, Unsafe_ArrayIndexScale0(JNIEnv *env, jobject unsafe, jclass clazz)) {
   int base = 0, scale = 0;
-  getBaseAndScale(base, scale, acls, CHECK_0);
+  getBaseAndScale(base, scale, clazz, CHECK_0);
+
   // This VM packs both fields and array elements down to the byte.
   // But watch out:  If this changes, so that array references for
   // a given primitive type (say, T_BOOLEAN) use different memory units
@@ -839,100 +737,88 @@
   // The following allows for a pretty general fieldOffset cookie scheme,
   // but requires it to be linear in byte offset.
   return field_offset_from_byte_offset(scale) - field_offset_from_byte_offset(0);
-UNSAFE_END
+} UNSAFE_END
 
 
 static inline void throw_new(JNIEnv *env, const char *ename) {
   char buf[100];
+
   jio_snprintf(buf, 100, "%s%s", "java/lang/", ename);
+
   jclass cls = env->FindClass(buf);
   if (env->ExceptionCheck()) {
     env->ExceptionClear();
     tty->print_cr("Unsafe: cannot throw %s because FindClass has failed", buf);
     return;
   }
-  char* msg = NULL;
-  env->ThrowNew(cls, msg);
+
+  env->ThrowNew(cls, NULL);
 }
 
 static jclass Unsafe_DefineClass_impl(JNIEnv *env, jstring name, jbyteArray data, int offset, int length, jobject loader, jobject pd) {
-  {
-    // Code lifted from JDK 1.3 ClassLoader.c
+  // Code lifted from JDK 1.3 ClassLoader.c
 
-    jbyte *body;
-    char *utfName;
-    jclass result = 0;
-    char buf[128];
+  jbyte *body;
+  char *utfName = NULL;
+  jclass result = 0;
+  char buf[128];
 
-    if (UsePerfData) {
-      ClassLoader::unsafe_defineClassCallCounter()->inc();
+  assert(data != NULL, "Class bytes must not be NULL");
+  assert(length >= 0, "length must not be negative: %d", length);
+
+  if (UsePerfData) {
+    ClassLoader::unsafe_defineClassCallCounter()->inc();
+  }
+
+  body = NEW_C_HEAP_ARRAY(jbyte, length, mtInternal);
+  if (body == NULL) {
+    throw_new(env, "OutOfMemoryError");
+    return 0;
+  }
+
+  env->GetByteArrayRegion(data, offset, length, body);
+  if (env->ExceptionOccurred()) {
+    goto free_body;
+  }
+
+  if (name != NULL) {
+    uint len = env->GetStringUTFLength(name);
+    int unicode_len = env->GetStringLength(name);
+
+    if (len >= sizeof(buf)) {
+      utfName = NEW_C_HEAP_ARRAY(char, len + 1, mtInternal);
+      if (utfName == NULL) {
+        throw_new(env, "OutOfMemoryError");
+        goto free_body;
+      }
+    } else {
+      utfName = buf;
     }
 
-    if (data == NULL) {
-        throw_new(env, "NullPointerException");
-        return 0;
+    env->GetStringUTFRegion(name, 0, unicode_len, utfName);
+
+    for (uint i = 0; i < len; i++) {
+      if (utfName[i] == '.')   utfName[i] = '/';
     }
+  }
 
-    /* Work around 4153825. malloc crashes on Solaris when passed a
-     * negative size.
-     */
-    if (length < 0) {
-        throw_new(env, "ArrayIndexOutOfBoundsException");
-        return 0;
-    }
+  result = JVM_DefineClass(env, utfName, loader, body, length, pd);
 
-    body = NEW_C_HEAP_ARRAY(jbyte, length, mtInternal);
-
-    if (body == 0) {
-        throw_new(env, "OutOfMemoryError");
-        return 0;
-    }
-
-    env->GetByteArrayRegion(data, offset, length, body);
-
-    if (env->ExceptionOccurred())
-        goto free_body;
-
-    if (name != NULL) {
-        uint len = env->GetStringUTFLength(name);
-        int unicode_len = env->GetStringLength(name);
-        if (len >= sizeof(buf)) {
-            utfName = NEW_C_HEAP_ARRAY(char, len + 1, mtInternal);
-            if (utfName == NULL) {
-                throw_new(env, "OutOfMemoryError");
-                goto free_body;
-            }
-        } else {
-            utfName = buf;
-        }
-        env->GetStringUTFRegion(name, 0, unicode_len, utfName);
-        //VerifyFixClassname(utfName);
-        for (uint i = 0; i < len; i++) {
-          if (utfName[i] == '.')   utfName[i] = '/';
-        }
-    } else {
-        utfName = NULL;
-    }
-
-    result = JVM_DefineClass(env, utfName, loader, body, length, pd);
-
-    if (utfName && utfName != buf)
-        FREE_C_HEAP_ARRAY(char, utfName);
+  if (utfName && utfName != buf) {
+    FREE_C_HEAP_ARRAY(char, utfName);
+  }
 
  free_body:
-    FREE_C_HEAP_ARRAY(jbyte, body);
-    return result;
-  }
+  FREE_C_HEAP_ARRAY(jbyte, body);
+  return result;
 }
 
 
-UNSAFE_ENTRY(jclass, Unsafe_DefineClass(JNIEnv *env, jobject unsafe, jstring name, jbyteArray data, int offset, int length, jobject loader, jobject pd))
-  UnsafeWrapper("Unsafe_DefineClass");
-  {
-    ThreadToNativeFromVM ttnfv(thread);
-    return Unsafe_DefineClass_impl(env, name, data, offset, length, loader, pd);
-  }
-UNSAFE_END
+UNSAFE_ENTRY(jclass, Unsafe_DefineClass0(JNIEnv *env, jobject unsafe, jstring name, jbyteArray data, int offset, int length, jobject loader, jobject pd)) {
+  ThreadToNativeFromVM ttnfv(thread);
+
+  return Unsafe_DefineClass_impl(env, name, data, offset, length, loader, pd);
+} UNSAFE_END
 
 
 // define a class but do not make it known to the class loader or system dictionary
@@ -986,45 +872,35 @@
 static instanceKlassHandle
 Unsafe_DefineAnonymousClass_impl(JNIEnv *env,
                                  jclass host_class, jbyteArray data, jobjectArray cp_patches_jh,
-                                 HeapWord* *temp_alloc,
+                                 u1** temp_alloc,
                                  TRAPS) {
+  assert(host_class != NULL, "host_class must not be NULL");
+  assert(data != NULL, "data must not be NULL");
 
   if (UsePerfData) {
     ClassLoader::unsafe_defineClassCallCounter()->inc();
   }
 
-  if (data == NULL) {
-    THROW_0(vmSymbols::java_lang_NullPointerException());
-  }
+  jint length = typeArrayOop(JNIHandles::resolve_non_null(data))->length();
+  assert(length >= 0, "class_bytes_length must not be negative: %d", length);
 
-  jint length = typeArrayOop(JNIHandles::resolve_non_null(data))->length();
-  jint word_length = (length + sizeof(HeapWord)-1) / sizeof(HeapWord);
-  HeapWord* body = NEW_C_HEAP_ARRAY(HeapWord, word_length, mtInternal);
-  if (body == NULL) {
+  int class_bytes_length = (int) length;
+
+  u1* class_bytes = NEW_C_HEAP_ARRAY(u1, length, mtInternal);
+  if (class_bytes == NULL) {
     THROW_0(vmSymbols::java_lang_OutOfMemoryError());
   }
 
   // caller responsible to free it:
-  (*temp_alloc) = body;
+  *temp_alloc = class_bytes;
 
-  {
-    jbyte* array_base = typeArrayOop(JNIHandles::resolve_non_null(data))->byte_at_addr(0);
-    Copy::conjoint_words((HeapWord*) array_base, body, word_length);
-  }
-
-  u1* class_bytes = (u1*) body;
-  int class_bytes_length = (int) length;
-  if (class_bytes_length < 0)  class_bytes_length = 0;
-  if (class_bytes == NULL
-      || host_class == NULL
-      || length != class_bytes_length)
-    THROW_0(vmSymbols::java_lang_IllegalArgumentException());
+  jbyte* array_base = typeArrayOop(JNIHandles::resolve_non_null(data))->byte_at_addr(0);
+  Copy::conjoint_jbytes(array_base, class_bytes, length);
 
   objArrayHandle cp_patches_h;
   if (cp_patches_jh != NULL) {
     oop p = JNIHandles::resolve_non_null(cp_patches_jh);
-    if (!p->is_objArray())
-      THROW_0(vmSymbols::java_lang_IllegalArgumentException());
+    assert(p->is_objArray(), "cp_patches must be an object[]");
     cp_patches_h = objArrayHandle(THREAD, (objArrayOop)p);
   }
 
@@ -1036,60 +912,56 @@
   Handle      host_domain(THREAD, host_klass->protection_domain());
 
   GrowableArray<Handle>* cp_patches = NULL;
+
   if (cp_patches_h.not_null()) {
     int alen = cp_patches_h->length();
+
     for (int i = alen-1; i >= 0; i--) {
       oop p = cp_patches_h->obj_at(i);
       if (p != NULL) {
         Handle patch(THREAD, p);
-        if (cp_patches == NULL)
+
+        if (cp_patches == NULL) {
           cp_patches = new GrowableArray<Handle>(i+1, i+1, Handle());
+        }
+
         cp_patches->at_put(i, patch);
       }
     }
   }
 
-  ClassFileStream st(class_bytes,
-                     class_bytes_length,
-                     host_source,
-                     ClassFileStream::verify);
+  ClassFileStream st(class_bytes, class_bytes_length, host_source, ClassFileStream::verify);
+
+  Symbol* no_class_name = NULL;
+  Klass* anonk = SystemDictionary::parse_stream(no_class_name,
+                                                host_loader,
+                                                host_domain,
+                                                &st,
+                                                host_klass,
+                                                cp_patches,
+                                                CHECK_NULL);
+  if (anonk == NULL) {
+    return NULL;
+  }
+
+  return instanceKlassHandle(THREAD, anonk);
+}
+
+UNSAFE_ENTRY(jclass, Unsafe_DefineAnonymousClass0(JNIEnv *env, jobject unsafe, jclass host_class, jbyteArray data, jobjectArray cp_patches_jh)) {
+  ResourceMark rm(THREAD);
 
   instanceKlassHandle anon_klass;
-  {
-    Symbol* no_class_name = NULL;
-    Klass* anonk = SystemDictionary::parse_stream(no_class_name,
-                                                  host_loader,
-                                                  host_domain,
-                                                  &st,
-                                                  host_klass,
-                                                  cp_patches,
-                                                  CHECK_NULL);
-    if (anonk == NULL)  return NULL;
-    anon_klass = instanceKlassHandle(THREAD, anonk);
+  jobject res_jh = NULL;
+  u1* temp_alloc = NULL;
+
+  anon_klass = Unsafe_DefineAnonymousClass_impl(env, host_class, data, cp_patches_jh, &temp_alloc, THREAD);
+  if (anon_klass() != NULL) {
+    res_jh = JNIHandles::make_local(env, anon_klass->java_mirror());
   }
 
-  return anon_klass;
-}
-
-UNSAFE_ENTRY(jclass, Unsafe_DefineAnonymousClass(JNIEnv *env, jobject unsafe, jclass host_class, jbyteArray data, jobjectArray cp_patches_jh))
-{
-  instanceKlassHandle anon_klass;
-  jobject res_jh = NULL;
-
-  UnsafeWrapper("Unsafe_DefineAnonymousClass");
-  ResourceMark rm(THREAD);
-
-  HeapWord* temp_alloc = NULL;
-
-  anon_klass = Unsafe_DefineAnonymousClass_impl(env, host_class, data,
-                                                cp_patches_jh,
-                                                   &temp_alloc, THREAD);
-  if (anon_klass() != NULL)
-    res_jh = JNIHandles::make_local(env, anon_klass->java_mirror());
-
   // try/finally clause:
   if (temp_alloc != NULL) {
-    FREE_C_HEAP_ARRAY(HeapWord, temp_alloc);
+    FREE_C_HEAP_ARRAY(u1, temp_alloc);
   }
 
   // The anonymous class loader data has been artificially been kept alive to
@@ -1102,100 +974,103 @@
   // let caller initialize it as needed...
 
   return (jclass) res_jh;
-}
-UNSAFE_END
+} UNSAFE_END
 
 
 
-UNSAFE_ENTRY(void, Unsafe_ThrowException(JNIEnv *env, jobject unsafe, jthrowable thr))
-  UnsafeWrapper("Unsafe_ThrowException");
-  {
-    ThreadToNativeFromVM ttnfv(thread);
-    env->Throw(thr);
-  }
-UNSAFE_END
+UNSAFE_ENTRY(void, Unsafe_ThrowException(JNIEnv *env, jobject unsafe, jthrowable thr)) {
+  ThreadToNativeFromVM ttnfv(thread);
+  env->Throw(thr);
+} UNSAFE_END
 
 // JSR166 ------------------------------------------------------------------
 
-UNSAFE_ENTRY(jobject, Unsafe_CompareAndExchangeObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject e_h, jobject x_h))
-  UnsafeWrapper("Unsafe_CompareAndExchangeObject");
+UNSAFE_ENTRY(jobject, Unsafe_CompareAndExchangeObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject e_h, jobject x_h)) {
   oop x = JNIHandles::resolve(x_h);
   oop e = JNIHandles::resolve(e_h);
   oop p = JNIHandles::resolve(obj);
   HeapWord* addr = (HeapWord *)index_oop_from_field_offset_long(p, offset);
   oop res = oopDesc::atomic_compare_exchange_oop(x, addr, e, true);
-  if (res == e)
+  if (res == e) {
     update_barrier_set((void*)addr, x);
+  }
   return JNIHandles::make_local(env, res);
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(jint, Unsafe_CompareAndExchangeInt(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jint e, jint x))
-  UnsafeWrapper("Unsafe_CompareAndExchangeInt");
+UNSAFE_ENTRY(jint, Unsafe_CompareAndExchangeInt(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jint e, jint x)) {
   oop p = JNIHandles::resolve(obj);
   jint* addr = (jint *) index_oop_from_field_offset_long(p, offset);
+
   return (jint)(Atomic::cmpxchg(x, addr, e));
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(jlong, Unsafe_CompareAndExchangeLong(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong e, jlong x))
-  UnsafeWrapper("Unsafe_CompareAndExchangeLong");
+UNSAFE_ENTRY(jlong, Unsafe_CompareAndExchangeLong(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong e, jlong x)) {
   Handle p (THREAD, JNIHandles::resolve(obj));
   jlong* addr = (jlong*)(index_oop_from_field_offset_long(p(), offset));
+
 #ifdef SUPPORTS_NATIVE_CX8
   return (jlong)(Atomic::cmpxchg(x, addr, e));
 #else
-  if (VM_Version::supports_cx8())
+  if (VM_Version::supports_cx8()) {
     return (jlong)(Atomic::cmpxchg(x, addr, e));
-  else {
+  } else {
     MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag);
+
     jlong val = Atomic::load(addr);
-    if (val == e)
+    if (val == e) {
       Atomic::store(x, addr);
+    }
     return val;
   }
 #endif
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSwapObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject e_h, jobject x_h))
-  UnsafeWrapper("Unsafe_CompareAndSwapObject");
+UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSwapObject(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jobject e_h, jobject x_h)) {
   oop x = JNIHandles::resolve(x_h);
   oop e = JNIHandles::resolve(e_h);
   oop p = JNIHandles::resolve(obj);
   HeapWord* addr = (HeapWord *)index_oop_from_field_offset_long(p, offset);
   oop res = oopDesc::atomic_compare_exchange_oop(x, addr, e, true);
-  jboolean success  = (res == e);
-  if (success)
-    update_barrier_set((void*)addr, x);
-  return success;
-UNSAFE_END
+  if (res != e) {
+    return false;
+  }
 
-UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSwapInt(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jint e, jint x))
-  UnsafeWrapper("Unsafe_CompareAndSwapInt");
+  update_barrier_set((void*)addr, x);
+
+  return true;
+} UNSAFE_END
+
+UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSwapInt(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jint e, jint x)) {
   oop p = JNIHandles::resolve(obj);
   jint* addr = (jint *) index_oop_from_field_offset_long(p, offset);
+
   return (jint)(Atomic::cmpxchg(x, addr, e)) == e;
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSwapLong(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong e, jlong x))
-  UnsafeWrapper("Unsafe_CompareAndSwapLong");
-  Handle p (THREAD, JNIHandles::resolve(obj));
-  jlong* addr = (jlong*)(index_oop_from_field_offset_long(p(), offset));
+UNSAFE_ENTRY(jboolean, Unsafe_CompareAndSwapLong(JNIEnv *env, jobject unsafe, jobject obj, jlong offset, jlong e, jlong x)) {
+  Handle p(THREAD, JNIHandles::resolve(obj));
+  jlong* addr = (jlong*)index_oop_from_field_offset_long(p(), offset);
+
 #ifdef SUPPORTS_NATIVE_CX8
   return (jlong)(Atomic::cmpxchg(x, addr, e)) == e;
 #else
-  if (VM_Version::supports_cx8())
+  if (VM_Version::supports_cx8()) {
     return (jlong)(Atomic::cmpxchg(x, addr, e)) == e;
-  else {
-    jboolean success = false;
+  } else {
     MutexLockerEx mu(UnsafeJlong_lock, Mutex::_no_safepoint_check_flag);
+
     jlong val = Atomic::load(addr);
-    if (val == e) { Atomic::store(x, addr); success = true; }
-    return success;
+    if (val != e) {
+      return false;
+    }
+
+    Atomic::store(x, addr);
+    return true;
   }
 #endif
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_Park(JNIEnv *env, jobject unsafe, jboolean isAbsolute, jlong time))
-  UnsafeWrapper("Unsafe_Park");
+UNSAFE_ENTRY(void, Unsafe_Park(JNIEnv *env, jobject unsafe, jboolean isAbsolute, jlong time)) {
   EventThreadPark event;
   HOTSPOT_THREAD_PARK_BEGIN((uintptr_t) thread->parker(), (int) isAbsolute, time);
 
@@ -1203,6 +1078,7 @@
   thread->parker()->park(isAbsolute != 0, time);
 
   HOTSPOT_THREAD_PARK_END((uintptr_t) thread->parker());
+
   if (event.should_commit()) {
     oop obj = thread->current_park_blocker();
     event.set_klass((obj != NULL) ? obj->klass() : NULL);
@@ -1210,11 +1086,11 @@
     event.set_address((obj != NULL) ? (TYPE_ADDRESS) cast_from_oop<uintptr_t>(obj) : 0);
     event.commit();
   }
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(void, Unsafe_Unpark(JNIEnv *env, jobject unsafe, jobject jthread))
-  UnsafeWrapper("Unsafe_Unpark");
+UNSAFE_ENTRY(void, Unsafe_Unpark(JNIEnv *env, jobject unsafe, jobject jthread)) {
   Parker* p = NULL;
+
   if (jthread != NULL) {
     oop java_thread = JNIHandles::resolve_non_null(jthread);
     if (java_thread != NULL) {
@@ -1228,6 +1104,7 @@
         // Grab lock if apparently null or using older version of library
         MutexLocker mu(Threads_lock);
         java_thread = JNIHandles::resolve_non_null(jthread);
+
         if (java_thread != NULL) {
           JavaThread* thr = java_lang_Thread::thread(java_thread);
           if (thr != NULL) {
@@ -1240,14 +1117,14 @@
       }
     }
   }
+
   if (p != NULL) {
     HOTSPOT_THREAD_UNPARK((uintptr_t) p);
     p->unpark();
   }
-UNSAFE_END
+} UNSAFE_END
 
-UNSAFE_ENTRY(jint, Unsafe_Loadavg(JNIEnv *env, jobject unsafe, jdoubleArray loadavg, jint nelem))
-  UnsafeWrapper("Unsafe_Loadavg");
+UNSAFE_ENTRY(jint, Unsafe_GetLoadAverage0(JNIEnv *env, jobject unsafe, jdoubleArray loadavg, jint nelem)) {
   const int max_nelem = 3;
   double la[max_nelem];
   jint ret;
@@ -1255,15 +1132,11 @@
   typeArrayOop a = typeArrayOop(JNIHandles::resolve_non_null(loadavg));
   assert(a->is_typeArray(), "must be type array");
 
-  if (nelem < 0 || nelem > max_nelem || a->length() < nelem) {
-    ThreadToNativeFromVM ttnfv(thread);
-    throw_new(env, "ArrayIndexOutOfBoundsException");
+  ret = os::loadavg(la, nelem);
+  if (ret == -1) {
     return -1;
   }
 
-  ret = os::loadavg(la, nelem);
-  if (ret == -1) return -1;
-
   // if successful, ret is the number of samples actually retrieved.
   assert(ret >= 0 && ret <= max_nelem, "Unexpected loadavg return value");
   switch(ret) {
@@ -1271,8 +1144,9 @@
     case 2: a->double_at_put(1, (jdouble)la[1]); // fall through
     case 1: a->double_at_put(0, (jdouble)la[0]); break;
   }
+
   return ret;
-UNSAFE_END
+} UNSAFE_END
 
 
 /// JVM_RegisterUnsafeMethods
@@ -1292,87 +1166,17 @@
 #define CC (char*)  /*cast a literal from (const char*)*/
 #define FN_PTR(f) CAST_FROM_FN_PTR(void*, &f)
 
-#define DECLARE_GETPUTOOP(Boolean, Z) \
-    {CC "get" #Boolean,      CC "(" OBJ "J)" #Z,       FN_PTR(Unsafe_Get##Boolean)}, \
-    {CC "put" #Boolean,      CC "(" OBJ "J" #Z ")V",   FN_PTR(Unsafe_Set##Boolean)}, \
-    {CC "get" #Boolean "Volatile",      CC "(" OBJ "J)" #Z,       FN_PTR(Unsafe_Get##Boolean##Volatile)}, \
-    {CC "put" #Boolean "Volatile",      CC "(" OBJ "J" #Z ")V",   FN_PTR(Unsafe_Set##Boolean##Volatile)}
+#define DECLARE_GETPUTOOP(Type, Desc) \
+    {CC "get" #Type,      CC "(" OBJ "J)" #Desc,       FN_PTR(Unsafe_Get##Type)}, \
+    {CC "put" #Type,      CC "(" OBJ "J" #Desc ")V",   FN_PTR(Unsafe_Set##Type)}, \
+    {CC "get" #Type "Volatile",      CC "(" OBJ "J)" #Desc,       FN_PTR(Unsafe_Get##Type##Volatile)}, \
+    {CC "put" #Type "Volatile",      CC "(" OBJ "J" #Desc ")V",   FN_PTR(Unsafe_Set##Type##Volatile)}
 
 
 #define DECLARE_GETPUTNATIVE(Byte, B) \
     {CC "get" #Byte,         CC "(" ADR ")" #B,       FN_PTR(Unsafe_GetNative##Byte)}, \
     {CC "put" #Byte,         CC "(" ADR#B ")V",       FN_PTR(Unsafe_SetNative##Byte)}
 
-
-static JNINativeMethod sun_misc_Unsafe_methods[] = {
-    {CC "getObject",        CC "(" OBJ "J)" OBJ "",   FN_PTR(Unsafe_GetObject)},
-    {CC "putObject",        CC "(" OBJ "J" OBJ ")V",  FN_PTR(Unsafe_SetObject)},
-    {CC "getObjectVolatile",CC "(" OBJ "J)" OBJ "",   FN_PTR(Unsafe_GetObjectVolatile)},
-    {CC "putObjectVolatile",CC "(" OBJ "J" OBJ ")V",  FN_PTR(Unsafe_SetObjectVolatile)},
-
-    {CC "getUncompressedObject", CC "(" ADR ")" OBJ,  FN_PTR(Unsafe_GetUncompressedObject)},
-    {CC "getJavaMirror",         CC "(" ADR ")" CLS,  FN_PTR(Unsafe_GetJavaMirror)},
-    {CC "getKlassPointer",       CC "(" OBJ ")" ADR,  FN_PTR(Unsafe_GetKlassPointer)},
-
-    DECLARE_GETPUTOOP(Boolean, Z),
-    DECLARE_GETPUTOOP(Byte, B),
-    DECLARE_GETPUTOOP(Short, S),
-    DECLARE_GETPUTOOP(Char, C),
-    DECLARE_GETPUTOOP(Int, I),
-    DECLARE_GETPUTOOP(Long, J),
-    DECLARE_GETPUTOOP(Float, F),
-    DECLARE_GETPUTOOP(Double, D),
-
-    DECLARE_GETPUTNATIVE(Byte, B),
-    DECLARE_GETPUTNATIVE(Short, S),
-    DECLARE_GETPUTNATIVE(Char, C),
-    DECLARE_GETPUTNATIVE(Int, I),
-    DECLARE_GETPUTNATIVE(Long, J),
-    DECLARE_GETPUTNATIVE(Float, F),
-    DECLARE_GETPUTNATIVE(Double, D),
-
-    {CC "getAddress",         CC "(" ADR ")" ADR,        FN_PTR(Unsafe_GetNativeAddress)},
-    {CC "putAddress",         CC "(" ADR "" ADR ")V",    FN_PTR(Unsafe_SetNativeAddress)},
-
-    {CC "allocateMemory",     CC "(J)" ADR,              FN_PTR(Unsafe_AllocateMemory)},
-    {CC "reallocateMemory",   CC "(" ADR "J)" ADR,       FN_PTR(Unsafe_ReallocateMemory)},
-    {CC "freeMemory",         CC "(" ADR ")V",           FN_PTR(Unsafe_FreeMemory)},
-
-    {CC "objectFieldOffset",  CC "(" FLD ")J",           FN_PTR(Unsafe_ObjectFieldOffset)},
-    {CC "staticFieldOffset",  CC "(" FLD ")J",           FN_PTR(Unsafe_StaticFieldOffset)},
-    {CC "staticFieldBase",    CC "(" FLD ")" OBJ,        FN_PTR(Unsafe_StaticFieldBaseFromField)},
-    {CC "ensureClassInitialized",CC "(" CLS ")V",        FN_PTR(Unsafe_EnsureClassInitialized)},
-    {CC "arrayBaseOffset",    CC "(" CLS ")I",           FN_PTR(Unsafe_ArrayBaseOffset)},
-    {CC "arrayIndexScale",    CC "(" CLS ")I",           FN_PTR(Unsafe_ArrayIndexScale)},
-    {CC "addressSize",        CC "()I",                  FN_PTR(Unsafe_AddressSize)},
-    {CC "pageSize",           CC "()I",                  FN_PTR(Unsafe_PageSize)},
-
-    {CC "defineClass",        CC "(" DC_Args ")" CLS,    FN_PTR(Unsafe_DefineClass)},
-    {CC "allocateInstance",   CC "(" CLS ")" OBJ,        FN_PTR(Unsafe_AllocateInstance)},
-    {CC "throwException",     CC "(" THR ")V",           FN_PTR(Unsafe_ThrowException)},
-    {CC "compareAndSwapObject", CC "(" OBJ "J" OBJ "" OBJ ")Z", FN_PTR(Unsafe_CompareAndSwapObject)},
-    {CC "compareAndSwapInt",  CC "(" OBJ "J""I""I"")Z",  FN_PTR(Unsafe_CompareAndSwapInt)},
-    {CC "compareAndSwapLong", CC "(" OBJ "J""J""J"")Z",  FN_PTR(Unsafe_CompareAndSwapLong)},
-    {CC "putOrderedObject",   CC "(" OBJ "J" OBJ ")V",   FN_PTR(Unsafe_SetOrderedObject)},
-    {CC "putOrderedInt",      CC "(" OBJ "JI)V",         FN_PTR(Unsafe_SetOrderedInt)},
-    {CC "putOrderedLong",     CC "(" OBJ "JJ)V",         FN_PTR(Unsafe_SetOrderedLong)},
-    {CC "park",               CC "(ZJ)V",                FN_PTR(Unsafe_Park)},
-    {CC "unpark",             CC "(" OBJ ")V",           FN_PTR(Unsafe_Unpark)},
-
-    {CC "getLoadAverage",     CC "([DI)I",               FN_PTR(Unsafe_Loadavg)},
-
-    {CC "copyMemory",         CC "(" OBJ "J" OBJ "JJ)V", FN_PTR(Unsafe_CopyMemory)},
-    {CC "setMemory",          CC "(" OBJ "JJB)V",        FN_PTR(Unsafe_SetMemory)},
-
-    {CC "defineAnonymousClass", CC "(" DAC_Args ")" CLS, FN_PTR(Unsafe_DefineAnonymousClass)},
-
-    {CC "shouldBeInitialized",CC "(" CLS ")Z",           FN_PTR(Unsafe_ShouldBeInitialized)},
-
-    {CC "loadFence",          CC "()V",                  FN_PTR(Unsafe_LoadFence)},
-    {CC "storeFence",         CC "()V",                  FN_PTR(Unsafe_StoreFence)},
-    {CC "fullFence",          CC "()V",                  FN_PTR(Unsafe_FullFence)},
-};
-
 static JNINativeMethod jdk_internal_misc_Unsafe_methods[] = {
     {CC "getObject",        CC "(" OBJ "J)" OBJ "",   FN_PTR(Unsafe_GetObject)},
     {CC "putObject",        CC "(" OBJ "J" OBJ ")V",  FN_PTR(Unsafe_SetObject)},
@@ -1403,20 +1207,20 @@
     {CC "getAddress",         CC "(" ADR ")" ADR,        FN_PTR(Unsafe_GetNativeAddress)},
     {CC "putAddress",         CC "(" ADR "" ADR ")V",    FN_PTR(Unsafe_SetNativeAddress)},
 
-    {CC "allocateMemory",     CC "(J)" ADR,              FN_PTR(Unsafe_AllocateMemory)},
-    {CC "reallocateMemory",   CC "(" ADR "J)" ADR,       FN_PTR(Unsafe_ReallocateMemory)},
-    {CC "freeMemory",         CC "(" ADR ")V",           FN_PTR(Unsafe_FreeMemory)},
+    {CC "allocateMemory0",    CC "(J)" ADR,              FN_PTR(Unsafe_AllocateMemory0)},
+    {CC "reallocateMemory0",  CC "(" ADR "J)" ADR,       FN_PTR(Unsafe_ReallocateMemory0)},
+    {CC "freeMemory0",        CC "(" ADR ")V",           FN_PTR(Unsafe_FreeMemory0)},
 
-    {CC "objectFieldOffset",  CC "(" FLD ")J",           FN_PTR(Unsafe_ObjectFieldOffset)},
-    {CC "staticFieldOffset",  CC "(" FLD ")J",           FN_PTR(Unsafe_StaticFieldOffset)},
-    {CC "staticFieldBase",    CC "(" FLD ")" OBJ,        FN_PTR(Unsafe_StaticFieldBaseFromField)},
-    {CC "ensureClassInitialized",CC "(" CLS ")V",        FN_PTR(Unsafe_EnsureClassInitialized)},
-    {CC "arrayBaseOffset",    CC "(" CLS ")I",           FN_PTR(Unsafe_ArrayBaseOffset)},
-    {CC "arrayIndexScale",    CC "(" CLS ")I",           FN_PTR(Unsafe_ArrayIndexScale)},
-    {CC "addressSize",        CC "()I",                  FN_PTR(Unsafe_AddressSize)},
+    {CC "objectFieldOffset0", CC "(" FLD ")J",           FN_PTR(Unsafe_ObjectFieldOffset0)},
+    {CC "staticFieldOffset0", CC "(" FLD ")J",           FN_PTR(Unsafe_StaticFieldOffset0)},
+    {CC "staticFieldBase0",   CC "(" FLD ")" OBJ,        FN_PTR(Unsafe_StaticFieldBase0)},
+    {CC "ensureClassInitialized0", CC "(" CLS ")V",      FN_PTR(Unsafe_EnsureClassInitialized0)},
+    {CC "arrayBaseOffset0",   CC "(" CLS ")I",           FN_PTR(Unsafe_ArrayBaseOffset0)},
+    {CC "arrayIndexScale0",   CC "(" CLS ")I",           FN_PTR(Unsafe_ArrayIndexScale0)},
+    {CC "addressSize0",       CC "()I",                  FN_PTR(Unsafe_AddressSize0)},
     {CC "pageSize",           CC "()I",                  FN_PTR(Unsafe_PageSize)},
 
-    {CC "defineClass",        CC "(" DC_Args ")" CLS,    FN_PTR(Unsafe_DefineClass)},
+    {CC "defineClass0",       CC "(" DC_Args ")" CLS,    FN_PTR(Unsafe_DefineClass0)},
     {CC "allocateInstance",   CC "(" CLS ")" OBJ,        FN_PTR(Unsafe_AllocateInstance)},
     {CC "throwException",     CC "(" THR ")V",           FN_PTR(Unsafe_ThrowException)},
     {CC "compareAndSwapObject", CC "(" OBJ "J" OBJ "" OBJ ")Z", FN_PTR(Unsafe_CompareAndSwapObject)},
@@ -1432,15 +1236,15 @@
     {CC "park",               CC "(ZJ)V",                FN_PTR(Unsafe_Park)},
     {CC "unpark",             CC "(" OBJ ")V",           FN_PTR(Unsafe_Unpark)},
 
-    {CC "getLoadAverage",     CC "([DI)I",               FN_PTR(Unsafe_Loadavg)},
+    {CC "getLoadAverage0",    CC "([DI)I",               FN_PTR(Unsafe_GetLoadAverage0)},
 
-    {CC "copyMemory",         CC "(" OBJ "J" OBJ "JJ)V", FN_PTR(Unsafe_CopyMemory)},
+    {CC "copyMemory0",        CC "(" OBJ "J" OBJ "JJ)V", FN_PTR(Unsafe_CopyMemory0)},
     {CC "copySwapMemory0",    CC "(" OBJ "J" OBJ "JJJ)V", FN_PTR(Unsafe_CopySwapMemory0)},
-    {CC "setMemory",          CC "(" OBJ "JJB)V",        FN_PTR(Unsafe_SetMemory)},
+    {CC "setMemory0",         CC "(" OBJ "JJB)V",        FN_PTR(Unsafe_SetMemory0)},
 
-    {CC "defineAnonymousClass", CC "(" DAC_Args ")" CLS, FN_PTR(Unsafe_DefineAnonymousClass)},
+    {CC "defineAnonymousClass0", CC "(" DAC_Args ")" CLS, FN_PTR(Unsafe_DefineAnonymousClass0)},
 
-    {CC "shouldBeInitialized",CC "(" CLS ")Z",           FN_PTR(Unsafe_ShouldBeInitialized)},
+    {CC "shouldBeInitialized0", CC "(" CLS ")Z",         FN_PTR(Unsafe_ShouldBeInitialized0)},
 
     {CC "loadFence",          CC "()V",                  FN_PTR(Unsafe_LoadFence)},
     {CC "storeFence",         CC "()V",                  FN_PTR(Unsafe_StoreFence)},
@@ -1466,27 +1270,14 @@
 #undef DECLARE_GETPUTNATIVE
 
 
-// These two functions are exported, used by NativeLookup.
+// This function is exported, used by NativeLookup.
 // The Unsafe_xxx functions above are called only from the interpreter.
 // The optimizer looks at names and signatures to recognize
 // individual functions.
 
-JVM_ENTRY(void, JVM_RegisterSunMiscUnsafeMethods(JNIEnv *env, jclass unsafeclass))
-  UnsafeWrapper("JVM_RegisterSunMiscUnsafeMethods");
-  {
-    ThreadToNativeFromVM ttnfv(thread);
+JVM_ENTRY(void, JVM_RegisterJDKInternalMiscUnsafeMethods(JNIEnv *env, jclass unsafeclass)) {
+  ThreadToNativeFromVM ttnfv(thread);
 
-    int ok = env->RegisterNatives(unsafeclass, sun_misc_Unsafe_methods, sizeof(sun_misc_Unsafe_methods)/sizeof(JNINativeMethod));
-    guarantee(ok == 0, "register sun.misc.Unsafe natives");
-  }
-JVM_END
-
-JVM_ENTRY(void, JVM_RegisterJDKInternalMiscUnsafeMethods(JNIEnv *env, jclass unsafeclass))
-  UnsafeWrapper("JVM_RegisterJDKInternalMiscUnsafeMethods");
-  {
-    ThreadToNativeFromVM ttnfv(thread);
-
-    int ok = env->RegisterNatives(unsafeclass, jdk_internal_misc_Unsafe_methods, sizeof(jdk_internal_misc_Unsafe_methods)/sizeof(JNINativeMethod));
-    guarantee(ok == 0, "register jdk.internal.misc.Unsafe natives");
-  }
-JVM_END
+  int ok = env->RegisterNatives(unsafeclass, jdk_internal_misc_Unsafe_methods, sizeof(jdk_internal_misc_Unsafe_methods)/sizeof(JNINativeMethod));
+  guarantee(ok == 0, "register jdk.internal.misc.Unsafe natives");
+} JVM_END
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/prims/unsafe.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+
+#ifndef SHARE_VM_PRIMS_UNSAFE_HPP
+#define SHARE_VM_PRIMS_UNSAFE_HPP
+
+#include "jni.h"
+
+extern "C" {
+  void JNICALL JVM_RegisterJDKInternalMiscUnsafeMethods(JNIEnv *env, jclass unsafecls);
+}
+
+jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
+
+jlong Unsafe_field_offset_from_byte_offset(jlong byte_offset);
+
+#endif // SHARE_VM_PRIMS_UNSAFE_HPP
--- a/src/share/vm/prims/whitebox.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/prims/whitebox.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -31,6 +31,7 @@
 #include "classfile/stringTable.hpp"
 #include "code/codeCache.hpp"
 #include "compiler/methodMatcher.hpp"
+#include "compiler/directivesParser.hpp"
 #include "jvmtifiles/jvmtiEnv.hpp"
 #include "memory/metadataFactory.hpp"
 #include "memory/metaspaceShared.hpp"
@@ -637,6 +638,10 @@
 WB_END
 
 WB_ENTRY(jboolean, WB_EnqueueMethodForCompilation(JNIEnv* env, jobject o, jobject method, jint comp_level, jint bci))
+  // Screen for unavailable/bad comp level
+  if (CompileBroker::compiler(comp_level) == NULL) {
+    return false;
+  }
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
   CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
   methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(jmid));
@@ -1539,6 +1544,27 @@
   }
 }
 
+WB_ENTRY(jint, WB_AddCompilerDirective(JNIEnv* env, jobject o, jstring compDirect))
+  // can't be in VM when we call JNI
+  ThreadToNativeFromVM ttnfv(thread);
+  const char* dir = env->GetStringUTFChars(compDirect, NULL);
+  int ret;
+  {
+    ThreadInVMfromNative ttvfn(thread); // back to VM
+    ret = DirectivesParser::parse_string(dir, tty);
+  }
+  env->ReleaseStringUTFChars(compDirect, dir);
+  // -1 for error parsing directive. Return 0 as number of directives added.
+  if (ret == -1) {
+    ret = 0;
+  }
+  return (jint) ret;
+WB_END
+
+WB_ENTRY(void, WB_RemoveCompilerDirective(JNIEnv* env, jobject o, jint count))
+  DirectivesStack::pop(count);
+WB_END
+
 #define CC (char*)
 
 static JNINativeMethod methods[] = {
@@ -1732,6 +1758,9 @@
   {CC"isSharedClass",      CC"(Ljava/lang/Class;)Z",  (void*)&WB_IsSharedClass },
   {CC"areSharedStringsIgnored",           CC"()Z",    (void*)&WB_AreSharedStringsIgnored },
   {CC"clearInlineCaches",  CC"()V",                   (void*)&WB_ClearInlineCaches },
+  {CC"addCompilerDirective",    CC"(Ljava/lang/String;)I",
+                                                      (void*)&WB_AddCompilerDirective },
+  {CC"removeCompilerDirective", CC"(I)V",             (void*)&WB_RemoveCompilerDirective },
 };
 
 #undef CC
--- a/src/share/vm/runtime/advancedThresholdPolicy.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/runtime/advancedThresholdPolicy.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,17 +42,30 @@
 }
 
 void AdvancedThresholdPolicy::initialize() {
+  int count = CICompilerCount;
+#ifdef _LP64
   // Turn on ergonomic compiler count selection
   if (FLAG_IS_DEFAULT(CICompilerCountPerCPU) && FLAG_IS_DEFAULT(CICompilerCount)) {
     FLAG_SET_DEFAULT(CICompilerCountPerCPU, true);
   }
-  int count = CICompilerCount;
   if (CICompilerCountPerCPU) {
     // Simple log n seems to grow too slowly for tiered, try something faster: log n * log log n
     int log_cpu = log2_intptr(os::active_processor_count());
     int loglog_cpu = log2_intptr(MAX2(log_cpu, 1));
     count = MAX2(log_cpu * loglog_cpu, 1) * 3 / 2;
   }
+#else
+  // On 32-bit systems, the number of compiler threads is limited to 3.
+  // On these systems, the virtual address space available to the JVM
+  // is usually limited to 2-4 GB (the exact value depends on the platform).
+  // As the compilers (especially C2) can consume a large amount of
+  // memory, scaling the number of compiler threads with the number of
+  // available cores can result in the exhaustion of the address space
+  /// available to the VM and thus cause the VM to crash.
+  if (FLAG_IS_DEFAULT(CICompilerCount)) {
+    count = 3;
+  }
+#endif
 
   set_c1_count(MAX2(count / 3, 1));
   set_c2_count(MAX2(count - c1_count(), 1));
@@ -164,9 +177,7 @@
 
 // Called with the queue locked and with at least one element
 CompileTask* AdvancedThresholdPolicy::select_task(CompileQueue* compile_queue) {
-#if INCLUDE_JVMCI
   CompileTask *max_blocking_task = NULL;
-#endif
   CompileTask *max_task = NULL;
   Method* max_method = NULL;
   jlong t = os::javaTimeMillis();
@@ -180,7 +191,8 @@
       max_method = method;
     } else {
       // If a method has been stale for some time, remove it from the queue.
-      if (is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) {
+      // Blocking tasks don't become stale
+      if (!task->is_blocking() && is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) {
         if (PrintTieredEvents) {
           print_event(REMOVE_FROM_QUEUE, method, method, task->osr_bci(), (CompLevel)task->comp_level());
         }
@@ -197,29 +209,25 @@
         max_method = method;
       }
     }
-#if INCLUDE_JVMCI
-    if (UseJVMCICompiler && task->is_blocking()) {
+
+    if (task->is_blocking()) {
       if (max_blocking_task == NULL || compare_methods(method, max_blocking_task->method())) {
         max_blocking_task = task;
       }
     }
-#endif
+
     task = next_task;
   }
 
-#if INCLUDE_JVMCI
-  if (UseJVMCICompiler) {
-    if (max_blocking_task != NULL) {
-      // In blocking compilation mode, the CompileBroker will make
-      // compilations submitted by a JVMCI compiler thread non-blocking. These
-      // compilations should be scheduled after all blocking compilations
-      // to service non-compiler related compilations sooner and reduce the
-      // chance of such compilations timing out.
-      max_task = max_blocking_task;
-      max_method = max_task->method();
-    }
+  if (max_blocking_task != NULL) {
+    // In blocking compilation mode, the CompileBroker will make
+    // compilations submitted by a JVMCI compiler thread non-blocking. These
+    // compilations should be scheduled after all blocking compilations
+    // to service non-compiler related compilations sooner and reduce the
+    // chance of such compilations timing out.
+    max_task = max_blocking_task;
+    max_method = max_task->method();
   }
-#endif
 
   if (max_task->comp_level() == CompLevel_full_profile && TieredStopAtLevel > CompLevel_full_profile
       && is_method_profiled(max_method)) {
--- a/src/share/vm/runtime/arguments.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -235,8 +235,6 @@
 
   // Set OS specific system properties values
   os::init_system_properties_values();
-
-  JVMCI_ONLY(JVMCIRuntime::init_system_properties(&_system_properties);)
 }
 
 // Update/Initialize System properties after JDK version number is known
@@ -2540,9 +2538,11 @@
     status = false;
   }
 
+#ifdef _LP64
   if (!FLAG_IS_DEFAULT(CICompilerCount) && !FLAG_IS_DEFAULT(CICompilerCountPerCPU) && CICompilerCountPerCPU) {
     warning("The VM option CICompilerCountPerCPU overrides CICompilerCount.");
   }
+#endif
 
 #ifndef SUPPORT_RESERVED_STACK_AREA
   if (StackReservedPages != 0) {
--- a/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -354,6 +354,14 @@
   return Flag::SUCCESS;
 }
 
+Flag::Error InitArrayShortSizeConstraintFunc(intx value, bool verbose) {
+  if (value % BytesPerLong != 0) {
+    return Flag::VIOLATES_CONSTRAINT;
+  } else {
+    return Flag::SUCCESS;
+  }
+}
+
 #ifdef COMPILER2
 Flag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose) {
   if (InteriorEntryAlignment > CodeEntryAlignment) {
--- a/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -62,6 +62,8 @@
 
 Flag::Error TypeProfileLevelConstraintFunc(uintx value, bool verbose);
 
+Flag::Error InitArrayShortSizeConstraintFunc(intx value, bool verbose);
+
 #ifdef COMPILER2
 Flag::Error InteriorEntryAlignmentConstraintFunc(intx value, bool verbose);
 
--- a/src/share/vm/runtime/commandLineFlagRangeList.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/runtime/commandLineFlagRangeList.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -247,6 +247,9 @@
 void emit_range_double(const char* /*name*/)    { /* NOP */ }
 
 // CommandLineFlagRange emitting code functions if range arguments are provided
+void emit_range_int(const char* name, int min, int max)       {
+  CommandLineFlagRangeList::add(new CommandLineFlagRange_int(name, min, max));
+}
 void emit_range_intx(const char* name, intx min, intx max) {
   CommandLineFlagRangeList::add(new CommandLineFlagRange_intx(name, min, max));
 }
--- a/src/share/vm/runtime/globals.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/runtime/globals.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -725,7 +725,7 @@
                                                                             \
   product(bool, UseSHA, false,                                              \
           "Control whether SHA instructions can be used "                   \
-          "on SPARC and on ARM")                                            \
+          "on SPARC, on ARM and on x86")                                    \
                                                                             \
   product(bool, UseGHASHIntrinsics, false,                                  \
           "Use intrinsics for GHASH versions of crypto")                    \
@@ -891,9 +891,6 @@
   notproduct(bool, VerifyLastFrame, false,                                  \
           "Verify oops on last frame on entry to VM")                       \
                                                                             \
-  develop(bool, TraceHandleAllocation, false,                               \
-          "Print out warnings when suspiciously many handles are allocated")\
-                                                                            \
   product(bool, FailOverToOldVerifier, true,                                \
           "Fail over to old verifier when split verifier fails")            \
                                                                             \
@@ -1603,10 +1600,10 @@
   product(bool, ResizePLAB, true,                                           \
           "Dynamically resize (survivor space) promotion LAB's")            \
                                                                             \
-  product(intx, ParGCArrayScanChunk, 50,                                    \
+  product(int, ParGCArrayScanChunk, 50,                                     \
           "Scan a subset of object array and push remainder, if array is "  \
           "bigger than this")                                               \
-          range(1, max_intx)                                                \
+          range(1, max_jint/3)                                              \
                                                                             \
   product(bool, ParGCUseLocalOverflow, false,                               \
           "Instead of a global overflow list, use local overflow stacks")   \
@@ -3024,14 +3021,6 @@
   notproduct(ccstrlist, SuppressErrorAt, "",                                \
           "List of assertions (file:line) to muzzle")                       \
                                                                             \
-  notproduct(size_t, HandleAllocationLimit, 1024,                           \
-          "Threshold for HandleMark allocation when +TraceHandleAllocation "\
-          "is used")                                                        \
-                                                                            \
-  develop(size_t, TotalHandleAllocationLimit, 1024,                         \
-          "Threshold for total handle allocation when "                     \
-          "+TraceHandleAllocation is used")                                 \
-                                                                            \
   develop(intx, StackPrintLimit, 100,                                       \
           "number of stack frames to print in VM-level stack dump")         \
                                                                             \
@@ -3079,16 +3068,16 @@
   develop(intx, MethodHistogramCutoff, 100,                                 \
           "The cutoff value for method invocation histogram (+CountCalls)") \
                                                                             \
-  develop(intx, ProfilerNumberOfInterpretedMethods, 25,                     \
+  diagnostic(intx, ProfilerNumberOfInterpretedMethods, 25,                  \
           "Number of interpreted methods to show in profile")               \
                                                                             \
-  develop(intx, ProfilerNumberOfCompiledMethods, 25,                        \
+  diagnostic(intx, ProfilerNumberOfCompiledMethods, 25,                     \
           "Number of compiled methods to show in profile")                  \
                                                                             \
-  develop(intx, ProfilerNumberOfStubMethods, 25,                            \
+  diagnostic(intx, ProfilerNumberOfStubMethods, 25,                         \
           "Number of stub methods to show in profile")                      \
                                                                             \
-  develop(intx, ProfilerNumberOfRuntimeStubNodes, 25,                       \
+  diagnostic(intx, ProfilerNumberOfRuntimeStubNodes, 25,                    \
           "Number of runtime stub nodes to show in profile")                \
                                                                             \
   product(intx, ProfileIntervalsTicks, 100,                                 \
@@ -4149,6 +4138,13 @@
              "in the loaded class C. "                                      \
              "Check (3) is available only in debug builds.")                \
                                                                             \
+  develop_pd(intx, InitArrayShortSize,                                      \
+          "Threshold small size (in bytes) for clearing arrays. "           \
+          "Anything this size or smaller may get converted to discrete "    \
+          "scalar stores.")                                                 \
+          range(0, max_intx)                                                \
+          constraint(InitArrayShortSizeConstraintFunc, AfterErgo)           \
+                                                                            \
   diagnostic(bool, CompilerDirectivesIgnoreCompileCommands, false,          \
              "Disable backwards compatibility for compile commands.")       \
                                                                             \
--- a/src/share/vm/runtime/handles.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/runtime/handles.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -129,14 +129,6 @@
     k = k->next();
   }
 
-  // The thread local handle areas should not get very large
-  if (TraceHandleAllocation && (size_t)handles_visited > TotalHandleAllocationLimit) {
-#ifdef ASSERT
-    warning("%d: Visited in HandleMark : " SIZE_FORMAT, _nof_handlemarks, handles_visited);
-#else
-    warning("Visited in HandleMark : " SIZE_FORMAT, handles_visited);
-#endif
-  }
   if (_prev != NULL) _prev->oops_do(f);
 }
 
@@ -165,31 +157,6 @@
   assert(area->_handle_mark_nesting > 0, "must stack allocate HandleMarks" );
   debug_only(area->_handle_mark_nesting--);
 
-  // Debug code to trace the number of handles allocated per mark/
-#ifdef ASSERT
-  if (TraceHandleAllocation) {
-    size_t handles = 0;
-    Chunk *c = _chunk->next();
-    if (c == NULL) {
-      handles = area->_hwm - _hwm; // no new chunk allocated
-    } else {
-      handles = _max - _hwm;      // add rest in first chunk
-      while(c != NULL) {
-        handles += c->length();
-        c = c->next();
-      }
-      handles -= area->_max - area->_hwm; // adjust for last trunk not full
-    }
-    handles /= sizeof(void *); // Adjust for size of a handle
-    if (handles > HandleAllocationLimit) {
-      // Note: _nof_handlemarks is only set in debug mode
-      warning("%d: Allocated in HandleMark : " SIZE_FORMAT, _nof_handlemarks, handles);
-    }
-
-    tty->print_cr("Handles " SIZE_FORMAT, handles);
-  }
-#endif
-
   // Delete later chunks
   if( _chunk->next() ) {
     // reset arena size before delete chunks. Otherwise, the total
--- a/src/share/vm/runtime/interfaceSupport.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/runtime/interfaceSupport.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -521,9 +521,9 @@
     JNI_ENTRY_NO_PRESERVE(result_type, header)                       \
     WeakPreserveExceptionMark __wem(thread);
 
-#define JNI_ENTRY_NO_PRESERVE(result_type, header)             \
+#define JNI_ENTRY_NO_PRESERVE(result_type, header)                   \
 extern "C" {                                                         \
-  result_type JNICALL header {                                \
+  result_type JNICALL header {                                       \
     JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
     assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
     ThreadInVMfromNative __tiv(thread);                              \
@@ -535,7 +535,7 @@
 // a GC, is called outside the NoHandleMark (set via VM_QUICK_ENTRY_BASE).
 #define JNI_QUICK_ENTRY(result_type, header)                         \
 extern "C" {                                                         \
-  result_type JNICALL header {                                \
+  result_type JNICALL header {                                       \
     JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
     assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
     ThreadInVMfromNative __tiv(thread);                              \
@@ -545,7 +545,7 @@
 
 #define JNI_LEAF(result_type, header)                                \
 extern "C" {                                                         \
-  result_type JNICALL header {                                \
+  result_type JNICALL header {                                       \
     JavaThread* thread=JavaThread::thread_from_jni_environment(env); \
     assert( !VerifyJNIEnvThread || (thread == Thread::current()), "JNIEnv is only valid in same thread"); \
     VM_LEAF_BASE(result_type, header)
--- a/src/share/vm/runtime/simpleThresholdPolicy.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/runtime/simpleThresholdPolicy.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -138,9 +138,15 @@
     FLAG_SET_DEFAULT(CICompilerCount, 3);
   }
   int count = CICompilerCount;
+#ifdef _LP64
+  // On 64-bit systems, scale the number of compiler threads with
+  // the number of cores available on the system. Scaling is not
+  // performed on 32-bit systems because it can lead to exhaustion
+  // of the virtual memory address space available to the JVM.
   if (CICompilerCountPerCPU) {
     count = MAX2(log2_intptr(os::active_processor_count()), 1) * 3 / 2;
   }
+#endif
   set_c1_count(MAX2(count / 3, 1));
   set_c2_count(MAX2(count - c1_count(), 1));
   FLAG_SET_ERGO(intx, CICompilerCount, c1_count() + c2_count());
--- a/src/share/vm/runtime/thread.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/runtime/thread.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -3738,15 +3738,6 @@
     Chunk::start_chunk_pool_cleaner_task();
   }
 
-#if INCLUDE_JVMCI
-  if (EnableJVMCI) {
-    const char* jvmciCompiler = Arguments::PropertyList_get_value(Arguments::system_properties(), "jvmci.compiler");
-    if (jvmciCompiler != NULL) {
-      JVMCIRuntime::save_compiler(jvmciCompiler);
-    }
-  }
-#endif // INCLUDE_JVMCI
-
   // initialize compiler(s)
 #if defined(COMPILER1) || defined(COMPILER2) || defined(SHARK) || INCLUDE_JVMCI
   CompileBroker::compilation_init(CHECK_JNI_ERR);
--- a/src/share/vm/services/diagnosticCommand.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/services/diagnosticCommand.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -931,7 +931,7 @@
 }
 
 void CompilerDirectivesRemoveDCmd::execute(DCmdSource source, TRAPS) {
-  DirectivesStack::pop();
+  DirectivesStack::pop(1);
 }
 
 void CompilerDirectivesClearDCmd::execute(DCmdSource source, TRAPS) {
--- a/src/share/vm/shark/sharkBuilder.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/shark/sharkBuilder.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -29,6 +29,7 @@
 #include "gc/shared/cardTableModRefBS.hpp"
 #include "memory/resourceArea.hpp"
 #include "oops/method.hpp"
+#include "prims/unsafe.hpp"
 #include "runtime/os.hpp"
 #include "runtime/synchronizer.hpp"
 #include "runtime/thread.hpp"
@@ -326,7 +327,6 @@
 }
 
 Value* SharkBuilder::unsafe_field_offset_to_byte_offset() {
-  extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
   return make_function((address) Unsafe_field_offset_to_byte_offset, "l", "l");
 }
 
--- a/src/share/vm/utilities/globalDefinitions.hpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/utilities/globalDefinitions.hpp	Wed Mar 23 21:44:35 2016 -0700
@@ -313,7 +313,7 @@
 // ANSI C++ does not allow casting from one pointer type to a function pointer
 // directly without at best a warning. This macro accomplishes it silently
 // In every case that is present at this point the value be cast is a pointer
-// to a C linkage function. In somecase the type used for the cast reflects
+// to a C linkage function. In some case the type used for the cast reflects
 // that linkage and a picky compiler would not complain. In other cases because
 // there is no convenient place to place a typedef with extern C linkage (i.e
 // a platform dependent header file) it doesn't. At this point no compiler seems
@@ -678,7 +678,7 @@
 inline const char* type2name(BasicType t) { return (uint)t < T_CONFLICT+1 ? type2name_tab[t] : NULL; }
 extern BasicType name2type(const char* name);
 
-// Auxilary math routines
+// Auxiliary math routines
 // least common multiple
 extern size_t lcm(size_t a, size_t b);
 
@@ -801,7 +801,7 @@
 
 // TosState describes the top-of-stack state before and after the execution of
 // a bytecode or method. The top-of-stack value may be cached in one or more CPU
-// registers. The TosState corresponds to the 'machine represention' of this cached
+// registers. The TosState corresponds to the 'machine representation' of this cached
 // value. There's 4 states corresponding to the JAVA types int, long, float & double
 // as well as a 5th state in case the top-of-stack value is actually on the top
 // of stack (in memory) and thus not cached. The atos state corresponds to the itos
@@ -876,7 +876,7 @@
 // a transition from one state to another. These extra states makes it possible for the safepoint code to
 // handle certain thread_states without having to suspend the thread - making the safepoint code faster.
 //
-// Given a state, the xxx_trans state can always be found by adding 1.
+// Given a state, the xxxx_trans state can always be found by adding 1.
 //
 enum JavaThreadState {
   _thread_uninitialized     =  0, // should never happen (missing initialization)
@@ -1425,7 +1425,7 @@
 // operations.
 
 // The goal of this code to avoid undefined or implementation-defined
-// behaviour.  The use of an lvalue to reference cast is explicitly
+// behavior.  The use of an lvalue to reference cast is explicitly
 // permitted by Lvalues and rvalues [basic.lval].  [Section 3.10 Para
 // 15 in C++03]
 #define JAVA_INTEGER_OP(OP, NAME, TYPE, UNSIGNED_TYPE)  \
--- a/src/share/vm/utilities/ostream.cpp	Wed Mar 23 19:33:34 2016 -0700
+++ b/src/share/vm/utilities/ostream.cpp	Wed Mar 23 21:44:35 2016 -0700
@@ -338,7 +338,9 @@
       }
       char* oldbuf = buffer;
       assert(rm == NULL || Thread::current()->current_resource_mark() == rm,
-             "stringStream is re-allocated with a different ResourceMark");
+             "StringStream is re-allocated with a different ResourceMark. Current: "
+             PTR_FORMAT " original: " PTR_FORMAT,
+             p2i(Thread::current()->current_resource_mark()), p2i(rm));
       buffer = NEW_RESOURCE_ARRAY(char, end);
       if (buffer_pos > 0) {
         memcpy(buffer, oldbuf, buffer_pos);
--- a/test/TEST.groups	Wed Mar 23 19:33:34 2016 -0700
+++ b/test/TEST.groups	Wed Mar 23 21:44:35 2016 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -230,8 +230,10 @@
 #
 needs_g1gc = \
   compiler/regalloc/C1ObjectSpillInLogicOp.java \
+  gc/TestSmallHeap.java \
   gc/TestSystemGC.java \
   gc/arguments/TestAlignmentToUseLargePages.java \
+  gc/arguments/TestG1ConcRefinementThreads.java \
   gc/arguments/TestG1HeapRegionSize.java \
   gc/arguments/TestG1HeapSizeFlags.java \
   gc/arguments/TestG1PercentageOptions.java \
@@ -242,11 +244,11 @@
   gc/class_unloading/TestG1ClassUnloadingHWM.java \
   gc/ergonomics/TestDynamicNumberOfGCThreads.java \
   gc/g1/ \
+  gc/logging/TestGCId.java \
   gc/metaspace/G1AddMetaspaceDependency.java \
   gc/metaspace/TestMetaspacePerfCounters.java \
   gc/startup_warnings/TestG1.java \
-  gc/whitebox/TestConcMarkCycleWB.java \
-  gc/arguments/TestG1ConcRefinementThreads.java
+  gc/whitebox/TestConcMarkCycleWB.java 
 
 hotspot_native_sanity = \
   native_sanity
--- a/test/compiler/compilercontrol/jcmd/StressAddJcmdBase.java	Wed Mar 23 19:33:34 2016 -0700
+++ b/test/compiler/compilercontrol/jcmd/StressAddJcmdBase.java	Wed Mar 23 21:44:35 2016 -0700
@@ -32,74 +32,99 @@
 import jdk.test.lib.Utils;
 import pool.PoolHelper;
 
+import java.util.ArrayList;
 import java.util.List;
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
 import java.util.stream.Collectors;
-import java.util.stream.Stream;
 
 public abstract class StressAddJcmdBase {
     private static final int DIRECTIVES_AMOUNT = Integer.getInteger(
             "compiler.compilercontrol.jcmd.StressAddJcmdBase.directivesAmount",
-            1000);
-    private static final int DIRECTIVE_FILES = Integer.getInteger(
-            "compiler.compilercontrol.jcmd.StressAddJcmdBase.directiveFiles",
-            5);
+            200);
+    private static final int TIMEOUT = Integer.getInteger(
+            "compiler.compilercontrol.jcmd.StressAddJcmdBase.timeout",
+            30);
     private static final List<MethodDescriptor> DESCRIPTORS = new PoolHelper()
             .getAllMethods().stream()
                     .map(pair -> AbstractTestBase
                             .getValidMethodDescriptor(pair.first))
                     .collect(Collectors.toList());
+    private static final String DIRECTIVE_FILE = "directives.json";
+    private static final List<String> VM_OPTIONS = new ArrayList<>();
+    private static final Random RANDOM = Utils.getRandomInstance();
+
+    static {
+        VM_OPTIONS.add("-Xmixed");
+        VM_OPTIONS.add("-XX:+UnlockDiagnosticVMOptions");
+        VM_OPTIONS.add("-XX:+LogCompilation");
+        VM_OPTIONS.add("-XX:CompilerDirectivesLimit=1001");
+    }
 
     /**
      * Performs test
      */
     public void test() {
-        List<String> commands = prepareCommands();
-        Executor executor = new TimeLimitedExecutor(commands);
+        HugeDirectiveUtil.createHugeFile(DESCRIPTORS, DIRECTIVE_FILE,
+                DIRECTIVES_AMOUNT);
+        Executor executor = new TimeLimitedExecutor();
         List<OutputAnalyzer> outputAnalyzers = executor.execute();
         outputAnalyzers.get(0).shouldHaveExitValue(0);
     }
 
     /**
-     * Makes connection to the test VM
+     * Makes connection to the test VM and performs a diagnostic command
      *
-     * @param pid      a pid of the VM under test
-     * @param commands a list of jcmd commands to be executed
+     * @param pid a pid of the VM under test
      * @return true if the test should continue invocation of this method
      */
-    protected abstract boolean makeConnection(int pid, List<String> commands);
+    protected abstract boolean makeConnection(int pid);
 
     /**
      * Finish test executions
      */
     protected void finish() { }
 
-    private List<String> prepareCommands() {
-        String[] files = new String[DIRECTIVE_FILES];
-        for (int i = 0; i < DIRECTIVE_FILES; i++) {
-            files[i] = "directives" + i + ".json";
-            HugeDirectiveUtil.createHugeFile(DESCRIPTORS, files[i],
-                    DIRECTIVES_AMOUNT);
+    protected String nextCommand() {
+        int i = RANDOM.nextInt(JcmdCommand.values().length);
+        JcmdCommand jcmdCommand = JcmdCommand.values()[i];
+        switch (jcmdCommand) {
+            case ADD:
+                return jcmdCommand.command + " " + DIRECTIVE_FILE;
+            case PRINT:
+            case CLEAR:
+            case REMOVE:
+                return jcmdCommand.command;
+            default:
+                throw new Error("TESTBUG: incorrect command: " + jcmdCommand);
         }
-        return Stream.of(files)
-                .map(file -> "Compiler.directives_add " + file)
-                .collect(Collectors.toList());
+    }
+
+    private enum JcmdCommand {
+        ADD("Compiler.directives_add"),
+        PRINT("Compiler.directives_print"),
+        CLEAR("Compiler.directives_clear"),
+        REMOVE("Compiler.directives_remove");
+
+        public final String command;
+
+        JcmdCommand(String command) {
+            this.command = command;
+        }
     }
 
     private class TimeLimitedExecutor extends Executor {
-        private final List<String> jcmdCommands;
-
-        public TimeLimitedExecutor(List<String> jcmdCommands) {
+        public TimeLimitedExecutor() {
             /* There are no need to check the state */
-            super(true, null, null, jcmdCommands);
-            this.jcmdCommands = jcmdCommands;
+            super(true, VM_OPTIONS, null, null);
         }
 
         @Override
         protected OutputAnalyzer[] executeJCMD(int pid) {
             TimeLimitedRunner runner = new TimeLimitedRunner(
-                    Utils.DEFAULT_TEST_TIMEOUT,
+                    TimeUnit.SECONDS.toMillis(TIMEOUT),
                     Utils.TIMEOUT_FACTOR,
-                    () -> makeConnection(pid, jcmdCommands));
+                    () -> makeConnection(pid));
             try {
                 runner.call();
             } catch (Exception e) {
--- a/test/compiler/compilercontrol/jcmd/StressAddMultiThreadedTest.java	Wed Mar 23 19:33:34 2016 -0700
+++ b/test/compiler/compilercontrol/jcmd/StressAddMultiThreadedTest.java	Wed Mar 23 21:44:35 2016 -0700
@@ -27,21 +27,19 @@
  * @summary Tests jcmd to be able to add a lot of huge directive files with
  *          parallel executed jcmds until timeout has reached
  * @library /testlibrary /test/lib /compiler/testlibrary ../share /
- * @ignore 8148563
  * @build compiler.compilercontrol.jcmd.StressAddMultiThreadedTest
  *        pool.sub.* pool.subpack.* sun.hotspot.WhiteBox
  *        compiler.testlibrary.CompilerUtils
  *        compiler.compilercontrol.share.actions.*
  * @run main ClassFileInstaller sun.hotspot.WhiteBox
  *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm/timeout=360 compiler.compilercontrol.jcmd.StressAddMultiThreadedTest
+ * @run driver compiler.compilercontrol.jcmd.StressAddMultiThreadedTest
  */
 
 package compiler.compilercontrol.jcmd;
 
 import jdk.test.lib.dcmd.PidJcmdExecutor;
 
-import java.util.List;
 import java.util.concurrent.ArrayBlockingQueue;
 import java.util.concurrent.BlockingQueue;
 import java.util.concurrent.ExecutorService;
@@ -49,16 +47,15 @@
 import java.util.concurrent.TimeUnit;
 
 public class StressAddMultiThreadedTest extends StressAddJcmdBase {
-    private static final int THREADS;
+    private static final int THREADS = Integer.getInteger(
+            "compiler.compilercontrol.jcmd.StressAddMultiThreadedTest.threads",
+            5);
+    private volatile int commands = Integer.getInteger(
+            "compiler.compilercontrol.jcmd.StressAddMultiThreadedTest.commands",
+            20);
     private final BlockingQueue<Runnable> queue;
     private final ExecutorService executor;
 
-    static {
-        THREADS = Runtime.getRuntime().availableProcessors()
-                * Integer.getInteger("compiler.compilercontrol.jcmd" +
-                        ".StressAddMultiThreadedTest.threadFactor", 10);
-    }
-
     public StressAddMultiThreadedTest() {
         queue = new ArrayBlockingQueue<>(THREADS);
         executor = new ThreadPoolExecutor(THREADS, THREADS, 100,
@@ -71,14 +68,10 @@
     }
 
     @Override
-    protected boolean makeConnection(int pid, List<String> commands) {
-        commands.forEach(command -> {
-            if (!executor.isShutdown()) {
-                executor.submit(() -> new PidJcmdExecutor(String.valueOf(pid))
-                        .execute(command));
-            }
-        });
-        return !executor.isShutdown();
+    protected boolean makeConnection(int pid) {
+        executor.submit(() -> new PidJcmdExecutor(String.valueOf(pid))
+                .execute(nextCommand()));
+        return (--commands != 0);
     }
 
     @Override
--- a/test/compiler/compilercontrol/jcmd/StressAddSequentiallyTest.java	Wed Mar 23 19:33:34 2016 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,55 +0,0 @@
-/*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-
-/*
- * @test
- * @bug 8137167
- * @summary Tests jcmd to be able to add a lot of huge directives
- * @library /testlibrary /test/lib /compiler/testlibrary ../share /
- * @build compiler.compilercontrol.jcmd.StressAddSequentiallyTest
- *        pool.sub.* pool.subpack.* sun.hotspot.WhiteBox
- *        compiler.testlibrary.CompilerUtils
- *        compiler.compilercontrol.share.actions.*
- * @run main ClassFileInstaller sun.hotspot.WhiteBox
- *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- * @run main/othervm/timeout=300 compiler.compilercontrol.jcmd.StressAddSequentiallyTest
- */
-
-package compiler.compilercontrol.jcmd;
-
-import jdk.test.lib.dcmd.PidJcmdExecutor;
-
-import java.util.List;
-
-public class StressAddSequentiallyTest extends StressAddJcmdBase {
-    public static void main(String[] args) {
-        new StressAddSequentiallyTest().test();
-    }
-
-    @Override
-    protected boolean makeConnection(int pid, List<String> commands) {
-        commands.forEach(command -> new PidJcmdExecutor(String.valueOf(pid))
-                .execute(command));
-        return true;
-    }
-}
--- a/test/compiler/compilercontrol/share/MultiCommand.java	Wed Mar 23 19:33:34 2016 -0700
+++ b/test/compiler/compilercontrol/share/MultiCommand.java	Wed Mar 23 21:44:35 2016 -0700
@@ -53,8 +53,8 @@
         List<CompileCommand> testCases = new ArrayList<>();
         for (Command cmd : commands) {
             if (validOnly && cmd == Command.NONEXISTENT) {
-                // skip invalid command
-                continue;
+                // replace with a valid command
+                cmd = Command.EXCLUDE;
             }
             Executable exec = Utils.getRandomElement(METHODS).first;
             MethodDescriptor md;
--- a/test/compiler/intrinsics/IntrinsicDisabledTest.java	Wed Mar 23 19:33:34 2016 -0700
+++ b/test/compiler/intrinsics/IntrinsicDisabledTest.java	Wed Mar 23 21:44:35 2016 -0700
@@ -33,8 +33,8 @@
  *                   -XX:+WhiteBoxAPI
  *                   -XX:DisableIntrinsic=_putCharVolatile,_putInt
  *                   -XX:DisableIntrinsic=_putIntVolatile
- *                   -XX:CompileCommand=option,sun.misc.Unsafe::putChar,ccstrlist,DisableIntrinsic,_getCharVolatile,_getInt
- *                   -XX:CompileCommand=option,sun.misc.Unsafe::putCharVolatile,ccstrlist,DisableIntrinsic,_getIntVolatile
+ *                   -XX:CompileCommand=option,jdk.internal.misc.Unsafe::putChar,ccstrlist,DisableIntrinsic,_getCharVolatile,_getInt
+ *                   -XX:CompileCommand=option,jdk.internal.misc.Unsafe::putCharVolatile,ccstrlist,DisableIntrinsic,_getIntVolatile
  *                   IntrinsicDisabledTest
  */
 
@@ -60,7 +60,7 @@
         return Boolean.valueOf(Objects.toString(wb.getVMFlag("TieredCompilation")));
     }
 
-    /* This test uses several methods from sun.misc.Unsafe. The method
+    /* This test uses several methods from jdk.internal.misc.Unsafe. The method
      * getMethod() returns a different Executable for each different
      * combination of its input parameters. There are eight possible
      * combinations, getMethod can return an Executable representing
@@ -74,7 +74,7 @@
         String methodTypeName = isChar ? "Char" : "Int";
 
         try {
-            Class aClass = Class.forName("sun.misc.Unsafe");
+            Class aClass = Class.forName("jdk.internal.misc.Unsafe");
             if (isPut) {
                 aMethod = aClass.getDeclaredMethod("put" + methodTypeName + (isVolatile ? "Volatile" : ""),
                                                    Object.class,
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/class/TestClassIsPrimitive.java	Wed Mar 23 21:44:35 2016 -0700
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test
+ * @bug 8150669
+ * @summary C1 intrinsic for Class.isPrimitive
+ * @modules java.base/jdk.internal.misc
+ * @run main/othervm -ea -Diters=200   -Xint                   TestClassIsPrimitive
+ * @run main/othervm -ea -Diters=30000 -XX:TieredStopAtLevel=1 TestClassIsPrimitive
+ * @run main/othervm -ea -Diters=30000 -XX:TieredStopAtLevel=4 TestClassIsPrimitive
+ */
+import java.lang.reflect.Field;
+import java.lang.reflect.Array;
+import java.util.concurrent.Callable;
+
+public class TestClassIsPrimitive {
+    static final int ITERS = Integer.getInteger("iters", 1);
+
+    public static void main(String... args) throws Exception {
+        testOK(true,  InlineConstants::testBoolean);
+        testOK(true,  InlineConstants::testByte);
+        testOK(true,  InlineConstants::testShort);
+        testOK(true,  InlineConstants::testChar);
+        testOK(true,  InlineConstants::testInt);
+        testOK(true,  InlineConstants::testFloat);
+        testOK(true,  InlineConstants::testLong);
+        testOK(true,  InlineConstants::testDouble);
+        testOK(false, InlineConstants::testObject);
+        testOK(false, InlineConstants::testArray);
+
+        testOK(true,  StaticConstants::testBoolean);
+        testOK(true,  StaticConstants::testByte);
+        testOK(true,  StaticConstants::testShort);
+        testOK(true,  StaticConstants::testChar);
+        testOK(true,  StaticConstants::testInt);
+        testOK(true,  StaticConstants::testFloat);
+        testOK(true,  StaticConstants::testLong);
+        testOK(true,  StaticConstants::testDouble);
+        testOK(false, StaticConstants::testObject);
+        testOK(false, StaticConstants::testArray);
+        testNPE(      StaticConstants::testNull);
+
+        testOK(true,  NoConstants::testBoolean);
+        testOK(true,  NoConstants::testByte);
+        testOK(true,  NoConstants::testShort);
+        testOK(true,  NoConstants::testChar);
+        testOK(true,  NoConstants::testInt);
+        testOK(true,  NoConstants::testFloat);
+        testOK(true,  NoConstants::testLong);
+        testOK(true,  NoConstants::testDouble);
+        testOK(false, NoConstants::testObject);
+        testOK(false, NoConstants::testArray);
+        testNPE(      NoConstants::testNull);
+    }
+
+    public static void testOK(boolean expected, Callable<Object> test) throws Exception {
+        for (int c = 0; c < ITERS; c++) {
+            Object res = test.call();
+            if (!res.equals(expected)) {
+                throw new IllegalStateException("Wrong result: expected = " + expected + ", but got " + res);
+            }
+        }
+    }
+
+    static volatile Object sink;
+
+    public static void testNPE(Callable<Object> test) throws Exception {
+        for (int c = 0; c < ITERS; c++) {
+            try {
+               sink = test.call();
+               throw new IllegalStateException("Expected NPE");
+            } catch (NullPointerException iae) {
+               // expected
+            }
+        }
+    }
+
+    static volatile Class<?> classBoolean = boolean.class;
+    static volatile Class<?> classByte    = byte.class;
+    static volatile Class<?> classShort   = short.class;
+    static volatile Class<?> classChar    = char.class;
+    static volatile Class<?> classInt     = int.class;
+    static volatile Class<?> classFloat   = float.class;
+    static volatile Class<?> classLong    = long.class;
+    static volatile Class<?> classDouble  = double.class;
+    static volatile Class<?> classObject  = Object.class;
+    static volatile Class<?> classArray   = Object[].class;
+    static volatile Class<?> classNull    = null;
+
+    static final Class<?> staticClassBoolean = boolean.class;
+    static final Class<?> staticClassByte    = byte.class;
+    static final Class<?> staticClassShort   = short.class;
+    static final Class<?> staticClassChar    = char.class;
+    static final Class<?> staticClassInt     = int.class;
+    static final Class<?> staticClassFloat   = float.class;
+    static final Class<?> staticClassLong    = long.class;
+    static final Class<?> staticClassDouble  = double.class;
+    static final Class<?> staticClassObject  = Object.class;
+    static final Class<?> staticClassArray   = Object[].class;
+    static final Class<?> staticClassNull    = null;
+
+    static class InlineConstants {
+        static boolean testBoolean() { return boolean.class.isPrimitive();  }
+        static boolean testByte()    { return byte.class.isPrimitive();     }
+        static boolean testShort()   { return short.class.isPrimitive();    }
+        static boolean testChar()    { return char.class.isPrimitive();     }
+        static boolean testInt()     { return int.class.isPrimitive();      }
+        static boolean testFloat()   { return float.class.isPrimitive();    }
+        static boolean testLong()    { return long.class.isPrimitive();     }
+        static boolean testDouble()  { return double.class.isPrimitive();   }
+        static boolean testObject()  { return Object.class.isPrimitive();   }
+        static boolean testArray()   { return Object[].class.isPrimitive(); }
+    }
+
+    static class StaticConstants {
+        static boolean testBoolean() { return staticClassBoolean.isPrimitive(); }
+        static boolean testByte()    { return staticClassByte.isPrimitive();    }
+        static boolean testShort()   { return staticClassShort.isPrimitive();   }
+        static boolean testChar()    { return staticClassChar.isPrimitive();    }
+        static boolean testInt()     { return staticClassInt.isPrimitive();     }
+        static boolean testFloat()   { return staticClassFloat.isPrimitive();   }
+        static boolean testLong()    { return staticClassLong.isPrimitive();    }
+        static boolean testDouble()  { return staticClassDouble.isPrimitive();  }
+        static boolean testObject()  { return staticClassObject.isPrimitive();  }
+        static boolean testArray()   { return staticClassArray.isPrimitive();   }
+        static boolean testNull()    { return staticClassNull.isPrimitive();    }
+    }
+
+    static class NoConstants {
+        static boolean testBoolean() { return classBoolean.isPrimitive(); }
+        static boolean testByte()    { return classByte.isPrimitive();    }
+        static boolean testShort()   { return classShort.isPrimitive();   }
+        static boolean testChar()    { return classChar.isPrimitive();    }
+        static boolean testInt()     { return classInt.isPrimitive();     }
+        static boolean testFloat()   { return classFloat.isPrimitive();   }
+        static boolean testLong()    { return classLong.isPrimitive();    }
+        static boolean testDouble()  { return classDouble.isPrimitive();  }
+        static boolean testObject()  { return classObject.isPrimitive();  }
+        static boolean testArray()   { return classArray.isPrimitive();   }
+        static boolean testNull()    { return classNull.isPrimitive();    }
+    }
+
+}
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/unsafe/AllocateUninitializedArray.java	Wed Mar 23 21:44:35 2016 -0700
@@ -0,0 +1,213 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/*
+ * @test
+ * @bug 8150465
+ * @summary Unsafe methods to produce uninitialized arrays
+ * @modules java.base/jdk.internal.misc
+ * @run main/othervm -ea -Diters=200   -Xint                   AllocateUninitializedArray
+ * @run main/othervm -ea -Diters=30000 -XX:TieredStopAtLevel=1 AllocateUninitializedArray
+ * @run main/othervm -ea -Diters=30000 -XX:TieredStopAtLevel=4 AllocateUninitializedArray
+ */
+import java.lang.reflect.Field;
+import java.lang.reflect.Array;
+import java.util.concurrent.Callable;
+
+public class AllocateUninitializedArray {
+    static final int ITERS = Integer.getInteger("iters", 1);
+    static final jdk.internal.misc.Unsafe UNSAFE;
+
+    static {
+        try {
+            Field f = jdk.internal.misc.Unsafe.class.getDeclaredField("theUnsafe");
+            f.setAccessible(true);
+            UNSAFE = (jdk.internal.misc.Unsafe) f.get(null);
+        } catch (Exception e) {
+            throw new RuntimeException("Unable to get Unsafe instance.", e);
+        }
+    }
+
+    public static void main(String... args) throws Exception {
+        testIAE(AllConstants::testObject);
+        testIAE(LengthIsConstant::testObject);
+        testIAE(ClassIsConstant::testObject);
+        testIAE(NothingIsConstant::testObject);
+
+        testIAE(AllConstants::testArray);
+        testIAE(LengthIsConstant::testArray);
+        testIAE(ClassIsConstant::testArray);
+        testIAE(NothingIsConstant::testArray);
+
+        testIAE(AllConstants::testNull);
+        testIAE(LengthIsConstant::testNull);
+        testIAE(ClassIsConstant::testNull);
+        testIAE(NothingIsConstant::testNull);
+
+        testOK(boolean[].class, 10, AllConstants::testBoolean);
+        testOK(byte[].class,    10, AllConstants::testByte);
+        testOK(short[].class,   10, AllConstants::testShort);
+        testOK(char[].class,    10, AllConstants::testChar);
+        testOK(int[].class,     10, AllConstants::testInt);
+        testOK(float[].class,   10, AllConstants::testFloat);
+        testOK(long[].class,    10, AllConstants::testLong);
+        testOK(double[].class,  10, AllConstants::testDouble);
+
+        testOK(boolean[].class, 10, LengthIsConstant::testBoolean);
+        testOK(byte[].class,    10, LengthIsConstant::testByte);
+        testOK(short[].class,   10, LengthIsConstant::testShort);
+        testOK(char[].class,    10, LengthIsConstant::testChar);
+        testOK(int[].class,     10, LengthIsConstant::testInt);
+        testOK(float[].class,   10, LengthIsConstant::testFloat);
+        testOK(long[].class,    10, LengthIsConstant::testLong);
+        testOK(double[].class,  10, LengthIsConstant::testDouble);
+
+        testOK(boolean[].class, 10, ClassIsConstant::testBoolean);
+        testOK(byte[].class,    10, ClassIsConstant::testByte);
+        testOK(short[].class,   10, ClassIsConstant::testShort);
+        testOK(char[].class,    10, ClassIsConstant::testChar);
+        testOK(int[].class,     10, ClassIsConstant::testInt);
+        testOK(float[].class,   10, ClassIsConstant::testFloat);
+        testOK(long[].class,    10, ClassIsConstant::testLong);
+        testOK(double[].class,  10, ClassIsConstant::testDouble);