changeset 11186:5bd9548140be

Merge
author vlivanov
date Fri, 06 May 2016 18:20:50 +0300
parents bf9e318baefe f57bf5857679
children e09c7128cc52
files src/cpu/aarch64/vm/aarch64.ad src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp src/cpu/aarch64/vm/macroAssembler_aarch64.cpp src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp src/cpu/sparc/vm/macroAssembler_sparc.cpp src/cpu/sparc/vm/macroAssembler_sparc.hpp src/cpu/sparc/vm/stubGenerator_sparc.cpp src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp src/cpu/x86/vm/c1_LIRGenerator_x86.cpp src/cpu/x86/vm/macroAssembler_x86.cpp src/cpu/x86/vm/macroAssembler_x86.hpp src/cpu/x86/vm/stubGenerator_x86_64.cpp src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java src/share/vm/c1/c1_LIRGenerator.cpp src/share/vm/c1/c1_LIRGenerator.hpp src/share/vm/classfile/classLoader.cpp src/share/vm/classfile/vmSymbols.hpp src/share/vm/prims/whitebox.cpp src/share/vm/runtime/globals.hpp src/share/vm/runtime/vmStructs.cpp src/share/vm/runtime/vm_version.cpp test/compiler/jvmci/jdk.vm.ci.runtime.test/src/jdk/vm/ci/runtime/test/TestResolvedJavaType.java test/compiler/unsafe/generate-unsafe-tests.sh test/compiler/whitebox/BlockingCompilation.java
diffstat 112 files changed, 2052 insertions(+), 811 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/aarch64/vm/aarch64.ad	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/aarch64/vm/aarch64.ad	Fri May 06 18:20:50 2016 +0300
@@ -3572,6 +3572,8 @@
   return FP_REG_mask();
 }
 
+const bool Matcher::convi2l_type_required = false;
+
 // helper for encoding java_to_runtime calls on sim
 //
 // this is needed to compute the extra arguments required when
--- a/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Fri May 06 18:20:50 2016 +0300
@@ -1030,6 +1030,14 @@
   }
 }
 
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
+  Unimplemented();  // placeholder: this port emits no C1 code for the CRC32C intrinsic
+}
+
+void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
+  fatal("vectorizedMismatch intrinsic is not implemented on this platform");  // stub only: no C1 code is emitted here
+}
+
 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
 // _i2b, _i2c, _i2s
 void LIRGenerator::do_Convert(Convert* x) {
--- a/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Fri May 06 18:20:50 2016 +0300
@@ -71,7 +71,7 @@
 define_pd_global(bool, OptoScheduling,               false);
 define_pd_global(bool, OptoBundling,                 false);
 define_pd_global(bool, OptoRegScheduling,            false);
-define_pd_global(bool, SuperWordLoopUnrollAnalysis,  false);
+define_pd_global(bool, SuperWordLoopUnrollAnalysis,  true);
 
 define_pd_global(intx, ReservedCodeCacheSize,        48*M);
 define_pd_global(intx, NonProfiledCodeHeapSize,      21*M);
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Fri May 06 18:20:50 2016 +0300
@@ -4787,15 +4787,15 @@
   br(rscratch2);
 
   bind(loop);
+  add(base, base, unroll * 16);
   for (int i = -unroll; i < 0; i++)
     stp(value, value, Address(base, i * 16));
   bind(entry);
   subs(cnt, cnt, unroll * 2);
-  add(base, base, unroll * 16);
   br(Assembler::GE, loop);
 
   tbz(cnt, 0, fini);
-  str(value, Address(base, -unroll * 16));
+  str(value, Address(post(base, 8)));
   bind(fini);
 }
 
@@ -4810,6 +4810,7 @@
   Label base_aligned;
 
   assert_different_registers(base, cnt, rscratch1);
+  guarantee(base == r10 && cnt == r11, "fix register usage");
 
   Register tmp = rscratch1;
   Register tmp2 = rscratch2;
@@ -4848,15 +4849,15 @@
   br(tmp2);
 
   bind(small_loop);
+  add(base, base, unroll * 16);
   for (int i = -unroll; i < 0; i++)
     stp(zr, zr, Address(base, i * 16));
   bind(small_table_end);
   subs(cnt, cnt, unroll * 2);
-  add(base, base, unroll * 16);
   br(Assembler::GE, small_loop);
 
   tbz(cnt, 0, done);
-  str(zr, Address(base, -unroll * 16));
+  str(zr, Address(post(base, 8)));
 
   bind(done);
 }
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Fri May 06 18:20:50 2016 +0300
@@ -724,11 +724,15 @@
     Register tmp2 = rscratch2;
     int zva_length = VM_Version::zva_length();
     Label initial_table_end, loop_zva;
+    Label fini;
 
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", "zero_longs");
     address start = __ pc();
 
+    // Base must be 16-byte aligned. If it is not, just return and let the caller handle it.
+    __ tst(base, 0x0f);
+    __ br(Assembler::NE, fini);
     // Align base with ZVA length.
     __ neg(tmp, base);
     __ andr(tmp, tmp, zva_length - 1);
@@ -751,6 +755,7 @@
     __ add(base, base, zva_length);
     __ br(Assembler::GE, loop_zva);
     __ add(cnt, cnt, zva_length >> 3); // count not zeroed by DC ZVA
+    __ bind(fini);
     __ ret(lr);
 
     return start;
@@ -2077,7 +2082,9 @@
     const Register to        = c_rarg0;  // source array address
     const Register value     = c_rarg1;  // value
     const Register count     = c_rarg2;  // elements count
-    const Register cnt_words = c_rarg3; // temp register
+
+    const Register bz_base = r10;        // base for block_zero routine
+    const Register cnt_words = r11;      // temp register
 
     __ enter();
 
@@ -2147,7 +2154,9 @@
       __ cmp(cnt_words, BlockZeroingLowLimit >> 3);
       __ ccmp(value, 0 /* comparing value */, 0 /* NZCV */, Assembler::GE);
       __ br(Assembler::NE, non_block_zeroing);
-      __ block_zero(to, cnt_words, true);
+      __ mov(bz_base, to);
+      __ block_zero(bz_base, cnt_words, true);
+      __ mov(to, bz_base);
       __ b(rest);
       __ bind(non_block_zeroing);
       __ fill_words(to, cnt_words, value);
--- a/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	Fri May 06 18:20:50 2016 +0300
@@ -1427,4 +1427,12 @@
       ShouldNotReachHere();
     }
   }
+
+  void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
+    fatal("vectorizedMismatch intrinsic is not implemented on this platform");
+  }
 }
+
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
+  Unimplemented();
+}
--- a/src/cpu/ppc/vm/ppc.ad	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/ppc/vm/ppc.ad	Fri May 06 18:20:50 2016 +0300
@@ -1380,6 +1380,8 @@
     // Save return pc.
     ___(std) std(return_pc, _abi(lr), callers_sp);
   }
+  
+  C->set_frame_complete(cbuf.insts_size());
 }
 #undef ___
 #undef ___stop
@@ -2311,6 +2313,8 @@
   return RegMask();
 }
 
+const bool Matcher::convi2l_type_required = true;
+
 %}
 
 //----------ENCODING BLOCK-----------------------------------------------------
--- a/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp	Fri May 06 18:20:50 2016 +0300
@@ -869,6 +869,94 @@
   }
 }
 
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) {
+  // Lowers the CRC32C intrinsics to a leaf call of the updateBytesCRC32C stub (this function makes no state_for calls).
+  LIR_Opr result = rlock_result(x);
+  int flags = 0;  // NOTE(review): never read in this function
+  switch (x->id()) {
+    case vmIntrinsics::_updateBytesCRC32C:
+    case vmIntrinsics::_updateDirectByteBufferCRC32C: {
+
+      bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C);  // byte[] variant vs. direct-buffer variant
+      int array_offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0;  // byte[] data follows the array header
+
+      LIRItem crc(x->argument_at(0), this);
+      LIRItem buf(x->argument_at(1), this);
+      LIRItem off(x->argument_at(2), this);
+      LIRItem end(x->argument_at(3), this);
+
+      buf.load_item();
+      off.load_nonconstant();
+      end.load_nonconstant();
+
+      // len = end - off
+      LIR_Opr len  = end.result();  // NOTE(review): overwritten by tmpA below before any use
+      LIR_Opr tmpA = new_register(T_INT);
+      LIR_Opr tmpB = new_register(T_INT);
+      __ move(end.result(), tmpA);
+      __ move(off.result(), tmpB);
+      __ sub(tmpA, tmpB, tmpA);
+      len = tmpA;
+
+      LIR_Opr index = off.result();
+
+      if(off.result()->is_constant()) {  // fold a constant offset into the displacement
+        index = LIR_OprFact::illegalOpr;
+        array_offset += off.result()->as_jint();
+      }
+
+      LIR_Opr base_op = buf.result();
+
+      if (index->is_valid()) {
+        LIR_Opr tmp = new_register(T_LONG);
+        __ convert(Bytecodes::_i2l, index, tmp);  // widen the int offset to a long before adding it to the base
+        index = tmp;
+        if (index->is_constant()) {  // NOTE(review): index was just rebound to the register tmp, so this looks unreachable - confirm
+          array_offset += index->as_constant_ptr()->as_jint();
+          index = LIR_OprFact::illegalOpr;
+        } else if (index->is_register()) {
+          LIR_Opr tmp2 = new_register(T_LONG);
+          LIR_Opr tmp3 = new_register(T_LONG);
+          __ move(base_op, tmp2);
+          __ move(index, tmp3);
+          __ add(tmp2, tmp3, tmp2);  // tmp2 = buf + off
+          base_op = tmp2;
+        } else {
+          ShouldNotReachHere();
+        }
+      }
+
+      LIR_Address* a = new LIR_Address(base_op, array_offset, T_BYTE);
+
+      BasicTypeList signature(3);  // stub arguments, in order: int crc, address buf, int len
+      signature.append(T_INT);
+      signature.append(T_ADDRESS);
+      signature.append(T_INT);
+      CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+      const LIR_Opr result_reg = result_register_for(x->type());
+
+      LIR_Opr addr = new_pointer_register();
+      __ leal(LIR_OprFact::address(a), addr);  // addr = base_op + array_offset
+
+      crc.load_item_force(cc->at(0));
+      __ move(addr, cc->at(1));
+      __ move(len, cc->at(2));
+
+      __ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args());
+      __ move(result_reg, result);
+
+      break;
+    }
+    default: {
+      ShouldNotReachHere();
+    }
+  }
+}
+
+void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) {
+  fatal("vectorizedMismatch intrinsic is not implemented on this platform");  // stub only: no C1 code is emitted here
+}
+
 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
 // _i2b, _i2c, _i2s
 void LIRGenerator::do_Convert(Convert* x) {
--- a/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Fri May 06 18:20:50 2016 +0300
@@ -4845,21 +4845,21 @@
 
 // Reverse byte order of lower 32 bits, assuming upper 32 bits all zeros
 void MacroAssembler::reverse_bytes_32(Register src, Register dst, Register tmp) {
-  srlx(src, 24, dst);
-
-  sllx(src, 32+8, tmp);
-  srlx(tmp, 32+24, tmp);
-  sllx(tmp, 8, tmp);
-  or3(dst, tmp, dst);
-
-  sllx(src, 32+16, tmp);
-  srlx(tmp, 32+24, tmp);
-  sllx(tmp, 16, tmp);
-  or3(dst, tmp, dst);
-
-  sllx(src, 32+24, tmp);
-  srlx(tmp, 32, tmp);
-  or3(dst, tmp, dst);
+    srlx(src, 24, dst);          // dst = byte 3 of src moved down to byte 0 (byte 0 = least significant)
+
+    sllx(src, 32+8, tmp);        // shift out everything above bit 23
+    srlx(tmp, 32+24, tmp);       // tmp = byte 2 of src
+    sllx(tmp, 8, tmp);           // place it in byte 1
+    or3(dst, tmp, dst);
+
+    sllx(src, 32+16, tmp);       // shift out everything above bit 15
+    srlx(tmp, 32+24, tmp);       // tmp = byte 1 of src
+    sllx(tmp, 16, tmp);          // place it in byte 2
+    or3(dst, tmp, dst);
+
+    sllx(src, 32+24, tmp);       // byte 0 of src moved up to bits 63..56
+    srlx(tmp, 32, tmp);          // ...and back down into byte 3 (bits 31..24)
+    or3(dst, tmp, dst);
 }
 
 void MacroAssembler::movitof_revbytes(Register src, FloatRegister dst, Register tmp1, Register tmp2) {
@@ -5111,3 +5111,176 @@
   not1(crc);
 }
 
+#define CHUNK_LEN   128          /* 128 x 8B = 1KB */
+#define CHUNK_K1    0x1307a0206  /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */
+#define CHUNK_K2    0x1a0f717c4  /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */
+#define CHUNK_K3    0x0170076fa  /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */
+
+void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, Register table) {
+  // Emits the CRC32C kernel: folds len bytes starting at buf into crc and leaves the result in crc.
+  Label L_crc32c_head, L_crc32c_aligned;
+  Label L_crc32c_parallel, L_crc32c_parallel_loop;
+  Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop;
+  Label L_crc32c_done, L_crc32c_tail, L_crc32c_return;
+  // Scratch used directly below: G1, G3, G4, G5, O4, O5, even FP registers F0..F14, and 8 bytes at (SP - 8) rounded down.
+  set(ExternalAddress(StubRoutines::crc32c_table_addr()), table);  // table = address of the CRC32C lookup table
+
+  cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return);  // nothing to do when len <= 0
+
+  // clear upper 32 bits of crc
+  clruwu(crc);
+
+  and3(buf, 7, G4);  // G4 = buf & 7
+  cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned);
+
+  mov(8, G1);
+  sub(G1, G4, G4);  // G4 = 8 - (buf & 7): bytes to consume before buf is 8-byte aligned
+
+  // ------ process the misaligned head (7 bytes or less) ------
+  bind(L_crc32c_head);
+
+  // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
+  ldub(buf, 0, G1);
+  update_byte_crc32(crc, G1, table);
+
+  inc(buf);
+  dec(len);
+  cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return);  // input exhausted inside the head
+  dec(G4);
+  cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head);
+
+  // ------ process the 8-byte-aligned body ------
+  bind(L_crc32c_aligned);
+  nop();
+  cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail);  // fewer than 8 bytes left: byte-wise tail only
+
+  // reverse the byte order of lower 32 bits to big endian, and move to FP side
+  movitof_revbytes(crc, F0, G1, G3);
+
+  set(CHUNK_LEN*8*4, G4);  // G4 = size of four chunks in bytes
+  cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial);  // not enough input for the parallel path
+
+  // ------ process four 1KB chunks in parallel ------
+  bind(L_crc32c_parallel);
+
+  fzero(FloatRegisterImpl::D, F2);
+  fzero(FloatRegisterImpl::D, F4);
+  fzero(FloatRegisterImpl::D, F6);
+
+  mov(CHUNK_LEN - 1, G4);  // the loop covers all but the last doubleword of each chunk
+  bind(L_crc32c_parallel_loop);
+  // schedule ldf's ahead of crc32c's to hide the load-use latency
+  ldf(FloatRegisterImpl::D, buf, 0,            F8);
+  ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8,  F10);
+  ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
+  ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14);
+  crc32c(F0, F8,  F0);
+  crc32c(F2, F10, F2);
+  crc32c(F4, F12, F4);
+  crc32c(F6, F14, F6);
+  inc(buf, 8);
+  dec(G4);
+  cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop);
+
+  ldf(FloatRegisterImpl::D, buf, 0,            F8);  // last doubleword of the first three chunks
+  ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8,  F10);
+  ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
+  crc32c(F0, F8,  F0);
+  crc32c(F2, F10, F2);
+  crc32c(F4, F12, F4);
+
+  inc(buf, CHUNK_LEN*24);  // buf -> last doubleword of the fourth chunk
+  ldfl(FloatRegisterImpl::D, buf, G0, F14);  // load in little endian
+  inc(buf, 8);  // buf -> first byte after the four chunks
+
+  prefetch(buf, 0,            Assembler::severalReads);
+  prefetch(buf, CHUNK_LEN*8,  Assembler::severalReads);
+  prefetch(buf, CHUNK_LEN*16, Assembler::severalReads);
+  prefetch(buf, CHUNK_LEN*24, Assembler::severalReads);
+
+  // move to INT side, and reverse the byte order of lower 32 bits to little endian
+  movftoi_revbytes(F0, O4, G1, G4);
+  movftoi_revbytes(F2, O5, G1, G4);
+  movftoi_revbytes(F4, G5, G1, G4);
+
+  // combine the results of 4 chunks
+  set64(CHUNK_K1, G3, G1);
+  xmulx(O4, G3, O4);
+  set64(CHUNK_K2, G3, G1);
+  xmulx(O5, G3, O5);
+  set64(CHUNK_K3, G3, G1);
+  xmulx(G5, G3, G5);
+
+  movdtox(F14, G4);  // G4 = the fourth chunk's last doubleword, loaded above
+  xor3(O4, O5, O5);
+  xor3(G5, O5, O5);
+  xor3(G4, O5, O5);
+
+  // reverse the byte order to big endian, via stack, and move to FP side
+  // TODO: use new revb instruction
+  add(SP, -8, G1);
+  srlx(G1, 3, G1);
+  sllx(G1, 3, G1);  // G1 = (SP - 8) rounded down to a multiple of 8
+  stx(O5, G1, G0);
+  ldfl(FloatRegisterImpl::D, G1, G0, F2);  // load in little endian
+
+  crc32c(F6, F2, F0);
+
+  set(CHUNK_LEN*8*4, G4);
+  sub(len, G4, len);  // four chunks consumed
+  cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel);  // another four chunks available
+  nop();
+  cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done);
+
+  bind(L_crc32c_serial);
+
+  mov(32, G4);
+  cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8);
+
+  // ------ process 32B chunks ------
+  bind(L_crc32c_x32_loop);
+  ldf(FloatRegisterImpl::D, buf, 0, F2);
+  crc32c(F0, F2, F0);
+  ldf(FloatRegisterImpl::D, buf, 8, F2);
+  crc32c(F0, F2, F0);
+  ldf(FloatRegisterImpl::D, buf, 16, F2);
+  crc32c(F0, F2, F0);
+  ldf(FloatRegisterImpl::D, buf, 24, F2);
+  inc(buf, 32);
+  crc32c(F0, F2, F0);
+  dec(len, 32);
+  cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop);
+
+  bind(L_crc32c_x8);
+  nop();
+  cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done);
+
+  // ------ process 8B chunks ------
+  bind(L_crc32c_x8_loop);
+  ldf(FloatRegisterImpl::D, buf, 0, F2);
+  inc(buf, 8);
+  crc32c(F0, F2, F0);
+  dec(len, 8);
+  cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop);
+
+  bind(L_crc32c_done);
+
+  // move to INT side, and reverse the byte order of lower 32 bits to little endian
+  movftoi_revbytes(F0, crc, G1, G3);
+
+  cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return);  // no bytes left for the tail loop
+
+  // ------ process the misaligned tail (7 bytes or less) ------
+  bind(L_crc32c_tail);
+
+  // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
+  ldub(buf, 0, G1);
+  update_byte_crc32(crc, G1, table);
+
+  inc(buf);
+  dec(len);
+  cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail);
+
+  bind(L_crc32c_return);
+  nop();
+}
--- a/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Fri May 06 18:20:50 2016 +0300
@@ -1420,6 +1420,8 @@
   // Fold 8-bit data
   void fold_8bit_crc32(Register xcrc, Register table, Register xtmp, Register tmp);
   void fold_8bit_crc32(Register crc, Register table, Register tmp);
+  // CRC32C code for java.util.zip.CRC32C::updateBytes/updateDirectByteBuffer intrinsic.
+  void kernel_crc32c(Register crc, Register buf, Register len, Register table);
 
 };
 
--- a/src/cpu/sparc/vm/sparc.ad	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/sparc/vm/sparc.ad	Fri May 06 18:20:50 2016 +0300
@@ -2133,6 +2133,8 @@
   return L7_REGP_mask();
 }
 
+const bool Matcher::convi2l_type_required = true;
+
 %}
 
 
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -4909,11 +4909,6 @@
       return start;
   }
 
-#define CHUNK_LEN   128          /* 128 x 8B = 1KB */
-#define CHUNK_K1    0x1307a0206  /* reverseBits(pow(x, CHUNK_LEN*8*8*3 - 32) mod P(x)) << 1 */
-#define CHUNK_K2    0x1a0f717c4  /* reverseBits(pow(x, CHUNK_LEN*8*8*2 - 32) mod P(x)) << 1 */
-#define CHUNK_K3    0x0170076fa  /* reverseBits(pow(x, CHUNK_LEN*8*8*1 - 32) mod P(x)) << 1 */
-
   /**
    *  Arguments:
    *
@@ -4938,171 +4933,8 @@
     const Register len   = O2;  // number of bytes
     const Register table = O3;  // byteTable
 
-    Label L_crc32c_head, L_crc32c_aligned;
-    Label L_crc32c_parallel, L_crc32c_parallel_loop;
-    Label L_crc32c_serial, L_crc32c_x32_loop, L_crc32c_x8, L_crc32c_x8_loop;
-    Label L_crc32c_done, L_crc32c_tail, L_crc32c_return;
-
-    __ cmp_and_br_short(len, 0, Assembler::lessEqual, Assembler::pn, L_crc32c_return);
-
-    // clear upper 32 bits of crc
-    __ clruwu(crc);
-
-    __ and3(buf, 7, G4);
-    __ cmp_and_brx_short(G4, 0, Assembler::equal, Assembler::pt, L_crc32c_aligned);
-
-    __ mov(8, G1);
-    __ sub(G1, G4, G4);
-
-    // ------ process the misaligned head (7 bytes or less) ------
-    __ BIND(L_crc32c_head);
-
-    // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
-    __ ldub(buf, 0, G1);
-    __ update_byte_crc32(crc, G1, table);
-
-    __ inc(buf);
-    __ dec(len);
-    __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pn, L_crc32c_return);
-    __ dec(G4);
-    __ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_head);
-
-    // ------ process the 8-byte-aligned body ------
-    __ BIND(L_crc32c_aligned);
-    __ nop();
-    __ cmp_and_br_short(len, 8, Assembler::less, Assembler::pn, L_crc32c_tail);
-
-    // reverse the byte order of lower 32 bits to big endian, and move to FP side
-    __ movitof_revbytes(crc, F0, G1, G3);
-
-    __ set(CHUNK_LEN*8*4, G4);
-    __ cmp_and_br_short(len, G4, Assembler::less, Assembler::pt, L_crc32c_serial);
-
-    // ------ process four 1KB chunks in parallel ------
-    __ BIND(L_crc32c_parallel);
-
-    __ fzero(FloatRegisterImpl::D, F2);
-    __ fzero(FloatRegisterImpl::D, F4);
-    __ fzero(FloatRegisterImpl::D, F6);
-
-    __ mov(CHUNK_LEN - 1, G4);
-    __ BIND(L_crc32c_parallel_loop);
-    // schedule ldf's ahead of crc32c's to hide the load-use latency
-    __ ldf(FloatRegisterImpl::D, buf, 0,            F8);
-    __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8,  F10);
-    __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
-    __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*24, F14);
-    __ crc32c(F0, F8,  F0);
-    __ crc32c(F2, F10, F2);
-    __ crc32c(F4, F12, F4);
-    __ crc32c(F6, F14, F6);
-    __ inc(buf, 8);
-    __ dec(G4);
-    __ cmp_and_br_short(G4, 0, Assembler::greater, Assembler::pt, L_crc32c_parallel_loop);
-
-    __ ldf(FloatRegisterImpl::D, buf, 0,            F8);
-    __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*8,  F10);
-    __ ldf(FloatRegisterImpl::D, buf, CHUNK_LEN*16, F12);
-    __ crc32c(F0, F8,  F0);
-    __ crc32c(F2, F10, F2);
-    __ crc32c(F4, F12, F4);
-
-    __ inc(buf, CHUNK_LEN*24);
-    __ ldfl(FloatRegisterImpl::D, buf, G0, F14);  // load in little endian
-    __ inc(buf, 8);
-
-    __ prefetch(buf, 0,            Assembler::severalReads);
-    __ prefetch(buf, CHUNK_LEN*8,  Assembler::severalReads);
-    __ prefetch(buf, CHUNK_LEN*16, Assembler::severalReads);
-    __ prefetch(buf, CHUNK_LEN*24, Assembler::severalReads);
-
-    // move to INT side, and reverse the byte order of lower 32 bits to little endian
-    __ movftoi_revbytes(F0, O4, G1, G4);
-    __ movftoi_revbytes(F2, O5, G1, G4);
-    __ movftoi_revbytes(F4, G5, G1, G4);
-
-    // combine the results of 4 chunks
-    __ set64(CHUNK_K1, G3, G1);
-    __ xmulx(O4, G3, O4);
-    __ set64(CHUNK_K2, G3, G1);
-    __ xmulx(O5, G3, O5);
-    __ set64(CHUNK_K3, G3, G1);
-    __ xmulx(G5, G3, G5);
-
-    __ movdtox(F14, G4);
-    __ xor3(O4, O5, O5);
-    __ xor3(G5, O5, O5);
-    __ xor3(G4, O5, O5);
-
-    // reverse the byte order to big endian, via stack, and move to FP side
-    __ add(SP, -8, G1);
-    __ srlx(G1, 3, G1);
-    __ sllx(G1, 3, G1);
-    __ stx(O5, G1, G0);
-    __ ldfl(FloatRegisterImpl::D, G1, G0, F2);  // load in little endian
-
-    __ crc32c(F6, F2, F0);
-
-    __ set(CHUNK_LEN*8*4, G4);
-    __ sub(len, G4, len);
-    __ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_parallel);
-    __ nop();
-    __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_done);
-
-    __ BIND(L_crc32c_serial);
-
-    __ mov(32, G4);
-    __ cmp_and_br_short(len, G4, Assembler::less, Assembler::pn, L_crc32c_x8);
-
-    // ------ process 32B chunks ------
-    __ BIND(L_crc32c_x32_loop);
-    __ ldf(FloatRegisterImpl::D, buf, 0, F2);
-    __ inc(buf, 8);
-    __ crc32c(F0, F2, F0);
-    __ ldf(FloatRegisterImpl::D, buf, 0, F2);
-    __ inc(buf, 8);
-    __ crc32c(F0, F2, F0);
-    __ ldf(FloatRegisterImpl::D, buf, 0, F2);
-    __ inc(buf, 8);
-    __ crc32c(F0, F2, F0);
-    __ ldf(FloatRegisterImpl::D, buf, 0, F2);
-    __ inc(buf, 8);
-    __ crc32c(F0, F2, F0);
-    __ dec(len, 32);
-    __ cmp_and_br_short(len, G4, Assembler::greaterEqual, Assembler::pt, L_crc32c_x32_loop);
-
-    __ BIND(L_crc32c_x8);
-    __ nop();
-    __ cmp_and_br_short(len, 8, Assembler::less, Assembler::pt, L_crc32c_done);
-
-    // ------ process 8B chunks ------
-    __ BIND(L_crc32c_x8_loop);
-    __ ldf(FloatRegisterImpl::D, buf, 0, F2);
-    __ inc(buf, 8);
-    __ crc32c(F0, F2, F0);
-    __ dec(len, 8);
-    __ cmp_and_br_short(len, 8, Assembler::greaterEqual, Assembler::pt, L_crc32c_x8_loop);
-
-    __ BIND(L_crc32c_done);
-
-    // move to INT side, and reverse the byte order of lower 32 bits to little endian
-    __ movftoi_revbytes(F0, crc, G1, G3);
-
-    __ cmp_and_br_short(len, 0, Assembler::equal, Assembler::pt, L_crc32c_return);
-
-    // ------ process the misaligned tail (7 bytes or less) ------
-    __ BIND(L_crc32c_tail);
-
-    // crc = (crc >>> 8) ^ byteTable[(crc ^ b) & 0xFF];
-    __ ldub(buf, 0, G1);
-    __ update_byte_crc32(crc, G1, table);
-
-    __ inc(buf);
-    __ dec(len);
-    __ cmp_and_br_short(len, 0, Assembler::greater, Assembler::pt, L_crc32c_tail);
-
-    __ BIND(L_crc32c_return);
-    __ nop();
+    __ kernel_crc32c(crc, buf, len, table);
+
     __ retl();
     __ delayed()->nop();
 
@@ -5366,6 +5198,12 @@
       StubRoutines::_crc_table_adr = (address)StubRoutines::Sparc::_crc_table;
       StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32();
     }
+
+    if (UseCRC32CIntrinsics) {
+      // set the table address before generating the stub, which uses it
+      StubRoutines::_crc32c_table_addr = (address)StubRoutines::Sparc::_crc32c_table;
+      StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
+    }
   }
 
 
@@ -5425,12 +5263,6 @@
       StubRoutines::_sha512_implCompress   = generate_sha512_implCompress(false, "sha512_implCompress");
       StubRoutines::_sha512_implCompressMB = generate_sha512_implCompress(true,  "sha512_implCompressMB");
     }
-
-    // generate CRC32C intrinsic code
-    if (UseCRC32CIntrinsics) {
-      StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
-    }
-
     // generate Adler32 intrinsics code
     if (UseAdler32Intrinsics) {
       StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
--- a/src/cpu/sparc/vm/stubRoutines_sparc.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/sparc/vm/stubRoutines_sparc.cpp	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -147,3 +147,62 @@
     0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
     0x2d02ef8dUL
 };
+
+/**
+ * CRC32C byte-wise lookup table (256 entries, reflected polynomial 0x82F63B78)
+ */
+juint StubRoutines::Sparc::_crc32c_table[] =
+{
+    0x00000000UL, 0xF26B8303UL, 0xE13B70F7UL, 0x1350F3F4UL, 0xC79A971FUL,
+    0x35F1141CUL, 0x26A1E7E8UL, 0xD4CA64EBUL, 0x8AD958CFUL, 0x78B2DBCCUL,
+    0x6BE22838UL, 0x9989AB3BUL, 0x4D43CFD0UL, 0xBF284CD3UL, 0xAC78BF27UL,
+    0x5E133C24UL, 0x105EC76FUL, 0xE235446CUL, 0xF165B798UL, 0x030E349BUL,
+    0xD7C45070UL, 0x25AFD373UL, 0x36FF2087UL, 0xC494A384UL, 0x9A879FA0UL,
+    0x68EC1CA3UL, 0x7BBCEF57UL, 0x89D76C54UL, 0x5D1D08BFUL, 0xAF768BBCUL,
+    0xBC267848UL, 0x4E4DFB4BUL, 0x20BD8EDEUL, 0xD2D60DDDUL, 0xC186FE29UL,
+    0x33ED7D2AUL, 0xE72719C1UL, 0x154C9AC2UL, 0x061C6936UL, 0xF477EA35UL,
+    0xAA64D611UL, 0x580F5512UL, 0x4B5FA6E6UL, 0xB93425E5UL, 0x6DFE410EUL,
+    0x9F95C20DUL, 0x8CC531F9UL, 0x7EAEB2FAUL, 0x30E349B1UL, 0xC288CAB2UL,
+    0xD1D83946UL, 0x23B3BA45UL, 0xF779DEAEUL, 0x05125DADUL, 0x1642AE59UL,
+    0xE4292D5AUL, 0xBA3A117EUL, 0x4851927DUL, 0x5B016189UL, 0xA96AE28AUL,
+    0x7DA08661UL, 0x8FCB0562UL, 0x9C9BF696UL, 0x6EF07595UL, 0x417B1DBCUL,
+    0xB3109EBFUL, 0xA0406D4BUL, 0x522BEE48UL, 0x86E18AA3UL, 0x748A09A0UL,
+    0x67DAFA54UL, 0x95B17957UL, 0xCBA24573UL, 0x39C9C670UL, 0x2A993584UL,
+    0xD8F2B687UL, 0x0C38D26CUL, 0xFE53516FUL, 0xED03A29BUL, 0x1F682198UL,
+    0x5125DAD3UL, 0xA34E59D0UL, 0xB01EAA24UL, 0x42752927UL, 0x96BF4DCCUL,
+    0x64D4CECFUL, 0x77843D3BUL, 0x85EFBE38UL, 0xDBFC821CUL, 0x2997011FUL,
+    0x3AC7F2EBUL, 0xC8AC71E8UL, 0x1C661503UL, 0xEE0D9600UL, 0xFD5D65F4UL,
+    0x0F36E6F7UL, 0x61C69362UL, 0x93AD1061UL, 0x80FDE395UL, 0x72966096UL,
+    0xA65C047DUL, 0x5437877EUL, 0x4767748AUL, 0xB50CF789UL, 0xEB1FCBADUL,
+    0x197448AEUL, 0x0A24BB5AUL, 0xF84F3859UL, 0x2C855CB2UL, 0xDEEEDFB1UL,
+    0xCDBE2C45UL, 0x3FD5AF46UL, 0x7198540DUL, 0x83F3D70EUL, 0x90A324FAUL,
+    0x62C8A7F9UL, 0xB602C312UL, 0x44694011UL, 0x5739B3E5UL, 0xA55230E6UL,
+    0xFB410CC2UL, 0x092A8FC1UL, 0x1A7A7C35UL, 0xE811FF36UL, 0x3CDB9BDDUL,
+    0xCEB018DEUL, 0xDDE0EB2AUL, 0x2F8B6829UL, 0x82F63B78UL, 0x709DB87BUL,
+    0x63CD4B8FUL, 0x91A6C88CUL, 0x456CAC67UL, 0xB7072F64UL, 0xA457DC90UL,
+    0x563C5F93UL, 0x082F63B7UL, 0xFA44E0B4UL, 0xE9141340UL, 0x1B7F9043UL,
+    0xCFB5F4A8UL, 0x3DDE77ABUL, 0x2E8E845FUL, 0xDCE5075CUL, 0x92A8FC17UL,
+    0x60C37F14UL, 0x73938CE0UL, 0x81F80FE3UL, 0x55326B08UL, 0xA759E80BUL,
+    0xB4091BFFUL, 0x466298FCUL, 0x1871A4D8UL, 0xEA1A27DBUL, 0xF94AD42FUL,
+    0x0B21572CUL, 0xDFEB33C7UL, 0x2D80B0C4UL, 0x3ED04330UL, 0xCCBBC033UL,
+    0xA24BB5A6UL, 0x502036A5UL, 0x4370C551UL, 0xB11B4652UL, 0x65D122B9UL,
+    0x97BAA1BAUL, 0x84EA524EUL, 0x7681D14DUL, 0x2892ED69UL, 0xDAF96E6AUL,
+    0xC9A99D9EUL, 0x3BC21E9DUL, 0xEF087A76UL, 0x1D63F975UL, 0x0E330A81UL,
+    0xFC588982UL, 0xB21572C9UL, 0x407EF1CAUL, 0x532E023EUL, 0xA145813DUL,
+    0x758FE5D6UL, 0x87E466D5UL, 0x94B49521UL, 0x66DF1622UL, 0x38CC2A06UL,
+    0xCAA7A905UL, 0xD9F75AF1UL, 0x2B9CD9F2UL, 0xFF56BD19UL, 0x0D3D3E1AUL,
+    0x1E6DCDEEUL, 0xEC064EEDUL, 0xC38D26C4UL, 0x31E6A5C7UL, 0x22B65633UL,
+    0xD0DDD530UL, 0x0417B1DBUL, 0xF67C32D8UL, 0xE52CC12CUL, 0x1747422FUL,
+    0x49547E0BUL, 0xBB3FFD08UL, 0xA86F0EFCUL, 0x5A048DFFUL, 0x8ECEE914UL,
+    0x7CA56A17UL, 0x6FF599E3UL, 0x9D9E1AE0UL, 0xD3D3E1ABUL, 0x21B862A8UL,
+    0x32E8915CUL, 0xC083125FUL, 0x144976B4UL, 0xE622F5B7UL, 0xF5720643UL,
+    0x07198540UL, 0x590AB964UL, 0xAB613A67UL, 0xB831C993UL, 0x4A5A4A90UL,
+    0x9E902E7BUL, 0x6CFBAD78UL, 0x7FAB5E8CUL, 0x8DC0DD8FUL, 0xE330A81AUL,
+    0x115B2B19UL, 0x020BD8EDUL, 0xF0605BEEUL, 0x24AA3F05UL, 0xD6C1BC06UL,
+    0xC5914FF2UL, 0x37FACCF1UL, 0x69E9F0D5UL, 0x9B8273D6UL, 0x88D28022UL,
+    0x7AB90321UL, 0xAE7367CAUL, 0x5C18E4C9UL, 0x4F48173DUL, 0xBD23943EUL,
+    0xF36E6F75UL, 0x0105EC76UL, 0x12551F82UL, 0xE03E9C81UL, 0x34F4F86AUL,
+    0xC69F7B69UL, 0xD5CF889DUL, 0x27A40B9EUL, 0x79B737BAUL, 0x8BDCB4B9UL,
+    0x988C474DUL, 0x6AE7C44EUL, 0xBE2DA0A5UL, 0x4C4623A6UL, 0x5F16D052UL,
+    0xAD7D5351UL
+};
--- a/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/sparc/vm/stubRoutines_sparc.hpp	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -56,6 +56,7 @@
   // masks and table for CRC32
   static uint64_t _crc_by128_masks[];
   static juint    _crc_table[];
+  static juint    _crc32c_table[];
 
  public:
   // test assembler stop routine by setting registers
--- a/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/sparc/vm/templateInterpreterGenerator_sparc.cpp	Fri May 06 18:20:50 2016 +0300
@@ -1081,8 +1081,56 @@
   return NULL;
 }
 
-// Not supported
+/**
+ * Method entry for intrinsic-candidate (non-native) methods:
+ *   int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
+ *   int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end)
+ * Unlike CRC32, CRC32C does not have any methods marked as native.
+ * CRC32C also uses an "end" index instead of the length CRC32 uses; the kernel length is computed here as end - off.
+ */
 address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
+
+  if (UseCRC32CIntrinsics) { // otherwise fall through and return NULL (no intrinsic entry is generated)
+    address entry = __ pc(); // the entry is the code position before anything below is emitted
+
+    // Load parameters from the stack
+    const Register crc    = O0; // initial crc
+    const Register buf    = O1; // source java byte array address
+    const Register offset = O2; // offset
+    const Register end    = O3; // end index (exclusive): len = end - offset is computed below
+    const Register len    = O2; // len argument to the kernel (shares O2 with offset)
+    const Register table  = O3; // crc32c lookup table address (shares O3 with end); not loaded here - presumably set up inside kernel_crc32c, verify
+
+    // Arguments are reversed on java expression stack
+    // Calculate address of start element
+    if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
+      __ lduw(Gargs, 0,  end);
+      __ lduw(Gargs, 8,  offset);
+      __ ldx( Gargs, 16, buf);
+      __ lduw(Gargs, 32, crc); // crc is at 32 here versus 24 in the array case; presumably the long buf takes two stack slots
+      __ add(buf, offset, buf); // buf += offset: address of the first byte to process
+      __ sub(end, offset, len); // len = end - offset (overwrites offset, which shares O2)
+    } else {
+      __ lduw(Gargs, 0,  end);
+      __ lduw(Gargs, 8,  offset);
+      __ ldx( Gargs, 16, buf);
+      __ lduw(Gargs, 24, crc);
+      __ add(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // account for the header size
+      __ add(buf, offset, buf); // buf += offset: address of the first byte to process
+      __ sub(end, offset, len); // len = end - offset (overwrites offset, which shares O2)
+    }
+
+    // Call the crc32c kernel
+    __ MacroAssembler::save_thread(L7_thread_cache);
+    __ kernel_crc32c(crc, buf, len, table);
+    __ MacroAssembler::restore_thread(L7_thread_cache);
+
+    // result in O0
+    __ retl();
+    __ delayed()->nop(); // nothing useful to place in the delay slot of the return
+
+    return entry;
+  }
   return NULL;
 }
 
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri May 06 18:20:50 2016 +0300
@@ -49,9 +49,11 @@
   AllocatePrefetchDistance = allocate_prefetch_distance();
   AllocatePrefetchStyle    = allocate_prefetch_style();
 
-  if (AllocatePrefetchStyle == 3 && !has_blk_init()) {
-    warning("BIS instructions are not available on this CPU");
-    FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1);
+  if (!has_blk_init()) {
+    if (AllocatePrefetchInstr == 1) {
+      warning("BIS instructions required for AllocatePrefetchInstr 1 unavailable");
+      FLAG_SET_DEFAULT(AllocatePrefetchInstr, 0);
+    }
   }
 
   UseSSE = 0; // Only on x86 and x64
@@ -88,11 +90,13 @@
       if (has_blk_init() && UseTLAB &&
           FLAG_IS_DEFAULT(AllocatePrefetchInstr)) {
         // Use BIS instruction for TLAB allocation prefetch.
-        FLAG_SET_ERGO(intx, AllocatePrefetchInstr, 1);
-        if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) {
-          FLAG_SET_ERGO(intx, AllocatePrefetchStyle, 3);
-        }
-        if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+        FLAG_SET_DEFAULT(AllocatePrefetchInstr, 1);
+      }
+      if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
+        if (AllocatePrefetchInstr == 0) {
+          // Use different prefetch distance without BIS
+          FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
+        } else {
           // Use smaller prefetch distance with BIS
           FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64);
         }
@@ -107,25 +111,14 @@
           FLAG_SET_ERGO(intx, AllocateInstancePrefetchLines, AllocateInstancePrefetchLines*2);
         }
       }
-      if (AllocatePrefetchStyle != 3 && FLAG_IS_DEFAULT(AllocatePrefetchDistance)) {
-        // Use different prefetch distance without BIS
-        FLAG_SET_DEFAULT(AllocatePrefetchDistance, 256);
-      }
-      if (AllocatePrefetchInstr == 1) {
-        // Need extra space at the end of TLAB for BIS, otherwise prefetching
-        // instructions will fault (due to accessing memory outside of heap).
-        // The amount of space is the max of the number of lines to
-        // prefetch for array and for instance allocations. (Extra space must be
-        // reserved to accomodate both types of allocations.)
+    }
 
-        // +1 for rounding up to next cache line, +1 to be safe
-        int lines = MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
-        int step_size = AllocatePrefetchStepSize;
-        int distance = AllocatePrefetchDistance;
-        _reserve_for_allocation_prefetch = (distance + step_size*lines)/(int)HeapWordSize;
-      }
+    if (AllocatePrefetchInstr == 1) {
+      // Use allocation prefetch style 3 because BIS instructions
+      // require aligned memory addresses.
+      FLAG_SET_DEFAULT(AllocatePrefetchStyle, 3);
     }
-#endif
+#endif /* COMPILER2 */
   }
 
   // Use hardware population count instruction if available.
--- a/src/cpu/x86/vm/assembler_x86.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri May 06 18:20:50 2016 +0300
@@ -2332,6 +2332,22 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::ktestq(KRegister src1, KRegister src2) { // emits KTESTQ on two opmask registers: rex_w set, no SIMD prefix, 0F opcode map
+  assert(VM_Version::supports_avx512bw(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x99); // opcode byte
+  emit_int8((unsigned char)(0xC0 | encode)); // ModRM byte with mod=11 (register-direct form)
+}
+
+void Assembler::ktestd(KRegister src1, KRegister src2) { // emits KTESTD on two opmask registers; same as ktestq except for the 0x66 SIMD prefix
+  assert(VM_Version::supports_avx512bw(), "");
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  int encode = vex_prefix_and_encode(src1->encoding(), 0, src2->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0x99); // opcode byte
+  emit_int8((unsigned char)(0xC0 | encode)); // ModRM byte with mod=11 (register-direct form)
+}
+
 void Assembler::movb(Address dst, int imm8) {
   InstructionMark im(this);
    prefix(dst);
@@ -2500,7 +2516,7 @@
   emit_operand(src, dst);
 }
 
-void Assembler::evmovdqub(KRegister mask, XMMRegister dst, Address src, int vector_len) {
+void Assembler::evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len) {
   assert(VM_Version::supports_avx512vlbw(), "");
   assert(is_vector_masking(), "");    // For stub code use only
   InstructionMark im(this);
@@ -2513,16 +2529,6 @@
   emit_operand(dst, src);
 }
 
-void Assembler::evmovdquw(XMMRegister dst, XMMRegister src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
-  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
-  attributes.set_is_evex_instruction();
-  int prefix = (_legacy_mode_bw) ? VEX_SIMD_F2 : VEX_SIMD_F3;
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), (Assembler::VexSimdPrefix)prefix, VEX_OPCODE_0F, &attributes);
-  emit_int8(0x6F);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
 void Assembler::evmovdquw(XMMRegister dst, Address src, int vector_len) {
   assert(VM_Version::supports_evex(), "");
   InstructionMark im(this);
@@ -2535,6 +2541,19 @@
   emit_operand(dst, src);
 }
 
+void Assembler::evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len) { // masked load form: memory src -> dst under opmask 'mask'
+  assert(is_vector_masking(), ""); // only legal while the assembler's vector-masking flag is set
+  assert(VM_Version::supports_avx512vlbw(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  attributes.set_embedded_opmask_register_specifier(mask); // takes effect because no_mask_reg is false above
+  attributes.set_is_evex_instruction();
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x6F); // opcode byte of the load direction (the store overloads use 0x7F)
+  emit_operand(dst, src);
+}
+
 void Assembler::evmovdquw(Address dst, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_evex(), "");
   assert(src != xnoreg, "sanity");
@@ -2548,6 +2567,19 @@
   emit_operand(src, dst);
 }
 
+void Assembler::evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len) { // masked store form: src -> memory dst under opmask 'mask'
+  assert(VM_Version::supports_avx512vlbw(), ""); // NOTE(review): unlike the masked load overload there is no is_vector_masking() assert here - confirm intentional
+  assert(src != xnoreg, "sanity");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true); // NOTE(review): legacy_mode is false here but _legacy_mode_bw in the masked load - confirm
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  attributes.set_embedded_opmask_register_specifier(mask); // takes effect because no_mask_reg is false above
+  attributes.set_is_evex_instruction();
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x7F); // opcode byte of the store direction (the load overloads use 0x6F)
+  emit_operand(src, dst);
+}
+
 void Assembler::evmovdqul(XMMRegister dst, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_evex(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
@@ -3295,10 +3327,71 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) { // unmasked EVEX byte greater-than compare of nds with memory src; result goes to opmask kdst
+  assert(VM_Version::supports_avx512vlbw(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  attributes.set_is_evex_instruction();
+  int dst_enc = kdst->encoding(); // the opmask register number is used both in the prefix and in the operand bytes
+  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x64); // opcode byte
+  emit_operand(as_Register(dst_enc), src); // kdst's number is passed as a general Register, presumably only to fit emit_operand's signature - verify
+}
+
+void Assembler::evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) { // masked variant: same encoding as the unmasked overload plus an embedded opmask
+  assert(is_vector_masking(), ""); // only legal while the assembler's vector-masking flag is set
+  assert(VM_Version::supports_avx512vlbw(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  attributes.set_embedded_opmask_register_specifier(mask); // takes effect because no_mask_reg is false above
+  attributes.set_is_evex_instruction();
+  int dst_enc = kdst->encoding();
+  vex_prefix(src, nds->encoding(), dst_enc, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8(0x64); // opcode byte
+  emit_operand(as_Register(dst_enc), src); // kdst's number is passed as a general Register, presumably only to fit emit_operand's signature - verify
+}
+
+void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) { // unmasked register-register form; predicate vcc selects the comparison
+  assert(VM_Version::supports_avx512vlbw(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_is_evex_instruction();
+  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8(0x3E); // opcode byte (0F 3A opcode map)
+  emit_int8((unsigned char)(0xC0 | encode)); // ModRM byte with mod=11 (register-direct form)
+  emit_int8(vcc); // trailing immediate byte carries the comparison predicate
+}
+
+void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) { // masked register-register form
+  assert(is_vector_masking(), ""); // only legal while the assembler's vector-masking flag is set
+  assert(VM_Version::supports_avx512vlbw(), "");
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_embedded_opmask_register_specifier(mask); // takes effect because no_mask_reg is false above
+  attributes.set_is_evex_instruction();
+  int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8(0x3E); // opcode byte (0F 3A opcode map)
+  emit_int8((unsigned char)(0xC0 | encode)); // ModRM byte with mod=11 (register-direct form)
+  emit_int8(vcc); // trailing immediate byte carries the comparison predicate
+}
+
+void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) { // unmasked register-memory form
+  assert(VM_Version::supports_avx512vlbw(), "");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
+  attributes.set_is_evex_instruction();
+  int dst_enc = kdst->encoding();
+  vex_prefix(src, nds->encoding(), kdst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes); // same value as dst_enc; the evpcmpgtb overloads pass dst_enc here
+  emit_int8(0x3E); // opcode byte (0F 3A opcode map)
+  emit_operand(as_Register(dst_enc), src); // kdst's number is passed as a general Register, presumably only to fit emit_operand's signature - verify
+  emit_int8(vcc); // trailing immediate byte carries the comparison predicate
+}
+
 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx512bw(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
   attributes.set_is_evex_instruction();
   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   int dst_enc = kdst->encoding();
@@ -3307,7 +3400,7 @@
   emit_operand(as_Register(dst_enc), src);
 }
 
-void Assembler::evpcmpeqb(KRegister mask, KRegister kdst, XMMRegister nds, Address src, int vector_len) {
+void Assembler::evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx512vlbw(), "");
   assert(is_vector_masking(), "");    // For stub code use only
   InstructionMark im(this);
@@ -3620,6 +3713,46 @@
   emit_operand(dst, src);
 }
 
+void Assembler::evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len) { // masked EVEX form loading from memory src into dst
+  assert(is_vector_masking(), ""); // only legal while the assembler's vector-masking flag is set
+  assert(VM_Version::supports_avx512vlbw(), "");
+  assert(dst != xnoreg, "sanity");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
+  attributes.set_embedded_opmask_register_specifier(mask); // takes effect because no_mask_reg is false above
+  attributes.set_is_evex_instruction();
+  vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x30); // opcode byte (0F 38 map); evpmovwb shares it but uses the F3 prefix instead of 66
+  emit_operand(dst, src);
+}
+
+void Assembler::evpmovwb(Address dst, XMMRegister src, int vector_len) { // unmasked EVEX form storing from src to memory dst
+  assert(VM_Version::supports_avx512vlbw(), "");
+  assert(src != xnoreg, "sanity");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
+  attributes.set_is_evex_instruction();
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x30); // opcode byte (0F 38 map); evpmovzxbw shares it but uses the 66 prefix instead of F3
+  emit_operand(src, dst);
+}
+
+void Assembler::evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len) { // masked variant of the store form above
+  assert(is_vector_masking(), ""); // only legal while the assembler's vector-masking flag is set
+  assert(VM_Version::supports_avx512vlbw(), "");
+  assert(src != xnoreg, "sanity");
+  InstructionMark im(this);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
+  attributes.set_embedded_opmask_register_specifier(mask); // takes effect because no_mask_reg is false above
+  attributes.set_is_evex_instruction();
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
+  emit_int8(0x30); // opcode byte (0F 38 map)
+  emit_operand(src, dst);
+}
+
 // generic
 void Assembler::pop(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
@@ -6406,7 +6539,6 @@
   emit_int8(0x77);
 }
 
-
 #ifndef _LP64
 // 32bit only pieces of the assembler
 
@@ -6973,7 +7105,10 @@
   emit_int8(byte3);
 
   // P2: byte 4 as zL'Lbv'aaa
-  int byte4 = (_attributes->is_no_reg_mask()) ? 0 : _attributes->get_embedded_opmask_register_specifier(); // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
+  // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
+  int byte4 = (_attributes->is_no_reg_mask()) ?
+              0 :
+              _attributes->get_embedded_opmask_register_specifier();
   // EVEX.v` for extending EVEX.vvvv or VIDX
   byte4 |= (evex_v ? 0: EVEX_V);
   // third EXEC.b for broadcast actions
--- a/src/cpu/x86/vm/assembler_x86.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri May 06 18:20:50 2016 +0300
@@ -587,6 +587,16 @@
 #endif
   };
 
+  enum ComparisonPredicate { // predicate codes; evpcmpuw emits the value as the instruction's trailing immediate byte
+    eq = 0, // equal
+    lt = 1, // less than
+    le = 2, // less than or equal
+    _false = 3, // always false (underscore avoids the C++ keyword)
+    neq = 4, // not equal
+    nlt = 5, // not less than
+    nle = 6, // not less than or equal
+    _true = 7 // always true (underscore avoids the C++ keyword)
+  };
 
 
   // NOTE: The general philopsophy of the declarations here is that 64bit versions
@@ -830,7 +840,6 @@
   void clear_vector_masking(void) { _vector_masking = false; }
   bool is_vector_masking(void) { return _vector_masking; }
 
-
   void lea(Register dst, Address src);
 
   void mov(Register dst, Register src);
@@ -1362,6 +1371,9 @@
   void kortestdl(KRegister dst, KRegister src);
   void kortestql(KRegister dst, KRegister src);
 
+  void ktestq(KRegister src1, KRegister src2); // opmask test, encoded without a SIMD prefix
+  void ktestd(KRegister src1, KRegister src2); // opmask test, encoded with the 0x66 SIMD prefix
+
   void ktestql(KRegister dst, KRegister src);
 
   void movdl(XMMRegister dst, Register src);
@@ -1391,10 +1403,11 @@
   void evmovdqub(Address dst, XMMRegister src, int vector_len);
   void evmovdqub(XMMRegister dst, Address src, int vector_len);
   void evmovdqub(XMMRegister dst, XMMRegister src, int vector_len);
-  void evmovdqub(KRegister mask, XMMRegister dst, Address src, int vector_len);
+  void evmovdqub(XMMRegister dst, KRegister mask, Address src, int vector_len);
   void evmovdquw(Address dst, XMMRegister src, int vector_len);
+  void evmovdquw(Address dst, KRegister mask, XMMRegister src, int vector_len);
   void evmovdquw(XMMRegister dst, Address src, int vector_len);
-  void evmovdquw(XMMRegister dst, XMMRegister src, int vector_len);
+  void evmovdquw(XMMRegister dst, KRegister mask, Address src, int vector_len);
   void evmovdqul(Address dst, XMMRegister src, int vector_len);
   void evmovdqul(XMMRegister dst, Address src, int vector_len);
   void evmovdqul(XMMRegister dst, XMMRegister src, int vector_len);
@@ -1545,7 +1558,14 @@
   void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
   void evpcmpeqb(KRegister kdst, XMMRegister nds, XMMRegister src, int vector_len);
   void evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
-  void evpcmpeqb(KRegister mask, KRegister kdst, XMMRegister nds, Address src, int vector_len);
+  void evpcmpeqb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
+
+  void evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len);
+  void evpcmpgtb(KRegister kdst, KRegister mask, XMMRegister nds, Address src, int vector_len);
+
+  void evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len); // register source, no opmask
+  void evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len); // register source, result computed under opmask 'mask'
+  void evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len); // memory source, no opmask
 
   void pcmpeqw(XMMRegister dst, XMMRegister src);
   void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -1589,7 +1609,11 @@
   void pmovzxbw(XMMRegister dst, XMMRegister src);
   void pmovzxbw(XMMRegister dst, Address src);
 
-  void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
+  void vpmovzxbw( XMMRegister dst, Address src, int vector_len);
+  void evpmovzxbw(XMMRegister dst, KRegister mask, Address src, int vector_len);
+
+  void evpmovwb(Address dst, XMMRegister src, int vector_len); // store form without opmask
+  void evpmovwb(Address dst, KRegister mask, XMMRegister src, int vector_len); // store form under opmask 'mask'
 
 #ifndef _LP64 // no 32bit push/pop on amd64
   void popl(Address dst);
@@ -1839,6 +1863,8 @@
   void vsubss(XMMRegister dst, XMMRegister nds, Address src);
   void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
 
+  void shlxl(Register dst, Register src1, Register src2);
+  void shlxq(Register dst, Register src1, Register src2);
 
   //====================VECTOR ARITHMETIC=====================================
 
@@ -2073,9 +2099,6 @@
   void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
   void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
 
-  void shlxl(Register dst, Register src1, Register src2);
-  void shlxq(Register dst, Register src1, Register src2);
-
  protected:
   // Next instructions require address alignment 16 bytes SSE mode.
   // They should be called only from corresponding MacroAssembler instructions.
--- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Fri May 06 18:20:50 2016 +0300
@@ -1109,6 +1109,87 @@
   }
 }
 
+void LIRGenerator::do_update_CRC32C(Intrinsic* x) { // placeholder: C1 has no CRC32C lowering in this x86 LIR generator
+  Unimplemented();
+}
+
+void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { // C1 lowering: forms the two start addresses and calls the StubRoutines::vectorizedMismatch() leaf stub
+  assert(UseVectorizedMismatchIntrinsic, "need AVX instruction support");
+
+  // Make all state_for calls early since they can emit code
+  LIR_Opr result = rlock_result(x);
+
+  LIRItem a(x->argument_at(0), this); // Object
+  LIRItem aOffset(x->argument_at(1), this); // long
+  LIRItem b(x->argument_at(2), this); // Object
+  LIRItem bOffset(x->argument_at(3), this); // long
+  LIRItem length(x->argument_at(4), this); // int
+  LIRItem log2ArrayIndexScale(x->argument_at(5), this); // int
+
+  a.load_item();
+  aOffset.load_nonconstant(); // a constant offset is folded into the address displacement below
+  b.load_item();
+  bOffset.load_nonconstant(); // a constant offset is folded into the address displacement below
+
+  long constant_aOffset = 0; // NOTE(review): 'long' is 32 bits on LLP64 targets while as_jlong() yields 64 bits - confirm truncation cannot occur
+  LIR_Opr result_aOffset = aOffset.result();
+  if (result_aOffset->is_constant()) {
+    constant_aOffset = result_aOffset->as_jlong();
+    result_aOffset = LIR_OprFact::illegalOpr; // no index operand: the offset travels as the displacement instead
+  }
+  LIR_Opr result_a = a.result();
+
+  long constant_bOffset = 0; // NOTE(review): same 'long' versus jlong width question as constant_aOffset
+  LIR_Opr result_bOffset = bOffset.result();
+  if (result_bOffset->is_constant()) {
+    constant_bOffset = result_bOffset->as_jlong();
+    result_bOffset = LIR_OprFact::illegalOpr; // no index operand: the offset travels as the displacement instead
+  }
+  LIR_Opr result_b = b.result();
+
+#ifndef _LP64
+  result_a = new_register(T_INT); // NOTE(review): the 32-bit path applies _l2i to the object operands a and b, not to the long offsets - confirm intended
+  __ convert(Bytecodes::_l2i, a.result(), result_a);
+  result_b = new_register(T_INT);
+  __ convert(Bytecodes::_l2i, b.result(), result_b);
+#endif
+
+
+  LIR_Address* addr_a = new LIR_Address(result_a,
+                                        result_aOffset,
+                                        LIR_Address::times_1,
+                                        constant_aOffset,
+                                        T_BYTE);
+
+  LIR_Address* addr_b = new LIR_Address(result_b,
+                                        result_bOffset,
+                                        LIR_Address::times_1,
+                                        constant_bOffset,
+                                        T_BYTE);
+
+  BasicTypeList signature(4); // C signature of the stub call: (address, address, int, int)
+  signature.append(T_ADDRESS);
+  signature.append(T_ADDRESS);
+  signature.append(T_INT);
+  signature.append(T_INT);
+  CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+  const LIR_Opr result_reg = result_register_for(x->type());
+
+  LIR_Opr ptr_addr_a = new_pointer_register();
+  __ leal(LIR_OprFact::address(addr_a), ptr_addr_a); // materialize the effective address of a's data in a pointer register
+
+  LIR_Opr ptr_addr_b = new_pointer_register();
+  __ leal(LIR_OprFact::address(addr_b), ptr_addr_b); // materialize the effective address of b's data in a pointer register
+
+  __ move(ptr_addr_a, cc->at(0));
+  __ move(ptr_addr_b, cc->at(1));
+  length.load_item_force(cc->at(2)); // load directly into the third C argument location
+  log2ArrayIndexScale.load_item_force(cc->at(3)); // load directly into the fourth C argument location
+
+  __ call_runtime_leaf(StubRoutines::vectorizedMismatch(), getThreadTemp(), result_reg, cc->args());
+  __ move(result_reg, result); // copy the stub's return value into the intrinsic's result operand
+}
+
 // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f
 // _i2b, _i2c, _i2s
 LIR_Opr fixed_register_for(BasicType type) {
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Fri May 06 18:20:50 2016 +0300
@@ -8259,10 +8259,19 @@
 
 // Search for Non-ASCII character (Negative byte value) in a byte array,
 // return true if it has any and false otherwise.
+//   ..\jdk\src\java.base\share\classes\java\lang\StringCoding.java
+//   @HotSpotIntrinsicCandidate
+//   private static boolean hasNegatives(byte[] ba, int off, int len) {
+//     for (int i = off; i < off + len; i++) {
+//       if (ba[i] < 0) {
+//         return true;
+//       }
+//     }
+//     return false;
+//   }
 void MacroAssembler::has_negatives(Register ary1, Register len,
-                                   Register result, Register tmp1,
-                                   XMMRegister vec1, XMMRegister vec2) {
-
+  Register result, Register tmp1,
+  XMMRegister vec1, XMMRegister vec2) {
   // rsi: byte array
   // rcx: len
   // rax: result
@@ -8275,79 +8284,161 @@
   testl(len, len);
   jcc(Assembler::zero, FALSE_LABEL);
 
-  movl(result, len); // copy
-
-  if (UseAVX >= 2 && UseSSE >= 2) {
-    // With AVX2, use 32-byte vector compare
-    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
-
-    // Compare 32-byte vectors
-    andl(result, 0x0000001f);  //   tail count (in bytes)
-    andl(len, 0xffffffe0);   // vector count (in bytes)
-    jcc(Assembler::zero, COMPARE_TAIL);
+  if ((UseAVX > 2) && // AVX512
+    VM_Version::supports_avx512vlbw() &&
+    VM_Version::supports_bmi2()) {
+
+    set_vector_masking();  // opening of the stub context for programming mask registers
+
+    Label test_64_loop, test_tail;
+    Register tmp3_aliased = len;
+
+    movl(tmp1, len);
+    vpxor(vec2, vec2, vec2, Assembler::AVX_512bit);
+
+    andl(tmp1, 64 - 1);   // tail count (in chars) 0x3F
+    andl(len, ~(64 - 1));    // vector count (in chars)
+    jccb(Assembler::zero, test_tail);
 
     lea(ary1, Address(ary1, len, Address::times_1));
     negptr(len);
 
-    movl(tmp1, 0x80808080);   // create mask to test for Unicode chars in vector
-    movdl(vec2, tmp1);
-    vpbroadcastd(vec2, vec2);
-
-    bind(COMPARE_WIDE_VECTORS);
-    vmovdqu(vec1, Address(ary1, len, Address::times_1));
-    vptest(vec1, vec2);
+    bind(test_64_loop);
+    // Check whether our 64 elements of size byte contain negatives
+    evpcmpgtb(k2, vec2, Address(ary1, len, Address::times_1), Assembler::AVX_512bit);
+    kortestql(k2, k2);
     jcc(Assembler::notZero, TRUE_LABEL);
-    addptr(len, 32);
-    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
-
-    testl(result, result);
+
+    addptr(len, 64);
+    jccb(Assembler::notZero, test_64_loop);
+
+
+    bind(test_tail);
+    // bail out when there is nothing to be done
+    testl(tmp1, -1);
     jcc(Assembler::zero, FALSE_LABEL);
 
-    vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
-    vptest(vec1, vec2);
+    // Save k1
+    kmovql(k3, k1);
+
+    // ~(~0 << len) applied up to two times (for 32-bit scenario)
+#ifdef _LP64
+    mov64(tmp3_aliased, 0xFFFFFFFFFFFFFFFF);
+    shlxq(tmp3_aliased, tmp3_aliased, tmp1);
+    notq(tmp3_aliased);
+    kmovql(k1, tmp3_aliased);
+#else
+    Label k_init;
+    jmp(k_init);
+
+    // On 32-bit we cannot read 64 bits from a general purpose register, so the
+    // data needed to compose the 64-bit mask is placed in the instruction stream.
+    // We emit a 64-byte series of byte values 0..63 which is later used as the
+    // compare target for the tail count held in the tmp1 register.
+    // The result is a k1 register with tmp1 consecutive 1 bits,
+    // counting from the least significant bit.
+    address tmp = pc();
+    emit_int64(0x0706050403020100);
+    emit_int64(0x0F0E0D0C0B0A0908);
+    emit_int64(0x1716151413121110);
+    emit_int64(0x1F1E1D1C1B1A1918);
+    emit_int64(0x2726252423222120);
+    emit_int64(0x2F2E2D2C2B2A2928);
+    emit_int64(0x3736353433323130);
+    emit_int64(0x3F3E3D3C3B3A3938);
+
+    bind(k_init);
+    lea(len, InternalAddress(tmp));
+    // create mask to test for negative byte inside a vector
+    evpbroadcastb(vec1, tmp1, Assembler::AVX_512bit);
+    evpcmpgtb(k1, vec1, Address(len, 0), Assembler::AVX_512bit);
+
+#endif
+    evpcmpgtb(k2, k1, vec2, Address(ary1, 0), Assembler::AVX_512bit);
+    ktestq(k2, k1);
+    // Restore k1
+    kmovql(k1, k3);
     jcc(Assembler::notZero, TRUE_LABEL);
+
     jmp(FALSE_LABEL);
 
-    bind(COMPARE_TAIL); // len is zero
-    movl(len, result);
-    // Fallthru to tail compare
-  } else if (UseSSE42Intrinsics) {
-    assert(UseSSE >= 4, "SSE4 must be  for SSE4.2 intrinsics to be available");
-    // With SSE4.2, use double quad vector compare
-    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
-
-    // Compare 16-byte vectors
-    andl(result, 0x0000000f);  //   tail count (in bytes)
-    andl(len, 0xfffffff0);   // vector count (in bytes)
-    jccb(Assembler::zero, COMPARE_TAIL);
-
-    lea(ary1, Address(ary1, len, Address::times_1));
-    negptr(len);
-
-    movl(tmp1, 0x80808080);
-    movdl(vec2, tmp1);
-    pshufd(vec2, vec2, 0);
-
-    bind(COMPARE_WIDE_VECTORS);
-    movdqu(vec1, Address(ary1, len, Address::times_1));
-    ptest(vec1, vec2);
-    jcc(Assembler::notZero, TRUE_LABEL);
-    addptr(len, 16);
-    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
-
-    testl(result, result);
-    jcc(Assembler::zero, FALSE_LABEL);
-
-    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
-    ptest(vec1, vec2);
-    jccb(Assembler::notZero, TRUE_LABEL);
-    jmpb(FALSE_LABEL);
-
-    bind(COMPARE_TAIL); // len is zero
-    movl(len, result);
-    // Fallthru to tail compare
-  }
-
+    clear_vector_masking();   // closing of the stub context for programming mask registers
+  }
+  else {
+    movl(result, len); // copy
+
+    if (UseAVX == 2 && UseSSE >= 2) {
+      // With AVX2, use 32-byte vector compare
+      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
+
+      // Compare 32-byte vectors
+      andl(result, 0x0000001f);  //   tail count (in bytes)
+      andl(len, 0xffffffe0);   // vector count (in bytes)
+      jccb(Assembler::zero, COMPARE_TAIL);
+
+      lea(ary1, Address(ary1, len, Address::times_1));
+      negptr(len);
+
+      movl(tmp1, 0x80808080);   // create mask to test for Unicode chars in vector
+      movdl(vec2, tmp1);
+      vpbroadcastd(vec2, vec2);
+
+      bind(COMPARE_WIDE_VECTORS);
+      vmovdqu(vec1, Address(ary1, len, Address::times_1));
+      vptest(vec1, vec2);
+      jccb(Assembler::notZero, TRUE_LABEL);
+      addptr(len, 32);
+      jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
+
+      testl(result, result);
+      jccb(Assembler::zero, FALSE_LABEL);
+
+      vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
+      vptest(vec1, vec2);
+      jccb(Assembler::notZero, TRUE_LABEL);
+      jmpb(FALSE_LABEL);
+
+      bind(COMPARE_TAIL); // len is zero
+      movl(len, result);
+      // Fallthru to tail compare
+    }
+    else if (UseSSE42Intrinsics) {
+      assert(UseSSE >= 4, "SSE4 must be  for SSE4.2 intrinsics to be available");
+      // With SSE4.2, use double quad vector compare
+      Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
+
+      // Compare 16-byte vectors
+      andl(result, 0x0000000f);  //   tail count (in bytes)
+      andl(len, 0xfffffff0);   // vector count (in bytes)
+      jccb(Assembler::zero, COMPARE_TAIL);
+
+      lea(ary1, Address(ary1, len, Address::times_1));
+      negptr(len);
+
+      movl(tmp1, 0x80808080);
+      movdl(vec2, tmp1);
+      pshufd(vec2, vec2, 0);
+
+      bind(COMPARE_WIDE_VECTORS);
+      movdqu(vec1, Address(ary1, len, Address::times_1));
+      ptest(vec1, vec2);
+      jccb(Assembler::notZero, TRUE_LABEL);
+      addptr(len, 16);
+      jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
+
+      testl(result, result);
+      jccb(Assembler::zero, FALSE_LABEL);
+
+      movdqu(vec1, Address(ary1, result, Address::times_1, -16));
+      ptest(vec1, vec2);
+      jccb(Assembler::notZero, TRUE_LABEL);
+      jmpb(FALSE_LABEL);
+
+      bind(COMPARE_TAIL); // len is zero
+      movl(len, result);
+      // Fallthru to tail compare
+    }
+  }
   // Compare 4-byte vectors
   andl(len, 0xfffffffc); // vector count (in bytes)
   jccb(Assembler::zero, COMPARE_CHAR);
@@ -8395,7 +8486,6 @@
     vpxor(vec2, vec2);
   }
 }
-
 // Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
 void MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
                                    Register limit, Register result, Register chr,
@@ -8841,10 +8931,23 @@
 }
 
 // encode char[] to byte[] in ISO_8859_1
+   //@HotSpotIntrinsicCandidate
+   //private static int implEncodeISOArray(byte[] sa, int sp,
+   //byte[] da, int dp, int len) {
+   //  int i = 0;
+   //  for (; i < len; i++) {
+   //    char c = StringUTF16.getChar(sa, sp++);
+   //    if (c > '\u00FF')
+   //      break;
+   //    da[dp++] = (byte)c;
+   //  }
+   //  return i;
+   //}
 void MacroAssembler::encode_iso_array(Register src, Register dst, Register len,
-                                      XMMRegister tmp1Reg, XMMRegister tmp2Reg,
-                                      XMMRegister tmp3Reg, XMMRegister tmp4Reg,
-                                      Register tmp5, Register result) {
+  XMMRegister tmp1Reg, XMMRegister tmp2Reg,
+  XMMRegister tmp3Reg, XMMRegister tmp4Reg,
+  Register tmp5, Register result) {
+
   // rsi: src
   // rdi: dst
   // rdx: len
@@ -8859,6 +8962,7 @@
   // check for zero length
   testl(len, len);
   jcc(Assembler::zero, L_done);
+
   movl(result, len);
 
   // Setup pointers
@@ -8967,6 +9071,7 @@
 
   bind(L_copy_1_char_exit);
   addptr(result, len); // len is negative count of not processed elements
+
   bind(L_done);
 }
 
@@ -9478,8 +9583,8 @@
     notq(tmp2);
     kmovql(k1, tmp2);
 
-    evmovdqub(k1, rymm0, Address(obja, result), Assembler::AVX_512bit);
-    evpcmpeqb(k1, k7, rymm0, Address(objb, result), Assembler::AVX_512bit);
+    evmovdqub(rymm0, k1, Address(obja, result), Assembler::AVX_512bit);
+    evpcmpeqb(k7, k1, rymm0, Address(objb, result), Assembler::AVX_512bit);
 
     ktestql(k7, k1);
     // Restore k1
@@ -10838,13 +10943,24 @@
 #undef BIND
 #undef BLOCK_COMMENT
 
-
 // Compress char[] array to byte[].
+//   ..\jdk\src\java.base\share\classes\java\lang\StringUTF16.java
+//   @HotSpotIntrinsicCandidate
+//   private static int compress(char[] src, int srcOff, byte[] dst, int dstOff, int len) {
+//     for (int i = 0; i < len; i++) {
+//       int c = src[srcOff++];
+//       if (c >>> 8 != 0) {
+//         return 0;
+//       }
+//       dst[dstOff++] = (byte)c;
+//     }
+//     return len;
+//   }
 void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
-                                         XMMRegister tmp1Reg, XMMRegister tmp2Reg,
-                                         XMMRegister tmp3Reg, XMMRegister tmp4Reg,
-                                         Register tmp5, Register result) {
-  Label copy_chars_loop, return_length, return_zero, done;
+  XMMRegister tmp1Reg, XMMRegister tmp2Reg,
+  XMMRegister tmp3Reg, XMMRegister tmp4Reg,
+  Register tmp5, Register result) {
+  Label copy_chars_loop, return_length, return_zero, done, below_threshold;
 
   // rsi: src
   // rdi: dst
@@ -10861,11 +10977,141 @@
   // save length for return
   push(len);
 
+  if ((UseAVX > 2) && // AVX512
+    VM_Version::supports_avx512vlbw() &&
+    VM_Version::supports_bmi2()) {
+
+    set_vector_masking();  // opening of the stub context for programming mask registers
+
+    Label copy_32_loop, copy_loop_tail, copy_just_portion_of_candidates;
+
+    // alignment
+    Label post_alignement;
+
+    // if length of the string is less than 32, handle it in an old fashioned
+    // way
+    testl(len, -32);
+    jcc(Assembler::zero, below_threshold);
+
+    // First check whether a character is compressible ( <= 0xFF).
+    // Create mask to test for Unicode chars inside zmm vector
+    movl(result, 0x00FF);
+    evpbroadcastw(tmp2Reg, result, Assembler::AVX_512bit);
+
+    testl(len, -64);
+    jcc(Assembler::zero, post_alignement);
+
+    // Save k1
+    kmovql(k3, k1);
+
+    movl(tmp5, dst);
+    andl(tmp5, (64 - 1));
+    negl(tmp5);
+    andl(tmp5, (64 - 1));
+
+    // bail out when there is nothing to be done
+    testl(tmp5, 0xFFFFFFFF);
+    jcc(Assembler::zero, post_alignement);
+
+    // ~(~0 << tmp5), where tmp5 is the # of elements to process to 64-byte align dst
+    movl(result, 0xFFFFFFFF);
+    shlxl(result, result, tmp5);
+    notl(result);
+
+    kmovdl(k1, result);
+
+    evmovdquw(tmp1Reg, k1, Address(src, 0), Assembler::AVX_512bit);
+    evpcmpuw(k2, k1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
+    ktestd(k2, k1);
+    jcc(Assembler::carryClear, copy_just_portion_of_candidates);
+
+    evpmovwb(Address(dst, 0), k1, tmp1Reg, Assembler::AVX_512bit);
+
+    addptr(src, tmp5);
+    addptr(src, tmp5);
+    addptr(dst, tmp5);
+    subl(len, tmp5);
+
+    bind(post_alignement);
+    // end of alignment
+
+    movl(tmp5, len);
+    andl(tmp5, (32 - 1));   // tail count (in chars)
+    andl(len, ~(32 - 1));    // vector count (in chars)
+    jcc(Assembler::zero, copy_loop_tail);
+
+    lea(src, Address(src, len, Address::times_2));
+    lea(dst, Address(dst, len, Address::times_1));
+    negptr(len);
+
+    bind(copy_32_loop);
+    evmovdquw(tmp1Reg, Address(src, len, Address::times_2), Assembler::AVX_512bit);
+    evpcmpuw(k2, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
+    kortestdl(k2, k2);
+    jcc(Assembler::carryClear, copy_just_portion_of_candidates);
+
+    // All elements in the currently processed chunk are valid candidates for
+    // compression. Write the truncated byte elements to memory.
+    evpmovwb(Address(dst, len, Address::times_1), tmp1Reg, Assembler::AVX_512bit);
+    addptr(len, 32);
+    jcc(Assembler::notZero, copy_32_loop);
+
+    bind(copy_loop_tail);
+    // bail out when there is nothing to be done
+    testl(tmp5, 0xFFFFFFFF);
+    jcc(Assembler::zero, return_length);
+
+    // Save k1
+    kmovql(k3, k1);
+
+    movl(len, tmp5);
+
+    // ~(~0 << len), where len is the # of remaining elements to process
+    movl(result, 0xFFFFFFFF);
+    shlxl(result, result, len);
+    notl(result);
+
+    kmovdl(k1, result);
+
+    evmovdquw(tmp1Reg, k1, Address(src, 0), Assembler::AVX_512bit);
+    evpcmpuw(k2, k1, tmp1Reg, tmp2Reg, Assembler::le, Assembler::AVX_512bit);
+    ktestd(k2, k1);
+    jcc(Assembler::carryClear, copy_just_portion_of_candidates);
+
+    evpmovwb(Address(dst, 0), k1, tmp1Reg, Assembler::AVX_512bit);
+    // Restore k1
+    kmovql(k1, k3);
+
+    jmp(return_length);
+
+    bind(copy_just_portion_of_candidates);
+    kmovdl(tmp5, k2);
+    tzcntl(tmp5, tmp5);
+
+    // ~(~0 << tmp5), where tmp5 is a number of elements in an array from the
+    // result to the first element larger than 0xFF
+    movl(result, 0xFFFFFFFF);
+    shlxl(result, result, tmp5);
+    notl(result);
+
+    kmovdl(k1, result);
+
+    evpmovwb(Address(dst, 0), k1, tmp1Reg, Assembler::AVX_512bit);
+    // Restore k1
+    kmovql(k1, k3);
+
+    jmp(return_zero);
+
+    clear_vector_masking();   // closing of the stub context for programming mask registers
+  }
   if (UseSSE42Intrinsics) {
     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
     Label copy_32_loop, copy_16, copy_tail;
 
+    bind(below_threshold);
+
     movl(result, len);
+
     movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vectors
 
     // vectored compression
@@ -10947,10 +11193,16 @@
 }
 
 // Inflate byte[] array to char[].
+//   ..\jdk\src\java.base\share\classes\java\lang\StringLatin1.java
+//   @HotSpotIntrinsicCandidate
+//   private static void inflate(byte[] src, int srcOff, char[] dst, int dstOff, int len) {
+//     for (int i = 0; i < len; i++) {
+//       dst[dstOff++] = (char)(src[srcOff++] & 0xff);
+//     }
+//   }
 void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
-                                        XMMRegister tmp1, Register tmp2) {
-  Label copy_chars_loop, done;
-
+  XMMRegister tmp1, Register tmp2) {
+  Label copy_chars_loop, done, below_threshold;
   // rsi: src
   // rdi: dst
   // rdx: len
@@ -10961,20 +11213,109 @@
   // rdx holds length
   assert_different_registers(src, dst, len, tmp2);
 
+  if ((UseAVX > 2) && // AVX512
+    VM_Version::supports_avx512vlbw() &&
+    VM_Version::supports_bmi2()) {
+
+    set_vector_masking();  // opening of the stub context for programming mask registers
+
+    Label copy_32_loop, copy_tail;
+    Register tmp3_aliased = len;
+
+    // if length of the string is less than 16, handle it in an old fashioned
+    // way
+    testl(len, -16);
+    jcc(Assembler::zero, below_threshold);
+
+    // In order to use only one arithmetic operation for the main loop we use
+    // this pre-calculation
+    movl(tmp2, len);
+    andl(tmp2, (32 - 1)); // tail count (in chars), 32 element wide loop
+    andl(len, -32);     // vector count
+    jccb(Assembler::zero, copy_tail);
+
+    lea(src, Address(src, len, Address::times_1));
+    lea(dst, Address(dst, len, Address::times_2));
+    negptr(len);
+
+
+    // inflate 32 chars per iter
+    bind(copy_32_loop);
+    vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_512bit);
+    evmovdquw(Address(dst, len, Address::times_2), tmp1, Assembler::AVX_512bit);
+    addptr(len, 32);
+    jcc(Assembler::notZero, copy_32_loop);
+
+    bind(copy_tail);
+    // bail out when there is nothing to be done
+    testl(tmp2, -1); // we don't destroy the contents of tmp2 here
+    jcc(Assembler::zero, done);
+
+    // Save k1
+    kmovql(k2, k1);
+
+    // ~(~0 << length), where length is the # of remaining elements to process
+    movl(tmp3_aliased, -1);
+    shlxl(tmp3_aliased, tmp3_aliased, tmp2);
+    notl(tmp3_aliased);
+    kmovdl(k1, tmp3_aliased);
+    evpmovzxbw(tmp1, k1, Address(src, 0), Assembler::AVX_512bit);
+    evmovdquw(Address(dst, 0), k1, tmp1, Assembler::AVX_512bit);
+
+    // Restore k1
+    kmovql(k1, k2);
+    jmp(done);
+
+    clear_vector_masking();   // closing of the stub context for programming mask registers
+  }
   if (UseSSE42Intrinsics) {
     assert(UseSSE >= 4, "SSE4 must be enabled for SSE4.2 intrinsics to be available");
-    Label copy_8_loop, copy_bytes, copy_tail;
+    Label copy_16_loop, copy_8_loop, copy_bytes, copy_new_tail, copy_tail;
 
     movl(tmp2, len);
-    andl(tmp2, 0x00000007);   // tail count (in chars)
-    andl(len, 0xfffffff8);    // vector count (in chars)
-    jccb(Assembler::zero, copy_tail);
+
+    if (UseAVX > 1) {
+      andl(tmp2, (16 - 1));
+      andl(len, -16);
+      jccb(Assembler::zero, copy_new_tail);
+    } else {
+      andl(tmp2, 0x00000007);   // tail count (in chars)
+      andl(len, 0xfffffff8);    // vector count (in chars)
+      jccb(Assembler::zero, copy_tail);
+    }
 
     // vectored inflation
     lea(src, Address(src, len, Address::times_1));
     lea(dst, Address(dst, len, Address::times_2));
     negptr(len);
 
+    if (UseAVX > 1) {
+      bind(copy_16_loop);
+      vpmovzxbw(tmp1, Address(src, len, Address::times_1), Assembler::AVX_256bit);
+      vmovdqu(Address(dst, len, Address::times_2), tmp1);
+      addptr(len, 16);
+      jcc(Assembler::notZero, copy_16_loop);
+
+      bind(below_threshold);
+      bind(copy_new_tail);
+      if (UseAVX > 2) {
+        movl(tmp2, len);
+      }
+      else {
+        movl(len, tmp2);
+      }
+      andl(tmp2, 0x00000007);
+      andl(len, 0xFFFFFFF8);
+      jccb(Assembler::zero, copy_tail);
+
+      pmovzxbw(tmp1, Address(src, 0));
+      movdqu(Address(dst, 0), tmp1);
+      addptr(src, 8);
+      addptr(dst, 2 * 8);
+
+      jmp(copy_tail, true);
+    }
+
     // inflate 8 chars per iter
     bind(copy_8_loop);
     pmovzxbw(tmp1, Address(src, len, Address::times_1));  // unpack to 8 words
@@ -11013,7 +11354,6 @@
   bind(done);
 }
 
-
 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
   switch (cond) {
     // Note some conditions are synonyms for others
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Fri May 06 18:20:50 2016 +0300
@@ -847,7 +847,7 @@
   void call(Label& L, relocInfo::relocType rtype);
   void call(Register entry);
 
-  // NOTE: this call tranfers to the effective address of entry NOT
+  // NOTE: this call transfers to the effective address of entry NOT
   // the address contained by entry. This is because this is more natural
   // for jumps/calls.
   void call(AddressLiteral entry);
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri May 06 18:20:50 2016 +0300
@@ -4409,7 +4409,7 @@
   *   c_rarg0   - int crc
   *   c_rarg1   - byte* buf
   *   c_rarg2   - long length
-  *   c_rarg3   - table_start - optional (present only when doing a library_calll,
+  *   c_rarg3   - table_start - optional (present only when doing a library_call,
   *              not used by x86 algorithm)
   *
   * Ouput:
@@ -4532,6 +4532,9 @@
   *    c_rarg1   - objb     address
   *    c_rarg3   - length   length
   *    c_rarg4   - scale    log2_array_indxscale
+  *
+  *  Output:
+  *        rax   - int >= mismatched index, < 0 bitwise complement of tail
   */
   address generate_vectorizedMismatch() {
     __ align(CodeEntryAlignment);
@@ -5291,9 +5294,6 @@
     if (UseMulAddIntrinsic) {
       StubRoutines::_mulAdd = generate_mulAdd();
     }
-    if (UseVectorizedMismatchIntrinsic) {
-      StubRoutines::_vectorizedMismatch = generate_vectorizedMismatch();
-    }
 #ifndef _WINDOWS
     if (UseMontgomeryMultiplyIntrinsic) {
       StubRoutines::_montgomeryMultiply
@@ -5305,6 +5305,10 @@
     }
 #endif // WINDOWS
 #endif // COMPILER2
+
+    if (UseVectorizedMismatchIntrinsic) {
+      StubRoutines::_vectorizedMismatch = generate_vectorizedMismatch();
+    }
   }
 
  public:
--- a/src/cpu/x86/vm/templateInterpreterGenerator_x86_64.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/x86/vm/templateInterpreterGenerator_x86_64.cpp	Fri May 06 18:20:50 2016 +0300
@@ -289,9 +289,9 @@
 }
 
 /**
-* Method entry for static native methods:
+* Method entry for static (non-native) methods:
 *   int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end)
-*   int java.util.zip.CRC32C.updateByteBuffer(int crc, long address, int off, int end)
+*   int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end)
 */
 address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
   if (UseCRC32CIntrinsics) {
@@ -306,7 +306,7 @@
     // Arguments are reversed on java expression stack
     // Calculate address of start element
     if (kind == Interpreter::java_util_zip_CRC32C_updateDirectByteBuffer) {
-      __ movptr(buf, Address(rsp, 3 * wordSize)); // long buf
+      __ movptr(buf, Address(rsp, 3 * wordSize)); // long address
       __ movl2ptr(off, Address(rsp, 2 * wordSize)); // offset
       __ addq(buf, off); // + offset
       __ movl(crc, Address(rsp, 5 * wordSize)); // Initial CRC
--- a/src/cpu/x86/vm/x86.ad	Fri May 06 09:54:58 2016 +0000
+++ b/src/cpu/x86/vm/x86.ad	Fri May 06 18:20:50 2016 +0300
@@ -1861,6 +1861,8 @@
   return false;
 }
 
+const bool Matcher::convi2l_type_required = true;
+
 // Helper methods for MachSpillCopyNode::implementation().
 static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                           int src_hi, int dst_hi, uint ireg, outputStream* st) {
--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/VM.java	Fri May 06 18:20:50 2016 +0300
@@ -326,8 +326,9 @@
        Address vmInternalInfoAddr = vmVersion.getAddressField("_s_internal_vm_info_string").getValue();
        vmInternalInfo = CStringUtilities.getString(vmInternalInfoAddr);
 
+       Type threadLocalAllocBuffer = db.lookupType("ThreadLocalAllocBuffer");
        CIntegerType intType = (CIntegerType) db.lookupType("int");
-       CIntegerField reserveForAllocationPrefetchField = vmVersion.getCIntegerField("_reserve_for_allocation_prefetch");
+       CIntegerField reserveForAllocationPrefetchField = threadLocalAllocBuffer.getCIntegerField("_reserve_for_allocation_prefetch");
        reserveForAllocationPrefetch = (int)reserveForAllocationPrefetchField.getCInteger(intType);
     } catch (Exception exp) {
        throw new RuntimeException("can't determine target's VM version : " + exp.getMessage());
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotMemoryAccessProviderImpl.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotMemoryAccessProviderImpl.java	Fri May 06 18:20:50 2016 +0300
@@ -146,6 +146,9 @@
 
     @Override
     public JavaConstant readUnsafeConstant(JavaKind kind, JavaConstant baseConstant, long displacement) {
+        if (kind == null) {
+            throw new IllegalArgumentException("null JavaKind");
+        }
         if (kind == JavaKind.Object) {
             Object o = readRawObject(baseConstant, displacement, runtime.getConfig().useCompressedOops);
             return HotSpotObjectConstantImpl.forObject(o);
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotMethodHandleAccessProvider.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotMethodHandleAccessProvider.java	Fri May 06 18:20:50 2016 +0300
@@ -130,7 +130,7 @@
 
         /* Load non-public field: LambdaForm MethodHandle.form */
         JavaConstant lambdaForm = constantReflection.readFieldValue(LazyInitialization.methodHandleFormField, methodHandle);
-        if (lambdaForm.isNull()) {
+        if (lambdaForm == null || lambdaForm.isNull()) {
             return null;
         }
 
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotResolvedObjectTypeImpl.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotResolvedObjectTypeImpl.java	Fri May 06 18:20:50 2016 +0300
@@ -882,4 +882,9 @@
     public boolean isTrustedInterfaceType() {
         return TrustedInterface.class.isAssignableFrom(mirror());
     }
+
+    @Override
+    public boolean isCloneableWithAllocation() {
+        return (getAccessFlags() & config().jvmAccIsCloneableFast) != 0;
+    }
 }
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotResolvedPrimitiveType.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotResolvedPrimitiveType.java	Fri May 06 18:20:50 2016 +0300
@@ -267,4 +267,9 @@
     public boolean isTrustedInterfaceType() {
         return false;
     }
+
+    @Override
+    public boolean isCloneableWithAllocation() {
+        return false;
+    }
 }
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotVMConfig.java	Fri May 06 18:20:50 2016 +0300
@@ -1077,6 +1077,7 @@
     @HotSpotVMConstant(name = "JVM_ACC_FIELD_STABLE") @Stable public int jvmAccFieldStable;
     @HotSpotVMConstant(name = "JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE") @Stable public int jvmAccFieldHasGenericSignature;
     @HotSpotVMConstant(name = "JVM_ACC_WRITTEN_FLAGS") @Stable public int jvmAccWrittenFlags;
+    @HotSpotVMConstant(name = "JVM_ACC_IS_CLONEABLE_FAST") @Stable public int jvmAccIsCloneableFast;
 
     // Modifier.SYNTHETIC is not public so we get it via vmStructs.
     @HotSpotVMConstant(name = "JVM_ACC_SYNTHETIC") @Stable public int jvmAccSynthetic;
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.meta/src/jdk/vm/ci/meta/MemoryAccessProvider.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.meta/src/jdk/vm/ci/meta/MemoryAccessProvider.java	Fri May 06 18:20:50 2016 +0300
@@ -35,8 +35,8 @@
      * @param displacement the displacement within the object in bytes
      * @return the read value encapsulated in a {@link JavaConstant} object, or {@code null} if the
      *         value cannot be read.
-     * @throws IllegalArgumentException if {@code kind} is {@link JavaKind#Void} or not
-     *             {@linkplain JavaKind#isPrimitive() primitive} kind
+     * @throws IllegalArgumentException if {@code kind} is {@code null}, {@link JavaKind#Void} or
+     *             not {@linkplain JavaKind#isPrimitive() primitive} kind
      */
     JavaConstant readUnsafeConstant(JavaKind kind, JavaConstant base, long displacement) throws IllegalArgumentException;
 
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.meta/src/jdk/vm/ci/meta/ResolvedJavaType.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.meta/src/jdk/vm/ci/meta/ResolvedJavaType.java	Fri May 06 18:20:50 2016 +0300
@@ -359,4 +359,12 @@
         }
         return null;
     }
+
+    /**
+     * Returns true if this type is {@link Cloneable} and can be safely cloned by creating a normal
+     * Java allocation and populating it from the fields returned by
+     * {@link #getInstanceFields(boolean)}. Some types may require special handling by the platform
+     * so they would need to go through the normal {@link Object#clone} path.
+     */
+    boolean isCloneableWithAllocation();
 }
--- a/src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/FolderNode.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/FolderNode.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,11 +25,13 @@
 
 import com.sun.hotspot.igv.coordinator.actions.RemoveCookie;
 import com.sun.hotspot.igv.data.*;
+import com.sun.hotspot.igv.util.PropertiesSheet;
 import java.awt.Image;
 import java.util.List;
 import org.openide.nodes.AbstractNode;
 import org.openide.nodes.Children;
 import org.openide.nodes.Node;
+import org.openide.nodes.Sheet;
 import org.openide.util.ImageUtilities;
 import org.openide.util.lookup.AbstractLookup;
 import org.openide.util.lookup.InstanceContent;
@@ -75,6 +77,16 @@
     }
 
     @Override
+    protected Sheet createSheet() {
+        Sheet s = super.createSheet();
+        if (children.folder instanceof Properties.Entity) {
+            Properties.Entity p = (Properties.Entity) children.folder;
+            PropertiesSheet.initializeSheet(p.getProperties(), s);
+        }
+        return s;
+    }
+
+    @Override
     public Image getIcon(int i) {
         return ImageUtilities.loadImage("com/sun/hotspot/igv/coordinator/images/folder.png");
     }
--- a/src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/OutlineTopComponent.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/OutlineTopComponent.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -124,6 +124,8 @@
 
     public void clear() {
         document.clear();
+        root = new FolderNode(document);
+        manager.setRootContext(root);
     }
 
     @Override
--- a/src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/actions/ImportAction.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/actions/ImportAction.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,31 +31,47 @@
 import com.sun.hotspot.igv.data.serialization.ParseMonitor;
 import com.sun.hotspot.igv.data.serialization.Parser;
 import com.sun.hotspot.igv.settings.Settings;
-import java.awt.event.InputEvent;
-import java.awt.event.KeyEvent;
+import java.awt.event.ActionEvent;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.nio.channels.FileChannel;
 import java.nio.file.StandardOpenOption;
-import javax.swing.Action;
+import java.util.logging.Level;
+import java.util.logging.Logger;
 import javax.swing.JFileChooser;
-import javax.swing.KeyStroke;
 import javax.swing.SwingUtilities;
 import javax.swing.filechooser.FileFilter;
 import org.netbeans.api.progress.ProgressHandle;
 import org.netbeans.api.progress.ProgressHandleFactory;
 import org.openide.util.Exceptions;
+import org.openide.util.RequestProcessor;
+import org.openide.awt.ActionID;
+import org.openide.awt.ActionReference;
+import org.openide.awt.ActionReferences;
+import org.openide.awt.ActionRegistration;
 import org.openide.util.HelpCtx;
 import org.openide.util.NbBundle;
-import org.openide.util.RequestProcessor;
-import org.openide.util.actions.CallableSystemAction;
+import org.openide.util.actions.SystemAction;
 
 /**
  *
  * @author Thomas Wuerthinger
  */
-public final class ImportAction extends CallableSystemAction {
+
+@ActionID(
+        category = "File",
+        id = "com.sun.hotspot.igv.coordinator.actions.ImportAction"
+)
+@ActionRegistration(
+        iconBase = "com/sun/hotspot/igv/coordinator/images/import.png",
+        displayName = "#CTL_ImportAction"
+)
+@ActionReferences({
+    @ActionReference(path = "Menu/File", position = 0),
+    @ActionReference(path = "Shortcuts", name = "C-O")
+})
+public final class ImportAction extends SystemAction {
     private static final int WORKUNITS = 10000;
 
     public static FileFilter getFileFilter() {
@@ -74,74 +90,77 @@
     }
 
     @Override
-    public void performAction() {
-
+    public void actionPerformed(ActionEvent e) {
         JFileChooser fc = new JFileChooser();
         fc.setFileFilter(ImportAction.getFileFilter());
         fc.setCurrentDirectory(new File(Settings.get().get(Settings.DIRECTORY, Settings.DIRECTORY_DEFAULT)));
+        fc.setMultiSelectionEnabled(true);
 
         if (fc.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {
-            File file = fc.getSelectedFile();
+            for (final File file : fc.getSelectedFiles()) {
+                File dir = file;
+                if (!dir.isDirectory()) {
+                    dir = dir.getParentFile();
+                }
 
-            File dir = file;
-            if (!dir.isDirectory()) {
-                dir = dir.getParentFile();
-            }
-
-            Settings.get().put(Settings.DIRECTORY, dir.getAbsolutePath());
-            try {
-                final FileChannel channel = FileChannel.open(file.toPath(), StandardOpenOption.READ);
-                final ProgressHandle handle = ProgressHandleFactory.createHandle("Opening file " + file.getName());
-                handle.start(WORKUNITS);
-                final long start = channel.size();
-                ParseMonitor monitor = new ParseMonitor() {
-                    @Override
-                    public void updateProgress() {
-                        try {
-                            int prog = (int) (WORKUNITS * (double) channel.position() / (double) start);
-                            handle.progress(prog);
-                        } catch (IOException ex) {
-                        }
+                Settings.get().put(Settings.DIRECTORY, dir.getAbsolutePath());
+                try {
+                    final FileChannel channel = FileChannel.open(file.toPath(), StandardOpenOption.READ);
+                    final ProgressHandle handle = ProgressHandleFactory.createHandle("Opening file " + file.getName());
+                    handle.start(WORKUNITS);
+                    final long startTime = System.currentTimeMillis();
+                    final long start = channel.size();
+                    ParseMonitor monitor = new ParseMonitor() {
+                            @Override
+                            public void updateProgress() {
+                                try {
+                                    int prog = (int) (WORKUNITS * (double) channel.position() / (double) start);
+                                    handle.progress(prog);
+                                } catch (IOException ex) {
+                                }
+                            }
+                            @Override
+                            public void setState(String state) {
+                                updateProgress();
+                                handle.progress(state);
+                            }
+                        };
+                    final GraphParser parser;
+                    final OutlineTopComponent component = OutlineTopComponent.findInstance();
+                    if (file.getName().endsWith(".xml")) {
+                        parser = new Parser(channel, monitor, null);
+                    } else if (file.getName().endsWith(".bgv")) {
+                        parser = new BinaryParser(channel, monitor, component.getDocument(), null);
+                    } else {
+                        parser = null;
                     }
-                    @Override
-                    public void setState(String state) {
-                        updateProgress();
-                        handle.progress(state);
-                    }
-                };
-                final GraphParser parser;
-                final OutlineTopComponent component = OutlineTopComponent.findInstance();
-                if (file.getName().endsWith(".xml")) {
-                    parser = new Parser(channel, monitor, null);
-                } else if (file.getName().endsWith(".bgv")) {
-                    parser = new BinaryParser(channel, monitor, component.getDocument(), null);
-                } else {
-                    parser = null;
+                    RequestProcessor.getDefault().post(new Runnable() {
+                            @Override
+                            public void run() {
+                                try {
+                                    final GraphDocument document = parser.parse();
+                                    if (document != null) {
+                                        SwingUtilities.invokeLater(new Runnable(){
+                                                @Override
+                                                public void run() {
+                                                    component.requestActive();
+                                                    component.getDocument().addGraphDocument(document);
+                                                }
+                                            });
+                                    }
+                                } catch (IOException ex) {
+                                    Exceptions.printStackTrace(ex);
+                                }
+                                handle.finish();
+                                long stop = System.currentTimeMillis();
+                                Logger.getLogger(getClass().getName()).log(Level.INFO, "Loaded in " + file + " in " + ((stop - startTime) / 1000.0) + " seconds");
+                            }
+                        });
+                } catch (FileNotFoundException ex) {
+                    Exceptions.printStackTrace(ex);
+                } catch (IOException ex) {
+                    Exceptions.printStackTrace(ex);
                 }
-                RequestProcessor.getDefault().post(new Runnable() {
-                    @Override
-                    public void run() {
-                        try {
-                            final GraphDocument document = parser.parse();
-                            if (document != null) {
-                                SwingUtilities.invokeLater(new Runnable(){
-                                    @Override
-                                    public void run() {
-                                        component.requestActive();
-                                        component.getDocument().addGraphDocument(document);
-                                    }
-                                });
-                            }
-                        } catch (IOException ex) {
-                            Exceptions.printStackTrace(ex);
-                        }
-                        handle.finish();
-                    }
-                });
-            } catch (FileNotFoundException ex) {
-                Exceptions.printStackTrace(ex);
-            } catch (IOException ex) {
-                Exceptions.printStackTrace(ex);
             }
         }
     }
@@ -151,11 +170,6 @@
         return NbBundle.getMessage(ImportAction.class, "CTL_ImportAction");
     }
 
-    public ImportAction() {
-        putValue(Action.SHORT_DESCRIPTION, "Open XML graph document...");
-        putValue(Action.ACCELERATOR_KEY, KeyStroke.getKeyStroke(KeyEvent.VK_O, InputEvent.CTRL_MASK));
-    }
-
     @Override
     protected String iconResource() {
         return "com/sun/hotspot/igv/coordinator/images/import.png";
@@ -165,9 +179,4 @@
     public HelpCtx getHelpCtx() {
         return HelpCtx.DEFAULT_HELP;
     }
-
-    @Override
-    protected boolean asynchronous() {
-        return false;
-    }
 }
--- a/src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/actions/SaveAllAction.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/actions/SaveAllAction.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,14 +29,30 @@
 import java.awt.event.KeyEvent;
 import javax.swing.Action;
 import javax.swing.KeyStroke;
+import org.openide.awt.ActionID;
+import org.openide.awt.ActionReference;
+import org.openide.awt.ActionReferences;
+import org.openide.awt.ActionRegistration;
 import org.openide.util.HelpCtx;
 import org.openide.util.NbBundle;
+import org.openide.util.NbBundle.Messages;
 import org.openide.util.actions.CallableSystemAction;
 
 /**
  *
  * @author Thomas Wuerthinger
  */
+@ActionID(
+        category = "File",
+        id = "com.sun.hotspot.igv.coordinator.actions.SaveAllAction"
+)
+@ActionRegistration(
+        displayName = "#CTL_SaveAllAction"
+)
+@ActionReferences({
+    @ActionReference(path = "Menu/File", position = 0),
+    @ActionReference(path = "Shortcuts", name = "C-S")
+})
 public final class SaveAllAction extends CallableSystemAction {
 
     @Override
--- a/src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/layer.xml	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/layer.xml	Fri May 06 18:20:50 2016 +0300
@@ -18,6 +18,8 @@
     
     <folder name="Menu">
         <folder name="File">
+            <file name="Export_hidden"/>
+            <file name="Import_hidden"/>
             <file name="Separator2.instance_hidden"/>
             <file name="Separator3.instance_hidden"/>
             <file name="SeparatorOpen.instance_hidden"/>
@@ -43,11 +45,11 @@
             </file>
             <file name="com-sun-hotspot-igv-coordinator-actions-RemoveAction.shadow">
                 <attr name="originalFile" stringvalue="Actions/Edit/com-sun-hotspot-igv-coordinator-actions-RemoveAction.instance"/>
-                <attr name="position" intvalue="400" />
+                <attr name="position" intvalue="400"/>
             </file>
             <file name="com-sun-hotspot-igv-coordinator-actions-RemoveAllAction.shadow">
                 <attr name="originalFile" stringvalue="Actions/Edit/com-sun-hotspot-igv-coordinator-actions-RemoveAllAction.instance"/>
-                <attr name="position" intvalue="500" />
+                <attr name="position" intvalue="500"/>
             </file>
             
             <!-- Hidden menu entries from other modules -->
--- a/src/share/tools/IdealGraphVisualizer/Data/src/com/sun/hotspot/igv/data/GraphDocument.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/Data/src/com/sun/hotspot/igv/data/GraphDocument.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -51,11 +51,13 @@
     }
 
     public void addGraphDocument(GraphDocument document) {
-        for (FolderElement e : document.elements) {
-            e.setParent(this);
-            this.addElement(e);
+        if (document != this) { // self-merge guard: the body adds document's elements to this and then calls document.clear()
+            for (FolderElement e : document.elements) {
+                e.setParent(this);
+                this.addElement(e);
+            }
+            document.clear(); // runs only after every element has been re-parented and added here
         }
-        document.clear();
         getChangedEvent().fire();
     }
 
--- a/src/share/tools/IdealGraphVisualizer/Data/src/com/sun/hotspot/igv/data/InputBlock.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/Data/src/com/sun/hotspot/igv/data/InputBlock.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -90,7 +90,9 @@
     public void addNode(int id) {
         InputNode node = graph.getNode(id);
         assert node != null;
-        assert !nodes.contains(node) : "duplicate : " + node;
+        // nodes.contains(node) is too expensive for large graphs so
+        // just make sure the Graph doesn't know it yet.
+        assert graph.getBlock(id) == null : "duplicate : " + node; // checked only when assertions are enabled
         graph.setBlock(node, this);
         nodes.add(node);
     }
--- a/src/share/tools/IdealGraphVisualizer/Data/src/com/sun/hotspot/igv/data/Properties.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/Data/src/com/sun/hotspot/igv/data/Properties.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,7 +24,9 @@
 package com.sun.hotspot.igv.data;
 
 import java.io.Serializable;
+import java.lang.ref.WeakReference;
 import java.util.*;
+import java.util.Map.Entry;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.regex.PatternSyntaxException;
@@ -36,7 +38,7 @@
 public class Properties implements Serializable, Iterable<Property> {
 
     public static final long serialVersionUID = 1L;
-    private String[] map = new String[4];
+    protected String[] map = new String[4];
 
     public Properties() {
     }
@@ -102,6 +104,59 @@
         System.arraycopy(p.map, 0, map, 0, p.map.length);
     }
 
+    protected Properties(String[] map) { // wraps an existing backing array
+        this.map = map; // the caller's array is adopted as-is, not copied
+    }
+
+    static class SharedProperties extends Properties { // Properties variant that refuses setPropertyInternal and caches its hash code
+        int hashCode; // computed once in the constructor from the backing array
+
+        SharedProperties(String[] map) {
+            super(map);
+            this.hashCode = Arrays.hashCode(map);
+        }
+
+        @Override
+        protected void setPropertyInternal(String name, String value) {
+            throw new UnsupportedOperationException(); // the cached hashCode would go stale if the array changed
+        }
+
+        @Override
+        public boolean equals(Object other) {
+            if (this == other) {
+                return true;
+            }
+            if (!(other instanceof SharedProperties)) {
+                return super.equals(other); // NOTE(review): confirm Properties.hashCode() agrees with Arrays.hashCode(map) for objects this treats as equal
+            }
+            SharedProperties props2 = (SharedProperties) other;
+            return Arrays.equals(map, props2.map); // element-wise comparison of the two backing arrays
+        }
+
+        @Override
+        public int hashCode() {
+            return hashCode; // value cached by the constructor
+        }
+    }
+
+    private static class PropertyCache { // interning table that hands out one SharedProperties per distinct backing array content
+        static WeakHashMap<SharedProperties, WeakReference<SharedProperties>> immutableCache = new WeakHashMap<>(); // value wraps the key in a WeakReference so the entry does not strongly reference its own key
+
+        static synchronized SharedProperties intern(Properties properties) { // returns the cached instance equal to properties, registering a new one if none is cached
+            String[] map = properties.map; // the array is shared with the argument, not copied
+            SharedProperties key = new SharedProperties(map);
+            WeakReference<SharedProperties> entry = immutableCache.get(key);
+            if (entry != null) {
+                SharedProperties props = entry.get();
+                if (props != null) { // the referent may already have been cleared
+                    return props;
+                }
+            }
+            immutableCache.put(key, new WeakReference<>(key)); // nothing usable cached: key itself becomes the shared instance
+            return key;
+        }
+    }
+
     public static class Entity implements Provider {
 
         private Properties properties;
@@ -118,6 +173,10 @@
         public Properties getProperties() {
             return properties;
         }
+
+        public void internProperties() { // replaces this entity's properties with the instance returned by PropertyCache.intern
+            properties = PropertyCache.intern(properties); // the result is a SharedProperties, whose setPropertyInternal throws UnsupportedOperationException
+        }
     }
 
     public interface PropertyMatcher {
@@ -322,8 +381,8 @@
     public void setProperty(String name, String value) {
         setPropertyInternal(name.intern(), value != null ? value.intern() : null);
     }
-    private void setPropertyInternal(String name, String value) {
 
+    protected void setPropertyInternal(String name, String value) {
         for (int i = 0; i < map.length; i += 2) {
             if (map[i] != null && map[i].equals(name)) {
                 String p = map[i + 1];
--- a/src/share/tools/IdealGraphVisualizer/Data/src/com/sun/hotspot/igv/data/serialization/BinaryParser.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/Data/src/com/sun/hotspot/igv/data/serialization/BinaryParser.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,7 @@
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.nio.channels.ReadableByteChannel;
+import java.nio.charset.Charset;
 import java.util.*;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
@@ -68,6 +69,8 @@
 
     private static final String NO_BLOCK = "noBlock";
 
+    private static final Charset utf8 = Charset.forName("UTF-8");
+
     private final GroupCallback callback;
     private final List<Object> constantPool;
     private final ByteBuffer buffer;
@@ -275,28 +278,36 @@
         hashStack = new LinkedList<>();
         this.monitor = monitor;
         try {
-            this.digest = MessageDigest.getInstance("SHA-256");
+            this.digest = MessageDigest.getInstance("SHA-1");
         } catch (NoSuchAlgorithmException e) {
         }
     }
 
     private void fill() throws IOException {
+        // All the data between lastPosition and position has been
+        // used so add it to the digest.
+        int position = buffer.position(); // current read position, re-checked by the assert below
+        buffer.position(lastPosition); // rewind to the first byte that has not been digested yet
+        byte[] remaining = new byte[position - buffer.position()];
+        buffer.get(remaining); // copying the consumed bytes advances the buffer back to position
+        digest.update(remaining); // NOTE(review): digest is only assigned inside a try with an empty catch in the constructor - confirm it cannot be null here
+        assert position == buffer.position();
+
         buffer.compact();
         if (channel.read(buffer) < 0) {
             throw new EOFException();
         }
         buffer.flip();
+        lastPosition = buffer.position(); // undigested data now starts at the flipped buffer's position
     }
 
     private void ensureAvailable(int i) throws IOException {
+        if (i > buffer.capacity()) { // remaining() can never exceed capacity(), so the loop below could not satisfy such a request
+            throw new IllegalArgumentException(String.format("Can not request %d bytes: buffer capacity is %d", i, buffer.capacity()));
+        }
         while (buffer.remaining() < i) {
             fill();
         }
-        buffer.mark();
-        byte[] result = new byte[i];
-        buffer.get(result);
-        digest.update(result);
-        buffer.reset();
     }
 
     private int readByte() throws IOException {
@@ -330,12 +341,7 @@
     }
 
     private String readString() throws IOException {
-        int len = readInt();
-        ensureAvailable(len * 2);
-        char[] chars = new char[len];
-        buffer.asCharBuffer().get(chars);
-        buffer.position(buffer.position() + len * 2);
-        return new String(chars).intern();
+        return new String(readBytes(), utf8).intern(); // decodes the length-prefixed bytes as UTF-8; NOTE(review): readBytes() returns null for a negative length, which would throw NullPointerException here
     }
 
     private byte[] readBytes() throws IOException {
@@ -343,10 +349,15 @@
         if (len < 0) {
             return null;
         }
-        ensureAvailable(len);
-        byte[] data = new byte[len];
-        buffer.get(data);
-        return data;
+        byte[] b = new byte[len];
+        int bytesRead = 0;
+        while (bytesRead < b.length) {
+            int toRead = Math.min(b.length - bytesRead, buffer.capacity());
+            ensureAvailable(toRead);
+            buffer.get(b, bytesRead, toRead);
+            bytesRead += toRead;
+        }
+        return b;
     }
 
     private String readIntsToString() throws IOException {
@@ -643,6 +654,7 @@
         int bci = readInt();
         Group group = new Group(parent);
         group.getProperties().setProperty("name", name);
+        parseProperties(group.getProperties());
         if (method != null) {
             InputMethod inMethod = new InputMethod(group, method.name, shortName, bci);
             inMethod.setBytecodes("TODO");
@@ -651,13 +663,25 @@
         return group;
     }
 
+    int lastPosition = 0;
+
     private InputGraph parseGraph() throws IOException {
         if (monitor != null) {
             monitor.updateProgress();
         }
         String title = readPoolObject(String.class);
         digest.reset();
+        lastPosition = buffer.position();
         InputGraph graph = parseGraph(title);
+
+        int position = buffer.position();
+        buffer.position(lastPosition);
+        byte[] remaining = new byte[position - buffer.position()];
+        buffer.get(remaining);
+        digest.update(remaining);
+        assert position == buffer.position();
+        lastPosition = buffer.position();
+
         byte[] d = digest.digest();
         byte[] hash = hashStack.peek();
         if (hash != null && Arrays.equals(hash, d)) {
@@ -669,11 +693,24 @@
         return graph;
     }
 
+    private void parseProperties(Properties properties) throws IOException { // reads a short count, then that many key/value pairs, into properties
+        int propCount = readShort();
+        for (int j = 0; j < propCount; j++) {
+            String key = readPoolObject(String.class);
+            Object value = readPropertyObject();
+            properties.setProperty(key, value != null ? value.toString() : "null"); // a null value is stored as the literal string "null"
+        }
+    }
+
     private InputGraph parseGraph(String title) throws IOException {
         InputGraph graph = new InputGraph(title);
+        parseProperties(graph.getProperties()); // graph-level properties are read before the nodes and blocks
         parseNodes(graph);
         parseBlocks(graph);
         graph.ensureNodesInBlocks();
+        for (InputNode node : graph.getNodes()) { // runs after all parsing for this graph is done
+            node.internProperties(); // presumably Properties.Entity.internProperties(), which swaps in a shared instance - confirm InputNode's superclass
+        }
         return graph;
     }
 
@@ -822,9 +859,10 @@
         }
     }
 
+    static final Pattern templatePattern = Pattern.compile("\\{(p|i)#([a-zA-Z0-9$_]+)(/(l|m|s))?\\}");
+
     private String createName(List<Edge> edges, Map<String, Object> properties, String template) {
-        Pattern p = Pattern.compile("\\{(p|i)#([a-zA-Z0-9$_]+)(/(l|m|s))?\\}");
-        Matcher m = p.matcher(template);
+        Matcher m = templatePattern.matcher(template);
         StringBuffer sb = new StringBuffer();
         while (m.find()) {
             String name = m.group(2);
--- a/src/share/tools/IdealGraphVisualizer/Data/src/com/sun/hotspot/igv/data/services/InputGraphProvider.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/Data/src/com/sun/hotspot/igv/data/services/InputGraphProvider.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,4 +37,14 @@
     InputGraph getGraph();
 
     void setSelectedNodes(Set<InputNode> nodes);
+
+    /**
+     * @return an iterator walking forward through the {@link InputGraph}s following the {@link #getGraph()}
+     */
+    Iterable<InputGraph> searchForward();
+
+    /**
+     * @return an iterator walking backward through the {@link InputGraph}s preceding the {@link #getGraph()}
+     */
+    Iterable<InputGraph> searchBackward();
 }
--- a/src/share/tools/IdealGraphVisualizer/Graph/nbproject/project.xml	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/Graph/nbproject/project.xml	Fri May 06 18:20:50 2016 +0300
@@ -22,6 +22,14 @@
                         <specification-version>1.0</specification-version>
                     </run-dependency>
                 </dependency>
+                <dependency>
+                    <code-name-base>com.sun.hotspot.igv.util</code-name-base>
+                    <build-prerequisite/>
+                    <compile-dependency/>
+                    <run-dependency>
+                        <specification-version>1.0</specification-version>
+                    </run-dependency>
+                </dependency>
             </module-dependencies>
             <public-packages>
                 <package>com.sun.hotspot.igv.graph</package>
--- a/src/share/tools/IdealGraphVisualizer/Graph/src/com/sun/hotspot/igv/graph/Slot.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/Graph/src/com/sun/hotspot/igv/graph/Slot.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 import com.sun.hotspot.igv.data.Source;
 import com.sun.hotspot.igv.layout.Port;
 import com.sun.hotspot.igv.layout.Vertex;
+import com.sun.hotspot.igv.util.StringUtils;
 import java.awt.Color;
 import java.awt.Font;
 import java.awt.FontMetrics;
@@ -141,7 +142,7 @@
         sb.append(text);
 
         for (InputNode n : getSource().getSourceNodes()) {
-            sb.append("Node (ID=" + n.getId() + "): " + n.getProperties().get("name"));
+            sb.append(StringUtils.escapeHTML("Node (ID=" + n.getId() + "): " + n.getProperties().get("name")));
             sb.append("<br>");
         }
 
--- a/src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/DiagramViewModel.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/DiagramViewModel.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -445,5 +445,48 @@
     void close() {
         filterChain.getChangedEvent().removeListener(filterChainChangedListener);
         sequenceFilterChain.getChangedEvent().removeListener(filterChainChangedListener);
+    }
+
+    Iterable<InputGraph> getGraphsForward() { // graphs after the one at getFirstPosition(), in list order
+        return new Iterable<InputGraph>() {
+
+            @Override
+            public Iterator<InputGraph> iterator() {
+                return new Iterator<InputGraph>() {
+                    int index = getFirstPosition(); // sampled when the iterator is created
+
+                    @Override
+                    public boolean hasNext() {
+                        return index + 1 < graphs.size(); // true while an element after index exists
+                    }
+
+                    @Override
+                    public InputGraph next() {
+                        return graphs.get(++index); // advance first, then fetch
+                    }
+                };
+            }
+        };
+    }
+
+    Iterable<InputGraph> getGraphsBackward() { // graphs before the one at getFirstPosition(), nearest first, down to index 0
+        return new Iterable<InputGraph>() {
+            @Override
+            public Iterator<InputGraph> iterator() {
+                return new Iterator<InputGraph>() {
+                    int index = getFirstPosition(); // sampled when the iterator is created
+
+                    @Override
+                    public boolean hasNext() {
+                        return index - 1 >= 0; // index 0 is a valid element; the former "> 0" test never returned the first graph
+                    }
+
+                    @Override
+                    public InputGraph next() {
+                        return graphs.get(--index); // step back first, then fetch
+                    }
+                };
+            }
+        };
+    }
 }
-}
--- a/src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/EditorInputGraphProvider.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/EditorInputGraphProvider.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -50,4 +50,14 @@
     public void setSelectedNodes(Set<InputNode> nodes) {
         editor.setSelectedNodes(nodes);
     }
+
+    @Override
+    public Iterable<InputGraph> searchBackward() { // delegates to the editor's diagram model
+        return editor.getDiagramModel().getGraphsBackward();
+    }
+
+    @Override
+    public Iterable<InputGraph> searchForward() { // delegates to the editor's diagram model
+        return editor.getDiagramModel().getGraphsForward();
+    }
 }
--- a/src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/EditorTopComponent.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/EditorTopComponent.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -289,7 +289,10 @@
             quicksearch = (Component) quicksearch.getClass().getConstructor(KeyStroke.class).newInstance(new Object[]{null});
         } catch (ReflectiveOperationException | IllegalArgumentException | SecurityException e) {
         }
-        quicksearch.setMinimumSize(quicksearch.getPreferredSize()); // necessary for GTK LAF
+        Dimension preferredSize = quicksearch.getPreferredSize();
+        preferredSize = new Dimension((int) preferredSize.getWidth() * 2, (int) preferredSize.getHeight());
+        quicksearch.setMinimumSize(preferredSize); // necessary for GTK LAF
+        quicksearch.setPreferredSize(preferredSize);
         toolBar.add(quicksearch);
 
         centerPanel = new JPanel();
--- a/src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/NodeQuickSearch.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/NodeQuickSearch.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,6 +23,7 @@
  */
 package com.sun.hotspot.igv.view;
 
+import com.sun.hotspot.igv.data.InputGraph;
 import com.sun.hotspot.igv.data.InputNode;
 import com.sun.hotspot.igv.data.Properties;
 import com.sun.hotspot.igv.data.Properties.RegexpPropertyMatcher;
@@ -81,57 +82,67 @@
 
         final InputGraphProvider p = LookupHistory.getLast(InputGraphProvider.class);
         if (p != null && p.getGraph() != null) {
-            List<InputNode> matches = null;
-            try {
-                RegexpPropertyMatcher matcher = new RegexpPropertyMatcher(name, value, Pattern.CASE_INSENSITIVE);
-                Properties.PropertySelector<InputNode> selector = new Properties.PropertySelector<>(p.getGraph().getNodes());
-
-                matches = selector.selectMultiple(matcher);
-            } catch (Exception e) {
-                final String msg = e.getMessage();
-                response.addResult(new Runnable() {
-                    @Override
-                        public void run() {
-                            Message desc = new NotifyDescriptor.Message("An exception occurred during the search, "
-                                    + "perhaps due to a malformed query string:\n" + msg,
-                                    NotifyDescriptor.WARNING_MESSAGE);
-                            DialogDisplayer.getDefault().notify(desc);
-                        }
-                    },
-                    "(Error during search)"
-                );
+            InputGraph matchGraph = p.getGraph();
+            // Search the current graph
+            List<InputNode> matches = findMatches(name, value, p.getGraph(), response);
+            if (matches == null) {
+                // See if it hits in a later graph
+                for (InputGraph graph : p.searchForward()) {
+                    matches = findMatches(name, value, graph, response);
+                    if (matches != null) {
+                        matchGraph = graph;
+                        break;
+                    }
+                }
+            }
+            if (matches == null) {
+                // See if it hits in an earlier graph
+                for (InputGraph graph : p.searchBackward()) {
+                    matches = findMatches(name, value, graph, response);
+                    if (matches != null) {
+                        matchGraph = graph;
+                        break;
+                    }
+                }
             }
 
             if (matches != null) {
                 final Set<InputNode> set = new HashSet<>(matches);
+                final InputGraph theGraph = p.getGraph() != matchGraph ? matchGraph : null;
                 response.addResult(new Runnable() {
                     @Override
-                        public void run() {
-                            final EditorTopComponent comp = EditorTopComponent.getActive();
-                            if (comp != null) {
-                                comp.setSelectedNodes(set);
-                                comp.requestActive();
+                    public void run() {
+                        final EditorTopComponent comp = EditorTopComponent.getActive();
+                        if (comp != null) {
+                            if (theGraph != null) {
+                                comp.getDiagramModel().selectGraph(theGraph);
                             }
+                            comp.setSelectedNodes(set);
+                            comp.requestActive();
                         }
-                    },
-                    "All " + matches.size() + " matching nodes (" + name + "=" + value + ")"
+                    }
+                },
+                        "All " + matches.size() + " matching nodes (" + name + "=" + value + ")" + (theGraph != null ? " in " + theGraph.getName() : "")
                 );
 
                 // Single matches
                 for (final InputNode n : matches) {
                     response.addResult(new Runnable() {
                         @Override
-                            public void run() {
-                                final EditorTopComponent comp = EditorTopComponent.getActive();
-                                if (comp != null) {
-                                    final Set<InputNode> tmpSet = new HashSet<>();
-                                    tmpSet.add(n);
-                                    comp.setSelectedNodes(tmpSet);
-                                    comp.requestActive();
+                        public void run() {
+                            final EditorTopComponent comp = EditorTopComponent.getActive();
+                            if (comp != null) {
+                                final Set<InputNode> tmpSet = new HashSet<>();
+                                tmpSet.add(n);
+                                if (theGraph != null) {
+                                    comp.getDiagramModel().selectGraph(theGraph);
                                 }
+                                comp.setSelectedNodes(tmpSet);
+                                comp.requestActive();
                             }
-                        },
-                        n.getProperties().get(name) + " (" + n.getId() + " " + n.getProperties().get("name") + ")"
+                        }
+                    },
+                            n.getProperties().get(name) + " (" + n.getId() + " " + n.getProperties().get("name") + ")" + (theGraph != null ? " in " + theGraph.getName() : "")
                     );
                 }
             }
@@ -139,4 +150,27 @@
             System.out.println("no input graph provider!");
         }
     }
+
+    private List<InputNode> findMatches(String name, String value, InputGraph inputGraph, SearchResponse response) { // returns the matching nodes of inputGraph, or null when there are none or the search threw
+        try {
+            RegexpPropertyMatcher matcher = new RegexpPropertyMatcher(name, value, Pattern.CASE_INSENSITIVE);
+            Properties.PropertySelector<InputNode> selector = new Properties.PropertySelector<>(inputGraph.getNodes());
+            List<InputNode> matches = selector.selectMultiple(matcher);
+            return matches.size() == 0 ? null : matches; // an empty result is reported as null
+        } catch (Exception e) { // NOTE(review): every failing call adds its own error entry to response, so per-graph retries can repeat it
+            final String msg = e.getMessage();
+            response.addResult(new Runnable() {
+                @Override
+                public void run() {
+                    Message desc = new NotifyDescriptor.Message("An exception occurred during the search, "
+                            + "perhaps due to a malformed query string:\n" + msg,
+                            NotifyDescriptor.WARNING_MESSAGE);
+                    DialogDisplayer.getDefault().notify(desc);
+                }
+            },
+                    "(Error during search)"
+            );
+        }
+        return null; // reached only after the catch block has run
+    }
 }
--- a/src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/layer.xml	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/layer.xml	Fri May 06 18:20:50 2016 +0300
@@ -61,14 +61,26 @@
                 <attr name="position" intvalue="710"/>
             </file>
         </folder>
+        <folder name="Window">
+            <file name="Tools_hidden"/>
+            <file name="Web_hidden"/>
+            <file name="org-netbeans-modules-tasks-ui-DashboardTopComponent.shadow_hidden"/>
+        </folder>
     </folder>
 
     <folder name="QuickSearch">
+        <file name="Actions_hidden"/>
+        <file name="GoToOption_hidden"/>
+        <file name="GoToSymbol_hidden"/>
+        <file name="GoToType_hidden"/>
+        <file name="Help_hidden"/>
+        <file name="Hudson_hidden"/>
         <folder name="Nodes">
             <attr name="command" stringvalue="n"/>
             <attr name="position" intvalue="0"/>
             <file name="com-sun-hotspot-igv-view-NodeQuickSearch.instance"/>
         </folder>
+        <file name="Projects_hidden"/>
     </folder>
     
     <folder name="QuickSearchShadow">
--- a/src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/widgets/LineWidget.java	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/widgets/LineWidget.java	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2008, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
 import com.sun.hotspot.igv.graph.Figure;
 import com.sun.hotspot.igv.graph.InputSlot;
 import com.sun.hotspot.igv.graph.OutputSlot;
+import com.sun.hotspot.igv.util.StringUtils;
 import com.sun.hotspot.igv.view.DiagramScene;
 import java.awt.*;
 import java.awt.geom.Line2D;
@@ -148,7 +149,7 @@
     private String generateToolTipText(List<Connection> conn) {
         StringBuilder sb = new StringBuilder();
         for (Connection c : conn) {
-            sb.append(c.getToolTipText());
+            sb.append(StringUtils.escapeHTML(c.getToolTipText()));
             sb.append("<br>");
         }
         return sb.toString();
--- a/src/share/tools/IdealGraphVisualizer/nbproject/project.properties	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/tools/IdealGraphVisualizer/nbproject/project.properties	Fri May 06 18:20:50 2016 +0300
@@ -48,5 +48,5 @@
 
 # Disable assertions for RequestProcessor to prevent annoying messages in case
 # of multiple SceneAnimator update tasks in the default RequestProcessor.
-run.args.extra = -J-server -J-da:org.openide.util.RequestProcessor -J-Xms2g -J-Xmx8g -J-Djava.lang.Integer.IntegerCache.high=20000
+run.args.extra = -J-server -J-da:org.openide.util.RequestProcessor -J-Xms2g -J-Xmx8g -J-Djava.lang.Integer.IntegerCache.high=200000
 debug.args.extra = -J-server -J-da:org.openide.util.RequestProcessor
--- a/src/share/vm/c1/c1_Compiler.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/c1/c1_Compiler.cpp	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -224,6 +224,11 @@
   case vmIntrinsics::_updateCRC32:
   case vmIntrinsics::_updateBytesCRC32:
   case vmIntrinsics::_updateByteBufferCRC32:
+#ifdef SPARC
+  case vmIntrinsics::_updateBytesCRC32C:
+  case vmIntrinsics::_updateDirectByteBufferCRC32C:
+#endif
+  case vmIntrinsics::_vectorizedMismatch:
   case vmIntrinsics::_compareAndSwapInt:
   case vmIntrinsics::_compareAndSwapObject:
   case vmIntrinsics::_getCharStringU:
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Fri May 06 18:20:50 2016 +0300
@@ -3174,6 +3174,15 @@
     do_update_CRC32(x);
     break;
 
+  case vmIntrinsics::_updateBytesCRC32C:
+  case vmIntrinsics::_updateDirectByteBufferCRC32C:
+    do_update_CRC32C(x);
+    break;
+
+  case vmIntrinsics::_vectorizedMismatch:
+    do_vectorizedMismatch(x);
+    break;
+
   default: ShouldNotReachHere(); break;
   }
 }
--- a/src/share/vm/c1/c1_LIRGenerator.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/c1/c1_LIRGenerator.hpp	Fri May 06 18:20:50 2016 +0300
@@ -253,6 +253,8 @@
   void do_FPIntrinsics(Intrinsic* x);
   void do_Reference_get(Intrinsic* x);
   void do_update_CRC32(Intrinsic* x);
+  void do_update_CRC32C(Intrinsic* x);
+  void do_vectorizedMismatch(Intrinsic* x);
 
   LIR_Opr call_runtime(BasicTypeArray* signature, LIRItemList* args, address entry, ValueType* result_type, CodeEmitInfo* info);
   LIR_Opr call_runtime(BasicTypeArray* signature, LIR_OprList* args, address entry, ValueType* result_type, CodeEmitInfo* info);
--- a/src/share/vm/c1/c1_Runtime1.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/c1/c1_Runtime1.cpp	Fri May 06 18:20:50 2016 +0300
@@ -318,6 +318,8 @@
   FUNCTION_CASE(entry, TRACE_TIME_METHOD);
 #endif
   FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32());
+  FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32C());
+  FUNCTION_CASE(entry, StubRoutines::vectorizedMismatch());
   FUNCTION_CASE(entry, StubRoutines::dexp());
   FUNCTION_CASE(entry, StubRoutines::dlog());
   FUNCTION_CASE(entry, StubRoutines::dlog10());
--- a/src/share/vm/ci/ciReplay.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/ci/ciReplay.cpp	Fri May 06 18:20:50 2016 +0300
@@ -552,7 +552,7 @@
     }
     replay_state = this;
     CompileBroker::compile_method(method, entry_bci, comp_level,
-                                  methodHandle(), 0, "replay", THREAD);
+                                  methodHandle(), 0, CompileTask::Reason_Replay, THREAD);
     replay_state = NULL;
     reset();
   }
--- a/src/share/vm/classfile/classLoader.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/classfile/classLoader.cpp	Fri May 06 18:20:50 2016 +0300
@@ -1647,7 +1647,7 @@
               }
               // Force compilation
               CompileBroker::compile_method(m, InvocationEntryBci, comp_level,
-                                            methodHandle(), 0, "CTW", THREAD);
+                                            methodHandle(), 0, CompileTask::Reason_CTW, THREAD);
               if (HAS_PENDING_EXCEPTION) {
                 clear_pending_exception_if_not_oom(CHECK);
                 tty->print_cr("CompileTheWorld (%d) : Skipping method: %s", _compile_the_world_class_counter, m->name_and_sig_as_C_string());
@@ -1663,7 +1663,7 @@
                   m->clear_code();
                 }
                 CompileBroker::compile_method(m, InvocationEntryBci, CompLevel_full_optimization,
-                                              methodHandle(), 0, "CTW", THREAD);
+                                              methodHandle(), 0, CompileTask::Reason_CTW, THREAD);
                 if (HAS_PENDING_EXCEPTION) {
                   clear_pending_exception_if_not_oom(CHECK);
                   tty->print_cr("CompileTheWorld (%d) : Skipping method: %s", _compile_the_world_class_counter, m->name_and_sig_as_C_string());
--- a/src/share/vm/classfile/vmSymbols.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/classfile/vmSymbols.cpp	Fri May 06 18:20:50 2016 +0300
@@ -353,6 +353,7 @@
   case vmIntrinsics::_updateCRC32:
   case vmIntrinsics::_updateBytesCRC32:
   case vmIntrinsics::_updateByteBufferCRC32:
+  case vmIntrinsics::_vectorizedMismatch:
     return true;
   default:
     return false;
@@ -384,6 +385,7 @@
   case vmIntrinsics::_updateCRC32:
   case vmIntrinsics::_updateBytesCRC32:
   case vmIntrinsics::_updateByteBufferCRC32:
+  case vmIntrinsics::_vectorizedMismatch:
     return false;
   default:
     return true;
@@ -669,6 +671,9 @@
   case vmIntrinsics::_updateDirectByteBufferCRC32C:
     if (!UseCRC32CIntrinsics) return true;
     break;
+  case vmIntrinsics::_vectorizedMismatch:
+    if (!UseVectorizedMismatchIntrinsic) return true;
+    break;
   case vmIntrinsics::_updateBytesAdler32:
   case vmIntrinsics::_updateByteBufferAdler32:
     if (!UseAdler32Intrinsics) return true;
@@ -734,9 +739,6 @@
   case vmIntrinsics::_montgomerySquare:
     if (!UseMontgomerySquareIntrinsic) return true;
     break;
-  case vmIntrinsics::_vectorizedMismatch:
-    if (!UseVectorizedMismatchIntrinsic) return true;
-    break;
   case vmIntrinsics::_addExactI:
   case vmIntrinsics::_addExactL:
   case vmIntrinsics::_decrementExactI:
--- a/src/share/vm/classfile/vmSymbols.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/classfile/vmSymbols.hpp	Fri May 06 18:20:50 2016 +0300
@@ -117,6 +117,7 @@
   template(java_util_Hashtable,                       "java/util/Hashtable")                      \
   template(java_lang_Compiler,                        "java/lang/Compiler")                       \
   template(jdk_internal_misc_Signal,                  "jdk/internal/misc/Signal")                 \
+  template(jdk_internal_util_Preconditions,           "jdk/internal/util/Preconditions")          \
   template(java_lang_AssertionStatusDirectives,       "java/lang/AssertionStatusDirectives")      \
   template(getBootClassPathEntryForClass_name,        "getBootClassPathEntryForClass")            \
   template(jdk_internal_vm_PostVMInitHook,            "jdk/internal/vm/PostVMInitHook")           \
@@ -934,8 +935,8 @@
   do_intrinsic(_equalsL,                  java_lang_StringLatin1,equals_name, equalsB_signature,                 F_S)   \
   do_intrinsic(_equalsU,                  java_lang_StringUTF16, equals_name, equalsB_signature,                 F_S)   \
                                                                                                                         \
-  do_intrinsic(_Objects_checkIndex,       java_util_Objects,      checkIndex_name, Objects_checkIndex_signature, F_S)   \
-   do_signature(Objects_checkIndex_signature,                     "(IILjava/util/function/BiFunction;)I")               \
+  do_intrinsic(_Preconditions_checkIndex, jdk_internal_util_Preconditions, checkIndex_name, Preconditions_checkIndex_signature, F_S)   \
+   do_signature(Preconditions_checkIndex_signature,              "(IILjava/util/function/BiFunction;)I")                \
                                                                                                                         \
   do_class(java_nio_Buffer,               "java/nio/Buffer")                                                            \
   do_intrinsic(_checkIndex,               java_nio_Buffer,        checkIndex_name, int_int_signature,            F_R)   \
--- a/src/share/vm/compiler/compileBroker.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/compiler/compileBroker.cpp	Fri May 06 18:20:50 2016 +0300
@@ -169,6 +169,8 @@
 CompileQueue* CompileBroker::_c2_compile_queue   = NULL;
 CompileQueue* CompileBroker::_c1_compile_queue   = NULL;
 
+
+
 class CompilationLog : public StringEventLog {
  public:
   CompilationLog() : StringEventLog("Compilation events") {
@@ -844,7 +846,7 @@
                                         int comp_level,
                                         const methodHandle& hot_method,
                                         int hot_count,
-                                        const char* comment,
+                                        CompileTask::CompileReason compile_reason,
                                         bool blocking,
                                         Thread* thread) {
   guarantee(!method->is_abstract(), "cannot compile abstract methods");
@@ -860,7 +862,7 @@
     if (osr_bci != InvocationEntryBci) {
       tty->print(" osr_bci: %d", osr_bci);
     }
-    tty->print(" level: %d comment: %s count: %d", comp_level, comment, hot_count);
+    tty->print(" level: %d comment: %s count: %d", comp_level, CompileTask::reason_name(compile_reason), hot_count);
     if (!hot_method.is_null()) {
       tty->print(" hot: ");
       if (hot_method() != method()) {
@@ -1024,7 +1026,7 @@
     task = create_compile_task(queue,
                                compile_id, method,
                                osr_bci, comp_level,
-                               hot_method, hot_count, comment,
+                               hot_method, hot_count, compile_reason,
                                blocking);
   }
 
@@ -1036,15 +1038,18 @@
 nmethod* CompileBroker::compile_method(const methodHandle& method, int osr_bci,
                                        int comp_level,
                                        const methodHandle& hot_method, int hot_count,
-                                       const char* comment, Thread* THREAD) {
-  // do nothing if compilebroker is not available
-  if (!_initialized) {
+                                       CompileTask::CompileReason compile_reason,
+                                       Thread* THREAD) {
+  // Do nothing if compilebroker is not initialized or compiles are submitted on level none
+  if (!_initialized || comp_level == CompLevel_none) {
     return NULL;
   }
+
   AbstractCompiler *comp = CompileBroker::compiler(comp_level);
-  assert(comp != NULL, "Ensure we don't compile before compilebroker init");
+  assert(comp != NULL, "Ensure we have a compiler");
+
   DirectiveSet* directive = DirectivesStack::getMatchingDirective(method, comp);
-  nmethod* nm = CompileBroker::compile_method(method, osr_bci, comp_level, hot_method, hot_count, comment, directive, THREAD);
+  nmethod* nm = CompileBroker::compile_method(method, osr_bci, comp_level, hot_method, hot_count, compile_reason, directive, THREAD);
   DirectivesStack::release(directive);
   return nm;
 }
@@ -1052,7 +1057,8 @@
 nmethod* CompileBroker::compile_method(const methodHandle& method, int osr_bci,
                                          int comp_level,
                                          const methodHandle& hot_method, int hot_count,
-                                         const char* comment, DirectiveSet* directive,
+                                         CompileTask::CompileReason compile_reason,
+                                         DirectiveSet* directive,
                                          Thread* THREAD) {
 
   // make sure arguments make sense
@@ -1178,7 +1184,7 @@
       return NULL;
     }
     bool is_blocking = !directive->BackgroundCompilationOption || CompileTheWorld || ReplayCompiles;
-    compile_method_base(method, osr_bci, comp_level, hot_method, hot_count, comment, is_blocking, THREAD);
+    compile_method_base(method, osr_bci, comp_level, hot_method, hot_count, compile_reason, is_blocking, THREAD);
   }
 
   // return requested nmethod
@@ -1342,11 +1348,11 @@
                                                 int                 comp_level,
                                                 const methodHandle& hot_method,
                                                 int                 hot_count,
-                                                const char*         comment,
+                                                CompileTask::CompileReason compile_reason,
                                                 bool                blocking) {
   CompileTask* new_task = CompileTask::allocate();
   new_task->initialize(compile_id, method, osr_bci, comp_level,
-                       hot_method, hot_count, comment,
+                       hot_method, hot_count, compile_reason,
                        blocking);
   queue->add(new_task);
   return new_task;
--- a/src/share/vm/compiler/compileBroker.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/compiler/compileBroker.hpp	Fri May 06 18:20:50 2016 +0300
@@ -232,7 +232,7 @@
                                           int                 comp_level,
                                           const methodHandle& hot_method,
                                           int                 hot_count,
-                                          const char*         comment,
+                                          CompileTask::CompileReason compile_reason,
                                           bool                blocking);
   static void wait_for_completion(CompileTask* task);
 #if INCLUDE_JVMCI
@@ -251,7 +251,7 @@
                                   int comp_level,
                                   const methodHandle& hot_method,
                                   int hot_count,
-                                  const char* comment,
+                                  CompileTask::CompileReason compile_reason,
                                   bool blocking,
                                   Thread* thread);
 
@@ -289,14 +289,15 @@
                                  int comp_level,
                                  const methodHandle& hot_method,
                                  int hot_count,
-                                 const char* comment, Thread* thread);
+                                 CompileTask::CompileReason compile_reason,
+                                 Thread* thread);
 
   static nmethod* compile_method(const methodHandle& method,
                                    int osr_bci,
                                    int comp_level,
                                    const methodHandle& hot_method,
                                    int hot_count,
-                                   const char* comment,
+                                   CompileTask::CompileReason compile_reason,
                                    DirectiveSet* directive,
                                    Thread* thread);
 
--- a/src/share/vm/compiler/compileTask.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/compiler/compileTask.cpp	Fri May 06 18:20:50 2016 +0300
@@ -82,7 +82,7 @@
                              int comp_level,
                              const methodHandle& hot_method,
                              int hot_count,
-                             const char* comment,
+                             CompileTask::CompileReason compile_reason,
                              bool is_blocking) {
   assert(!_lock->is_locked(), "bad locking");
 
@@ -104,7 +104,7 @@
   _hot_method_holder = NULL;
   _hot_count = hot_count;
   _time_queued = 0;  // tidy
-  _comment = comment;
+  _compile_reason = compile_reason;
   _failure_reason = NULL;
 
   if (LogCompilation) {
@@ -309,9 +309,9 @@
 
   xtty->begin_elem("task_queued");
   log_task(xtty);
-  if (_comment != NULL) {
-    xtty->print(" comment='%s'", _comment);
-  }
+  assert(_compile_reason > CompileTask::Reason_None && _compile_reason < CompileTask::Reason_Count, "Valid values");
+  xtty->print(" comment='%s'", reason_name(_compile_reason));
+
   if (_hot_method != NULL) {
     methodHandle hot(thread, _hot_method);
     methodHandle method(thread, _method);
@@ -440,3 +440,5 @@
   }
   st->cr();
 }
+
+
--- a/src/share/vm/compiler/compileTask.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/compiler/compileTask.hpp	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,39 @@
   friend class VMStructs;
   friend class JVMCIVMStructs;
 
+ public:
+  // Different reasons for a compilation.
+  // Only the InvocationCount, BackedgeCount and Tiered reasons can become
+  // stale, see CompileTask::can_become_stale(). The order is important:
+  // each value is used as an index into reason_names[] in reason_name().
+  enum CompileReason {
+      Reason_None,             // No reason given; maps to "no_reason"
+      Reason_InvocationCount,  // Simple/StackWalk-policy
+      Reason_BackedgeCount,    // Simple/StackWalk-policy
+      Reason_Tiered,           // Tiered-policy
+      Reason_CTW,              // Compile the world
+      Reason_Replay,           // ciReplay
+      Reason_Whitebox,         // Whitebox API
+      Reason_MustBeCompiled,   // Java callHelper, LinkResolver
+      Reason_Bootstrap,        // JVMCI bootstrap
+      Reason_Count             // Number of reasons above; has no entry in reason_names[]
+  };
+
+  static const char* reason_name(CompileTask::CompileReason compile_reason) {  // Printable name for a CompileReason
+    static const char* reason_names[] = {  // Indexed by CompileReason; must stay in enum order
+      "no_reason",         // Reason_None
+      "count",             // Reason_InvocationCount
+      "backedge_count",    // Reason_BackedgeCount
+      "tiered",            // Reason_Tiered
+      "CTW",               // Reason_CTW
+      "replay",            // Reason_Replay
+      "whitebox",          // Reason_Whitebox
+      "must_be_compiled",  // Reason_MustBeCompiled
+      "bootstrap"          // Reason_Bootstrap
+    };
+    return reason_names[compile_reason];  // NOTE(review): no bounds check; Reason_Count or an out-of-range value would read past the table
+  }
+
  private:
   static CompileTask* _task_free_list;
 #ifdef ASSERT
@@ -69,7 +102,7 @@
   Method*      _hot_method;   // which method actually triggered this task
   jobject      _hot_method_holder;
   int          _hot_count;    // information about its invocation counter
-  const char*  _comment;      // more info about the task
+  CompileReason _compile_reason;      // why this compilation was requested
   const char*  _failure_reason;
 
  public:
@@ -78,8 +111,8 @@
   }
 
   void initialize(int compile_id, const methodHandle& method, int osr_bci, int comp_level,
-                  const methodHandle& hot_method, int hot_count, const char* comment,
-                  bool is_blocking);
+                  const methodHandle& hot_method, int hot_count,
+                  CompileTask::CompileReason compile_reason, bool is_blocking);
 
   static CompileTask* allocate();
   static void         free(CompileTask* task);
@@ -91,6 +124,15 @@
   bool         is_complete() const               { return _is_complete; }
   bool         is_blocking() const               { return _is_blocking; }
   bool         is_success() const                { return _is_success; }
+  bool         can_become_stale() const          {  // Only non-blocking, policy-triggered tasks can become stale
+    switch (_compile_reason) {
+      case Reason_BackedgeCount:
+      case Reason_InvocationCount:
+      case Reason_Tiered:
+        return !_is_blocking;
+      default: return false;  // All other reasons never become stale; explicit default avoids -Wswitch warnings
+    }
+  }
 #if INCLUDE_JVMCI
   bool         has_waiter() const                { return _has_waiter; }
   void         clear_waiter()                    { _has_waiter = false; }
--- a/src/share/vm/gc/shared/threadLocalAllocBuffer.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/gc/shared/threadLocalAllocBuffer.cpp	Fri May 06 18:20:50 2016 +0300
@@ -36,6 +36,7 @@
 
 // static member initialization
 size_t           ThreadLocalAllocBuffer::_max_size       = 0;
+int              ThreadLocalAllocBuffer::_reserve_for_allocation_prefetch = 0;
 unsigned         ThreadLocalAllocBuffer::_target_refills = 0;
 GlobalTLABStats* ThreadLocalAllocBuffer::_global_stats   = NULL;
 
@@ -215,6 +216,23 @@
 
   _global_stats = new GlobalTLABStats();
 
+  // Need extra space at the end of TLAB, otherwise prefetching
+  // instructions will fault (due to accessing memory outside of heap).
+  // The amount of space is the max of the number of lines to
+  // prefetch for array and for instance allocations. (Extra space must be
+  // reserved to accommodate both types of allocations.)
+  //
+  // Only SPARC-specific BIS instructions are known to fault. (Those
+  // instructions are generated if AllocatePrefetchStyle==3 and
+  // AllocatePrefetchInstr==1). To be on the safe side, however,
+  // extra space is reserved for all combinations of
+  // AllocatePrefetchStyle and AllocatePrefetchInstr.
+
+  // +1 for rounding up to next cache line, +1 to be safe
+  int lines =  MAX2(AllocatePrefetchLines, AllocateInstancePrefetchLines) + 2;
+  _reserve_for_allocation_prefetch = (AllocatePrefetchDistance + AllocatePrefetchStepSize * lines) /
+                                     (int)HeapWordSize;
+
   // During jvm startup, the main (primordial) thread is initialized
   // before the heap is initialized.  So reinitialize it now.
   guarantee(Thread::current()->is_Java_thread(), "tlab initialization thread not Java thread");
--- a/src/share/vm/gc/shared/threadLocalAllocBuffer.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/gc/shared/threadLocalAllocBuffer.hpp	Fri May 06 18:20:50 2016 +0300
@@ -49,8 +49,9 @@
   size_t    _refill_waste_limit;                 // hold onto tlab if free() is larger than this
   size_t    _allocated_before_last_gc;           // total bytes allocated up until the last gc
 
-  static size_t   _max_size;                     // maximum size of any TLAB
-  static unsigned _target_refills;               // expected number of refills between GCs
+  static size_t   _max_size;                          // maximum size of any TLAB
+  static int      _reserve_for_allocation_prefetch;   // Reserve at the end of the TLAB
+  static unsigned _target_refills;                    // expected number of refills between GCs
 
   unsigned  _number_of_refills;
   unsigned  _fast_refill_waste;
@@ -129,7 +130,7 @@
   // Reserve space at the end of TLAB
   static size_t end_reserve() {
     int reserve_size = typeArrayOopDesc::header_size(T_INT);
-    return MAX2(reserve_size, VM_Version::reserve_for_allocation_prefetch());
+    return MAX2(reserve_size, _reserve_for_allocation_prefetch);
   }
   static size_t alignment_reserve()              { return align_object_size(end_reserve()); }
   static size_t alignment_reserve_in_bytes()     { return alignment_reserve() * HeapWordSize; }
--- a/src/share/vm/jvmci/jvmciCompiler.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/jvmci/jvmciCompiler.cpp	Fri May 06 18:20:50 2016 +0300
@@ -85,7 +85,7 @@
     if (!mh->is_native() && !mh->is_static() && !mh->is_initializer()) {
       ResourceMark rm;
       int hot_count = 10; // TODO: what's the appropriate value?
-      CompileBroker::compile_method(mh, InvocationEntryBci, CompLevel_full_optimization, mh, hot_count, "bootstrap", THREAD);
+      CompileBroker::compile_method(mh, InvocationEntryBci, CompLevel_full_optimization, mh, hot_count, CompileTask::Reason_Bootstrap, THREAD);
     }
   }
 
--- a/src/share/vm/jvmci/jvmciRuntime.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/jvmci/jvmciRuntime.cpp	Fri May 06 18:20:50 2016 +0300
@@ -425,19 +425,22 @@
   }
 JRT_END
 
-JRT_ENTRY(void, JVMCIRuntime::throw_and_post_jvmti_exception(JavaThread* thread, Symbol* name, const char* message))
-  SharedRuntime::throw_and_post_jvmti_exception(thread, name, message);
+JRT_ENTRY(void, JVMCIRuntime::throw_and_post_jvmti_exception(JavaThread* thread, const char* exception, const char* message))
+  TempNewSymbol symbol = SymbolTable::new_symbol(exception, CHECK);
+  SharedRuntime::throw_and_post_jvmti_exception(thread, symbol, message);
 JRT_END
 
-JRT_ENTRY(void, JVMCIRuntime::throw_klass_external_name_exception(JavaThread* thread, Symbol* exception, Klass* klass))
+JRT_ENTRY(void, JVMCIRuntime::throw_klass_external_name_exception(JavaThread* thread, const char* exception, Klass* klass))
   ResourceMark rm(thread);
-  SharedRuntime::throw_and_post_jvmti_exception(thread, exception, klass->external_name());
+  TempNewSymbol symbol = SymbolTable::new_symbol(exception, CHECK);
+  SharedRuntime::throw_and_post_jvmti_exception(thread, symbol, klass->external_name());
 JRT_END
 
-JRT_ENTRY(void, JVMCIRuntime::throw_class_cast_exception(JavaThread* thread, Symbol* exception, Klass* caster_klass, Klass* target_klass))
+JRT_ENTRY(void, JVMCIRuntime::throw_class_cast_exception(JavaThread* thread, const char* exception, Klass* caster_klass, Klass* target_klass))
   ResourceMark rm(thread);
   const char* message = SharedRuntime::generate_class_cast_message(caster_klass, target_klass);
-  SharedRuntime::throw_and_post_jvmti_exception(thread, exception, message);
+  TempNewSymbol symbol = SymbolTable::new_symbol(exception, CHECK);
+  SharedRuntime::throw_and_post_jvmti_exception(thread, symbol, message);
 JRT_END
 
 JRT_LEAF(void, JVMCIRuntime::log_object(JavaThread* thread, oopDesc* obj, bool as_string, bool newline))
--- a/src/share/vm/jvmci/jvmciRuntime.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/jvmci/jvmciRuntime.hpp	Fri May 06 18:20:50 2016 +0300
@@ -156,10 +156,10 @@
   static void new_store_pre_barrier(JavaThread* thread);
 
   // used to throw exceptions from compiled JVMCI code
-  static void throw_and_post_jvmti_exception(JavaThread* thread, Symbol* exception, const char* message);
+  static void throw_and_post_jvmti_exception(JavaThread* thread, const char* exception, const char* message);
   // helper methods to throw exception with complex messages
-  static void throw_klass_external_name_exception(JavaThread* thread, Symbol* exception, Klass* klass);
-  static void throw_class_cast_exception(JavaThread* thread, Symbol* exception, Klass* caster_klass, Klass* target_klass);
+  static void throw_klass_external_name_exception(JavaThread* thread, const char* exception, Klass* klass);
+  static void throw_class_cast_exception(JavaThread* thread, const char* exception, Klass* caster_klass, Klass* target_klass);
 
   // Test only function
   static int test_deoptimize_call_int(JavaThread* thread, int value);
--- a/src/share/vm/jvmci/vmStructs_jvmci.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/jvmci/vmStructs_jvmci.cpp	Fri May 06 18:20:50 2016 +0300
@@ -309,6 +309,7 @@
   declare_constant(JVM_ACC_MONITOR_MATCH)                                 \
   declare_constant(JVM_ACC_HAS_MONITOR_BYTECODES)                         \
   declare_constant(JVM_ACC_HAS_FINALIZER)                                 \
+  declare_constant(JVM_ACC_IS_CLONEABLE_FAST)                             \
   declare_constant(JVM_ACC_FIELD_INTERNAL)                                \
   declare_constant(JVM_ACC_FIELD_STABLE)                                  \
   declare_constant(JVM_ACC_FIELD_HAS_GENERIC_SIGNATURE)                   \
--- a/src/share/vm/opto/c2compiler.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/opto/c2compiler.cpp	Fri May 06 18:20:50 2016 +0300
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2016, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -545,7 +545,7 @@
   case vmIntrinsics::_updateByteBufferAdler32:
   case vmIntrinsics::_profileBoolean:
   case vmIntrinsics::_isCompileConstant:
-  case vmIntrinsics::_Objects_checkIndex:
+  case vmIntrinsics::_Preconditions_checkIndex:
     break;
   default:
     return false;
--- a/src/share/vm/opto/compile.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/opto/compile.cpp	Fri May 06 18:20:50 2016 +0300
@@ -3263,6 +3263,43 @@
     frc._tests.push(iff);
     break;
   }
+  case Op_ConvI2L: {
+    if (!Matcher::convi2l_type_required) {
+      // Code generation on some platforms doesn't need accurate
+      // ConvI2L types. Widening the type can help remove redundant
+      // address computations.
+      n->as_Type()->set_type(TypeLong::INT);
+      ResourceMark rm;
+      Node_List wq;
+      wq.push(n);
+      for (uint next = 0; next < wq.size(); next++) {
+        Node *m = wq.at(next);
+
+        for(;;) {
+          // Loop over all nodes with the same input edges as m
+          Node* k = m->find_similar(m->Opcode());
+          if (k == NULL) {
+            break;
+          }
+          // Push their uses so we get a chance to remove nodes made
+          // redundant
+          for (DUIterator_Fast imax, i = k->fast_outs(imax); i < imax; i++) {
+            Node* u = k->fast_out(i);
+            assert(!wq.contains(u), "shouldn't process one node several times");
+            if (u->Opcode() == Op_LShiftL ||
+                u->Opcode() == Op_AddL ||
+                u->Opcode() == Op_SubL ||
+                u->Opcode() == Op_AddP) {
+              wq.push(u);
+            }
+          }
+          // Replace all nodes with identical edges as m with m
+          k->subsume_by(m, this);
+        }
+      }
+    }
+    break;
+  }
   default:
     assert( !n->is_Call(), "" );
     assert( !n->is_Mem(), "" );
--- a/src/share/vm/opto/compile.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/opto/compile.hpp	Fri May 06 18:20:50 2016 +0300
@@ -1101,7 +1101,7 @@
   int               code_size()                 { return _method_size; }
   CodeBuffer*       code_buffer()               { return &_code_buffer; }
   int               first_block_size()          { return _first_block_size; }
-  void              set_frame_complete(int off) { _code_offsets.set_value(CodeOffsets::Frame_Complete, off); }
+  void              set_frame_complete(int off) { if (!in_scratch_emit_size()) { _code_offsets.set_value(CodeOffsets::Frame_Complete, off); } }
   ExceptionHandlerTable*  handler_table()       { return &_handler_table; }
   ImplicitExceptionTable* inc_table()           { return &_inc_table; }
   OopMapSet*        oop_map_set()               { return _oop_map_set; }
--- a/src/share/vm/opto/library_call.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/opto/library_call.cpp	Fri May 06 18:20:50 2016 +0300
@@ -261,7 +261,7 @@
   bool inline_native_getLength();
   bool inline_array_copyOf(bool is_copyOfRange);
   bool inline_array_equals(StrIntrinsicNode::ArgEnc ae);
-  bool inline_objects_checkIndex();
+  bool inline_preconditions_checkIndex();
   void copy_to_clone(Node* obj, Node* alloc_obj, Node* obj_size, bool is_array, bool card_mark);
   bool inline_native_clone(bool is_virtual);
   bool inline_native_Reflection_getCallerClass();
@@ -714,7 +714,7 @@
   case vmIntrinsics::_copyOfRange:              return inline_array_copyOf(true);
   case vmIntrinsics::_equalsB:                  return inline_array_equals(StrIntrinsicNode::LL);
   case vmIntrinsics::_equalsC:                  return inline_array_equals(StrIntrinsicNode::UU);
-  case vmIntrinsics::_Objects_checkIndex:       return inline_objects_checkIndex();
+  case vmIntrinsics::_Preconditions_checkIndex: return inline_preconditions_checkIndex();
   case vmIntrinsics::_clone:                    return inline_native_clone(intrinsic()->is_virtual());
 
   case vmIntrinsics::_allocateUninitializedArray: return inline_unsafe_newArray(true);
@@ -1141,7 +1141,7 @@
   return true;
 }
 
-bool LibraryCallKit::inline_objects_checkIndex() {
+bool LibraryCallKit::inline_preconditions_checkIndex() {
   Node* index = argument(0);
   Node* length = argument(1);
   if (too_many_traps(Deoptimization::Reason_intrinsic) || too_many_traps(Deoptimization::Reason_range_check)) {
--- a/src/share/vm/opto/macro.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/opto/macro.cpp	Fri May 06 18:20:50 2016 +0300
@@ -1897,7 +1897,7 @@
 
       Node *prefetch_adr;
       Node *prefetch;
-      uint lines = AllocatePrefetchDistance / AllocatePrefetchStepSize;
+      uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
       uint step_size = AllocatePrefetchStepSize;
       uint distance = 0;
 
@@ -1926,12 +1926,8 @@
       contended_phi_rawmem = pf_phi_rawmem;
       i_o = pf_phi_abio;
    } else if( UseTLAB && AllocatePrefetchStyle == 3 ) {
-      // Insert a prefetch for each allocation.
-      // This code is used for Sparc with BIS.
-      Node *pf_region = new RegionNode(3);
-      Node *pf_phi_rawmem = new PhiNode( pf_region, Type::MEMORY,
-                                             TypeRawPtr::BOTTOM );
-      transform_later(pf_region);
+      // Insert a prefetch instruction for each allocation.
+      // This code is used for SPARC with BIS.
 
       // Generate several prefetch instructions.
       uint lines = (length != NULL) ? AllocatePrefetchLines : AllocateInstancePrefetchLines;
@@ -1940,10 +1936,15 @@
 
       // Next cache address.
       Node *cache_adr = new AddPNode(old_eden_top, old_eden_top,
-                                            _igvn.MakeConX(distance));
+                                     _igvn.MakeConX(step_size + distance));
       transform_later(cache_adr);
       cache_adr = new CastP2XNode(needgc_false, cache_adr);
       transform_later(cache_adr);
+      // For BIS instructions to be emitted, the address must be aligned at cache line size.
+      // (The VM sets AllocatePrefetchStepSize to the cache line size, unless a value is
+      // specified at the command line.) If the address is not aligned at cache line size
+      // boundary, a standard store instruction is triggered (instead of the BIS). For the
+      // latter, 8-byte alignment is necessary.
       Node* mask = _igvn.MakeConX(~(intptr_t)(step_size-1));
       cache_adr = new AndXNode(cache_adr, mask);
       transform_later(cache_adr);
--- a/src/share/vm/opto/matcher.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/opto/matcher.hpp	Fri May 06 18:20:50 2016 +0300
@@ -488,6 +488,9 @@
   // ourselves.
   static const bool need_masked_shift_count;
 
+  // Whether code generation needs accurate ConvI2L types.
+  static const bool convi2l_type_required;
+
   // This routine is run whenever a graph fails to match.
   // If it returns, the compiler should bailout to interpreter without error.
   // In non-product mode, SoftMatchFailure is false to detect non-canonical
--- a/src/share/vm/opto/node.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/opto/node.cpp	Fri May 06 18:20:50 2016 +0300
@@ -2297,7 +2297,8 @@
     if (def && def->outcnt() >= 2) {
       for (DUIterator_Fast dmax, i = def->fast_outs(dmax); i < dmax; i++) {
         Node* use = def->fast_out(i);
-        if (use->Opcode() == opc &&
+        if (use != this &&
+            use->Opcode() == opc &&
             use->req() == req()) {
           uint j;
           for (j = 0; j < use->req(); j++) {
--- a/src/share/vm/opto/output.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/opto/output.cpp	Fri May 06 18:20:50 2016 +0300
@@ -1548,6 +1548,10 @@
       }
       dump_asm(node_offsets, node_offset_limit);
       if (xtty != NULL) {
+        // print_metadata and dump_asm above may safepoint, which makes us lose the ttylock.
+        // Retake the lock to make sure the end tag is coherent and that xmlStream->pop_tag
+        // is done in a thread-safe manner.
+        ttyLocker ttyl2;
         xtty->tail("opto_assembly");
       }
     }
--- a/src/share/vm/opto/superword.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/opto/superword.cpp	Fri May 06 18:20:50 2016 +0300
@@ -198,7 +198,7 @@
     ignored_loop_nodes[i] = -1;
   }
 
-  int max_vector = Matcher::max_vector_size(T_INT);
+  int max_vector = Matcher::max_vector_size(T_BYTE);
   bool post_loop_allowed = (PostLoopMultiversioning && Matcher::has_predicated_vectors() && cl->is_post_loop());
 
   // Process the loop, some/all of the stack entries will not be in order, ergo
--- a/src/share/vm/prims/whitebox.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/prims/whitebox.cpp	Fri May 06 18:20:50 2016 +0300
@@ -709,7 +709,7 @@
     return false;
   }
   methodHandle mh(THREAD, method);
-  nmethod* nm = CompileBroker::compile_method(mh, bci, comp_level, mh, mh->invocation_count(), "WhiteBox", THREAD);
+  nmethod* nm = CompileBroker::compile_method(mh, bci, comp_level, mh, mh->invocation_count(), CompileTask::Reason_Whitebox, THREAD);
   MutexLockerEx mu(Compile_lock);
   return (mh->queued_for_compilation() || nm != NULL);
 }
--- a/src/share/vm/runtime/advancedThresholdPolicy.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/runtime/advancedThresholdPolicy.cpp	Fri May 06 18:20:50 2016 +0300
@@ -191,8 +191,8 @@
       max_method = method;
     } else {
       // If a method has been stale for some time, remove it from the queue.
-      // Blocking tasks don't become stale
-      if (!task->is_blocking() && is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) {
+      // Blocking tasks and tasks submitted from whitebox API don't become stale
+      if (task->can_become_stale() && is_stale(t, TieredCompileTaskTimeout, method) && !is_old(method)) {
         if (PrintTieredEvents) {
           print_event(REMOVE_FROM_QUEUE, method, method, task->osr_bci(), (CompLevel)task->comp_level());
         }
@@ -491,7 +491,7 @@
 void AdvancedThresholdPolicy::submit_compile(const methodHandle& mh, int bci, CompLevel level, JavaThread* thread) {
   int hot_count = (bci == InvocationEntryBci) ? mh->invocation_count() : mh->backedge_count();
   update_rate(os::javaTimeMillis(), mh());
-  CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", thread);
+  CompileBroker::compile_method(mh, bci, level, mh, hot_count, CompileTask::Reason_Tiered, thread);
 }
 
 // Handle the invocation event.
--- a/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp	Fri May 06 18:20:50 2016 +0300
@@ -90,16 +90,29 @@
 }
 
 Flag::Error AllocatePrefetchDistanceConstraintFunc(intx value, bool verbose) {
-  if (value < 0) {
+  if (value < 0 || value > 512) {  // accepted range is [0, 512]; anything else violates the constraint
     CommandLineError::print(verbose,
-                            "Unable to determine system-specific value for AllocatePrefetchDistance. "
-                            "Please provide appropriate value, if unsure, use 0 to disable prefetching\n");
+                            "AllocatePrefetchDistance (" INTX_FORMAT ") must be "
+                            "between 0 and " INTX_FORMAT "\n",
+                            value, (intx)512);  // report the value being checked; cast so the vararg matches INTX_FORMAT
     return Flag::VIOLATES_CONSTRAINT;
   }
 
   return Flag::SUCCESS;
 }
 
+Flag::Error AllocatePrefetchStepSizeConstraintFunc(intx value, bool verbose) {
+  if (AllocatePrefetchStyle == 3) {  // the word-size check below applies only to prefetch style 3
+    if (value % wordSize != 0) {  // step size must be a whole multiple of wordSize
+      CommandLineError::print(verbose,
+                              "AllocatePrefetchStepSize (" INTX_FORMAT ") must be multiple of %d\n",
+                              value, wordSize);
+      return Flag::VIOLATES_CONSTRAINT;
+    }
+  }
+  return Flag::SUCCESS;  // every other style accepts any value at this point
+}
+
 Flag::Error AllocatePrefetchInstrConstraintFunc(intx value, bool verbose) {
   intx max_value = max_intx;
 #if defined(SPARC)
@@ -117,49 +130,6 @@
   return Flag::SUCCESS;
 }
 
-Flag::Error AllocatePrefetchStepSizeConstraintFunc(intx value, bool verbose) {
-  intx max_value = 512;
-  if (value < 1 || value > max_value) {
-    CommandLineError::print(verbose,
-                            "AllocatePrefetchStepSize (" INTX_FORMAT ") "
-                            "must be between 1 and %d\n",
-                            AllocatePrefetchStepSize,
-                            max_value);
-    return Flag::VIOLATES_CONSTRAINT;
-  }
-
-  if (AllocatePrefetchDistance % AllocatePrefetchStepSize != 0) {
-    CommandLineError::print(verbose,
-                            "AllocatePrefetchDistance (" INTX_FORMAT ") "
-                            "%% AllocatePrefetchStepSize (" INTX_FORMAT ") "
-                            "= " INTX_FORMAT " "
-                            "must be 0\n",
-                            AllocatePrefetchDistance, AllocatePrefetchStepSize,
-                            AllocatePrefetchDistance % AllocatePrefetchStepSize);
-    return Flag::VIOLATES_CONSTRAINT;
-  }
-
-  /* The limit of 64 for the quotient of AllocatePrefetchDistance and AllocatePrefetchSize
-   * originates from the limit of 64 for AllocatePrefetchLines/AllocateInstancePrefetchLines.
-   * If AllocatePrefetchStyle == 2, the quotient from above is used in PhaseMacroExpand::prefetch_allocation()
-   * to determine the number of lines to prefetch. For other values of AllocatePrefetchStyle,
-   * AllocatePrefetchDistance and AllocatePrefetchSize is used. For consistency, all these
-   * quantities must have the same limit (64 in this case).
-   */
-  if (AllocatePrefetchDistance / AllocatePrefetchStepSize > 64) {
-    CommandLineError::print(verbose,
-                            "AllocatePrefetchDistance (" INTX_FORMAT ") too large or "
-                            "AllocatePrefetchStepSize (" INTX_FORMAT ") too small; "
-                            "try decreasing/increasing values so that "
-                            "AllocatePrefetchDistance / AllocatePrefetchStepSize <= 64\n",
-                            AllocatePrefetchDistance, AllocatePrefetchStepSize,
-                            AllocatePrefetchDistance % AllocatePrefetchStepSize);
-    return Flag::VIOLATES_CONSTRAINT;
-  }
-
-  return Flag::SUCCESS;
-}
-
 Flag::Error CompileThresholdConstraintFunc(intx value, bool verbose) {
   if (value < 0 || value > INT_MAX >> InvocationCounter::count_shift) {
     CommandLineError::print(verbose,
--- a/src/share/vm/runtime/compilationPolicy.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/runtime/compilationPolicy.cpp	Fri May 06 18:20:50 2016 +0300
@@ -130,7 +130,7 @@
     }
     CompileBroker::compile_method(selected_method, InvocationEntryBci,
         CompilationPolicy::policy()->initial_compile_level(),
-        methodHandle(), 0, "must_be_compiled", CHECK);
+        methodHandle(), 0, CompileTask::Reason_MustBeCompiled, CHECK);
   }
 }
 
@@ -508,12 +508,11 @@
   const int comp_level = CompLevel_highest_tier;
   const int hot_count = m->invocation_count();
   reset_counter_for_invocation_event(m);
-  const char* comment = "count";
 
   if (is_compilation_enabled() && can_be_compiled(m, comp_level)) {
     CompiledMethod* nm = m->code();
     if (nm == NULL ) {
-      CompileBroker::compile_method(m, InvocationEntryBci, comp_level, m, hot_count, comment, thread);
+      CompileBroker::compile_method(m, InvocationEntryBci, comp_level, m, hot_count, CompileTask::Reason_InvocationCount, thread);
     }
   }
 }
@@ -521,10 +520,9 @@
 void SimpleCompPolicy::method_back_branch_event(const methodHandle& m, int bci, JavaThread* thread) {
   const int comp_level = CompLevel_highest_tier;
   const int hot_count = m->backedge_count();
-  const char* comment = "backedge_count";
 
   if (is_compilation_enabled() && can_be_osr_compiled(m, comp_level)) {
-    CompileBroker::compile_method(m, bci, comp_level, m, hot_count, comment, thread);
+    CompileBroker::compile_method(m, bci, comp_level, m, hot_count, CompileTask::Reason_BackedgeCount, thread);
     NOT_PRODUCT(trace_osr_completion(m->lookup_osr_nmethod_for(bci, comp_level, true));)
   }
 }
@@ -539,7 +537,6 @@
   const int comp_level = CompLevel_highest_tier;
   const int hot_count = m->invocation_count();
   reset_counter_for_invocation_event(m);
-  const char* comment = "count";
 
   if (is_compilation_enabled() && m->code() == NULL && can_be_compiled(m, comp_level)) {
     ResourceMark rm(thread);
@@ -569,7 +566,7 @@
       assert(top != NULL, "findTopInlinableFrame returned null");
       if (TraceCompilationPolicy) top->print();
       CompileBroker::compile_method(top->top_method(), InvocationEntryBci, comp_level,
-                                    m, hot_count, comment, thread);
+                                    m, hot_count, CompileTask::Reason_InvocationCount, thread);
     }
   }
 }
@@ -577,10 +574,9 @@
 void StackWalkCompPolicy::method_back_branch_event(const methodHandle& m, int bci, JavaThread* thread) {
   const int comp_level = CompLevel_highest_tier;
   const int hot_count = m->backedge_count();
-  const char* comment = "backedge_count";
 
   if (is_compilation_enabled() && can_be_osr_compiled(m, comp_level)) {
-    CompileBroker::compile_method(m, bci, comp_level, m, hot_count, comment, thread);
+    CompileBroker::compile_method(m, bci, comp_level, m, hot_count, CompileTask::Reason_BackedgeCount, thread);
     NOT_PRODUCT(trace_osr_completion(m->lookup_osr_nmethod_for(bci, comp_level, true));)
   }
 }
--- a/src/share/vm/runtime/globals.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/runtime/globals.hpp	Fri May 06 18:20:50 2016 +0300
@@ -2901,9 +2901,9 @@
                                                                             \
   product(intx,  AllocatePrefetchStyle, 1,                                  \
           "0 = no prefetch, "                                               \
-          "1 = prefetch instructions for each allocation, "                 \
+          "1 = generate prefetch instructions for each allocation, "        \
           "2 = use TLAB watermark to gate allocation prefetch, "            \
-          "3 = use BIS instruction on Sparc for allocation prefetch")       \
+          "3 = generate one prefetch instruction per cache line")           \
           range(0, 3)                                                       \
                                                                             \
   product(intx,  AllocatePrefetchDistance, -1,                              \
@@ -2926,8 +2926,8 @@
           constraint(AllocatePrefetchStepSizeConstraintFunc,AfterMemoryInit)\
                                                                             \
   product(intx,  AllocatePrefetchInstr, 0,                                  \
-          "Prefetch instruction to prefetch ahead of allocation pointer")   \
-          constraint(AllocatePrefetchInstrConstraintFunc, AfterErgo)        \
+          "Select instruction to prefetch ahead of allocation pointer")     \
+          constraint(AllocatePrefetchInstrConstraintFunc, AfterMemoryInit)  \
                                                                             \
   /* deoptimization */                                                      \
   develop(bool, TraceDeoptimization, false,                                 \
--- a/src/share/vm/runtime/simpleThresholdPolicy.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/runtime/simpleThresholdPolicy.cpp	Fri May 06 18:20:50 2016 +0300
@@ -265,7 +265,7 @@
 // Tell the broker to compile the method
 void SimpleThresholdPolicy::submit_compile(const methodHandle& mh, int bci, CompLevel level, JavaThread* thread) {
   int hot_count = (bci == InvocationEntryBci) ? mh->invocation_count() : mh->backedge_count();
-  CompileBroker::compile_method(mh, bci, level, mh, hot_count, "tiered", thread);
+  CompileBroker::compile_method(mh, bci, level, mh, hot_count, CompileTask::Reason_Tiered, thread);
 }
 
 // Call and loop predicates determine whether a transition to a higher
--- a/src/share/vm/runtime/vmStructs.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/runtime/vmStructs.cpp	Fri May 06 18:20:50 2016 +0300
@@ -600,6 +600,7 @@
   nonstatic_field(ThreadLocalAllocBuffer,      _pf_top,                                       HeapWord*)                             \
   nonstatic_field(ThreadLocalAllocBuffer,      _desired_size,                                 size_t)                                \
   nonstatic_field(ThreadLocalAllocBuffer,      _refill_waste_limit,                           size_t)                                \
+     static_field(ThreadLocalAllocBuffer,      _reserve_for_allocation_prefetch,              int)                                   \
      static_field(ThreadLocalAllocBuffer,      _target_refills,                               unsigned)                              \
   nonstatic_field(ThreadLocalAllocBuffer,      _number_of_refills,                            unsigned)                              \
   nonstatic_field(ThreadLocalAllocBuffer,      _fast_refill_waste,                            unsigned)                              \
@@ -1324,7 +1325,6 @@
      static_field(Abstract_VM_Version,         _vm_minor_version,                             int)                                   \
      static_field(Abstract_VM_Version,         _vm_security_version,                          int)                                   \
      static_field(Abstract_VM_Version,         _vm_build_number,                              int)                                   \
-     static_field(Abstract_VM_Version,         _reserve_for_allocation_prefetch,              int)                                   \
                                                                                                                                      \
      static_field(JDK_Version,                 _current,                                      JDK_Version)                           \
   nonstatic_field(JDK_Version,                 _major,                                        unsigned char)                         \
--- a/src/share/vm/runtime/vm_version.cpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/runtime/vm_version.cpp	Fri May 06 18:20:50 2016 +0300
@@ -43,7 +43,6 @@
 bool Abstract_VM_Version::_supports_atomic_getadd8 = false;
 unsigned int Abstract_VM_Version::_logical_processors_per_package = 1U;
 unsigned int Abstract_VM_Version::_L1_data_cache_line_size = 0;
-int Abstract_VM_Version::_reserve_for_allocation_prefetch = 0;
 
 #ifndef HOTSPOT_VERSION_STRING
   #error HOTSPOT_VERSION_STRING must be defined
--- a/src/share/vm/runtime/vm_version.hpp	Fri May 06 09:54:58 2016 +0000
+++ b/src/share/vm/runtime/vm_version.hpp	Fri May 06 18:20:50 2016 +0300
@@ -57,7 +57,6 @@
   static int          _vm_build_number;
   static unsigned int _parallel_worker_threads;
   static bool         _parallel_worker_threads_initialized;
-  static int          _reserve_for_allocation_prefetch;
 
   static unsigned int nof_parallel_worker_threads(unsigned int num,
                                                   unsigned int dem,
@@ -139,12 +138,6 @@
     return _L1_data_cache_line_size;
   }
 
-  // Need a space at the end of TLAB for prefetch instructions
-  // which may fault when accessing memory outside of heap.
-  static int reserve_for_allocation_prefetch() {
-    return _reserve_for_allocation_prefetch;
-  }
-
   // ARCH specific policy for the BiasedLocking
   static bool use_biased_locking()  { return true; }
 
--- a/test/compiler/jvmci/jdk.vm.ci.runtime.test/src/jdk/vm/ci/runtime/test/TestResolvedJavaType.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/jvmci/jdk.vm.ci.runtime.test/src/jdk/vm/ci/runtime/test/TestResolvedJavaType.java	Fri May 06 18:20:50 2016 +0300
@@ -868,6 +868,31 @@
         }
     }
 
+    static class TrivialCloneable implements Cloneable {
+        @Override
+        protected Object clone() {
+            return new TrivialCloneable();
+        }
+    }
+
+    @Test
+    public void isCloneableWithAllocationTest() {
+        ResolvedJavaType cloneable = metaAccess.lookupJavaType(Cloneable.class);
+        for (Class<?> c : classes) {
+            ResolvedJavaType type = metaAccess.lookupJavaType(c);
+            if (type.isCloneableWithAllocation()) {
+                // Only Cloneable types should be allocation cloneable
+                assertTrue(c.toString(), cloneable.isAssignableFrom(type));
+            }
+        }
+        /*
+         * We can't know for sure which types should be allocation cloneable on a particular
+         * platform but assume that at least totally trivial objects should be.
+         */
+        ResolvedJavaType trivialCloneable = metaAccess.lookupJavaType(TrivialCloneable.class);
+        assertTrue(trivialCloneable.toString(), trivialCloneable.isCloneableWithAllocation());
+    }
+
     @Test
     public void findMethodTest() {
         try {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/startup/TieredStopAtLevel0SanityTest.java	Fri May 06 18:20:50 2016 +0300
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8154151
+ * @summary Sanity test flag combo that force compiles on level 0
+ * @run main/othervm -Xcomp -XX:+UnlockDiagnosticVMOptions -XX:TieredStopAtLevel=0 TieredStopAtLevel0SanityTest
+ */
+
+public class TieredStopAtLevel0SanityTest {
+    public static void main(String[] args) throws Exception {
+        System.out.println("Hello world!");
+    }
+}
--- a/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestBoolean.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestBoolean.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class JdkInternalMiscUnsafeAccessTestBoolean {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final jdk.internal.misc.Unsafe UNSAFE;
 
@@ -148,4 +149,3 @@
 
 }
 
-
--- a/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestByte.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestByte.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class JdkInternalMiscUnsafeAccessTestByte {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final jdk.internal.misc.Unsafe UNSAFE;
 
@@ -185,4 +186,3 @@
     }
 }
 
-
--- a/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestChar.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestChar.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class JdkInternalMiscUnsafeAccessTestChar {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final jdk.internal.misc.Unsafe UNSAFE;
 
@@ -203,4 +204,3 @@
     }
 }
 
-
--- a/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestDouble.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestDouble.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class JdkInternalMiscUnsafeAccessTestDouble {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final jdk.internal.misc.Unsafe UNSAFE;
 
@@ -185,4 +186,3 @@
     }
 }
 
-
--- a/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestFloat.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestFloat.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class JdkInternalMiscUnsafeAccessTestFloat {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final jdk.internal.misc.Unsafe UNSAFE;
 
@@ -185,4 +186,3 @@
     }
 }
 
-
--- a/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestInt.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestInt.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class JdkInternalMiscUnsafeAccessTestInt {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final jdk.internal.misc.Unsafe UNSAFE;
 
@@ -251,22 +252,31 @@
         }
 
         {
-            boolean r = UNSAFE.weakCompareAndSwapInt(base, offset, 1, 2);
-            assertEquals(r, true, "weakCompareAndSwap int");
+            boolean success = false;
+            for (int c = 0; c < WEAK_ATTEMPTS && !success; c++) {
+                success = UNSAFE.weakCompareAndSwapInt(base, offset, 1, 2);
+            }
+            assertEquals(success, true, "weakCompareAndSwap int");
             int x = UNSAFE.getInt(base, offset);
             assertEquals(x, 2, "weakCompareAndSwap int value");
         }
 
         {
-            boolean r = UNSAFE.weakCompareAndSwapIntAcquire(base, offset, 2, 1);
-            assertEquals(r, true, "weakCompareAndSwapAcquire int");
+            boolean success = false;
+            for (int c = 0; c < WEAK_ATTEMPTS && !success; c++) {
+                success = UNSAFE.weakCompareAndSwapIntAcquire(base, offset, 2, 1);
+            }
+            assertEquals(success, true, "weakCompareAndSwapAcquire int");
             int x = UNSAFE.getInt(base, offset);
             assertEquals(x, 1, "weakCompareAndSwapAcquire int");
         }
 
         {
-            boolean r = UNSAFE.weakCompareAndSwapIntRelease(base, offset, 1, 2);
-            assertEquals(r, true, "weakCompareAndSwapRelease int");
+            boolean success = false;
+            for (int c = 0; c < WEAK_ATTEMPTS && !success; c++) {
+                success = UNSAFE.weakCompareAndSwapIntRelease(base, offset, 1, 2);
+            }
+            assertEquals(success, true, "weakCompareAndSwapRelease int");
             int x = UNSAFE.getInt(base, offset);
             assertEquals(x, 2, "weakCompareAndSwapRelease int");
         }
@@ -286,7 +296,7 @@
             int o = UNSAFE.getAndAddInt(base, offset, 2);
             assertEquals(o, 1, "getAndAdd int");
             int x = UNSAFE.getInt(base, offset);
-            assertEquals(x, 1 + 2, "weakCompareAndSwapRelease int");
+            assertEquals(x, 1 + 2, "getAndAdd int");
         }
     }
 
@@ -300,4 +310,3 @@
     }
 }
 
-
--- a/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestLong.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestLong.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class JdkInternalMiscUnsafeAccessTestLong {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final jdk.internal.misc.Unsafe UNSAFE;
 
@@ -251,22 +252,31 @@
         }
 
         {
-            boolean r = UNSAFE.weakCompareAndSwapLong(base, offset, 1L, 2L);
-            assertEquals(r, true, "weakCompareAndSwap long");
+            boolean success = false;
+            for (int c = 0; c < WEAK_ATTEMPTS && !success; c++) {
+                success = UNSAFE.weakCompareAndSwapLong(base, offset, 1L, 2L);
+            }
+            assertEquals(success, true, "weakCompareAndSwap long");
             long x = UNSAFE.getLong(base, offset);
             assertEquals(x, 2L, "weakCompareAndSwap long value");
         }
 
         {
-            boolean r = UNSAFE.weakCompareAndSwapLongAcquire(base, offset, 2L, 1L);
-            assertEquals(r, true, "weakCompareAndSwapAcquire long");
+            boolean success = false;
+            for (int c = 0; c < WEAK_ATTEMPTS && !success; c++) {
+                success = UNSAFE.weakCompareAndSwapLongAcquire(base, offset, 2L, 1L);
+            }
+            assertEquals(success, true, "weakCompareAndSwapAcquire long");
             long x = UNSAFE.getLong(base, offset);
             assertEquals(x, 1L, "weakCompareAndSwapAcquire long");
         }
 
         {
-            boolean r = UNSAFE.weakCompareAndSwapLongRelease(base, offset, 1L, 2L);
-            assertEquals(r, true, "weakCompareAndSwapRelease long");
+            boolean success = false;
+            for (int c = 0; c < WEAK_ATTEMPTS && !success; c++) {
+                success = UNSAFE.weakCompareAndSwapLongRelease(base, offset, 1L, 2L);
+            }
+            assertEquals(success, true, "weakCompareAndSwapRelease long");
             long x = UNSAFE.getLong(base, offset);
             assertEquals(x, 2L, "weakCompareAndSwapRelease long");
         }
@@ -286,7 +296,7 @@
             long o = UNSAFE.getAndAddLong(base, offset, 2L);
             assertEquals(o, 1L, "getAndAdd long");
             long x = UNSAFE.getLong(base, offset);
-            assertEquals(x, 1L + 2L, "weakCompareAndSwapRelease long");
+            assertEquals(x, 1L + 2L, "getAndAdd long");
         }
     }
 
@@ -300,4 +310,3 @@
     }
 }
 
-
--- a/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestObject.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestObject.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class JdkInternalMiscUnsafeAccessTestObject {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final jdk.internal.misc.Unsafe UNSAFE;
 
@@ -204,22 +205,31 @@
         }
 
         {
-            boolean r = UNSAFE.weakCompareAndSwapObject(base, offset, "foo", "bar");
-            assertEquals(r, true, "weakCompareAndSwap Object");
+            boolean success = false;
+            for (int c = 0; c < WEAK_ATTEMPTS && !success; c++) {
+                success = UNSAFE.weakCompareAndSwapObject(base, offset, "foo", "bar");
+            }
+            assertEquals(success, true, "weakCompareAndSwap Object");
             Object x = UNSAFE.getObject(base, offset);
             assertEquals(x, "bar", "weakCompareAndSwap Object value");
         }
 
         {
-            boolean r = UNSAFE.weakCompareAndSwapObjectAcquire(base, offset, "bar", "foo");
-            assertEquals(r, true, "weakCompareAndSwapAcquire Object");
+            boolean success = false;
+            for (int c = 0; c < WEAK_ATTEMPTS && !success; c++) {
+                success = UNSAFE.weakCompareAndSwapObjectAcquire(base, offset, "bar", "foo");
+            }
+            assertEquals(success, true, "weakCompareAndSwapAcquire Object");
             Object x = UNSAFE.getObject(base, offset);
             assertEquals(x, "foo", "weakCompareAndSwapAcquire Object");
         }
 
         {
-            boolean r = UNSAFE.weakCompareAndSwapObjectRelease(base, offset, "foo", "bar");
-            assertEquals(r, true, "weakCompareAndSwapRelease Object");
+            boolean success = false;
+            for (int c = 0; c < WEAK_ATTEMPTS && !success; c++) {
+                success = UNSAFE.weakCompareAndSwapObjectRelease(base, offset, "foo", "bar");
+            }
+            assertEquals(success, true, "weakCompareAndSwapRelease Object");
             Object x = UNSAFE.getObject(base, offset);
             assertEquals(x, "bar", "weakCompareAndSwapRelease Object");
         }
@@ -236,4 +246,3 @@
 
 }
 
-
--- a/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestShort.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/JdkInternalMiscUnsafeAccessTestShort.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class JdkInternalMiscUnsafeAccessTestShort {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final jdk.internal.misc.Unsafe UNSAFE;
 
@@ -203,4 +204,3 @@
     }
 }
 
-
--- a/test/compiler/unsafe/SunMiscUnsafeAccessTestBoolean.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/SunMiscUnsafeAccessTestBoolean.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class SunMiscUnsafeAccessTestBoolean {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final sun.misc.Unsafe UNSAFE;
 
@@ -131,9 +132,7 @@
 
 
 
-
     }
 
 }
 
-
--- a/test/compiler/unsafe/SunMiscUnsafeAccessTestByte.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/SunMiscUnsafeAccessTestByte.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class SunMiscUnsafeAccessTestByte {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final sun.misc.Unsafe UNSAFE;
 
@@ -160,7 +161,6 @@
 
 
 
-
     }
 
     static void testAccess(long address) {
@@ -173,4 +173,3 @@
     }
 }
 
-
--- a/test/compiler/unsafe/SunMiscUnsafeAccessTestChar.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/SunMiscUnsafeAccessTestChar.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class SunMiscUnsafeAccessTestChar {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final sun.misc.Unsafe UNSAFE;
 
@@ -160,7 +161,6 @@
 
 
 
-
     }
 
     static void testAccess(long address) {
@@ -173,4 +173,3 @@
     }
 }
 
-
--- a/test/compiler/unsafe/SunMiscUnsafeAccessTestDouble.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/SunMiscUnsafeAccessTestDouble.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class SunMiscUnsafeAccessTestDouble {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final sun.misc.Unsafe UNSAFE;
 
@@ -160,7 +161,6 @@
 
 
 
-
     }
 
     static void testAccess(long address) {
@@ -173,4 +173,3 @@
     }
 }
 
-
--- a/test/compiler/unsafe/SunMiscUnsafeAccessTestFloat.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/SunMiscUnsafeAccessTestFloat.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class SunMiscUnsafeAccessTestFloat {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final sun.misc.Unsafe UNSAFE;
 
@@ -160,7 +161,6 @@
 
 
 
-
     }
 
     static void testAccess(long address) {
@@ -173,4 +173,3 @@
     }
 }
 
-
--- a/test/compiler/unsafe/SunMiscUnsafeAccessTestInt.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/SunMiscUnsafeAccessTestInt.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class SunMiscUnsafeAccessTestInt {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final sun.misc.Unsafe UNSAFE;
 
@@ -165,7 +166,6 @@
 
 
 
-
         UNSAFE.putInt(base, offset, 1);
 
         // Compare
@@ -184,7 +184,6 @@
         }
 
 
-
         // Compare set and get
         {
             int o = UNSAFE.getAndSetInt(base, offset, 1);
@@ -200,7 +199,7 @@
             int o = UNSAFE.getAndAddInt(base, offset, 2);
             assertEquals(o, 1, "getAndAdd int");
             int x = UNSAFE.getInt(base, offset);
-            assertEquals(x, 1 + 2, "weakCompareAndSwapRelease int");
+            assertEquals(x, 1 + 2, "getAndAdd int");
         }
     }
 
@@ -214,4 +213,3 @@
     }
 }
 
-
--- a/test/compiler/unsafe/SunMiscUnsafeAccessTestLong.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/SunMiscUnsafeAccessTestLong.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class SunMiscUnsafeAccessTestLong {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final sun.misc.Unsafe UNSAFE;
 
@@ -165,7 +166,6 @@
 
 
 
-
         UNSAFE.putLong(base, offset, 1L);
 
         // Compare
@@ -184,7 +184,6 @@
         }
 
 
-
         // Compare set and get
         {
             long o = UNSAFE.getAndSetLong(base, offset, 1L);
@@ -200,7 +199,7 @@
             long o = UNSAFE.getAndAddLong(base, offset, 2L);
             assertEquals(o, 1L, "getAndAdd long");
             long x = UNSAFE.getLong(base, offset);
-            assertEquals(x, 1L + 2L, "weakCompareAndSwapRelease long");
+            assertEquals(x, 1L + 2L, "getAndAdd long");
         }
     }
 
@@ -214,4 +213,3 @@
     }
 }
 
-
--- a/test/compiler/unsafe/SunMiscUnsafeAccessTestObject.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/SunMiscUnsafeAccessTestObject.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class SunMiscUnsafeAccessTestObject {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final sun.misc.Unsafe UNSAFE;
 
@@ -136,7 +137,6 @@
 
 
 
-
         UNSAFE.putObject(base, offset, "foo");
 
         // Compare
@@ -155,7 +155,6 @@
         }
 
 
-
         // Compare set and get
         {
             Object o = UNSAFE.getAndSetObject(base, offset, "foo");
@@ -168,4 +167,3 @@
 
 }
 
-
--- a/test/compiler/unsafe/SunMiscUnsafeAccessTestShort.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/SunMiscUnsafeAccessTestShort.java	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class SunMiscUnsafeAccessTestShort {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final sun.misc.Unsafe UNSAFE;
 
@@ -160,7 +161,6 @@
 
 
 
-
     }
 
     static void testAccess(long address) {
@@ -173,4 +173,3 @@
     }
 }
 
-
--- a/test/compiler/unsafe/X-UnsafeAccessTest.java.template	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/unsafe/X-UnsafeAccessTest.java.template	Fri May 06 18:20:50 2016 +0300
@@ -40,6 +40,7 @@
 
 public class $Qualifier$UnsafeAccessTest$Type$ {
     static final int ITERS = Integer.getInteger("iters", 1);
+    static final int WEAK_ATTEMPTS = Integer.getInteger("weakAttempts", 10);
 
     static final $package$.Unsafe UNSAFE;
 
@@ -273,22 +274,31 @@
         }
 
         {
-            boolean r = UNSAFE.weakCompareAndSwap$Type$(base, offset, $value1$, $value2$);
-            assertEquals(r, true, "weakCompareAndSwap $type$");
+            boolean success = false;
+            for (int c = 0; c < WEAK_ATTEMPTS && !success; c++) {
+                success = UNSAFE.weakCompareAndSwap$Type$(base, offset, $value1$, $value2$);
+            }
+            assertEquals(success, true, "weakCompareAndSwap $type$");
             $type$ x = UNSAFE.get$Type$(base, offset);
             assertEquals(x, $value2$, "weakCompareAndSwap $type$ value");
         }
 
         {
-            boolean r = UNSAFE.weakCompareAndSwap$Type$Acquire(base, offset, $value2$, $value1$);
-            assertEquals(r, true, "weakCompareAndSwapAcquire $type$");
+            boolean success = false;
+            for (int c = 0; c < WEAK_ATTEMPTS && !success; c++) {
+                success = UNSAFE.weakCompareAndSwap$Type$Acquire(base, offset, $value2$, $value1$);
+            }
+            assertEquals(success, true, "weakCompareAndSwapAcquire $type$");
             $type$ x = UNSAFE.get$Type$(base, offset);
             assertEquals(x, $value1$, "weakCompareAndSwapAcquire $type$");
         }
 
         {
-            boolean r = UNSAFE.weakCompareAndSwap$Type$Release(base, offset, $value1$, $value2$);
-            assertEquals(r, true, "weakCompareAndSwapRelease $type$");
+            boolean success = false;
+            for (int c = 0; c < WEAK_ATTEMPTS && !success; c++) {
+                success = UNSAFE.weakCompareAndSwap$Type$Release(base, offset, $value1$, $value2$);
+            }
+            assertEquals(success, true, "weakCompareAndSwapRelease $type$");
             $type$ x = UNSAFE.get$Type$(base, offset);
             assertEquals(x, $value2$, "weakCompareAndSwapRelease $type$");
         }
@@ -311,7 +321,7 @@
             $type$ o = UNSAFE.getAndAdd$Type$(base, offset, $value2$);
             assertEquals(o, $value1$, "getAndAdd $type$");
             $type$ x = UNSAFE.get$Type$(base, offset);
-            assertEquals(x, $value1$ + $value2$, "weakCompareAndSwapRelease $type$");
+            assertEquals(x, $value1$ + $value2$, "getAndAdd $type$");
         }
 #end[AtomicAdd]
     }
--- a/test/compiler/unsafe/generate-unsafe-tests.sh	Fri May 06 09:54:58 2016 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,97 +0,0 @@
-#!/bin/bash
-
-javac -d . ../../../../jdk/make/src/classes/build/tools/spp/Spp.java
-
-SPP=build.tools.spp.Spp
-
-# Generates unsafe access tests for objects and all primitive types
-# $1 = package name to Unsafe, sun.misc | jdk.internal.misc
-# $2 = test class qualifier name, SunMisc | JdkInternalMisc
-function generate {
-    package=$1
-    Qualifier=$2
-
-    for type in boolean byte short char int long float double Object
-    do
-      Type="$(tr '[:lower:]' '[:upper:]' <<< ${type:0:1})${type:1}"
-      args="-K$type -Dtype=$type -DType=$Type"
-
-      case $type in
-        Object|int|long)
-          args="$args -KCAS -KOrdered"
-          ;;
-      esac
-
-      case $type in
-        int|long)
-          args="$args -KAtomicAdd"
-          ;;
-      esac
-
-      case $type in
-        short|char|int|long)
-          args="$args -KUnaligned"
-          ;;
-      esac
-
-      case $type in
-        boolean)
-          value1=true
-          value2=false
-          value3=false
-          ;;
-        byte)
-          value1=(byte)1
-          value2=(byte)2
-          value3=(byte)3
-          ;;
-        short)
-          value1=(short)1
-          value2=(short)2
-          value3=(short)3
-          ;;
-        char)
-          value1=\'a\'
-          value2=\'b\'
-          value3=\'c\'
-          ;;
-        int)
-          value1=1
-          value2=2
-          value3=3
-          ;;
-        long)
-          value1=1L
-          value2=2L
-          value3=3L
-          ;;
-        float)
-          value1=1.0f
-          value2=2.0f
-          value3=3.0f
-          ;;
-        double)
-          value1=1.0d
-          value2=2.0d
-          value3=3.0d
-          ;;
-        Object)
-          value1=\"foo\"
-          value2=\"bar\"
-          value3=\"baz\"
-          ;;
-      esac
-
-      args="$args -Dvalue1=$value1 -Dvalue2=$value2 -Dvalue3=$value3"
-
-      echo $args
-
-      java $SPP -nel -K$Qualifier -Dpackage=$package -DQualifier=$Qualifier \
-          $args < X-UnsafeAccessTest.java.template > ${Qualifier}UnsafeAccessTest${Type}.java
-    done
-}
-
-generate sun.misc SunMisc
-generate jdk.internal.misc JdkInternalMisc
-
-rm -fr build
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/vectorization/TestVectorUnalignedOffset.java	Fri May 06 18:20:50 2016 +0300
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2016, Red Hat, Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8155612
+ * @summary Aarch64: vector nodes need to support misaligned offset
+ * @run main/othervm -XX:-BackgroundCompilation TestVectorUnalignedOffset
+ *
+ */
+
+
+public class TestVectorUnalignedOffset {
+
+    static void test1(int[] src_array, int[] dst_array, int l) {
+        for (int i = 0; i < l; i++) {
+            dst_array[i + 250] = src_array[i + 250];
+        }
+    }
+
+    static void test2(byte[] src_array, byte[] dst_array, int l) {
+        for (int i = 0; i < l; i++) {
+            dst_array[i + 250] = src_array[i + 250];
+        }
+    }
+
+    static public void main(String[] args) {
+        int[] int_array = new int[1000];
+        byte[] byte_array = new byte[1000];
+        for (int i = 0; i < 20000; i++) {
+            test1(int_array, int_array, int_array.length - 250);
+            test2(byte_array, byte_array, byte_array.length - 250);
+        }
+    }
+}
--- a/test/compiler/whitebox/BlockingCompilation.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/compiler/whitebox/BlockingCompilation.java	Fri May 06 18:20:50 2016 +0300
@@ -23,15 +23,14 @@
 
 /*
  * @test
- * @bug 8150646
+ * @bug 8150646 8153013
  * @summary Add support for blocking compiles through whitebox API
  * @modules java.base/jdk.internal.misc
  * @library /testlibrary /test/lib /
  * @build sun.hotspot.WhiteBox
  *        compiler.testlibrary.CompilerUtils
- * @run main ClassFileInstaller sun.hotspot.WhiteBox
- *                              sun.hotspot.WhiteBox$WhiteBoxPermission
- *
+ * @run driver ClassFileInstaller sun.hotspot.WhiteBox
+ *                                sun.hotspot.WhiteBox$WhiteBoxPermission
  * @run main/othervm/timeout=60
  *        -Xbootclasspath/a:.
  *        -Xmixed
@@ -41,11 +40,10 @@
  *        BlockingCompilation
  */
 
+import compiler.testlibrary.CompilerUtils;
 import java.lang.reflect.Method;
 import java.util.Random;
-
 import sun.hotspot.WhiteBox;
-import compiler.testlibrary.CompilerUtils;
 
 public class BlockingCompilation {
     private static final WhiteBox WB = WhiteBox.getWhiteBox();
@@ -78,7 +76,13 @@
         // If the compiles are blocking, this call will block until the test time out,
         // Progress == success
         // (Don't run with -Xcomp since that can cause long timeouts due to many compiles)
-        WB.enqueueMethodForCompilation(m, highest_level);
+        if (!WB.enqueueMethodForCompilation(m, highest_level)) {
+            throw new Exception("Failed to enqueue method on level: " + highest_level);
+        }
+
+        if (!WB.isMethodQueuedForCompilation(m)) {
+            throw new Exception("Must be enqueued because of locked compilation");
+        }
 
         // restore state
         WB.unlockCompilation();
--- a/test/runtime/CommandLine/OptionsValidation/TestOptionsWithRanges.java	Fri May 06 09:54:58 2016 +0000
+++ b/test/runtime/CommandLine/OptionsValidation/TestOptionsWithRanges.java	Fri May 06 18:20:50 2016 +0300
@@ -90,13 +90,6 @@
         excludeTestMaxRange("CICompilerCount");
 
         /*
-         * JDK-8153340
-         * Temporary exclude AllocatePrefetchDistance option from testing
-         */
-        excludeTestRange("AllocatePrefetchDistance");
-
-
-        /*
          * JDK-8136766
          * Temporarily remove ThreadStackSize from testing because Windows can set it to 0
          * (for default OS size) but other platforms insist it must be greater than 0