changeset 8694:a87c296434eb

Merge
author jbachorik
date Fri, 10 Jul 2015 16:37:41 +0200
parents 07f48b118941 d264a730c1f1
children b45432acc8ad
files src/share/vm/classfile/classFileParser.cpp
diffstat 46 files changed, 1254 insertions(+), 382 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Thu Jul 09 15:39:05 2015 -0400
+++ b/.hgtags	Fri Jul 10 16:37:41 2015 +0200
@@ -473,3 +473,5 @@
 11af3990d56c97b40318bc1f20608e86f051a3f7 jdk9-b68
 ff0929a59ced0e144201aa05819ae2e47d6f2c61 jdk9-b69
 8672e9264db30c21504063932dbc374eabc287a1 jdk9-b70
+07c6b035d68b0c41b1dcd442157b50b41a2551e9 jdk9-b71
+c1b2825ef47e75cb34dd18450d1c4280b7c5853c jdk9-b72
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -2270,17 +2270,21 @@
   }
 
   // CRC32 instructions
-#define INSN(NAME, sf, sz)                                                \
+#define INSN(NAME, c, sf, sz)                                             \
   void NAME(Register Rd, Register Rn, Register Rm) {                      \
     starti;                                                               \
-    f(sf, 31), f(0b0011010110, 30, 21), f(0b0100, 15, 12), f(sz, 11, 10); \
-    rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);                                     \
+    f(sf, 31), f(0b0011010110, 30, 21), f(0b010, 15, 13), f(c, 12);       \
+    f(sz, 11, 10), rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);                      \
   }
 
-  INSN(crc32b, 0, 0b00);
-  INSN(crc32h, 0, 0b01);
-  INSN(crc32w, 0, 0b10);
-  INSN(crc32x, 1, 0b11);
+  INSN(crc32b,  0, 0, 0b00);
+  INSN(crc32h,  0, 0, 0b01);
+  INSN(crc32w,  0, 0, 0b10);
+  INSN(crc32x,  0, 1, 0b11);
+  INSN(crc32cb, 1, 0, 0b00);
+  INSN(crc32ch, 1, 0, 0b01);
+  INSN(crc32cw, 1, 0, 0b10);
+  INSN(crc32cx, 1, 1, 0b11);
 
 #undef INSN
 
--- a/src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,12 +28,6 @@
 
 const int StackAlignmentInBytes  = 16;
 
-// Indicates whether the C calling conventions require that
-// 32-bit integer argument values are properly extended to 64 bits.
-// If set, SharedRuntime::c_calling_convention() must adapt
-// signatures accordingly.
-const bool CCallingConventionRequiresIntsAsLongs = true;
-
 #define SUPPORTS_NATIVE_CX8
 
 // The maximum B/BL offset range on AArch64 is 128MB.
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -2914,6 +2914,65 @@
     ornw(crc, zr, crc);
 }
 
+/**
+ * @param crc   register containing existing CRC (32-bit)
+ * @param buf   register pointing to input byte buffer (byte*)
+ * @param len   register containing number of bytes
+ * @param table register that will contain address of CRC table
+ * @param tmp   scratch register
+ */
+void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len,
+        Register table0, Register table1, Register table2, Register table3,
+        Register tmp, Register tmp2, Register tmp3) {
+  Label L_exit;
+  Label CRC_by64_loop, CRC_by4_loop, CRC_by1_loop;
+
+    subs(len, len, 64);
+    br(Assembler::GE, CRC_by64_loop);
+    adds(len, len, 64-4);
+    br(Assembler::GE, CRC_by4_loop);
+    adds(len, len, 4);
+    br(Assembler::GT, CRC_by1_loop);
+    b(L_exit);
+
+  BIND(CRC_by4_loop);
+    ldrw(tmp, Address(post(buf, 4)));
+    subs(len, len, 4);
+    crc32cw(crc, crc, tmp);
+    br(Assembler::GE, CRC_by4_loop);
+    adds(len, len, 4);
+    br(Assembler::LE, L_exit);
+  BIND(CRC_by1_loop);
+    ldrb(tmp, Address(post(buf, 1)));
+    subs(len, len, 1);
+    crc32cb(crc, crc, tmp);
+    br(Assembler::GT, CRC_by1_loop);
+    b(L_exit);
+
+    align(CodeEntryAlignment);
+  BIND(CRC_by64_loop);
+    subs(len, len, 64);
+    ldp(tmp, tmp3, Address(post(buf, 16)));
+    crc32cx(crc, crc, tmp);
+    crc32cx(crc, crc, tmp3);
+    ldp(tmp, tmp3, Address(post(buf, 16)));
+    crc32cx(crc, crc, tmp);
+    crc32cx(crc, crc, tmp3);
+    ldp(tmp, tmp3, Address(post(buf, 16)));
+    crc32cx(crc, crc, tmp);
+    crc32cx(crc, crc, tmp3);
+    ldp(tmp, tmp3, Address(post(buf, 16)));
+    crc32cx(crc, crc, tmp);
+    crc32cx(crc, crc, tmp3);
+    br(Assembler::GE, CRC_by64_loop);
+    adds(len, len, 64-4);
+    br(Assembler::GE, CRC_by4_loop);
+    adds(len, len, 4);
+    br(Assembler::GT, CRC_by1_loop);
+  BIND(L_exit);
+    return;
+}
+
 SkipIfEqual::SkipIfEqual(
     MacroAssembler* masm, const bool* flag_addr, bool value) {
   _masm = masm;
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -967,6 +967,10 @@
   void kernel_crc32(Register crc, Register buf, Register len,
         Register table0, Register table1, Register table2, Register table3,
         Register tmp, Register tmp2, Register tmp3);
+  // CRC32 code for java.util.zip.CRC32C::updateBytes() instrinsic.
+  void kernel_crc32c(Register crc, Register buf, Register len,
+        Register table0, Register table1, Register table2, Register table3,
+        Register tmp, Register tmp2, Register tmp3);
 
 #undef VIRTUAL
 
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -2359,6 +2359,47 @@
   /**
    *  Arguments:
    *
+   * Inputs:
+   *   c_rarg0   - int crc
+   *   c_rarg1   - byte* buf
+   *   c_rarg2   - int length
+   *   c_rarg3   - int* table
+   *
+   * Ouput:
+   *       rax   - int crc result
+   */
+  address generate_updateBytesCRC32C() {
+    assert(UseCRC32CIntrinsics, "what are we doing here?");
+
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C");
+
+    address start = __ pc();
+
+    const Register crc   = c_rarg0;  // crc
+    const Register buf   = c_rarg1;  // source java byte array address
+    const Register len   = c_rarg2;  // length
+    const Register table0 = c_rarg3; // crc_table address
+    const Register table1 = c_rarg4;
+    const Register table2 = c_rarg5;
+    const Register table3 = c_rarg6;
+    const Register tmp3 = c_rarg7;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    __ kernel_crc32c(crc, buf, len,
+              table0, table1, table2, table3, rscratch1, rscratch2, tmp3);
+
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(lr);
+
+    return start;
+  }
+
+  /**
+   *  Arguments:
+   *
    *  Input:
    *    c_rarg0   - x address
    *    c_rarg1   - x length
@@ -2579,6 +2620,10 @@
       StubRoutines::_sha256_implCompressMB = generate_sha256_implCompress(true,  "sha256_implCompressMB");
     }
 
+    if (UseCRC32CIntrinsics) {
+      StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
+    }
+
     // Safefetch stubs.
     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
                                                        &StubRoutines::_safefetch32_fault_pc,
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -199,9 +199,12 @@
     UseCRC32Intrinsics = true;
   }
 
-  if (UseCRC32CIntrinsics) {
-    if (!FLAG_IS_DEFAULT(UseCRC32CIntrinsics))
-      warning("CRC32C intrinsics are not available on this CPU");
+  if (auxv & HWCAP_CRC32) {
+    if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) {
+      FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true);
+    }
+  } else if (UseCRC32CIntrinsics) {
+    warning("CRC32C is not available on the CPU");
     FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false);
   }
 
@@ -214,34 +217,31 @@
     FLAG_SET_DEFAULT(UseSHA, false);
   }
 
-  if (!UseSHA) {
+  if (UseSHA && (auxv & HWCAP_SHA1)) {
+    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
+    }
+  } else if (UseSHA1Intrinsics) {
+    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
-    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+  }
+
+  if (UseSHA && (auxv & HWCAP_SHA2)) {
+    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
+    }
+  } else if (UseSHA256Intrinsics) {
+    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
+    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+  }
+
+  if (UseSHA512Intrinsics) {
+    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
-  } else {
-    if (auxv & HWCAP_SHA1) {
-      if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
-        FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
-      }
-    } else if (UseSHA1Intrinsics) {
-      warning("SHA1 instruction is not available on this CPU.");
-      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
-    }
-    if (auxv & HWCAP_SHA2) {
-      if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
-        FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
-      }
-    } else if (UseSHA256Intrinsics) {
-      warning("SHA256 instruction (for SHA-224 and SHA-256) is not available on this CPU.");
-      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
-    }
-    if (UseSHA512Intrinsics) {
-      warning("SHA512 instruction (for SHA-384 and SHA-512) is not available on this CPU.");
-      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
-    }
-    if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
-      FLAG_SET_DEFAULT(UseSHA, false);
-    }
+  }
+
+  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
+    FLAG_SET_DEFAULT(UseSHA, false);
   }
 
   // This machine allows unaligned memory accesses
--- a/src/cpu/ppc/vm/globalDefinitions_ppc.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/ppc/vm/globalDefinitions_ppc.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,12 +31,6 @@
 
 const int StackAlignmentInBytes = 16;
 
-// Indicates whether the C calling conventions require that
-// 32-bit integer argument values are properly extended to 64 bits.
-// If set, SharedRuntime::c_calling_convention() must adapt
-// signatures accordingly.
-const bool CCallingConventionRequiresIntsAsLongs = true;
-
 #define SUPPORTS_NATIVE_CX8
 
 // The PPC CPUs are NOT multiple-copy-atomic.
--- a/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -731,23 +731,8 @@
     case T_SHORT:
     case T_INT:
       // We must cast ints to longs and use full 64 bit stack slots
-      // here. We do the cast in GraphKit::gen_stub() and just guard
-      // here against loosing that change.
-      assert(CCallingConventionRequiresIntsAsLongs,
-             "argument of type int should be promoted to type long");
-      guarantee(i > 0 && sig_bt[i-1] == T_LONG,
-                "argument of type (bt) should have been promoted to type (T_LONG,bt) for bt in "
-                "{T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}");
-      // Do not count halves.
-      regs[i].set_bad();
-      --arg;
-      break;
+      // here.  Thus fall through, handle as long.
     case T_LONG:
-      guarantee(sig_bt[i+1] == T_VOID    ||
-                sig_bt[i+1] == T_BOOLEAN || sig_bt[i+1] == T_CHAR  ||
-                sig_bt[i+1] == T_BYTE    || sig_bt[i+1] == T_SHORT ||
-                sig_bt[i+1] == T_INT,
-                "expecting type (T_LONG,half) or type (T_LONG,bt) with bt in {T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}");
     case T_OBJECT:
     case T_ARRAY:
     case T_ADDRESS:
@@ -1273,7 +1258,7 @@
 static void int_move(MacroAssembler*masm,
                      VMRegPair src, VMRegPair dst,
                      Register r_caller_sp, Register r_temp) {
-  assert(src.first()->is_valid() && src.second() == src.first()->next(), "incoming must be long-int");
+  assert(src.first()->is_valid(), "incoming must be int");
   assert(dst.first()->is_valid() && dst.second() == dst.first()->next(), "outgoing must be long");
 
   if (src.first()->is_stack()) {
@@ -1762,13 +1747,6 @@
   // the jni function will expect them. To figure out where they go
   // we convert the java signature to a C signature by inserting
   // the hidden arguments as arg[0] and possibly arg[1] (static method)
-  //
-  // Additionally, on ppc64 we must convert integers to longs in the C
-  // signature. We do this in advance in order to have no trouble with
-  // indexes into the bt-arrays.
-  // So convert the signature and registers now, and adjust the total number
-  // of in-arguments accordingly.
-  int i2l_argcnt = convert_ints_to_longints_argcnt(total_in_args, in_sig_bt); // PPC64: pass ints as longs.
 
   // Calculate the total number of C arguments and create arrays for the
   // signature and the outgoing registers.
@@ -1776,7 +1754,7 @@
   // some floating-point arguments must be passed in registers _and_
   // in stack locations.
   bool method_is_static = method->is_static();
-  int  total_c_args     = i2l_argcnt;
+  int  total_c_args     = total_in_args;
 
   if (!is_critical_native) {
     int n_hidden_args = method_is_static ? 2 : 1;
@@ -1785,7 +1763,7 @@
     // No JNIEnv*, no this*, but unpacked arrays (base+length).
     for (int i = 0; i < total_in_args; i++) {
       if (in_sig_bt[i] == T_ARRAY) {
-        total_c_args += 2; // PPC64: T_LONG, T_INT, T_ADDRESS (see convert_ints_to_longints and c_calling_convention)
+        total_c_args++;
       }
     }
   }
@@ -1803,8 +1781,6 @@
 
   int argc = 0;
   if (!is_critical_native) {
-    convert_ints_to_longints(i2l_argcnt, total_in_args, in_sig_bt, in_regs); // PPC64: pass ints as longs.
-
     out_sig_bt[argc++] = T_ADDRESS;
     if (method->is_static()) {
       out_sig_bt[argc++] = T_OBJECT;
@@ -1815,7 +1791,7 @@
     }
   } else {
     Thread* THREAD = Thread::current();
-    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, i2l_argcnt);
+    in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args);
     SignatureStream ss(method->signature());
     int o = 0;
     for (int i = 0; i < total_in_args ; i++, o++) {
@@ -1839,28 +1815,16 @@
         }
       } else {
         in_elem_bt[o] = T_VOID;
-        switch(in_sig_bt[i]) { // PPC64: pass ints as longs.
-          case T_BOOLEAN:
-          case T_CHAR:
-          case T_BYTE:
-          case T_SHORT:
-          case T_INT: in_elem_bt[++o] = T_VOID; break;
-          default: break;
-        }
       }
       if (in_sig_bt[i] != T_VOID) {
         assert(in_sig_bt[i] == ss.type(), "must match");
         ss.next();
       }
     }
-    assert(i2l_argcnt==o, "must match");
-
-    convert_ints_to_longints(i2l_argcnt, total_in_args, in_sig_bt, in_regs); // PPC64: pass ints as longs.
 
     for (int i = 0; i < total_in_args ; i++ ) {
       if (in_sig_bt[i] == T_ARRAY) {
         // Arrays are passed as int, elem* pair.
-        out_sig_bt[argc++] = T_LONG; // PPC64: pass ints as longs.
         out_sig_bt[argc++] = T_INT;
         out_sig_bt[argc++] = T_ADDRESS;
       } else {
@@ -1921,7 +1885,8 @@
           case T_BYTE:
           case T_SHORT:
           case T_CHAR:
-          case T_INT:  /*single_slots++;*/ break; // PPC64: pass ints as longs.
+          case T_INT:
+          // Fall through.
           case T_ARRAY:
           case T_LONG: double_slots++; break;
           default:  ShouldNotReachHere();
@@ -2019,7 +1984,7 @@
 
   __ save_LR_CR(r_temp_1);
   __ generate_stack_overflow_check(frame_size_in_bytes); // Check before creating frame.
-  __ mr(r_callers_sp, R1_SP);                       // Remember frame pointer.
+  __ mr(r_callers_sp, R1_SP);                            // Remember frame pointer.
   __ push_frame(frame_size_in_bytes, r_temp_1);          // Push the c2n adapter's frame.
   frame_done_pc = (intptr_t)__ pc();
 
@@ -2098,24 +2063,16 @@
       case T_BYTE:
       case T_SHORT:
       case T_INT:
-        guarantee(in > 0 && in_sig_bt[in-1] == T_LONG,
-                  "expecting type (T_LONG,bt) for bt in {T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}");
+        // Move int and do sign extension.
+        int_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
         break;
       case T_LONG:
-        if (in_sig_bt[in+1] == T_VOID) {
-          long_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
-        } else {
-          guarantee(in_sig_bt[in+1] == T_BOOLEAN || in_sig_bt[in+1] == T_CHAR  ||
-                    in_sig_bt[in+1] == T_BYTE    || in_sig_bt[in+1] == T_SHORT ||
-                    in_sig_bt[in+1] == T_INT,
-                 "expecting type (T_LONG,bt) for bt in {T_BOOLEAN, T_CHAR, T_BYTE, T_SHORT, T_INT}");
-          int_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
-        }
+        long_move(masm, in_regs[in], out_regs[out], r_callers_sp, r_temp_1);
         break;
       case T_ARRAY:
         if (is_critical_native) {
           int body_arg = out;
-          out -= 2; // Point to length arg. PPC64: pass ints as longs.
+          out -= 1; // Point to length arg.
           unpack_array_argument(masm, in_regs[in], in_elem_bt[in], out_regs[body_arg], out_regs[out],
                                 r_callers_sp, r_temp_1, r_temp_2);
           break;
@@ -2187,7 +2144,6 @@
   // Make sure that thread is non-volatile; it crosses a bunch of VM calls below.
   assert(R16_thread->is_nonvolatile(), "thread must be in non-volatile register");
 
-
 # if 0
   // DTrace method entry
 # endif
--- a/src/cpu/sparc/vm/globalDefinitions_sparc.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/sparc/vm/globalDefinitions_sparc.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,12 +30,6 @@
 
 const int StackAlignmentInBytes = (2*wordSize);
 
-// Indicates whether the C calling conventions require that
-// 32-bit integer argument values are properly extended to 64 bits.
-// If set, SharedRuntime::c_calling_convention() must adapt
-// signatures accordingly.
-const bool CCallingConventionRequiresIntsAsLongs = false;
-
 #define SUPPORTS_NATIVE_CX8
 
 // The expected size in bytes of a cache line, used to pad data structures.
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -329,39 +329,35 @@
     FLAG_SET_DEFAULT(UseSHA, false);
   }
 
-  if (!UseSHA) {
+  if (UseSHA && has_sha1()) {
+    if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
+    }
+  } else if (UseSHA1Intrinsics) {
+    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+  }
+
+  if (UseSHA && has_sha256()) {
+    if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
+    }
+  } else if (UseSHA256Intrinsics) {
+    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+  }
+
+  if (UseSHA && has_sha512()) {
+    if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
+      FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
+    }
+  } else if (UseSHA512Intrinsics) {
+    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
-  } else {
-    if (has_sha1()) {
-      if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
-        FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
-      }
-    } else if (UseSHA1Intrinsics) {
-      warning("SHA1 instruction is not available on this CPU.");
-      FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
-    }
-    if (has_sha256()) {
-      if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
-        FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
-      }
-    } else if (UseSHA256Intrinsics) {
-      warning("SHA256 instruction (for SHA-224 and SHA-256) is not available on this CPU.");
-      FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
-    }
+  }
 
-    if (has_sha512()) {
-      if (FLAG_IS_DEFAULT(UseSHA512Intrinsics)) {
-        FLAG_SET_DEFAULT(UseSHA512Intrinsics, true);
-      }
-    } else if (UseSHA512Intrinsics) {
-      warning("SHA512 instruction (for SHA-384 and SHA-512) is not available on this CPU.");
-      FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
-    }
-    if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
-      FLAG_SET_DEFAULT(UseSHA, false);
-    }
+  if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) {
+    FLAG_SET_DEFAULT(UseSHA, false);
   }
 
   // SPARC T4 and above should have support for CRC32C instruction
--- a/src/cpu/x86/vm/globalDefinitions_x86.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/x86/vm/globalDefinitions_x86.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -27,12 +27,6 @@
 
 const int StackAlignmentInBytes  = 16;
 
-// Indicates whether the C calling conventions require that
-// 32-bit integer argument values are properly extended to 64 bits.
-// If set, SharedRuntime::c_calling_convention() must adapt
-// signatures accordingly.
-const bool CCallingConventionRequiresIntsAsLongs = false;
-
 #define SUPPORTS_NATIVE_CX8
 
 // The expected size in bytes of a cache line, used to pad data structures.
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -23,6 +23,9 @@
  */
 
 #include "precompiled.hpp"
+#ifndef _WINDOWS
+#include "alloca.h"
+#endif
 #include "asm/macroAssembler.hpp"
 #include "asm/macroAssembler.inline.hpp"
 #include "code/debugInfoRec.hpp"
@@ -3511,6 +3514,250 @@
 }
 
 
+//------------------------------Montgomery multiplication------------------------
+//
+
+#ifndef _WINDOWS
+
+#define ASM_SUBTRACT
+
+#ifdef ASM_SUBTRACT
+// Subtract 0:b from carry:a.  Return carry.
+static unsigned long
+sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) {
+  long i = 0, cnt = len;
+  unsigned long tmp;
+  asm volatile("clc; "
+               "0: ; "
+               "mov (%[b], %[i], 8), %[tmp]; "
+               "sbb %[tmp], (%[a], %[i], 8); "
+               "inc %[i]; dec %[cnt]; "
+               "jne 0b; "
+               "mov %[carry], %[tmp]; sbb $0, %[tmp]; "
+               : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp)
+               : [a]"r"(a), [b]"r"(b), [carry]"r"(carry)
+               : "memory");
+  return tmp;
+}
+#else // ASM_SUBTRACT
+typedef int __attribute__((mode(TI))) int128;
+
+// Subtract 0:b from carry:a.  Return carry.
+static unsigned long
+sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) {
+  int128 tmp = 0;
+  int i;
+  for (i = 0; i < len; i++) {
+    tmp += a[i];
+    tmp -= b[i];
+    a[i] = tmp;
+    tmp >>= 64;
+    assert(-1 <= tmp && tmp <= 0, "invariant");
+  }
+  return tmp + carry;
+}
+#endif // ! ASM_SUBTRACT
+
+// Multiply (unsigned) Long A by Long B, accumulating the double-
+// length result into the accumulator formed of T0, T1, and T2.
+#define MACC(A, B, T0, T1, T2)                                  \
+do {                                                            \
+  unsigned long hi, lo;                                         \
+  __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4"   \
+           : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)  \
+           : "r"(A), "a"(B) : "cc");                            \
+ } while(0)
+
+// As above, but add twice the double-length result into the
+// accumulator.
+#define MACC2(A, B, T0, T1, T2)                                 \
+do {                                                            \
+  unsigned long hi, lo;                                         \
+  __asm__ ("mul %5; add %%rax, %2; adc %%rdx, %3; adc $0, %4; " \
+           "add %%rax, %2; adc %%rdx, %3; adc $0, %4"           \
+           : "=&d"(hi), "=a"(lo), "+r"(T0), "+r"(T1), "+g"(T2)  \
+           : "r"(A), "a"(B) : "cc");                            \
+ } while(0)
+
+// Fast Montgomery multiplication.  The derivation of the algorithm is
+// in  A Cryptographic Library for the Motorola DSP56000,
+// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
+
+static void __attribute__((noinline))
+montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[],
+                    unsigned long m[], unsigned long inv, int len) {
+  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
+  int i;
+
+  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
+
+  for (i = 0; i < len; i++) {
+    int j;
+    for (j = 0; j < i; j++) {
+      MACC(a[j], b[i-j], t0, t1, t2);
+      MACC(m[j], n[i-j], t0, t1, t2);
+    }
+    MACC(a[i], b[0], t0, t1, t2);
+    m[i] = t0 * inv;
+    MACC(m[i], n[0], t0, t1, t2);
+
+    assert(t0 == 0, "broken Montgomery multiply");
+
+    t0 = t1; t1 = t2; t2 = 0;
+  }
+
+  for (i = len; i < 2*len; i++) {
+    int j;
+    for (j = i-len+1; j < len; j++) {
+      MACC(a[j], b[i-j], t0, t1, t2);
+      MACC(m[j], n[i-j], t0, t1, t2);
+    }
+    m[i-len] = t0;
+    t0 = t1; t1 = t2; t2 = 0;
+  }
+
+  while (t0)
+    t0 = sub(m, n, t0, len);
+}
+
+// Fast Montgomery squaring.  This uses asymptotically 25% fewer
+// multiplies so it should be up to 25% faster than Montgomery
+// multiplication.  However, its loop control is more complex and it
+// may actually run slower on some machines.
+
+static void __attribute__((noinline))
+montgomery_square(unsigned long a[], unsigned long n[],
+                  unsigned long m[], unsigned long inv, int len) {
+  unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
+  int i;
+
+  assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
+
+  for (i = 0; i < len; i++) {
+    int j;
+    int end = (i+1)/2;
+    for (j = 0; j < end; j++) {
+      MACC2(a[j], a[i-j], t0, t1, t2);
+      MACC(m[j], n[i-j], t0, t1, t2);
+    }
+    if ((i & 1) == 0) {
+      MACC(a[j], a[j], t0, t1, t2);
+    }
+    for (; j < i; j++) {
+      MACC(m[j], n[i-j], t0, t1, t2);
+    }
+    m[i] = t0 * inv;
+    MACC(m[i], n[0], t0, t1, t2);
+
+    assert(t0 == 0, "broken Montgomery square");
+
+    t0 = t1; t1 = t2; t2 = 0;
+  }
+
+  for (i = len; i < 2*len; i++) {
+    int start = i-len+1;
+    int end = start + (len - start)/2;
+    int j;
+    for (j = start; j < end; j++) {
+      MACC2(a[j], a[i-j], t0, t1, t2);
+      MACC(m[j], n[i-j], t0, t1, t2);
+    }
+    if ((i & 1) == 0) {
+      MACC(a[j], a[j], t0, t1, t2);
+    }
+    for (; j < len; j++) {
+      MACC(m[j], n[i-j], t0, t1, t2);
+    }
+    m[i-len] = t0;
+    t0 = t1; t1 = t2; t2 = 0;
+  }
+
+  while (t0)
+    t0 = sub(m, n, t0, len);
+}
+
+// Swap words in a longword.
+static unsigned long swap(unsigned long x) {
+  return (x << 32) | (x >> 32);
+}
+
+// Copy len longwords from s to d, word-swapping as we go.  The
+// destination array is reversed.
+static void reverse_words(unsigned long *s, unsigned long *d, int len) {
+  d += len;
+  while(len-- > 0) {
+    d--;
+    *d = swap(*s);
+    s++;
+  }
+}
+
+// The threshold at which squaring is advantageous was determined
+// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz.
+#define MONTGOMERY_SQUARING_THRESHOLD 64
+
+void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
+                                        jint len, jlong inv,
+                                        jint *m_ints) {
+  assert(len % 2 == 0, "array length in montgomery_multiply must be even");
+  int longwords = len/2;
+
+  // Make very sure we don't use so much space that the stack might
+  // overflow.  512 jints corresponds to an 16384-bit integer and
+  // will use here a total of 8k bytes of stack space.
+  int total_allocation = longwords * sizeof (unsigned long) * 4;
+  guarantee(total_allocation <= 8192, "must be");
+  unsigned long *scratch = (unsigned long *)alloca(total_allocation);
+
+  // Local scratch arrays
+  unsigned long
+    *a = scratch + 0 * longwords,
+    *b = scratch + 1 * longwords,
+    *n = scratch + 2 * longwords,
+    *m = scratch + 3 * longwords;
+
+  reverse_words((unsigned long *)a_ints, a, longwords);
+  reverse_words((unsigned long *)b_ints, b, longwords);
+  reverse_words((unsigned long *)n_ints, n, longwords);
+
+  ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords);
+
+  reverse_words(m, (unsigned long *)m_ints, longwords);
+}
+
+void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints,
+                                      jint len, jlong inv,
+                                      jint *m_ints) {
+  assert(len % 2 == 0, "array length in montgomery_square must be even");
+  int longwords = len/2;
+
+  // Make very sure we don't use so much space that the stack might
+  // overflow.  512 jints corresponds to an 16384-bit integer and
+  // will use here a total of 6k bytes of stack space.
+  int total_allocation = longwords * sizeof (unsigned long) * 3;
+  guarantee(total_allocation <= 8192, "must be");
+  unsigned long *scratch = (unsigned long *)alloca(total_allocation);
+
+  // Local scratch arrays
+  unsigned long
+    *a = scratch + 0 * longwords,
+    *n = scratch + 1 * longwords,
+    *m = scratch + 2 * longwords;
+
+  reverse_words((unsigned long *)a_ints, a, longwords);
+  reverse_words((unsigned long *)n_ints, n, longwords);
+
+  if (len >= MONTGOMERY_SQUARING_THRESHOLD) {
+    ::montgomery_square(a, n, m, (unsigned long)inv, longwords);
+  } else {
+    ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords);
+  }
+
+  reverse_words(m, (unsigned long *)m_ints, longwords);
+}
+
+#endif // WINDOWS
+
 #ifdef COMPILER2
 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
 //
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -4318,7 +4318,18 @@
     if (UseMulAddIntrinsic) {
       StubRoutines::_mulAdd = generate_mulAdd();
     }
-#endif
+
+#ifndef _WINDOWS
+    if (UseMontgomeryMultiplyIntrinsic) {
+      StubRoutines::_montgomeryMultiply
+        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply);
+    }
+    if (UseMontgomerySquareIntrinsic) {
+      StubRoutines::_montgomerySquare
+        = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square);
+    }
+#endif // WINDOWS
+#endif // COMPILER2
   }
 
  public:
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -692,10 +692,19 @@
     warning("SHA instructions are not available on this CPU");
     FLAG_SET_DEFAULT(UseSHA, false);
   }
-  if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) {
-    warning("SHA intrinsics are not available on this CPU");
+
+  if (UseSHA1Intrinsics) {
+    warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+  }
+
+  if (UseSHA256Intrinsics) {
+    warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
+  }
+
+  if (UseSHA512Intrinsics) {
+    warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.");
     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
   }
 
@@ -813,6 +822,12 @@
   if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) {
     UseMulAddIntrinsic = true;
   }
+  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
+    UseMontgomeryMultiplyIntrinsic = true;
+  }
+  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
+    UseMontgomerySquareIntrinsic = true;
+  }
 #else
   if (UseMultiplyToLenIntrinsic) {
     if (!FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
@@ -820,6 +835,18 @@
     }
     FLAG_SET_DEFAULT(UseMultiplyToLenIntrinsic, false);
   }
+  if (UseMontgomeryMultiplyIntrinsic) {
+    if (!FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
+      warning("montgomeryMultiply intrinsic is not available in 32-bit VM");
+    }
+    FLAG_SET_DEFAULT(UseMontgomeryMultiplyIntrinsic, false);
+  }
+  if (UseMontgomerySquareIntrinsic) {
+    if (!FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
+      warning("montgomerySquare intrinsic is not available in 32-bit VM");
+    }
+    FLAG_SET_DEFAULT(UseMontgomerySquareIntrinsic, false);
+  }
   if (UseSquareToLenIntrinsic) {
     if (!FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) {
       warning("squareToLen intrinsic is not available in 32-bit VM");
--- a/src/cpu/zero/vm/globalDefinitions_zero.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/cpu/zero/vm/globalDefinitions_zero.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2009 Red Hat, Inc.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2009, 2015, Red Hat, Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,10 +28,4 @@
 
 #include <ffi.h>
 
-// Indicates whether the C calling conventions require that
-// 32-bit integer argument values are properly extended to 64 bits.
-// If set, SharedRuntime::c_calling_convention() must adapt
-// signatures accordingly.
-const bool CCallingConventionRequiresIntsAsLongs = false;
-
 #endif // CPU_ZERO_VM_GLOBALDEFINITIONS_ZERO_HPP
--- a/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/os_cpu/solaris_sparc/vm/vm_version_solaris_sparc.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -191,7 +191,7 @@
     return CPUVisitor::visit(nodeh, state);
   }
 
-  PICL(bool is_fujitsu) : _L1_data_cache_line_size(0), _L2_data_cache_line_size(0), _dl_handle(NULL) {
+  PICL(bool is_fujitsu, bool is_sun4v) : _L1_data_cache_line_size(0), _L2_data_cache_line_size(0), _dl_handle(NULL) {
     if (!open_library()) {
       return;
     }
@@ -203,7 +203,7 @@
         if (is_fujitsu) {
           cpu_class = "core";
         }
-        CPUVisitor cpu_visitor(this, os::processor_count());
+        CPUVisitor cpu_visitor(this, (is_sun4v && !is_fujitsu) ? 1 : os::processor_count());
         _picl_walk_tree_by_class(rooth, cpu_class, &cpu_visitor, PICL_visit_cpu_helper);
         if (cpu_visitor.l1_visitor()->is_assigned()) { // Is there a value?
           _L1_data_cache_line_size = cpu_visitor.l1_visitor()->value();
@@ -447,7 +447,7 @@
   }
 
   // Figure out cache line sizes using PICL
-  PICL picl((features & sparc64_family_m) != 0);
+  PICL picl((features & sparc64_family_m) != 0, (features & sun4v_m) != 0);
   _L1_data_cache_line_size = picl.L1_data_cache_line_size();
   _L2_data_cache_line_size = picl.L2_data_cache_line_size();
 
--- a/src/share/tools/hsdis/Makefile	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/tools/hsdis/Makefile	Fri Jul 10 16:37:41 2015 +0200
@@ -70,7 +70,8 @@
 else   #linux
 CPU             = $(shell uname -m)
 ARCH1=$(CPU:x86_64=amd64)
-ARCH=$(ARCH1:i686=i386)
+ARCH2=$(ARCH1:i686=i386)
+ARCH=$(ARCH2:ppc64le=ppc64)
 ifdef LP64
 CFLAGS/sparcv9	+= -m64
 CFLAGS/amd64	+= -m64
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -3157,6 +3157,9 @@
       // code for the inlined version will be different than the root
       // compiled version which could lead to monotonicity problems on
       // intel.
+      if (CheckIntrinsics && !scope->method()->intrinsic_candidate()) {
+        BAILOUT("failed to inline intrinsic, method not annotated");
+      }
 
       // Set up a stream so that appending instructions works properly.
       ciBytecodeStream s(scope->method());
@@ -3197,6 +3200,9 @@
         // result in the referent being marked live and the reference
         // object removed from the list of discovered references during
         // reference processing.
+        if (CheckIntrinsics && !scope->method()->intrinsic_candidate()) {
+          BAILOUT("failed to inline intrinsic, method not annotated");
+        }
 
         // Also we need intrinsic to prevent commoning reads from this field
         // across safepoint since GC can change its value.
@@ -3317,7 +3323,8 @@
   }
 
   // handle intrinsics
-  if (callee->intrinsic_id() != vmIntrinsics::_none) {
+  if (callee->intrinsic_id() != vmIntrinsics::_none &&
+      (CheckIntrinsics ? callee->intrinsic_candidate() : true)) {
     if (try_inline_intrinsics(callee)) {
       print_inlining(callee, "intrinsic");
       return true;
@@ -4278,7 +4285,7 @@
   assert(result_type->is_int(), "int result");
   Values* args = state()->pop_arguments(callee->arg_size());
 
-  // Pop off some args to speically handle, then push back
+  // Pop off some args to specially handle, then push back
   Value newval = args->pop();
   Value cmpval = args->pop();
   Value offset = args->pop();
--- a/src/share/vm/ci/bcEscapeAnalyzer.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/ci/bcEscapeAnalyzer.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -1185,7 +1185,6 @@
   vmIntrinsics::ID iid = method()->intrinsic_id();
 
   if (iid == vmIntrinsics::_getClass ||
-      iid ==  vmIntrinsics::_fillInStackTrace ||
       iid == vmIntrinsics::_hashCode)
     return iid;
   else
@@ -1199,10 +1198,6 @@
   case vmIntrinsics::_getClass:
     _return_local = false;
     break;
-  case vmIntrinsics::_fillInStackTrace:
-    arg.set(0); // 'this'
-    set_returned(arg);
-    break;
   case vmIntrinsics::_hashCode:
     // initialized state is correct
     break;
--- a/src/share/vm/ci/ciMethod.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/ci/ciMethod.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -178,9 +178,10 @@
   // Code size for inlining decisions.
   int code_size_for_inlining();
 
-  bool caller_sensitive()   const { return get_Method()->caller_sensitive();   }
-  bool force_inline()       const { return get_Method()->force_inline();       }
-  bool dont_inline()        const { return get_Method()->dont_inline();        }
+  bool caller_sensitive()    const { return get_Method()->caller_sensitive();    }
+  bool force_inline()        const { return get_Method()->force_inline();        }
+  bool dont_inline()         const { return get_Method()->dont_inline();         }
+  bool intrinsic_candidate() const { return get_Method()->intrinsic_candidate(); }
 
   int comp_level();
   int highest_osr_comp_level();
--- a/src/share/vm/classfile/classFileParser.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/classfile/classFileParser.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -1751,6 +1751,10 @@
     if (_location != _in_method)  break;  // only allow for methods
     if (!privileged)              break;  // only allow in privileged code
     return _method_LambdaForm_Hidden;
+  case vmSymbols::VM_SYMBOL_ENUM_NAME(jdk_internal_HotSpotIntrinsicCandidate_signature):
+    if (_location != _in_method)  break;  // only allow for methods
+    if (!privileged)              break;  // only allow in privileged code
+    return _method_HotSpotIntrinsicCandidate;
   case vmSymbols::VM_SYMBOL_ENUM_NAME(java_lang_invoke_Stable_signature):
     if (_location != _in_field)   break;  // only allow for fields
     if (!privileged)              break;  // only allow in privileged code
@@ -1790,6 +1794,8 @@
     m->set_intrinsic_id(vmIntrinsics::_compiledLambdaForm);
   if (has_annotation(_method_LambdaForm_Hidden))
     m->set_hidden(true);
+  if (has_annotation(_method_HotSpotIntrinsicCandidate) && !m->is_synthetic())
+    m->set_intrinsic_candidate(true);
 }
 
 void ClassFileParser::ClassAnnotationCollector::apply_to(instanceKlassHandle k) {
@@ -4131,9 +4137,78 @@
     // (We used to do this lazily, but now we query it in Rewriter,
     // which is eagerly done for every method, so we might as well do it now,
     // when everything is fresh in memory.)
-    if (Method::klass_id_for_intrinsics(this_klass()) != vmSymbols::NO_SID) {
+    vmSymbols::SID klass_id = Method::klass_id_for_intrinsics(this_klass());
+    if (klass_id != vmSymbols::NO_SID) {
       for (int j = 0; j < methods->length(); j++) {
-        methods->at(j)->init_intrinsic_id();
+        Method* method = methods->at(j);
+        method->init_intrinsic_id();
+
+        if (CheckIntrinsics) {
+          // Check if an intrinsic is defined for method 'method',
+          // but the method is not annotated with @HotSpotIntrinsicCandidate.
+          if (method->intrinsic_id() != vmIntrinsics::_none &&
+              !method->intrinsic_candidate()) {
+            tty->print("Compiler intrinsic is defined for method [%s], "
+                       "but the method is not annotated with @HotSpotIntrinsicCandidate.%s",
+                       method->name_and_sig_as_C_string(),
+                       NOT_DEBUG(" Method will not be inlined.") DEBUG_ONLY(" Exiting.")
+                       );
+            tty->cr();
+            DEBUG_ONLY(vm_exit(1));
+          }
+          // Check is the method 'method' is annotated with @HotSpotIntrinsicCandidate,
+          // but there is no intrinsic available for it.
+          if (method->intrinsic_candidate() &&
+              method->intrinsic_id() == vmIntrinsics::_none) {
+            tty->print("Method [%s] is annotated with @HotSpotIntrinsicCandidate, "
+                       "but no compiler intrinsic is defined for the method.%s",
+                       method->name_and_sig_as_C_string(),
+                       NOT_DEBUG("") DEBUG_ONLY(" Exiting.")
+                       );
+            tty->cr();
+            DEBUG_ONLY(vm_exit(1));
+          }
+        }
+      }
+
+      if (CheckIntrinsics) {
+        // Check for orphan methods in the current class. A method m
+        // of a class C is orphan if an intrinsic is defined for method m,
+        // but class C does not declare m.
+
+        for (int id = vmIntrinsics::FIRST_ID; id < (int)vmIntrinsics::ID_LIMIT; id++) {
+          if (id == vmIntrinsics::_compiledLambdaForm) {
+            // The _compiledLamdbdaForm intrinsic is a special marker for bytecode
+            // generated for the JVM from a LambdaForm and therefore no method
+            // is defined for it.
+            continue;
+          }
+
+          if (vmIntrinsics::class_for(vmIntrinsics::ID_from(id)) == klass_id) {
+            // Check if the current class contains a method with the same
+            // name, flags, signature.
+            bool match = false;
+            for (int j = 0; j < methods->length(); j++) {
+              Method* method = methods->at(j);
+              if (id == method->intrinsic_id()) {
+                match = true;
+                break;
+              }
+            }
+
+            if (!match) {
+              char buf[1000];
+              tty->print("Compiler intrinsic is defined for method [%s], "
+                         "but the method is not available in class [%s].%s",
+                         vmIntrinsics::short_name_as_C_string(vmIntrinsics::ID_from(id), buf, sizeof(buf)),
+                         this_klass->name()->as_C_string(),
+                         NOT_DEBUG("") DEBUG_ONLY(" Exiting.")
+                         );
+              tty->cr();
+              DEBUG_ONLY(vm_exit(1));
+            }
+          }
+        }
       }
     }
 
--- a/src/share/vm/classfile/classFileParser.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/classfile/classFileParser.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -130,6 +130,7 @@
       _method_InjectedProfile,
       _method_LambdaForm_Compiled,
       _method_LambdaForm_Hidden,
+      _method_HotSpotIntrinsicCandidate,
       _sun_misc_Contended,
       _field_Stable,
       _annotation_LIMIT
--- a/src/share/vm/classfile/javaClasses.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/classfile/javaClasses.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -1707,8 +1707,7 @@
     // - rest of the stack
 
     if (!skip_fillInStackTrace_check) {
-      if ((method->name() == vmSymbols::fillInStackTrace_name() ||
-           method->name() == vmSymbols::fillInStackTrace0_name()) &&
+      if (method->name() == vmSymbols::fillInStackTrace_name() &&
           throwable->is_a(method->method_holder())) {
         continue;
       }
--- a/src/share/vm/classfile/vmSymbols.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/classfile/vmSymbols.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -258,6 +258,8 @@
   /* Type Annotations (JDK 8 and above) */                                                        \
   template(type_annotations_name,                     "typeAnnotations")                          \
                                                                                                   \
+  /* Intrinsic Annotation (JDK 9 and above) */                                                    \
+  template(jdk_internal_HotSpotIntrinsicCandidate_signature, "Ljdk/internal/HotSpotIntrinsicCandidate;") \
                                                                                                   \
   /* Support for JSR 292 & invokedynamic (JDK 1.7 and above) */                                   \
   template(java_lang_invoke_CallSite,                 "java/lang/invoke/CallSite")                \
@@ -345,7 +347,6 @@
   template(dispatch_name,                             "dispatch")                                 \
   template(getSystemClassLoader_name,                 "getSystemClassLoader")                     \
   template(fillInStackTrace_name,                     "fillInStackTrace")                         \
-  template(fillInStackTrace0_name,                    "fillInStackTrace0")                        \
   template(getCause_name,                             "getCause")                                 \
   template(initCause_name,                            "initCause")                                \
   template(setProperty_name,                          "setProperty")                              \
@@ -635,7 +636,43 @@
 // The F_xx is one of the Flags enum; see below.
 //
 // for Emacs: (let ((c-backslash-column 120) (c-backslash-max-column 120)) (c-backslash-region (point) (point-max) nil t))
+//
+//
+// There are two types of intrinsic methods: (1) Library intrinsics and (2) bytecode intrinsics.
+//
+// (1) A library intrinsic method may be replaced with hand-crafted assembly code,
+// with hand-crafted compiler IR, or with a combination of the two. The semantics
+// of the replacement code may differ from the semantics of the replaced code.
+//
+// (2) Bytecode intrinsic methods are not replaced by special code, but they are
+// treated in some other special way by the compiler. For example, the compiler
+// may delay inlining for some String-related intrinsic methods (e.g., some methods
+// defined in the StringBuilder and StringBuffer classes, see
+// Compile::should_delay_string_inlining() for more details).
+//
+// Due to the difference between the semantics of an intrinsic method as defined
+// in the (Java) source code and the semantics of the method as defined
+// by the code in the VM, intrinsic methods must be explicitly marked.
+//
+// Intrinsic methods are marked by the jdk.internal.HotSpotIntrinsicCandidate
+// annotation. If CheckIntrinsics is enabled, the VM performs the following
+// checks when a class C is loaded: (1) all intrinsics defined by the VM for
+// class C are present in the loaded class file and are marked;
+// (2) an intrinsic is defined by the VM for all marked methods of class C.
+//
+// If a mismatch is detected for a method, the VM behaves differently depending
+// on the type of build. A fastdebug build exits and reports an error on a mismatch.
+// A product build will not replace an unmarked library intrinsic method with
+// hand-crafted code, that is, unmarked library intrinsics are treated as ordinary
+// methods in a product build. The special treatment of a bytecode intrinsic method
+// persists even if the method not marked.
+//
+// When adding an intrinsic for a method, please make sure to appropriately
+// annotate the method in the source code. The list below contains all
+// library intrinsics followed by bytecode intrinsics. Please also make sure to
+// add the declaration of the intrinsic to the approriate section of the list.
 #define VM_INTRINSICS_DO(do_intrinsic, do_class, do_name, do_signature, do_alias)                                       \
+  /* (1) Library intrinsics                                                                        */                   \
   do_intrinsic(_hashCode,                 java_lang_Object,       hashCode_name, void_int_signature,             F_R)   \
    do_name(     hashCode_name,                                   "hashCode")                                            \
   do_intrinsic(_getClass,                 java_lang_Object,       getClass_name, void_class_signature,           F_R)   \
@@ -792,12 +829,12 @@
                                                                                                                         \
   do_class(sun_nio_cs_iso8859_1_Encoder,  "sun/nio/cs/ISO_8859_1$Encoder")                                              \
   do_intrinsic(_encodeISOArray,     sun_nio_cs_iso8859_1_Encoder, encodeISOArray_name, encodeISOArray_signature, F_S)   \
-   do_name(     encodeISOArray_name,                             "encodeISOArray")                                      \
+   do_name(     encodeISOArray_name,                             "implEncodeISOArray")                                  \
    do_signature(encodeISOArray_signature,                        "([CI[BII)I")                                          \
                                                                                                                         \
   do_class(java_math_BigInteger,                      "java/math/BigInteger")                                           \
-  do_intrinsic(_multiplyToLen,      java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_R)             \
-   do_name(     multiplyToLen_name,                             "multiplyToLen")                                        \
+  do_intrinsic(_multiplyToLen,      java_math_BigInteger, multiplyToLen_name, multiplyToLen_signature, F_S)             \
+   do_name(     multiplyToLen_name,                             "implMultiplyToLen")                                    \
    do_signature(multiplyToLen_signature,                        "([II[II[I)[I")                                         \
                                                                                                                         \
   do_intrinsic(_squareToLen, java_math_BigInteger, squareToLen_name, squareToLen_signature, F_S)                        \
@@ -808,6 +845,14 @@
    do_name(     mulAdd_name,                                  "implMulAdd")                                             \
    do_signature(mulAdd_signature,                             "([I[IIII)I")                                             \
                                                                                                                         \
+  do_intrinsic(_montgomeryMultiply,      java_math_BigInteger, montgomeryMultiply_name, montgomeryMultiply_signature, F_S) \
+   do_name(     montgomeryMultiply_name,                             "implMontgomeryMultiply")                          \
+   do_signature(montgomeryMultiply_signature,                        "([I[I[IIJ[I)[I")                                  \
+                                                                                                                        \
+  do_intrinsic(_montgomerySquare,      java_math_BigInteger, montgomerySquare_name, montgomerySquare_signature, F_S)    \
+   do_name(     montgomerySquare_name,                             "implMontgomerySquare")                              \
+   do_signature(montgomerySquare_signature,                        "([I[IIJ[I)[I")                                      \
+                                                                                                                        \
   /* java/lang/ref/Reference */                                                                                         \
   do_intrinsic(_Reference_get,            java_lang_ref_Reference, get_name,    void_object_signature, F_R)             \
                                                                                                                         \
@@ -815,21 +860,21 @@
   do_class(com_sun_crypto_provider_aescrypt,      "com/sun/crypto/provider/AESCrypt")                                   \
   do_intrinsic(_aescrypt_encryptBlock, com_sun_crypto_provider_aescrypt, encryptBlock_name, byteArray_int_byteArray_int_signature, F_R)   \
   do_intrinsic(_aescrypt_decryptBlock, com_sun_crypto_provider_aescrypt, decryptBlock_name, byteArray_int_byteArray_int_signature, F_R)   \
-   do_name(     encryptBlock_name,                                 "encryptBlock")                                      \
-   do_name(     decryptBlock_name,                                 "decryptBlock")                                      \
+   do_name(     encryptBlock_name,                                 "implEncryptBlock")                                  \
+   do_name(     decryptBlock_name,                                 "implDecryptBlock")                                  \
    do_signature(byteArray_int_byteArray_int_signature,             "([BI[BI)V")                                         \
                                                                                                                         \
   do_class(com_sun_crypto_provider_cipherBlockChaining,            "com/sun/crypto/provider/CipherBlockChaining")       \
    do_intrinsic(_cipherBlockChaining_encryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, encrypt_name, byteArray_int_int_byteArray_int_signature, F_R)   \
    do_intrinsic(_cipherBlockChaining_decryptAESCrypt, com_sun_crypto_provider_cipherBlockChaining, decrypt_name, byteArray_int_int_byteArray_int_signature, F_R)   \
-   do_name(     encrypt_name,                                      "encrypt")                                           \
-   do_name(     decrypt_name,                                      "decrypt")                                           \
+   do_name(     encrypt_name,                                      "implEncrypt")                                       \
+   do_name(     decrypt_name,                                      "implDecrypt")                                       \
    do_signature(byteArray_int_int_byteArray_int_signature,         "([BII[BI)I")                                        \
                                                                                                                         \
   /* support for sun.security.provider.SHA */                                                                           \
   do_class(sun_security_provider_sha,                              "sun/security/provider/SHA")                         \
   do_intrinsic(_sha_implCompress, sun_security_provider_sha, implCompress_name, implCompress_signature, F_R)            \
-   do_name(     implCompress_name,                                 "implCompress")                                      \
+   do_name(     implCompress_name,                                 "implCompress0")                                     \
    do_signature(implCompress_signature,                            "([BI)V")                                            \
                                                                                                                         \
   /* support for sun.security.provider.SHA2 */                                                                          \
@@ -843,7 +888,7 @@
   /* support for sun.security.provider.DigestBase */                                                                    \
   do_class(sun_security_provider_digestbase,                       "sun/security/provider/DigestBase")                  \
   do_intrinsic(_digestBase_implCompressMB, sun_security_provider_digestbase, implCompressMB_name, implCompressMB_signature, F_R)   \
-   do_name(     implCompressMB_name,                               "implCompressMultiBlock")                            \
+   do_name(     implCompressMB_name,                               "implCompressMultiBlock0")                           \
    do_signature(implCompressMB_signature,                          "([BII)I")                                           \
                                                                                                                         \
   /* support for com.sun.crypto.provider.GHASH */                                                                       \
@@ -857,17 +902,18 @@
   do_intrinsic(_updateCRC32,               java_util_zip_CRC32,   update_name, int2_int_signature,               F_SN)  \
    do_name(     update_name,                                      "update")                                             \
   do_intrinsic(_updateBytesCRC32,          java_util_zip_CRC32,   updateBytes_name, updateBytes_signature,       F_SN)  \
-   do_name(     updateBytes_name,                                "updateBytes")                                         \
+   do_name(     updateBytes_name,                                "updateBytes0")                                        \
    do_signature(updateBytes_signature,                           "(I[BII)I")                                            \
   do_intrinsic(_updateByteBufferCRC32,     java_util_zip_CRC32,   updateByteBuffer_name, updateByteBuffer_signature, F_SN) \
-   do_name(     updateByteBuffer_name,                           "updateByteBuffer")                                    \
+   do_name(     updateByteBuffer_name,                           "updateByteBuffer0")                                   \
    do_signature(updateByteBuffer_signature,                      "(IJII)I")                                             \
                                                                                                                         \
   /* support for java.util.zip.CRC32C */                                                                                \
   do_class(java_util_zip_CRC32C,          "java/util/zip/CRC32C")                                                       \
-  do_intrinsic(_updateBytesCRC32C,         java_util_zip_CRC32C,  updateBytes_name, updateBytes_signature,       F_S)   \
-  do_intrinsic(_updateDirectByteBufferCRC32C, java_util_zip_CRC32C, updateDirectByteBuffer_name, updateByteBuffer_signature, F_S) \
-   do_name(     updateDirectByteBuffer_name,                     "updateDirectByteBuffer")                              \
+  do_intrinsic(_updateBytesCRC32C,         java_util_zip_CRC32C,  updateBytes_C_name, updateBytes_signature,       F_S) \
+   do_name(     updateBytes_C_name,                               "updateBytes")                                        \
+  do_intrinsic(_updateDirectByteBufferCRC32C, java_util_zip_CRC32C, updateDirectByteBuffer_C_name, updateByteBuffer_signature, F_S) \
+   do_name(    updateDirectByteBuffer_C_name,                     "updateDirectByteBuffer")                             \
                                                                                                                         \
   /* support for sun.misc.Unsafe */                                                                                     \
   do_class(sun_misc_Unsafe,               "sun/misc/Unsafe")                                                            \
@@ -878,12 +924,6 @@
   do_intrinsic(_copyMemory,               sun_misc_Unsafe,        copyMemory_name, copyMemory_signature,         F_RN)  \
    do_name(     copyMemory_name,                                 "copyMemory")                                          \
    do_signature(copyMemory_signature,         "(Ljava/lang/Object;JLjava/lang/Object;JJ)V")                             \
-  do_intrinsic(_park,                     sun_misc_Unsafe,        park_name, park_signature,                     F_RN)  \
-   do_name(     park_name,                                       "park")                                                \
-   do_signature(park_signature,                                  "(ZJ)V")                                               \
-  do_intrinsic(_unpark,                   sun_misc_Unsafe,        unpark_name, unpark_signature,                 F_RN)  \
-   do_name(     unpark_name,                                     "unpark")                                              \
-   do_alias(    unpark_signature,                               /*(LObject;)V*/ object_void_signature)                  \
   do_intrinsic(_loadFence,                sun_misc_Unsafe,        loadFence_name, loadFence_signature,           F_RN)  \
    do_name(     loadFence_name,                                  "loadFence")                                           \
    do_alias(    loadFence_signature,                              void_method_signature)                                \
@@ -1066,11 +1106,15 @@
   do_intrinsic(_getAndSetObject,          sun_misc_Unsafe,        getAndSetObject_name, getAndSetObject_signature,  F_R)\
    do_name(     getAndSetObject_name,                             "getAndSetObject")                                    \
    do_signature(getAndSetObject_signature,                        "(Ljava/lang/Object;JLjava/lang/Object;)Ljava/lang/Object;" ) \
-    /*== LAST_COMPILER_INLINE*/                                                                                         \
-    /*the compiler does have special inlining code for these; bytecode inline is just fine */                           \
                                                                                                                         \
-  do_intrinsic(_fillInStackTrace,         java_lang_Throwable, fillInStackTrace_name, void_throwable_signature,  F_RNY) \
-                                                                                                                          \
+   /* (2) Bytecode intrinsics                                                                        */                 \
+                                                                                                                        \
+  do_intrinsic(_park,                     sun_misc_Unsafe,        park_name, park_signature,                     F_RN)  \
+   do_name(     park_name,                                       "park")                                                \
+   do_signature(park_signature,                                  "(ZJ)V")                                               \
+  do_intrinsic(_unpark,                   sun_misc_Unsafe,        unpark_name, unpark_signature,                 F_RN)  \
+   do_name(     unpark_name,                                     "unpark")                                              \
+   do_alias(    unpark_signature,                               /*(LObject;)V*/ object_void_signature)                  \
   do_intrinsic(_StringBuilder_void,   java_lang_StringBuilder, object_initializer_name, void_method_signature,     F_R)   \
   do_intrinsic(_StringBuilder_int,    java_lang_StringBuilder, object_initializer_name, int_void_signature,        F_R)   \
   do_intrinsic(_StringBuilder_String, java_lang_StringBuilder, object_initializer_name, string_void_signature,     F_R)   \
--- a/src/share/vm/interpreter/rewriter.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/interpreter/rewriter.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -107,7 +107,7 @@
 // more complicated solution is required.  A special return bytecode
 // is used only by Object.<init> to signal the finalization
 // registration point.  Additionally local 0 must be preserved so it's
-// available to pass to the registration function.  For simplicty we
+// available to pass to the registration function.  For simplicity we
 // require that local 0 is never overwritten so it's available as an
 // argument for registration.
 
--- a/src/share/vm/oops/method.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/oops/method.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -82,7 +82,8 @@
     _dont_inline          = 1 << 3,
     _hidden               = 1 << 4,
     _has_injected_profile = 1 << 5,
-    _running_emcp         = 1 << 6
+    _running_emcp         = 1 << 6,
+    _intrinsic_candidate  = 1 << 7
   };
   u1 _flags;
 
@@ -815,6 +816,13 @@
     _flags = x ? (_flags | _hidden) : (_flags & ~_hidden);
   }
 
+  bool intrinsic_candidate() {
+    return (_flags & _intrinsic_candidate) != 0;
+  }
+  void set_intrinsic_candidate(bool x) {
+    _flags = x ? (_flags | _intrinsic_candidate) : (_flags & ~_intrinsic_candidate);
+  }
+
   bool has_injected_profile() {
     return (_flags & _has_injected_profile) != 0;
   }
--- a/src/share/vm/opto/c2_globals.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/opto/c2_globals.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -688,6 +688,12 @@
   product(bool, UseMulAddIntrinsic, false,                                  \
           "Enables intrinsification of BigInteger.mulAdd()")                \
                                                                             \
+  product(bool, UseMontgomeryMultiplyIntrinsic, false,                      \
+          "Enables intrinsification of BigInteger.montgomeryMultiply()")    \
+                                                                            \
+  product(bool, UseMontgomerySquareIntrinsic, false,                        \
+          "Enables intrinsification of BigInteger.montgomerySquare()")      \
+                                                                            \
   product(bool, UseTypeSpeculation, true,                                   \
           "Speculatively propagate types from profiles")                    \
                                                                             \
--- a/src/share/vm/opto/escape.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/opto/escape.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -976,8 +976,10 @@
                   strcmp(call->as_CallLeaf()->_name, "sha512_implCompressMB") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "multiplyToLen") == 0 ||
                   strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 ||
-                  strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0)
-                  ))) {
+                  strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 ||
+                  strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0)
+                 ))) {
             call->dump();
             fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
           }
--- a/src/share/vm/opto/generateOptoStub.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/opto/generateOptoStub.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -118,19 +118,14 @@
   // The C routines gets the base of thread-local storage passed in as an
   // extra argument.  Not all calls need it, but its cheap to add here.
   for (uint pcnt = cnt; pcnt < parm_cnt; pcnt++, cnt++) {
-    // Convert ints to longs if required.
-    if (CCallingConventionRequiresIntsAsLongs && jdomain->field_at(pcnt)->isa_int()) {
-      fields[cnt++] = TypeLong::LONG;
-      fields[cnt]   = Type::HALF; // must add an additional half for a long
-    } else {
-      fields[cnt] = jdomain->field_at(pcnt);
-    }
+    fields[cnt] = jdomain->field_at(pcnt);
   }
 
   fields[cnt++] = TypeRawPtr::BOTTOM; // Thread-local storage
   // Also pass in the caller's PC, if asked for.
-  if( return_pc )
+  if (return_pc) {
     fields[cnt++] = TypeRawPtr::BOTTOM; // Return PC
+  }
 
   const TypeTuple* domain = TypeTuple::make(cnt,fields);
   // The C routine we are about to call cannot return an oop; it can block on
@@ -143,21 +138,22 @@
   const Type **rfields = TypeTuple::fields(jrange->cnt() - TypeFunc::Parms);
   // Fixup oop returns
   int retval_ptr = retval->isa_oop_ptr();
-  if( retval_ptr ) {
+  if (retval_ptr) {
     assert( pass_tls, "Oop must be returned thru TLS" );
     // Fancy-jumps return address; others return void
     rfields[TypeFunc::Parms] = is_fancy_jump ? TypeRawPtr::BOTTOM : Type::TOP;
 
-  } else if( retval->isa_int() ) { // Returning any integer subtype?
+  } else if (retval->isa_int()) { // Returning any integer subtype?
     // "Fatten" byte, char & short return types to 'int' to show that
     // the native C code can return values with junk high order bits.
     // We'll sign-extend it below later.
     rfields[TypeFunc::Parms] = TypeInt::INT; // It's "dirty" and needs sign-ext
 
-  } else if( jrange->cnt() >= TypeFunc::Parms+1 ) { // Else copy other types
+  } else if (jrange->cnt() >= TypeFunc::Parms+1) { // Else copy other types
     rfields[TypeFunc::Parms] = jrange->field_at(TypeFunc::Parms);
-    if( jrange->cnt() == TypeFunc::Parms+2 )
+    if (jrange->cnt() == TypeFunc::Parms+2) {
       rfields[TypeFunc::Parms+1] = jrange->field_at(TypeFunc::Parms+1);
+    }
   }
   const TypeTuple* range = TypeTuple::make(jrange->cnt(),rfields);
 
@@ -181,14 +177,7 @@
   // A little too aggressive on the parm copy; return address is not an input
   call->set_req(TypeFunc::ReturnAdr, top());
   for (; i < parm_cnt; i++) { // Regular input arguments
-    // Convert ints to longs if required.
-    if (CCallingConventionRequiresIntsAsLongs && jdomain->field_at(i)->isa_int()) {
-      Node* int_as_long = _gvn.transform(new ConvI2LNode(map()->in(i)));
-      call->init_req(cnt++, int_as_long); // long
-      call->init_req(cnt++, top());       // half
-    } else {
-      call->init_req(cnt++, map()->in(i));
-    }
+    call->init_req(cnt++, map()->in(i));
   }
 
   call->init_req( cnt++, thread );
--- a/src/share/vm/opto/library_call.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/opto/library_call.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -297,6 +297,8 @@
   bool inline_multiplyToLen();
   bool inline_squareToLen();
   bool inline_mulAdd();
+  bool inline_montgomeryMultiply();
+  bool inline_montgomerySquare();
 
   bool inline_profileBoolean();
   bool inline_isCompileConstant();
@@ -508,6 +510,13 @@
     if (!UseMulAddIntrinsic) return NULL;
     break;
 
+  case vmIntrinsics::_montgomeryMultiply:
+     if (!UseMontgomeryMultiplyIntrinsic) return NULL;
+    break;
+  case vmIntrinsics::_montgomerySquare:
+     if (!UseMontgomerySquareIntrinsic) return NULL;
+    break;
+
   case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
   case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
     if (!UseAESIntrinsics) return NULL;
@@ -642,7 +651,8 @@
   const int bci    = kit.bci();
 
   // Try to inline the intrinsic.
-  if (kit.try_to_inline(_last_predicate)) {
+  if ((CheckIntrinsics ? callee->intrinsic_candidate() : true) &&
+      kit.try_to_inline(_last_predicate)) {
     if (C->print_intrinsics() || C->print_inlining()) {
       C->print_inlining(callee, jvms->depth() - 1, bci, is_virtual() ? "(intrinsic, virtual)" : "(intrinsic)");
     }
@@ -663,7 +673,13 @@
   if (C->print_intrinsics() || C->print_inlining()) {
     if (jvms->has_method()) {
       // Not a root compile.
-      const char* msg = is_virtual() ? "failed to inline (intrinsic, virtual)" : "failed to inline (intrinsic)";
+      const char* msg;
+      if (callee->intrinsic_candidate()) {
+        msg = is_virtual() ? "failed to inline (intrinsic, virtual)" : "failed to inline (intrinsic)";
+      } else {
+        msg = is_virtual() ? "failed to inline (intrinsic, virtual), method not annotated"
+                           : "failed to inline (intrinsic), method not annotated";
+      }
       C->print_inlining(callee, jvms->depth() - 1, bci, msg);
     } else {
       // Root compile
@@ -942,6 +958,11 @@
   case vmIntrinsics::_mulAdd:
     return inline_mulAdd();
 
+  case vmIntrinsics::_montgomeryMultiply:
+    return inline_montgomeryMultiply();
+  case vmIntrinsics::_montgomerySquare:
+    return inline_montgomerySquare();
+
   case vmIntrinsics::_ghash_processBlocks:
     return inline_ghash_processBlocks();
 
@@ -5244,7 +5265,7 @@
 
 //-------------inline_multiplyToLen-----------------------------------
 bool LibraryCallKit::inline_multiplyToLen() {
-  assert(UseMultiplyToLenIntrinsic, "not implementated on this platform");
+  assert(UseMultiplyToLenIntrinsic, "not implemented on this platform");
 
   address stubAddr = StubRoutines::multiplyToLen();
   if (stubAddr == NULL) {
@@ -5254,11 +5275,12 @@
 
   assert(callee()->signature()->size() == 5, "multiplyToLen has 5 parameters");
 
-  Node* x    = argument(1);
-  Node* xlen = argument(2);
-  Node* y    = argument(3);
-  Node* ylen = argument(4);
-  Node* z    = argument(5);
+  // no receiver because it is a static method
+  Node* x    = argument(0);
+  Node* xlen = argument(1);
+  Node* y    = argument(2);
+  Node* ylen = argument(3);
+  Node* z    = argument(4);
 
   const Type* x_type = x->Value(&_gvn);
   const Type* y_type = y->Value(&_gvn);
@@ -5437,6 +5459,121 @@
   return true;
 }
 
+//-------------inline_montgomeryMultiply-----------------------------------
+bool LibraryCallKit::inline_montgomeryMultiply() {
+  address stubAddr = StubRoutines::montgomeryMultiply();
+  if (stubAddr == NULL) {
+    return false; // Intrinsic's stub is not implemented on this platform
+  }
+
+  assert(UseMontgomeryMultiplyIntrinsic, "not implemented on this platform");
+  const char* stubName = "montgomery_square";
+
+  assert(callee()->signature()->size() == 7, "montgomeryMultiply has 7 parameters");
+
+  Node* a    = argument(0);
+  Node* b    = argument(1);
+  Node* n    = argument(2);
+  Node* len  = argument(3);
+  Node* inv  = argument(4);
+  Node* m    = argument(6);
+
+  const Type* a_type = a->Value(&_gvn);
+  const TypeAryPtr* top_a = a_type->isa_aryptr();
+  const Type* b_type = b->Value(&_gvn);
+  const TypeAryPtr* top_b = b_type->isa_aryptr();
+  const Type* n_type = a->Value(&_gvn);
+  const TypeAryPtr* top_n = n_type->isa_aryptr();
+  const Type* m_type = a->Value(&_gvn);
+  const TypeAryPtr* top_m = m_type->isa_aryptr();
+  if (top_a  == NULL || top_a->klass()  == NULL ||
+      top_b == NULL || top_b->klass()  == NULL ||
+      top_n == NULL || top_n->klass()  == NULL ||
+      top_m == NULL || top_m->klass()  == NULL) {
+    // failed array check
+    return false;
+  }
+
+  BasicType a_elem = a_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType b_elem = b_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType n_elem = n_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType m_elem = m_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  if (a_elem != T_INT || b_elem != T_INT || n_elem != T_INT || m_elem != T_INT) {
+    return false;
+  }
+
+  // Make the call
+  {
+    Node* a_start = array_element_address(a, intcon(0), a_elem);
+    Node* b_start = array_element_address(b, intcon(0), b_elem);
+    Node* n_start = array_element_address(n, intcon(0), n_elem);
+    Node* m_start = array_element_address(m, intcon(0), m_elem);
+
+    Node* call = make_runtime_call(RC_LEAF,
+                                   OptoRuntime::montgomeryMultiply_Type(),
+                                   stubAddr, stubName, TypePtr::BOTTOM,
+                                   a_start, b_start, n_start, len, inv, top(),
+                                   m_start);
+    set_result(m);
+  }
+
+  return true;
+}
+
+bool LibraryCallKit::inline_montgomerySquare() {
+  address stubAddr = StubRoutines::montgomerySquare();
+  if (stubAddr == NULL) {
+    return false; // Intrinsic's stub is not implemented on this platform
+  }
+
+  assert(UseMontgomerySquareIntrinsic, "not implemented on this platform");
+  const char* stubName = "montgomery_square";
+
+  assert(callee()->signature()->size() == 6, "montgomerySquare has 6 parameters");
+
+  Node* a    = argument(0);
+  Node* n    = argument(1);
+  Node* len  = argument(2);
+  Node* inv  = argument(3);
+  Node* m    = argument(5);
+
+  const Type* a_type = a->Value(&_gvn);
+  const TypeAryPtr* top_a = a_type->isa_aryptr();
+  const Type* n_type = a->Value(&_gvn);
+  const TypeAryPtr* top_n = n_type->isa_aryptr();
+  const Type* m_type = a->Value(&_gvn);
+  const TypeAryPtr* top_m = m_type->isa_aryptr();
+  if (top_a  == NULL || top_a->klass()  == NULL ||
+      top_n == NULL || top_n->klass()  == NULL ||
+      top_m == NULL || top_m->klass()  == NULL) {
+    // failed array check
+    return false;
+  }
+
+  BasicType a_elem = a_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType n_elem = n_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  BasicType m_elem = m_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type();
+  if (a_elem != T_INT || n_elem != T_INT || m_elem != T_INT) {
+    return false;
+  }
+
+  // Make the call
+  {
+    Node* a_start = array_element_address(a, intcon(0), a_elem);
+    Node* n_start = array_element_address(n, intcon(0), n_elem);
+    Node* m_start = array_element_address(m, intcon(0), m_elem);
+
+    Node* call = make_runtime_call(RC_LEAF,
+                                   OptoRuntime::montgomerySquare_Type(),
+                                   stubAddr, stubName, TypePtr::BOTTOM,
+                                   a_start, n_start, len, inv, top(),
+                                   m_start);
+    set_result(m);
+  }
+
+  return true;
+}
+
 
 /**
  * Calculate CRC32 for byte.
--- a/src/share/vm/opto/loopTransform.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/opto/loopTransform.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -2941,13 +2941,6 @@
     _igvn.register_new_node_with_optimizer(store_value);
   }
 
-  if (CCallingConventionRequiresIntsAsLongs &&
-      // See StubRoutines::select_fill_function for types. FLOAT has been converted to INT.
-      (t == T_FLOAT || t == T_INT ||  is_subword_type(t))) {
-    store_value = new ConvI2LNode(store_value);
-    _igvn.register_new_node_with_optimizer(store_value);
-  }
-
   Node* mem_phi = store->in(MemNode::Memory);
   Node* result_ctrl;
   Node* result_mem;
@@ -2957,9 +2950,6 @@
   uint cnt = 0;
   call->init_req(TypeFunc::Parms + cnt++, from);
   call->init_req(TypeFunc::Parms + cnt++, store_value);
-  if (CCallingConventionRequiresIntsAsLongs) {
-    call->init_req(TypeFunc::Parms + cnt++, C->top());
-  }
 #ifdef _LP64
   len = new ConvI2LNode(len);
   _igvn.register_new_node_with_optimizer(len);
--- a/src/share/vm/opto/runtime.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/opto/runtime.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -779,18 +779,10 @@
 const TypeFunc* OptoRuntime::array_fill_Type() {
   const Type** fields;
   int argp = TypeFunc::Parms;
-  if (CCallingConventionRequiresIntsAsLongs) {
   // create input type (domain): pointer, int, size_t
-    fields = TypeTuple::fields(3 LP64_ONLY( + 2));
-    fields[argp++] = TypePtr::NOTNULL;
-    fields[argp++] = TypeLong::LONG;
-    fields[argp++] = Type::HALF;
-  } else {
-    // create input type (domain): pointer, int, size_t
-    fields = TypeTuple::fields(3 LP64_ONLY( + 1));
-    fields[argp++] = TypePtr::NOTNULL;
-    fields[argp++] = TypeInt::INT;
-  }
+  fields = TypeTuple::fields(3 LP64_ONLY( + 1));
+  fields[argp++] = TypePtr::NOTNULL;
+  fields[argp++] = TypeInt::INT;
   fields[argp++] = TypeX_X;               // size in whatevers (size_t)
   LP64_ONLY(fields[argp++] = Type::HALF); // other half of long length
   const TypeTuple *domain = TypeTuple::make(argp, fields);
@@ -1010,6 +1002,53 @@
   return TypeFunc::make(domain, range);
 }
 
+const TypeFunc* OptoRuntime::montgomeryMultiply_Type() {
+  // create input type (domain)
+  int num_args      = 7;
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypePtr::NOTNULL;    // a
+  fields[argp++] = TypePtr::NOTNULL;    // b
+  fields[argp++] = TypePtr::NOTNULL;    // n
+  fields[argp++] = TypeInt::INT;        // len
+  fields[argp++] = TypeLong::LONG;      // inv
+  fields[argp++] = Type::HALF;
+  fields[argp++] = TypePtr::NOTNULL;    // result
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // result type needed
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = TypePtr::NOTNULL;
+
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+  return TypeFunc::make(domain, range);
+}
+
+const TypeFunc* OptoRuntime::montgomerySquare_Type() {
+  // create input type (domain)
+  int num_args      = 6;
+  int argcnt = num_args;
+  const Type** fields = TypeTuple::fields(argcnt);
+  int argp = TypeFunc::Parms;
+  fields[argp++] = TypePtr::NOTNULL;    // a
+  fields[argp++] = TypePtr::NOTNULL;    // n
+  fields[argp++] = TypeInt::INT;        // len
+  fields[argp++] = TypeLong::LONG;      // inv
+  fields[argp++] = Type::HALF;
+  fields[argp++] = TypePtr::NOTNULL;    // result
+  assert(argp == TypeFunc::Parms+argcnt, "correct decoding");
+  const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+  // result type needed
+  fields = TypeTuple::fields(1);
+  fields[TypeFunc::Parms+0] = TypePtr::NOTNULL;
+
+  const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields);
+  return TypeFunc::make(domain, range);
+}
+
 // GHASH block processing
 const TypeFunc* OptoRuntime::ghash_processBlocks_Type() {
     int argcnt = 4;
--- a/src/share/vm/opto/runtime.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/opto/runtime.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -311,6 +311,8 @@
   static const TypeFunc* digestBase_implCompressMB_Type();
 
   static const TypeFunc* multiplyToLen_Type();
+  static const TypeFunc* montgomeryMultiply_Type();
+  static const TypeFunc* montgomerySquare_Type();
 
   static const TypeFunc* squareToLen_Type();
 
--- a/src/share/vm/runtime/globals.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/runtime/globals.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -728,7 +728,8 @@
           "Control whether AES instructions can be used on x86/x64")        \
                                                                             \
   product(bool, UseSHA, false,                                              \
-          "Control whether SHA instructions can be used on SPARC")          \
+          "Control whether SHA instructions can be used "                   \
+          "on SPARC and on ARM")                                            \
                                                                             \
   product(bool, UseGHASHIntrinsics, false,                                  \
           "Use intrinsics for GHASH versions of crypto")                    \
@@ -837,13 +838,16 @@
           "Use intrinsics for AES versions of crypto")                      \
                                                                             \
   product(bool, UseSHA1Intrinsics, false,                                   \
-          "Use intrinsics for SHA-1 crypto hash function")                  \
+          "Use intrinsics for SHA-1 crypto hash function. "                 \
+          "Requires that UseSHA is enabled.")                                \
                                                                             \
   product(bool, UseSHA256Intrinsics, false,                                 \
-          "Use intrinsics for SHA-224 and SHA-256 crypto hash functions")   \
+          "Use intrinsics for SHA-224 and SHA-256 crypto hash functions. "  \
+          "Requires that UseSHA is enabled.")                               \
                                                                             \
   product(bool, UseSHA512Intrinsics, false,                                 \
-          "Use intrinsics for SHA-384 and SHA-512 crypto hash functions")   \
+          "Use intrinsics for SHA-384 and SHA-512 crypto hash functions. "  \
+          "Requires that UseSHA is enabled.")                               \
                                                                             \
   product(bool, UseCRC32Intrinsics, false,                                  \
           "use intrinsics for java.util.zip.CRC32")                         \
@@ -4124,7 +4128,16 @@
                                                                             \
   product_pd(bool, PreserveFramePointer,                                    \
              "Use the FP register for holding the frame pointer "           \
-             "and not as a general purpose register.")
+             "and not as a general purpose register.")                      \
+                                                                            \
+  diagnostic(bool, CheckIntrinsics, trueInDebug,                            \
+             "When a class C is loaded, check that "                        \
+             "(1) all intrinsics defined by the VM for class C are present "\
+             "in the loaded class file and are marked with the "            \
+             "@HotSpotIntrinsicCandidate annotation and also that "         \
+             "(2) there is an intrinsic registered for all loaded methods " \
+             "that are annotated with the @HotSpotIntrinsicCandidate "      \
+             "annotation.")
 
 /*
  *  Macros for factoring of globals
--- a/src/share/vm/runtime/sharedRuntime.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -2642,71 +2642,6 @@
   GC_locker::unlock_critical(thread);
 JRT_END
 
-int SharedRuntime::convert_ints_to_longints_argcnt(int in_args_count, BasicType* in_sig_bt) {
-  int argcnt = in_args_count;
-  if (CCallingConventionRequiresIntsAsLongs) {
-    for (int in = 0; in < in_args_count; in++) {
-      BasicType bt = in_sig_bt[in];
-      switch (bt) {
-        case T_BOOLEAN:
-        case T_CHAR:
-        case T_BYTE:
-        case T_SHORT:
-        case T_INT:
-          argcnt++;
-          break;
-        default:
-          break;
-      }
-    }
-  } else {
-    assert(0, "This should not be needed on this platform");
-  }
-
-  return argcnt;
-}
-
-void SharedRuntime::convert_ints_to_longints(int i2l_argcnt, int& in_args_count,
-                                             BasicType*& in_sig_bt, VMRegPair*& in_regs) {
-  if (CCallingConventionRequiresIntsAsLongs) {
-    VMRegPair *new_in_regs   = NEW_RESOURCE_ARRAY(VMRegPair, i2l_argcnt);
-    BasicType *new_in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, i2l_argcnt);
-
-    int argcnt = 0;
-    for (int in = 0; in < in_args_count; in++, argcnt++) {
-      BasicType bt  = in_sig_bt[in];
-      VMRegPair reg = in_regs[in];
-      switch (bt) {
-        case T_BOOLEAN:
-        case T_CHAR:
-        case T_BYTE:
-        case T_SHORT:
-        case T_INT:
-          // Convert (bt) to (T_LONG,bt).
-          new_in_sig_bt[argcnt] = T_LONG;
-          new_in_sig_bt[argcnt+1] = bt;
-          assert(reg.first()->is_valid() && !reg.second()->is_valid(), "");
-          new_in_regs[argcnt].set2(reg.first());
-          new_in_regs[argcnt+1].set_bad();
-          argcnt++;
-          break;
-        default:
-          // No conversion needed.
-          new_in_sig_bt[argcnt] = bt;
-          new_in_regs[argcnt]   = reg;
-          break;
-      }
-    }
-    assert(argcnt == i2l_argcnt, "must match");
-
-    in_regs = new_in_regs;
-    in_sig_bt = new_in_sig_bt;
-    in_args_count = i2l_argcnt;
-  } else {
-    assert(0, "This should not be needed on this platform");
-  }
-}
-
 // -------------------------------------------------------------------------
 // Java-Java calling convention
 // (what you use when Java calls Java)
--- a/src/share/vm/runtime/sharedRuntime.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/runtime/sharedRuntime.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -145,6 +145,12 @@
   static double dsqrt(double f);
 #endif
 
+  // Montgomery multiplication
+  static void montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints,
+                                  jint len, jlong inv, jint *m_ints);
+  static void montgomery_square(jint *a_ints, jint *n_ints,
+                                jint len, jlong inv, jint *m_ints);
+
 #ifdef __SOFTFP__
   // C++ compiler generates soft float instructions as well as passing
   // float and double in registers.
--- a/src/share/vm/runtime/stubRoutines.cpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/runtime/stubRoutines.cpp	Fri Jul 10 16:37:41 2015 +0200
@@ -143,6 +143,8 @@
 address StubRoutines::_multiplyToLen = NULL;
 address StubRoutines::_squareToLen = NULL;
 address StubRoutines::_mulAdd = NULL;
+address StubRoutines::_montgomeryMultiply = NULL;
+address StubRoutines::_montgomerySquare = NULL;
 
 double (* StubRoutines::_intrinsic_log   )(double) = NULL;
 double (* StubRoutines::_intrinsic_log10 )(double) = NULL;
--- a/src/share/vm/runtime/stubRoutines.hpp	Thu Jul 09 15:39:05 2015 -0400
+++ b/src/share/vm/runtime/stubRoutines.hpp	Fri Jul 10 16:37:41 2015 +0200
@@ -202,6 +202,8 @@
   static address _multiplyToLen;
   static address _squareToLen;
   static address _mulAdd;
+  static address _montgomeryMultiply;
+  static address _montgomerySquare;
 
   // These are versions of the java.lang.Math methods which perform
   // the same operations as the intrinsic version.  They are used for
@@ -366,6 +368,8 @@
   static address multiplyToLen()       {return _multiplyToLen; }
   static address squareToLen()         {return _squareToLen; }
   static address mulAdd()              {return _mulAdd; }
+  static address montgomeryMultiply()  { return _montgomeryMultiply; }
+  static address montgomerySquare()    { return _montgomerySquare; }
 
   static address select_fill_function(BasicType t, bool aligned, const char* &name);
 
--- a/test/compiler/dependencies/MonomorphicObjectCall/TestMonomorphicObjectCall.java	Thu Jul 09 15:39:05 2015 -0400
+++ b/test/compiler/dependencies/MonomorphicObjectCall/TestMonomorphicObjectCall.java	Fri Jul 10 16:37:41 2015 +0200
@@ -34,6 +34,7 @@
  * @library /testlibrary
  * @modules java.base/sun.misc
  *          java.management
+ *          java.base/jdk.internal
  * @compile -XDignore.symbol.file java/lang/Object.java TestMonomorphicObjectCall.java
  * @run main TestMonomorphicObjectCall
  */
--- a/test/compiler/dependencies/MonomorphicObjectCall/java/lang/Object.java	Thu Jul 09 15:39:05 2015 -0400
+++ b/test/compiler/dependencies/MonomorphicObjectCall/java/lang/Object.java	Fri Jul 10 16:37:41 2015 +0200
@@ -25,25 +25,33 @@
 
 package java.lang;
 
+import jdk.internal.HotSpotIntrinsicCandidate;
+
 /**
  * Slightly modified version of java.lang.Object that replaces
  * finalize() by finalizeObject() to avoid overriding in subclasses.
  */
 public class Object {
 
+    @HotSpotIntrinsicCandidate
+    public Object() {}
+
     private static native void registerNatives();
     static {
         registerNatives();
     }
 
+    @HotSpotIntrinsicCandidate
     public final native Class<?> getClass();
 
+    @HotSpotIntrinsicCandidate
     public native int hashCode();
 
     public boolean equals(Object obj) {
         return (this == obj);
     }
 
+    @HotSpotIntrinsicCandidate
     protected native Object clone() throws CloneNotSupportedException;
 
     public String toString() {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/montgomerymultiply/MontgomeryMultiplyTest.java	Fri Jul 10 16:37:41 2015 +0200
@@ -0,0 +1,277 @@
+//
+// Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 2015, Red Hat Inc. All rights reserved.
+// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+//
+// This code is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License version 2 only, as
+// published by the Free Software Foundation.
+//
+// This code is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+// version 2 for more details (a copy is included in the LICENSE file that
+// accompanied this code).
+//
+// You should have received a copy of the GNU General Public License version
+// 2 along with this work; if not, write to the Free Software Foundation,
+// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+// or visit www.oracle.com if you need additional information or have any
+// questions.
+//
+//
+
+import java.lang.invoke.MethodHandle;
+import java.lang.invoke.MethodHandles;
+import java.lang.invoke.MethodType;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+import java.lang.reflect.Method;
+import java.math.BigInteger;
+import java.util.Arrays;
+import java.util.Random;
+
+/**
+ * @test
+ * @bug 8130150
+ * @library /testlibrary
+ * @summary Verify that the Montgomery multiply intrinsic works and correctly checks its arguments.
+ */
+
+public class MontgomeryMultiplyTest {
+
+    static final MethodHandles.Lookup lookup = MethodHandles.lookup();
+
+    static final MethodHandle montgomeryMultiplyHandle, montgomerySquareHandle;
+    static final MethodHandle bigIntegerConstructorHandle;
+    static final Field bigIntegerMagField;
+
+    static {
+       // Use reflection to gain access to the methods we want to test.
+        try {
+            Method m = BigInteger.class.getDeclaredMethod("montgomeryMultiply",
+                /*a*/int[].class, /*b*/int[].class, /*n*/int[].class, /*len*/int.class,
+                /*inv*/long.class, /*product*/int[].class);
+            m.setAccessible(true);
+            montgomeryMultiplyHandle = lookup.unreflect(m);
+
+            m = BigInteger.class.getDeclaredMethod("montgomerySquare",
+                /*a*/int[].class, /*n*/int[].class, /*len*/int.class,
+                /*inv*/long.class, /*product*/int[].class);
+            m.setAccessible(true);
+            montgomerySquareHandle = lookup.unreflect(m);
+
+            Constructor c
+                = BigInteger.class.getDeclaredConstructor(int.class, int[].class);
+            c.setAccessible(true);
+            bigIntegerConstructorHandle = lookup.unreflectConstructor(c);
+
+            bigIntegerMagField = BigInteger.class.getDeclaredField("mag");
+            bigIntegerMagField.setAccessible(true);
+
+        } catch (Throwable ex) {
+            throw new RuntimeException(ex);
+        }
+    }
+
+    // Invoke either BigInteger.montgomeryMultiply or BigInteger.montgomerySquare.
+    int[] montgomeryMultiply(int[] a, int[] b, int[] n, int len, long inv,
+                             int[] product) throws Throwable {
+        int[] result =
+            (a == b) ? (int[]) montgomerySquareHandle.invokeExact(a, n, len, inv, product)
+                     : (int[]) montgomeryMultiplyHandle.invokeExact(a, b, n, len, inv, product);
+        return Arrays.copyOf(result, len);
+    }
+
+    // Invoke the private constructor BigInteger(int[]).
+    BigInteger newBigInteger(int[] val) throws Throwable {
+        return (BigInteger) bigIntegerConstructorHandle.invokeExact(1, val);
+    }
+
+    // Get the private field BigInteger.mag
+    int[] mag(BigInteger n) {
+        try {
+            return (int[]) bigIntegerMagField.get(n);
+        } catch (Exception ex) {
+            throw new RuntimeException(ex);
+        }
+    }
+
+    // Montgomery multiplication
+    // Calculate a * b * r^-1 mod n)
+    //
+    // R is a power of the word size
+    // N' = R^-1 mod N
+    //
+    // T := ab
+    // m := (T mod R)N' mod R [so 0 <= m < R]
+    // t := (T + mN)/R
+    // if t >= N then return t - N else return t
+    //
+    BigInteger montgomeryMultiply(BigInteger a, BigInteger b, BigInteger N,
+            int len, BigInteger n_prime)
+            throws Throwable {
+        BigInteger T = a.multiply(b);
+        BigInteger R = BigInteger.ONE.shiftLeft(len*32);
+        BigInteger mask = R.subtract(BigInteger.ONE);
+        BigInteger m = (T.and(mask)).multiply(n_prime);
+        m = m.and(mask); // i.e. m.mod(R)
+        T = T.add(m.multiply(N));
+        T = T.shiftRight(len*32); // i.e. T.divide(R)
+        if (T.compareTo(N) > 0) {
+            T = T.subtract(N);
+        }
+        return T;
+    }
+
+    // Call the Montgomery multiply intrinsic.
+    BigInteger montgomeryMultiply(int[] a_words, int[] b_words, int[] n_words,
+            int len, BigInteger inv)
+            throws Throwable {
+        BigInteger t = montgomeryMultiply(
+                newBigInteger(a_words),
+                newBigInteger(b_words),
+                newBigInteger(n_words),
+                len, inv);
+        return t;
+    }
+
+    // Check that the Montgomery multiply intrinsic returns the same
+    // result as the longhand calculation.
+    void check(int[] a_words, int[] b_words, int[] n_words, int len, BigInteger inv)
+            throws Throwable {
+        BigInteger n = newBigInteger(n_words);
+        BigInteger slow = montgomeryMultiply(a_words, b_words, n_words, len, inv);
+        BigInteger fast
+            = newBigInteger(montgomeryMultiply
+                            (a_words, b_words, n_words, len, inv.longValue(), null));
+        // The intrinsic may not return the same value as the longhand
+        // calculation but they must have the same residue mod N.
+        if (!slow.mod(n).equals(fast.mod(n))) {
+            throw new RuntimeException();
+        }
+    }
+
+    Random rnd = new Random(0);
+
+    // Return a random value of length <= bits in an array of even length
+    int[] random_val(int bits) {
+        int len = (bits+63)/64;  // i.e. length in longs
+        int[] val = new int[len*2];
+        for (int i = 0; i < val.length; i++)
+            val[i] = rnd.nextInt();
+        int leadingZeros = 64 - (bits & 64);
+        if (leadingZeros >= 32) {
+            val[0] = 0;
+            val[1] &= ~(-1l << (leadingZeros & 31));
+        } else {
+            val[0] &= ~(-1l << leadingZeros);
+        }
+        return val;
+    }
+
+    void testOneLength(int lenInBits, int lenInInts) throws Throwable {
+        BigInteger mod = new BigInteger(lenInBits, 2, rnd);
+        BigInteger r = BigInteger.ONE.shiftLeft(lenInInts * 32);
+        BigInteger n_prime = mod.modInverse(r).negate();
+
+        // Make n.length even, padding with a zero if necessary
+        int[] n = mag(mod);
+        if (n.length < lenInInts) {
+            int[] x = new int[lenInInts];
+            System.arraycopy(n, 0, x, lenInInts-n.length, n.length);
+            n = x;
+        }
+
+        for (int i = 0; i < 10000; i++) {
+            // multiply
+            check(random_val(lenInBits), random_val(lenInBits), n, lenInInts, n_prime);
+            // square
+            int[] tmp = random_val(lenInBits);
+            check(tmp, tmp, n, lenInInts, n_prime);
+        }
+    }
+
+    // Test the Montgomery multiply intrinsic with a bunch of random
+    // values of varying lengths.  Do this for long enough that the
+    // caller of the intrinsic is C2-compiled.
+    void testResultValues() throws Throwable {
+        // Test a couple of interesting edge cases.
+        testOneLength(1024, 32);
+        testOneLength(1025, 34);
+        for (int j = 10; j > 0; j--) {
+            // Construct a random prime whose length in words is even
+            int lenInBits = rnd.nextInt(2048) + 64;
+            int lenInInts = (lenInBits + 63)/64*2;
+            testOneLength(lenInBits, lenInInts);
+        }
+    }
+
+    // Range checks
+    void testOneMontgomeryMultiplyCheck(int[] a, int[] b, int[] n, int len, long inv,
+                                        int[] product, Class klass) {
+        try {
+            montgomeryMultiply(a, b, n, len, inv, product);
+        } catch (Throwable ex) {
+            if (klass.isAssignableFrom(ex.getClass()))
+                return;
+            throw new RuntimeException(klass + " expected, " + ex + " was thrown");
+        }
+        throw new RuntimeException(klass + " expected, was not thrown");
+    }
+
+    void testOneMontgomeryMultiplyCheck(int[] a, int[] b, BigInteger n, int len, BigInteger inv,
+            Class klass) {
+        testOneMontgomeryMultiplyCheck(a, b, mag(n), len, inv.longValue(), null, klass);
+    }
+
+    void testOneMontgomeryMultiplyCheck(int[] a, int[] b, BigInteger n, int len, BigInteger inv,
+            int[] product, Class klass) {
+        testOneMontgomeryMultiplyCheck(a, b, mag(n), len, inv.longValue(), product, klass);
+    }
+
+    void testMontgomeryMultiplyChecks() {
+        int[] blah = random_val(40);
+        int[] small = random_val(39);
+        BigInteger mod = new BigInteger(40*32 , 2, rnd);
+        BigInteger r = BigInteger.ONE.shiftLeft(40*32);
+        BigInteger n_prime = mod.modInverse(r).negate();
+
+        // Length out of range: square
+        testOneMontgomeryMultiplyCheck(blah, blah, mod, 41, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(blah, blah, mod, 0, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(blah, blah, mod, -1, n_prime, IllegalArgumentException.class);
+        // As above, but for multiply
+        testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 41, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 0, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 0, n_prime, IllegalArgumentException.class);
+
+        // Length odd
+        testOneMontgomeryMultiplyCheck(small, small, mod, 39, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(small, small, mod, 0, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(small, small, mod, -1, n_prime, IllegalArgumentException.class);
+        // As above, but for multiply
+        testOneMontgomeryMultiplyCheck(small, small.clone(), mod, 39, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(small, small.clone(), mod, 0, n_prime, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(small, small.clone(), mod, -1, n_prime, IllegalArgumentException.class);
+
+        // array too small
+        testOneMontgomeryMultiplyCheck(blah, blah, mod, 40, n_prime, small, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(blah, blah.clone(), mod, 40, n_prime, small, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(small, blah, mod, 40, n_prime, blah, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(blah, small, mod, 40, n_prime, blah, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(blah, blah, mod, 40, n_prime, small, IllegalArgumentException.class);
+        testOneMontgomeryMultiplyCheck(small, small, mod, 40, n_prime, blah, IllegalArgumentException.class);
+    }
+
+    public static void main(String args[]) {
+        try {
+            new MontgomeryMultiplyTest().testMontgomeryMultiplyChecks();
+            new MontgomeryMultiplyTest().testResultValues();
+        } catch (Throwable ex) {
+            throw new RuntimeException(ex);
+        }
+     }
+}
--- a/test/compiler/intrinsics/sha/cli/SHAOptionsBase.java	Thu Jul 09 15:39:05 2015 -0400
+++ b/test/compiler/intrinsics/sha/cli/SHAOptionsBase.java	Fri Jul 10 16:37:41 2015 +0200
@@ -47,16 +47,12 @@
     // expressions, not just a plain strings.
     protected static final String SHA_INSTRUCTIONS_ARE_NOT_AVAILABLE
             = "SHA instructions are not available on this CPU";
-    protected static final String SHA1_INSTRUCTION_IS_NOT_AVAILABLE
-            = "SHA1 instruction is not available on this CPU\\.";
-    protected static final String SHA256_INSTRUCTION_IS_NOT_AVAILABLE
-            = "SHA256 instruction \\(for SHA-224 and SHA-256\\) "
-            + "is not available on this CPU\\.";
-    protected static final String SHA512_INSTRUCTION_IS_NOT_AVAILABLE
-            = "SHA512 instruction \\(for SHA-384 and SHA-512\\) "
-            + "is not available on this CPU\\.";
-    protected static final String SHA_INTRINSICS_ARE_NOT_AVAILABLE
-            = "SHA intrinsics are not available on this CPU";
+    protected static final String SHA1_INTRINSICS_ARE_NOT_AVAILABLE
+            = "Intrinsics for SHA-1 crypto hash functions not available on this CPU.";
+    protected static final String SHA256_INTRINSICS_ARE_NOT_AVAILABLE
+            = "Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.";
+    protected static final String SHA512_INTRINSICS_ARE_NOT_AVAILABLE
+            = "Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU.";
 
     private final TestCase[] testCases;
 
@@ -71,33 +67,23 @@
      *         instructions required by the option are not supported.
      */
     protected static String getWarningForUnsupportedCPU(String optionName) {
-        if (Platform.isSparc() || Platform.isAArch64()) {
+        if (Platform.isSparc() || Platform.isAArch64() ||
+            Platform.isX64() || Platform.isX86()) {
             switch (optionName) {
-                case SHAOptionsBase.USE_SHA_OPTION:
-                    return SHAOptionsBase.SHA_INSTRUCTIONS_ARE_NOT_AVAILABLE;
-                case SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION:
-                    return SHAOptionsBase.SHA1_INSTRUCTION_IS_NOT_AVAILABLE;
-                case SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION:
-                    return SHAOptionsBase.SHA256_INSTRUCTION_IS_NOT_AVAILABLE;
-                case SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION:
-                    return SHAOptionsBase.SHA512_INSTRUCTION_IS_NOT_AVAILABLE;
-                default:
-                    throw new Error("Unexpected option " + optionName);
-            }
-        } else if (Platform.isX64() || Platform.isX86()) {
-            switch (optionName) {
-                case SHAOptionsBase.USE_SHA_OPTION:
-                    return SHAOptionsBase.SHA_INSTRUCTIONS_ARE_NOT_AVAILABLE;
-                case SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION:
-                case SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION:
-                case SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION:
-                    return SHAOptionsBase.SHA_INTRINSICS_ARE_NOT_AVAILABLE;
-                default:
-                    throw new Error("Unexpected option " + optionName);
+            case SHAOptionsBase.USE_SHA_OPTION:
+                return SHAOptionsBase.SHA_INSTRUCTIONS_ARE_NOT_AVAILABLE;
+            case SHAOptionsBase.USE_SHA1_INTRINSICS_OPTION:
+                return SHAOptionsBase.SHA1_INTRINSICS_ARE_NOT_AVAILABLE;
+            case SHAOptionsBase.USE_SHA256_INTRINSICS_OPTION:
+                return SHAOptionsBase.SHA256_INTRINSICS_ARE_NOT_AVAILABLE;
+            case SHAOptionsBase.USE_SHA512_INTRINSICS_OPTION:
+                return SHAOptionsBase.SHA512_INTRINSICS_ARE_NOT_AVAILABLE;
+            default:
+                throw new Error("Unexpected option " + optionName);
             }
         } else {
-            throw new Error("Support for CPUs other then X86 or SPARC is not "
-                    + "implemented.");
+            throw new Error("Support for CPUs different fromn X86, SPARC, and AARCH64 "
+                            + "is not implemented");
         }
     }
 
--- a/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForSupportedCPU.java	Thu Jul 09 15:39:05 2015 -0400
+++ b/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForSupportedCPU.java	Fri Jul 10 16:37:41 2015 +0200
@@ -64,18 +64,20 @@
                         SHAOptionsBase.USE_SHA_OPTION, true),
                 CommandLineOptionTest.prepareBooleanFlag(optionName, false));
 
-        // Verify that it is possible to enable the tested option and disable
-        // all SHA intrinsics via -UseSHA without any warnings.
-        CommandLineOptionTest.verifySameJVMStartup(null, new String[] {
-                        SHAOptionsBase.getWarningForUnsupportedCPU(optionName)
-                }, shouldPassMessage, String.format("It should be able to "
-                        + "enable option '%s' even if %s was passed to JVM",
-                        optionName, CommandLineOptionTest.prepareBooleanFlag(
-                            SHAOptionsBase.USE_SHA_OPTION, false)),
-                ExitCode.OK,
-                CommandLineOptionTest.prepareBooleanFlag(
-                        SHAOptionsBase.USE_SHA_OPTION, false),
-                CommandLineOptionTest.prepareBooleanFlag(optionName, true));
+        if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) {
+            // Verify that if -XX:-UseSHA is passed to the JVM, it is not possible
+            // to enable the tested option and a warning is printed.
+            CommandLineOptionTest.verifySameJVMStartup(
+                    new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) },
+                    null,
+                    shouldPassMessage,
+                    String.format("Enabling option '%s' should not be possible and should result in a warning if %s was passed to JVM",
+                                  optionName,
+                                  CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false)),
+                    ExitCode.OK,
+                    CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false),
+                    CommandLineOptionTest.prepareBooleanFlag(optionName, true));
+        }
     }
 
     @Override
--- a/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedAArch64CPU.java	Thu Jul 09 15:39:05 2015 -0400
+++ b/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedAArch64CPU.java	Fri Jul 10 16:37:41 2015 +0200
@@ -49,14 +49,22 @@
                 }, shouldPassMessage, shouldPassMessage, ExitCode.OK,
                 CommandLineOptionTest.prepareBooleanFlag(optionName, false));
 
-        shouldPassMessage = String.format("JVM should start with '-XX:+"
-                + "%s' flag, but output should contain warning.", optionName);
-        // Verify that when the tested option is explicitly enabled, then
-        // a warning will occur in VM output.
-        CommandLineOptionTest.verifySameJVMStartup(new String[] {
-                        SHAOptionsBase.getWarningForUnsupportedCPU(optionName)
-                }, null, shouldPassMessage, shouldPassMessage, ExitCode.OK,
-                CommandLineOptionTest.prepareBooleanFlag(optionName, true));
+        shouldPassMessage = String.format("If JVM is started with '-XX:-"
+                + "%s' '-XX:+%s', output should contain warning.",
+                SHAOptionsBase.USE_SHA_OPTION, optionName);
+
+        // Verify that when the tested option is enabled, then
+        // a warning will occur in VM output if UseSHA is disabled.
+        if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) {
+            CommandLineOptionTest.verifySameJVMStartup(
+                    new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) },
+                    null,
+                    shouldPassMessage,
+                    shouldPassMessage,
+                    ExitCode.OK,
+                    CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false),
+                    CommandLineOptionTest.prepareBooleanFlag(optionName, true));
+        }
     }
 
     @Override
--- a/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedSparcCPU.java	Thu Jul 09 15:39:05 2015 -0400
+++ b/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedSparcCPU.java	Fri Jul 10 16:37:41 2015 +0200
@@ -48,6 +48,19 @@
                         SHAOptionsBase.getWarningForUnsupportedCPU(optionName)
                 }, shouldPassMessage, shouldPassMessage, ExitCode.OK,
                 CommandLineOptionTest.prepareBooleanFlag(optionName, false));
+
+        // Verify that when the tested option is enabled, then
+        // a warning will occur in VM output if UseSHA is disabled.
+        if (!optionName.equals(SHAOptionsBase.USE_SHA_OPTION)) {
+            CommandLineOptionTest.verifySameJVMStartup(
+                    new String[] { SHAOptionsBase.getWarningForUnsupportedCPU(optionName) },
+                    null,
+                    shouldPassMessage,
+                    shouldPassMessage,
+                    ExitCode.OK,
+                    CommandLineOptionTest.prepareBooleanFlag(SHAOptionsBase.USE_SHA_OPTION, false),
+                    CommandLineOptionTest.prepareBooleanFlag(optionName, true));
+        }
     }
 
     @Override