changeset 8778:850de1d9b5fc

Merge
author jbachorik
date Fri, 31 Jul 2015 12:13:57 +0200
parents e8351756255d abf45dd6ae7c
children b5d723199d45
files src/share/vm/runtime/arguments.cpp src/share/vm/runtime/arguments.hpp src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp src/share/vm/runtime/globals.hpp
diffstat 27 files changed, 2785 insertions(+), 991 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/aarch64/vm/aarch64.ad	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/cpu/aarch64/vm/aarch64.ad	Fri Jul 31 12:13:57 2015 +0200
@@ -2167,8 +2167,12 @@
     return 0;            // Self copy, no move.
   }
 
+  bool is64 = (src_lo & 1) == 0 && src_lo + 1 == src_hi &&
+              (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi;
+  int src_offset = ra_->reg2offset(src_lo);
+  int dst_offset = ra_->reg2offset(dst_lo);
+
   if (bottom_type()->isa_vect() != NULL) {
-    uint len = 4;
     uint ireg = ideal_reg();
     assert(ireg == Op_VecD || ireg == Op_VecX, "must be 64 bit or 128 bit vector");
     if (cbuf) {
@@ -2176,334 +2180,115 @@
       assert((src_lo_rc != rc_int && dst_lo_rc != rc_int), "sanity");
       if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
         // stack->stack
-        int src_offset = ra_->reg2offset(src_lo);
-        int dst_offset = ra_->reg2offset(dst_lo);
         assert((src_offset & 7) && (dst_offset & 7), "unaligned stack offset");
-        len = 8;
         if (ireg == Op_VecD) {
-          __ ldr(rscratch1, Address(sp, src_offset));
-          __ str(rscratch1, Address(sp, dst_offset));
+          __ unspill(rscratch1, true, src_offset);
+          __ spill(rscratch1, true, dst_offset);
         } else {
-          if (src_offset < 512) {
-            __ ldp(rscratch1, rscratch2, Address(sp, src_offset));
-          } else {
-            __ ldr(rscratch1, Address(sp, src_offset));
-            __ ldr(rscratch2, Address(sp, src_offset+4));
-            len += 4;
-          }
-          if (dst_offset < 512) {
-            __ stp(rscratch1, rscratch2, Address(sp, dst_offset));
-          } else {
-            __ str(rscratch1, Address(sp, dst_offset));
-            __ str(rscratch2, Address(sp, dst_offset+4));
-            len += 4;
-          }
+          __ spill_copy128(src_offset, dst_offset);
         }
       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
-        __ orr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+        __ mov(as_FloatRegister(Matcher::_regEncode[dst_lo]),
                ireg == Op_VecD ? __ T8B : __ T16B,
-               as_FloatRegister(Matcher::_regEncode[src_lo]),
                as_FloatRegister(Matcher::_regEncode[src_lo]));
       } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
-        __ str(as_FloatRegister(Matcher::_regEncode[src_lo]),
-               ireg == Op_VecD ? __ D : __ Q,
-               Address(sp, ra_->reg2offset(dst_lo)));
+        __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
+                       ireg == Op_VecD ? __ D : __ Q,
+                       ra_->reg2offset(dst_lo));
       } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
-        __ ldr(as_FloatRegister(Matcher::_regEncode[dst_lo]),
-               ireg == Op_VecD ? __ D : __ Q,
-               Address(sp, ra_->reg2offset(src_lo)));
+        __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                       ireg == Op_VecD ? __ D : __ Q,
+                       ra_->reg2offset(src_lo));
       } else {
         ShouldNotReachHere();
       }
-    } else if (st) {
-      if (src_lo_rc == rc_stack && dst_lo_rc == rc_stack) {
-        // stack->stack
-        int src_offset = ra_->reg2offset(src_lo);
-        int dst_offset = ra_->reg2offset(dst_lo);
-        if (ireg == Op_VecD) {
-          st->print("ldr  rscratch1, [sp, #%d]", src_offset);
-          st->print("str  rscratch1, [sp, #%d]", dst_offset);
+    }
+  } else if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    switch (src_lo_rc) {
+    case rc_int:
+      if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
+        if (is64) {
+            __ mov(as_Register(Matcher::_regEncode[dst_lo]),
+                   as_Register(Matcher::_regEncode[src_lo]));
         } else {
-          if (src_offset < 512) {
-            st->print("ldp  rscratch1, rscratch2, [sp, #%d]", src_offset);
-          } else {
-            st->print("ldr  rscratch1, [sp, #%d]", src_offset);
-            st->print("\nldr  rscratch2, [sp, #%d]", src_offset+4);
-          }
-          if (dst_offset < 512) {
-            st->print("\nstp  rscratch1, rscratch2, [sp, #%d]", dst_offset);
-          } else {
-            st->print("\nstr  rscratch1, [sp, #%d]", dst_offset);
-            st->print("\nstr  rscratch2, [sp, #%d]", dst_offset+4);
-          }
+            MacroAssembler _masm(cbuf);
+            __ movw(as_Register(Matcher::_regEncode[dst_lo]),
+                    as_Register(Matcher::_regEncode[src_lo]));
         }
-        st->print("\t# vector spill, stack to stack");
-      } else if (src_lo_rc == rc_float && dst_lo_rc == rc_float) {
-        st->print("mov  %s, %s\t# vector spill, reg to reg",
-                   Matcher::regName[dst_lo], Matcher::regName[src_lo]);
-      } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) {
-        st->print("str  %s, [sp, #%d]\t# vector spill, reg to stack",
-                   Matcher::regName[src_lo], ra_->reg2offset(dst_lo));
-      } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) {
-        st->print("ldr  %s, [sp, #%d]\t# vector spill, stack to reg",
-                   Matcher::regName[dst_lo], ra_->reg2offset(src_lo));
+      } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
+        if (is64) {
+            __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                     as_Register(Matcher::_regEncode[src_lo]));
+        } else {
+            __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                     as_Register(Matcher::_regEncode[src_lo]));
+        }
+      } else {                    // gpr --> stack spill
+        assert(dst_lo_rc == rc_stack, "spill to bad register class");
+        __ spill(as_Register(Matcher::_regEncode[src_lo]), is64, dst_offset);
       }
-    }
-    return len;
+      break;
+    case rc_float:
+      if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
+        if (is64) {
+            __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
+                     as_FloatRegister(Matcher::_regEncode[src_lo]));
+        } else {
+            __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
+                     as_FloatRegister(Matcher::_regEncode[src_lo]));
+        }
+      } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
+          if (cbuf) {
+            __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                     as_FloatRegister(Matcher::_regEncode[src_lo]));
+        } else {
+            __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                     as_FloatRegister(Matcher::_regEncode[src_lo]));
+        }
+      } else {                    // fpr --> stack spill
+        assert(dst_lo_rc == rc_stack, "spill to bad register class");
+        __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]),
+                 is64 ? __ D : __ S, dst_offset);
+      }
+      break;
+    case rc_stack:
+      if (dst_lo_rc == rc_int) {  // stack --> gpr load
+        __ unspill(as_Register(Matcher::_regEncode[dst_lo]), is64, src_offset);
+      } else if (dst_lo_rc == rc_float) { // stack --> fpr load
+        __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]),
+                   is64 ? __ D : __ S, src_offset);
+      } else {                    // stack --> stack copy
+        assert(dst_lo_rc == rc_stack, "spill to bad register class");
+        __ unspill(rscratch1, is64, src_offset);
+        __ spill(rscratch1, is64, dst_offset);
+      }
+      break;
+    default:
+      assert(false, "bad rc_class for spill");
+      ShouldNotReachHere();
+    }
   }
 
-  switch (src_lo_rc) {
-  case rc_int:
-    if (dst_lo_rc == rc_int) {  // gpr --> gpr copy
-      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
-          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
-          // 64 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ mov(as_Register(Matcher::_regEncode[dst_lo]),
-                 as_Register(Matcher::_regEncode[src_lo]));
-        } else if (st) {
-          st->print("mov  %s, %s\t# shuffle",
-                    Matcher::regName[dst_lo],
-                    Matcher::regName[src_lo]);
-        }
-      } else {
-        // 32 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ movw(as_Register(Matcher::_regEncode[dst_lo]),
-                  as_Register(Matcher::_regEncode[src_lo]));
-        } else if (st) {
-          st->print("movw  %s, %s\t# shuffle",
-                    Matcher::regName[dst_lo],
-                    Matcher::regName[src_lo]);
-        }
-      }
-    } else if (dst_lo_rc == rc_float) { // gpr --> fpr copy
-      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
-          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
-          // 64 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
-                   as_Register(Matcher::_regEncode[src_lo]));
-        } else if (st) {
-          st->print("fmovd  %s, %s\t# shuffle",
-                    Matcher::regName[dst_lo],
-                    Matcher::regName[src_lo]);
-        }
-      } else {
-        // 32 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
-                   as_Register(Matcher::_regEncode[src_lo]));
-        } else if (st) {
-          st->print("fmovs  %s, %s\t# shuffle",
-                    Matcher::regName[dst_lo],
-                    Matcher::regName[src_lo]);
-        }
-      }
-    } else {                    // gpr --> stack spill
-      assert(dst_lo_rc == rc_stack, "spill to bad register class");
-      int dst_offset = ra_->reg2offset(dst_lo);
-      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
-          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
-          // 64 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ str(as_Register(Matcher::_regEncode[src_lo]),
-                 Address(sp, dst_offset));
-        } else if (st) {
-          st->print("str  %s, [sp, #%d]\t# spill",
-                    Matcher::regName[src_lo],
-                    dst_offset);
-        }
-      } else {
-        // 32 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ strw(as_Register(Matcher::_regEncode[src_lo]),
-                 Address(sp, dst_offset));
-        } else if (st) {
-          st->print("strw  %s, [sp, #%d]\t# spill",
-                    Matcher::regName[src_lo],
-                    dst_offset);
-        }
-      }
-    }
-    return 4;
-  case rc_float:
-    if (dst_lo_rc == rc_int) {  // fpr --> gpr copy
-      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
-          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
-          // 64 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ fmovd(as_Register(Matcher::_regEncode[dst_lo]),
-                   as_FloatRegister(Matcher::_regEncode[src_lo]));
-        } else if (st) {
-          st->print("fmovd  %s, %s\t# shuffle",
-                    Matcher::regName[dst_lo],
-                    Matcher::regName[src_lo]);
-        }
-      } else {
-        // 32 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ fmovs(as_Register(Matcher::_regEncode[dst_lo]),
-                   as_FloatRegister(Matcher::_regEncode[src_lo]));
-        } else if (st) {
-          st->print("fmovs  %s, %s\t# shuffle",
-                    Matcher::regName[dst_lo],
-                    Matcher::regName[src_lo]);
-        }
-      }
-    } else if (dst_lo_rc == rc_float) { // fpr --> fpr copy
-      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
-          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
-          // 64 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ fmovd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
-                   as_FloatRegister(Matcher::_regEncode[src_lo]));
-        } else if (st) {
-          st->print("fmovd  %s, %s\t# shuffle",
-                    Matcher::regName[dst_lo],
-                    Matcher::regName[src_lo]);
-        }
-      } else {
-        // 32 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ fmovs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
-                   as_FloatRegister(Matcher::_regEncode[src_lo]));
-        } else if (st) {
-          st->print("fmovs  %s, %s\t# shuffle",
-                    Matcher::regName[dst_lo],
-                    Matcher::regName[src_lo]);
-        }
-      }
-    } else {                    // fpr --> stack spill
-      assert(dst_lo_rc == rc_stack, "spill to bad register class");
-      int dst_offset = ra_->reg2offset(dst_lo);
-      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
-          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
-          // 64 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ strd(as_FloatRegister(Matcher::_regEncode[src_lo]),
-                 Address(sp, dst_offset));
-        } else if (st) {
-          st->print("strd  %s, [sp, #%d]\t# spill",
-                    Matcher::regName[src_lo],
-                    dst_offset);
-        }
-      } else {
-        // 32 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ strs(as_FloatRegister(Matcher::_regEncode[src_lo]),
-                 Address(sp, dst_offset));
-        } else if (st) {
-          st->print("strs  %s, [sp, #%d]\t# spill",
-                    Matcher::regName[src_lo],
-                    dst_offset);
-        }
-      }
-    }
-    return 4;
-  case rc_stack:
-    int src_offset = ra_->reg2offset(src_lo);
-    if (dst_lo_rc == rc_int) {  // stack --> gpr load
-      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
-          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
-          // 64 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ ldr(as_Register(Matcher::_regEncode[dst_lo]),
-                 Address(sp, src_offset));
-        } else if (st) {
-          st->print("ldr  %s, [sp, %d]\t# restore",
-                    Matcher::regName[dst_lo],
-                    src_offset);
-        }
-      } else {
-        // 32 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ ldrw(as_Register(Matcher::_regEncode[dst_lo]),
-                  Address(sp, src_offset));
-        } else if (st) {
-          st->print("ldr  %s, [sp, %d]\t# restore",
-                    Matcher::regName[dst_lo],
-                   src_offset);
-        }
-      }
-      return 4;
-    } else if (dst_lo_rc == rc_float) { // stack --> fpr load
-      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
-          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
-          // 64 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ ldrd(as_FloatRegister(Matcher::_regEncode[dst_lo]),
-                 Address(sp, src_offset));
-        } else if (st) {
-          st->print("ldrd  %s, [sp, %d]\t# restore",
-                    Matcher::regName[dst_lo],
-                    src_offset);
-        }
-      } else {
-        // 32 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ ldrs(as_FloatRegister(Matcher::_regEncode[dst_lo]),
-                  Address(sp, src_offset));
-        } else if (st) {
-          st->print("ldrs  %s, [sp, %d]\t# restore",
-                    Matcher::regName[dst_lo],
-                   src_offset);
-        }
-      }
-      return 4;
-    } else {                    // stack --> stack copy
-      assert(dst_lo_rc == rc_stack, "spill to bad register class");
-      int dst_offset = ra_->reg2offset(dst_lo);
-      if (((src_lo & 1) == 0 && src_lo + 1 == src_hi) &&
-          (dst_lo & 1) == 0 && dst_lo + 1 == dst_hi) {
-          // 64 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ ldr(rscratch1, Address(sp, src_offset));
-          __ str(rscratch1, Address(sp, dst_offset));
-        } else if (st) {
-          st->print("ldr  rscratch1, [sp, %d]\t# mem-mem spill",
-                    src_offset);
-          st->print("\n\t");
-          st->print("str  rscratch1, [sp, %d]",
-                    dst_offset);
-        }
-      } else {
-        // 32 bit
-        if (cbuf) {
-          MacroAssembler _masm(cbuf);
-          __ ldrw(rscratch1, Address(sp, src_offset));
-          __ strw(rscratch1, Address(sp, dst_offset));
-        } else if (st) {
-          st->print("ldrw  rscratch1, [sp, %d]\t# mem-mem spill",
-                    src_offset);
-          st->print("\n\t");
-          st->print("strw  rscratch1, [sp, %d]",
-                    dst_offset);
-        }
-      }
-      return 8;
+  if (st) {
+    st->print("spill ");
+    if (src_lo_rc == rc_stack) {
+      st->print("[sp, #%d] -> ", ra_->reg2offset(src_lo));
+    } else {
+      st->print("%s -> ", Matcher::regName[src_lo]);
+    }
+    if (dst_lo_rc == rc_stack) {
+      st->print("[sp, #%d]", ra_->reg2offset(dst_lo));
+    } else {
+      st->print("%s", Matcher::regName[dst_lo]);
+    }
+    if (bottom_type()->isa_vect() != NULL) {
+      st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128);
+    } else {
+      st->print("\t# spill size = %d", is64 ? 64:32);
     }
   }
 
-  assert(false," bad rc_class for spill ");
-  Unimplemented();
   return 0;
 
 }
@@ -2522,7 +2307,7 @@
 }
 
 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
-  return implementation(NULL, ra_, true, NULL);
+  return MachNode::size(ra_);
 }
 
 //=============================================================================
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Fri Jul 31 12:13:57 2015 +0200
@@ -1896,7 +1896,7 @@
  public:
 
   enum SIMD_Arrangement {
-       T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D
+       T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D, T1Q
   };
 
   enum SIMD_RegVariant {
@@ -2225,14 +2225,16 @@
     f(0b001111, 15, 10), rf(Vn, 5), rf(Xd, 0);
   }
 
-  // We do not handle the 1Q arrangement.
   void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
     starti;
-    assert(Ta == T8H && (Tb == T8B || Tb == T16B), "Invalid Size specifier");
-    f(0, 31), f(Tb & 1, 30), f(0b001110001, 29, 21), rf(Vm, 16), f(0b111000, 15, 10);
-    rf(Vn, 5), rf(Vd, 0);
+    assert((Ta == T1Q && (Tb == T1D || Tb == T2D)) ||
+           (Ta == T8H && (Tb == T8B || Tb == T16B)), "Invalid Size specifier");
+    int size = (Ta == T1Q) ? 0b11 : 0b00;
+    f(0, 31), f(Tb & 1, 30), f(0b001110, 29, 24), f(size, 23, 22);
+    f(1, 21), rf(Vm, 16), f(0b111000, 15, 10), rf(Vn, 5), rf(Vd, 0);
   }
   void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
+    assert(Tb == T2D || Tb == T16B, "pmull2 assumes T2D or T16B as the second size specifier");
     pmull(Vd, Ta, Vn, Vm, Tb);
   }
 
@@ -2245,15 +2247,6 @@
     f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0);
   }
 
-  void rev32(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn)
-  {
-    starti;
-    assert(T <= T8H, "must be one of T8B, T16B, T4H, T8H");
-    f(0, 31), f((int)T & 1, 30), f(0b101110, 29, 24);
-    f(T <= T16B ? 0b00 : 0b01, 23, 22), f(0b100000000010, 21, 10);
-    rf(Vn, 5), rf(Vd, 0);
-  }
-
   void dup(FloatRegister Vd, SIMD_Arrangement T, Register Xs)
   {
     starti;
@@ -2290,6 +2283,57 @@
 
 #undef INSN
 
+  // Table vector lookup
+#define INSN(NAME, op)                                                                                       \
+  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, unsigned registers, FloatRegister Vm) {  \
+    starti;                                                                                                  \
+    assert(T == T8B || T == T16B, "invalid arrangement");                                                    \
+    assert(0 < registers && registers <= 4, "invalid number of registers");                                  \
+    f(0, 31), f((int)T & 1, 30), f(0b001110000, 29, 21), rf(Vm, 16), f(0, 15);                               \
+    f(registers - 1, 14, 13), f(op, 12),f(0b00, 11, 10), rf(Vn, 5), rf(Vd, 0);                               \
+  }
+
+  INSN(tbl, 0);
+  INSN(tbx, 1);
+
+#undef INSN
+
+#define INSN(NAME, U, opcode)                                                       \
+  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {               \
+       starti;                                                                      \
+       assert((ASSERTION), MSG);                                                    \
+       f(0, 31), f((int)T & 1, 30), f(U, 29), f(0b01110, 28, 24);                   \
+       f((int)(T >> 1), 23, 22), f(0b10000, 21, 17), f(opcode, 16, 12);             \
+       f(0b10, 11, 10), rf(Vn, 5), rf(Vd, 0);                                       \
+ }
+
+#define MSG "invalid arrangement"
+
+#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
+  INSN(rev64, 0, 0b00000);
+#undef ASSERTION
+
+#define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H)
+  INSN(rev32, 1, 0b00000);
+#undef ASSERTION
+
+#define ASSERTION (T == T8B || T == T16B)
+  INSN(rev16, 0, 0b00001);
+#undef ASSERTION
+
+#undef MSG
+
+#undef INSN
+
+void ext(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm, int index)
+  {
+    starti;
+    assert(T == T8B || T == T16B, "invalid arrangement");
+    assert((T == T8B && index <= 0b0111) || (T == T16B && index <= 0b1111), "Invalid index value");
+    f(0, 31), f((int)T & 1, 30), f(0b101110000, 29, 21);
+    rf(Vm, 16), f(0, 15), f(index, 14, 11);
+    f(0, 10), rf(Vn, 5), rf(Vd, 0);
+  }
 
 /* Simulator extensions to the ISA
 
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Fri Jul 31 12:13:57 2015 +0200
@@ -2009,6 +2009,14 @@
   }
 }
 
+void MacroAssembler::sub(Register Rd, Register Rn, RegisterOrConstant decrement) {
+  if (decrement.is_register()) {
+    sub(Rd, Rn, decrement.as_register());
+  } else {
+    sub(Rd, Rn, decrement.as_constant());
+  }
+}
+
 void MacroAssembler::reinit_heapbase()
 {
   if (UseCompressedOops) {
@@ -2307,6 +2315,28 @@
   }
 }
 
+Address MacroAssembler::spill_address(int size, int offset, Register tmp)
+{
+  assert(offset >= 0, "spill to negative address?");
+  // Offset reachable ?
+  //   Not aligned - 9 bits signed offset
+  //   Aligned - 12 bits unsigned offset shifted
+  Register base = sp;
+  if ((offset & (size-1)) && offset >= (1<<8)) {
+    add(tmp, base, offset & ((1<<12)-1));
+    base = tmp;
+    offset &= -1<<12;
+  }
+
+  if (offset >= (1<<12) * size) {
+    add(tmp, base, offset & (((1<<12)-1)<<12));
+    base = tmp;
+    offset &= ~(((1<<12)-1)<<12);
+  }
+
+  return Address(base, offset);
+}
+
 /**
  * Multiply 64 bit by 64 bit first loop.
  */
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Fri Jul 31 12:13:57 2015 +0200
@@ -464,10 +464,21 @@
     mov(dst, (long)i);
   }
 
+  void mov(Register dst, RegisterOrConstant src) {
+    if (src.is_register())
+      mov(dst, src.as_register());
+    else
+      mov(dst, src.as_constant());
+  }
+
   void movptr(Register r, uintptr_t imm64);
 
   void mov(FloatRegister Vd, SIMD_Arrangement T, u_int32_t imm32);
 
+  void mov(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {
+    orr(Vd, T, Vn, Vn);
+  }
+
   // macro instructions for accessing and updating floating point
   // status register
   //
@@ -1045,6 +1056,7 @@
 
   void add(Register Rd, Register Rn, RegisterOrConstant increment);
   void addw(Register Rd, Register Rn, RegisterOrConstant increment);
+  void sub(Register Rd, Register Rn, RegisterOrConstant decrement);
 
   void adrp(Register reg1, const Address &dest, unsigned long &byte_offset);
 
@@ -1161,6 +1173,46 @@
   // Uses rscratch2.
   Address offsetted_address(Register r, Register r1, Address::extend ext,
                             int offset, int size);
+
+private:
+  // Returns an address on the stack which is reachable with a ldr/str of size
+  // Uses rscratch2 if the address is not directly reachable
+  Address spill_address(int size, int offset, Register tmp=rscratch2);
+
+public:
+  void spill(Register Rx, bool is64, int offset) {
+    if (is64) {
+      str(Rx, spill_address(8, offset));
+    } else {
+      strw(Rx, spill_address(4, offset));
+    }
+  }
+  void spill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
+    str(Vx, T, spill_address(1 << (int)T, offset));
+  }
+  void unspill(Register Rx, bool is64, int offset) {
+    if (is64) {
+      ldr(Rx, spill_address(8, offset));
+    } else {
+      ldrw(Rx, spill_address(4, offset));
+    }
+  }
+  void unspill(FloatRegister Vx, SIMD_RegVariant T, int offset) {
+    ldr(Vx, T, spill_address(1 << (int)T, offset));
+  }
+  void spill_copy128(int src_offset, int dst_offset,
+                     Register tmp1=rscratch1, Register tmp2=rscratch2) {
+    if (src_offset < 512 && (src_offset & 7) == 0 &&
+        dst_offset < 512 && (dst_offset & 7) == 0) {
+      ldp(tmp1, tmp2, Address(sp, src_offset));
+      stp(tmp1, tmp2, Address(sp, dst_offset));
+    } else {
+      unspill(tmp1, true, src_offset);
+      spill(tmp1, true, dst_offset);
+      unspill(tmp1, true, src_offset+8);
+      spill(tmp1, true, dst_offset+8);
+    }
+  }
 };
 
 #ifdef ASSERT
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Fri Jul 31 12:13:57 2015 +0200
@@ -120,10 +120,8 @@
   // we save r19-r28 which Java uses as scratch registers and C
   // expects to be callee-save
   //
-  // we don't save any FP registers since only v8-v15 are callee-save
-  // (strictly only the f and d components) and Java uses them as
-  // callee-save. v0-v7 are arg registers and C treats v16-v31 as
-  // volatile (as does Java?)
+  // we save the bottom 64 bits of each value stored in v8-v15; it is
+  // the responsibility of the caller to preserve larger values.
   //
   // so the stub frame looks like this when we enter Java code
   //
@@ -131,14 +129,14 @@
   //     [ argument word n      ]
   //      ...
   // -27 [ argument word 1      ]
-  // -26 [ saved d15            ] <--- sp_after_call
-  // -25 [ saved d14            ]
-  // -24 [ saved d13            ]
-  // -23 [ saved d12            ]
-  // -22 [ saved d11            ]
-  // -21 [ saved d10            ]
-  // -20 [ saved d9             ]
-  // -19 [ saved d8             ]
+  // -26 [ saved v15            ] <--- sp_after_call
+  // -25 [ saved v14            ]
+  // -24 [ saved v13            ]
+  // -23 [ saved v12            ]
+  // -22 [ saved v11            ]
+  // -21 [ saved v10            ]
+  // -20 [ saved v9             ]
+  // -19 [ saved v8             ]
   // -18 [ saved r28            ]
   // -17 [ saved r27            ]
   // -16 [ saved r26            ]
@@ -2437,6 +2435,137 @@
     return start;
   }
 
+  /**
+   *  Arguments:
+   *
+   *  Input:
+   *  c_rarg0   - current state address
+   *  c_rarg1   - H key address
+   *  c_rarg2   - data address
+   *  c_rarg3   - number of blocks
+   *
+   *  Output:
+   *  Updated state at c_rarg0
+   */
+  address generate_ghash_processBlocks() {
+    __ align(CodeEntryAlignment);
+    Label L_ghash_loop, L_exit;
+
+    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
+    address start = __ pc();
+
+    Register state   = c_rarg0;
+    Register subkeyH = c_rarg1;
+    Register data    = c_rarg2;
+    Register blocks  = c_rarg3;
+
+    FloatRegister vzr = v30;
+    __ eor(vzr, __ T16B, vzr, vzr); // zero register
+
+    __ mov(v26, __ T16B, 1);
+    __ mov(v27, __ T16B, 63);
+    __ mov(v28, __ T16B, 62);
+    __ mov(v29, __ T16B, 57);
+
+    __ ldrq(v6, Address(state));
+    __ ldrq(v16, Address(subkeyH));
+
+    __ ext(v0, __ T16B, v6, v6, 0x08);
+    __ ext(v1, __ T16B, v16, v16, 0x08);
+    __ eor(v16, __ T16B, v16, v1);
+
+    __ bind(L_ghash_loop);
+
+    __ ldrq(v2, Address(__ post(data, 0x10)));
+    __ rev64(v2, __ T16B, v2); // swap data
+
+    __ ext(v6, __ T16B, v0, v0, 0x08);
+    __ eor(v6, __ T16B, v6, v2);
+    __ ext(v2, __ T16B, v6, v6, 0x08);
+
+    __ pmull2(v7, __ T1Q, v2, v1, __ T2D);  // A1*B1
+    __ eor(v6, __ T16B, v6, v2);
+    __ pmull(v5,  __ T1Q, v2, v1, __ T1D);  // A0*B0
+    __ pmull(v20, __ T1Q, v6, v16, __ T1D);  // (A1 + A0)(B1 + B0)
+
+    __ ext(v21, __ T16B, v5, v7, 0x08);
+    __ eor(v18, __ T16B, v7, v5); // A1*B1 xor A0*B0
+    __ eor(v20, __ T16B, v20, v21);
+    __ eor(v20, __ T16B, v20, v18);
+
+    // Registers pair <v7:v5> holds the result of carry-less multiplication
+    __ ins(v7, __ D, v20, 0, 1);
+    __ ins(v5, __ D, v20, 1, 0);
+
+    // Result of the multiplication is shifted by one bit position
+    // [X3:X2:X1:X0] = [X3:X2:X1:X0] << 1
+    __ ushr(v18, __ T2D, v5, -63 & 63);
+    __ ins(v25, __ D, v18, 1, 0);
+    __ ins(v25, __ D, vzr, 0, 0);
+    __ ushl(v5, __ T2D, v5, v26);
+    __ orr(v5, __ T16B, v5, v25);
+
+    __ ushr(v19, __ T2D, v7, -63 & 63);
+    __ ins(v19, __ D, v19, 1, 0);
+    __ ins(v19, __ D, v18, 0, 1);
+    __ ushl(v7, __ T2D, v7, v26);
+    __ orr(v6, __ T16B, v7, v19);
+
+    __ ins(v24, __ D, v5, 0, 1);
+
+    // A = X0 << 63
+    __ ushl(v21, __ T2D, v5, v27);
+
+    // A = X0 << 62
+    __ ushl(v22, __ T2D, v5, v28);
+
+    // A = X0 << 57
+    __ ushl(v23, __ T2D, v5, v29);
+
+    // D = X1^A^B^C
+    __ eor(v21, __ T16B, v21, v22);
+    __ eor(v21, __ T16B, v21, v23);
+    __ eor(v21, __ T16B, v21, v24);
+    __ ins(v5, __ D, v21, 1, 0);
+
+    // [E1:E0] = [D:X0] >> 1
+    __ ushr(v20, __ T2D, v5, -1 & 63);
+    __ ushl(v18, __ T2D, v5, v27);
+    __ ext(v25, __ T16B, v18, vzr, 0x08);
+    __ orr(v19, __ T16B, v20, v25);
+
+    __ eor(v7, __ T16B, v5, v19);
+
+    // [F1:F0] = [D:X0] >> 2
+    __ ushr(v20, __ T2D, v5, -2 & 63);
+    __ ushl(v18, __ T2D, v5, v28);
+    __ ins(v25, __ D, v18, 0, 1);
+    __ orr(v19, __ T16B, v20, v25);
+
+    __ eor(v7, __ T16B, v7, v19);
+
+    // [G1:G0] = [D:X0] >> 7
+    __ ushr(v20, __ T2D, v5, -7 & 63);
+    __ ushl(v18, __ T2D, v5, v29);
+    __ ins(v25, __ D, v18, 0, 1);
+    __ orr(v19, __ T16B, v20, v25);
+
+    // [H1:H0] = [D^E1^F1^G1:X0^E0^F0^G0]
+    __ eor(v7, __ T16B, v7, v19);
+
+    // Result = [H1:H0]^[X3:X2]
+    __ eor(v0, __ T16B, v7, v6);
+
+    __ subs(blocks, blocks, 1);
+    __ cbnz(blocks, L_ghash_loop);
+
+    __ ext(v1, __ T16B, v0, v0, 0x08);
+    __ st1(v1, __ T16B, state);
+    __ ret(lr);
+
+    return start;
+  }
+
   // Continuation point for throwing of implicit exceptions that are
   // not handled in the current activation. Fabricates an exception
   // oop and initiates normal exception dispatching in this
@@ -2544,6 +2673,828 @@
     return stub->entry_point();
   }
 
+  class MontgomeryMultiplyGenerator : public MacroAssembler {
+
+    Register Pa_base, Pb_base, Pn_base, Pm_base, inv, Rlen, Ra, Rb, Rm, Rn,
+      Pa, Pb, Pn, Pm, Rhi_ab, Rlo_ab, Rhi_mn, Rlo_mn, t0, t1, t2, Ri, Rj;
+
+    RegSet _toSave;
+    bool _squaring;
+
+  public:
+    MontgomeryMultiplyGenerator (Assembler *as, bool squaring)
+      : MacroAssembler(as->code()), _squaring(squaring) {
+
+      // Register allocation
+
+      Register reg = c_rarg0;
+      Pa_base = reg;       // Argument registers
+      if (squaring)
+        Pb_base = Pa_base;
+      else
+        Pb_base = ++reg;
+      Pn_base = ++reg;
+      Rlen= ++reg;
+      inv = ++reg;
+      Pm_base = ++reg;
+
+                          // Working registers:
+      Ra =  ++reg;        // The current digit of a, b, n, and m.
+      Rb =  ++reg;
+      Rm =  ++reg;
+      Rn =  ++reg;
+
+      Pa =  ++reg;        // Pointers to the current/next digit of a, b, n, and m.
+      Pb =  ++reg;
+      Pm =  ++reg;
+      Pn =  ++reg;
+
+      t0 =  ++reg;        // Three registers which form a
+      t1 =  ++reg;        // triple-precision accumuator.
+      t2 =  ++reg;
+
+      Ri =  ++reg;        // Inner and outer loop indexes.
+      Rj =  ++reg;
+
+      Rhi_ab = ++reg;     // Product registers: low and high parts
+      Rlo_ab = ++reg;     // of a*b and m*n.
+      Rhi_mn = ++reg;
+      Rlo_mn = ++reg;
+
+      // r19 and up are callee-saved.
+      _toSave = RegSet::range(r19, reg) + Pm_base;
+    }
+
+  private:
+    void save_regs() {
+      push(_toSave, sp);
+    }
+
+    void restore_regs() {
+      pop(_toSave, sp);
+    }
+
+    template <typename T>
+    void unroll_2(Register count, T block) {
+      Label loop, end, odd;
+      tbnz(count, 0, odd);
+      cbz(count, end);
+      align(16);
+      bind(loop);
+      (this->*block)();
+      bind(odd);
+      (this->*block)();
+      subs(count, count, 2);
+      br(Assembler::GT, loop);
+      bind(end);
+    }
+
+    template <typename T>
+    void unroll_2(Register count, T block, Register d, Register s, Register tmp) {
+      Label loop, end, odd;
+      tbnz(count, 0, odd);
+      cbz(count, end);
+      align(16);
+      bind(loop);
+      (this->*block)(d, s, tmp);
+      bind(odd);
+      (this->*block)(d, s, tmp);
+      subs(count, count, 2);
+      br(Assembler::GT, loop);
+      bind(end);
+    }
+
+    void pre1(RegisterOrConstant i) {
+      block_comment("pre1");
+      // Pa = Pa_base;
+      // Pb = Pb_base + i;
+      // Pm = Pm_base;
+      // Pn = Pn_base + i;
+      // Ra = *Pa;
+      // Rb = *Pb;
+      // Rm = *Pm;
+      // Rn = *Pn;
+      ldr(Ra, Address(Pa_base));
+      ldr(Rb, Address(Pb_base, i, Address::uxtw(LogBytesPerWord)));
+      ldr(Rm, Address(Pm_base));
+      ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord)));
+      lea(Pa, Address(Pa_base));
+      lea(Pb, Address(Pb_base, i, Address::uxtw(LogBytesPerWord)));
+      lea(Pm, Address(Pm_base));
+      lea(Pn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord)));
+
+      // Zero the m*n result.
+      mov(Rhi_mn, zr);
+      mov(Rlo_mn, zr);
+    }
+
+    // The core multiply-accumulate step of a Montgomery
+    // multiplication.  The idea is to schedule operations as a
+    // pipeline so that instructions with long latencies (loads and
+    // multiplies) have time to complete before their results are
+    // used.  This most benefits in-order implementations of the
+    // architecture but out-of-order ones also benefit.
+    void step() {
+      block_comment("step");
+      // MACC(Ra, Rb, t0, t1, t2);
+      // Ra = *++Pa;
+      // Rb = *--Pb;
+      umulh(Rhi_ab, Ra, Rb);
+      mul(Rlo_ab, Ra, Rb);
+      ldr(Ra, pre(Pa, wordSize));
+      ldr(Rb, pre(Pb, -wordSize));
+      acc(Rhi_mn, Rlo_mn, t0, t1, t2); // The pending m*n from the
+                                       // previous iteration.
+      // MACC(Rm, Rn, t0, t1, t2);
+      // Rm = *++Pm;
+      // Rn = *--Pn;
+      umulh(Rhi_mn, Rm, Rn);
+      mul(Rlo_mn, Rm, Rn);
+      ldr(Rm, pre(Pm, wordSize));
+      ldr(Rn, pre(Pn, -wordSize));
+      acc(Rhi_ab, Rlo_ab, t0, t1, t2);
+    }
+
+    void post1() {
+      block_comment("post1");
+
+      // MACC(Ra, Rb, t0, t1, t2);
+      // Ra = *++Pa;
+      // Rb = *--Pb;
+      umulh(Rhi_ab, Ra, Rb);
+      mul(Rlo_ab, Ra, Rb);
+      acc(Rhi_mn, Rlo_mn, t0, t1, t2);  // The pending m*n
+      acc(Rhi_ab, Rlo_ab, t0, t1, t2);
+
+      // *Pm = Rm = t0 * inv;
+      mul(Rm, t0, inv);
+      str(Rm, Address(Pm));
+
+      // MACC(Rm, Rn, t0, t1, t2);
+      // t0 = t1; t1 = t2; t2 = 0;
+      umulh(Rhi_mn, Rm, Rn);
+
+#ifndef PRODUCT
+      // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply");
+      {
+        mul(Rlo_mn, Rm, Rn);
+        add(Rlo_mn, t0, Rlo_mn);
+        Label ok;
+        cbz(Rlo_mn, ok); {
+          stop("broken Montgomery multiply");
+        } bind(ok);
+      }
+#endif
+      // We have very carefully set things up so that
+      // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate
+      // the lower half of Rm * Rn because we know the result already:
+      // it must be -t0.  t0 + (-t0) must generate a carry iff
+      // t0 != 0.  So, rather than do a mul and an adds we just set
+      // the carry flag iff t0 is nonzero.
+      //
+      // mul(Rlo_mn, Rm, Rn);
+      // adds(zr, t0, Rlo_mn);
+      subs(zr, t0, 1); // Set carry iff t0 is nonzero
+      adcs(t0, t1, Rhi_mn);
+      adc(t1, t2, zr);
+      mov(t2, zr);
+    }
+
+    void pre2(RegisterOrConstant i, RegisterOrConstant len) {
+      block_comment("pre2");
+      // Pa = Pa_base + i-len;
+      // Pb = Pb_base + len;
+      // Pm = Pm_base + i-len;
+      // Pn = Pn_base + len;
+
+      if (i.is_register()) {
+        sub(Rj, i.as_register(), len);
+      } else {
+        mov(Rj, i.as_constant());
+        sub(Rj, Rj, len);
+      }
+      // Rj == i-len
+
+      lea(Pa, Address(Pa_base, Rj, Address::uxtw(LogBytesPerWord)));
+      lea(Pb, Address(Pb_base, len, Address::uxtw(LogBytesPerWord)));
+      lea(Pm, Address(Pm_base, Rj, Address::uxtw(LogBytesPerWord)));
+      lea(Pn, Address(Pn_base, len, Address::uxtw(LogBytesPerWord)));
+
+      // Ra = *++Pa;
+      // Rb = *--Pb;
+      // Rm = *++Pm;
+      // Rn = *--Pn;
+      ldr(Ra, pre(Pa, wordSize));
+      ldr(Rb, pre(Pb, -wordSize));
+      ldr(Rm, pre(Pm, wordSize));
+      ldr(Rn, pre(Pn, -wordSize));
+
+      mov(Rhi_mn, zr);
+      mov(Rlo_mn, zr);
+    }
+
+    void post2(RegisterOrConstant i, RegisterOrConstant len) {
+      block_comment("post2");
+      if (i.is_constant()) {
+        mov(Rj, i.as_constant()-len.as_constant());
+      } else {
+        sub(Rj, i.as_register(), len);
+      }
+
+      adds(t0, t0, Rlo_mn); // The pending m*n, low part
+
+      // As soon as we know the least significant digit of our result,
+      // store it.
+      // Pm_base[i-len] = t0;
+      str(t0, Address(Pm_base, Rj, Address::uxtw(LogBytesPerWord)));
+
+      // t0 = t1; t1 = t2; t2 = 0;
+      adcs(t0, t1, Rhi_mn); // The pending m*n, high part
+      adc(t1, t2, zr);
+      mov(t2, zr);
+    }
+
+    // A carry in t0 after Montgomery multiplication means that we
+    // should subtract multiples of n from our result in m.  We'll
+    // keep doing that until there is no carry.
+    void normalize(RegisterOrConstant len) {
+      block_comment("normalize");
+      // while (t0)
+      //   t0 = sub(Pm_base, Pn_base, t0, len);
+      Label loop, post, again;
+      Register cnt = t1, i = t2; // Re-use registers; we're done with them now
+      cbz(t0, post); {
+        bind(again); {
+          mov(i, zr);
+          mov(cnt, len);
+          ldr(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord)));
+          ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord)));
+          subs(zr, zr, zr); // set carry flag, i.e. no borrow
+          align(16);
+          bind(loop); {
+            sbcs(Rm, Rm, Rn);
+            str(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord)));
+            add(i, i, 1);
+            ldr(Rm, Address(Pm_base, i, Address::uxtw(LogBytesPerWord)));
+            ldr(Rn, Address(Pn_base, i, Address::uxtw(LogBytesPerWord)));
+            sub(cnt, cnt, 1);
+          } cbnz(cnt, loop);
+          sbc(t0, t0, zr);
+        } cbnz(t0, again);
+      } bind(post);
+    }
+
+    // Move memory at s to d, reversing words.
+    //    Increments d to end of copied memory
+    //    Destroys tmp1, tmp2
+    //    Preserves len
+    //    Leaves s pointing to the address which was in d at start
+    void reverse(Register d, Register s, Register len, Register tmp1, Register tmp2) {
+      assert(tmp1 < r19 && tmp2 < r19, "register corruption");
+
+      lea(s, Address(s, len, Address::uxtw(LogBytesPerWord)));
+      mov(tmp1, len);
+      unroll_2(tmp1, &MontgomeryMultiplyGenerator::reverse1, d, s, tmp2);
+      sub(s, d, len, ext::uxtw, LogBytesPerWord);
+    }
+    // where
+    void reverse1(Register d, Register s, Register tmp) {
+      ldr(tmp, pre(s, -wordSize));
+      ror(tmp, tmp, 32);
+      str(tmp, post(d, wordSize));
+    }
+
+    void step_squaring() {
+      // An extra ACC
+      step();
+      acc(Rhi_ab, Rlo_ab, t0, t1, t2);
+    }
+
+    void last_squaring(RegisterOrConstant i) {
+      Label dont;
+      // if ((i & 1) == 0) {
+      tbnz(i.as_register(), 0, dont); {
+        // MACC(Ra, Rb, t0, t1, t2);
+        // Ra = *++Pa;
+        // Rb = *--Pb;
+        umulh(Rhi_ab, Ra, Rb);
+        mul(Rlo_ab, Ra, Rb);
+        acc(Rhi_ab, Rlo_ab, t0, t1, t2);
+      } bind(dont);
+    }
+
+    void extra_step_squaring() {
+      acc(Rhi_mn, Rlo_mn, t0, t1, t2);  // The pending m*n
+
+      // MACC(Rm, Rn, t0, t1, t2);
+      // Rm = *++Pm;
+      // Rn = *--Pn;
+      umulh(Rhi_mn, Rm, Rn);
+      mul(Rlo_mn, Rm, Rn);
+      ldr(Rm, pre(Pm, wordSize));
+      ldr(Rn, pre(Pn, -wordSize));
+    }
+
+    void post1_squaring() {
+      acc(Rhi_mn, Rlo_mn, t0, t1, t2);  // The pending m*n
+
+      // *Pm = Rm = t0 * inv;
+      mul(Rm, t0, inv);
+      str(Rm, Address(Pm));
+
+      // MACC(Rm, Rn, t0, t1, t2);
+      // t0 = t1; t1 = t2; t2 = 0;
+      umulh(Rhi_mn, Rm, Rn);
+
+#ifndef PRODUCT
+      // assert(m[i] * n[0] + t0 == 0, "broken Montgomery multiply");
+      {
+        mul(Rlo_mn, Rm, Rn);
+        add(Rlo_mn, t0, Rlo_mn);
+        Label ok;
+        cbz(Rlo_mn, ok); {
+          stop("broken Montgomery multiply");
+        } bind(ok);
+      }
+#endif
+      // We have very carefully set things up so that
+      // m[i]*n[0] + t0 == 0 (mod b), so we don't have to calculate
+      // the lower half of Rm * Rn because we know the result already:
+      // it must be -t0.  t0 + (-t0) must generate a carry iff
+      // t0 != 0.  So, rather than do a mul and an adds we just set
+      // the carry flag iff t0 is nonzero.
+      //
+      // mul(Rlo_mn, Rm, Rn);
+      // adds(zr, t0, Rlo_mn);
+      subs(zr, t0, 1); // Set carry iff t0 is nonzero
+      adcs(t0, t1, Rhi_mn);
+      adc(t1, t2, zr);
+      mov(t2, zr);
+    }
+
+    void acc(Register Rhi, Register Rlo,
+             Register t0, Register t1, Register t2) {
+      adds(t0, t0, Rlo);
+      adcs(t1, t1, Rhi);
+      adc(t2, t2, zr);
+    }
+
+  public:
+    /**
+     * Fast Montgomery multiplication.  The derivation of the
+     * algorithm is in A Cryptographic Library for the Motorola
+     * DSP56000, Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237.
+     *
+     * Arguments:
+     *
+     * Inputs for multiplication:
+     *   c_rarg0   - int array elements a
+     *   c_rarg1   - int array elements b
+     *   c_rarg2   - int array elements n (the modulus)
+     *   c_rarg3   - int length
+     *   c_rarg4   - int inv
+     *   c_rarg5   - int array elements m (the result)
+     *
+     * Inputs for squaring:
+     *   c_rarg0   - int array elements a
+     *   c_rarg1   - int array elements n (the modulus)
+     *   c_rarg2   - int length
+     *   c_rarg3   - int inv
+     *   c_rarg4   - int array elements m (the result)
+     *
+     */
+    address generate_multiply() {
+      Label argh, nothing;
+      bind(argh);
+      stop("MontgomeryMultiply total_allocation must be <= 8192");
+
+      align(CodeEntryAlignment);
+      address entry = pc();
+
+      cbzw(Rlen, nothing);
+
+      enter();
+
+      // Make room.
+      cmpw(Rlen, 512);
+      br(Assembler::HI, argh);
+      sub(Ra, sp, Rlen, ext::uxtw, exact_log2(4 * sizeof (jint)));
+      andr(sp, Ra, -2 * wordSize);
+
+      lsrw(Rlen, Rlen, 1);  // length in longwords = len/2
+
+      {
+        // Copy input args, reversing as we go.  We use Ra as a
+        // temporary variable.
+        reverse(Ra, Pa_base, Rlen, t0, t1);
+        if (!_squaring)
+          reverse(Ra, Pb_base, Rlen, t0, t1);
+        reverse(Ra, Pn_base, Rlen, t0, t1);
+      }
+
+      // Push all call-saved registers and also Pm_base which we'll need
+      // at the end.
+      save_regs();
+
+#ifndef PRODUCT
+      // assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply");
+      {
+        ldr(Rn, Address(Pn_base, 0));
+        mul(Rlo_mn, Rn, inv);
+        cmp(Rlo_mn, -1);
+        Label ok;
+        br(EQ, ok); {
+          stop("broken inverse in Montgomery multiply");
+        } bind(ok);
+      }
+#endif
+
+      mov(Pm_base, Ra);
+
+      mov(t0, zr);
+      mov(t1, zr);
+      mov(t2, zr);
+
+      block_comment("for (int i = 0; i < len; i++) {");
+      mov(Ri, zr); {
+        Label loop, end;
+        cmpw(Ri, Rlen);
+        br(Assembler::GE, end);
+
+        bind(loop);
+        pre1(Ri);
+
+        block_comment("  for (j = i; j; j--) {"); {
+          movw(Rj, Ri);
+          unroll_2(Rj, &MontgomeryMultiplyGenerator::step);
+        } block_comment("  } // j");
+
+        post1();
+        addw(Ri, Ri, 1);
+        cmpw(Ri, Rlen);
+        br(Assembler::LT, loop);
+        bind(end);
+        block_comment("} // i");
+      }
+
+      block_comment("for (int i = len; i < 2*len; i++) {");
+      mov(Ri, Rlen); {
+        Label loop, end;
+        cmpw(Ri, Rlen, Assembler::LSL, 1);
+        br(Assembler::GE, end);
+
+        bind(loop);
+        pre2(Ri, Rlen);
+
+        block_comment("  for (j = len*2-i-1; j; j--) {"); {
+          lslw(Rj, Rlen, 1);
+          subw(Rj, Rj, Ri);
+          subw(Rj, Rj, 1);
+          unroll_2(Rj, &MontgomeryMultiplyGenerator::step);
+        } block_comment("  } // j");
+
+        post2(Ri, Rlen);
+        addw(Ri, Ri, 1);
+        cmpw(Ri, Rlen, Assembler::LSL, 1);
+        br(Assembler::LT, loop);
+        bind(end);
+      }
+      block_comment("} // i");
+
+      normalize(Rlen);
+
+      mov(Ra, Pm_base);  // Save Pm_base in Ra
+      restore_regs();  // Restore caller's Pm_base
+
+      // Copy our result into caller's Pm_base
+      reverse(Pm_base, Ra, Rlen, t0, t1);
+
+      leave();
+      bind(nothing);
+      ret(lr);
+
+      return entry;
+    }
+    // In C, approximately:
+
+    // void
+    // montgomery_multiply(unsigned long Pa_base[], unsigned long Pb_base[],
+    //                     unsigned long Pn_base[], unsigned long Pm_base[],
+    //                     unsigned long inv, int len) {
+    //   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
+    //   unsigned long *Pa, *Pb, *Pn, *Pm;
+    //   unsigned long Ra, Rb, Rn, Rm;
+
+    //   int i;
+
+    //   assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply");
+
+    //   for (i = 0; i < len; i++) {
+    //     int j;
+
+    //     Pa = Pa_base;
+    //     Pb = Pb_base + i;
+    //     Pm = Pm_base;
+    //     Pn = Pn_base + i;
+
+    //     Ra = *Pa;
+    //     Rb = *Pb;
+    //     Rm = *Pm;
+    //     Rn = *Pn;
+
+    //     int iters = i;
+    //     for (j = 0; iters--; j++) {
+    //       assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be");
+    //       MACC(Ra, Rb, t0, t1, t2);
+    //       Ra = *++Pa;
+    //       Rb = *--Pb;
+    //       assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
+    //       MACC(Rm, Rn, t0, t1, t2);
+    //       Rm = *++Pm;
+    //       Rn = *--Pn;
+    //     }
+
+    //     assert(Ra == Pa_base[i] && Rb == Pb_base[0], "must be");
+    //     MACC(Ra, Rb, t0, t1, t2);
+    //     *Pm = Rm = t0 * inv;
+    //     assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be");
+    //     MACC(Rm, Rn, t0, t1, t2);
+
+    //     assert(t0 == 0, "broken Montgomery multiply");
+
+    //     t0 = t1; t1 = t2; t2 = 0;
+    //   }
+
+    //   for (i = len; i < 2*len; i++) {
+    //     int j;
+
+    //     Pa = Pa_base + i-len;
+    //     Pb = Pb_base + len;
+    //     Pm = Pm_base + i-len;
+    //     Pn = Pn_base + len;
+
+    //     Ra = *++Pa;
+    //     Rb = *--Pb;
+    //     Rm = *++Pm;
+    //     Rn = *--Pn;
+
+    //     int iters = len*2-i-1;
+    //     for (j = i-len+1; iters--; j++) {
+    //       assert(Ra == Pa_base[j] && Rb == Pb_base[i-j], "must be");
+    //       MACC(Ra, Rb, t0, t1, t2);
+    //       Ra = *++Pa;
+    //       Rb = *--Pb;
+    //       assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
+    //       MACC(Rm, Rn, t0, t1, t2);
+    //       Rm = *++Pm;
+    //       Rn = *--Pn;
+    //     }
+
+    //     Pm_base[i-len] = t0;
+    //     t0 = t1; t1 = t2; t2 = 0;
+    //   }
+
+    //   while (t0)
+    //     t0 = sub(Pm_base, Pn_base, t0, len);
+    // }
+
+    /**
+     * Fast Montgomery squaring.  This uses asymptotically 25% fewer
+     * multiplies than Montgomery multiplication so it should be up to
+     * 25% faster.  However, its loop control is more complex and it
+     * may actually run slower on some machines.
+     *
+     * Arguments:
+     *
+     * Inputs:
+     *   c_rarg0   - int array elements a
+     *   c_rarg1   - int array elements n (the modulus)
+     *   c_rarg2   - int length
+     *   c_rarg3   - int inv
+     *   c_rarg4   - int array elements m (the result)
+     *
+     */
+    address generate_square() {
+      Label argh;
+      bind(argh);
+      stop("MontgomeryMultiply total_allocation must be <= 8192");
+
+      align(CodeEntryAlignment);
+      address entry = pc();
+
+      enter();
+
+      // Make room.
+      cmpw(Rlen, 512);
+      br(Assembler::HI, argh);
+      sub(Ra, sp, Rlen, ext::uxtw, exact_log2(4 * sizeof (jint)));
+      andr(sp, Ra, -2 * wordSize);
+
+      lsrw(Rlen, Rlen, 1);  // length in longwords = len/2
+
+      {
+        // Copy input args, reversing as we go.  We use Ra as a
+        // temporary variable.
+        reverse(Ra, Pa_base, Rlen, t0, t1);
+        reverse(Ra, Pn_base, Rlen, t0, t1);
+      }
+
+      // Push all call-saved registers and also Pm_base which we'll need
+      // at the end.
+      save_regs();
+
+      mov(Pm_base, Ra);
+
+      mov(t0, zr);
+      mov(t1, zr);
+      mov(t2, zr);
+
+      block_comment("for (int i = 0; i < len; i++) {");
+      mov(Ri, zr); {
+        Label loop, end;
+        bind(loop);
+        cmp(Ri, Rlen);
+        br(Assembler::GE, end);
+
+        pre1(Ri);
+
+        block_comment("for (j = (i+1)/2; j; j--) {"); {
+          add(Rj, Ri, 1);
+          lsr(Rj, Rj, 1);
+          unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring);
+        } block_comment("  } // j");
+
+        last_squaring(Ri);
+
+        block_comment("  for (j = i/2; j; j--) {"); {
+          lsr(Rj, Ri, 1);
+          unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring);
+        } block_comment("  } // j");
+
+        post1_squaring();
+        add(Ri, Ri, 1);
+        cmp(Ri, Rlen);
+        br(Assembler::LT, loop);
+
+        bind(end);
+        block_comment("} // i");
+      }
+
+      block_comment("for (int i = len; i < 2*len; i++) {");
+      mov(Ri, Rlen); {
+        Label loop, end;
+        bind(loop);
+        cmp(Ri, Rlen, Assembler::LSL, 1);
+        br(Assembler::GE, end);
+
+        pre2(Ri, Rlen);
+
+        block_comment("  for (j = (2*len-i-1)/2; j; j--) {"); {
+          lsl(Rj, Rlen, 1);
+          sub(Rj, Rj, Ri);
+          sub(Rj, Rj, 1);
+          lsr(Rj, Rj, 1);
+          unroll_2(Rj, &MontgomeryMultiplyGenerator::step_squaring);
+        } block_comment("  } // j");
+
+        last_squaring(Ri);
+
+        block_comment("  for (j = (2*len-i)/2; j; j--) {"); {
+          lsl(Rj, Rlen, 1);
+          sub(Rj, Rj, Ri);
+          lsr(Rj, Rj, 1);
+          unroll_2(Rj, &MontgomeryMultiplyGenerator::extra_step_squaring);
+        } block_comment("  } // j");
+
+        post2(Ri, Rlen);
+        add(Ri, Ri, 1);
+        cmp(Ri, Rlen, Assembler::LSL, 1);
+
+        br(Assembler::LT, loop);
+        bind(end);
+        block_comment("} // i");
+      }
+
+      normalize(Rlen);
+
+      mov(Ra, Pm_base);  // Save Pm_base in Ra
+      restore_regs();  // Restore caller's Pm_base
+
+      // Copy our result into caller's Pm_base
+      reverse(Pm_base, Ra, Rlen, t0, t1);
+
+      leave();
+      ret(lr);
+
+      return entry;
+    }
+    // In C, approximately:
+
+    // void
+    // montgomery_square(unsigned long Pa_base[], unsigned long Pn_base[],
+    //                   unsigned long Pm_base[], unsigned long inv, int len) {
+    //   unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator
+    //   unsigned long *Pa, *Pb, *Pn, *Pm;
+    //   unsigned long Ra, Rb, Rn, Rm;
+
+    //   int i;
+
+    //   assert(inv * Pn_base[0] == -1UL, "broken inverse in Montgomery multiply");
+
+    //   for (i = 0; i < len; i++) {
+    //     int j;
+
+    //     Pa = Pa_base;
+    //     Pb = Pa_base + i;
+    //     Pm = Pm_base;
+    //     Pn = Pn_base + i;
+
+    //     Ra = *Pa;
+    //     Rb = *Pb;
+    //     Rm = *Pm;
+    //     Rn = *Pn;
+
+    //     int iters = (i+1)/2;
+    //     for (j = 0; iters--; j++) {
+    //       assert(Ra == Pa_base[j] && Rb == Pa_base[i-j], "must be");
+    //       MACC2(Ra, Rb, t0, t1, t2);
+    //       Ra = *++Pa;
+    //       Rb = *--Pb;
+    //       assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
+    //       MACC(Rm, Rn, t0, t1, t2);
+    //       Rm = *++Pm;
+    //       Rn = *--Pn;
+    //     }
+    //     if ((i & 1) == 0) {
+    //       assert(Ra == Pa_base[j], "must be");
+    //       MACC(Ra, Ra, t0, t1, t2);
+    //     }
+    //     iters = i/2;
+    //     assert(iters == i-j, "must be");
+    //     for (; iters--; j++) {
+    //       assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
+    //       MACC(Rm, Rn, t0, t1, t2);
+    //       Rm = *++Pm;
+    //       Rn = *--Pn;
+    //     }
+
+    //     *Pm = Rm = t0 * inv;
+    //     assert(Rm == Pm_base[i] && Rn == Pn_base[0], "must be");
+    //     MACC(Rm, Rn, t0, t1, t2);
+
+    //     assert(t0 == 0, "broken Montgomery multiply");
+
+    //     t0 = t1; t1 = t2; t2 = 0;
+    //   }
+
+    //   for (i = len; i < 2*len; i++) {
+    //     int start = i-len+1;
+    //     int end = start + (len - start)/2;
+    //     int j;
+
+    //     Pa = Pa_base + i-len;
+    //     Pb = Pa_base + len;
+    //     Pm = Pm_base + i-len;
+    //     Pn = Pn_base + len;
+
+    //     Ra = *++Pa;
+    //     Rb = *--Pb;
+    //     Rm = *++Pm;
+    //     Rn = *--Pn;
+
+    //     int iters = (2*len-i-1)/2;
+    //     assert(iters == end-start, "must be");
+    //     for (j = start; iters--; j++) {
+    //       assert(Ra == Pa_base[j] && Rb == Pa_base[i-j], "must be");
+    //       MACC2(Ra, Rb, t0, t1, t2);
+    //       Ra = *++Pa;
+    //       Rb = *--Pb;
+    //       assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
+    //       MACC(Rm, Rn, t0, t1, t2);
+    //       Rm = *++Pm;
+    //       Rn = *--Pn;
+    //     }
+    //     if ((i & 1) == 0) {
+    //       assert(Ra == Pa_base[j], "must be");
+    //       MACC(Ra, Ra, t0, t1, t2);
+    //     }
+    //     iters =  (2*len-i)/2;
+    //     assert(iters == len-j, "must be");
+    //     for (; iters--; j++) {
+    //       assert(Rm == Pm_base[j] && Rn == Pn_base[i-j], "must be");
+    //       MACC(Rm, Rn, t0, t1, t2);
+    //       Rm = *++Pm;
+    //       Rn = *--Pn;
+    //     }
+    //     Pm_base[i-len] = t0;
+    //     t0 = t1; t1 = t2; t2 = 0;
+    //   }
+
+    //   while (t0)
+    //     t0 = sub(Pm_base, Pn_base, t0, len);
+    // }
+  };
+
   // Initialization
   void generate_initial() {
     // Generate initial stubs and initializes the entry points
@@ -2603,7 +3554,26 @@
       StubRoutines::_multiplyToLen = generate_multiplyToLen();
     }
 
+    if (UseMontgomeryMultiplyIntrinsic) {
+      StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply");
+      MontgomeryMultiplyGenerator g(_masm, /*squaring*/false);
+      StubRoutines::_montgomeryMultiply = g.generate_multiply();
+    }
+
+    if (UseMontgomerySquareIntrinsic) {
+      StubCodeMark mark(this, "StubRoutines", "montgomerySquare");
+      MontgomeryMultiplyGenerator g(_masm, /*squaring*/true);
+      // We use generate_multiply() rather than generate_square()
+      // because it's faster for the sizes of modulus we care about.
+      StubRoutines::_montgomerySquare = g.generate_multiply();
+    }
+
 #ifndef BUILTIN_SIM
+    // generate GHASH intrinsics code
+    if (UseGHASHIntrinsics) {
+      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+    }
+
     if (UseAESIntrinsics) {
       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
       StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock();
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Fri Jul 31 12:13:57 2015 +0200
@@ -45,6 +45,10 @@
 #define HWCAP_AES   (1<<3)
 #endif
 
+#ifndef HWCAP_PMULL
+#define HWCAP_PMULL (1<<4)
+#endif
+
 #ifndef HWCAP_SHA1
 #define HWCAP_SHA1  (1<<5)
 #endif
@@ -190,11 +194,6 @@
     }
   }
 
-  if (UseGHASHIntrinsics) {
-    warning("GHASH intrinsics are not available on this CPU");
-    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
-  }
-
   if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
     UseCRC32Intrinsics = true;
   }
@@ -232,7 +231,7 @@
     }
   } else if (UseSHA256Intrinsics) {
     warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU.");
-    FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
+    FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
   }
 
   if (UseSHA512Intrinsics) {
@@ -244,6 +243,15 @@
     FLAG_SET_DEFAULT(UseSHA, false);
   }
 
+  if (auxv & HWCAP_PMULL) {
+    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
+      FLAG_SET_DEFAULT(UseGHASHIntrinsics, true);
+    }
+  } else if (UseGHASHIntrinsics) {
+    warning("GHASH intrinsics are not available on this CPU");
+    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
+  }
+
   // This machine allows unaligned memory accesses
   if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
     FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
@@ -261,6 +269,13 @@
     UsePopCountInstruction = true;
   }
 
+  if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) {
+    UseMontgomeryMultiplyIntrinsic = true;
+  }
+  if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) {
+    UseMontgomerySquareIntrinsic = true;
+  }
+
 #ifdef COMPILER2
   if (FLAG_IS_DEFAULT(OptoScheduling)) {
     OptoScheduling = true;
--- a/src/share/vm/c1/c1_Compiler.cpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/c1/c1_Compiler.cpp	Fri Jul 31 12:13:57 2015 +0200
@@ -99,6 +99,164 @@
   return buffer_blob;
 }
 
+bool Compiler::is_intrinsic_supported(methodHandle method) {
+  vmIntrinsics::ID id = method->intrinsic_id();
+  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+
+  if (method->is_synchronized()) {
+    // C1 does not support intrinsification of synchronized methods.
+    return false;
+  }
+
+  switch (id) {
+  case vmIntrinsics::_compareAndSwapLong:
+    if (!VM_Version::supports_cx8()) return false;
+    break;
+  case vmIntrinsics::_getAndAddInt:
+    if (!VM_Version::supports_atomic_getadd4()) return false;
+    break;
+  case vmIntrinsics::_getAndAddLong:
+    if (!VM_Version::supports_atomic_getadd8()) return false;
+    break;
+  case vmIntrinsics::_getAndSetInt:
+    if (!VM_Version::supports_atomic_getset4()) return false;
+    break;
+  case vmIntrinsics::_getAndSetLong:
+    if (!VM_Version::supports_atomic_getset8()) return false;
+    break;
+  case vmIntrinsics::_getAndSetObject:
+#ifdef _LP64
+    if (!UseCompressedOops && !VM_Version::supports_atomic_getset8()) return false;
+    if (UseCompressedOops && !VM_Version::supports_atomic_getset4()) return false;
+#else
+    if (!VM_Version::supports_atomic_getset4()) return false;
+#endif
+    break;
+  case vmIntrinsics::_arraycopy:
+  case vmIntrinsics::_currentTimeMillis:
+  case vmIntrinsics::_nanoTime:
+  case vmIntrinsics::_Reference_get:
+    // Use the intrinsic version of Reference.get() so that the value in
+    // the referent field can be registered by the G1 pre-barrier code.
+    // Also to prevent commoning reads from this field across safepoint
+    // since GC can change its value.
+  case vmIntrinsics::_loadFence:
+  case vmIntrinsics::_storeFence:
+  case vmIntrinsics::_fullFence:
+  case vmIntrinsics::_floatToRawIntBits:
+  case vmIntrinsics::_intBitsToFloat:
+  case vmIntrinsics::_doubleToRawLongBits:
+  case vmIntrinsics::_longBitsToDouble:
+  case vmIntrinsics::_getClass:
+  case vmIntrinsics::_isInstance:
+  case vmIntrinsics::_currentThread:
+  case vmIntrinsics::_dabs:
+  case vmIntrinsics::_dsqrt:
+  case vmIntrinsics::_dsin:
+  case vmIntrinsics::_dcos:
+  case vmIntrinsics::_dtan:
+  case vmIntrinsics::_dlog:
+  case vmIntrinsics::_dlog10:
+  case vmIntrinsics::_dexp:
+  case vmIntrinsics::_dpow:
+  case vmIntrinsics::_getObject:
+  case vmIntrinsics::_getBoolean:
+  case vmIntrinsics::_getByte:
+  case vmIntrinsics::_getShort:
+  case vmIntrinsics::_getChar:
+  case vmIntrinsics::_getInt:
+  case vmIntrinsics::_getLong:
+  case vmIntrinsics::_getFloat:
+  case vmIntrinsics::_getDouble:
+  case vmIntrinsics::_putObject:
+  case vmIntrinsics::_putBoolean:
+  case vmIntrinsics::_putByte:
+  case vmIntrinsics::_putShort:
+  case vmIntrinsics::_putChar:
+  case vmIntrinsics::_putInt:
+  case vmIntrinsics::_putLong:
+  case vmIntrinsics::_putFloat:
+  case vmIntrinsics::_putDouble:
+  case vmIntrinsics::_getObjectVolatile:
+  case vmIntrinsics::_getBooleanVolatile:
+  case vmIntrinsics::_getByteVolatile:
+  case vmIntrinsics::_getShortVolatile:
+  case vmIntrinsics::_getCharVolatile:
+  case vmIntrinsics::_getIntVolatile:
+  case vmIntrinsics::_getLongVolatile:
+  case vmIntrinsics::_getFloatVolatile:
+  case vmIntrinsics::_getDoubleVolatile:
+  case vmIntrinsics::_putObjectVolatile:
+  case vmIntrinsics::_putBooleanVolatile:
+  case vmIntrinsics::_putByteVolatile:
+  case vmIntrinsics::_putShortVolatile:
+  case vmIntrinsics::_putCharVolatile:
+  case vmIntrinsics::_putIntVolatile:
+  case vmIntrinsics::_putLongVolatile:
+  case vmIntrinsics::_putFloatVolatile:
+  case vmIntrinsics::_putDoubleVolatile:
+  case vmIntrinsics::_getByte_raw:
+  case vmIntrinsics::_getShort_raw:
+  case vmIntrinsics::_getChar_raw:
+  case vmIntrinsics::_getInt_raw:
+  case vmIntrinsics::_getLong_raw:
+  case vmIntrinsics::_getFloat_raw:
+  case vmIntrinsics::_getDouble_raw:
+  case vmIntrinsics::_putByte_raw:
+  case vmIntrinsics::_putShort_raw:
+  case vmIntrinsics::_putChar_raw:
+  case vmIntrinsics::_putInt_raw:
+  case vmIntrinsics::_putLong_raw:
+  case vmIntrinsics::_putFloat_raw:
+  case vmIntrinsics::_putDouble_raw:
+  case vmIntrinsics::_putOrderedObject:
+  case vmIntrinsics::_putOrderedInt:
+  case vmIntrinsics::_putOrderedLong:
+  case vmIntrinsics::_getShortUnaligned:
+  case vmIntrinsics::_getCharUnaligned:
+  case vmIntrinsics::_getIntUnaligned:
+  case vmIntrinsics::_getLongUnaligned:
+  case vmIntrinsics::_putShortUnaligned:
+  case vmIntrinsics::_putCharUnaligned:
+  case vmIntrinsics::_putIntUnaligned:
+  case vmIntrinsics::_putLongUnaligned:
+  case vmIntrinsics::_checkIndex:
+  case vmIntrinsics::_updateCRC32:
+  case vmIntrinsics::_updateBytesCRC32:
+  case vmIntrinsics::_updateByteBufferCRC32:
+  case vmIntrinsics::_compareAndSwapInt:
+  case vmIntrinsics::_compareAndSwapObject:
+#ifdef TRACE_HAVE_INTRINSICS
+  case vmIntrinsics::_classID:
+  case vmIntrinsics::_threadID:
+  case vmIntrinsics::_counterTime:
+#endif
+    break;
+  default:
+    return false; // Intrinsics not on the previous list are not available.
+  }
+
+  return true;
+}
+
+bool Compiler::is_intrinsic_disabled_by_flag(methodHandle method) {
+  vmIntrinsics::ID id = method->intrinsic_id();
+  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+
+  if (vmIntrinsics::is_disabled_by_flags(id)) {
+    return true;
+  }
+
+  if (!InlineNatives && id != vmIntrinsics::_Reference_get) {
+    return true;
+  }
+
+  if (!InlineClassNatives && id == vmIntrinsics::_getClass) {
+    return true;
+  }
+
+  return false;
+}
 
 void Compiler::compile_method(ciEnv* env, ciMethod* method, int entry_bci) {
   BufferBlob* buffer_blob = CompilerThread::current()->get_buffer_blob();
@@ -117,3 +275,7 @@
 void Compiler::print_timers() {
   Compilation::print_timers();
 }
+
+bool Compiler::is_intrinsic_available(methodHandle method, methodHandle compilation_context) {
+  return is_intrinsic_supported(method) && !is_intrinsic_disabled_by_flag(method);
+}
--- a/src/share/vm/c1/c1_Compiler.hpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/c1/c1_Compiler.hpp	Fri Jul 31 12:13:57 2015 +0200
@@ -55,6 +55,18 @@
   // Print compilation timers and statistics
   virtual void print_timers();
 
+  // Check the availability of an intrinsic for 'method' given a compilation context.
+  // The compilation context is needed to support per-method usage of the
+  // DisableIntrinsic flag. However, as C1 ignores the DisableIntrinsic flag, it
+  // ignores the compilation context.
+  virtual bool is_intrinsic_available(methodHandle method, methodHandle compilation_context);
+
+  // Check if the C1 compiler supports an intrinsic for 'method'.
+  virtual bool is_intrinsic_supported(methodHandle method);
+
+  // Processing of command-line flags specific to the C1 compiler.
+  virtual bool is_intrinsic_disabled_by_flag(methodHandle method);
+
   // Size of the code buffer
   static int code_buffer_size();
 };
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Fri Jul 31 12:13:57 2015 +0200
@@ -3372,231 +3372,85 @@
   return NULL;
 }
 
-
-bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) {
-  if (callee->is_synchronized()) {
-    // We don't currently support any synchronized intrinsics
-    return false;
+void GraphBuilder::build_graph_for_intrinsic(ciMethod* callee) {
+  vmIntrinsics::ID id = callee->intrinsic_id();
+  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+
+  // Some intrinsics need special IR nodes.
+  switch(id) {
+  case vmIntrinsics::_getObject          : append_unsafe_get_obj(callee, T_OBJECT,  false); return;
+  case vmIntrinsics::_getBoolean         : append_unsafe_get_obj(callee, T_BOOLEAN, false); return;
+  case vmIntrinsics::_getByte            : append_unsafe_get_obj(callee, T_BYTE,    false); return;
+  case vmIntrinsics::_getShort           : append_unsafe_get_obj(callee, T_SHORT,   false); return;
+  case vmIntrinsics::_getChar            : append_unsafe_get_obj(callee, T_CHAR,    false); return;
+  case vmIntrinsics::_getInt             : append_unsafe_get_obj(callee, T_INT,     false); return;
+  case vmIntrinsics::_getLong            : append_unsafe_get_obj(callee, T_LONG,    false); return;
+  case vmIntrinsics::_getFloat           : append_unsafe_get_obj(callee, T_FLOAT,   false); return;
+  case vmIntrinsics::_getDouble          : append_unsafe_get_obj(callee, T_DOUBLE,  false); return;
+  case vmIntrinsics::_putObject          : append_unsafe_put_obj(callee, T_OBJECT,  false); return;
+  case vmIntrinsics::_putBoolean         : append_unsafe_put_obj(callee, T_BOOLEAN, false); return;
+  case vmIntrinsics::_putByte            : append_unsafe_put_obj(callee, T_BYTE,    false); return;
+  case vmIntrinsics::_putShort           : append_unsafe_put_obj(callee, T_SHORT,   false); return;
+  case vmIntrinsics::_putChar            : append_unsafe_put_obj(callee, T_CHAR,    false); return;
+  case vmIntrinsics::_putInt             : append_unsafe_put_obj(callee, T_INT,     false); return;
+  case vmIntrinsics::_putLong            : append_unsafe_put_obj(callee, T_LONG,    false); return;
+  case vmIntrinsics::_putFloat           : append_unsafe_put_obj(callee, T_FLOAT,   false); return;
+  case vmIntrinsics::_putDouble          : append_unsafe_put_obj(callee, T_DOUBLE,  false); return;
+  case vmIntrinsics::_getShortUnaligned  : append_unsafe_get_obj(callee, T_SHORT,   false); return;
+  case vmIntrinsics::_getCharUnaligned   : append_unsafe_get_obj(callee, T_CHAR,    false); return;
+  case vmIntrinsics::_getIntUnaligned    : append_unsafe_get_obj(callee, T_INT,     false); return;
+  case vmIntrinsics::_getLongUnaligned   : append_unsafe_get_obj(callee, T_LONG,    false); return;
+  case vmIntrinsics::_putShortUnaligned  : append_unsafe_put_obj(callee, T_SHORT,   false); return;
+  case vmIntrinsics::_putCharUnaligned   : append_unsafe_put_obj(callee, T_CHAR,    false); return;
+  case vmIntrinsics::_putIntUnaligned    : append_unsafe_put_obj(callee, T_INT,     false); return;
+  case vmIntrinsics::_putLongUnaligned   : append_unsafe_put_obj(callee, T_LONG,    false); return;
+  case vmIntrinsics::_getObjectVolatile  : append_unsafe_get_obj(callee, T_OBJECT,  true); return;
+  case vmIntrinsics::_getBooleanVolatile : append_unsafe_get_obj(callee, T_BOOLEAN, true); return;
+  case vmIntrinsics::_getByteVolatile    : append_unsafe_get_obj(callee, T_BYTE,    true); return;
+  case vmIntrinsics::_getShortVolatile   : append_unsafe_get_obj(callee, T_SHORT,   true); return;
+  case vmIntrinsics::_getCharVolatile    : append_unsafe_get_obj(callee, T_CHAR,    true); return;
+  case vmIntrinsics::_getIntVolatile     : append_unsafe_get_obj(callee, T_INT,     true); return;
+  case vmIntrinsics::_getLongVolatile    : append_unsafe_get_obj(callee, T_LONG,    true); return;
+  case vmIntrinsics::_getFloatVolatile   : append_unsafe_get_obj(callee, T_FLOAT,   true); return;
+  case vmIntrinsics::_getDoubleVolatile  : append_unsafe_get_obj(callee, T_DOUBLE,  true); return;
+  case vmIntrinsics::_putObjectVolatile  : append_unsafe_put_obj(callee, T_OBJECT,  true); return;
+  case vmIntrinsics::_putBooleanVolatile : append_unsafe_put_obj(callee, T_BOOLEAN, true); return;
+  case vmIntrinsics::_putByteVolatile    : append_unsafe_put_obj(callee, T_BYTE,    true); return;
+  case vmIntrinsics::_putShortVolatile   : append_unsafe_put_obj(callee, T_SHORT,   true); return;
+  case vmIntrinsics::_putCharVolatile    : append_unsafe_put_obj(callee, T_CHAR,    true); return;
+  case vmIntrinsics::_putIntVolatile     : append_unsafe_put_obj(callee, T_INT,     true); return;
+  case vmIntrinsics::_putLongVolatile    : append_unsafe_put_obj(callee, T_LONG,    true); return;
+  case vmIntrinsics::_putFloatVolatile   : append_unsafe_put_obj(callee, T_FLOAT,   true); return;
+  case vmIntrinsics::_putDoubleVolatile  : append_unsafe_put_obj(callee, T_DOUBLE,  true); return;
+  case vmIntrinsics::_getByte_raw        : append_unsafe_get_raw(callee, T_BYTE  ); return;
+  case vmIntrinsics::_getShort_raw       : append_unsafe_get_raw(callee, T_SHORT ); return;
+  case vmIntrinsics::_getChar_raw        : append_unsafe_get_raw(callee, T_CHAR  ); return;
+  case vmIntrinsics::_getInt_raw         : append_unsafe_get_raw(callee, T_INT   ); return;
+  case vmIntrinsics::_getLong_raw        : append_unsafe_get_raw(callee, T_LONG  ); return;
+  case vmIntrinsics::_getFloat_raw       : append_unsafe_get_raw(callee, T_FLOAT ); return;
+  case vmIntrinsics::_getDouble_raw      : append_unsafe_get_raw(callee, T_DOUBLE); return;
+  case vmIntrinsics::_putByte_raw        : append_unsafe_put_raw(callee, T_BYTE  ); return;
+  case vmIntrinsics::_putShort_raw       : append_unsafe_put_raw(callee, T_SHORT ); return;
+  case vmIntrinsics::_putChar_raw        : append_unsafe_put_raw(callee, T_CHAR  ); return;
+  case vmIntrinsics::_putInt_raw         : append_unsafe_put_raw(callee, T_INT   ); return;
+  case vmIntrinsics::_putLong_raw        : append_unsafe_put_raw(callee, T_LONG  ); return;
+  case vmIntrinsics::_putFloat_raw       : append_unsafe_put_raw(callee, T_FLOAT ); return;
+  case vmIntrinsics::_putDouble_raw      : append_unsafe_put_raw(callee, T_DOUBLE);  return;
+  case vmIntrinsics::_putOrderedObject   : append_unsafe_put_obj(callee, T_OBJECT,  true); return;
+  case vmIntrinsics::_putOrderedInt      : append_unsafe_put_obj(callee, T_INT,     true); return;
+  case vmIntrinsics::_putOrderedLong     : append_unsafe_put_obj(callee, T_LONG,    true); return;
+  case vmIntrinsics::_compareAndSwapLong:
+  case vmIntrinsics::_compareAndSwapInt:
+  case vmIntrinsics::_compareAndSwapObject: append_unsafe_CAS(callee); return;
+  case vmIntrinsics::_getAndAddInt:
+  case vmIntrinsics::_getAndAddLong      : append_unsafe_get_and_set_obj(callee, true); return;
+  case vmIntrinsics::_getAndSetInt       :
+  case vmIntrinsics::_getAndSetLong      :
+  case vmIntrinsics::_getAndSetObject    : append_unsafe_get_and_set_obj(callee, false); return;
+  default:
+    break;
   }
 
-  // callee seems like a good candidate
-  // determine id
-  vmIntrinsics::ID id = callee->intrinsic_id();
-  if (!InlineNatives && id != vmIntrinsics::_Reference_get) {
-    // InlineNatives does not control Reference.get
-    INLINE_BAILOUT("intrinsic method inlining disabled");
-  }
-  bool preserves_state = false;
-  bool cantrap = true;
-  switch (id) {
-    case vmIntrinsics::_arraycopy:
-      if (!InlineArrayCopy) return false;
-      break;
-
-#ifdef TRACE_HAVE_INTRINSICS
-    case vmIntrinsics::_classID:
-    case vmIntrinsics::_threadID:
-      preserves_state = true;
-      cantrap = true;
-      break;
-
-    case vmIntrinsics::_counterTime:
-      preserves_state = true;
-      cantrap = false;
-      break;
-#endif
-
-    case vmIntrinsics::_currentTimeMillis:
-    case vmIntrinsics::_nanoTime:
-      preserves_state = true;
-      cantrap = false;
-      break;
-
-    case vmIntrinsics::_floatToRawIntBits   :
-    case vmIntrinsics::_intBitsToFloat      :
-    case vmIntrinsics::_doubleToRawLongBits :
-    case vmIntrinsics::_longBitsToDouble    :
-      if (!InlineMathNatives) return false;
-      preserves_state = true;
-      cantrap = false;
-      break;
-
-    case vmIntrinsics::_getClass      :
-    case vmIntrinsics::_isInstance    :
-      if (!InlineClassNatives) return false;
-      preserves_state = true;
-      break;
-
-    case vmIntrinsics::_currentThread :
-      if (!InlineThreadNatives) return false;
-      preserves_state = true;
-      cantrap = false;
-      break;
-
-    case vmIntrinsics::_dabs          : // fall through
-    case vmIntrinsics::_dsqrt         : // fall through
-    case vmIntrinsics::_dsin          : // fall through
-    case vmIntrinsics::_dcos          : // fall through
-    case vmIntrinsics::_dtan          : // fall through
-    case vmIntrinsics::_dlog          : // fall through
-    case vmIntrinsics::_dlog10        : // fall through
-    case vmIntrinsics::_dexp          : // fall through
-    case vmIntrinsics::_dpow          : // fall through
-      if (!InlineMathNatives) return false;
-      cantrap = false;
-      preserves_state = true;
-      break;
-
-    // Use special nodes for Unsafe instructions so we can more easily
-    // perform an address-mode optimization on the raw variants
-    case vmIntrinsics::_getObject : return append_unsafe_get_obj(callee, T_OBJECT,  false);
-    case vmIntrinsics::_getBoolean: return append_unsafe_get_obj(callee, T_BOOLEAN, false);
-    case vmIntrinsics::_getByte   : return append_unsafe_get_obj(callee, T_BYTE,    false);
-    case vmIntrinsics::_getShort  : return append_unsafe_get_obj(callee, T_SHORT,   false);
-    case vmIntrinsics::_getChar   : return append_unsafe_get_obj(callee, T_CHAR,    false);
-    case vmIntrinsics::_getInt    : return append_unsafe_get_obj(callee, T_INT,     false);
-    case vmIntrinsics::_getLong   : return append_unsafe_get_obj(callee, T_LONG,    false);
-    case vmIntrinsics::_getFloat  : return append_unsafe_get_obj(callee, T_FLOAT,   false);
-    case vmIntrinsics::_getDouble : return append_unsafe_get_obj(callee, T_DOUBLE,  false);
-
-    case vmIntrinsics::_putObject : return append_unsafe_put_obj(callee, T_OBJECT,  false);
-    case vmIntrinsics::_putBoolean: return append_unsafe_put_obj(callee, T_BOOLEAN, false);
-    case vmIntrinsics::_putByte   : return append_unsafe_put_obj(callee, T_BYTE,    false);
-    case vmIntrinsics::_putShort  : return append_unsafe_put_obj(callee, T_SHORT,   false);
-    case vmIntrinsics::_putChar   : return append_unsafe_put_obj(callee, T_CHAR,    false);
-    case vmIntrinsics::_putInt    : return append_unsafe_put_obj(callee, T_INT,     false);
-    case vmIntrinsics::_putLong   : return append_unsafe_put_obj(callee, T_LONG,    false);
-    case vmIntrinsics::_putFloat  : return append_unsafe_put_obj(callee, T_FLOAT,   false);
-    case vmIntrinsics::_putDouble : return append_unsafe_put_obj(callee, T_DOUBLE,  false);
-
-    case vmIntrinsics::_getShortUnaligned  :
-      return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_SHORT,   false) : false;
-    case vmIntrinsics::_getCharUnaligned   :
-      return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_CHAR,    false) : false;
-    case vmIntrinsics::_getIntUnaligned    :
-      return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_INT,     false) : false;
-    case vmIntrinsics::_getLongUnaligned   :
-      return UseUnalignedAccesses ? append_unsafe_get_obj(callee, T_LONG,    false) : false;
-
-    case vmIntrinsics::_putShortUnaligned  :
-      return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_SHORT,   false) : false;
-    case vmIntrinsics::_putCharUnaligned   :
-      return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_CHAR,    false) : false;
-    case vmIntrinsics::_putIntUnaligned    :
-      return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_INT,     false) : false;
-    case vmIntrinsics::_putLongUnaligned   :
-      return UseUnalignedAccesses ? append_unsafe_put_obj(callee, T_LONG,    false) : false;
-
-    case vmIntrinsics::_getObjectVolatile : return append_unsafe_get_obj(callee, T_OBJECT,  true);
-    case vmIntrinsics::_getBooleanVolatile: return append_unsafe_get_obj(callee, T_BOOLEAN, true);
-    case vmIntrinsics::_getByteVolatile   : return append_unsafe_get_obj(callee, T_BYTE,    true);
-    case vmIntrinsics::_getShortVolatile  : return append_unsafe_get_obj(callee, T_SHORT,   true);
-    case vmIntrinsics::_getCharVolatile   : return append_unsafe_get_obj(callee, T_CHAR,    true);
-    case vmIntrinsics::_getIntVolatile    : return append_unsafe_get_obj(callee, T_INT,     true);
-    case vmIntrinsics::_getLongVolatile   : return append_unsafe_get_obj(callee, T_LONG,    true);
-    case vmIntrinsics::_getFloatVolatile  : return append_unsafe_get_obj(callee, T_FLOAT,   true);
-    case vmIntrinsics::_getDoubleVolatile : return append_unsafe_get_obj(callee, T_DOUBLE,  true);
-
-    case vmIntrinsics::_putObjectVolatile : return append_unsafe_put_obj(callee, T_OBJECT,  true);
-    case vmIntrinsics::_putBooleanVolatile: return append_unsafe_put_obj(callee, T_BOOLEAN, true);
-    case vmIntrinsics::_putByteVolatile   : return append_unsafe_put_obj(callee, T_BYTE,    true);
-    case vmIntrinsics::_putShortVolatile  : return append_unsafe_put_obj(callee, T_SHORT,   true);
-    case vmIntrinsics::_putCharVolatile   : return append_unsafe_put_obj(callee, T_CHAR,    true);
-    case vmIntrinsics::_putIntVolatile    : return append_unsafe_put_obj(callee, T_INT,     true);
-    case vmIntrinsics::_putLongVolatile   : return append_unsafe_put_obj(callee, T_LONG,    true);
-    case vmIntrinsics::_putFloatVolatile  : return append_unsafe_put_obj(callee, T_FLOAT,   true);
-    case vmIntrinsics::_putDoubleVolatile : return append_unsafe_put_obj(callee, T_DOUBLE,  true);
-
-    case vmIntrinsics::_getByte_raw   : return append_unsafe_get_raw(callee, T_BYTE);
-    case vmIntrinsics::_getShort_raw  : return append_unsafe_get_raw(callee, T_SHORT);
-    case vmIntrinsics::_getChar_raw   : return append_unsafe_get_raw(callee, T_CHAR);
-    case vmIntrinsics::_getInt_raw    : return append_unsafe_get_raw(callee, T_INT);
-    case vmIntrinsics::_getLong_raw   : return append_unsafe_get_raw(callee, T_LONG);
-    case vmIntrinsics::_getFloat_raw  : return append_unsafe_get_raw(callee, T_FLOAT);
-    case vmIntrinsics::_getDouble_raw : return append_unsafe_get_raw(callee, T_DOUBLE);
-
-    case vmIntrinsics::_putByte_raw   : return append_unsafe_put_raw(callee, T_BYTE);
-    case vmIntrinsics::_putShort_raw  : return append_unsafe_put_raw(callee, T_SHORT);
-    case vmIntrinsics::_putChar_raw   : return append_unsafe_put_raw(callee, T_CHAR);
-    case vmIntrinsics::_putInt_raw    : return append_unsafe_put_raw(callee, T_INT);
-    case vmIntrinsics::_putLong_raw   : return append_unsafe_put_raw(callee, T_LONG);
-    case vmIntrinsics::_putFloat_raw  : return append_unsafe_put_raw(callee, T_FLOAT);
-    case vmIntrinsics::_putDouble_raw : return append_unsafe_put_raw(callee, T_DOUBLE);
-
-    case vmIntrinsics::_checkIndex    :
-      if (!InlineNIOCheckIndex) return false;
-      preserves_state = true;
-      break;
-    case vmIntrinsics::_putOrderedObject : return append_unsafe_put_obj(callee, T_OBJECT,  true);
-    case vmIntrinsics::_putOrderedInt    : return append_unsafe_put_obj(callee, T_INT,     true);
-    case vmIntrinsics::_putOrderedLong   : return append_unsafe_put_obj(callee, T_LONG,    true);
-
-    case vmIntrinsics::_compareAndSwapLong:
-      if (!VM_Version::supports_cx8()) return false;
-      // fall through
-    case vmIntrinsics::_compareAndSwapInt:
-    case vmIntrinsics::_compareAndSwapObject:
-      append_unsafe_CAS(callee);
-      return true;
-
-    case vmIntrinsics::_getAndAddInt:
-      if (!VM_Version::supports_atomic_getadd4()) {
-        return false;
-      }
-      return append_unsafe_get_and_set_obj(callee, true);
-    case vmIntrinsics::_getAndAddLong:
-      if (!VM_Version::supports_atomic_getadd8()) {
-        return false;
-      }
-      return append_unsafe_get_and_set_obj(callee, true);
-    case vmIntrinsics::_getAndSetInt:
-      if (!VM_Version::supports_atomic_getset4()) {
-        return false;
-      }
-      return append_unsafe_get_and_set_obj(callee, false);
-    case vmIntrinsics::_getAndSetLong:
-      if (!VM_Version::supports_atomic_getset8()) {
-        return false;
-      }
-      return append_unsafe_get_and_set_obj(callee, false);
-    case vmIntrinsics::_getAndSetObject:
-#ifdef _LP64
-      if (!UseCompressedOops && !VM_Version::supports_atomic_getset8()) {
-        return false;
-      }
-      if (UseCompressedOops && !VM_Version::supports_atomic_getset4()) {
-        return false;
-      }
-#else
-      if (!VM_Version::supports_atomic_getset4()) {
-        return false;
-      }
-#endif
-      return append_unsafe_get_and_set_obj(callee, false);
-
-    case vmIntrinsics::_Reference_get:
-      // Use the intrinsic version of Reference.get() so that the value in
-      // the referent field can be registered by the G1 pre-barrier code.
-      // Also to prevent commoning reads from this field across safepoint
-      // since GC can change its value.
-      preserves_state = true;
-      break;
-
-    case vmIntrinsics::_updateCRC32:
-    case vmIntrinsics::_updateBytesCRC32:
-    case vmIntrinsics::_updateByteBufferCRC32:
-      if (!UseCRC32Intrinsics) return false;
-      cantrap = false;
-      preserves_state = true;
-      break;
-
-    case vmIntrinsics::_loadFence :
-    case vmIntrinsics::_storeFence:
-    case vmIntrinsics::_fullFence :
-      break;
-
-    default                       : return false; // do not inline
-  }
   // create intrinsic node
   const bool has_receiver = !callee->is_static();
   ValueType* result_type = as_ValueType(callee->return_type());
@@ -3621,8 +3475,10 @@
     }
   }
 
-  Intrinsic* result = new Intrinsic(result_type, id, args, has_receiver, state_before,
-                                    preserves_state, cantrap);
+  Intrinsic* result = new Intrinsic(result_type, callee->intrinsic_id(),
+                                    args, has_receiver, state_before,
+                                    vmIntrinsics::preserves_state(id),
+                                    vmIntrinsics::can_trap(id));
   // append instruction & push result
   Value value = append_split(result);
   if (result_type != voidType) push(result_type, value);
@@ -3630,8 +3486,22 @@
   if (callee != method() && profile_return() && result_type->is_object_kind()) {
     profile_return_type(result, callee);
   }
-
-  // done
+}
+
+bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) {
+  // For calling is_intrinsic_available we need to transition to
+  // the '_thread_in_vm' state because is_intrinsic_available()
+  // does not accesses critical VM-internal data.
+  if (!_compilation->compiler()->is_intrinsic_available(callee->get_Method(), NULL)) {
+    if (!InlineNatives) {
+      // Return false and also set message that the inlining of
+      // intrinsics has been disabled in general.
+      INLINE_BAILOUT("intrinsic method inlining disabled");
+    } else {
+      return false;
+    }
+  }
+  build_graph_for_intrinsic(callee);
   return true;
 }
 
@@ -4224,58 +4094,46 @@
   _scope_data = scope_data()->parent();
 }
 
-bool GraphBuilder::append_unsafe_get_obj(ciMethod* callee, BasicType t, bool is_volatile) {
-  if (InlineUnsafeOps) {
-    Values* args = state()->pop_arguments(callee->arg_size());
-    null_check(args->at(0));
-    Instruction* offset = args->at(2);
+void GraphBuilder::append_unsafe_get_obj(ciMethod* callee, BasicType t, bool is_volatile) {
+  Values* args = state()->pop_arguments(callee->arg_size());
+  null_check(args->at(0));
+  Instruction* offset = args->at(2);
 #ifndef _LP64
-    offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT)));
+  offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT)));
 #endif
-    Instruction* op = append(new UnsafeGetObject(t, args->at(1), offset, is_volatile));
-    push(op->type(), op);
-    compilation()->set_has_unsafe_access(true);
-  }
-  return InlineUnsafeOps;
+  Instruction* op = append(new UnsafeGetObject(t, args->at(1), offset, is_volatile));
+  push(op->type(), op);
+  compilation()->set_has_unsafe_access(true);
 }
 
 
-bool GraphBuilder::append_unsafe_put_obj(ciMethod* callee, BasicType t, bool is_volatile) {
-  if (InlineUnsafeOps) {
-    Values* args = state()->pop_arguments(callee->arg_size());
-    null_check(args->at(0));
-    Instruction* offset = args->at(2);
+void GraphBuilder::append_unsafe_put_obj(ciMethod* callee, BasicType t, bool is_volatile) {
+  Values* args = state()->pop_arguments(callee->arg_size());
+  null_check(args->at(0));
+  Instruction* offset = args->at(2);
 #ifndef _LP64
-    offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT)));
+  offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT)));
 #endif
-    Instruction* op = append(new UnsafePutObject(t, args->at(1), offset, args->at(3), is_volatile));
-    compilation()->set_has_unsafe_access(true);
-    kill_all();
-  }
-  return InlineUnsafeOps;
+  Instruction* op = append(new UnsafePutObject(t, args->at(1), offset, args->at(3), is_volatile));
+  compilation()->set_has_unsafe_access(true);
+  kill_all();
 }
 
 
-bool GraphBuilder::append_unsafe_get_raw(ciMethod* callee, BasicType t) {
-  if (InlineUnsafeOps) {
-    Values* args = state()->pop_arguments(callee->arg_size());
-    null_check(args->at(0));
-    Instruction* op = append(new UnsafeGetRaw(t, args->at(1), false));
-    push(op->type(), op);
-    compilation()->set_has_unsafe_access(true);
-  }
-  return InlineUnsafeOps;
+void GraphBuilder::append_unsafe_get_raw(ciMethod* callee, BasicType t) {
+  Values* args = state()->pop_arguments(callee->arg_size());
+  null_check(args->at(0));
+  Instruction* op = append(new UnsafeGetRaw(t, args->at(1), false));
+  push(op->type(), op);
+  compilation()->set_has_unsafe_access(true);
 }
 
 
-bool GraphBuilder::append_unsafe_put_raw(ciMethod* callee, BasicType t) {
-  if (InlineUnsafeOps) {
-    Values* args = state()->pop_arguments(callee->arg_size());
-    null_check(args->at(0));
-    Instruction* op = append(new UnsafePutRaw(t, args->at(1), args->at(2)));
-    compilation()->set_has_unsafe_access(true);
-  }
-  return InlineUnsafeOps;
+void GraphBuilder::append_unsafe_put_raw(ciMethod* callee, BasicType t) {
+  Values* args = state()->pop_arguments(callee->arg_size());
+  null_check(args->at(0));
+  Instruction* op = append(new UnsafePutRaw(t, args->at(1), args->at(2)));
+  compilation()->set_has_unsafe_access(true);
 }
 
 
@@ -4352,21 +4210,18 @@
   }
 }
 
-bool GraphBuilder::append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add) {
-  if (InlineUnsafeOps) {
-    Values* args = state()->pop_arguments(callee->arg_size());
-    BasicType t = callee->return_type()->basic_type();
-    null_check(args->at(0));
-    Instruction* offset = args->at(2);
+void GraphBuilder::append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add) {
+  Values* args = state()->pop_arguments(callee->arg_size());
+  BasicType t = callee->return_type()->basic_type();
+  null_check(args->at(0));
+  Instruction* offset = args->at(2);
 #ifndef _LP64
-    offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT)));
+  offset = append(new Convert(Bytecodes::_l2i, offset, as_ValueType(T_INT)));
 #endif
-    Instruction* op = append(new UnsafeGetAndSetObject(t, args->at(1), offset, args->at(3), is_add));
-    compilation()->set_has_unsafe_access(true);
-    kill_all();
-    push(op->type(), op);
-  }
-  return InlineUnsafeOps;
+  Instruction* op = append(new UnsafeGetAndSetObject(t, args->at(1), offset, args->at(3), is_add));
+  compilation()->set_has_unsafe_access(true);
+  kill_all();
+  push(op->type(), op);
 }
 
 #ifndef PRODUCT
--- a/src/share/vm/c1/c1_GraphBuilder.hpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/c1/c1_GraphBuilder.hpp	Fri Jul 31 12:13:57 2015 +0200
@@ -339,6 +339,8 @@
   void inline_sync_entry(Value lock, BlockBegin* sync_handler);
   void fill_sync_handler(Value lock, BlockBegin* sync_handler, bool default_handler = false);
 
+  void build_graph_for_intrinsic(ciMethod* callee);
+
   // inliners
   bool try_inline(           ciMethod* callee, bool holder_known, Bytecodes::Code bc = Bytecodes::_illegal, Value receiver = NULL);
   bool try_inline_intrinsics(ciMethod* callee);
@@ -364,12 +366,12 @@
   void pop_scope();
   void pop_scope_for_jsr();
 
-  bool append_unsafe_get_obj(ciMethod* callee, BasicType t, bool is_volatile);
-  bool append_unsafe_put_obj(ciMethod* callee, BasicType t, bool is_volatile);
-  bool append_unsafe_get_raw(ciMethod* callee, BasicType t);
-  bool append_unsafe_put_raw(ciMethod* callee, BasicType t);
+  void append_unsafe_get_obj(ciMethod* callee, BasicType t, bool is_volatile);
+  void append_unsafe_put_obj(ciMethod* callee, BasicType t, bool is_volatile);
+  void append_unsafe_get_raw(ciMethod* callee, BasicType t);
+  void append_unsafe_put_raw(ciMethod* callee, BasicType t);
   void append_unsafe_CAS(ciMethod* callee);
-  bool append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add);
+  void append_unsafe_get_and_set_obj(ciMethod* callee, bool is_add);
 
   void print_inlining(ciMethod* callee, const char* msg = NULL, bool success = true);
 
--- a/src/share/vm/c1/c1_ValueType.cpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/c1/c1_ValueType.cpp	Fri Jul 31 12:13:57 2015 +0200
@@ -153,7 +153,19 @@
     case T_FLOAT  : return new FloatConstant (value.as_float ());
     case T_DOUBLE : return new DoubleConstant(value.as_double());
     case T_ARRAY  : // fall through (ciConstant doesn't have an array accessor)
-    case T_OBJECT : return new ObjectConstant(value.as_object());
+    case T_OBJECT : {
+      // TODO: Common the code with GraphBuilder::load_constant?
+      ciObject* obj = value.as_object();
+      if (obj->is_null_object())
+        return objectNull;
+      if (obj->is_loaded()) {
+        if (obj->is_array())
+          return new ArrayConstant(obj->as_array());
+        else if (obj->is_instance())
+          return new InstanceConstant(obj->as_instance());
+      }
+      return new ObjectConstant(obj);
+    }
   }
   ShouldNotReachHere();
   return illegalType;
--- a/src/share/vm/classfile/vmSymbols.cpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/classfile/vmSymbols.cpp	Fri Jul 31 12:13:57 2015 +0200
@@ -324,6 +324,319 @@
   return vmIntrinsics::_none;
 }
 
+bool vmIntrinsics::preserves_state(vmIntrinsics::ID id) {
+  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+  switch(id) {
+#ifdef TRACE_HAVE_INTRINSICS
+  case vmIntrinsics::_classID:
+  case vmIntrinsics::_threadID:
+  case vmIntrinsics::_counterTime:
+#endif
+  case vmIntrinsics::_currentTimeMillis:
+  case vmIntrinsics::_nanoTime:
+  case vmIntrinsics::_floatToRawIntBits:
+  case vmIntrinsics::_intBitsToFloat:
+  case vmIntrinsics::_doubleToRawLongBits:
+  case vmIntrinsics::_longBitsToDouble:
+  case vmIntrinsics::_getClass:
+  case vmIntrinsics::_isInstance:
+  case vmIntrinsics::_currentThread:
+  case vmIntrinsics::_dabs:
+  case vmIntrinsics::_dsqrt:
+  case vmIntrinsics::_dsin:
+  case vmIntrinsics::_dcos:
+  case vmIntrinsics::_dtan:
+  case vmIntrinsics::_dlog:
+  case vmIntrinsics::_dlog10:
+  case vmIntrinsics::_dexp:
+  case vmIntrinsics::_dpow:
+  case vmIntrinsics::_checkIndex:
+  case vmIntrinsics::_Reference_get:
+  case vmIntrinsics::_updateCRC32:
+  case vmIntrinsics::_updateBytesCRC32:
+  case vmIntrinsics::_updateByteBufferCRC32:
+    return true;
+  default:
+    return false;
+  }
+}
+
+bool vmIntrinsics::can_trap(vmIntrinsics::ID id) {
+  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+  switch(id) {
+#ifdef TRACE_HAVE_INTRINSICS
+  case vmIntrinsics::_counterTime:
+#endif
+  case vmIntrinsics::_currentTimeMillis:
+  case vmIntrinsics::_nanoTime:
+  case vmIntrinsics::_floatToRawIntBits:
+  case vmIntrinsics::_intBitsToFloat:
+  case vmIntrinsics::_doubleToRawLongBits:
+  case vmIntrinsics::_longBitsToDouble:
+  case vmIntrinsics::_currentThread:
+  case vmIntrinsics::_dabs:
+  case vmIntrinsics::_dsqrt:
+  case vmIntrinsics::_dsin:
+  case vmIntrinsics::_dcos:
+  case vmIntrinsics::_dtan:
+  case vmIntrinsics::_dlog:
+  case vmIntrinsics::_dlog10:
+  case vmIntrinsics::_dexp:
+  case vmIntrinsics::_dpow:
+  case vmIntrinsics::_updateCRC32:
+  case vmIntrinsics::_updateBytesCRC32:
+  case vmIntrinsics::_updateByteBufferCRC32:
+    return false;
+  default:
+    return true;
+  }
+}
+
+bool vmIntrinsics::does_virtual_dispatch(vmIntrinsics::ID id) {
+  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+  switch(id) {
+  case vmIntrinsics::_hashCode:
+  case vmIntrinsics::_clone:
+    return true;
+    break;
+  default:
+    return false;
+  }
+}
+
+int vmIntrinsics::predicates_needed(vmIntrinsics::ID id) {
+  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+  switch (id) {
+  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+    return 1;
+  case vmIntrinsics::_digestBase_implCompressMB:
+    return 3;
+  default:
+    return 0;
+  }
+}
+
+bool vmIntrinsics::is_disabled_by_flags(vmIntrinsics::ID id) {
+  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+  switch (id) {
+  case vmIntrinsics::_isInstance:
+  case vmIntrinsics::_isAssignableFrom:
+  case vmIntrinsics::_getModifiers:
+  case vmIntrinsics::_isInterface:
+  case vmIntrinsics::_isArray:
+  case vmIntrinsics::_isPrimitive:
+  case vmIntrinsics::_getSuperclass:
+  case vmIntrinsics::_Class_cast:
+  case vmIntrinsics::_getLength:
+  case vmIntrinsics::_newArray:
+    if (!InlineClassNatives) return true;
+    break;
+  case vmIntrinsics::_currentThread:
+  case vmIntrinsics::_isInterrupted:
+    if (!InlineThreadNatives) return true;
+    break;
+  case vmIntrinsics::_floatToRawIntBits:
+  case vmIntrinsics::_intBitsToFloat:
+  case vmIntrinsics::_doubleToRawLongBits:
+  case vmIntrinsics::_longBitsToDouble:
+  case vmIntrinsics::_dabs:
+  case vmIntrinsics::_dsqrt:
+  case vmIntrinsics::_dsin:
+  case vmIntrinsics::_dcos:
+  case vmIntrinsics::_dtan:
+  case vmIntrinsics::_dlog:
+  case vmIntrinsics::_dexp:
+  case vmIntrinsics::_dpow:
+  case vmIntrinsics::_dlog10:
+  case vmIntrinsics::_datan2:
+  case vmIntrinsics::_min:
+  case vmIntrinsics::_max:
+  case vmIntrinsics::_floatToIntBits:
+  case vmIntrinsics::_doubleToLongBits:
+    if (!InlineMathNatives) return true;
+    break;
+  case vmIntrinsics::_arraycopy:
+    if (!InlineArrayCopy) return true;
+    break;
+  case vmIntrinsics::_updateCRC32:
+  case vmIntrinsics::_updateBytesCRC32:
+  case vmIntrinsics::_updateByteBufferCRC32:
+    if (!UseCRC32Intrinsics) return true;
+    break;
+  case vmIntrinsics::_getObject:
+  case vmIntrinsics::_getBoolean:
+  case vmIntrinsics::_getByte:
+  case vmIntrinsics::_getShort:
+  case vmIntrinsics::_getChar:
+  case vmIntrinsics::_getInt:
+  case vmIntrinsics::_getLong:
+  case vmIntrinsics::_getFloat:
+  case vmIntrinsics::_getDouble:
+  case vmIntrinsics::_putObject:
+  case vmIntrinsics::_putBoolean:
+  case vmIntrinsics::_putByte:
+  case vmIntrinsics::_putShort:
+  case vmIntrinsics::_putChar:
+  case vmIntrinsics::_putInt:
+  case vmIntrinsics::_putLong:
+  case vmIntrinsics::_putFloat:
+  case vmIntrinsics::_putDouble:
+  case vmIntrinsics::_getObjectVolatile:
+  case vmIntrinsics::_getBooleanVolatile:
+  case vmIntrinsics::_getByteVolatile:
+  case vmIntrinsics::_getShortVolatile:
+  case vmIntrinsics::_getCharVolatile:
+  case vmIntrinsics::_getIntVolatile:
+  case vmIntrinsics::_getLongVolatile:
+  case vmIntrinsics::_getFloatVolatile:
+  case vmIntrinsics::_getDoubleVolatile:
+  case vmIntrinsics::_putObjectVolatile:
+  case vmIntrinsics::_putBooleanVolatile:
+  case vmIntrinsics::_putByteVolatile:
+  case vmIntrinsics::_putShortVolatile:
+  case vmIntrinsics::_putCharVolatile:
+  case vmIntrinsics::_putIntVolatile:
+  case vmIntrinsics::_putLongVolatile:
+  case vmIntrinsics::_putFloatVolatile:
+  case vmIntrinsics::_putDoubleVolatile:
+  case vmIntrinsics::_getByte_raw:
+  case vmIntrinsics::_getShort_raw:
+  case vmIntrinsics::_getChar_raw:
+  case vmIntrinsics::_getInt_raw:
+  case vmIntrinsics::_getLong_raw:
+  case vmIntrinsics::_getFloat_raw:
+  case vmIntrinsics::_getDouble_raw:
+  case vmIntrinsics::_putByte_raw:
+  case vmIntrinsics::_putShort_raw:
+  case vmIntrinsics::_putChar_raw:
+  case vmIntrinsics::_putInt_raw:
+  case vmIntrinsics::_putLong_raw:
+  case vmIntrinsics::_putFloat_raw:
+  case vmIntrinsics::_putDouble_raw:
+  case vmIntrinsics::_putOrderedObject:
+  case vmIntrinsics::_putOrderedLong:
+  case vmIntrinsics::_putOrderedInt:
+  case vmIntrinsics::_getAndAddInt:
+  case vmIntrinsics::_getAndAddLong:
+  case vmIntrinsics::_getAndSetInt:
+  case vmIntrinsics::_getAndSetLong:
+  case vmIntrinsics::_getAndSetObject:
+    if (!InlineUnsafeOps) return true;
+    break;
+  case vmIntrinsics::_getShortUnaligned:
+  case vmIntrinsics::_getCharUnaligned:
+  case vmIntrinsics::_getIntUnaligned:
+  case vmIntrinsics::_getLongUnaligned:
+  case vmIntrinsics::_putShortUnaligned:
+  case vmIntrinsics::_putCharUnaligned:
+  case vmIntrinsics::_putIntUnaligned:
+  case vmIntrinsics::_putLongUnaligned:
+  case vmIntrinsics::_allocateInstance:
+  case vmIntrinsics::_getAddress_raw:
+  case vmIntrinsics::_putAddress_raw:
+    if (!InlineUnsafeOps || !UseUnalignedAccesses) return true;
+    break;
+  case vmIntrinsics::_hashCode:
+    if (!InlineObjectHash) return true;
+    break;
+  case vmIntrinsics::_aescrypt_encryptBlock:
+  case vmIntrinsics::_aescrypt_decryptBlock:
+    if (!UseAESIntrinsics) return true;
+    break;
+  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+    if (!UseAESIntrinsics) return true;
+    break;
+  case vmIntrinsics::_sha_implCompress:
+    if (!UseSHA1Intrinsics) return true;
+    break;
+  case vmIntrinsics::_sha2_implCompress:
+    if (!UseSHA256Intrinsics) return true;
+    break;
+  case vmIntrinsics::_sha5_implCompress:
+    if (!UseSHA512Intrinsics) return true;
+    break;
+  case vmIntrinsics::_digestBase_implCompressMB:
+    if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) return true;
+    break;
+  case vmIntrinsics::_ghash_processBlocks:
+    if (!UseGHASHIntrinsics) return true;
+    break;
+  case vmIntrinsics::_updateBytesCRC32C:
+  case vmIntrinsics::_updateDirectByteBufferCRC32C:
+    if (!UseCRC32CIntrinsics) return true;
+    break;
+  case vmIntrinsics::_copyMemory:
+    if (!InlineArrayCopy || !InlineUnsafeOps) return true;
+    break;
+#ifdef COMPILER1
+  case vmIntrinsics::_checkIndex:
+    if (!InlineNIOCheckIndex) return true;
+    break;
+#endif // COMPILER1
+#ifdef COMPILER2
+  case vmIntrinsics::_clone:
+  case vmIntrinsics::_copyOf:
+  case vmIntrinsics::_copyOfRange:
+    // These intrinsics use both the objectcopy and the arraycopy
+    // intrinsic mechanism.
+    if (!InlineObjectCopy || !InlineArrayCopy) return true;
+    break;
+  case vmIntrinsics::_compareTo:
+     if (!SpecialStringCompareTo) return true;
+     break;
+  case vmIntrinsics::_indexOf:
+    if (!SpecialStringIndexOf) return true;
+    break;
+  case vmIntrinsics::_equals:
+    if (!SpecialStringEquals) return true;
+    break;
+  case vmIntrinsics::_equalsC:
+    if (!SpecialArraysEquals) return true;
+    break;
+  case vmIntrinsics::_encodeISOArray:
+    if (!SpecialEncodeISOArray) return true;
+    break;
+  case vmIntrinsics::_getCallerClass:
+    if (!InlineReflectionGetCallerClass) return true;
+    break;
+  case vmIntrinsics::_multiplyToLen:
+      if (!UseMultiplyToLenIntrinsic) return true;
+      break;
+  case vmIntrinsics::_squareToLen:
+    if (!UseSquareToLenIntrinsic) return true;
+    break;
+  case vmIntrinsics::_mulAdd:
+    if (!UseMulAddIntrinsic) return true;
+    break;
+  case vmIntrinsics::_montgomeryMultiply:
+    if (!UseMontgomeryMultiplyIntrinsic) return true;
+    break;
+  case vmIntrinsics::_montgomerySquare:
+    if (!UseMontgomerySquareIntrinsic) return true;
+    break;
+  case vmIntrinsics::_addExactI:
+  case vmIntrinsics::_addExactL:
+  case vmIntrinsics::_decrementExactI:
+  case vmIntrinsics::_decrementExactL:
+  case vmIntrinsics::_incrementExactI:
+  case vmIntrinsics::_incrementExactL:
+  case vmIntrinsics::_multiplyExactI:
+  case vmIntrinsics::_multiplyExactL:
+  case vmIntrinsics::_negateExactI:
+  case vmIntrinsics::_negateExactL:
+  case vmIntrinsics::_subtractExactI:
+  case vmIntrinsics::_subtractExactL:
+    if (!UseMathExactIntrinsics || !InlineMathNatives) return true;
+    break;
+#endif // COMPILER2
+  default:
+    return false;
+  }
+
+  return false;
+}
 
 #define VM_INTRINSIC_INITIALIZE(id, klass, name, sig, flags) #id "\0"
 static const char* vm_intrinsic_name_bodies =
--- a/src/share/vm/classfile/vmSymbols.hpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Fri Jul 31 12:13:57 2015 +0200
@@ -1368,6 +1368,26 @@
 
   // Raw conversion:
   static ID for_raw_conversion(BasicType src, BasicType dest);
+
+  // The methods below provide information related to compiling intrinsics.
+
+  // (1) Information needed by the C1 compiler.
+
+  static bool preserves_state(vmIntrinsics::ID id);
+  static bool can_trap(vmIntrinsics::ID id);
+
+  // (2) Information needed by the C2 compiler.
+
+  // Returns true if the intrinsic for method 'method' will perform a virtual dispatch.
+  static bool does_virtual_dispatch(vmIntrinsics::ID id);
+  // A return value larger than 0 indicates that the intrinsic for method
+  // 'method' requires predicated logic.
+  static int predicates_needed(vmIntrinsics::ID id);
+
+  // Returns true if an intrinsic is disabled by command-line flags and
+  // false otherwise. Implements functionality common to the C1
+  // and the C2 compiler.
+  static bool is_disabled_by_flags(vmIntrinsics::ID id);
 };
 
 #endif // SHARE_VM_CLASSFILE_VMSYMBOLS_HPP
--- a/src/share/vm/compiler/abstractCompiler.hpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/compiler/abstractCompiler.hpp	Fri Jul 31 12:13:57 2015 +0200
@@ -66,6 +66,58 @@
   virtual bool supports_osr   ()                 { return true; }
   virtual bool can_compile_method(methodHandle method)  { return true; }
 
+  // Determine if the current compiler provides an intrinsic
+  // for method 'method'. An intrinsic is available if:
+  //  - the intrinsic is enabled (by using the appropriate command-line flag) and
+  //  - the platform on which the VM is running supports the intrinsic
+  //    (i.e., the platform provides the instructions necessary for the compiler
+  //    to generate the intrinsic code).
+  //
+  // The second parameter, 'compilation_context', is needed to implement functionality
+  // related to the DisableIntrinsic command-line flag. The DisableIntrinsic flag can
+  // be used to prohibit the C2 compiler (but not the C1 compiler) to use an intrinsic.
+  // There are three ways to disable an intrinsic using the DisableIntrinsic flag:
+  //
+  // (1) -XX:DisableIntrinsic=_hashCode,_getClass
+  //     Disables intrinsification of _hashCode and _getClass globally
+  //     (i.e., the intrinsified version the methods will not be used at all).
+  // (2) -XX:CompileCommand=option,aClass::aMethod,ccstr,DisableIntrinsic,_hashCode
+  //     Disables intrinsification of _hashCode if it is called from
+  //     aClass::aMethod (but not for any other call site of _hashCode)
+  // (3) -XX:CompileCommand=option,java.lang.ref.Reference::get,ccstr,DisableIntrinsic,_Reference_get
+  //     Some methods are not compiled by C2. Instead, the C2 compiler
+  //     returns directly the intrinsified version of these methods.
+  //     The command above forces C2 to compile _Reference_get, but
+  //     allows using the intrinsified version of _Reference_get at all
+  //     other call sites.
+  //
+  // From the modes above, (1) disable intrinsics globally, (2) and (3)
+  // disable intrinsics on a per-method basis. In cases (2) and (3) the
+  // compilation context is aClass::aMethod and java.lang.ref.Reference::get,
+  // respectively.
+  virtual bool is_intrinsic_available(methodHandle method, methodHandle compilation_context) {
+    return false;
+  }
+
+  // Determines if an intrinsic is supported by the compiler, that is,
+  // the compiler provides the instructions necessary to generate
+  // the intrinsic code for method 'method'.
+  //
+  // The 'is_intrinsic_supported' method is a white list, that is,
+  // by default no intrinsics are supported by a compiler except
+  // the ones listed in the method. Overriding methods should conform
+  // to this behavior.
+  virtual bool is_intrinsic_supported(methodHandle method) {
+    return false;
+  }
+
+  // Implements compiler-specific processing of command-line flags.
+  // Processing of command-line flags common to all compilers is implemented
+  // in vmIntrinsicss::is_disabled_by_flag.
+  virtual bool is_intrinsic_disabled_by_flag(methodHandle method) {
+    return false;
+  }
+
   // Compiler type queries.
   bool is_c1()                                   { return _type == c1; }
   bool is_c2()                                   { return _type == c2; }
--- a/src/share/vm/opto/c2compiler.cpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/opto/c2compiler.cpp	Fri Jul 31 12:13:57 2015 +0200
@@ -79,7 +79,6 @@
   return OptoRuntime::generate(thread->env());
 }
 
-
 void C2Compiler::initialize() {
   // The first compiler thread that gets here will initialize the
   // small amount of global state (and runtime stubs) that C2 needs.
@@ -154,11 +153,361 @@
   }
 }
 
-
 void C2Compiler::print_timers() {
   Compile::print_timers();
 }
 
+bool C2Compiler::is_intrinsic_available(methodHandle method, methodHandle compilation_context) {
+  // Assume a non-virtual dispatch. A virtual dispatch is
+  // possible for only a limited set of available intrinsics whereas
+  // a non-virtual dispatch is possible for all available intrinsics.
+  return is_intrinsic_supported(method, false) &&
+         !is_intrinsic_disabled_by_flag(method, compilation_context);
+}
+
+bool C2Compiler::is_intrinsic_supported(methodHandle method, bool is_virtual) {
+  vmIntrinsics::ID id = method->intrinsic_id();
+  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+
+  if (id < vmIntrinsics::FIRST_ID || id >= vmIntrinsics::LAST_COMPILER_INLINE) {
+    return false;
+  }
+
+  // Only Object.hashCode and Object.clone intrinsics implement also a virtual
+  // dispatch because calling both methods is expensive but both methods are
+  // frequently overridden. All other intrinsics implement only a non-virtual
+  // dispatch.
+  if (is_virtual) {
+    switch (id) {
+    case vmIntrinsics::_hashCode:
+    case vmIntrinsics::_clone:
+      break;
+    default:
+      return false;
+    }
+  }
+
+  switch (id) {
+  case vmIntrinsics::_compareTo:
+    if (!Matcher::match_rule_supported(Op_StrComp)) return false;
+    break;
+  case vmIntrinsics::_equals:
+    if (!Matcher::match_rule_supported(Op_StrEquals)) return false;
+    break;
+  case vmIntrinsics::_equalsC:
+    if (!Matcher::match_rule_supported(Op_AryEq)) return false;
+    break;
+  case vmIntrinsics::_copyMemory:
+    if (StubRoutines::unsafe_arraycopy() == NULL) return false;
+    break;
+  case vmIntrinsics::_encodeISOArray:
+    if (!Matcher::match_rule_supported(Op_EncodeISOArray)) return false;
+    break;
+  case vmIntrinsics::_bitCount_i:
+    if (!Matcher::match_rule_supported(Op_PopCountI)) return false;
+    break;
+  case vmIntrinsics::_bitCount_l:
+    if (!Matcher::match_rule_supported(Op_PopCountL)) return false;
+    break;
+  case vmIntrinsics::_numberOfLeadingZeros_i:
+    if (!Matcher::match_rule_supported(Op_CountLeadingZerosI)) return false;
+    break;
+  case vmIntrinsics::_numberOfLeadingZeros_l:
+    if (!Matcher::match_rule_supported(Op_CountLeadingZerosL)) return false;
+    break;
+  case vmIntrinsics::_numberOfTrailingZeros_i:
+    if (!Matcher::match_rule_supported(Op_CountTrailingZerosI)) return false;
+    break;
+  case vmIntrinsics::_numberOfTrailingZeros_l:
+    if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return false;
+    break;
+  case vmIntrinsics::_reverseBytes_c:
+    if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return false;
+    break;
+  case vmIntrinsics::_reverseBytes_s:
+    if (!Matcher::match_rule_supported(Op_ReverseBytesS)) return false;
+    break;
+  case vmIntrinsics::_reverseBytes_i:
+    if (!Matcher::match_rule_supported(Op_ReverseBytesI)) return false;
+    break;
+  case vmIntrinsics::_reverseBytes_l:
+    if (!Matcher::match_rule_supported(Op_ReverseBytesL)) return false;
+    break;
+  case vmIntrinsics::_compareAndSwapObject:
+#ifdef _LP64
+    if (!UseCompressedOops && !Matcher::match_rule_supported(Op_CompareAndSwapP)) return false;
+#endif
+    break;
+  case vmIntrinsics::_compareAndSwapLong:
+    if (!Matcher::match_rule_supported(Op_CompareAndSwapL)) return false;
+    break;
+  case vmIntrinsics::_getAndAddInt:
+    if (!Matcher::match_rule_supported(Op_GetAndAddI)) return false;
+    break;
+  case vmIntrinsics::_getAndAddLong:
+    if (!Matcher::match_rule_supported(Op_GetAndAddL)) return false;
+    break;
+  case vmIntrinsics::_getAndSetInt:
+    if (!Matcher::match_rule_supported(Op_GetAndSetI)) return false;
+    break;
+  case vmIntrinsics::_getAndSetLong:
+    if (!Matcher::match_rule_supported(Op_GetAndSetL)) return false;
+    break;
+  case vmIntrinsics::_getAndSetObject:
+#ifdef _LP64
+    if (!UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetP)) return false;
+    if (UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetN)) return false;
+    break;
+#else
+    if (!Matcher::match_rule_supported(Op_GetAndSetP)) return false;
+    break;
+#endif
+  case vmIntrinsics::_incrementExactI:
+  case vmIntrinsics::_addExactI:
+    if (!Matcher::match_rule_supported(Op_OverflowAddI)) return false;
+    break;
+  case vmIntrinsics::_incrementExactL:
+  case vmIntrinsics::_addExactL:
+    if (!Matcher::match_rule_supported(Op_OverflowAddL)) return false;
+    break;
+  case vmIntrinsics::_decrementExactI:
+  case vmIntrinsics::_subtractExactI:
+    if (!Matcher::match_rule_supported(Op_OverflowSubI)) return false;
+    break;
+  case vmIntrinsics::_decrementExactL:
+  case vmIntrinsics::_subtractExactL:
+    if (!Matcher::match_rule_supported(Op_OverflowSubL)) return false;
+    break;
+  case vmIntrinsics::_negateExactI:
+    if (!Matcher::match_rule_supported(Op_OverflowSubI)) return false;
+    break;
+  case vmIntrinsics::_negateExactL:
+    if (!Matcher::match_rule_supported(Op_OverflowSubL)) return false;
+    break;
+  case vmIntrinsics::_multiplyExactI:
+    if (!Matcher::match_rule_supported(Op_OverflowMulI)) return false;
+    break;
+  case vmIntrinsics::_multiplyExactL:
+    if (!Matcher::match_rule_supported(Op_OverflowMulL)) return false;
+    break;
+  case vmIntrinsics::_getCallerClass:
+    if (SystemDictionary::reflect_CallerSensitive_klass() == NULL) return false;
+    break;
+  case vmIntrinsics::_hashCode:
+  case vmIntrinsics::_identityHashCode:
+  case vmIntrinsics::_getClass:
+  case vmIntrinsics::_dsin:
+  case vmIntrinsics::_dcos:
+  case vmIntrinsics::_dtan:
+  case vmIntrinsics::_dabs:
+  case vmIntrinsics::_datan2:
+  case vmIntrinsics::_dsqrt:
+  case vmIntrinsics::_dexp:
+  case vmIntrinsics::_dlog:
+  case vmIntrinsics::_dlog10:
+  case vmIntrinsics::_dpow:
+  case vmIntrinsics::_min:
+  case vmIntrinsics::_max:
+  case vmIntrinsics::_arraycopy:
+  case vmIntrinsics::_indexOf:
+  case vmIntrinsics::_getObject:
+  case vmIntrinsics::_getBoolean:
+  case vmIntrinsics::_getByte:
+  case vmIntrinsics::_getShort:
+  case vmIntrinsics::_getChar:
+  case vmIntrinsics::_getInt:
+  case vmIntrinsics::_getLong:
+  case vmIntrinsics::_getFloat:
+  case vmIntrinsics::_getDouble:
+  case vmIntrinsics::_putObject:
+  case vmIntrinsics::_putBoolean:
+  case vmIntrinsics::_putByte:
+  case vmIntrinsics::_putShort:
+  case vmIntrinsics::_putChar:
+  case vmIntrinsics::_putInt:
+  case vmIntrinsics::_putLong:
+  case vmIntrinsics::_putFloat:
+  case vmIntrinsics::_putDouble:
+  case vmIntrinsics::_getByte_raw:
+  case vmIntrinsics::_getShort_raw:
+  case vmIntrinsics::_getChar_raw:
+  case vmIntrinsics::_getInt_raw:
+  case vmIntrinsics::_getLong_raw:
+  case vmIntrinsics::_getFloat_raw:
+  case vmIntrinsics::_getDouble_raw:
+  case vmIntrinsics::_getAddress_raw:
+  case vmIntrinsics::_putByte_raw:
+  case vmIntrinsics::_putShort_raw:
+  case vmIntrinsics::_putChar_raw:
+  case vmIntrinsics::_putInt_raw:
+  case vmIntrinsics::_putLong_raw:
+  case vmIntrinsics::_putFloat_raw:
+  case vmIntrinsics::_putDouble_raw:
+  case vmIntrinsics::_putAddress_raw:
+  case vmIntrinsics::_getObjectVolatile:
+  case vmIntrinsics::_getBooleanVolatile:
+  case vmIntrinsics::_getByteVolatile:
+  case vmIntrinsics::_getShortVolatile:
+  case vmIntrinsics::_getCharVolatile:
+  case vmIntrinsics::_getIntVolatile:
+  case vmIntrinsics::_getLongVolatile:
+  case vmIntrinsics::_getFloatVolatile:
+  case vmIntrinsics::_getDoubleVolatile:
+  case vmIntrinsics::_putObjectVolatile:
+  case vmIntrinsics::_putBooleanVolatile:
+  case vmIntrinsics::_putByteVolatile:
+  case vmIntrinsics::_putShortVolatile:
+  case vmIntrinsics::_putCharVolatile:
+  case vmIntrinsics::_putIntVolatile:
+  case vmIntrinsics::_putLongVolatile:
+  case vmIntrinsics::_putFloatVolatile:
+  case vmIntrinsics::_putDoubleVolatile:
+  case vmIntrinsics::_getShortUnaligned:
+  case vmIntrinsics::_getCharUnaligned:
+  case vmIntrinsics::_getIntUnaligned:
+  case vmIntrinsics::_getLongUnaligned:
+  case vmIntrinsics::_putShortUnaligned:
+  case vmIntrinsics::_putCharUnaligned:
+  case vmIntrinsics::_putIntUnaligned:
+  case vmIntrinsics::_putLongUnaligned:
+  case vmIntrinsics::_compareAndSwapInt:
+  case vmIntrinsics::_putOrderedObject:
+  case vmIntrinsics::_putOrderedInt:
+  case vmIntrinsics::_putOrderedLong:
+  case vmIntrinsics::_loadFence:
+  case vmIntrinsics::_storeFence:
+  case vmIntrinsics::_fullFence:
+  case vmIntrinsics::_currentThread:
+  case vmIntrinsics::_isInterrupted:
+#ifdef TRACE_HAVE_INTRINSICS
+  case vmIntrinsics::_classID:
+  case vmIntrinsics::_threadID:
+  case vmIntrinsics::_counterTime:
+#endif
+  case vmIntrinsics::_currentTimeMillis:
+  case vmIntrinsics::_nanoTime:
+  case vmIntrinsics::_allocateInstance:
+  case vmIntrinsics::_newArray:
+  case vmIntrinsics::_getLength:
+  case vmIntrinsics::_copyOf:
+  case vmIntrinsics::_copyOfRange:
+  case vmIntrinsics::_clone:
+  case vmIntrinsics::_isAssignableFrom:
+  case vmIntrinsics::_isInstance:
+  case vmIntrinsics::_getModifiers:
+  case vmIntrinsics::_isInterface:
+  case vmIntrinsics::_isArray:
+  case vmIntrinsics::_isPrimitive:
+  case vmIntrinsics::_getSuperclass:
+  case vmIntrinsics::_getClassAccessFlags:
+  case vmIntrinsics::_floatToRawIntBits:
+  case vmIntrinsics::_floatToIntBits:
+  case vmIntrinsics::_intBitsToFloat:
+  case vmIntrinsics::_doubleToRawLongBits:
+  case vmIntrinsics::_doubleToLongBits:
+  case vmIntrinsics::_longBitsToDouble:
+  case vmIntrinsics::_Reference_get:
+  case vmIntrinsics::_Class_cast:
+  case vmIntrinsics::_aescrypt_encryptBlock:
+  case vmIntrinsics::_aescrypt_decryptBlock:
+  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
+  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
+  case vmIntrinsics::_sha_implCompress:
+  case vmIntrinsics::_sha2_implCompress:
+  case vmIntrinsics::_sha5_implCompress:
+  case vmIntrinsics::_digestBase_implCompressMB:
+  case vmIntrinsics::_multiplyToLen:
+  case vmIntrinsics::_squareToLen:
+  case vmIntrinsics::_mulAdd:
+  case vmIntrinsics::_montgomeryMultiply:
+  case vmIntrinsics::_montgomerySquare:
+  case vmIntrinsics::_ghash_processBlocks:
+  case vmIntrinsics::_updateCRC32:
+  case vmIntrinsics::_updateBytesCRC32:
+  case vmIntrinsics::_updateByteBufferCRC32:
+  case vmIntrinsics::_updateBytesCRC32C:
+  case vmIntrinsics::_updateDirectByteBufferCRC32C:
+  case vmIntrinsics::_profileBoolean:
+  case vmIntrinsics::_isCompileConstant:
+    break;
+  default:
+    return false;
+  }
+  return true;
+}
+
+bool C2Compiler::is_intrinsic_disabled_by_flag(methodHandle method, methodHandle compilation_context) {
+  vmIntrinsics::ID id = method->intrinsic_id();
+  assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+
+  if (vmIntrinsics::is_disabled_by_flags(method->intrinsic_id())) {
+    return true;
+  }
+
+  // Check if the intrinsic corresponding to 'method' has been disabled on
+  // the command line by using the DisableIntrinsic flag (either globally
+  // or on a per-method level, see src/share/vm/compiler/abstractCompiler.hpp
+  // for details).
+  // Usually, the compilation context is the caller of the method 'method'.
+  // The only case when for a non-recursive method 'method' the compilation context
+  // is not the caller of the 'method' (but it is the method itself) is
+  // java.lang.ref.Referene::get.
+  // For java.lang.ref.Reference::get, the intrinsic version is used
+  // instead of the C2-compiled version so that the value in the referent
+  // field can be registered by the G1 pre-barrier code. The intrinsified
+  // version of Reference::get also adds a memory barrier to prevent
+  // commoning reads from the referent field across safepoint since GC
+  // can change the referent field's value. See Compile::Compile()
+  // in src/share/vm/opto/compile.cpp for more details.
+  ccstr disable_intr = NULL;
+  if ((DisableIntrinsic[0] != '\0' && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) ||
+      (!compilation_context.is_null() &&
+       CompilerOracle::has_option_value(compilation_context, "DisableIntrinsic", disable_intr) &&
+       strstr(disable_intr, vmIntrinsics::name_at(id)) != NULL)
+  ) {
+    return true;
+  }
+
+  // -XX:-InlineNatives disables nearly all intrinsics except the ones listed in
+  // the following switch statement.
+  if (!InlineNatives) {
+    switch (id) {
+    case vmIntrinsics::_indexOf:
+    case vmIntrinsics::_compareTo:
+    case vmIntrinsics::_equals:
+    case vmIntrinsics::_equalsC:
+    case vmIntrinsics::_getAndAddInt:
+    case vmIntrinsics::_getAndAddLong:
+    case vmIntrinsics::_getAndSetInt:
+    case vmIntrinsics::_getAndSetLong:
+    case vmIntrinsics::_getAndSetObject:
+    case vmIntrinsics::_loadFence:
+    case vmIntrinsics::_storeFence:
+    case vmIntrinsics::_fullFence:
+    case vmIntrinsics::_Reference_get:
+      break;
+    default:
+      return true;
+    }
+  }
+
+  if (!InlineUnsafeOps) {
+    switch (id) {
+    case vmIntrinsics::_loadFence:
+    case vmIntrinsics::_storeFence:
+    case vmIntrinsics::_fullFence:
+    case vmIntrinsics::_compareAndSwapObject:
+    case vmIntrinsics::_compareAndSwapLong:
+    case vmIntrinsics::_compareAndSwapInt:
+      return true;
+    default:
+      return false;
+    }
+  }
+
+  return false;
+}
+
 int C2Compiler::initial_code_buffer_size() {
   assert(SegmentedCodeCache, "Should be only used with a segmented code cache");
   return Compile::MAX_inst_size + Compile::MAX_locs_size + initial_const_capacity;
--- a/src/share/vm/opto/c2compiler.hpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/opto/c2compiler.hpp	Fri Jul 31 12:13:57 2015 +0200
@@ -36,7 +36,6 @@
 
   // Name
   const char *name() { return "C2"; }
-
   void initialize();
 
   // Compilation entry point for methods
@@ -52,6 +51,26 @@
   // Print compilation timers and statistics
   void print_timers();
 
+  // Check the availability of an intrinsic for 'method' given a compilation context.
+  virtual bool is_intrinsic_available(methodHandle method, methodHandle compilation_context);
+
+  // Return true if the intrinsification of a method supported by the compiler
+  // assuming a non-virtual dispatch. Return false otherwise.
+  virtual bool is_intrinsic_supported(methodHandle method) {
+    return is_intrinsic_supported(method, false);
+  }
+
+  // Check if the compiler supports an intrinsic for 'method' given the
+  // the dispatch mode specified by the 'is_virtual' parameter.
+  virtual bool is_intrinsic_supported(methodHandle method, bool is_virtual);
+
+  // Processing of command-line flags specific to the C2 compiler.
+  virtual bool is_intrinsic_disabled_by_flag(methodHandle method) {
+    return is_intrinsic_disabled_by_flag(method, NULL);
+  }
+
+  virtual bool is_intrinsic_disabled_by_flag(methodHandle method, methodHandle compilation_context);
+
   // Initial size of the code buffer (may be increased at runtime)
   static int initial_code_buffer_size();
 };
--- a/src/share/vm/opto/library_call.cpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/opto/library_call.cpp	Fri Jul 31 12:13:57 2015 +0200
@@ -31,6 +31,7 @@
 #include "oops/objArrayKlass.hpp"
 #include "opto/addnode.hpp"
 #include "opto/arraycopynode.hpp"
+#include "opto/c2compiler.hpp"
 #include "opto/callGenerator.hpp"
 #include "opto/castnode.hpp"
 #include "opto/cfgnode.hpp"
@@ -305,330 +306,40 @@
   bool inline_isCompileConstant();
 };
 
-
 //---------------------------make_vm_intrinsic----------------------------
 CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
   vmIntrinsics::ID id = m->intrinsic_id();
   assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
 
-  ccstr disable_intr = NULL;
-
-  if ((DisableIntrinsic[0] != '\0'
-       && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) ||
-      (method_has_option_value("DisableIntrinsic", disable_intr)
-       && strstr(disable_intr, vmIntrinsics::name_at(id)) != NULL)) {
-    // disabled by a user request on the command line:
-    // example: -XX:DisableIntrinsic=_hashCode,_getClass
+  if (!m->is_loaded()) {
+    // Do not attempt to inline unloaded methods.
     return NULL;
   }
 
-  if (!m->is_loaded()) {
-    // do not attempt to inline unloaded methods
-    return NULL;
-  }
-
-  // Only a few intrinsics implement a virtual dispatch.
-  // They are expensive calls which are also frequently overridden.
-  if (is_virtual) {
-    switch (id) {
-    case vmIntrinsics::_hashCode:
-    case vmIntrinsics::_clone:
-      // OK, Object.hashCode and Object.clone intrinsics come in both flavors
-      break;
-    default:
-      return NULL;
-    }
-  }
-
-  // -XX:-InlineNatives disables nearly all intrinsics:
-  if (!InlineNatives) {
-    switch (id) {
-    case vmIntrinsics::_indexOf:
-    case vmIntrinsics::_compareTo:
-    case vmIntrinsics::_equals:
-    case vmIntrinsics::_equalsC:
-    case vmIntrinsics::_getAndAddInt:
-    case vmIntrinsics::_getAndAddLong:
-    case vmIntrinsics::_getAndSetInt:
-    case vmIntrinsics::_getAndSetLong:
-    case vmIntrinsics::_getAndSetObject:
-    case vmIntrinsics::_loadFence:
-    case vmIntrinsics::_storeFence:
-    case vmIntrinsics::_fullFence:
-      break;  // InlineNatives does not control String.compareTo
-    case vmIntrinsics::_Reference_get:
-      break;  // InlineNatives does not control Reference.get
-    default:
-      return NULL;
-    }
-  }
-
-  int predicates = 0;
-  bool does_virtual_dispatch = false;
-
-  switch (id) {
-  case vmIntrinsics::_compareTo:
-    if (!SpecialStringCompareTo)  return NULL;
-    if (!Matcher::match_rule_supported(Op_StrComp))  return NULL;
-    break;
-  case vmIntrinsics::_indexOf:
-    if (!SpecialStringIndexOf)  return NULL;
-    break;
-  case vmIntrinsics::_equals:
-    if (!SpecialStringEquals)  return NULL;
-    if (!Matcher::match_rule_supported(Op_StrEquals))  return NULL;
-    break;
-  case vmIntrinsics::_equalsC:
-    if (!SpecialArraysEquals)  return NULL;
-    if (!Matcher::match_rule_supported(Op_AryEq))  return NULL;
-    break;
-  case vmIntrinsics::_arraycopy:
-    if (!InlineArrayCopy)  return NULL;
-    break;
-  case vmIntrinsics::_copyMemory:
-    if (StubRoutines::unsafe_arraycopy() == NULL)  return NULL;
-    if (!InlineArrayCopy)  return NULL;
-    break;
-  case vmIntrinsics::_hashCode:
-    if (!InlineObjectHash)  return NULL;
-    does_virtual_dispatch = true;
-    break;
-  case vmIntrinsics::_clone:
-    does_virtual_dispatch = true;
-  case vmIntrinsics::_copyOf:
-  case vmIntrinsics::_copyOfRange:
-    if (!InlineObjectCopy)  return NULL;
-    // These also use the arraycopy intrinsic mechanism:
-    if (!InlineArrayCopy)  return NULL;
-    break;
-  case vmIntrinsics::_encodeISOArray:
-    if (!SpecialEncodeISOArray)  return NULL;
-    if (!Matcher::match_rule_supported(Op_EncodeISOArray))  return NULL;
-    break;
-  case vmIntrinsics::_checkIndex:
-    // We do not intrinsify this.  The optimizer does fine with it.
-    return NULL;
-
-  case vmIntrinsics::_getCallerClass:
-    if (!InlineReflectionGetCallerClass)  return NULL;
-    if (SystemDictionary::reflect_CallerSensitive_klass() == NULL)  return NULL;
-    break;
-
-  case vmIntrinsics::_bitCount_i:
-    if (!Matcher::match_rule_supported(Op_PopCountI)) return NULL;
-    break;
-
-  case vmIntrinsics::_bitCount_l:
-    if (!Matcher::match_rule_supported(Op_PopCountL)) return NULL;
-    break;
-
-  case vmIntrinsics::_numberOfLeadingZeros_i:
-    if (!Matcher::match_rule_supported(Op_CountLeadingZerosI)) return NULL;
-    break;
-
-  case vmIntrinsics::_numberOfLeadingZeros_l:
-    if (!Matcher::match_rule_supported(Op_CountLeadingZerosL)) return NULL;
-    break;
-
-  case vmIntrinsics::_numberOfTrailingZeros_i:
-    if (!Matcher::match_rule_supported(Op_CountTrailingZerosI)) return NULL;
-    break;
-
-  case vmIntrinsics::_numberOfTrailingZeros_l:
-    if (!Matcher::match_rule_supported(Op_CountTrailingZerosL)) return NULL;
-    break;
-
-  case vmIntrinsics::_reverseBytes_c:
-    if (!Matcher::match_rule_supported(Op_ReverseBytesUS)) return NULL;
-    break;
-  case vmIntrinsics::_reverseBytes_s:
-    if (!Matcher::match_rule_supported(Op_ReverseBytesS))  return NULL;
-    break;
-  case vmIntrinsics::_reverseBytes_i:
-    if (!Matcher::match_rule_supported(Op_ReverseBytesI))  return NULL;
-    break;
-  case vmIntrinsics::_reverseBytes_l:
-    if (!Matcher::match_rule_supported(Op_ReverseBytesL))  return NULL;
-    break;
-
-  case vmIntrinsics::_Reference_get:
-    // Use the intrinsic version of Reference.get() so that the value in
-    // the referent field can be registered by the G1 pre-barrier code.
-    // Also add memory barrier to prevent commoning reads from this field
-    // across safepoint since GC can change it value.
-    break;
-
-  case vmIntrinsics::_compareAndSwapObject:
-#ifdef _LP64
-    if (!UseCompressedOops && !Matcher::match_rule_supported(Op_CompareAndSwapP)) return NULL;
-#endif
-    break;
-
-  case vmIntrinsics::_compareAndSwapLong:
-    if (!Matcher::match_rule_supported(Op_CompareAndSwapL)) return NULL;
-    break;
-
-  case vmIntrinsics::_getAndAddInt:
-    if (!Matcher::match_rule_supported(Op_GetAndAddI)) return NULL;
-    break;
-
-  case vmIntrinsics::_getAndAddLong:
-    if (!Matcher::match_rule_supported(Op_GetAndAddL)) return NULL;
-    break;
-
-  case vmIntrinsics::_getAndSetInt:
-    if (!Matcher::match_rule_supported(Op_GetAndSetI)) return NULL;
-    break;
-
-  case vmIntrinsics::_getAndSetLong:
-    if (!Matcher::match_rule_supported(Op_GetAndSetL)) return NULL;
-    break;
-
-  case vmIntrinsics::_getAndSetObject:
-#ifdef _LP64
-    if (!UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetP)) return NULL;
-    if (UseCompressedOops && !Matcher::match_rule_supported(Op_GetAndSetN)) return NULL;
-    break;
-#else
-    if (!Matcher::match_rule_supported(Op_GetAndSetP)) return NULL;
-    break;
-#endif
-
-  case vmIntrinsics::_aescrypt_encryptBlock:
-  case vmIntrinsics::_aescrypt_decryptBlock:
-    if (!UseAESIntrinsics) return NULL;
-    break;
-
-  case vmIntrinsics::_multiplyToLen:
-    if (!UseMultiplyToLenIntrinsic) return NULL;
-    break;
-
-  case vmIntrinsics::_squareToLen:
-    if (!UseSquareToLenIntrinsic) return NULL;
-    break;
-
-  case vmIntrinsics::_mulAdd:
-    if (!UseMulAddIntrinsic) return NULL;
-    break;
-
-  case vmIntrinsics::_montgomeryMultiply:
-     if (!UseMontgomeryMultiplyIntrinsic) return NULL;
-    break;
-  case vmIntrinsics::_montgomerySquare:
-     if (!UseMontgomerySquareIntrinsic) return NULL;
-    break;
-
-  case vmIntrinsics::_cipherBlockChaining_encryptAESCrypt:
-  case vmIntrinsics::_cipherBlockChaining_decryptAESCrypt:
-    if (!UseAESIntrinsics) return NULL;
-    // these two require the predicated logic
-    predicates = 1;
-    break;
-
-  case vmIntrinsics::_sha_implCompress:
-    if (!UseSHA1Intrinsics) return NULL;
-    break;
-
-  case vmIntrinsics::_sha2_implCompress:
-    if (!UseSHA256Intrinsics) return NULL;
-    break;
-
-  case vmIntrinsics::_sha5_implCompress:
-    if (!UseSHA512Intrinsics) return NULL;
-    break;
-
-  case vmIntrinsics::_digestBase_implCompressMB:
-    if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) return NULL;
-    predicates = 3;
-    break;
-
-  case vmIntrinsics::_ghash_processBlocks:
-    if (!UseGHASHIntrinsics) return NULL;
-    break;
-
-  case vmIntrinsics::_updateCRC32:
-  case vmIntrinsics::_updateBytesCRC32:
-  case vmIntrinsics::_updateByteBufferCRC32:
-    if (!UseCRC32Intrinsics) return NULL;
-    break;
-
-  case vmIntrinsics::_updateBytesCRC32C:
-  case vmIntrinsics::_updateDirectByteBufferCRC32C:
-    if (!UseCRC32CIntrinsics) return NULL;
-    break;
-
-  case vmIntrinsics::_incrementExactI:
-  case vmIntrinsics::_addExactI:
-    if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL;
-    break;
-  case vmIntrinsics::_incrementExactL:
-  case vmIntrinsics::_addExactL:
-    if (!Matcher::match_rule_supported(Op_OverflowAddL) || !UseMathExactIntrinsics) return NULL;
-    break;
-  case vmIntrinsics::_decrementExactI:
-  case vmIntrinsics::_subtractExactI:
-    if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL;
-    break;
-  case vmIntrinsics::_decrementExactL:
-  case vmIntrinsics::_subtractExactL:
-    if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL;
-    break;
-  case vmIntrinsics::_negateExactI:
-    if (!Matcher::match_rule_supported(Op_OverflowSubI) || !UseMathExactIntrinsics) return NULL;
-    break;
-  case vmIntrinsics::_negateExactL:
-    if (!Matcher::match_rule_supported(Op_OverflowSubL) || !UseMathExactIntrinsics) return NULL;
-    break;
-  case vmIntrinsics::_multiplyExactI:
-    if (!Matcher::match_rule_supported(Op_OverflowMulI) || !UseMathExactIntrinsics) return NULL;
-    break;
-  case vmIntrinsics::_multiplyExactL:
-    if (!Matcher::match_rule_supported(Op_OverflowMulL) || !UseMathExactIntrinsics) return NULL;
-    break;
-
-  case vmIntrinsics::_getShortUnaligned:
-  case vmIntrinsics::_getCharUnaligned:
-  case vmIntrinsics::_getIntUnaligned:
-  case vmIntrinsics::_getLongUnaligned:
-  case vmIntrinsics::_putShortUnaligned:
-  case vmIntrinsics::_putCharUnaligned:
-  case vmIntrinsics::_putIntUnaligned:
-  case vmIntrinsics::_putLongUnaligned:
-    if (!UseUnalignedAccesses) return NULL;
-    break;
-
- default:
+  C2Compiler* compiler = (C2Compiler*)CompileBroker::compiler(CompLevel_full_optimization);
+  bool is_available = false;
+
+  {
+    // For calling is_intrinsic_supported and is_intrinsic_disabled_by_flag
+    // the compiler must transition to '_thread_in_vm' state because both
+    // methods access VM-internal data.
+    VM_ENTRY_MARK;
+    methodHandle mh(THREAD, m->get_Method());
+    methodHandle ct(THREAD, method()->get_Method());
+    is_available = compiler->is_intrinsic_supported(mh, is_virtual) &&
+                   !compiler->is_intrinsic_disabled_by_flag(mh, ct);
+  }
+
+  if (is_available) {
     assert(id <= vmIntrinsics::LAST_COMPILER_INLINE, "caller responsibility");
     assert(id != vmIntrinsics::_Object_init && id != vmIntrinsics::_invoke, "enum out of order?");
-    break;
-  }
-
-  // -XX:-InlineClassNatives disables natives from the Class class.
-  // The flag applies to all reflective calls, notably Array.newArray
-  // (visible to Java programmers as Array.newInstance).
-  if (m->holder()->name() == ciSymbol::java_lang_Class() ||
-      m->holder()->name() == ciSymbol::java_lang_reflect_Array()) {
-    if (!InlineClassNatives)  return NULL;
-  }
-
-  // -XX:-InlineThreadNatives disables natives from the Thread class.
-  if (m->holder()->name() == ciSymbol::java_lang_Thread()) {
-    if (!InlineThreadNatives)  return NULL;
-  }
-
-  // -XX:-InlineMathNatives disables natives from the Math,Float and Double classes.
-  if (m->holder()->name() == ciSymbol::java_lang_Math() ||
-      m->holder()->name() == ciSymbol::java_lang_Float() ||
-      m->holder()->name() == ciSymbol::java_lang_Double()) {
-    if (!InlineMathNatives)  return NULL;
-  }
-
-  // -XX:-InlineUnsafeOps disables natives from the Unsafe class.
-  if (m->holder()->name() == ciSymbol::sun_misc_Unsafe()) {
-    if (!InlineUnsafeOps)  return NULL;
-  }
-
-  return new LibraryIntrinsic(m, is_virtual, predicates, does_virtual_dispatch, (vmIntrinsics::ID) id);
+    return new LibraryIntrinsic(m, is_virtual,
+                                vmIntrinsics::predicates_needed(id),
+                                vmIntrinsics::does_virtual_dispatch(id),
+                                (vmIntrinsics::ID) id);
+  } else {
+    return NULL;
+  }
 }
 
 //----------------------register_library_intrinsics-----------------------
@@ -812,7 +523,6 @@
   case vmIntrinsics::_getLong:                  return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG,    !is_volatile);
   case vmIntrinsics::_getFloat:                 return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT,   !is_volatile);
   case vmIntrinsics::_getDouble:                return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE,  !is_volatile);
-
   case vmIntrinsics::_putObject:                return inline_unsafe_access(!is_native_ptr,  is_store, T_OBJECT,  !is_volatile);
   case vmIntrinsics::_putBoolean:               return inline_unsafe_access(!is_native_ptr,  is_store, T_BOOLEAN, !is_volatile);
   case vmIntrinsics::_putByte:                  return inline_unsafe_access(!is_native_ptr,  is_store, T_BYTE,    !is_volatile);
--- a/src/share/vm/prims/whitebox.cpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/prims/whitebox.cpp	Fri Jul 31 12:13:57 2015 +0200
@@ -528,6 +528,24 @@
   return mh->queued_for_compilation();
 WB_END
 
+WB_ENTRY(jboolean, WB_IsIntrinsicAvailable(JNIEnv* env, jobject o, jobject method, jobject compilation_context, jint compLevel))
+  if (compLevel < CompLevel_none || compLevel > CompLevel_highest_tier) {
+    return false; // Intrinsic is not available on a non-existent compilation level.
+  }
+  jmethodID method_id, compilation_context_id;
+  method_id = reflected_method_to_jmid(thread, env, method);
+  CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
+  methodHandle mh(THREAD, Method::checked_resolve_jmethod_id(method_id));
+  if (compilation_context != NULL) {
+    compilation_context_id = reflected_method_to_jmid(thread, env, compilation_context);
+    CHECK_JNI_EXCEPTION_(env, JNI_FALSE);
+    methodHandle cch(THREAD, Method::checked_resolve_jmethod_id(compilation_context_id));
+    return CompileBroker::compiler(compLevel)->is_intrinsic_available(mh, cch);
+  } else {
+    return CompileBroker::compiler(compLevel)->is_intrinsic_available(mh, NULL);
+  }
+WB_END
+
 WB_ENTRY(jint, WB_GetMethodCompilationLevel(JNIEnv* env, jobject o, jobject method, jboolean is_osr))
   jmethodID jmid = reflected_method_to_jmid(thread, env, method);
   CHECK_JNI_EXCEPTION_(env, CompLevel_none);
@@ -1477,14 +1495,17 @@
 #endif // INCLUDE_NMT
   {CC"deoptimizeFrames",   CC"(Z)I",                  (void*)&WB_DeoptimizeFrames  },
   {CC"deoptimizeAll",      CC"()V",                   (void*)&WB_DeoptimizeAll     },
-  {CC"deoptimizeMethod0",   CC"(Ljava/lang/reflect/Executable;Z)I",
-                                                      (void*)&WB_DeoptimizeMethod  },
+    {CC"deoptimizeMethod0",   CC"(Ljava/lang/reflect/Executable;Z)I",
+                                                        (void*)&WB_DeoptimizeMethod  },
   {CC"isMethodCompiled0",   CC"(Ljava/lang/reflect/Executable;Z)Z",
                                                       (void*)&WB_IsMethodCompiled  },
   {CC"isMethodCompilable0", CC"(Ljava/lang/reflect/Executable;IZ)Z",
                                                       (void*)&WB_IsMethodCompilable},
   {CC"isMethodQueuedForCompilation0",
       CC"(Ljava/lang/reflect/Executable;)Z",          (void*)&WB_IsMethodQueuedForCompilation},
+  {CC"isIntrinsicAvailable0",
+      CC"(Ljava/lang/reflect/Executable;Ljava/lang/reflect/Executable;I)Z",
+                                                      (void*)&WB_IsIntrinsicAvailable},
   {CC"makeMethodNotCompilable0",
       CC"(Ljava/lang/reflect/Executable;IZ)V",        (void*)&WB_MakeMethodNotCompilable},
   {CC"testSetDontInlineMethod0",
--- a/src/share/vm/runtime/arguments.cpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/runtime/arguments.cpp	Fri Jul 31 12:13:57 2015 +0200
@@ -1226,32 +1226,6 @@
   }
 }
 
-/**
- * Returns the minimum number of compiler threads needed to run the JVM. The following
- * configurations are possible.
- *
- * 1) The JVM is build using an interpreter only. As a result, the minimum number of
- *    compiler threads is 0.
- * 2) The JVM is build using the compiler(s) and tiered compilation is disabled. As
- *    a result, either C1 or C2 is used, so the minimum number of compiler threads is 1.
- * 3) The JVM is build using the compiler(s) and tiered compilation is enabled. However,
- *    the option "TieredStopAtLevel < CompLevel_full_optimization". As a result, only
- *    C1 can be used, so the minimum number of compiler threads is 1.
- * 4) The JVM is build using the compilers and tiered compilation is enabled. The option
- *    'TieredStopAtLevel = CompLevel_full_optimization' (the default value). As a result,
- *    the minimum number of compiler threads is 2.
- */
-int Arguments::get_min_number_of_compiler_threads() {
-#if !defined(COMPILER1) && !defined(COMPILER2) && !defined(SHARK)
-  return 0;   // case 1
-#else
-  if (!TieredCompilation || (TieredStopAtLevel < CompLevel_full_optimization)) {
-    return 1; // case 2 or case 3
-  }
-  return 2;   // case 4 (tiered)
-#endif
-}
-
 #if INCLUDE_ALL_GCS
 static void disable_adaptive_size_policy(const char* collector_name) {
   if (UseAdaptiveSizePolicy) {
@@ -2199,10 +2173,6 @@
     status = false;
   }
 
-  int min_number_of_compiler_threads = get_min_number_of_compiler_threads();
-  // The default CICompilerCount's value is CI_COMPILER_COUNT.
-  assert(min_number_of_compiler_threads <= CI_COMPILER_COUNT, "minimum should be less or equal default number");
-
   if (!FLAG_IS_DEFAULT(CICompilerCount) && !FLAG_IS_DEFAULT(CICompilerCountPerCPU) && CICompilerCountPerCPU) {
     warning("The VM option CICompilerCountPerCPU overrides CICompilerCount.");
   }
--- a/src/share/vm/runtime/arguments.hpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/runtime/arguments.hpp	Fri Jul 31 12:13:57 2015 +0200
@@ -445,9 +445,6 @@
   static char*  SharedArchivePath;
 
  public:
-  // Tiered
-  static int  get_min_number_of_compiler_threads();
-
   // Scale compile thresholds
   // Returns threshold scaled with CompileThresholdScaling
   static intx scaled_compile_threshold(intx threshold, double scale);
--- a/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/runtime/commandLineFlagConstraintsCompiler.cpp	Fri Jul 31 12:13:57 2015 +0200
@@ -41,3 +41,45 @@
     return Flag::SUCCESS;
   }
 }
+
+/**
+ * Validate the minimum number of compiler threads needed to run the
+ * JVM. The following configurations are possible.
+ *
+ * 1) The JVM is build using an interpreter only. As a result, the minimum number of
+ *    compiler threads is 0.
+ * 2) The JVM is build using the compiler(s) and tiered compilation is disabled. As
+ *    a result, either C1 or C2 is used, so the minimum number of compiler threads is 1.
+ * 3) The JVM is build using the compiler(s) and tiered compilation is enabled. However,
+ *    the option "TieredStopAtLevel < CompLevel_full_optimization". As a result, only
+ *    C1 can be used, so the minimum number of compiler threads is 1.
+ * 4) The JVM is build using the compilers and tiered compilation is enabled. The option
+ *    'TieredStopAtLevel = CompLevel_full_optimization' (the default value). As a result,
+ *    the minimum number of compiler threads is 2.
+ */
+Flag::Error CICompilerCountConstraintFunc(bool verbose, intx* value) {
+  int min_number_of_compiler_threads = 0;
+#if !defined(COMPILER1) && !defined(COMPILER2) && !defined(SHARK)
+  // case 1
+#else
+  if (!TieredCompilation || (TieredStopAtLevel < CompLevel_full_optimization)) {
+    min_number_of_compiler_threads = 1; // case 2 or case 3
+  } else {
+    min_number_of_compiler_threads = 2;   // case 4 (tiered)
+  }
+#endif
+
+  // The default CICompilerCount's value is CI_COMPILER_COUNT.
+  assert(min_number_of_compiler_threads <= CI_COMPILER_COUNT, "minimum should be less or equal default number");
+
+  if (*value < (intx)min_number_of_compiler_threads) {
+    if (verbose == true) {
+      jio_fprintf(defaultStream::error_stream(),
+                  "CICompilerCount=" INTX_FORMAT " must be at least %d \n",
+                  *value, min_number_of_compiler_threads);
+    }
+    return Flag::VIOLATES_CONSTRAINT;
+  } else {
+    return Flag::SUCCESS;
+  }
+}
--- a/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/runtime/commandLineFlagConstraintsCompiler.hpp	Fri Jul 31 12:13:57 2015 +0200
@@ -36,4 +36,6 @@
 
 Flag::Error AliasLevelConstraintFunc(bool verbose, intx* value);
 
+Flag::Error CICompilerCountConstraintFunc(bool verbose, intx* value);
+
 #endif /* SHARE_VM_RUNTIME_COMMANDLINEFLAGCONSTRAINTSCOMPILER_HPP */
--- a/src/share/vm/runtime/globals.hpp	Mon Jul 27 13:56:26 2015 -0700
+++ b/src/share/vm/runtime/globals.hpp	Fri Jul 31 12:13:57 2015 +0200
@@ -2643,8 +2643,8 @@
   /* because of overflow issue                                   */         \
   product(intx, CICompilerCount, CI_COMPILER_COUNT,                         \
           "Number of compiler threads to run")                              \
-          range((intx)Arguments::get_min_number_of_compiler_threads(),      \
-                max_jint)                                                   \
+          range(0, max_jint)                                                \
+          constraint(CICompilerCountConstraintFunc, AtParse)                \
                                                                             \
   product(intx, CompilationPolicyChoice, 0,                                 \
           "which compilation policy (0-3)")                                 \
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/arguments/CheckCICompilerCount.java	Fri Jul 31 12:13:57 2015 +0200
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import jdk.test.lib.*;
+
+/*
+ * @test CheckCheckCICompilerCount
+ * @bug 8130858
+ * @summary Check that correct range of values for CICompilerCount are allowed depending on whether tiered is enabled or not
+ * @library /testlibrary
+ * @modules java.base/sun.misc
+ *          java.management
+ * @run main CheckCICompilerCount
+ */
+
+public class CheckCICompilerCount {
+    private static final String[][] NON_TIERED_ARGUMENTS = {
+        {
+            "-XX:-TieredCompilation",
+            "-XX:+PrintFlagsFinal",
+            "-XX:CICompilerCount=0",
+            "-version"
+        },
+        {
+            "-XX:-TieredCompilation",
+            "-XX:+PrintFlagsFinal",
+            "-XX:CICompilerCount=1",
+            "-version"
+        }
+    };
+
+    private static final String[][] NON_TIERED_EXPECTED_OUTPUTS = {
+        {
+            "CICompilerCount=0 must be at least 1",
+            "Improperly specified VM option 'CICompilerCount=0'"
+        },
+        {
+            "intx CICompilerCount                          := 1                                   {product}"
+        }
+    };
+
+    private static final int[] NON_TIERED_EXIT = {
+        1,
+        0
+    };
+
+    private static final String[][] TIERED_ARGUMENTS = {
+        {
+            "-XX:+TieredCompilation",
+            "-XX:+PrintFlagsFinal",
+            "-XX:CICompilerCount=1",
+            "-version"
+        },
+        {
+            "-XX:+TieredCompilation",
+            "-XX:+PrintFlagsFinal",
+            "-XX:CICompilerCount=2",
+            "-version"
+        }
+    };
+
+    private static final String[][] TIERED_EXPECTED_OUTPUTS = {
+        {
+            "CICompilerCount=1 must be at least 2",
+            "Improperly specified VM option 'CICompilerCount=1'"
+        },
+        {
+            "intx CICompilerCount                          := 2                                   {product}"
+        }
+    };
+
+    private static final int[] TIERED_EXIT = {
+        1,
+        0
+    };
+
+    private static void verifyValidOption(String[] arguments, String[] expected_outputs, int exit, boolean tiered) throws Exception {
+        ProcessBuilder pb;
+        OutputAnalyzer out;
+
+        pb = ProcessTools.createJavaProcessBuilder(arguments);
+        out = new OutputAnalyzer(pb.start());
+
+        try {
+            out.shouldHaveExitValue(exit);
+            for (String expected_output : expected_outputs) {
+                out.shouldContain(expected_output);
+            }
+        } catch (RuntimeException e) {
+            // Check if tiered compilation is available in this JVM
+            // Version. Throw exception only if it is available.
+            if (!(tiered && out.getOutput().contains("TieredCompilation is disabled in this release."))) {
+                throw new RuntimeException(e);
+            }
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        if (NON_TIERED_ARGUMENTS.length != NON_TIERED_EXPECTED_OUTPUTS.length || NON_TIERED_ARGUMENTS.length != NON_TIERED_EXIT.length) {
+            throw new RuntimeException("Test is set up incorrectly: length of arguments, expected outputs and exit codes in non-tiered mode of operation do not match.");
+        }
+
+        if (TIERED_ARGUMENTS.length != TIERED_EXPECTED_OUTPUTS.length || TIERED_ARGUMENTS.length != TIERED_EXIT.length) {
+            throw new RuntimeException("Test is set up incorrectly: length of arguments, expected outputs and exit codes in tiered mode of operation do not match.");
+        }
+
+        for (int i = 0; i < NON_TIERED_ARGUMENTS.length; i++) {
+            verifyValidOption(NON_TIERED_ARGUMENTS[i], NON_TIERED_EXPECTED_OUTPUTS[i], NON_TIERED_EXIT[i], false);
+        }
+
+        for (int i = 0; i < TIERED_ARGUMENTS.length; i++) {
+            verifyValidOption(TIERED_ARGUMENTS[i], TIERED_EXPECTED_OUTPUTS[i], TIERED_EXIT[i], true);
+        }
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/intrinsics/IntrinsicAvailableTest.java	Fri Jul 31 12:13:57 2015 +0200
@@ -0,0 +1,126 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+import java.lang.reflect.Executable;
+import java.util.concurrent.Callable;
+import java.util.Objects;
+/*
+ * @test
+ * @bug 8130832
+ * @library /testlibrary /../../test/lib /compiler/whitebox /compiler/testlibrary
+ * @build IntrinsicAvailableTest
+ * @run main ClassFileInstaller sun.hotspot.WhiteBox
+ *                              sun.hotspot.WhiteBox$WhiteBoxPermission
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   -XX:+UseCRC32Intrinsics
+ *                   IntrinsicAvailableTest
+ * @run main/othervm -Xbootclasspath/a:.
+ *                   -XX:+UnlockDiagnosticVMOptions
+ *                   -XX:+WhiteBoxAPI
+ *                   -XX:-UseCRC32Intrinsics
+ *                   IntrinsicAvailableTest
+ */
+public class IntrinsicAvailableTest extends CompilerWhiteBoxTest {
+    protected String VMName;
+
+    public IntrinsicAvailableTest(IntrinsicAvailableTestTestCase testCase) {
+        super(testCase);
+        VMName = System.getProperty("java.vm.name");
+    }
+
+    public static class IntrinsicAvailableTestTestCase implements TestCase {
+
+        public String name() {
+            return "IntrinsicAvailableTestTestCase";
+        }
+
+        public Executable getExecutable() {
+            // Using a single method to test the
+            // WhiteBox.isIntrinsicAvailable(Executable method, int compLevel)
+            // call for the compilation level corresponding to both the C1 and C2
+            // compiler keeps the current test simple.
+            //
+            // The tested method is java.util.zip.CRC32.update(int, int) because
+            // both C1 and C2 define an intrinsic for the method and
+            // the UseCRC32Intrinsics flag can be used to enable/disable
+            // intrinsification of the method in both product and fastdebug
+            // builds.
+            try {
+                return Class.forName("java.util.zip.CRC32").getDeclaredMethod("update", int.class, int.class);
+            } catch (NoSuchMethodException e) {
+                throw new RuntimeException("Test bug, method unavailable. " + e);
+            } catch (ClassNotFoundException e) {
+                throw new RuntimeException("Test bug, class unavailable. " + e);
+            }
+        }
+
+        public Callable<Integer> getCallable() {
+            return null;
+        }
+
+        public boolean isOsr() {
+            return false;
+        }
+
+    }
+
+    protected void checkIntrinsicForCompilationLevel(Executable method, int compLevel) throws Exception {
+        boolean intrinsicEnabled = Boolean.valueOf(getVMOption("UseCRC32Intrinsics"));
+        boolean intrinsicAvailable = WHITE_BOX.isIntrinsicAvailable(method,
+                                                                    compLevel);
+
+        String intrinsicEnabledMessage = intrinsicEnabled ? "enabled" : "disabled";
+        String intrinsicAvailableMessage = intrinsicAvailable ? "available" : "not available";
+
+        if (intrinsicEnabled == intrinsicAvailable) {
+            System.out.println("Expected result: intrinsic for java.util.zip.CRC32.update() is " +
+                               intrinsicEnabledMessage + " and intrinsic is " + intrinsicAvailableMessage +
+                               " at compilation level " + compLevel);
+        } else {
+            throw new RuntimeException("Unexpected result: intrinsic for java.util.zip.CRC32.update() is " +
+                                       intrinsicEnabledMessage + " but intrinsic is " + intrinsicAvailableMessage +
+                                       " at compilation level " + compLevel);
+        }
+    }
+
+    protected boolean isServerVM() {
+        return VMName.toLowerCase().contains("server");
+    }
+
+    public void test() throws Exception {
+        Executable intrinsicMethod = testCase.getExecutable();
+        if (isServerVM()) {
+            if (TIERED_COMPILATION) {
+                checkIntrinsicForCompilationLevel(intrinsicMethod, COMP_LEVEL_SIMPLE);
+            }
+            checkIntrinsicForCompilationLevel(intrinsicMethod, COMP_LEVEL_FULL_OPTIMIZATION);
+        } else {
+            checkIntrinsicForCompilationLevel(intrinsicMethod, COMP_LEVEL_SIMPLE);
+        }
+    }
+
+    public static void main(String args[]) throws Exception {
+        new IntrinsicAvailableTest(new IntrinsicAvailableTestTestCase()).test();
+    }
+}
--- a/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java	Mon Jul 27 13:56:26 2015 -0700
+++ b/test/compiler/intrinsics/mathexact/sanity/IntrinsicBase.java	Fri Jul 31 12:13:57 2015 +0200
@@ -67,7 +67,7 @@
                     compileAtLevel(CompilerWhiteBoxTest.COMP_LEVEL_SIMPLE);
                 }
 
-                if (!isIntrinsicSupported()) {
+                if (!isIntrinsicAvailable()) {
                     expectedIntrinsicCount = 0;
                 }
                 break;
@@ -114,7 +114,11 @@
         }
     }
 
-    protected abstract boolean isIntrinsicSupported();
+    // An intrinsic is available if:
+    // - the intrinsic is enabled (by using the appropriate command-line flag) and
+    // - the intrinsic is supported by the VM (i.e., the platform on which the VM is
+    //   running provides the instructions necessary for the VM to generate the intrinsic).
+    protected abstract boolean isIntrinsicAvailable();
 
     protected abstract String getIntrinsicId();
 
@@ -123,14 +127,20 @@
     }
 
     static class IntTest extends IntrinsicBase {
+
+        protected boolean isIntrinsicAvailable; // The tested intrinsic is available on the current platform.
+
         protected IntTest(MathIntrinsic.IntIntrinsic testCase) {
             super(testCase);
+            // Only the C2 compiler intrinsifies exact math methods
+            // so check if the intrinsics are available with C2.
+            isIntrinsicAvailable = WHITE_BOX.isIntrinsicAvailable(testCase.getTestMethod(),
+                                                                  COMP_LEVEL_FULL_OPTIMIZATION);
         }
 
         @Override
-        protected boolean isIntrinsicSupported() {
-            return isServerVM() && Boolean.valueOf(useMathExactIntrinsics)
-                && (Platform.isX86() || Platform.isX64() || Platform.isAArch64());
+        protected boolean isIntrinsicAvailable() {
+            return isIntrinsicAvailable;
         }
 
         @Override
@@ -140,14 +150,20 @@
     }
 
     static class LongTest extends IntrinsicBase {
+
+        protected boolean isIntrinsicAvailable; // The tested intrinsic is available on the current platform.
+
         protected LongTest(MathIntrinsic.LongIntrinsic testCase) {
             super(testCase);
+            // Only the C2 compiler intrinsifies exact math methods
+            // so check if the intrinsics are available with C2.
+            isIntrinsicAvailable = WHITE_BOX.isIntrinsicAvailable(testCase.getTestMethod(),
+                                                                  COMP_LEVEL_FULL_OPTIMIZATION);
         }
 
         @Override
-        protected boolean isIntrinsicSupported() {
-            return isServerVM() && Boolean.valueOf(useMathExactIntrinsics) &&
-                (Platform.isX64() || Platform.isPPC() || Platform.isAArch64());
+        protected boolean isIntrinsicAvailable() {
+            return isIntrinsicAvailable;
         }
 
         @Override
--- a/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java	Mon Jul 27 13:56:26 2015 -0700
+++ b/test/compiler/intrinsics/mathexact/sanity/MathIntrinsic.java	Fri Jul 31 12:13:57 2015 +0200
@@ -29,11 +29,21 @@
     enum IntIntrinsic implements CompilerWhiteBoxTest.TestCase {
         Add {
             @Override
+            Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+                return Class.forName("java.lang.Math").getDeclaredMethod("addExact", int.class, int.class);
+            }
+
+            @Override
             Object execMathMethod() {
                 return intR = Math.addExact(int1, int2);
             }
         },
-        Subtract {
+       Subtract {
+            @Override
+            Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+                return Class.forName("java.lang.Math").getDeclaredMethod("subtractExact", int.class, int.class);
+            }
+
             @Override
             Object execMathMethod() {
                 return intR = Math.subtractExact(int1, int2);
@@ -41,34 +51,66 @@
         },
         Multiply {
             @Override
+            Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+                return Class.forName("java.lang.Math").getDeclaredMethod("multiplyExact", int.class, int.class);
+            }
+
+            @Override
             Object execMathMethod() {
                 return intR = Math.multiplyExact(int1, int2);
             }
         },
         Increment {
             @Override
+            Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+                return Class.forName("java.lang.Math").getDeclaredMethod("incrementExact", int.class);
+            }
+
+            @Override
             Object execMathMethod() {
                 return intR = Math.incrementExact(int1);
             }
         },
         Decrement {
             @Override
+            Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+                return Class.forName("java.lang.Math").getDeclaredMethod("decrementExact", int.class);
+            }
+
+            @Override
             Object execMathMethod() {
                 return intR = Math.decrementExact(int1);
             }
         },
         Negate {
             @Override
+            Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+                return Class.forName("java.lang.Math").getDeclaredMethod("negateExact", int.class);
+            }
+
+            @Override
             Object execMathMethod() {
                 return intR = Math.negateExact(int1);
             }
         };
+
         protected int int1;
         protected int int2;
         protected int intR;
 
+        abstract Executable testMethod() throws NoSuchMethodException, ClassNotFoundException;
         abstract Object execMathMethod();
 
+        public Executable getTestMethod() {
+            try {
+                return testMethod();
+            } catch (NoSuchMethodException e) {
+                throw new RuntimeException("Test bug, no such method: " + e);
+            } catch (ClassNotFoundException e) {
+                throw new RuntimeException("Test bug, no such class: " + e);
+            }
+        }
+
         @Override
         public Executable getExecutable() {
             try {
@@ -93,36 +135,66 @@
     enum LongIntrinsic implements CompilerWhiteBoxTest.TestCase {
         Add {
             @Override
+            Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+                return Class.forName("java.lang.Math").getDeclaredMethod("addExact", long.class, long.class);
+            }
+
+            @Override
             Object execMathMethod() {
                 return longR = Math.addExact(long1, long2);
             }
         },
         Subtract {
             @Override
+            Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+                return Class.forName("java.lang.Math").getDeclaredMethod("subtractExact", long.class, long.class);
+            }
+
+            @Override
             Object execMathMethod() {
                 return longR = Math.subtractExact(long1, long2);
             }
         },
         Multiply {
             @Override
+            Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+                return Class.forName("java.lang.Math").getDeclaredMethod("multiplyExact", long.class, long.class);
+            }
+
+            @Override
             Object execMathMethod() {
                 return longR = Math.multiplyExact(long1, long2);
             }
         },
         Increment {
             @Override
+            Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+                return Class.forName("java.lang.Math").getDeclaredMethod("incrementExact", long.class);
+            }
+
+            @Override
             Object execMathMethod() {
                 return longR = Math.incrementExact(long1);
             }
         },
         Decrement {
             @Override
+            Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+                return Class.forName("java.lang.Math").getDeclaredMethod("decrementExact", long.class);
+            }
+
+            @Override
             Object execMathMethod() {
                 return longR = Math.decrementExact(long1);
             }
         },
         Negate {
             @Override
+            Executable testMethod() throws NoSuchMethodException, ClassNotFoundException {
+                return Class.forName("java.lang.Math").getDeclaredMethod("negateExact", long.class);
+            }
+
+            @Override
             Object execMathMethod() {
                 return longR = Math.negateExact(long1);
             }
@@ -131,8 +203,19 @@
         protected long long2;
         protected long longR;
 
+        abstract Executable testMethod() throws NoSuchMethodException, ClassNotFoundException;
         abstract Object execMathMethod();
 
+        public Executable getTestMethod() {
+            try {
+                return testMethod();
+            } catch (NoSuchMethodException e) {
+                throw new RuntimeException("Test bug, no such method: " + e);
+            } catch (ClassNotFoundException e) {
+                throw new RuntimeException("Test bug, no such class: " + e);
+            }
+        }
+
         @Override
         public Executable getExecutable() {
             try {