changeset 8294:5f2ef612ed74

Merge
author bharadwaj
date Thu, 30 Apr 2015 18:14:58 -0400
parents 3411dd58a74f 030e40746a11
children 34a82607b479 e0ad42748972
files src/cpu/ppc/vm/interp_masm_ppc_64.hpp src/cpu/ppc/vm/templateTable_ppc_64.cpp src/cpu/x86/vm/globals_x86.hpp src/cpu/x86/vm/macroAssembler_x86.cpp src/cpu/x86/vm/sharedRuntime_x86_64.cpp src/share/vm/classfile/javaClasses.cpp src/share/vm/classfile/javaClasses.hpp src/share/vm/classfile/vmSymbols.hpp src/share/vm/oops/oop.hpp src/share/vm/oops/oop.inline.hpp src/share/vm/prims/methodHandles.cpp src/share/vm/prims/methodHandles.hpp src/share/vm/runtime/globals.hpp src/share/vm/runtime/sharedRuntime.cpp
diffstat 93 files changed, 2650 insertions(+), 1296 deletions(-) [+]
line wrap: on
line diff
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciMethodData.java	Wed Apr 29 02:35:29 2015 +0200
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciMethodData.java	Thu Apr 30 18:14:58 2015 -0400
@@ -148,7 +148,7 @@
   ParametersTypeData<ciKlass,ciMethod> parametersTypeData() {
     Address base = getAddress().addOffsetTo(origField.getOffset());
     int di = (int)parametersTypeDataDi.getValue(base);
-    if (di == -1) {
+    if (di == -1 || di == -2) {
       return null;
     }
     DataLayout dataLayout = new DataLayout(dataField.getValue(getAddress()), di);
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java	Wed Apr 29 02:35:29 2015 +0200
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java	Thu Apr 30 18:14:58 2015 -0400
@@ -328,7 +328,7 @@
   }
 
   public Symbol getUnresolvedStringAt(int which) {
-    return getSymbolAt(which);
+    return getSlotAt(which).getSymbol();
   }
 
   // returns null, if not resolved.
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java	Wed Apr 29 02:35:29 2015 +0200
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java	Thu Apr 30 18:14:58 2015 -0400
@@ -42,10 +42,10 @@
 
   private static synchronized void initialize(TypeDataBase db) throws WrongTypeException {
     Type type      = db.lookupType("PhaseCFG");
-    numBlocksField = new CIntField(type.getCIntegerField("_num_blocks"), 0);
+    numBlocksField = new CIntField(type.getCIntegerField("_number_of_blocks"), 0);
     blocksField = type.getAddressField("_blocks");
     bbsField = type.getAddressField("_node_to_block_mapping");
-    brootField = type.getAddressField("_broot");
+    brootField = type.getAddressField("_root_block");
   }
 
   private static CIntField numBlocksField;
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Wed Apr 29 02:35:29 2015 +0200
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Thu Apr 30 18:14:58 2015 -0400
@@ -314,26 +314,17 @@
   //------------------------------------------------------------------------------
   // frame::adjust_unextended_sp
   private void adjustUnextendedSP() {
-    // If we are returning to a compiled MethodHandle call site, the
-    // saved_fp will in fact be a saved value of the unextended SP.  The
-    // simplest way to tell whether we are returning to such a call site
-    // is as follows:
+    // On x86, sites calling method handle intrinsics and lambda forms are treated
+    // as any other call site. Therefore, no special action is needed when we are
+    // returning to any of these call sites.
 
     CodeBlob cb = cb();
     NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull();
     if (senderNm != null) {
-      // If the sender PC is a deoptimization point, get the original
-      // PC.  For MethodHandle call site the unextended_sp is stored in
-      // saved_fp.
-      if (senderNm.isDeoptMhEntry(getPC())) {
-        // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP()));
-        raw_unextendedSP = getFP();
-      }
-      else if (senderNm.isDeoptEntry(getPC())) {
-        // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp));
-      }
-      else if (senderNm.isMethodHandleReturn(getPC())) {
-        raw_unextendedSP = getFP();
+      // If the sender PC is a deoptimization point, get the original PC.
+      if (senderNm.isDeoptEntry(getPC()) ||
+          senderNm.isDeoptMhEntry(getPC())) {
+        // DEBUG_ONLY(verifyDeoptriginalPc(senderNm, raw_unextendedSp));
       }
     }
   }
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/aarch64/vm/globals_aarch64.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -68,6 +68,8 @@
 
 define_pd_global(bool, UseMembar,            true);
 
+define_pd_global(bool, PreserveFramePointer, false);
+
 // GC Ergo Flags
 define_pd_global(uintx, CMSYoungGenPerWorker, 64*M);  // default max size of CMS young gen, per GC worker thread
 
--- a/src/cpu/ppc/vm/assembler_ppc.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/assembler_ppc.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -85,8 +85,7 @@
 }
 
 // Low-level andi-one-instruction-macro.
-void Assembler::andi(Register a, Register s, const int ui16) {
-  assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
+void Assembler::andi(Register a, Register s, const long ui16) {
   if (is_power_of_2_long(((jlong) ui16)+1)) {
     // pow2minus1
     clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
@@ -97,6 +96,7 @@
     // negpow2
     clrrdi(a, s, log2_long((jlong)-ui16));
   } else {
+    assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
     andi_(a, s, ui16);
   }
 }
@@ -356,7 +356,6 @@
 // 16 bit immediate offset.
 int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
   // Avoid accidentally trying to use R0 for indexed addressing.
-  assert(d != R0, "R0 not allowed");
   assert_different_registers(d, tmp);
 
   short xa, xb, xc, xd; // Four 16-bit chunks of const.
@@ -370,6 +369,58 @@
     return 0;
   }
 
+  int retval = 0;
+  if (return_simm16_rest) {
+    retval = xd;
+    x = rem << 16;
+    xd = 0;
+  }
+
+  if (d == R0) { // Can't use addi.
+    if (is_simm(x, 32)) { // opt 2: simm32
+      lis(d, x >> 16);
+      if (xd) ori(d, d, (unsigned short)xd);
+    } else {
+      // 64-bit value: x = xa xb xc xd
+      xa = (x >> 48) & 0xffff;
+      xb = (x >> 32) & 0xffff;
+      xc = (x >> 16) & 0xffff;
+      bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
+      if (tmp == noreg || (xc == 0 && xd == 0)) {
+        if (xa_loaded) {
+          lis(d, xa);
+          if (xb) { ori(d, d, (unsigned short)xb); }
+        } else {
+          li(d, xb);
+        }
+        sldi(d, d, 32);
+        if (xc) { oris(d, d, (unsigned short)xc); }
+        if (xd) { ori( d, d, (unsigned short)xd); }
+      } else {
+        // Exploit instruction level parallelism if we have a tmp register.
+        bool xc_loaded = (xd & 0x8000) ? (xc != -1) : (xc != 0);
+        if (xa_loaded) {
+          lis(tmp, xa);
+        }
+        if (xc_loaded) {
+          lis(d, xc);
+        }
+        if (xa_loaded) {
+          if (xb) { ori(tmp, tmp, (unsigned short)xb); }
+        } else {
+          li(tmp, xb);
+        }
+        if (xc_loaded) {
+          if (xd) { ori(d, d, (unsigned short)xd); }
+        } else {
+          li(d, xd);
+        }
+        insrdi(d, tmp, 32, 0);
+      }
+    }
+    return retval;
+  }
+
   xc = rem & 0xFFFF; // Next 16-bit chunk.
   rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
 
@@ -377,28 +428,27 @@
     lis(d, xc);
   } else { // High 32 bits needed.
 
-    if (tmp != noreg) { // opt 3: We have a temp reg.
+    if (tmp != noreg  && (int)x != 0) { // opt 3: We have a temp reg.
       // No carry propagation between xc and higher chunks here (use logical instructions).
       xa = (x >> 48) & 0xffff;
       xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
-      bool load_xa = (xa != 0) || (xb < 0);
+      bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
       bool return_xd = false;
 
-      if (load_xa) { lis(tmp, xa); }
+      if (xa_loaded) { lis(tmp, xa); }
       if (xc) { lis(d, xc); }
-      if (load_xa) {
+      if (xa_loaded) {
         if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
       } else {
-        li(tmp, xb); // non-negative
+        li(tmp, xb);
       }
       if (xc) {
-        if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi.
-        else if (xd) { addi(d, d, xd); }
+        if (xd) { addi(d, d, xd); }
       } else {
         li(d, xd);
       }
       insrdi(d, tmp, 32, 0);
-      return return_xd ? xd : 0; // non-negative
+      return retval;
     }
 
     xb = rem & 0xFFFF; // Next 16-bit chunk.
@@ -417,11 +467,51 @@
     if (xc) { addis(d, d, xc); }
   }
 
-  // opt 5: Return offset to be inserted into following instruction.
-  if (return_simm16_rest) return xd;
+  if (xd) { addi(d, d, xd); }
+  return retval;
+}
 
-  if (xd) { addi(d, d, xd); }
-  return 0;
+// We emit only one addition to s to optimize latency.
+int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp, bool return_simm16_rest) {
+  assert(s != R0 && s != tmp, "unsupported");
+  long rem = x;
+
+  // Case 1: Can use mr or addi.
+  short xd = rem & 0xFFFF; // Lowest 16-bit chunk.
+  rem = (rem >> 16) + ((unsigned short)xd >> 15);
+  if (rem == 0) {
+    if (xd == 0) {
+      if (d != s) { mr(d, s); }
+      return 0;
+    }
+    if (return_simm16_rest) {
+      return xd;
+    }
+    addi(d, s, xd);
+    return 0;
+  }
+
+  // Case 2: Can use addis.
+  if (xd == 0) {
+    short xc = rem & 0xFFFF; // 2nd 16-bit chunk.
+    rem = (rem >> 16) + ((unsigned short)xd >> 15);
+    if (rem == 0) {
+      addis(d, s, xc);
+      return 0;
+    }
+  }
+
+  // Other cases: load & add.
+  Register tmp1 = tmp,
+           tmp2 = noreg;
+  if ((d != tmp) && (d != s)) {
+    // Can use d.
+    tmp1 = d;
+    tmp2 = tmp;
+  }
+  int simm16_rest = load_const_optimized(tmp1, x, tmp2, return_simm16_rest);
+  add(d, tmp1, s);
+  return simm16_rest;
 }
 
 #ifndef PRODUCT
--- a/src/cpu/ppc/vm/assembler_ppc.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/assembler_ppc.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -224,10 +224,13 @@
     ADDIS_OPCODE  = (15u << OPCODE_SHIFT),
     ADDIC__OPCODE = (13u << OPCODE_SHIFT),
     ADDE_OPCODE   = (31u << OPCODE_SHIFT | 138u << 1),
+    ADDME_OPCODE  = (31u << OPCODE_SHIFT | 234u << 1),
+    ADDZE_OPCODE  = (31u << OPCODE_SHIFT | 202u << 1),
     SUBF_OPCODE   = (31u << OPCODE_SHIFT |  40u << 1),
     SUBFC_OPCODE  = (31u << OPCODE_SHIFT |   8u << 1),
     SUBFE_OPCODE  = (31u << OPCODE_SHIFT | 136u << 1),
     SUBFIC_OPCODE = (8u  << OPCODE_SHIFT),
+    SUBFME_OPCODE = (31u << OPCODE_SHIFT | 232u << 1),
     SUBFZE_OPCODE = (31u << OPCODE_SHIFT | 200u << 1),
     DIVW_OPCODE   = (31u << OPCODE_SHIFT | 491u << 1),
     MULLW_OPCODE  = (31u << OPCODE_SHIFT | 235u << 1),
@@ -657,6 +660,9 @@
     SYNC_OPCODE    = (31u << OPCODE_SHIFT |  598u << 1),
     EIEIO_OPCODE   = (31u << OPCODE_SHIFT |  854u << 1),
 
+    // Wait instructions for polling.
+    WAIT_OPCODE    = (31u << OPCODE_SHIFT |   62u << 1),
+
     // Trap instructions
     TDI_OPCODE     = (2u  << OPCODE_SHIFT),
     TWI_OPCODE     = (3u  << OPCODE_SHIFT),
@@ -666,8 +672,10 @@
     // Atomics.
     LWARX_OPCODE   = (31u << OPCODE_SHIFT |   20u << 1),
     LDARX_OPCODE   = (31u << OPCODE_SHIFT |   84u << 1),
+    LQARX_OPCODE   = (31u << OPCODE_SHIFT |  276u << 1),
     STWCX_OPCODE   = (31u << OPCODE_SHIFT |  150u << 1),
-    STDCX_OPCODE   = (31u << OPCODE_SHIFT |  214u << 1)
+    STDCX_OPCODE   = (31u << OPCODE_SHIFT |  214u << 1),
+    STQCX_OPCODE   = (31u << OPCODE_SHIFT |  182u << 1)
 
   };
 
@@ -1171,6 +1179,14 @@
   inline void adde_(  Register d, Register a, Register b);
   inline void subfe(  Register d, Register a, Register b);
   inline void subfe_( Register d, Register a, Register b);
+  inline void addme(  Register d, Register a);
+  inline void addme_( Register d, Register a);
+  inline void subfme( Register d, Register a);
+  inline void subfme_(Register d, Register a);
+  inline void addze(  Register d, Register a);
+  inline void addze_( Register d, Register a);
+  inline void subfze( Register d, Register a);
+  inline void subfze_(Register d, Register a);
   inline void neg(    Register d, Register a);
   inline void neg_(   Register d, Register a);
   inline void mulli(  Register d, Register a, int si16);
@@ -1189,6 +1205,38 @@
   inline void divw(   Register d, Register a, Register b);
   inline void divw_(  Register d, Register a, Register b);
 
+  // Fixed-Point Arithmetic Instructions with Overflow detection
+  inline void addo(    Register d, Register a, Register b);
+  inline void addo_(   Register d, Register a, Register b);
+  inline void subfo(   Register d, Register a, Register b);
+  inline void subfo_(  Register d, Register a, Register b);
+  inline void addco(   Register d, Register a, Register b);
+  inline void addco_(  Register d, Register a, Register b);
+  inline void subfco(  Register d, Register a, Register b);
+  inline void subfco_( Register d, Register a, Register b);
+  inline void addeo(   Register d, Register a, Register b);
+  inline void addeo_(  Register d, Register a, Register b);
+  inline void subfeo(  Register d, Register a, Register b);
+  inline void subfeo_( Register d, Register a, Register b);
+  inline void addmeo(  Register d, Register a);
+  inline void addmeo_( Register d, Register a);
+  inline void subfmeo( Register d, Register a);
+  inline void subfmeo_(Register d, Register a);
+  inline void addzeo(  Register d, Register a);
+  inline void addzeo_( Register d, Register a);
+  inline void subfzeo( Register d, Register a);
+  inline void subfzeo_(Register d, Register a);
+  inline void nego(    Register d, Register a);
+  inline void nego_(   Register d, Register a);
+  inline void mulldo(  Register d, Register a, Register b);
+  inline void mulldo_( Register d, Register a, Register b);
+  inline void mullwo(  Register d, Register a, Register b);
+  inline void mullwo_( Register d, Register a, Register b);
+  inline void divdo(   Register d, Register a, Register b);
+  inline void divdo_(  Register d, Register a, Register b);
+  inline void divwo(   Register d, Register a, Register b);
+  inline void divwo_(  Register d, Register a, Register b);
+
   // extended mnemonics
   inline void li(   Register d, int si16);
   inline void lis(  Register d, int si16);
@@ -1303,7 +1351,7 @@
   inline void isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg);
 
   // PPC 1, section 3.3.11, Fixed-Point Logical Instructions
-         void andi(   Register a, Register s, int ui16);   // optimized version
+         void andi(   Register a, Register s, long ui16);   // optimized version
   inline void andi_(  Register a, Register s, int ui16);
   inline void andis_( Register a, Register s, int ui16);
   inline void ori(    Register a, Register s, int ui16);
@@ -1688,14 +1736,21 @@
   inline void isync();
   inline void elemental_membar(int e); // Elemental Memory Barriers (>=Power 8)
 
+  // Wait instructions for polling. Attention: May result in SIGILL.
+  inline void wait();
+  inline void waitrsv(); // >=Power7
+
   // atomics
   inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
   inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
+  inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
   inline bool lxarx_hint_exclusive_access();
   inline void lwarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
   inline void ldarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
+  inline void lqarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
   inline void stwcx_( Register s, Register a, Register b);
   inline void stdcx_( Register s, Register a, Register b);
+  inline void stqcx_( Register s, Register a, Register b);
 
   // Instructions for adjusting thread priority for simultaneous
   // multithreading (SMT) on Power5.
@@ -2054,10 +2109,13 @@
   // Atomics: use ra0mem to disallow R0 as base.
   inline void lwarx_unchecked(Register d, Register b, int eh1);
   inline void ldarx_unchecked(Register d, Register b, int eh1);
+  inline void lqarx_unchecked(Register d, Register b, int eh1);
   inline void lwarx( Register d, Register b, bool hint_exclusive_access);
   inline void ldarx( Register d, Register b, bool hint_exclusive_access);
+  inline void lqarx( Register d, Register b, bool hint_exclusive_access);
   inline void stwcx_(Register s, Register b);
   inline void stdcx_(Register s, Register b);
+  inline void stqcx_(Register s, Register b);
   inline void lfs(   FloatRegister d, int si16);
   inline void lfsx(  FloatRegister d, Register b);
   inline void lfd(   FloatRegister d, int si16);
@@ -2120,6 +2178,20 @@
     return load_const_optimized(d, (long)(unsigned long)a, tmp, return_simm16_rest);
   }
 
+  // If return_simm16_rest, the return value needs to get added afterwards.
+         int add_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false);
+  inline int add_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) {
+    return add_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest);
+  }
+
+  // If return_simm16_rest, the return value needs to get added afterwards.
+  inline int sub_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false) {
+    return add_const_optimized(d, s, -x, tmp, return_simm16_rest);
+  }
+  inline int sub_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) {
+    return sub_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest);
+  }
+
   // Creation
   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
 #ifdef CHECK_DELAY
--- a/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -100,6 +100,14 @@
 inline void Assembler::adde_(  Register d, Register a, Register b) { emit_int32(ADDE_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
 inline void Assembler::subfe(  Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
 inline void Assembler::subfe_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+inline void Assembler::addme(  Register d, Register a)             { emit_int32(ADDME_OPCODE  | rt(d) | ra(a) |         oe(0) | rc(0)); }
+inline void Assembler::addme_( Register d, Register a)             { emit_int32(ADDME_OPCODE  | rt(d) | ra(a) |         oe(0) | rc(1)); }
+inline void Assembler::subfme( Register d, Register a)             { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) |         oe(0) | rc(0)); }
+inline void Assembler::subfme_(Register d, Register a)             { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) |         oe(0) | rc(1)); }
+inline void Assembler::addze(  Register d, Register a)             { emit_int32(ADDZE_OPCODE  | rt(d) | ra(a) |         oe(0) | rc(0)); }
+inline void Assembler::addze_( Register d, Register a)             { emit_int32(ADDZE_OPCODE  | rt(d) | ra(a) |         oe(0) | rc(1)); }
+inline void Assembler::subfze( Register d, Register a)             { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) |         oe(0) | rc(0)); }
+inline void Assembler::subfze_(Register d, Register a)             { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) |         oe(0) | rc(1)); }
 inline void Assembler::neg(    Register d, Register a)             { emit_int32(NEG_OPCODE    | rt(d) | ra(a) | oe(0) | rc(0)); }
 inline void Assembler::neg_(   Register d, Register a)             { emit_int32(NEG_OPCODE    | rt(d) | ra(a) | oe(0) | rc(1)); }
 inline void Assembler::mulli(  Register d, Register a, int si16)   { emit_int32(MULLI_OPCODE  | rt(d) | ra(a) | simm(si16, 16)); }
@@ -118,6 +126,38 @@
 inline void Assembler::divw(   Register d, Register a, Register b) { emit_int32(DIVW_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
 inline void Assembler::divw_(  Register d, Register a, Register b) { emit_int32(DIVW_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
 
+// Fixed-Point Arithmetic Instructions with Overflow detection
+inline void Assembler::addo(    Register d, Register a, Register b) { emit_int32(ADD_OPCODE    | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::addo_(   Register d, Register a, Register b) { emit_int32(ADD_OPCODE    | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::subfo(   Register d, Register a, Register b) { emit_int32(SUBF_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::subfo_(  Register d, Register a, Register b) { emit_int32(SUBF_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::addco(   Register d, Register a, Register b) { emit_int32(ADDC_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::addco_(  Register d, Register a, Register b) { emit_int32(ADDC_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::subfco(  Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::subfco_( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::addeo(   Register d, Register a, Register b) { emit_int32(ADDE_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::addeo_(  Register d, Register a, Register b) { emit_int32(ADDE_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::subfeo(  Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::subfeo_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::addmeo(  Register d, Register a)             { emit_int32(ADDME_OPCODE  | rt(d) | ra(a) |         oe(1) | rc(0)); }
+inline void Assembler::addmeo_( Register d, Register a)             { emit_int32(ADDME_OPCODE  | rt(d) | ra(a) |         oe(1) | rc(1)); }
+inline void Assembler::subfmeo( Register d, Register a)             { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) |         oe(1) | rc(0)); }
+inline void Assembler::subfmeo_(Register d, Register a)             { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) |         oe(1) | rc(1)); }
+inline void Assembler::addzeo(  Register d, Register a)             { emit_int32(ADDZE_OPCODE  | rt(d) | ra(a) |         oe(1) | rc(0)); }
+inline void Assembler::addzeo_( Register d, Register a)             { emit_int32(ADDZE_OPCODE  | rt(d) | ra(a) |         oe(1) | rc(1)); }
+inline void Assembler::subfzeo( Register d, Register a)             { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) |         oe(1) | rc(0)); }
+inline void Assembler::subfzeo_(Register d, Register a)             { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) |         oe(1) | rc(1)); }
+inline void Assembler::nego(    Register d, Register a)             { emit_int32(NEG_OPCODE    | rt(d) | ra(a) | oe(1) | rc(0)); }
+inline void Assembler::nego_(   Register d, Register a)             { emit_int32(NEG_OPCODE    | rt(d) | ra(a) | oe(1) | rc(1)); }
+inline void Assembler::mulldo(  Register d, Register a, Register b) { emit_int32(MULLD_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::mulldo_( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::mullwo(  Register d, Register a, Register b) { emit_int32(MULLW_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::mullwo_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::divdo(   Register d, Register a, Register b) { emit_int32(DIVD_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::divdo_(  Register d, Register a, Register b) { emit_int32(DIVD_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::divwo(   Register d, Register a, Register b) { emit_int32(DIVW_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::divwo_(  Register d, Register a, Register b) { emit_int32(DIVW_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+
 // extended mnemonics
 inline void Assembler::li(   Register d, int si16)             { Assembler::addi_r0ok( d, R0, si16); }
 inline void Assembler::lis(  Register d, int si16)             { Assembler::addis_r0ok(d, R0, si16); }
@@ -540,15 +580,22 @@
 inline void Assembler::isync()     { emit_int32( ISYNC_OPCODE); }
 inline void Assembler::elemental_membar(int e) { assert(0 < e && e < 16, "invalid encoding"); emit_int32( SYNC_OPCODE | e1215(e)); }
 
+// Wait instructions for polling.
+inline void Assembler::wait()    { emit_int32( WAIT_OPCODE); }
+inline void Assembler::waitrsv() { emit_int32( WAIT_OPCODE | 1<<(31-10)); } // WC=0b01 >=Power7
+
 // atomics
 // Use ra0mem to disallow R0 as base.
 inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
 inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
+inline void Assembler::lqarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LQARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
 inline bool Assembler::lxarx_hint_exclusive_access()                                          { return VM_Version::has_lxarxeh(); }
 inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::lqarx( Register d, Register a, Register b, bool hint_exclusive_access) { lqarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::stwcx_(Register s, Register a, Register b)                             { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
 inline void Assembler::stdcx_(Register s, Register a, Register b)                             { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
+inline void Assembler::stqcx_(Register s, Register a, Register b)                             { emit_int32( STQCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
 
 // Instructions for adjusting thread priority
 // for simultaneous multithreading (SMT) on POWER5.
@@ -873,10 +920,13 @@
 // ra0 version
 inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
 inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
+inline void Assembler::lqarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LQARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
 inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::lqarx( Register d, Register b, bool hint_exclusive_access){ lqarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::stwcx_(Register s, Register b)                            { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); }
 inline void Assembler::stdcx_(Register s, Register b)                            { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::stqcx_(Register s, Register b)                            { emit_int32( STQCX_OPCODE | rs(s) | rb(b) | rc(1)); }
 
 // ra0 version
 inline void Assembler::lfs( FloatRegister d, int si16)   { emit_int32( LFS_OPCODE  | frt(d) | simm(si16,16)); }
--- a/src/cpu/ppc/vm/c2_globals_ppc.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/c2_globals_ppc.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,7 +47,7 @@
 define_pd_global(intx, FLOATPRESSURE,                28);
 define_pd_global(intx, FreqInlineSize,               175);
 define_pd_global(intx, MinJumpTableSize,             10);
-define_pd_global(intx, INTPRESSURE,                  25);
+define_pd_global(intx, INTPRESSURE,                  26);
 define_pd_global(intx, InteriorEntryAlignment,       16);
 define_pd_global(size_t, NewSizeThreadIncrease,      ScaleForWordSize(4*K));
 define_pd_global(intx, RegisterCostAreaRatio,        16000);
--- a/src/cpu/ppc/vm/globals_ppc.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/globals_ppc.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,10 +55,12 @@
 
 define_pd_global(bool, UseMembar,             false);
 
+define_pd_global(bool, PreserveFramePointer,  false);
+
 // GC Ergo Flags
 define_pd_global(size_t, CMSYoungGenPerWorker, 16*M);  // Default max size of CMS young gen, per GC worker thread.
 
-define_pd_global(uintx, TypeProfileLevel, 0);
+define_pd_global(uintx, TypeProfileLevel, 111);
 
 // Platform dependent flag handling: flags only defined on this platform.
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct)  \
@@ -71,14 +73,26 @@
                                                                             \
   product(uintx, PowerArchitecturePPC64, 0,                                 \
           "CPU Version: x for PowerX. Currently recognizes Power5 to "      \
-          "Power7. Default is 0. CPUs newer than Power7 will be "           \
-          "recognized as Power7.")                                          \
+          "Power8. Default is 0. Newer CPUs will be recognized as Power8.") \
                                                                             \
   /* Reoptimize code-sequences of calls at runtime, e.g. replace an */      \
   /* indirect call by a direct call.                                */      \
   product(bool, ReoptimizeCallSequences, true,                              \
           "Reoptimize code-sequences of calls at runtime.")                 \
                                                                             \
+  /* Power 8: Configure Data Stream Control Register. */                    \
+  product(uint64_t,DSCR_PPC64, (uintx)-1,                                   \
+          "Power8 or later: Specify encoded value for Data Stream Control " \
+          "Register")                                                       \
+  product(uint64_t,DSCR_DPFD_PPC64, 8,                                      \
+          "Power8 or later: DPFD (default prefetch depth) value of the "    \
+          "Data Stream Control Register."                                   \
+          " 0: hardware default, 1: none, 2-7: min-max, 8: don't touch")    \
+  product(uint64_t,DSCR_URG_PPC64, 8,                                       \
+          "Power8 or later: URG (depth attainment urgency) value of the "   \
+          "Data Stream Control Register."                                   \
+          " 0: hardware default, 1: none, 2-7: min-max, 8: don't touch")    \
+                                                                            \
   product(bool, UseLoadInstructionsForStackBangingPPC64, false,             \
           "Use load instructions for stack banging.")                       \
                                                                             \
@@ -121,6 +135,41 @@
                                                                             \
   product(bool, ZapMemory, false, "Write 0x0101... to empty memory."        \
           " Use this to ease debugging.")                                   \
-
+                                                                            \
+  /* Use Restricted Transactional Memory for lock eliding */                \
+  product(bool, UseRTMLocking, false,                                       \
+          "Enable RTM lock eliding for inflated locks in compiled code")    \
+                                                                            \
+  experimental(bool, UseRTMForStackLocks, false,                            \
+          "Enable RTM lock eliding for stack locks in compiled code")       \
+                                                                            \
+  product(bool, UseRTMDeopt, false,                                         \
+          "Perform deopt and recompilation based on RTM abort ratio")       \
+                                                                            \
+  product(uintx, RTMRetryCount, 5,                                          \
+          "Number of RTM retries on lock abort or busy")                    \
+                                                                            \
+  experimental(intx, RTMSpinLoopCount, 100,                                 \
+          "Spin count for lock to become free before RTM retry")            \
+                                                                            \
+  experimental(intx, RTMAbortThreshold, 1000,                               \
+          "Calculate abort ratio after this number of aborts")              \
+                                                                            \
+  experimental(intx, RTMLockingThreshold, 10000,                            \
+          "Lock count at which to do RTM lock eliding without "             \
+          "abort ratio calculation")                                        \
+                                                                            \
+  experimental(intx, RTMAbortRatio, 50,                                     \
+          "Lock abort ratio at which to stop use RTM lock eliding")         \
+                                                                            \
+  experimental(intx, RTMTotalCountIncrRate, 64,                             \
+          "Increment total RTM attempted lock count once every n times")    \
+                                                                            \
+  experimental(intx, RTMLockingCalculationDelay, 0,                         \
+          "Number of milliseconds to wait before start calculating aborts " \
+          "for RTM locking")                                                \
+                                                                            \
+  experimental(bool, UseRTMXendForLockBusy, true,                           \
+          "Use RTM Xend instead of Xabort when lock busy")                  \
 
 #endif // CPU_PPC_VM_GLOBALS_PPC_HPP
--- a/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -446,7 +446,7 @@
 }
 
 // Load object from cpool->resolved_references(index).
-void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) {
+void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index, Label *is_null) {
   assert_different_registers(result, index);
   get_constant_pool(result);
 
@@ -469,7 +469,7 @@
 #endif
   // Add in the index.
   add(result, tmp, result);
-  load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result);
+  load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result, is_null);
 }
 
 // Generate a subtype check: branch to ok_is_subtype if sub_klass is
@@ -876,7 +876,6 @@
     // If condition is true we are done and hence we can store 0 in the displaced
     // header indicating it is a recursive lock.
     bne(CCR0, slow_case);
-    release();
     std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() +
         BasicLock::displaced_header_offset_in_bytes(), monitor);
     b(done);
@@ -1861,7 +1860,7 @@
     const Register mdp = tmp1;
     add(mdp, tmp1, R28_mdx);
 
-    // Pffset of the current profile entry to update.
+    // Offset of the current profile entry to update.
     const Register entry_offset = tmp2;
     // entry_offset = array len in number of cells
     ld(entry_offset, in_bytes(ArrayData::array_len_offset()), mdp);
--- a/src/cpu/ppc/vm/interp_masm_ppc_64.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/interp_masm_ppc_64.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -85,7 +85,7 @@
                          Register tmp1, Register tmp2, Register tmp3, Label &ok_is_subtype);
 
   // Load object from cpool->resolved_references(index).
-  void load_resolved_reference_at_index(Register result, Register index);
+  void load_resolved_reference_at_index(Register result, Register index, Label *is_null = NULL);
 
   void generate_stack_overflow_check_with_compare_and_throw(Register Rmem_frame_size, Register Rscratch1);
   void load_receiver(Register Rparam_count, Register Rrecv_dst);
--- a/src/cpu/ppc/vm/interpreter_ppc.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/interpreter_ppc.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -427,7 +427,6 @@
   return entry;
 }
 
-
 // Call an accessor method (assuming it is resolved, otherwise drop into
 // vanilla (slow path) entry.
 address InterpreterGenerator::generate_jump_to_normal_entry(void) {
@@ -473,7 +472,8 @@
 
   // This is not a leaf but we have a JavaFrameAnchor now and we will
   // check (create) exceptions afterward so this is ok.
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError),
+                  R16_thread);
 
   // Pop the C frame and restore LR.
   __ pop_frame();
--- a/src/cpu/ppc/vm/interpreter_ppc.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/interpreter_ppc.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,4 +47,4 @@
   }
 #endif
 
-#endif // CPU_PPC_VM_INTERPRETER_PPC_PP
+#endif // CPU_PPC_VM_INTERPRETER_PPC_HPP
--- a/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1455,7 +1455,7 @@
 // Several special cases exist to avoid that unnecessary information is generated.
 //
 void MacroAssembler::cmpxchgd(ConditionRegister flag,
-                              Register dest_current_value, Register compare_value, Register exchange_value,
+                              Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
                               Register addr_base, int semantics, bool cmpxchgx_hint,
                               Register int_flag_success, Label* failed_ext, bool contention_hint) {
   Label retry;
@@ -1465,7 +1465,7 @@
 
   // Save one branch if result is returned via register and result register is different from the other ones.
   bool use_result_reg    = (int_flag_success!=noreg);
-  bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value &&
+  bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value.register_or_noreg() &&
                             int_flag_success!=exchange_value && int_flag_success!=addr_base);
   assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
 
@@ -1481,7 +1481,7 @@
   // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
   if (contention_hint) { // Don't try to reserve if cmp fails.
     ld(dest_current_value, 0, addr_base);
-    cmpd(flag, dest_current_value, compare_value);
+    cmpd(flag, compare_value, dest_current_value);
     bne(flag, failed);
   }
 
@@ -1489,7 +1489,7 @@
   bind(retry);
 
   ldarx(dest_current_value, addr_base, cmpxchgx_hint);
-  cmpd(flag, dest_current_value, compare_value);
+  cmpd(flag, compare_value, dest_current_value);
   if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
     bne_predict_not_taken(flag, failed);
   } else {
@@ -1873,7 +1873,6 @@
   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 
   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
-  fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
            /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
            /*where=*/obj_reg,
@@ -1909,7 +1908,6 @@
   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 
   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
-  fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
                  /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
                  /*where=*/obj_reg,
@@ -1946,7 +1944,6 @@
   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 
   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
-  fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
                  /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
                  /*where=*/obj_reg,
@@ -1987,9 +1984,371 @@
   beq(cr_reg, done);
 }
 
+// TM on PPC64.
+void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) {
+  Label retry;
+  bind(retry);
+  ldarx(result, addr, /*hint*/ false);
+  addi(result, result, simm16);
+  stdcx_(result, addr);
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
+  } else {
+    bne(                  CCR0, retry); // stXcx_ sets CCR0
+  }
+}
+
+void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) {
+  Label retry;
+  bind(retry);
+  lwarx(result, addr, /*hint*/ false);
+  ori(result, result, uimm16);
+  stwcx_(result, addr);
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
+  } else {
+    bne(                  CCR0, retry); // stXcx_ sets CCR0
+  }
+}
+
+#if INCLUDE_RTM_OPT
+
+// Update rtm_counters based on abort status
+// input: abort_status
+//        rtm_counters (RTMLockingCounters*)
+void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) {
+  // Mapping to keep PreciseRTMLockingStatistics similar to x86.
+  // x86 ppc (! means inverted, ? means not the same)
+  //  0   31  Set if abort caused by XABORT instruction.
+  //  1  ! 7  If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set.
+  //  2   13  Set if another logical processor conflicted with a memory address that was part of the transaction that aborted.
+  //  3   10  Set if an internal buffer overflowed.
+  //  4  ?12  Set if a debug breakpoint was hit.
+  //  5  ?32  Set if an abort occurred during execution of a nested transaction.
+  const  int tm_failure_bit[] = {Assembler::tm_tabort, // Note: Seems like signal handler sets this, too.
+                                 Assembler::tm_failure_persistent, // inverted: transient
+                                 Assembler::tm_trans_cf,
+                                 Assembler::tm_footprint_of,
+                                 Assembler::tm_non_trans_cf,
+                                 Assembler::tm_suspended};
+  const bool tm_failure_inv[] = {false, true, false, false, false, false};
+  assert(sizeof(tm_failure_bit)/sizeof(int) == RTMLockingCounters::ABORT_STATUS_LIMIT, "adapt mapping!");
+
+  const Register addr_Reg = R0;
+  // Keep track of offset to where rtm_counters_Reg had pointed to.
+  int counters_offs = RTMLockingCounters::abort_count_offset();
+  addi(addr_Reg, rtm_counters_Reg, counters_offs);
+  const Register temp_Reg = rtm_counters_Reg;
+
+  //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
+  ldx(temp_Reg, addr_Reg);
+  addi(temp_Reg, temp_Reg, 1);
+  stdx(temp_Reg, addr_Reg);
+
+  if (PrintPreciseRTMLockingStatistics) {
+    int counters_offs_delta = RTMLockingCounters::abortX_count_offset() - counters_offs;
+
+    //mftexasr(abort_status); done by caller
+    for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
+      counters_offs += counters_offs_delta;
+      li(temp_Reg, counters_offs_delta); // can't use addi with R0
+      add(addr_Reg, addr_Reg, temp_Reg); // point to next counter
+      counters_offs_delta = sizeof(uintx);
+
+      Label check_abort;
+      rldicr_(temp_Reg, abort_status, tm_failure_bit[i], 0);
+      if (tm_failure_inv[i]) {
+        bne(CCR0, check_abort);
+      } else {
+        beq(CCR0, check_abort);
+      }
+      //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
+      ldx(temp_Reg, addr_Reg);
+      addi(temp_Reg, temp_Reg, 1);
+      stdx(temp_Reg, addr_Reg);
+      bind(check_abort);
+    }
+  }
+  li(temp_Reg, -counters_offs); // can't use addi with R0
+  add(rtm_counters_Reg, addr_Reg, temp_Reg); // restore
+}
+
+// Branch if (random & (count-1) != 0), count is 2^n
+// tmp and CR0 are killed
+void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) {
+  mftb(tmp);
+  andi_(tmp, tmp, count-1);
+  bne(CCR0, brLabel);
+}
+
+// Perform abort ratio calculation, set no_rtm bit if high ratio.
+// input:  rtm_counters_Reg (RTMLockingCounters* address) - KILLED
+void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg,
+                                                 RTMLockingCounters* rtm_counters,
+                                                 Metadata* method_data) {
+  Label L_done, L_check_always_rtm1, L_check_always_rtm2;
+
+  if (RTMLockingCalculationDelay > 0) {
+    // Delay calculation.
+    ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr());
+    cmpdi(CCR0, rtm_counters_Reg, 0);
+    beq(CCR0, L_done);
+    load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
+  }
+  // Abort ratio calculation only if abort_count > RTMAbortThreshold.
+  //   Aborted transactions = abort_count * 100
+  //   All transactions = total_count *  RTMTotalCountIncrRate
+  //   Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
+  ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg);
+  cmpdi(CCR0, R0, RTMAbortThreshold);
+  blt(CCR0, L_check_always_rtm2);
+  mulli(R0, R0, 100);
+
+  const Register tmpReg = rtm_counters_Reg;
+  ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
+  mulli(tmpReg, tmpReg, RTMTotalCountIncrRate);
+  mulli(tmpReg, tmpReg, RTMAbortRatio);
+  cmpd(CCR0, R0, tmpReg);
+  blt(CCR0, L_check_always_rtm1); // jump to reload
+  if (method_data != NULL) {
+    // Set rtm_state to "no rtm" in MDO.
+    // Not using a metadata relocation. Method and Class Loader are kept alive anyway.
+    // (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.)
+    load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
+    atomic_ori_int(R0, tmpReg, NoRTM);
+  }
+  b(L_done);
+
+  bind(L_check_always_rtm1);
+  load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
+  bind(L_check_always_rtm2);
+  ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
+  cmpdi(CCR0, tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
+  blt(CCR0, L_done);
+  if (method_data != NULL) {
+    // Set rtm_state to "always rtm" in MDO.
+    // Not using a metadata relocation. See above.
+    load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
+    atomic_ori_int(R0, tmpReg, UseRTM);
+  }
+  bind(L_done);
+}
+
+// Update counters and perform abort ratio calculation.
+// input: abort_status_Reg
+void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg,
+                                   RTMLockingCounters* rtm_counters,
+                                   Metadata* method_data,
+                                   bool profile_rtm) {
+
+  assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+  // Update rtm counters based on state at abort.
+  // Reads abort_status_Reg, updates flags.
+  assert_different_registers(abort_status_Reg, temp_Reg);
+  load_const_optimized(temp_Reg, (address)rtm_counters, R0);
+  rtm_counters_update(abort_status_Reg, temp_Reg);
+  if (profile_rtm) {
+    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+    rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data);
+  }
+}
+
+// Retry on abort if abort's status indicates non-persistent failure.
+// inputs: retry_count_Reg
+//       : abort_status_Reg
+// output: retry_count_Reg decremented by 1
+void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg,
+                                             Label& retryLabel, Label* checkRetry) {
+  Label doneRetry;
+  rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0);
+  bne(CCR0, doneRetry);
+  if (checkRetry) { bind(*checkRetry); }
+  addic_(retry_count_Reg, retry_count_Reg, -1);
+  blt(CCR0, doneRetry);
+  smt_yield(); // Can't use wait(). No permission (SIGILL).
+  b(retryLabel);
+  bind(doneRetry);
+}
+
+// Spin and retry if lock is busy.
+// inputs: box_Reg (monitor address)
+//       : retry_count_Reg
+// output: retry_count_Reg decremented by 1
+// CTR is killed
+void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) {
+  Label SpinLoop, doneRetry;
+  addic_(retry_count_Reg, retry_count_Reg, -1);
+  blt(CCR0, doneRetry);
+  li(R0, RTMSpinLoopCount);
+  mtctr(R0);
+
+  bind(SpinLoop);
+  smt_yield(); // Can't use waitrsv(). No permission (SIGILL).
+  bdz(retryLabel);
+  ld(R0, 0, owner_addr_Reg);
+  cmpdi(CCR0, R0, 0);
+  bne(CCR0, SpinLoop);
+  b(retryLabel);
+
+  bind(doneRetry);
+}
+
+// Use RTM for normal stack locks.
+// Input: objReg (object to lock)
+void MacroAssembler::rtm_stack_locking(ConditionRegister flag,
+                                       Register obj, Register mark_word, Register tmp,
+                                       Register retry_on_abort_count_Reg,
+                                       RTMLockingCounters* stack_rtm_counters,
+                                       Metadata* method_data, bool profile_rtm,
+                                       Label& DONE_LABEL, Label& IsInflated) {
+  assert(UseRTMForStackLocks, "why call this otherwise?");
+  assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+  Label L_rtm_retry, L_decrement_retry, L_on_abort;
+
+  if (RTMRetryCount > 0) {
+    load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
+    bind(L_rtm_retry);
+  }
+  andi_(R0, mark_word, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
+  bne(CCR0, IsInflated);
+
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    Label L_noincrement;
+    if (RTMTotalCountIncrRate > 1) {
+      branch_on_random_using_tb(tmp, (int)RTMTotalCountIncrRate, L_noincrement);
+    }
+    assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
+    load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0);
+    //atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically
+    ldx(mark_word, tmp);
+    addi(mark_word, mark_word, 1);
+    stdx(mark_word, tmp);
+    bind(L_noincrement);
+  }
+  tbegin_();
+  beq(CCR0, L_on_abort);
+  ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);         // Reload in transaction, conflicts need to be tracked.
+  andi(R0, mark_word, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
+  cmpwi(flag, R0, markOopDesc::unlocked_value);                // bits = 001 unlocked
+  beq(flag, DONE_LABEL);                                       // all done if unlocked
+
+  if (UseRTMXendForLockBusy) {
+    tend_();
+    b(L_decrement_retry);
+  } else {
+    tabort_();
+  }
+  bind(L_on_abort);
+  const Register abort_status_Reg = tmp;
+  mftexasr(abort_status_Reg);
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm);
+  }
+  ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload
+  if (RTMRetryCount > 0) {
+    // Retry on lock abort if abort status is not permanent.
+    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry);
+  } else {
+    bind(L_decrement_retry);
+  }
+}
+
+// Use RTM for inflating locks
+// inputs: obj       (object to lock)
+//         mark_word (current header - KILLED)
+//         boxReg    (on-stack box address (displaced header location) - KILLED)
+void MacroAssembler::rtm_inflated_locking(ConditionRegister flag,
+                                          Register obj, Register mark_word, Register boxReg,
+                                          Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg,
+                                          RTMLockingCounters* rtm_counters,
+                                          Metadata* method_data, bool profile_rtm,
+                                          Label& DONE_LABEL) {
+  assert(UseRTMLocking, "why call this otherwise?");
+  Label L_rtm_retry, L_decrement_retry, L_on_abort;
+  // Clean monitor_value bit to get valid pointer.
+  int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+
+  // Store non-null, using boxReg instead of (intptr_t)markOopDesc::unused_mark().
+  std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg);
+  const Register tmpReg = boxReg;
+  const Register owner_addr_Reg = mark_word;
+  addi(owner_addr_Reg, mark_word, owner_offset);
+
+  if (RTMRetryCount > 0) {
+    load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy.
+    load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort.
+    bind(L_rtm_retry);
+  }
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    Label L_noincrement;
+    if (RTMTotalCountIncrRate > 1) {
+      branch_on_random_using_tb(R0, (int)RTMTotalCountIncrRate, L_noincrement);
+    }
+    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+    load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg);
+    //atomic_inc_ptr(R0, tmpReg); We don't increment atomically
+    ldx(tmpReg, R0);
+    addi(tmpReg, tmpReg, 1);
+    stdx(tmpReg, R0);
+    bind(L_noincrement);
+  }
+  tbegin_();
+  beq(CCR0, L_on_abort);
+  // We don't reload mark word. Will only be reset at safepoint.
+  ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked.
+  cmpdi(flag, R0, 0);
+  beq(flag, DONE_LABEL);
+
+  if (UseRTMXendForLockBusy) {
+    tend_();
+    b(L_decrement_retry);
+  } else {
+    tabort_();
+  }
+  bind(L_on_abort);
+  const Register abort_status_Reg = tmpReg;
+  mftexasr(abort_status_Reg);
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm);
+    // Restore owner_addr_Reg
+    ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);
+#ifdef ASSERT
+    andi_(R0, mark_word, markOopDesc::monitor_value);
+    asm_assert_ne("must be inflated", 0xa754); // Deflating only allowed at safepoint.
+#endif
+    addi(owner_addr_Reg, mark_word, owner_offset);
+  }
+  if (RTMRetryCount > 0) {
+    // Retry on lock abort if abort status is not permanent.
+    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
+  }
+
+  // Appears unlocked - try to swing _owner from null to non-null.
+  cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg,
+           MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+           MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true);
+
+  if (RTMRetryCount > 0) {
+    // success done else retry
+    b(DONE_LABEL);
+    bind(L_decrement_retry);
+    // Spin and retry if lock is busy.
+    rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry);
+  } else {
+    bind(L_decrement_retry);
+  }
+}
+
+#endif //  INCLUDE_RTM_OPT
+
 // "The box" is the space on the stack where we copy the object mark.
 void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
-                                               Register temp, Register displaced_header, Register current_header) {
+                                               Register temp, Register displaced_header, Register current_header,
+                                               bool try_bias,
+                                               RTMLockingCounters* rtm_counters,
+                                               RTMLockingCounters* stack_rtm_counters,
+                                               Metadata* method_data,
+                                               bool use_rtm, bool profile_rtm) {
   assert_different_registers(oop, box, temp, displaced_header, current_header);
   assert(flag != CCR0, "bad condition register");
   Label cont;
@@ -2006,10 +2365,18 @@
     return;
   }
 
-  if (UseBiasedLocking) {
+  if (try_bias) {
     biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
   }
 
+#if INCLUDE_RTM_OPT
+  if (UseRTMForStackLocks && use_rtm) {
+    rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header,
+                      stack_rtm_counters, method_data, profile_rtm,
+                      cont, object_has_monitor);
+  }
+#endif // INCLUDE_RTM_OPT
+
   // Handle existing monitor.
   if ((EmitSync & 0x02) == 0) {
     // The object has an existing monitor iff (mark & monitor_value) != 0.
@@ -2066,14 +2433,22 @@
     bind(object_has_monitor);
     // The object's monitor m is unlocked iff m->owner == NULL,
     // otherwise m->owner may contain a thread or a stack address.
-    //
+
+#if INCLUDE_RTM_OPT
+    // Use the same RTM locking code in 32- and 64-bit VM.
+    if (use_rtm) {
+      rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
+                           rtm_counters, method_data, profile_rtm, cont);
+    } else {
+#endif // INCLUDE_RTM_OPT
+
     // Try to CAS m->owner from NULL to current thread.
     addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
     li(displaced_header, 0);
     // CmpxchgX sets flag to cmpX(current, displaced).
     cmpxchgd(/*flag=*/flag,
              /*current_value=*/current_header,
-             /*compare_value=*/displaced_header,
+             /*compare_value=*/(intptr_t)0,
              /*exchange_value=*/R16_thread,
              /*where=*/temp,
              MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
@@ -2095,6 +2470,10 @@
     //asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp,
     //                           "monitor->OwnerIsThread shouldn't be 0", -1);
 #   endif
+
+#if INCLUDE_RTM_OPT
+    } // use_rtm()
+#endif
   }
 
   bind(cont);
@@ -2103,7 +2482,8 @@
 }
 
 void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
-                                                 Register temp, Register displaced_header, Register current_header) {
+                                                 Register temp, Register displaced_header, Register current_header,
+                                                 bool try_bias, bool use_rtm) {
   assert_different_registers(oop, box, temp, displaced_header, current_header);
   assert(flag != CCR0, "bad condition register");
   Label cont;
@@ -2115,10 +2495,24 @@
     return;
   }
 
-  if (UseBiasedLocking) {
+  if (try_bias) {
     biased_locking_exit(flag, oop, current_header, cont);
   }
 
+#if INCLUDE_RTM_OPT
+  if (UseRTMForStackLocks && use_rtm) {
+    assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+    Label L_regular_unlock;
+    ld(current_header, oopDesc::mark_offset_in_bytes(), oop);         // fetch markword
+    andi(R0, current_header, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
+    cmpwi(flag, R0, markOopDesc::unlocked_value);                     // bits = 001 unlocked
+    bne(flag, L_regular_unlock);                                      // else RegularLock
+    tend_();                                                          // otherwise end...
+    b(cont);                                                          // ... and we're done
+    bind(L_regular_unlock);
+  }
+#endif
+
   // Find the lock address and load the displaced header from the stack.
   ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
 
@@ -2129,13 +2523,12 @@
   // Handle existing monitor.
   if ((EmitSync & 0x02) == 0) {
     // The object has an existing monitor iff (mark & monitor_value) != 0.
+    RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
     ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
-    andi(temp, current_header, markOopDesc::monitor_value);
-    cmpdi(flag, temp, 0);
-    bne(flag, object_has_monitor);
+    andi_(R0, current_header, markOopDesc::monitor_value);
+    bne(CCR0, object_has_monitor);
   }
 
-
   // Check if it is still a light weight lock, this is is true if we see
   // the stack address of the basicLock in the markOop of the object.
   // Cmpxchg sets flag to cmpd(current_header, box).
@@ -2158,6 +2551,20 @@
     bind(object_has_monitor);
     addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
     ld(temp,             ObjectMonitor::owner_offset_in_bytes(), current_header);
+
+    // It's inflated.
+#if INCLUDE_RTM_OPT
+    if (use_rtm) {
+      Label L_regular_inflated_unlock;
+      // Clean monitor_value bit to get valid pointer
+      cmpdi(flag, temp, 0);
+      bne(flag, L_regular_inflated_unlock);
+      tend_();
+      b(cont);
+      bind(L_regular_inflated_unlock);
+    }
+#endif
+
     ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
     xorr(temp, R16_thread, temp);      // Will be 0 if we are the owner.
     orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
@@ -2441,6 +2848,8 @@
   //   oop_result
   //   R16_thread->in_bytes(JavaThread::vm_result_offset())
 
+  verify_thread();
+
   ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread);
   li(R0, 0);
   std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread);
@@ -2462,26 +2871,24 @@
   std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
 }
 
-
-void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
+Register MacroAssembler::encode_klass_not_null(Register dst, Register src) {
   Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided.
   if (Universe::narrow_klass_base() != 0) {
     // Use dst as temp if it is free.
-    load_const(R0, Universe::narrow_klass_base(), (dst != current && dst != R0) ? dst : noreg);
-    sub(dst, current, R0);
+    sub_const_optimized(dst, current, Universe::narrow_klass_base(), R0);
     current = dst;
   }
   if (Universe::narrow_klass_shift() != 0) {
     srdi(dst, current, Universe::narrow_klass_shift());
     current = dst;
   }
-  mr_if_needed(dst, current); // Move may be required.
+  return current;
 }
 
 void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) {
   if (UseCompressedClassPointers) {
-    encode_klass_not_null(ck, klass);
-    stw(ck, oopDesc::klass_offset_in_bytes(), dst_oop);
+    Register compressedKlass = encode_klass_not_null(ck, klass);
+    stw(compressedKlass, oopDesc::klass_offset_in_bytes(), dst_oop);
   } else {
     std(klass, oopDesc::klass_offset_in_bytes(), dst_oop);
   }
@@ -2514,8 +2921,7 @@
     sldi(shifted_src, src, Universe::narrow_klass_shift());
   }
   if (Universe::narrow_klass_base() != 0) {
-    load_const(R0, Universe::narrow_klass_base());
-    add(dst, shifted_src, R0);
+    add_const_optimized(dst, shifted_src, Universe::narrow_klass_base(), R0);
   }
 }
 
--- a/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
 #define CPU_PPC_VM_MACROASSEMBLER_PPC_HPP
 
 #include "asm/assembler.hpp"
+#include "runtime/rtmLocking.hpp"
 #include "utilities/macros.hpp"
 
 // MacroAssembler extends Assembler by a few frequently used macros.
@@ -432,8 +433,8 @@
                 int semantics, bool cmpxchgx_hint = false,
                 Register int_flag_success = noreg, bool contention_hint = false);
   void cmpxchgd(ConditionRegister flag,
-                Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
-                int semantics, bool cmpxchgx_hint = false,
+                Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
+                Register addr_base, int semantics, bool cmpxchgx_hint = false,
                 Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false);
 
   // interface method calling
@@ -506,8 +507,42 @@
   // biased locking exit case failed.
   void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done);
 
-  void compiler_fast_lock_object(  ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3);
-  void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3);
+  void atomic_inc_ptr(Register addr, Register result, int simm16 = 1);
+  void atomic_ori_int(Register addr, Register result, int uimm16);
+
+#if INCLUDE_RTM_OPT
+  void rtm_counters_update(Register abort_status, Register rtm_counters);
+  void branch_on_random_using_tb(Register tmp, int count, Label& brLabel);
+  void rtm_abort_ratio_calculation(Register rtm_counters_reg, RTMLockingCounters* rtm_counters,
+                                   Metadata* method_data);
+  void rtm_profiling(Register abort_status_Reg, Register temp_Reg,
+                     RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
+  void rtm_retry_lock_on_abort(Register retry_count, Register abort_status,
+                               Label& retryLabel, Label* checkRetry = NULL);
+  void rtm_retry_lock_on_busy(Register retry_count, Register owner_addr, Label& retryLabel);
+  void rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp,
+                         Register retry_on_abort_count,
+                         RTMLockingCounters* stack_rtm_counters,
+                         Metadata* method_data, bool profile_rtm,
+                         Label& DONE_LABEL, Label& IsInflated);
+  void rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register box,
+                            Register retry_on_busy_count, Register retry_on_abort_count,
+                            RTMLockingCounters* rtm_counters,
+                            Metadata* method_data, bool profile_rtm,
+                            Label& DONE_LABEL);
+#endif
+
+  void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
+                                 Register tmp1, Register tmp2, Register tmp3,
+                                 bool try_bias = UseBiasedLocking,
+                                 RTMLockingCounters* rtm_counters = NULL,
+                                 RTMLockingCounters* stack_rtm_counters = NULL,
+                                 Metadata* method_data = NULL,
+                                 bool use_rtm = false, bool profile_rtm = false);
+
+  void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
+                                   Register tmp1, Register tmp2, Register tmp3,
+                                   bool try_bias = UseBiasedLocking, bool use_rtm = false);
 
   // Support for serializing memory accesses between threads
   void serialize_memory(Register thread, Register tmp1, Register tmp2);
@@ -576,7 +611,7 @@
                                       Register tmp = noreg);
 
   // Null allowed.
-  inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg);
+  inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg, Label *is_null = NULL);
 
   // Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong.
   // src == d allowed.
@@ -593,7 +628,7 @@
   void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
   static int instr_size_for_decode_klass_not_null();
   void decode_klass_not_null(Register dst, Register src = noreg);
-  void encode_klass_not_null(Register dst, Register src = noreg);
+  Register encode_klass_not_null(Register dst, Register src = noreg);
 
   // Load common heap base into register.
   void reinit_heapbase(Register d, Register tmp = noreg);
--- a/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -333,19 +333,29 @@
   }
 }
 
-inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1) {
+inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1, Label *is_null) {
   if (UseCompressedOops) {
     lwz(d, offs, s1);
-    decode_heap_oop(d);
+    if (is_null != NULL) {
+      cmpwi(CCR0, d, 0);
+      beq(CCR0, *is_null);
+      decode_heap_oop_not_null(d);
+    } else {
+      decode_heap_oop(d);
+    }
   } else {
     ld(d, offs, s1);
+    if (is_null != NULL) {
+      cmpdi(CCR0, d, 0);
+      beq(CCR0, *is_null);
+    }
   }
 }
 
 inline Register MacroAssembler::encode_heap_oop_not_null(Register d, Register src) {
   Register current = (src != noreg) ? src : d; // Oop to be compressed is in d if no src provided.
   if (Universe::narrow_oop_base_overlaps()) {
-    sub(d, current, R30);
+    sub_const_optimized(d, current, Universe::narrow_oop_base(), R0);
     current = d;
   }
   if (Universe::narrow_oop_shift() != 0) {
@@ -358,7 +368,7 @@
 inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register src) {
   if (Universe::narrow_oop_base_disjoint() && src != noreg && src != d &&
       Universe::narrow_oop_shift() != 0) {
-    mr(d, R30);
+    load_const_optimized(d, Universe::narrow_oop_base(), R0);
     rldimi(d, src, Universe::narrow_oop_shift(), 32-Universe::narrow_oop_shift());
     return d;
   }
@@ -369,7 +379,7 @@
     current = d;
   }
   if (Universe::narrow_oop_base() != NULL) {
-    add(d, current, R30);
+    add_const_optimized(d, current, Universe::narrow_oop_base(), R0);
     current = d;
   }
   return current; // Decoded oop is in this register.
@@ -377,11 +387,19 @@
 
 inline void MacroAssembler::decode_heap_oop(Register d) {
   Label isNull;
+  bool use_isel = false;
   if (Universe::narrow_oop_base() != NULL) {
     cmpwi(CCR0, d, 0);
-    beq(CCR0, isNull);
+    if (VM_Version::has_isel()) {
+      use_isel = true;
+    } else {
+      beq(CCR0, isNull);
+    }
   }
   decode_heap_oop_not_null(d);
+  if (use_isel) {
+    isel_0(d, CCR0, Assembler::equal);
+  }
   bind(isNull);
 }
 
--- a/src/cpu/ppc/vm/methodHandles_ppc.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/methodHandles_ppc.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,9 +27,6 @@
 // These definitions are inlined into class MethodHandles.
 
 // Adapters
-//static unsigned int adapter_code_size() {
-//  return 32*K DEBUG_ONLY(+ 16*K) + (TraceMethodHandles ? 16*K : 0) + (VerifyMethodHandles ? 32*K : 0);
-//}
 enum /* platform_dependent_constants */ {
   adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000))
 };
@@ -45,7 +42,9 @@
 
   static void verify_method_handle(MacroAssembler* _masm, Register mh_reg,
                                    Register temp_reg, Register temp2_reg) {
-    Unimplemented();
+    verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle),
+                 temp_reg, temp2_reg,
+                 "reference is a MH");
   }
 
   static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
--- a/src/cpu/ppc/vm/ppc.ad	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/ppc.ad	Thu Apr 30 18:14:58 2015 -0400
@@ -447,8 +447,8 @@
   R26,
   R27,
   R28,
-/*R29*/             // global TOC
-/*R30*/             // Narrow Oop Base
+/*R29,*/             // global TOC
+  R30,
   R31
 );
 
@@ -484,58 +484,11 @@
   R26,
   R27,
   R28,
-/*R29*/
-/*R30*/             // Narrow Oop Base
+/*R29,*/
+  R30,
   R31
 );
 
-// Complement-required-in-pipeline operands for narrow oops.
-reg_class bits32_reg_ro_not_complement (
-/*R0*/     // R0
-  R1,      // SP
-  R2,      // TOC
-  R3,
-  R4,
-  R5,
-  R6,
-  R7,
-  R8,
-  R9,
-  R10,
-  R11,
-  R12,
-/*R13,*/   // system thread id
-  R14,
-  R15,
-  R16,    // R16_thread
-  R17,
-  R18,
-  R19,
-  R20,
-  R21,
-  R22,
-/*R23,
-  R24,
-  R25,
-  R26,
-  R27,
-  R28,*/
-/*R29,*/ // TODO: let allocator handle TOC!!
-/*R30,*/
-  R31
-);
-
-// Complement-required-in-pipeline operands for narrow oops.
-// See 64-bit declaration.
-reg_class bits32_reg_ro_complement (
-  R23,
-  R24,
-  R25,
-  R26,
-  R27,
-  R28
-);
-
 reg_class rscratch1_bits32_reg(R11);
 reg_class rscratch2_bits32_reg(R12);
 reg_class rarg1_bits32_reg(R3);
@@ -591,8 +544,8 @@
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
-/*R29_H, R29*/
-/*R30_H, R30*/
+/*R29_H, R29,*/
+  R30_H, R30,
   R31_H, R31
 );
 
@@ -629,8 +582,8 @@
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
-/*R29_H, R29*/
-/*R30_H, R30*/
+/*R29_H, R29,*/
+  R30_H, R30,
   R31_H, R31
 );
 
@@ -667,8 +620,8 @@
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
-/*R29_H, R29*/
-/*R30_H, R30*/
+/*R29_H, R29,*/
+  R30_H, R30,
   R31_H, R31
 );
 
@@ -704,64 +657,11 @@
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
-/*R29_H, R29*/ // TODO: let allocator handle TOC!!
-/*R30_H, R30,*/
+/*R29_H, R29,*/ // TODO: let allocator handle TOC!!
+  R30_H, R30,
   R31_H, R31
 );
 
-// Complement-required-in-pipeline operands.
-reg_class bits64_reg_ro_not_complement (
-/*R0_H,  R0*/     // R0
-  R1_H,  R1,      // SP
-  R2_H,  R2,      // TOC
-  R3_H,  R3,
-  R4_H,  R4,
-  R5_H,  R5,
-  R6_H,  R6,
-  R7_H,  R7,
-  R8_H,  R8,
-  R9_H,  R9,
-  R10_H, R10,
-  R11_H, R11,
-  R12_H, R12,
-/*R13_H, R13*/   // system thread id
-  R14_H, R14,
-  R15_H, R15,
-  R16_H, R16,    // R16_thread
-  R17_H, R17,
-  R18_H, R18,
-  R19_H, R19,
-  R20_H, R20,
-  R21_H, R21,
-  R22_H, R22,
-/*R23_H, R23,
-  R24_H, R24,
-  R25_H, R25,
-  R26_H, R26,
-  R27_H, R27,
-  R28_H, R28,*/
-/*R29_H, R29*/ // TODO: let allocator handle TOC!!
-/*R30_H, R30,*/
-  R31_H, R31
-);
-
-// Complement-required-in-pipeline operands.
-// This register mask is used for the trap instructions that implement
-// the null checks on AIX. The trap instruction first computes the
-// complement of the value it shall trap on. Because of this, the
-// instruction can not be scheduled in the same cycle as an other
-// instruction reading the normal value of the same register. So we
-// force the value to check into 'bits64_reg_ro_not_complement'
-// and then copy it to 'bits64_reg_ro_complement' for the trap.
-reg_class bits64_reg_ro_complement (
-  R23_H, R23,
-  R24_H, R24,
-  R25_H, R25,
-  R26_H, R26,
-  R27_H, R27,
-  R28_H, R28
-);
-
 
 // ----------------------------
 // Special Class for Condition Code Flags Register
@@ -777,6 +677,17 @@
   CCR7
 );
 
+reg_class int_flags_ro(
+  CCR0,
+  CCR1,
+  CCR2,
+  CCR3,
+  CCR4,
+  CCR5,
+  CCR6,
+  CCR7
+);
+
 reg_class int_flags_CR0(CCR0);
 reg_class int_flags_CR1(CCR1);
 reg_class int_flags_CR6(CCR6);
@@ -2876,7 +2787,7 @@
 
   // Use release_store for card-marking to ensure that previous
   // oop-stores are visible before the card-mark change.
-  enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr) %{
+  enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr, flagsReg crx) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // FIXME: Implement this as a cmove and use a fixed condition code
     // register which is written on every transition to compiled code,
@@ -2897,8 +2808,8 @@
     // Check CMSCollectorCardTableModRefBSExt::_requires_release and do the
     // StoreStore barrier conditionally.
     __ lwz(R0, 0, $releaseFieldAddr$$Register);
-    __ cmpwi(CCR0, R0, 0);
-    __ beq_predict_taken(CCR0, skip_storestore);
+    __ cmpwi($crx$$CondRegister, R0, 0);
+    __ beq_predict_taken($crx$$CondRegister, skip_storestore);
 #endif
     __ li(R0, 0);
     __ membar(Assembler::StoreStore);
@@ -3108,7 +3019,7 @@
     nodes->push(n2);
   %}
 
-  enc_class enc_cmove_reg(iRegIdst dst, flagsReg crx, iRegIsrc src, cmpOp cmp) %{
+  enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
 
     MacroAssembler _masm(&cbuf);
@@ -3123,7 +3034,7 @@
     __ bind(done);
   %}
 
-  enc_class enc_cmove_imm(iRegIdst dst, flagsReg crx, immI16 src, cmpOp cmp) %{
+  enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
 
     MacroAssembler _masm(&cbuf);
@@ -3269,7 +3180,7 @@
     __ bind(done);
   %}
 
-  enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL mem ) %{
+  enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
 
     MacroAssembler _masm(&cbuf);
@@ -3281,7 +3192,7 @@
     __ bind(done);
   %}
 
-  enc_class enc_bc(flagsReg crx, cmpOp cmp, Label lbl) %{
+  enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_bc);
 
     MacroAssembler _masm(&cbuf);
@@ -3309,7 +3220,7 @@
           l);
   %}
 
-  enc_class enc_bc_far(flagsReg crx, cmpOp cmp, Label lbl) %{
+  enc_class enc_bc_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
     // The scheduler doesn't know about branch shortening, so we set the opcode
     // to ppc64Opcode_bc in order to hide this detail from the scheduler.
     // TODO: PPC port $archOpcode(ppc64Opcode_bc);
@@ -3341,7 +3252,7 @@
   %}
 
   // Branch used with Power6 scheduling (can be shortened without changing the node).
-  enc_class enc_bc_short_far(flagsReg crx, cmpOp cmp, Label lbl) %{
+  enc_class enc_bc_short_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
     // The scheduler doesn't know about branch shortening, so we set the opcode
     // to ppc64Opcode_bc in order to hide this detail from the scheduler.
     // TODO: PPC port $archOpcode(ppc64Opcode_bc);
@@ -4700,6 +4611,15 @@
   interface(REG_INTER);
 %}
 
+operand flagsRegSrc() %{
+  constraint(ALLOC_IN_RC(int_flags_ro));
+  match(RegFlags);
+  match(flagsReg);
+  match(flagsRegCR0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Condition Code Flag Register CR0
 operand flagsRegCR0() %{
   constraint(ALLOC_IN_RC(int_flags_CR0));
@@ -4783,6 +4703,13 @@
   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
   constraint(ALLOC_IN_RC(bits32_reg_ro));
   match(DecodeN reg);
+  format %{ "$reg" %}
+  interface(REG_INTER)
+%}
+
+operand iRegN2P_klass(iRegNsrc reg) %{
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+  constraint(ALLOC_IN_RC(bits32_reg_ro));
   match(DecodeNKlass reg);
   format %{ "$reg" %}
   interface(REG_INTER)
@@ -4839,6 +4766,19 @@
   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
   constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(DecodeN reg);
+  op_cost(100);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x0);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+operand indirectNarrow_klass(iRegNsrc reg) %{
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+  constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(DecodeNKlass reg);
   op_cost(100);
   format %{ "[$reg]" %}
@@ -4855,6 +4795,19 @@
   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
   constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(AddP (DecodeN reg) offset);
+  op_cost(100);
+  format %{ "[$reg + $offset]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x0);
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+operand indOffset16Narrow_klass(iRegNsrc reg, immL16 offset) %{
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+  constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(AddP (DecodeNKlass reg) offset);
   op_cost(100);
   format %{ "[$reg + $offset]" %}
@@ -4871,6 +4824,19 @@
   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
   constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(AddP (DecodeN reg) offset);
+  op_cost(100);
+  format %{ "[$reg + $offset]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x0);
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+operand indOffset16NarrowAlg4_klass(iRegNsrc reg, immL16Alg4 offset) %{
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+  constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(AddP (DecodeNKlass reg) offset);
   op_cost(100);
   format %{ "[$reg + $offset]" %}
@@ -4998,9 +4964,9 @@
 // encoding and format. The classic case of this is memory operands.
 // Indirect is not included since its use is limited to Compare & Swap.
 
-opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indOffset16Narrow);
+opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indirectNarrow_klass, indOffset16Narrow, indOffset16Narrow_klass);
 // Memory operand where offsets are 4-aligned. Required for ld, std.
-opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4);
+opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4, indOffset16NarrowAlg4_klass);
 opclass indirectMemory(indirect, indirectNarrow);
 
 // Special opclass for I and ConvL2I.
@@ -5009,7 +4975,7 @@
 // Operand classes to match encode and decode. iRegN_P2N is only used
 // for storeN. I have never seen an encode node elsewhere.
 opclass iRegN_P2N(iRegNsrc, iRegP2N);
-opclass iRegP_N2P(iRegPsrc, iRegN2P);
+opclass iRegP_N2P(iRegPsrc, iRegN2P, iRegN2P_klass);
 
 //----------PIPELINE-----------------------------------------------------------
 
@@ -5593,6 +5559,19 @@
   ins_pipe(pipe_class_memory);
 %}
 
+instruct loadN2P_klass_unscaled(iRegPdst dst, memory mem) %{
+  match(Set dst (DecodeNKlass (LoadNKlass mem)));
+  // SAPJVM GL 2014-05-21 Differs.
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0 &&
+            _kids[0]->_leaf->as_Load()->is_unordered());
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "LWZ     $dst, $mem \t// DecodeN (unscaled)" %}
+  size(4);
+  ins_encode( enc_lwz(dst, mem) );
+  ins_pipe(pipe_class_memory);
+%}
+
 // Load Pointer
 instruct loadP(iRegPdst dst, memoryAlg4 mem) %{
   match(Set dst (LoadP mem));
@@ -5669,8 +5648,9 @@
 %}
 
 // Load Float acquire.
-instruct loadF_ac(regF dst, memory mem) %{
+instruct loadF_ac(regF dst, memory mem, flagsRegCR0 cr0) %{
   match(Set dst (LoadF mem));
+  effect(TEMP cr0);
   ins_cost(3*MEMORY_REF_COST);
 
   format %{ "LFS     $dst, $mem \t// acquire\n\t"
@@ -5705,8 +5685,9 @@
 %}
 
 // Load Double - aligned acquire.
-instruct loadD_ac(regD dst, memory mem) %{
+instruct loadD_ac(regD dst, memory mem, flagsRegCR0 cr0) %{
   match(Set dst (LoadD mem));
+  effect(TEMP cr0);
   ins_cost(3*MEMORY_REF_COST);
 
   format %{ "LFD     $dst, $mem \t// acquire\n\t"
@@ -6034,11 +6015,10 @@
 instruct loadBase(iRegLdst dst) %{
   effect(DEF dst);
 
-  format %{ "MR      $dst, r30_heapbase" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_or);
-    __ mr($dst$$Register, R30);
+  format %{ "LoadConst $dst, heapbase" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ load_const_optimized($dst$$Register, Universe::narrow_oop_base(), R0);
   %}
   ins_pipe(pipe_class_default);
 %}
@@ -6114,7 +6094,7 @@
   effect(TEMP src2);
   ins_cost(DEFAULT_COST);
 
-  format %{ "ORI    $dst, $src1, $src2 \t// narrow klass lo" %}
+  format %{ "ORI     $dst, $src1, $src2 \t// narrow klass lo" %}
   size(4);
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_ori);
@@ -6563,8 +6543,9 @@
 // do a releasing store. For this, it gets the address of
 // CMSCollectorCardTableModRefBSExt::_requires_release as input.
 // (Using releaseFieldAddr in the match rule is a hack.)
-instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr) %{
+instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr, flagsReg crx) %{
   match(Set mem (StoreCM mem releaseFieldAddr));
+  effect(TEMP crx);
   predicate(false);
   ins_cost(MEMORY_REF_COST);
 
@@ -6572,7 +6553,7 @@
   ins_cannot_rematerialize(true);
 
   format %{ "STB     #0, $mem \t// CMS card-mark byte (must be 0!), checking requires_release in [$releaseFieldAddr]" %}
-  ins_encode( enc_cms_card_mark(mem, releaseFieldAddr) );
+  ins_encode( enc_cms_card_mark(mem, releaseFieldAddr, crx) );
   ins_pipe(pipe_class_memory);
 %}
 
@@ -6589,8 +6570,9 @@
   expand %{
     immL baseImm %{ 0 /* TODO: PPC port (jlong)CMSCollectorCardTableModRefBSExt::requires_release_address() */ %}
     iRegLdst releaseFieldAddress;
+    flagsReg crx;
     loadConL_Ex(releaseFieldAddress, baseImm);
-    storeCM_CMS(mem, releaseFieldAddress);
+    storeCM_CMS(mem, releaseFieldAddress, crx);
   %}
 %}
 
@@ -6639,39 +6621,34 @@
   predicate(false);
 
   format %{ "SUB     $dst, $src, oop_base \t// encode" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
-    __ subf($dst$$Register, R30, $src$$Register);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ sub_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0);
   %}
   ins_pipe(pipe_class_default);
 %}
 
 // Conditional sub base.
-instruct cond_sub_base(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_sub_base(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
   // The match rule is needed to make it a 'MachTypeNode'!
   match(Set dst (EncodeP (Binary crx src1)));
   predicate(false);
 
-  ins_variable_size_depending_on_alignment(true);
-
   format %{ "BEQ     $crx, done\n\t"
-            "SUB     $dst, $src1, R30 \t// encode: subtract base if != NULL\n"
+            "SUB     $dst, $src1, heapbase \t// encode: subtract base if != NULL\n"
             "done:" %}
-  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     Label done;
     __ beq($crx$$CondRegister, done);
-    __ subf($dst$$Register, R30, $src1$$Register);
-    // TODO PPC port __ endgroup_if_needed(_size == 12);
+    __ sub_const_optimized($dst$$Register, $src1$$Register, Universe::narrow_oop_base(), R0);
     __ bind(done);
   %}
   ins_pipe(pipe_class_default);
 %}
 
 // Power 7 can use isel instruction
-instruct cond_set_0_oop(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_set_0_oop(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
   // The match rule is needed to make it a 'MachTypeNode'!
   match(Set dst (EncodeP (Binary crx src1)));
   predicate(false);
@@ -6777,42 +6754,37 @@
   match(Set dst (DecodeN src));
   predicate(false);
 
-  format %{ "ADD     $dst, $src, R30 \t// DecodeN, add oop base" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_add);
-    __ add($dst$$Register, $src$$Register, R30);
+  format %{ "ADD     $dst, $src, heapbase \t// DecodeN, add oop base" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ add_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0);
   %}
   ins_pipe(pipe_class_default);
 %}
 
 // conditianal add base for expand
-instruct cond_add_base(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_add_base(iRegPdst dst, flagsRegSrc crx, iRegPsrc src) %{
   // The match rule is needed to make it a 'MachTypeNode'!
   // NOTICE that the rule is nonsense - we just have to make sure that:
   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
   //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
-  match(Set dst (DecodeN (Binary crx src1)));
+  match(Set dst (DecodeN (Binary crx src)));
   predicate(false);
 
-  ins_variable_size_depending_on_alignment(true);
-
   format %{ "BEQ     $crx, done\n\t"
-            "ADD     $dst, $src1, R30 \t// DecodeN: add oop base if $src1 != NULL\n"
+            "ADD     $dst, $src, heapbase \t// DecodeN: add oop base if $src != NULL\n"
             "done:" %}
-  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling()) */? 12 : 8);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     Label done;
     __ beq($crx$$CondRegister, done);
-    __ add($dst$$Register, $src1$$Register, R30);
-    // TODO PPC port  __ endgroup_if_needed(_size == 12);
+    __ add_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0);
     __ bind(done);
   %}
   ins_pipe(pipe_class_default);
 %}
 
-instruct cond_set_0_ptr(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_set_0_ptr(iRegPdst dst, flagsRegSrc crx, iRegPsrc src1) %{
   // The match rule is needed to make it a 'MachTypeNode'!
   // NOTICE that the rule is nonsense - we just have to make sure that:
   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
@@ -6888,7 +6860,7 @@
             Universe::narrow_oop_base_disjoint());
   ins_cost(DEFAULT_COST);
 
-  format %{ "MOV     $dst, R30 \t\n"
+  format %{ "MOV     $dst, heapbase \t\n"
             "RLDIMI  $dst, $src, shift, 32-shift \t// decode with disjoint base" %}
   postalloc_expand %{
     loadBaseNode *n1 = new loadBaseNode();
@@ -6946,7 +6918,7 @@
 
     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
     ra_->set_oop(n_cond_set, true);
-    
+
     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
     ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
@@ -7303,7 +7275,7 @@
 //----------Conditional Move---------------------------------------------------
 
 // Cmove using isel.
-instruct cmovI_reg_isel(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
+instruct cmovI_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
   predicate(VM_Version::has_isel());
   ins_cost(DEFAULT_COST);
@@ -7321,7 +7293,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_reg(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
+instruct cmovI_reg(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
   predicate(!VM_Version::has_isel());
   ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7335,7 +7307,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_imm(cmpOp cmp, flagsReg crx, iRegIdst dst, immI16 src) %{
+instruct cmovI_imm(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, immI16 src) %{
   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7349,7 +7321,7 @@
 %}
 
 // Cmove using isel.
-instruct cmovL_reg_isel(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
+instruct cmovL_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
   predicate(VM_Version::has_isel());
   ins_cost(DEFAULT_COST);
@@ -7367,7 +7339,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_reg(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
+instruct cmovL_reg(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
   predicate(!VM_Version::has_isel());
   ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7381,7 +7353,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_imm(cmpOp cmp, flagsReg crx, iRegLdst dst, immL16 src) %{
+instruct cmovL_imm(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, immL16 src) %{
   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7395,7 +7367,7 @@
 %}
 
 // Cmove using isel.
-instruct cmovN_reg_isel(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
+instruct cmovN_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
   predicate(VM_Version::has_isel());
   ins_cost(DEFAULT_COST);
@@ -7414,7 +7386,7 @@
 %}
 
 // Conditional move for RegN. Only cmov(reg, reg).
-instruct cmovN_reg(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
+instruct cmovN_reg(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
   predicate(!VM_Version::has_isel());
   ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7428,7 +7400,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovN_imm(cmpOp cmp, flagsReg crx, iRegNdst dst, immN_0 src) %{
+instruct cmovN_imm(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, immN_0 src) %{
   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7442,7 +7414,7 @@
 %}
 
 // Cmove using isel.
-instruct cmovP_reg_isel(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegPsrc src) %{
+instruct cmovP_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegPsrc src) %{
   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
   predicate(VM_Version::has_isel());
   ins_cost(DEFAULT_COST);
@@ -7460,7 +7432,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovP_reg(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegP_N2P src) %{
+instruct cmovP_reg(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegP_N2P src) %{
   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
   predicate(!VM_Version::has_isel());
   ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7474,7 +7446,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovP_imm(cmpOp cmp, flagsReg crx, iRegPdst dst, immP_0 src) %{
+instruct cmovP_imm(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, immP_0 src) %{
   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7487,7 +7459,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovF_reg(cmpOp cmp, flagsReg crx, regF dst, regF src) %{
+instruct cmovF_reg(cmpOp cmp, flagsRegSrc crx, regF dst, regF src) %{
   match(Set dst (CMoveF (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7509,7 +7481,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovD_reg(cmpOp cmp, flagsReg crx, regD dst, regD src) %{
+instruct cmovD_reg(cmpOp cmp, flagsRegSrc crx, regD dst, regD src) %{
   match(Set dst (CMoveD (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7542,8 +7514,9 @@
 // Mem_ptr must be a memory operand, else this node does not get
 // Flag_needs_anti_dependence_check set by adlc. If this is not set this node
 // can be rematerialized which leads to errors.
-instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal) %{
+instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal, flagsRegCR0 cr0) %{
   match(Set crx (StoreLConditional mem_ptr (Binary oldVal newVal)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
@@ -7560,16 +7533,16 @@
 // Mem_ptr must be a memory operand, else this node does not get
 // Flag_needs_anti_dependence_check set by adlc. If this is not set this node
 // can be rematerialized which leads to errors.
-instruct storePConditional_regP_regP_regP(flagsReg crx, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
-  match(Set crx (StorePConditional mem_ptr (Binary oldVal newVal)));
-  format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
-                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
-                noreg, NULL, true);
-  %}
-  ins_pipe(pipe_class_default);
+instruct storePConditional_regP_regP_regP(flagsRegCR0 cr0, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
+  match(Set cr0 (StorePConditional mem_ptr (Binary oldVal newVal)));
+  ins_cost(2*MEMORY_REF_COST);
+
+  format %{ "STDCX_  if ($cr0 = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_stdcx_);
+    __ stdcx_($newVal$$Register, $mem_ptr$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
 %}
 
 // Implement LoadPLocked. Must be ordered against changes of the memory location
@@ -7577,13 +7550,14 @@
 // Don't know whether this is ever used.
 instruct loadPLocked(iRegPdst dst, memory mem) %{
   match(Set dst (LoadPLocked mem));
-  ins_cost(MEMORY_REF_COST);
-
-  format %{ "LD      $dst, $mem \t// loadPLocked\n\t"
-            "TWI     $dst\n\t"
-            "ISYNC" %}
-  size(12);
-  ins_encode( enc_ld_ac(dst, mem) );
+  ins_cost(2*MEMORY_REF_COST);
+
+  format %{ "LDARX   $dst, $mem \t// loadPLocked\n\t" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_ldarx);
+    __ ldarx($dst$$Register, $mem$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
+  %}
   ins_pipe(pipe_class_memory);
 %}
 
@@ -7593,8 +7567,9 @@
 // (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ..))"  cannot be
 // matched.
 
-instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2) %{
+instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
@@ -7607,8 +7582,9 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2) %{
+instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
@@ -7621,8 +7597,9 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2) %{
+instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
@@ -7635,8 +7612,9 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2) %{
+instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
@@ -7649,48 +7627,54 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
+instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndAddI mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndAddI $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndAddI(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
+instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndAddL mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndAddL $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndAddL(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
+instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetI mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndSetI $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
+instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetL mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndSetL $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src) %{
+instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetP mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndSetP $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src) %{
+instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetN mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndSetN $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
@@ -7898,18 +7882,8 @@
 %}
 
 // Immediate Subtraction
-// The compiler converts "x-c0" into "x+ -c0" (see SubINode::Ideal),
-// so this rule seems to be unused.
-instruct subI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
-  match(Set dst (SubI src1 src2));
-  format %{ "SUBI    $dst, $src1, $src2" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
-    __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1));
-  %}
-  ins_pipe(pipe_class_default);
-%}
+// Immediate Subtraction: The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal),
+// Don't try to use addi with - $src2$$constant since it can overflow when $src2$$constant == minI16.
 
 // SubI from constant (using subfic).
 instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{
@@ -7989,22 +7963,6 @@
   ins_pipe(pipe_class_default);
 %}
 
-// Immediate Subtraction
-// The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal),
-// so this rule seems to be unused.
-// No constant pool entries required.
-instruct subL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
-  match(Set dst (SubL src1 src2));
-
-  format %{ "SUBI    $dst, $src1, $src2 \t// long" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
-    __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1));
-  %}
-  ins_pipe(pipe_class_default);
-%}
-
 // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
 // positive longs and 0xF...F for negative ones.
 instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{
@@ -8165,7 +8123,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_bne_negI_reg(iRegIdst dst, flagsReg crx, iRegIsrc src1) %{
+instruct cmovI_bne_negI_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src1) %{
   effect(USE_DEF dst, USE src1, USE crx);
   predicate(false);
 
@@ -8228,7 +8186,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_bne_negL_reg(iRegLdst dst, flagsReg crx, iRegLsrc src1) %{
+instruct cmovL_bne_negL_reg(iRegLdst dst, flagsRegSrc crx, iRegLsrc src1) %{
   effect(USE_DEF dst, USE src1, USE crx);
   predicate(false);
 
@@ -8281,7 +8239,7 @@
 %}
 
 // Long Remainder with registers
-instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
+instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
   match(Set dst (ModL src1 src2));
   ins_cost(10*DEFAULT_COST);
 
@@ -9011,7 +8969,6 @@
 instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{
   match(Set dst (AndL src1 src2));
   effect(KILL cr0);
-  ins_cost(DEFAULT_COST);
 
   format %{ "ANDI    $dst, $src1, $src2 \t// long" %}
   size(4);
@@ -9803,7 +9760,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsReg crx, stackSlotL src) %{
+instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsRegSrc crx, stackSlotL src) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx, USE src);
   predicate(false);
@@ -9817,7 +9774,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsReg crx, stackSlotL mem) %{
+instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, stackSlotL mem) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx, USE mem);
   predicate(false);
@@ -9972,7 +9929,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL src) %{
+instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL src) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx, USE src);
   predicate(false);
@@ -9986,7 +9943,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsReg crx, stackSlotL mem) %{
+instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, stackSlotL mem) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx, USE mem);
   predicate(false);
@@ -10255,7 +10212,6 @@
   size(4);
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
-    // FIXME: avoid andi_ ?
     __ andi_(R0, $src1$$Register, $src2$$constant);
   %}
   ins_pipe(pipe_class_compare);
@@ -10302,13 +10258,12 @@
   size(4);
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
-    // FIXME: avoid andi_ ?
     __ andi_(R0, $src1$$Register, $src2$$constant);
   %}
   ins_pipe(pipe_class_compare);
 %}
 
-instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsReg crx) %{
+instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsRegSrc crx) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx);
   predicate(false);
@@ -10332,7 +10287,7 @@
   ins_pipe(pipe_class_compare);
 %}
 
-instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsReg crx) %{
+instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsRegSrc crx) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx);
   predicate(false);
@@ -10622,8 +10577,9 @@
 //----------Float Compares----------------------------------------------------
 
 instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
+  // Needs matchrule, see cmpDUnordered.
+  match(Set crx (CmpF src1 src2)); 
   // no match-rule, false predicate
-  effect(DEF crx, USE src1, USE src2);
   predicate(false);
 
   format %{ "cmpFUrd $crx, $src1, $src2" %}
@@ -10731,8 +10687,14 @@
 %}
 
 instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
-  // no match-rule, false predicate
-  effect(DEF crx, USE src1, USE src2);
+  // Needs matchrule so that ideal opcode is Cmp. This causes that gcm places the 
+  // node right before the conditional move using it. 
+  // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7,
+  // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle
+  // crashed in register allocation where the flags Reg between cmpDUnoredered and a
+  // conditional move was supposed to be spilled.
+  match(Set crx (CmpD src1 src2)); 
+  // False predicate, shall not be matched.
   predicate(false);
 
   format %{ "cmpFUrd $crx, $src1, $src2" %}
@@ -10830,7 +10792,7 @@
 %}
 
 // Conditional Near Branch
-instruct branchCon(cmpOp cmp, flagsReg crx, label lbl) %{
+instruct branchCon(cmpOp cmp, flagsRegSrc crx, label lbl) %{
   // Same match rule as `branchConFar'.
   match(If cmp crx);
   effect(USE lbl);
@@ -10853,7 +10815,7 @@
 // expensive.
 //
 // Conditional Far Branch
-instruct branchConFar(cmpOp cmp, flagsReg crx, label lbl) %{
+instruct branchConFar(cmpOp cmp, flagsRegSrc crx, label lbl) %{
   // Same match rule as `branchCon'.
   match(If cmp crx);
   effect(USE crx, USE lbl);
@@ -10871,7 +10833,7 @@
 %}
 
 // Conditional Branch used with Power6 scheduler (can be far or short).
-instruct branchConSched(cmpOp cmp, flagsReg crx, label lbl) %{
+instruct branchConSched(cmpOp cmp, flagsRegSrc crx, label lbl) %{
   // Same match rule as `branchCon'.
   match(If cmp crx);
   effect(USE crx, USE lbl);
@@ -10890,7 +10852,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct branchLoopEnd(cmpOp cmp, flagsReg crx, label labl) %{
+instruct branchLoopEnd(cmpOp cmp, flagsRegSrc crx, label labl) %{
   match(CountedLoopEnd cmp crx);
   effect(USE labl);
   ins_cost(BRANCH_COST);
@@ -10904,7 +10866,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct branchLoopEndFar(cmpOp cmp, flagsReg crx, label labl) %{
+instruct branchLoopEndFar(cmpOp cmp, flagsRegSrc crx, label labl) %{
   match(CountedLoopEnd cmp crx);
   effect(USE labl);
   predicate(!false /* TODO: PPC port HB_Schedule */);
@@ -10920,7 +10882,7 @@
 %}
 
 // Conditional Branch used with Power6 scheduler (can be far or short).
-instruct branchLoopEndSched(cmpOp cmp, flagsReg crx, label labl) %{
+instruct branchLoopEndSched(cmpOp cmp, flagsRegSrc crx, label labl) %{
   match(CountedLoopEnd cmp crx);
   effect(USE labl);
   predicate(false /* TODO: PPC port HB_Schedule */);
@@ -10969,13 +10931,14 @@
 instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
   match(Set crx (FastLock oop box));
   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
-  // TODO PPC port predicate(!UseNewFastLockPPC64 || UseBiasedLocking);
+  predicate(/*(!UseNewFastLockPPC64 || UseBiasedLocking) &&*/ !Compile::current()->use_rtm());
 
   format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2, $tmp3" %}
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
-                                 $tmp3$$Register, $tmp1$$Register, $tmp2$$Register);
+                                 $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+                                 UseBiasedLocking && !UseOptoBiasInlining); // SAPJVM MD 2014-11-06 UseOptoBiasInlining
     // If locking was successfull, crx should indicate 'EQ'.
     // The compiler generates a branch to the runtime call to
     // _complete_monitor_locking_Java for the case where crx is 'NE'.
@@ -10983,15 +10946,58 @@
   ins_pipe(pipe_class_compare);
 %}
 
+// Separate version for TM. Use bound register for box to enable USE_KILL.
+instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
+  match(Set crx (FastLock oop box));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL box);
+  predicate(Compile::current()->use_rtm());
+
+  format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2, $tmp3 (TM)" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
+                                 $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+                                 /*Biased Locking*/ false,
+                                 _rtm_counters, _stack_rtm_counters,
+                                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
+                                 /*TM*/ true, ra_->C->profile_rtm());
+    // If locking was successfull, crx should indicate 'EQ'.
+    // The compiler generates a branch to the runtime call to
+    // _complete_monitor_locking_Java for the case where crx is 'NE'.
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
 instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
   match(Set crx (FastUnlock oop box));
   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
+  predicate(!Compile::current()->use_rtm());
 
   format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2" %}
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
-                                   $tmp3$$Register, $tmp1$$Register, $tmp2$$Register);
+                                   $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+                                   UseBiasedLocking && !UseOptoBiasInlining,
+                                   false);
+    // If unlocking was successfull, crx should indicate 'EQ'.
+    // The compiler generates a branch to the runtime call to
+    // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct cmpFastUnlock_tm(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
+  match(Set crx (FastUnlock oop box));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
+  predicate(Compile::current()->use_rtm());
+
+  format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2 (TM)" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
+                                   $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+                                   /*Biased Locking*/ false, /*TM*/ true);
     // If unlocking was successfull, crx should indicate 'EQ'.
     // The compiler generates a branch to the runtime call to
     // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
@@ -11658,6 +11664,66 @@
   ins_pipe(pipe_class_default);
 %}
 
+
+//----------Overflow Math Instructions-----------------------------------------
+
+// Note that we have to make sure that XER.SO is reset before using overflow instructions.
+// Simple Overflow operations can be matched by very few instructions (e.g. addExact: xor, and_, bc).
+// Seems like only Long intrinsincs have an advantage. (The only expensive one is OverflowMulL.)
+
+instruct overflowAddL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
+  match(Set cr0 (OverflowAddL op1 op2));
+
+  format %{ "add_    $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ li(R0, 0);
+    __ mtxer(R0); // clear XER.SO
+    __ addo_(R0, $op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
+  match(Set cr0 (OverflowSubL op1 op2));
+
+  format %{ "subfo_  R0, $op2, $op1\t# overflow check long" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ li(R0, 0);
+    __ mtxer(R0); // clear XER.SO
+    __ subfo_(R0, $op2$$Register, $op1$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{
+  match(Set cr0 (OverflowSubL zero op2));
+
+  format %{ "nego_   R0, $op2\t# overflow check long" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ li(R0, 0);
+    __ mtxer(R0); // clear XER.SO
+    __ nego_(R0, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
+  match(Set cr0 (OverflowMulL op1 op2));
+
+  format %{ "mulldo_ R0, $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ li(R0, 0);
+    __ mtxer(R0); // clear XER.SO
+    __ mulldo_(R0, $op1$$Register, $op2$$Register);
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+
 // ============================================================================
 // Safepoint Instruction
 
--- a/src/cpu/ppc/vm/register_definitions_ppc.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/register_definitions_ppc.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,19 +23,10 @@
  *
  */
 
-// make sure the defines don't screw up the declarations later on in this file
+// Make sure the defines don't screw up the declarations later on in this file.
 #define DONT_USE_REGISTER_DEFINES
 
-#include "precompiled.hpp"
-#include "asm/macroAssembler.hpp"
 #include "asm/register.hpp"
-#include "register_ppc.hpp"
-#ifdef TARGET_ARCH_MODEL_ppc_32
-# include "interp_masm_ppc_32.hpp"
-#endif
-#ifdef TARGET_ARCH_MODEL_ppc_64
-# include "interp_masm_ppc_64.hpp"
-#endif
 
 REGISTER_DEFINITION(Register, noreg);
 
--- a/src/cpu/ppc/vm/relocInfo_ppc.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/relocInfo_ppc.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,14 +25,12 @@
 
 #include "precompiled.hpp"
 #include "asm/assembler.inline.hpp"
-#include "assembler_ppc.inline.hpp"
 #include "code/relocInfo.hpp"
 #include "nativeInst_ppc.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/safepoint.hpp"
 
 void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
-  bool copy_back_to_oop_pool = true;  // TODO: PPC port
   // The following comment is from the declaration of DataRelocation:
   //
   //  "The "o" (displacement) argument is relevant only to split relocations
--- a/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 #include "code/debugInfoRec.hpp"
 #include "code/icBuffer.hpp"
 #include "code/vtableStubs.hpp"
+#include "frame_ppc.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interp_masm.hpp"
 #include "oops/compiledICHolder.hpp"
@@ -194,8 +195,8 @@
   RegisterSaver_LiveIntReg(   R27 ),
   RegisterSaver_LiveIntReg(   R28 ),
   RegisterSaver_LiveIntReg(   R29 ),
-  RegisterSaver_LiveIntReg(   R31 ),
-  RegisterSaver_LiveIntReg(   R30 ), // r30 must be the last register
+  RegisterSaver_LiveIntReg(   R30 ),
+  RegisterSaver_LiveIntReg(   R31 ), // must be the last register (see save/restore functions below)
 };
 
 OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
@@ -229,29 +230,30 @@
 
   BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");
 
-  // Save r30 in the last slot of the not yet pushed frame so that we
+  // Save r31 in the last slot of the not yet pushed frame so that we
   // can use it as scratch reg.
-  __ std(R30, -reg_size, R1_SP);
+  __ std(R31, -reg_size, R1_SP);
   assert(-reg_size == register_save_offset - frame_size_in_bytes + ((regstosave_num-1)*reg_size),
          "consistency check");
 
   // save the flags
   // Do the save_LR_CR by hand and adjust the return pc if requested.
-  __ mfcr(R30);
-  __ std(R30, _abi(cr), R1_SP);
+  __ mfcr(R31);
+  __ std(R31, _abi(cr), R1_SP);
   switch (return_pc_location) {
-    case return_pc_is_lr:    __ mflr(R30);           break;
-    case return_pc_is_r4:    __ mr(R30, R4);     break;
+    case return_pc_is_lr:    __ mflr(R31);           break;
+    case return_pc_is_r4:    __ mr(R31, R4);     break;
     case return_pc_is_thread_saved_exception_pc:
-                                 __ ld(R30, thread_(saved_exception_pc)); break;
+                             __ ld(R31, thread_(saved_exception_pc)); break;
     default: ShouldNotReachHere();
   }
-  if (return_pc_adjustment != 0)
-    __ addi(R30, R30, return_pc_adjustment);
-  __ std(R30, _abi(lr), R1_SP);
+  if (return_pc_adjustment != 0) {
+    __ addi(R31, R31, return_pc_adjustment);
+  }
+  __ std(R31, _abi(lr), R1_SP);
 
   // push a new frame
-  __ push_frame(frame_size_in_bytes, R30);
+  __ push_frame(frame_size_in_bytes, R31);
 
   // save all registers (ints and floats)
   offset = register_save_offset;
@@ -261,7 +263,7 @@
 
     switch (reg_type) {
       case RegisterSaver::int_reg: {
-        if (reg_num != 30) { // We spilled R30 right at the beginning.
+        if (reg_num != 31) { // We spilled R31 right at the beginning.
           __ std(as_Register(reg_num), offset, R1_SP);
         }
         break;
@@ -272,8 +274,8 @@
       }
       case RegisterSaver::special_reg: {
         if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
-          __ mfctr(R30);
-          __ std(R30, offset, R1_SP);
+          __ mfctr(R31);
+          __ std(R31, offset, R1_SP);
         } else {
           Unimplemented();
         }
@@ -321,7 +323,7 @@
 
     switch (reg_type) {
       case RegisterSaver::int_reg: {
-        if (reg_num != 30) // R30 restored at the end, it's the tmp reg!
+        if (reg_num != 31) // R31 restored at the end, it's the tmp reg!
           __ ld(as_Register(reg_num), offset, R1_SP);
         break;
       }
@@ -332,8 +334,8 @@
       case RegisterSaver::special_reg: {
         if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
           if (restore_ctr) { // Nothing to do here if ctr already contains the next address.
-            __ ld(R30, offset, R1_SP);
-            __ mtctr(R30);
+            __ ld(R31, offset, R1_SP);
+            __ mtctr(R31);
           }
         } else {
           Unimplemented();
@@ -350,10 +352,10 @@
   __ pop_frame();
 
   // restore the flags
-  __ restore_LR_CR(R30);
+  __ restore_LR_CR(R31);
 
   // restore scratch register's value
-  __ ld(R30, -reg_size, R1_SP);
+  __ ld(R31, -reg_size, R1_SP);
 
   BLOCK_COMMENT("} restore_live_registers_and_pop_frame");
 }
@@ -2021,6 +2023,8 @@
   __ push_frame(frame_size_in_bytes, r_temp_1);          // Push the c2n adapter's frame.
   frame_done_pc = (intptr_t)__ pc();
 
+  __ verify_thread();
+
   // Native nmethod wrappers never take possesion of the oop arguments.
   // So the caller will gc the arguments.
   // The only thing we need an oopMap for is if the call is static.
@@ -2594,7 +2598,7 @@
 }
 
 uint SharedRuntime::out_preserve_stack_slots() {
-#ifdef COMPILER2
+#if defined(COMPILER1) || defined(COMPILER2)
   return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size;
 #else
   return 0;
@@ -2868,11 +2872,6 @@
   __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
   __ BIND(skip_restore_excp);
 
-  // reload narrro_oop_base
-  if (UseCompressedOops && Universe::narrow_oop_base() != 0) {
-    __ load_const_optimized(R30, Universe::narrow_oop_base());
-  }
-
   __ pop_frame();
 
   // stack: (deoptee, optional i2c, caller of deoptee, ...).
--- a/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -261,9 +261,6 @@
       // global toc register
       __ load_const(R29, MacroAssembler::global_toc(), R11_scratch1);
 
-      // Load narrow oop base.
-      __ reinit_heapbase(R30, R11_scratch1);
-
       // Remember the senderSP so we interpreter can pop c2i arguments off of the stack
       // when called via a c2i.
 
@@ -418,6 +415,23 @@
   // or native call stub.  The pending exception in Thread is
   // converted into a Java-level exception.
   //
+  // Read:
+  //
+  //   LR:     The pc the runtime library callee wants to return to.
+  //           Since the exception occurred in the callee, the return pc
+  //           from the point of view of Java is the exception pc.
+  //   thread: Needed for method handles.
+  //
+  // Invalidate:
+  //
+  //   volatile registers (except below).
+  //
+  // Update:
+  //
+  //   R4_ARG2: exception
+  //
+  // (LR is unchanged and is live out).
+  //
   address generate_forward_exception() {
     StubCodeMark mark(this, "StubRoutines", "forward_exception");
     address start = __ pc();
@@ -1256,9 +1270,9 @@
     Register tmp3 = R8_ARG6;
 
 #if defined(ABI_ELFv2)
-     address nooverlap_target = aligned ?
-       StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
-       StubRoutines::jbyte_disjoint_arraycopy();
+    address nooverlap_target = aligned ?
+      StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
+      StubRoutines::jbyte_disjoint_arraycopy();
 #else
     address nooverlap_target = aligned ?
       ((FunctionDescriptor*)StubRoutines::arrayof_jbyte_disjoint_arraycopy())->entry() :
--- a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -264,11 +264,11 @@
       __ cmpdi(CCR0, Rmdo, 0);
       __ beq(CCR0, no_mdo);
 
-      // Increment invocation counter in the MDO.
-      const int mdo_ic_offs = in_bytes(MethodData::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
-      __ lwz(Rscratch2, mdo_ic_offs, Rmdo);
+      // Increment backedge counter in the MDO.
+      const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
+      __ lwz(Rscratch2, mdo_bc_offs, Rmdo);
       __ addi(Rscratch2, Rscratch2, increment);
-      __ stw(Rscratch2, mdo_ic_offs, Rmdo);
+      __ stw(Rscratch2, mdo_bc_offs, Rmdo);
       __ load_const_optimized(Rscratch1, mask, R0);
       __ and_(Rscratch1, Rscratch2, Rscratch1);
       __ bne(CCR0, done);
@@ -276,12 +276,12 @@
     }
 
     // Increment counter in MethodCounters*.
-    const int mo_ic_offs = in_bytes(MethodCounters::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
+    const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
     __ bind(no_mdo);
     __ get_method_counters(R19_method, R3_counters, done);
-    __ lwz(Rscratch2, mo_ic_offs, R3_counters);
+    __ lwz(Rscratch2, mo_bc_offs, R3_counters);
     __ addi(Rscratch2, Rscratch2, increment);
-    __ stw(Rscratch2, mo_ic_offs, R3_counters);
+    __ stw(Rscratch2, mo_bc_offs, R3_counters);
     __ load_const_optimized(Rscratch1, mask, R0);
     __ and_(Rscratch1, Rscratch2, Rscratch1);
     __ beq(CCR0, *overflow);
@@ -611,12 +611,7 @@
 // For others we can use a normal (native) entry.
 
 inline bool math_entry_available(AbstractInterpreter::MethodKind kind) {
-  // Provide math entry with debugging on demand.
-  // Note: Debugging changes which code will get executed:
-  // Debugging or disabled InlineIntrinsics: java method will get interpreted and performs a native call.
-  // Not debugging and enabled InlineIntrinics: processor instruction will get used.
-  // Result might differ slightly due to rounding etc.
-  if (!InlineIntrinsics && (!FLAG_IS_ERGO(InlineIntrinsics))) return false; // Generate a vanilla entry.
+  if (!InlineIntrinsics) return false;
 
   return ((kind==Interpreter::java_lang_math_sqrt && VM_Version::has_fsqrt()) ||
           (kind==Interpreter::java_lang_math_abs));
@@ -628,15 +623,8 @@
     return Interpreter::entry_for_kind(Interpreter::zerolocals);
   }
 
-  Label Lslow_path;
-  const Register Rjvmti_mode = R11_scratch1;
   address entry = __ pc();
 
-  // Provide math entry with debugging on demand.
-  __ lwz(Rjvmti_mode, thread_(interp_only_mode));
-  __ cmpwi(CCR0, Rjvmti_mode, 0);
-  __ bne(CCR0, Lslow_path); // jvmti_mode!=0
-
   __ lfd(F1_RET, Interpreter::stackElementSize, R15_esp);
 
   // Pop c2i arguments (if any) off when we return.
@@ -659,9 +647,6 @@
   // And we're done.
   __ blr();
 
-  // Provide slow path for JVMTI case.
-  __ bind(Lslow_path);
-  __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R12_scratch2);
   __ flush();
 
   return entry;
--- a/src/cpu/ppc/vm/templateInterpreter_ppc.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/templateInterpreter_ppc.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@
   // Run with +PrintInterpreter to get the VM to print out the size.
   // Max size with JVMTI
 
-  const static int InterpreterCodeSize = 210*K;
+  const static int InterpreterCodeSize = 230*K;
 
 #endif // CPU_PPC_VM_TEMPLATEINTERPRETER_PPC_HPP
 
--- a/src/cpu/ppc/vm/templateTable_ppc_64.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/templateTable_ppc_64.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2013, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -375,23 +375,22 @@
 
   int index_size = wide ? sizeof(u2) : sizeof(u1);
   const Register Rscratch = R11_scratch1;
-  Label resolved;
+  Label is_null;
 
   // We are resolved if the resolved reference cache entry contains a
   // non-null object (CallSite, etc.)
   __ get_cache_index_at_bcp(Rscratch, 1, index_size);  // Load index.
-  __ load_resolved_reference_at_index(R17_tos, Rscratch);
-  __ cmpdi(CCR0, R17_tos, 0);
-  __ bne(CCR0, resolved);
+  __ load_resolved_reference_at_index(R17_tos, Rscratch, &is_null);
+  __ verify_oop(R17_tos);
+  __ dispatch_epilog(atos, Bytecodes::length_for(bytecode()));
+
+  __ bind(is_null);
   __ load_const_optimized(R3_ARG1, (int)bytecode());
 
   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
 
   // First time invocation - must resolve first.
   __ call_VM(R17_tos, entry, R3_ARG1);
-
-  __ align(32, 12);
-  __ bind(resolved);
   __ verify_oop(R17_tos);
 }
 
@@ -3859,9 +3858,9 @@
   transition(atos, itos);
 
   Label Ldone, Lis_null, Lquicked, Lresolved;
-  Register Roffset         = R5_ARG3,
+  Register Roffset         = R6_ARG4,
            RobjKlass       = R4_ARG2,
-           RspecifiedKlass = R6_ARG4, // Generate_ClassCastException_verbose_handler will expect the value in this register.
+           RspecifiedKlass = R5_ARG3,
            Rcpool          = R11_scratch1,
            Rtags           = R12_scratch2;
 
--- a/src/cpu/ppc/vm/vm_version_ppc.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/vm_version_ppc.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -32,12 +32,13 @@
 #include "runtime/os.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "utilities/defaultStream.hpp"
+#include "utilities/globalDefinitions.hpp"
 #include "vm_version_ppc.hpp"
 
 # include <sys/sysinfo.h>
 
 int VM_Version::_features = VM_Version::unknown_m;
-int VM_Version::_measured_cache_line_size = 128; // default value
+int VM_Version::_measured_cache_line_size = 32; // pessimistic init value
 const char* VM_Version::_features_str = "";
 bool VM_Version::_is_determine_features_test_running = false;
 
@@ -55,7 +56,9 @@
 
   // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features.
   if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
-    if (VM_Version::has_popcntw()) {
+    if (VM_Version::has_lqarx()) {
+      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8);
+    } else if (VM_Version::has_popcntw()) {
       FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7);
     } else if (VM_Version::has_cmpb()) {
       FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6);
@@ -66,8 +69,14 @@
     }
   }
   guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 ||
-            PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7,
-            "PowerArchitecturePPC64 should be 0, 5, 6 or 7");
+            PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7 ||
+            PowerArchitecturePPC64 == 8,
+            "PowerArchitecturePPC64 should be 0, 5, 6, 7, or 8");
+
+  // Power 8: Configure Data Stream Control Register.
+  if (PowerArchitecturePPC64 >= 8) {
+    config_dscr();
+  }
 
   if (!UseSIGTRAP) {
     MSG(TrapBasedICMissChecks);
@@ -97,7 +106,7 @@
   // Create and print feature-string.
   char buf[(num_features+1) * 16]; // Max 16 chars per feature.
   jio_snprintf(buf, sizeof(buf),
-               "ppc64%s%s%s%s%s%s%s%s",
+               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_fsqrt()   ? " fsqrt"   : ""),
                (has_isel()    ? " isel"    : ""),
                (has_lxarxeh() ? " lxarxeh" : ""),
@@ -106,11 +115,17 @@
                (has_popcntb() ? " popcntb" : ""),
                (has_popcntw() ? " popcntw" : ""),
                (has_fcfids()  ? " fcfids"  : ""),
-               (has_vand()    ? " vand"    : "")
+               (has_vand()    ? " vand"    : ""),
+               (has_lqarx()   ? " lqarx"   : ""),
+               (has_vcipher() ? " vcipher" : ""),
+               (has_vpmsumb() ? " vpmsumb" : ""),
+               (has_tcheck()  ? " tcheck"  : "")
                // Make sure number of %s matches num_features!
               );
   _features_str = os::strdup(buf);
-  NOT_PRODUCT(if (Verbose) print_features(););
+  if (Verbose) {
+    print_features();
+  }
 
   // PPC64 supports 8-byte compare-exchange operations (see
   // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
@@ -171,6 +186,58 @@
     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
   }
+  // Adjust RTM (Restricted Transactional Memory) flags.
+  if (!has_tcheck() && UseRTMLocking) {
+    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
+    // setting during arguments processing. See use_biased_locking().
+    // VM_Version_init() is executed after UseBiasedLocking is used
+    // in Thread::allocate().
+    vm_exit_during_initialization("RTM instructions are not available on this CPU");
+  }
+
+  if (UseRTMLocking) {
+#if INCLUDE_RTM_OPT
+    if (!UnlockExperimentalVMOptions) {
+      vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. "
+                                    "It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
+    } else {
+      warning("UseRTMLocking is only available as experimental option on this platform.");
+    }
+    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
+      // RTM locking should be used only for applications with
+      // high lock contention. For now we do not use it by default.
+      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
+    }
+    if (!is_power_of_2(RTMTotalCountIncrRate)) {
+      warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
+      FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
+    }
+    if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
+      warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
+      FLAG_SET_DEFAULT(RTMAbortRatio, 50);
+    }
+    FLAG_SET_ERGO(bool, UseNewFastLockPPC64, false); // Does not implement TM.
+    guarantee(RTMSpinLoopCount > 0, "unsupported");
+#else
+    // Only C2 does RTM locking optimization.
+    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
+    // setting during arguments processing. See use_biased_locking().
+    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
+#endif
+  } else { // !UseRTMLocking
+    if (UseRTMForStackLocks) {
+      if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
+        warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
+      }
+      FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
+    }
+    if (UseRTMDeopt) {
+      FLAG_SET_DEFAULT(UseRTMDeopt, false);
+    }
+    if (PrintPreciseRTMLockingStatistics) {
+      FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
+    }
+  }
 
   // This machine does not allow unaligned memory accesses
   if (UseUnalignedAccesses) {
@@ -180,6 +247,27 @@
   }
 }
 
+bool VM_Version::use_biased_locking() {
+#if INCLUDE_RTM_OPT
+  // RTM locking is most useful when there is high lock contention and
+  // low data contention. With high lock contention the lock is usually
+  // inflated and biased locking is not suitable for that case.
+  // RTM locking code requires that biased locking is off.
+  // Note: we can't switch off UseBiasedLocking in get_processor_features()
+  // because it is used by Thread::allocate() which is called before
+  // VM_Version::initialize().
+  if (UseRTMLocking && UseBiasedLocking) {
+    if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
+      FLAG_SET_DEFAULT(UseBiasedLocking, false);
+    } else {
+      warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." );
+      UseBiasedLocking = false;
+    }
+  }
+#endif
+  return UseBiasedLocking;
+}
+
 void VM_Version::print_features() {
   tty->print_cr("Version: %s cache_line_size = %d", cpu_features(), (int) get_cache_line_size());
 }
@@ -443,16 +531,19 @@
   // Don't use R0 in ldarx.
   // Keep R3_ARG1 unmodified, it contains &field (see below).
   // Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
-  a->fsqrt(F3, F4);                            // code[0] -> fsqrt_m
-  a->fsqrts(F3, F4);                           // code[1] -> fsqrts_m
-  a->isel(R7, R5, R6, 0);                      // code[2] -> isel_m
-  a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m
-  a->cmpb(R7, R5, R6);                         // code[4] -> bcmp
-  //a->mftgpr(R7, F3);                         // code[5] -> mftgpr
-  a->popcntb(R7, R5);                          // code[6] -> popcntb
-  a->popcntw(R7, R5);                          // code[7] -> popcntw
-  a->fcfids(F3, F4);                           // code[8] -> fcfids
-  a->vand(VR0, VR0, VR0);                      // code[9] -> vand
+  a->fsqrt(F3, F4);                            // code[0]  -> fsqrt_m
+  a->fsqrts(F3, F4);                           // code[1]  -> fsqrts_m
+  a->isel(R7, R5, R6, 0);                      // code[2]  -> isel_m
+  a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3]  -> lxarx_m
+  a->cmpb(R7, R5, R6);                         // code[4]  -> cmpb
+  a->popcntb(R7, R5);                          // code[5]  -> popcntb
+  a->popcntw(R7, R5);                          // code[6]  -> popcntw
+  a->fcfids(F3, F4);                           // code[7]  -> fcfids
+  a->vand(VR0, VR0, VR0);                      // code[8]  -> vand
+  a->lqarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[9]  -> lqarx_m
+  a->vcipher(VR0, VR1, VR2);                   // code[10] -> vcipher
+  a->vpmsumb(VR0, VR1, VR2);                   // code[11] -> vpmsumb
+  a->tcheck(0);                                // code[12] -> tcheck
   a->blr();
 
   // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@@ -491,11 +582,14 @@
   if (code[feature_cntr++]) features |= isel_m;
   if (code[feature_cntr++]) features |= lxarxeh_m;
   if (code[feature_cntr++]) features |= cmpb_m;
-  //if(code[feature_cntr++])features |= mftgpr_m;
   if (code[feature_cntr++]) features |= popcntb_m;
   if (code[feature_cntr++]) features |= popcntw_m;
   if (code[feature_cntr++]) features |= fcfids_m;
   if (code[feature_cntr++]) features |= vand_m;
+  if (code[feature_cntr++]) features |= lqarx_m;
+  if (code[feature_cntr++]) features |= vcipher_m;
+  if (code[feature_cntr++]) features |= vpmsumb_m;
+  if (code[feature_cntr++]) features |= tcheck_m;
 
   // Print the detection code.
   if (PrintAssembly) {
@@ -507,6 +601,69 @@
   _features = features;
 }
 
+// Power 8: Configure Data Stream Control Register.
+void VM_Version::config_dscr() {
+  assert(has_tcheck(), "Only execute on Power 8 or later!");
+
+  // 7 InstWords for each call (function descriptor + blr instruction).
+  const int code_size = (2+2*7)*BytesPerInstWord;
+
+  // Allocate space for the code.
+  ResourceMark rm;
+  CodeBuffer cb("config_dscr", code_size, 0);
+  MacroAssembler* a = new MacroAssembler(&cb);
+
+  // Emit code.
+  uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->emit_fd();
+  uint32_t *code = (uint32_t *)a->pc();
+  a->mfdscr(R3);
+  a->blr();
+
+  void (*set_dscr)(long) = (void(*)(long))(void *)a->emit_fd();
+  a->mtdscr(R3);
+  a->blr();
+
+  uint32_t *code_end = (uint32_t *)a->pc();
+  a->flush();
+
+  // Print the detection code.
+  if (PrintAssembly) {
+    ttyLocker ttyl;
+    tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", code);
+    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
+  }
+
+  // Apply the configuration if needed.
+  uint64_t dscr_val = (*get_dscr)();
+  if (Verbose) {
+    tty->print_cr("dscr value was 0x%lx" , dscr_val);
+  }
+  bool change_requested = false;
+  if (DSCR_PPC64 != (uintx)-1) {
+    dscr_val = DSCR_PPC64;
+    change_requested = true;
+  }
+  if (DSCR_DPFD_PPC64 <= 7) {
+    uint64_t mask = 0x7;
+    if ((dscr_val & mask) != DSCR_DPFD_PPC64) {
+      dscr_val = (dscr_val & ~mask) | (DSCR_DPFD_PPC64);
+      change_requested = true;
+    }
+  }
+  if (DSCR_URG_PPC64 <= 7) {
+    uint64_t mask = 0x7 << 6;
+    if ((dscr_val & mask) != DSCR_DPFD_PPC64 << 6) {
+      dscr_val = (dscr_val & ~mask) | (DSCR_URG_PPC64 << 6);
+      change_requested = true;
+    }
+  }
+  if (change_requested) {
+    (*set_dscr)(dscr_val);
+    if (Verbose) {
+      tty->print_cr("dscr was set to 0x%lx" , (*get_dscr)());
+    }
+  }
+}
 
 static int saved_features = 0;
 
--- a/src/cpu/ppc/vm/vm_version_ppc.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/vm_version_ppc.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -41,7 +41,10 @@
     popcntw,
     fcfids,
     vand,
-    dcba,
+    lqarx,
+    vcipher,
+    vpmsumb,
+    tcheck,
     num_features // last entry to count features
   };
   enum Feature_Flag_Set {
@@ -55,7 +58,10 @@
     popcntw_m             = (1 << popcntw),
     fcfids_m              = (1 << fcfids ),
     vand_m                = (1 << vand   ),
-    dcba_m                = (1 << dcba   ),
+    lqarx_m               = (1 << lqarx  ),
+    vcipher_m             = (1 << vcipher),
+    vpmsumb_m             = (1 << vpmsumb),
+    tcheck_m              = (1 << tcheck ),
     all_features_m        = -1
   };
   static int  _features;
@@ -65,12 +71,16 @@
 
   static void print_features();
   static void determine_features(); // also measures cache line size
+  static void config_dscr(); // Power 8: Configure Data Stream Control Register.
   static void determine_section_size();
   static void power6_micro_bench();
 public:
   // Initialization
   static void initialize();
 
+  // Override Abstract_VM_Version implementation
+  static bool use_biased_locking();
+
   static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
   // CPU instruction support
   static bool has_fsqrt()   { return (_features & fsqrt_m) != 0; }
@@ -82,7 +92,10 @@
   static bool has_popcntw() { return (_features & popcntw_m) != 0; }
   static bool has_fcfids()  { return (_features & fcfids_m) != 0; }
   static bool has_vand()    { return (_features & vand_m) != 0; }
-  static bool has_dcba()    { return (_features & dcba_m) != 0; }
+  static bool has_lqarx()   { return (_features & lqarx_m) != 0; }
+  static bool has_vcipher() { return (_features & vcipher_m) != 0; }
+  static bool has_vpmsumb() { return (_features & vpmsumb_m) != 0; }
+  static bool has_tcheck()  { return (_features & tcheck_m) != 0; }
 
   static const char* cpu_features() { return _features_str; }
 
--- a/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,7 +24,6 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
 #include "asm/macroAssembler.inline.hpp"
 #include "code/vtableStubs.hpp"
 #include "interp_masm_ppc_64.hpp"
--- a/src/cpu/sparc/vm/globals_sparc.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/sparc/vm/globals_sparc.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -74,6 +74,8 @@
 
 define_pd_global(bool, UseMembar,            false);
 
+define_pd_global(bool, PreserveFramePointer, false);
+
 // GC Ergo Flags
 define_pd_global(size_t, CMSYoungGenPerWorker, 16*M);  // default max size of CMS young gen, per GC worker thread
 
--- a/src/cpu/x86/vm/assembler_x86.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -142,8 +142,10 @@
 
 #endif // _LP64
 
-// JSR 292 fixed register usages:
-REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp);
+// JSR 292
+// On x86, the SP does not have to be saved when invoking method handle intrinsics
+// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
+REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg);
 
 // Address is an abstraction used to represent a memory location
 // using any of the amd64 addressing modes with one object.
--- a/src/cpu/x86/vm/c1_FrameMap_x86.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/c1_FrameMap_x86.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -343,14 +343,13 @@
   return FrameMap::rsp_opr;
 }
 
-
 // JSR 292
+// On x86, there is no need to save the SP, because neither
+// method handle intrinsics, nor compiled lambda forms modify it.
 LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
-  assert(rbp == rbp_mh_SP_save, "must be same register");
-  return rbp_opr;
+  return LIR_OprFact::illegalOpr;
 }
 
-
 bool FrameMap::validate_frame() {
   return true;
 }
--- a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -360,6 +360,9 @@
   generate_stack_overflow_check(bang_size_in_bytes);
 
   push(rbp);
+  if (PreserveFramePointer) {
+    mov(rbp, rsp);
+  }
 #ifdef TIERED
   // c2 leaves fpu stack dirty. Clean it on entry
   if (UseSSE < 2 ) {
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -754,14 +754,9 @@
     // WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP
     // since we do a leave anyway.
 
-    // Pop the return address since we are possibly changing SP (restoring from BP).
+    // Pop the return address.
     __ leave();
     __ pop(rcx);
-
-    // Restore SP from BP if the exception PC is a method handle call site.
-    NOT_LP64(__ get_thread(thread);)
-    __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
-    __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
     __ jmp(rcx);  // jump to exception handler
     break;
   default:  ShouldNotReachHere();
@@ -832,11 +827,6 @@
   // the pop is also necessary to simulate the effect of a ret(0)
   __ pop(exception_pc);
 
-  // Restore SP from BP if the exception PC is a method handle call site.
-  NOT_LP64(__ get_thread(thread);)
-  __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
-  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
   // continue at exception handler (return address removed)
   // note: do *not* remove arguments when unwinding the
   //       activation since the caller assumes having
--- a/src/cpu/x86/vm/frame_x86.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/frame_x86.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -224,7 +224,8 @@
     if (sender_blob->is_nmethod()) {
         nmethod* nm = sender_blob->as_nmethod_or_null();
         if (nm != NULL) {
-            if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) {
+            if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) ||
+                nm->method()->is_method_handle_intrinsic()) {
                 return false;
             }
         }
@@ -391,10 +392,9 @@
 // frame::verify_deopt_original_pc
 //
 // Verifies the calculated original PC of a deoptimization PC for the
-// given unextended SP.  The unextended SP might also be the saved SP
-// for MethodHandle call sites.
+// given unextended SP.
 #ifdef ASSERT
-void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) {
+void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp) {
   frame fr;
 
   // This is ugly but it's better than to change {get,set}_original_pc
@@ -404,33 +404,23 @@
 
   address original_pc = nm->get_original_pc(&fr);
   assert(nm->insts_contains(original_pc), "original PC must be in nmethod");
-  assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be");
 }
 #endif
 
 //------------------------------------------------------------------------------
 // frame::adjust_unextended_sp
 void frame::adjust_unextended_sp() {
-  // If we are returning to a compiled MethodHandle call site, the
-  // saved_fp will in fact be a saved value of the unextended SP.  The
-  // simplest way to tell whether we are returning to such a call site
-  // is as follows:
+  // On x86, sites calling method handle intrinsics and lambda forms are treated
+  // as any other call site. Therefore, no special action is needed when we are
+  // returning to any of these call sites.
 
   nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null();
   if (sender_nm != NULL) {
-    // If the sender PC is a deoptimization point, get the original
-    // PC.  For MethodHandle call site the unextended_sp is stored in
-    // saved_fp.
-    if (sender_nm->is_deopt_mh_entry(_pc)) {
-      DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp));
-      _unextended_sp = _fp;
-    }
-    else if (sender_nm->is_deopt_entry(_pc)) {
+    // If the sender PC is a deoptimization point, get the original PC.
+    if (sender_nm->is_deopt_entry(_pc) ||
+        sender_nm->is_deopt_mh_entry(_pc)) {
       DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp));
     }
-    else if (sender_nm->is_method_handle_return(_pc)) {
-      _unextended_sp = _fp;
-    }
   }
 }
 
--- a/src/cpu/x86/vm/frame_x86.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/frame_x86.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -76,11 +76,11 @@
 //    [locals and parameters   ]
 //                               <- sender sp
 
-// [1] When the c++ interpreter calls a new method it returns to the frame
+// [1] When the C++ interpreter calls a new method it returns to the frame
 //     manager which allocates a new frame on the stack. In that case there
 //     is no real callee of this newly allocated frame. The frame manager is
-//     aware of the  additional frame(s) and will pop them as nested calls
-//     complete. Howevers tTo make it look good in the debugger the frame
+//     aware of the additional frame(s) and will pop them as nested calls
+//     complete. However, to make it look good in the debugger the frame
 //     manager actually installs a dummy pc pointing to RecursiveInterpreterActivation
 //     with a fake interpreter_state* parameter to make it easy to debug
 //     nested calls.
@@ -88,7 +88,7 @@
 // Note that contrary to the layout for the assembly interpreter the
 // expression stack allocated for the C++ interpreter is full sized.
 // However this is not as bad as it seems as the interpreter frame_manager
-// will truncate the unused space on succesive method calls.
+// will truncate the unused space on successive method calls.
 //
 // ------------------------------ C++ interpreter ----------------------------------------
 
@@ -167,10 +167,7 @@
 
 #ifdef ASSERT
   // Used in frame::sender_for_{interpreter,compiled}_frame
-  static void verify_deopt_original_pc(   nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false);
-  static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) {
-    verify_deopt_original_pc(nm, unextended_sp, true);
-  }
+  static void verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp);
 #endif
 
  public:
--- a/src/cpu/x86/vm/frame_x86.inline.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/frame_x86.inline.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -94,7 +94,7 @@
   // find_blob call. This is also why we can have no asserts on the validity
   // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
   // -> pd_last_frame should use a specialized version of pd_last_frame which could
-  // call a specilaized frame constructor instead of this one.
+  // call a specialized frame constructor instead of this one.
   // Then we could use the assert below. However this assert is of somewhat dubious
   // value.
   // assert(_pc != NULL, "no pc?");
--- a/src/cpu/x86/vm/globals_x86.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/globals_x86.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -82,6 +82,8 @@
 
 define_pd_global(uintx, TypeProfileLevel, 111);
 
+define_pd_global(bool, PreserveFramePointer, false);
+
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
                                                                             \
   develop(bool, IEEEPrecision, true,                                        \
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -6104,6 +6104,10 @@
     // We always push rbp, so that on return to interpreter rbp, will be
     // restored correctly and we can correct the stack.
     push(rbp);
+    // Save caller's stack pointer into RBP if the frame pointer is preserved.
+    if (PreserveFramePointer) {
+      mov(rbp, rsp);
+    }
     // Remove word for ebp
     framesize -= wordSize;
 
@@ -6118,6 +6122,11 @@
     // Save RBP register now.
     framesize -= wordSize;
     movptr(Address(rsp, framesize), rbp);
+    // Save caller's stack pointer into RBP if the frame pointer is preserved.
+    if (PreserveFramePointer) {
+      movptr(rbp, rsp);
+      addptr(rbp, framesize + wordSize);
+    }
   }
 
   if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
@@ -6671,7 +6680,7 @@
     subl(cnt2, stride2);
     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
 
     // compare wide vectors tail
     bind(COMPARE_WIDE_TAIL);
@@ -6686,7 +6695,7 @@
     // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
     bind(VECTOR_NOT_EQUAL);
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
     lea(str1, Address(str1, result, scale));
     lea(str2, Address(str2, result, scale));
     jmp(COMPARE_16_CHARS);
@@ -6945,7 +6954,8 @@
   bind(DONE);
   if (UseAVX >= 2) {
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
+    vpxor(vec2, vec2);
   }
 }
 
@@ -7079,7 +7089,8 @@
 
         BIND(L_check_fill_8_bytes);
         // clean upper bits of YMM registers
-        vzeroupper();
+        movdl(xtmp, value);
+        pshufd(xtmp, xtmp, 0);
       } else {
         // Fill 32-byte chunks
         pshufd(xtmp, xtmp, 0);
@@ -7242,7 +7253,11 @@
     bind(L_copy_16_chars_exit);
     if (UseAVX >= 2) {
       // clean upper bits of YMM registers
-      vzeroupper();
+      vpxor(tmp2Reg, tmp2Reg);
+      vpxor(tmp3Reg, tmp3Reg);
+      vpxor(tmp4Reg, tmp4Reg);
+      movdl(tmp1Reg, tmp5);
+      pshufd(tmp1Reg, tmp1Reg, 0);
     }
     subptr(len, 8);
     jccb(Assembler::greater, L_copy_8_chars_exit);
--- a/src/cpu/x86/vm/methodHandles_x86.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -374,7 +374,7 @@
     //  member_reg - MemberName that was the trailing argument
     //  temp1_recv_klass - klass of stacked receiver, if needed
     //  rsi/r13 - interpreter linkage (if interpreted)
-    //  rcx, rdx, rsi, rdi, r8, r8 - compiler arguments (if compiled)
+    //  rcx, rdx, rsi, rdi, r8 - compiler arguments (if compiled)
 
     Label L_incompatible_class_change_error;
     switch (iid) {
--- a/src/cpu/x86/vm/runtime_x86_32.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/runtime_x86_32.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -126,10 +126,6 @@
 
   // rax: exception handler for given <exception oop/exception pc>
 
-  // Restore SP from BP if the exception PC is a MethodHandle call site.
-  __ cmpl(Address(rcx, JavaThread::is_method_handle_return_offset()), 0);
-  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
   // We have a handler in rax, (could be deopt blob)
   // rdx - throwing pc, deopt blob will need it.
 
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -3395,8 +3395,8 @@
 
   // Save callee-saved registers.  See x86_64.ad.
 
-  // rbp is an implicitly saved callee saved register (i.e. the calling
-  // convention will save restore it in prolog/epilog) Other than that
+  // rbp is an implicitly saved callee saved register (i.e., the calling
+  // convention will save/restore it in the prolog/epilog). Other than that
   // there are no callee save registers now that adapter frames are gone.
 
   __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
@@ -3438,9 +3438,9 @@
 
   // Restore callee-saved registers
 
-  // rbp is an implicitly saved callee saved register (i.e. the calling
+  // rbp is an implicitly saved callee-saved register (i.e., the calling
   // convention will save restore it in prolog/epilog) Other than that
-  // there are no callee save registers no that adapter frames are gone.
+  // there are no callee save registers now that adapter frames are gone.
 
   __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
 
@@ -3449,10 +3449,6 @@
 
   // rax: exception handler
 
-  // Restore SP from BP if the exception PC is a MethodHandle call site.
-  __ cmpl(Address(r15_thread, JavaThread::is_method_handle_return_offset()), 0);
-  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
   // We have a handler in rax (could be deopt blob).
   __ mov(r8, rax);
 
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -835,7 +835,8 @@
 
     if (UseUnalignedLoadStores && (UseAVX >= 2)) {
       // clean upper bits of YMM registers
-      __ vzeroupper();
+      __ vpxor(xmm0, xmm0);
+      __ vpxor(xmm1, xmm1);
     }
     __ addl(qword_count, 8);
     __ jccb(Assembler::zero, L_exit);
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1352,7 +1352,8 @@
       __ BIND(L_end);
       if (UseAVX >= 2) {
         // clean upper bits of YMM registers
-        __ vzeroupper();
+        __ vpxor(xmm0, xmm0);
+        __ vpxor(xmm1, xmm1);
       }
     } else {
       // Copy 32-bytes per iteration
@@ -1429,7 +1430,8 @@
       __ BIND(L_end);
       if (UseAVX >= 2) {
         // clean upper bits of YMM registers
-        __ vzeroupper();
+        __ vpxor(xmm0, xmm0);
+        __ vpxor(xmm1, xmm1);
       }
     } else {
       // Copy 32-bytes per iteration
--- a/src/cpu/x86/vm/x86.ad	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/x86.ad	Thu Apr 30 18:14:58 2015 -0400
@@ -930,21 +930,6 @@
 
 encode %{
 
-  enc_class preserve_SP %{
-    debug_only(int off0 = cbuf.insts_size());
-    MacroAssembler _masm(&cbuf);
-    // RBP is preserved across all calls, even compiled calls.
-    // Use it to preserve RSP in places where the callee might change the SP.
-    __ movptr(rbp_mh_SP_save, rsp);
-    debug_only(int off1 = cbuf.insts_size());
-    assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
-  %}
-
-  enc_class restore_SP %{
-    MacroAssembler _masm(&cbuf);
-    __ movptr(rsp, rbp_mh_SP_save);
-  %}
-
   enc_class call_epilog %{
     if (VerifyStackAtCalls) {
       // Check that stack depth is unchanged: find majik cookie on stack
--- a/src/cpu/x86/vm/x86_32.ad	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/x86_32.ad	Thu Apr 30 18:14:58 2015 -0400
@@ -123,50 +123,94 @@
 // 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 //
+// Class for no registers (empty set).
+reg_class no_reg();
+
 // Class for all registers
-reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
+reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
+// Class for all registers (excluding EBP)
+reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
+// Dynamic register class that selects at runtime between register classes
+// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer). 
+// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
+reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
+
 // Class for general registers
-reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
-// Class for general registers which may be used for implicit null checks on win95
-// Also safe for use by tailjump. We don't want to allocate in rbp,
-reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
+reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
+// Class for general registers (excluding EBP).
+// This register class can be used for implicit null checks on win95.
+// It is also safe for use by tailjumps (we don't want to allocate in ebp).
+// Used also if the PreserveFramePointer flag is true.
+reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
+// Dynamic register class that selects between int_reg and int_reg_no_ebp.
+reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
+
 // Class of "X" registers
 reg_class int_x_reg(EBX, ECX, EDX, EAX);
+
 // Class of registers that can appear in an address with no offset.
 // EBP and ESP require an extra instruction byte for zero offset.
 // Used in fast-unlock
 reg_class p_reg(EDX, EDI, ESI, EBX);
-// Class for general registers not including ECX
-reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
-// Class for general registers not including EAX
+
+// Class for general registers excluding ECX
+reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
+// Class for general registers excluding ECX (and EBP)
+reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
+// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
+reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
+
+// Class for general registers excluding EAX
 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
-// Class for general registers not including EAX or EBX.
-reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
+
+// Class for general registers excluding EAX and EBX.
+reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
+// Class for general registers excluding EAX and EBX (and EBP)
+reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
+// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
+reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
+
 // Class of EAX (for multiply and divide operations)
 reg_class eax_reg(EAX);
+
 // Class of EBX (for atomic add)
 reg_class ebx_reg(EBX);
+
 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
 reg_class ecx_reg(ECX);
+
 // Class of EDX (for multiply and divide operations)
 reg_class edx_reg(EDX);
+
 // Class of EDI (for synchronization)
 reg_class edi_reg(EDI);
+
 // Class of ESI (for synchronization)
 reg_class esi_reg(ESI);
-// Singleton class for interpreter's stack pointer
-reg_class ebp_reg(EBP);
+
 // Singleton class for stack pointer
 reg_class sp_reg(ESP);
+
 // Singleton class for instruction pointer
 // reg_class ip_reg(EIP);
+
 // Class of integer register pairs
-reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
+reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
+// Class of integer register pairs (excluding EBP and EDI);
+reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
+// Dynamic register class that selects between long_reg and long_reg_no_ebp.
+reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
+
 // Class of integer register pairs that aligns with calling convention
 reg_class eadx_reg( EAX,EDX );
 reg_class ebcx_reg( ECX,EBX );
+
 // Not AX or DX, used in divides
-reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
+reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
+// Not AX or DX (and neither EBP), used in divides
+reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
+// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
+reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
 
 // Floating point registers.  Notice FPR0 is not a choice.
 // FPR0 is not ever allocated; we use clever encodings to fake
@@ -240,18 +284,11 @@
   return size;
 }
 
-static int preserve_SP_size() {
-  return 2;  // op, rm(reg/reg)
-}
-
 // !!!!! Special hack to get all type of calls to specify the byte offset
 //       from the start of the call to the point where the return address
 //       will point.
 int MachCallStaticJavaNode::ret_addr_offset() {
-  int offset = 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
-  if (_method_handle_invoke)
-    offset += preserve_SP_size();
-  return offset;
+  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points  
 }
 
 int MachCallDynamicJavaNode::ret_addr_offset() {
@@ -285,15 +322,6 @@
 
 // The address of the call instruction needs to be 4-byte aligned to
 // ensure that it does not span a cache line so that it can be patched.
-int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
-  current_offset += pre_call_resets_size();  // skip fldcw, if any
-  current_offset += preserve_SP_size();   // skip mov rbp, rsp
-  current_offset += 1;      // skip call opcode byte
-  return round_to(current_offset, alignment_required()) - current_offset;
-}
-
-// The address of the call instruction needs to be 4-byte aligned to
-// ensure that it does not span a cache line so that it can be patched.
 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
   current_offset += pre_call_resets_size();  // skip fldcw, if any
   current_offset += 5;      // skip MOV instruction
@@ -523,6 +551,10 @@
     st->print("# stack bang (%d bytes)", bangsize);
     st->print("\n\t");
     st->print("PUSH   EBP\t# Save EBP");
+    if (PreserveFramePointer) {
+      st->print("\n\t");
+      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
+    }
     if (framesize) {
       st->print("\n\t");
       st->print("SUB    ESP, #%d\t# Create frame",framesize);
@@ -532,6 +564,10 @@
     st->print("\n\t");
     framesize -= wordSize;
     st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
+    if (PreserveFramePointer) {
+      st->print("\n\t");
+      st->print("MOV    EBP, [ESP + #%d]\t# Save the caller's SP into EBP", (framesize + wordSize));
+    }
   }
 
   if (VerifyStackAtCalls) {
@@ -1489,7 +1525,7 @@
 }
 
 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
-  return EBP_REG_mask();
+  return NO_REG_mask();
 }
 
 // Returns true if the high 32 bits of the value is known to be zero.
@@ -3735,7 +3771,7 @@
 
 // On windows95, EBP is not safe to use for implicit null tests.
 operand eRegP_no_EBP() %{
-  constraint(ALLOC_IN_RC(int_reg_no_rbp));
+  constraint(ALLOC_IN_RC(int_reg_no_ebp));
   match(RegP);
   match(eAXRegP);
   match(eBXRegP);
@@ -3824,13 +3860,6 @@
   interface(REG_INTER);
 %}
 
-operand eBPRegP() %{
-  constraint(ALLOC_IN_RC(ebp_reg));
-  match(RegP);
-  format %{ "EBP" %}
-  interface(REG_INTER);
-%}
-
 operand eRegL() %{
   constraint(ALLOC_IN_RC(long_reg));
   match(RegL);
@@ -12615,7 +12644,6 @@
 //       compute_padding() functions will have to be adjusted.
 instruct CallStaticJavaDirect(method meth) %{
   match(CallStaticJava);
-  predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
   effect(USE meth);
 
   ins_cost(300);
@@ -12629,29 +12657,6 @@
   ins_alignment(4);
 %}
 
-// Call Java Static Instruction (method handle version)
-// Note: If this code changes, the corresponding ret_addr_offset() and
-//       compute_padding() functions will have to be adjusted.
-instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
-  match(CallStaticJava);
-  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
-  effect(USE meth);
-  // EBP is saved by all callees (for interpreter stack correction).
-  // We use it here for a similar purpose, in {preserve,restore}_SP.
-
-  ins_cost(300);
-  format %{ "CALL,static/MethodHandle " %}
-  opcode(0xE8); /* E8 cd */
-  ins_encode( pre_call_resets,
-              preserve_SP,
-              Java_Static_Call( meth ),
-              restore_SP,
-              call_epilog,
-              post_call_FPU );
-  ins_pipe( pipe_slow );
-  ins_alignment(4);
-%}
-
 // Call Java Dynamic Instruction
 // Note: If this code changes, the corresponding ret_addr_offset() and
 //       compute_padding() functions will have to be adjusted.
--- a/src/cpu/x86/vm/x86_64.ad	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/cpu/x86/vm/x86_64.ad	Thu Apr 30 18:14:58 2015 -0400
@@ -166,42 +166,67 @@
 // 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
 //
 
-// Class for all pointer registers (including RSP)
-reg_class any_reg(RAX, RAX_H,
-                  RDX, RDX_H,
-                  RBP, RBP_H,
-                  RDI, RDI_H,
-                  RSI, RSI_H,
-                  RCX, RCX_H,
-                  RBX, RBX_H,
-                  RSP, RSP_H,
-                  R8,  R8_H,
-                  R9,  R9_H,
-                  R10, R10_H,
-                  R11, R11_H,
-                  R12, R12_H,
-                  R13, R13_H,
-                  R14, R14_H,
-                  R15, R15_H);
-
-// Class for all pointer registers except RSP
-reg_class ptr_reg(RAX, RAX_H,
-                  RDX, RDX_H,
-                  RBP, RBP_H,
-                  RDI, RDI_H,
-                  RSI, RSI_H,
-                  RCX, RCX_H,
-                  RBX, RBX_H,
-                  R8,  R8_H,
-                  R9,  R9_H,
-                  R10, R10_H,
-                  R11, R11_H,
-                  R13, R13_H,
-                  R14, R14_H);
-
-// Class for all pointer registers except RAX and RSP
-reg_class ptr_no_rax_reg(RDX, RDX_H,
-                         RBP, RBP_H,
+// Empty register class.
+reg_class no_reg();
+
+// Class for all pointer registers (including RSP and RBP)
+reg_class any_reg_with_rbp(RAX, RAX_H,
+                           RDX, RDX_H,
+                           RBP, RBP_H,               
+                           RDI, RDI_H,
+                           RSI, RSI_H,
+                           RCX, RCX_H,
+                           RBX, RBX_H,
+                           RSP, RSP_H,
+                           R8,  R8_H,
+                           R9,  R9_H,
+                           R10, R10_H,
+                           R11, R11_H,
+                           R12, R12_H,
+                           R13, R13_H,
+                           R14, R14_H,
+                           R15, R15_H);
+
+// Class for all pointer registers (including RSP, but excluding RBP)
+reg_class any_reg_no_rbp(RAX, RAX_H,
+                         RDX, RDX_H,                
+                         RDI, RDI_H,
+                         RSI, RSI_H,
+                         RCX, RCX_H,
+                         RBX, RBX_H,
+                         RSP, RSP_H,
+                         R8,  R8_H,
+                         R9,  R9_H,
+                         R10, R10_H,
+                         R11, R11_H,
+                         R12, R12_H,
+                         R13, R13_H,
+                         R14, R14_H,
+                         R15, R15_H);
+
+// Dynamic register class that selects at runtime between register classes
+// any_reg_no_rbp and any_reg_with_rbp (depending on the value of the flag PreserveFramePointer). 
+// Equivalent to: return PreserveFramePointer ? any_reg_no_rbp : any_reg_with_rbp;
+reg_class_dynamic any_reg(any_reg_no_rbp, any_reg_with_rbp, %{ PreserveFramePointer %});
+                  
+// Class for all pointer registers (excluding RSP)
+reg_class ptr_reg_with_rbp(RAX, RAX_H,
+                           RDX, RDX_H,
+                           RBP, RBP_H,
+                           RDI, RDI_H,
+                           RSI, RSI_H,
+                           RCX, RCX_H,
+                           RBX, RBX_H,
+                           R8,  R8_H,
+                           R9,  R9_H,
+                           R10, R10_H,
+                           R11, R11_H,
+                           R13, R13_H,
+                           R14, R14_H);
+
+// Class for all pointer registers (excluding RSP and RBP)
+reg_class ptr_reg_no_rbp(RAX, RAX_H,
+                         RDX, RDX_H,                         
                          RDI, RDI_H,
                          RSI, RSI_H,
                          RCX, RCX_H,
@@ -213,31 +238,66 @@
                          R13, R13_H,
                          R14, R14_H);
 
-reg_class ptr_no_rbp_reg(RDX, RDX_H,
-                         RAX, RAX_H,
-                         RDI, RDI_H,
-                         RSI, RSI_H,
-                         RCX, RCX_H,
-                         RBX, RBX_H,
-                         R8,  R8_H,
-                         R9,  R9_H,
-                         R10, R10_H,
-                         R11, R11_H,
-                         R13, R13_H,
-                         R14, R14_H);
-
-// Class for all pointer registers except RAX, RBX and RSP
-reg_class ptr_no_rax_rbx_reg(RDX, RDX_H,
-                             RBP, RBP_H,
-                             RDI, RDI_H,
-                             RSI, RSI_H,
-                             RCX, RCX_H,
-                             R8,  R8_H,
-                             R9,  R9_H,
-                             R10, R10_H,
-                             R11, R11_H,
-                             R13, R13_H,
-                             R14, R14_H);
+// Dynamic register class that selects between ptr_reg_no_rbp and ptr_reg_with_rbp.
+reg_class_dynamic ptr_reg(ptr_reg_no_rbp, ptr_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all pointer registers (excluding RAX and RSP)
+reg_class ptr_no_rax_reg_with_rbp(RDX, RDX_H,
+                                  RBP, RBP_H,
+                                  RDI, RDI_H,
+                                  RSI, RSI_H,
+                                  RCX, RCX_H,
+                                  RBX, RBX_H,
+                                  R8,  R8_H,
+                                  R9,  R9_H,
+                                  R10, R10_H,
+                                  R11, R11_H,
+                                  R13, R13_H,
+                                  R14, R14_H);
+
+// Class for all pointer registers (excluding RAX, RSP, and RBP)
+reg_class ptr_no_rax_reg_no_rbp(RDX, RDX_H,
+                                RDI, RDI_H,
+                                RSI, RSI_H,
+                                RCX, RCX_H,
+                                RBX, RBX_H,
+                                R8,  R8_H,
+                                R9,  R9_H,
+                                R10, R10_H,
+                                R11, R11_H,
+                                R13, R13_H,
+                                R14, R14_H);
+
+// Dynamic register class that selects between ptr_no_rax_reg_no_rbp and ptr_no_rax_reg_with_rbp.
+reg_class_dynamic ptr_no_rax_reg(ptr_no_rax_reg_no_rbp, ptr_no_rax_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all pointer registers (excluding RAX, RBX, and RSP)
+reg_class ptr_no_rax_rbx_reg_with_rbp(RDX, RDX_H,
+                                      RBP, RBP_H,
+                                      RDI, RDI_H,
+                                      RSI, RSI_H,
+                                      RCX, RCX_H,
+                                      R8,  R8_H,
+                                      R9,  R9_H,
+                                      R10, R10_H,
+                                      R11, R11_H,
+                                      R13, R13_H,
+                                      R14, R14_H);
+
+// Class for all pointer registers (excluding RAX, RBX, RSP, and RBP)
+reg_class ptr_no_rax_rbx_reg_no_rbp(RDX, RDX_H,
+                                    RDI, RDI_H,
+                                    RSI, RSI_H,
+                                    RCX, RCX_H,
+                                    R8,  R8_H,
+                                    R9,  R9_H,
+                                    R10, R10_H,
+                                    R11, R11_H,
+                                    R13, R13_H,
+                                    R14, R14_H);
+
+// Dynamic register class that selects between ptr_no_rax_rbx_reg_no_rbp and ptr_no_rax_rbx_reg_with_rbp.
+reg_class_dynamic ptr_no_rax_rbx_reg(ptr_no_rax_rbx_reg_no_rbp, ptr_no_rax_rbx_reg_with_rbp, %{ PreserveFramePointer %});
 
 // Singleton class for RAX pointer register
 reg_class ptr_rax_reg(RAX, RAX_H);
@@ -251,59 +311,29 @@
 // Singleton class for RDI pointer register
 reg_class ptr_rdi_reg(RDI, RDI_H);
 
-// Singleton class for RBP pointer register
-reg_class ptr_rbp_reg(RBP, RBP_H);
-
 // Singleton class for stack pointer
 reg_class ptr_rsp_reg(RSP, RSP_H);
 
 // Singleton class for TLS pointer
 reg_class ptr_r15_reg(R15, R15_H);
 
-// Class for all long registers (except RSP)
-reg_class long_reg(RAX, RAX_H,
-                   RDX, RDX_H,
-                   RBP, RBP_H,
-                   RDI, RDI_H,
-                   RSI, RSI_H,
-                   RCX, RCX_H,
-                   RBX, RBX_H,
-                   R8,  R8_H,
-                   R9,  R9_H,
-                   R10, R10_H,
-                   R11, R11_H,
-                   R13, R13_H,
-                   R14, R14_H);
-
-// Class for all long registers except RAX, RDX (and RSP)
-reg_class long_no_rax_rdx_reg(RBP, RBP_H,
-                              RDI, RDI_H,
-                              RSI, RSI_H,
-                              RCX, RCX_H,
-                              RBX, RBX_H,
-                              R8,  R8_H,
-                              R9,  R9_H,
-                              R10, R10_H,
-                              R11, R11_H,
-                              R13, R13_H,
-                              R14, R14_H);
-
-// Class for all long registers except RCX (and RSP)
-reg_class long_no_rcx_reg(RBP, RBP_H,
-                          RDI, RDI_H,
-                          RSI, RSI_H,
-                          RAX, RAX_H,
-                          RDX, RDX_H,
-                          RBX, RBX_H,
-                          R8,  R8_H,
-                          R9,  R9_H,
-                          R10, R10_H,
-                          R11, R11_H,
-                          R13, R13_H,
-                          R14, R14_H);
-
-// Class for all long registers except RAX (and RSP)
-reg_class long_no_rax_reg(RBP, RBP_H,
+// Class for all long registers (excluding RSP)
+reg_class long_reg_with_rbp(RAX, RAX_H,
+                            RDX, RDX_H,
+                            RBP, RBP_H,
+                            RDI, RDI_H,
+                            RSI, RSI_H,
+                            RCX, RCX_H,
+                            RBX, RBX_H,
+                            R8,  R8_H,
+                            R9,  R9_H,
+                            R10, R10_H,
+                            R11, R11_H,
+                            R13, R13_H,
+                            R14, R14_H);
+
+// Class for all long registers (excluding RSP and RBP)
+reg_class long_reg_no_rbp(RAX, RAX_H,
                           RDX, RDX_H,
                           RDI, RDI_H,
                           RSI, RSI_H,
@@ -316,6 +346,67 @@
                           R13, R13_H,
                           R14, R14_H);
 
+// Dynamic register class that selects between long_reg_no_rbp and long_reg_with_rbp.
+reg_class_dynamic long_reg(long_reg_no_rbp, long_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all long registers (excluding RAX, RDX and RSP)
+reg_class long_no_rax_rdx_reg_with_rbp(RBP, RBP_H,
+                                       RDI, RDI_H,
+                                       RSI, RSI_H,
+                                       RCX, RCX_H,
+                                       RBX, RBX_H,
+                                       R8,  R8_H,
+                                       R9,  R9_H,
+                                       R10, R10_H,
+                                       R11, R11_H,
+                                       R13, R13_H,
+                                       R14, R14_H);
+
+// Class for all long registers (excluding RAX, RDX, RSP, and RBP)
+reg_class long_no_rax_rdx_reg_no_rbp(RDI, RDI_H,
+                                     RSI, RSI_H,
+                                     RCX, RCX_H,
+                                     RBX, RBX_H,
+                                     R8,  R8_H,
+                                     R9,  R9_H,
+                                     R10, R10_H,
+                                     R11, R11_H,
+                                     R13, R13_H,
+                                     R14, R14_H);
+
+// Dynamic register class that selects between long_no_rax_rdx_reg_no_rbp and long_no_rax_rdx_reg_with_rbp.
+reg_class_dynamic long_no_rax_rdx_reg(long_no_rax_rdx_reg_no_rbp, long_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all long registers (excluding RCX and RSP)
+reg_class long_no_rcx_reg_with_rbp(RBP, RBP_H,
+                                   RDI, RDI_H,
+                                   RSI, RSI_H,
+                                   RAX, RAX_H,
+                                   RDX, RDX_H,
+                                   RBX, RBX_H,
+                                   R8,  R8_H,
+                                   R9,  R9_H,
+                                   R10, R10_H,
+                                   R11, R11_H,
+                                   R13, R13_H,
+                                   R14, R14_H);
+
+// Class for all long registers (excluding RCX, RSP, and RBP)
+reg_class long_no_rcx_reg_no_rbp(RDI, RDI_H,
+                                 RSI, RSI_H,
+                                 RAX, RAX_H,
+                                 RDX, RDX_H,
+                                 RBX, RBX_H,
+                                 R8,  R8_H,
+                                 R9,  R9_H,
+                                 R10, R10_H,
+                                 R11, R11_H,
+                                 R13, R13_H,
+                                 R14, R14_H);
+
+// Dynamic register class that selects between long_no_rcx_reg_no_rbp and long_no_rcx_reg_with_rbp.
+reg_class_dynamic long_no_rcx_reg(long_no_rcx_reg_no_rbp, long_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
+
 // Singleton class for RAX long register
 reg_class long_rax_reg(RAX, RAX_H);
 
@@ -325,27 +416,27 @@
 // Singleton class for RDX long register
 reg_class long_rdx_reg(RDX, RDX_H);
 
-// Class for all int registers (except RSP)
-reg_class int_reg(RAX,
-                  RDX,
-                  RBP,
-                  RDI,
-                  RSI,
-                  RCX,
-                  RBX,
-                  R8,
-                  R9,
-                  R10,
-                  R11,
-                  R13,
-                  R14);
-
-// Class for all int registers except RCX (and RSP)
-reg_class int_no_rcx_reg(RAX,
+// Class for all int registers (excluding RSP)
+reg_class int_reg_with_rbp(RAX,
+                           RDX,
+                           RBP,
+                           RDI,
+                           RSI,
+                           RCX,
+                           RBX,
+                           R8,
+                           R9,
+                           R10,
+                           R11,
+                           R13,
+                           R14);
+
+// Class for all int registers (excluding RSP and RBP)
+reg_class int_reg_no_rbp(RAX,
                          RDX,
-                         RBP,
                          RDI,
                          RSI,
+                         RCX,
                          RBX,
                          R8,
                          R9,
@@ -354,18 +445,66 @@
                          R13,
                          R14);
 
-// Class for all int registers except RAX, RDX (and RSP)
-reg_class int_no_rax_rdx_reg(RBP,
-                             RDI,
-                             RSI,
-                             RCX,
-                             RBX,
-                             R8,
-                             R9,
-                             R10,
-                             R11,
-                             R13,
-                             R14);
+// Dynamic register class that selects between int_reg_no_rbp and int_reg_with_rbp.
+reg_class_dynamic int_reg(int_reg_no_rbp, int_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all int registers (excluding RCX and RSP)
+reg_class int_no_rcx_reg_with_rbp(RAX,
+                                  RDX,
+                                  RBP,
+                                  RDI,
+                                  RSI,
+                                  RBX,
+                                  R8,
+                                  R9,
+                                  R10,
+                                  R11,
+                                  R13,
+                                  R14);
+
+// Class for all int registers (excluding RCX, RSP, and RBP)
+reg_class int_no_rcx_reg_no_rbp(RAX,
+                                RDX,
+                                RDI,
+                                RSI,
+                                RBX,
+                                R8,
+                                R9,
+                                R10,
+                                R11,
+                                R13,
+                                R14);
+
+// Dynamic register class that selects between int_no_rcx_reg_no_rbp and int_no_rcx_reg_with_rbp.
+reg_class_dynamic int_no_rcx_reg(int_no_rcx_reg_no_rbp, int_no_rcx_reg_with_rbp, %{ PreserveFramePointer %});
+
+// Class for all int registers (excluding RAX, RDX, and RSP)
+reg_class int_no_rax_rdx_reg_with_rbp(RBP,
+                                      RDI,
+                                      RSI,
+                                      RCX,
+                                      RBX,
+                                      R8,
+                                      R9,
+                                      R10,
+                                      R11,
+                                      R13,
+                                      R14);
+
+// Class for all int registers (excluding RAX, RDX, RSP, and RBP)
+reg_class int_no_rax_rdx_reg_no_rbp(RDI,
+                                    RSI,
+                                    RCX,
+                                    RBX,
+                                    R8,
+                                    R9,
+                                    R10,
+                                    R11,
+                                    R13,
+                                    R14);
+
+// Dynamic register class that selects between int_no_rax_rdx_reg_no_rbp and int_no_rax_rdx_reg_with_rbp.
+reg_class_dynamic int_no_rax_rdx_reg(int_no_rax_rdx_reg_no_rbp, int_no_rax_rdx_reg_with_rbp, %{ PreserveFramePointer %});
 
 // Singleton class for RAX int register
 reg_class int_rax_reg(RAX);
@@ -396,9 +535,6 @@
 
 #define __ _masm.
 
-static int preserve_SP_size() {
-  return 3;  // rex.w, op, rm(reg/reg)
-}
 static int clear_avx_size() {
   return (Compile::current()->max_vector_size() > 16) ? 3 : 0;  // vzeroupper
 }
@@ -409,9 +545,7 @@
 int MachCallStaticJavaNode::ret_addr_offset()
 {
   int offset = 5; // 5 bytes from start of call to where return address points
-  offset += clear_avx_size();
-  if (_method_handle_invoke)
-    offset += preserve_SP_size();
+  offset += clear_avx_size();  
   return offset;
 }
 
@@ -450,16 +584,6 @@
 
 // The address of the call instruction needs to be 4-byte aligned to
 // ensure that it does not span a cache line so that it can be patched.
-int CallStaticJavaHandleNode::compute_padding(int current_offset) const
-{
-  current_offset += preserve_SP_size();   // skip mov rbp, rsp
-  current_offset += clear_avx_size(); // skip vzeroupper
-  current_offset += 1; // skip call opcode byte
-  return round_to(current_offset, alignment_required()) - current_offset;
-}
-
-// The address of the call instruction needs to be 4-byte aligned to
-// ensure that it does not span a cache line so that it can be patched.
 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
 {
   current_offset += clear_avx_size(); // skip vzeroupper
@@ -724,6 +848,10 @@
     st->print("# stack bang (%d bytes)", bangsize);
     st->print("\n\t");
     st->print("pushq   rbp\t# Save rbp");
+    if (PreserveFramePointer) {
+        st->print("\n\t");
+        st->print("movq    rbp, rsp\t# Save the caller's SP into rbp");
+    }
     if (framesize) {
       st->print("\n\t");
       st->print("subq    rsp, #%d\t# Create frame",framesize);
@@ -732,7 +860,11 @@
     st->print("subq    rsp, #%d\t# Create frame",framesize);
     st->print("\n\t");
     framesize -= wordSize;
-    st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);
+    st->print("movq    [rsp + #%d], rbp\t# Save rbp",framesize);    
+    if (PreserveFramePointer) {
+      st->print("\n\t");
+      st->print("movq    rbp, [rsp + #%d]\t# Save the caller's SP into rbp", (framesize + wordSize));
+    }
   }
 
   if (VerifyStackAtCalls) {
@@ -1598,8 +1730,9 @@
   return LONG_RDX_REG_mask();
 }
 
+// Register for saving SP into on method handle invokes. Not used on x86_64.
 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
-  return PTR_RBP_REG_mask();
+    return NO_REG_mask();
 }
 
 %}
@@ -3202,7 +3335,7 @@
 // Pointer Register
 operand any_RegP()
 %{
-  constraint(ALLOC_IN_RC(any_reg));
+  constraint(ALLOC_IN_RC(any_reg));  
   match(RegP);
   match(rax_RegP);
   match(rbx_RegP);
@@ -3224,8 +3357,8 @@
   match(rbx_RegP);
   match(rdi_RegP);
   match(rsi_RegP);
-  match(rbp_RegP);
-  match(r15_RegP);  // See Q&A below about r15_RegP.
+  match(rbp_RegP);  // See Q&A below about
+  match(r15_RegP);  // r15_RegP and rbp_RegP.
 
   format %{ %}
   interface(REG_INTER);
@@ -3241,11 +3374,14 @@
 
 // Question: Why is r15_RegP (the read-only TLS register) a match for rRegP?
 // Answer: Operand match rules govern the DFA as it processes instruction inputs.
-// It's fine for an instruction input which expects rRegP to match a r15_RegP.
+// It's fine for an instruction input that expects rRegP to match a r15_RegP.
 // The output of an instruction is controlled by the allocator, which respects
 // register class masks, not match rules.  Unless an instruction mentions
 // r15_RegP or any_RegP explicitly as its output, r15 will not be considered
 // by the allocator as an input.
+// The same logic applies to rbp_RegP being a match for rRegP: If PreserveFramePointer==true,
+// the RBP is used as a proper frame pointer and is not included in ptr_reg. As a
+// result, RBP is not included in the output of the instruction either.
 
 operand no_rax_RegP()
 %{
@@ -3259,9 +3395,11 @@
   interface(REG_INTER);
 %}
 
+// This operand is not allowed to use RBP even if
+// RBP is not used to hold the frame pointer.
 operand no_rbp_RegP()
 %{
-  constraint(ALLOC_IN_RC(ptr_no_rbp_reg));
+  constraint(ALLOC_IN_RC(ptr_reg_no_rbp));
   match(RegP);
   match(rbx_RegP);
   match(rsi_RegP);
@@ -3338,16 +3476,6 @@
   interface(REG_INTER);
 %}
 
-operand rbp_RegP()
-%{
-  constraint(ALLOC_IN_RC(ptr_rbp_reg));
-  match(RegP);
-  match(rRegP);
-
-  format %{ %}
-  interface(REG_INTER);
-%}
-
 operand r15_RegP()
 %{
   constraint(ALLOC_IN_RC(ptr_r15_reg));
@@ -11410,7 +11538,6 @@
 //       compute_padding() functions will have to be adjusted.
 instruct CallStaticJavaDirect(method meth) %{
   match(CallStaticJava);
-  predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
   effect(USE meth);
 
   ins_cost(300);
@@ -11421,27 +11548,6 @@
   ins_alignment(4);
 %}
 
-// Call Java Static Instruction (method handle version)
-// Note: If this code changes, the corresponding ret_addr_offset() and
-//       compute_padding() functions will have to be adjusted.
-instruct CallStaticJavaHandle(method meth, rbp_RegP rbp_mh_SP_save) %{
-  match(CallStaticJava);
-  predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
-  effect(USE meth);
-  // RBP is saved by all callees (for interpreter stack correction).
-  // We use it here for a similar purpose, in {preserve,restore}_SP.
-
-  ins_cost(300);
-  format %{ "call,static/MethodHandle " %}
-  opcode(0xE8); /* E8 cd */
-  ins_encode(clear_avx, preserve_SP,
-             Java_Static_Call(meth),
-             restore_SP,
-             call_epilog);
-  ins_pipe(pipe_slow);
-  ins_alignment(4);
-%}
-
 // Call Java Dynamic Instruction
 // Note: If this code changes, the corresponding ret_addr_offset() and
 //       compute_padding() functions will have to be adjusted.
--- a/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/os_cpu/linux_ppc/vm/os_linux_ppc.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -298,6 +298,7 @@
         goto report_and_die;
       }
 
+      CodeBlob *cb = NULL;
       // Handle signal from NativeJump::patch_verified_entry().
       if (( TrapBasedNotEntrantChecks && sig == SIGTRAP && nativeInstruction_at(pc)->is_sigtrap_zombie_not_entrant()) ||
           (!TrapBasedNotEntrantChecks && sig == SIGILL  && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant())) {
@@ -313,7 +314,10 @@
                // especially when we try to read from the safepoint polling page. So the check
                //   (address)info->si_addr == os::get_standard_polling_page()
                // doesn't work for us. We use:
-               ((NativeInstruction*)pc)->is_safepoint_poll()) {
+               ((NativeInstruction*)pc)->is_safepoint_poll() &&
+               CodeCache::contains((void*) pc) &&
+               ((cb = CodeCache::find_blob(pc)) != NULL) &&
+               cb->is_nmethod()) {
         if (TraceTraps) {
           tty->print_cr("trap: safepoint_poll at " INTPTR_FORMAT " (SIGSEGV)", p2i(pc));
         }
--- a/src/share/vm/adlc/output_c.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/adlc/output_c.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1505,8 +1505,8 @@
     // Iterate over the instructions 'node' expands into
     ExpandRule  *expand       = node->_exprule;
     NameAndList *expand_instr = NULL;
-    for(expand->reset_instructions();
-        (expand_instr = expand->iter_instructions()) != NULL; cnt++) {
+    for (expand->reset_instructions();
+         (expand_instr = expand->iter_instructions()) != NULL; cnt++) {
       new_id = expand_instr->name();
 
       InstructForm* expand_instruction = (InstructForm*)globalAD->globalNames()[new_id];
@@ -1517,30 +1517,25 @@
         continue;
       }
 
-      if (expand_instruction->has_temps()) {
-        globalAD->syntax_err(node->_linenum, "In %s: expand rules using instructs with TEMPs aren't supported: %s",
-                             node->_ident, new_id);
-      }
-
       // Build the node for the instruction
       fprintf(fp,"\n  %sNode *n%d = new %sNode();\n", new_id, cnt, new_id);
       // Add control edge for this node
       fprintf(fp,"  n%d->add_req(_in[0]);\n", cnt);
       // Build the operand for the value this node defines.
       Form *form = (Form*)_globalNames[new_id];
-      assert( form, "'new_id' must be a defined form name");
+      assert(form, "'new_id' must be a defined form name");
       // Grab the InstructForm for the new instruction
       new_inst = form->is_instruction();
-      assert( new_inst, "'new_id' must be an instruction name");
-      if( node->is_ideal_if() && new_inst->is_ideal_if() ) {
-        fprintf(fp, "  ((MachIfNode*)n%d)->_prob = _prob;\n",cnt);
-        fprintf(fp, "  ((MachIfNode*)n%d)->_fcnt = _fcnt;\n",cnt);
+      assert(new_inst, "'new_id' must be an instruction name");
+      if (node->is_ideal_if() && new_inst->is_ideal_if()) {
+        fprintf(fp, "  ((MachIfNode*)n%d)->_prob = _prob;\n", cnt);
+        fprintf(fp, "  ((MachIfNode*)n%d)->_fcnt = _fcnt;\n", cnt);
       }
 
-      if( node->is_ideal_fastlock() && new_inst->is_ideal_fastlock() ) {
-        fprintf(fp, "  ((MachFastLockNode*)n%d)->_counters = _counters;\n",cnt);
-        fprintf(fp, "  ((MachFastLockNode*)n%d)->_rtm_counters = _rtm_counters;\n",cnt);
-        fprintf(fp, "  ((MachFastLockNode*)n%d)->_stack_rtm_counters = _stack_rtm_counters;\n",cnt);
+      if (node->is_ideal_fastlock() && new_inst->is_ideal_fastlock()) {
+        fprintf(fp, "  ((MachFastLockNode*)n%d)->_counters = _counters;\n", cnt);
+        fprintf(fp, "  ((MachFastLockNode*)n%d)->_rtm_counters = _rtm_counters;\n", cnt);
+        fprintf(fp, "  ((MachFastLockNode*)n%d)->_stack_rtm_counters = _stack_rtm_counters;\n", cnt);
       }
 
       // Fill in the bottom_type where requested
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -4083,7 +4083,7 @@
       ValueType* type = apop()->type();
       if (type->is_constant()) {
         ciMethod* target = type->as_ObjectType()->constant_value()->as_member_name()->get_vmtarget();
-        // If the target is another method handle invoke try recursivly to get
+        // If the target is another method handle invoke, try to recursively get
         // a better target.
         if (target->is_method_handle_intrinsic()) {
           if (try_method_handle_inline(target)) {
--- a/src/share/vm/c1/c1_LIR.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/c1/c1_LIR.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -458,7 +458,7 @@
 //-------------------visits--------------------------
 
 // complete rework of LIR instruction visitor.
-// The virtual calls for each instruction type is replaced by a big
+// The virtual call for each instruction type is replaced by a big
 // switch that adds the operands for each instruction
 
 void LIR_OpVisitState::visit(LIR_Op* op) {
@@ -825,7 +825,8 @@
       }
 
       if (opJavaCall->_info)                     do_info(opJavaCall->_info);
-      if (opJavaCall->is_method_handle_invoke()) {
+      if (FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr &&
+          opJavaCall->is_method_handle_invoke()) {
         opJavaCall->_method_handle_invoke_SP_save_opr = FrameMap::method_handle_invoke_SP_save_opr();
         do_temp(opJavaCall->_method_handle_invoke_SP_save_opr);
       }
--- a/src/share/vm/c1/c1_LIR.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/c1/c1_LIR.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1219,10 +1219,8 @@
   // JSR 292 support.
   bool is_invokedynamic() const                  { return code() == lir_dynamic_call; }
   bool is_method_handle_invoke() const {
-    return
-      method()->is_compiled_lambda_form()  // Java-generated adapter
-      ||
-      method()->is_method_handle_intrinsic();  // JVM-generated MH intrinsic
+    return method()->is_compiled_lambda_form() ||   // Java-generated lambda form
+           method()->is_method_handle_intrinsic();  // JVM-generated MH intrinsic
   }
 
   intptr_t vtable_offset() const {
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1606,13 +1606,26 @@
   } else {
     __ unsigned_shift_right(addr, CardTableModRefBS::card_shift, tmp);
   }
+
+  LIR_Address* card_addr;
   if (can_inline_as_constant(card_table_base)) {
-    __ move(LIR_OprFact::intConst(0),
-              new LIR_Address(tmp, card_table_base->as_jint(), T_BYTE));
+    card_addr = new LIR_Address(tmp, card_table_base->as_jint(), T_BYTE);
   } else {
-    __ move(LIR_OprFact::intConst(0),
-              new LIR_Address(tmp, load_constant(card_table_base),
-                              T_BYTE));
+    card_addr = new LIR_Address(tmp, load_constant(card_table_base), T_BYTE);
+  }
+
+  LIR_Opr dirty = LIR_OprFact::intConst(CardTableModRefBS::dirty_card_val());
+  if (UseCondCardMark) {
+    LIR_Opr cur_value = new_register(T_INT);
+    __ move(card_addr, cur_value);
+
+    LabelObj* L_already_dirty = new LabelObj();
+    __ cmp(lir_cond_equal, cur_value, dirty);
+    __ branch(lir_cond_equal, T_BYTE, L_already_dirty->label());
+    __ move(dirty, card_addr);
+    __ branch_destination(L_already_dirty->label());
+  } else {
+    __ move(dirty, card_addr);
   }
 #endif
 }
@@ -2862,7 +2875,7 @@
 //   g) lock result registers and emit call operation
 //
 // Before issuing a call, we must spill-save all values on stack
-// that are in caller-save register. "spill-save" moves thos registers
+// that are in caller-save register. "spill-save" moves those registers
 // either in a free callee-save register or spills them if no free
 // callee save register is available.
 //
@@ -2870,7 +2883,7 @@
 // - if invoked between e) and f), we may lock callee save
 //   register in "spill-save" that destroys the receiver register
 //   before f) is executed
-// - if we rearange the f) to be earlier, by loading %o0, it
+// - if we rearrange f) to be earlier (by loading %o0) it
 //   may destroy a value on the stack that is currently in %o0
 //   and is waiting to be spilled
 // - if we keep the receiver locked while doing spill-save,
@@ -2903,14 +2916,16 @@
   assert(receiver->is_illegal() || receiver->is_equal(LIR_Assembler::receiverOpr()), "must match");
 
   // JSR 292
-  // Preserve the SP over MethodHandle call sites.
+  // Preserve the SP over MethodHandle call sites, if needed.
   ciMethod* target = x->target();
   bool is_method_handle_invoke = (// %%% FIXME: Are both of these relevant?
                                   target->is_method_handle_intrinsic() ||
                                   target->is_compiled_lambda_form());
   if (is_method_handle_invoke) {
     info->set_is_method_handle_invoke(true);
-    __ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr());
+    if(FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr) {
+        __ move(FrameMap::stack_pointer(), FrameMap::method_handle_invoke_SP_save_opr());
+    }
   }
 
   switch (x->code()) {
@@ -2950,8 +2965,9 @@
   }
 
   // JSR 292
-  // Restore the SP after MethodHandle call sites.
-  if (is_method_handle_invoke) {
+  // Restore the SP after MethodHandle call sites, if needed.
+  if (is_method_handle_invoke
+      && FrameMap::method_handle_invoke_SP_save_opr() != LIR_OprFact::illegalOpr) {
     __ move(FrameMap::method_handle_invoke_SP_save_opr(), FrameMap::stack_pointer());
   }
 
--- a/src/share/vm/ci/ciCallSite.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/ci/ciCallSite.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -50,6 +50,25 @@
 }
 
 // ------------------------------------------------------------------
+// ciCallSite::get_context
+//
+// Return the target MethodHandle of this CallSite.
+ciKlass* ciCallSite::get_context() {
+  assert(!is_constant_call_site(), "");
+
+  VM_ENTRY_MARK;
+  oop call_site_oop = get_oop();
+  InstanceKlass* ctxk = MethodHandles::get_call_site_context(call_site_oop);
+  if (ctxk == NULL) {
+    // The call site doesn't have a context associated. Set it to the default context.
+    oop def_context_oop = java_lang_invoke_CallSite::default_context();
+    java_lang_invoke_CallSite::set_context_cas(call_site_oop, def_context_oop, /*expected=*/NULL);
+    ctxk = MethodHandles::get_call_site_context(call_site_oop);
+  }
+  return (CURRENT_ENV->get_metadata(ctxk))->as_klass();
+}
+
+// ------------------------------------------------------------------
 // ciCallSite::print
 //
 // Print debugging information about the CallSite.
--- a/src/share/vm/ci/ciCallSite.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/ci/ciCallSite.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -43,6 +43,7 @@
 
   // Return the target MethodHandle of this CallSite.
   ciMethodHandle* get_target() const;
+  ciKlass* get_context();
 
   void print();
 };
--- a/src/share/vm/classfile/javaClasses.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/classfile/javaClasses.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -102,21 +102,22 @@
 static bool find_field(InstanceKlass* ik,
                        Symbol* name_symbol, Symbol* signature_symbol,
                        fieldDescriptor* fd,
-                       bool allow_super = false) {
-  if (allow_super)
-    return ik->find_field(name_symbol, signature_symbol, fd) != NULL;
-  else
+                       bool is_static = false, bool allow_super = false) {
+  if (allow_super || is_static) {
+    return ik->find_field(name_symbol, signature_symbol, is_static, fd) != NULL;
+  } else {
     return ik->find_local_field(name_symbol, signature_symbol, fd);
+  }
 }
 
 // Helpful routine for computing field offsets at run time rather than hardcoding them
 static void
 compute_offset(int &dest_offset,
                Klass* klass_oop, Symbol* name_symbol, Symbol* signature_symbol,
-               bool allow_super = false) {
+               bool is_static = false, bool allow_super = false) {
   fieldDescriptor fd;
   InstanceKlass* ik = InstanceKlass::cast(klass_oop);
-  if (!find_field(ik, name_symbol, signature_symbol, &fd, allow_super)) {
+  if (!find_field(ik, name_symbol, signature_symbol, &fd, is_static, allow_super)) {
     ResourceMark rm;
     tty->print_cr("Invalid layout of %s at %s", ik->external_name(), name_symbol->as_C_string());
 #ifndef PRODUCT
@@ -2965,14 +2966,49 @@
 // Support for java_lang_invoke_CallSite
 
 int java_lang_invoke_CallSite::_target_offset;
+int java_lang_invoke_CallSite::_context_offset;
+int java_lang_invoke_CallSite::_default_context_offset;
 
 void java_lang_invoke_CallSite::compute_offsets() {
   Klass* k = SystemDictionary::CallSite_klass();
   if (k != NULL) {
     compute_offset(_target_offset, k, vmSymbols::target_name(), vmSymbols::java_lang_invoke_MethodHandle_signature());
+    compute_offset(_context_offset, k, vmSymbols::context_name(), vmSymbols::sun_misc_Cleaner_signature());
+    compute_offset(_default_context_offset, k,
+                   vmSymbols::DEFAULT_CONTEXT_name(), vmSymbols::sun_misc_Cleaner_signature(),
+                   /*is_static=*/true, /*allow_super=*/false);
   }
 }
 
+oop java_lang_invoke_CallSite::context_volatile(oop call_site) {
+  assert(java_lang_invoke_CallSite::is_instance(call_site), "");
+
+  oop dep_oop = call_site->obj_field_volatile(_context_offset);
+  return dep_oop;
+}
+
+void java_lang_invoke_CallSite::set_context_volatile(oop call_site, oop context) {
+  assert(java_lang_invoke_CallSite::is_instance(call_site), "");
+  call_site->obj_field_put_volatile(_context_offset, context);
+}
+
+bool java_lang_invoke_CallSite::set_context_cas(oop call_site, oop context, oop expected) {
+  assert(java_lang_invoke_CallSite::is_instance(call_site), "");
+  HeapWord* context_addr = call_site->obj_field_addr<HeapWord>(_context_offset);
+  oop res = oopDesc::atomic_compare_exchange_oop(context, context_addr, expected, true);
+  bool success = (res == expected);
+  if (success) {
+    update_barrier_set((void*)context_addr, context);
+  }
+  return success;
+}
+
+oop java_lang_invoke_CallSite::default_context() {
+  InstanceKlass* ik = InstanceKlass::cast(SystemDictionary::CallSite_klass());
+  oop def_context_oop = ik->java_mirror()->obj_field(_default_context_offset);
+  assert(!oopDesc::is_null(def_context_oop), "");
+  return def_context_oop;
+}
 
 // Support for java_security_AccessControlContext
 
--- a/src/share/vm/classfile/javaClasses.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/classfile/javaClasses.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -962,7 +962,6 @@
   static void set_clock(jlong value);
 };
 
-
 // Interface to java.lang.invoke.MethodHandle objects
 
 class MethodHandleEntry;
@@ -1170,14 +1169,23 @@
 
 private:
   static int _target_offset;
+  static int _context_offset;
+  static int _default_context_offset;
+
 
   static void compute_offsets();
 
 public:
   // Accessors
-  static oop              target(         oop site);
-  static void         set_target(         oop site, oop target);
-  static void         set_target_volatile(oop site, oop target);
+  static oop              target(          oop site);
+  static void         set_target(          oop site, oop target);
+  static void         set_target_volatile( oop site, oop target);
+
+  static oop              context_volatile(oop site);
+  static void         set_context_volatile(oop site, oop context);
+  static bool         set_context_cas     (oop site, oop context, oop expected);
+
+  static oop default_context();
 
   // Testers
   static bool is_subclass(Klass* klass) {
@@ -1189,7 +1197,6 @@
   static int target_offset_in_bytes()           { return _target_offset; }
 };
 
-
 // Interface to java.security.AccessControlContext objects
 
 class java_security_AccessControlContext: AllStatic {
--- a/src/share/vm/classfile/vmSymbols.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/classfile/vmSymbols.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -292,6 +292,7 @@
   template(setTargetNormal_name,                      "setTargetNormal")                          \
   template(setTargetVolatile_name,                    "setTargetVolatile")                        \
   template(setTarget_signature,                       "(Ljava/lang/invoke/MethodHandle;)V")       \
+  template(DEFAULT_CONTEXT_name,                      "DEFAULT_CONTEXT")                          \
   NOT_LP64(  do_alias(intptr_signature,               int_signature)  )                           \
   LP64_ONLY( do_alias(intptr_signature,               long_signature) )                           \
                                                                                                   \
@@ -501,6 +502,7 @@
   template(class_signature,                           "Ljava/lang/Class;")                                        \
   template(string_signature,                          "Ljava/lang/String;")                                       \
   template(reference_signature,                       "Ljava/lang/ref/Reference;")                                \
+  template(sun_misc_Cleaner_signature,                "Lsun/misc/Cleaner;")                                       \
   template(executable_signature,                      "Ljava/lang/reflect/Executable;")                           \
   template(concurrenthashmap_signature,               "Ljava/util/concurrent/ConcurrentHashMap;")                 \
   template(String_StringBuilder_signature,            "(Ljava/lang/String;)Ljava/lang/StringBuilder;")            \
@@ -553,7 +555,7 @@
   template(createGarbageCollectorMBean_signature,      "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/management/GarbageCollectorMBean;") \
   template(trigger_name,                               "trigger")                                                 \
   template(clear_name,                                 "clear")                                                   \
-  template(trigger_method_signature,                   "(ILjava/lang/management/MemoryUsage;)V")                                                 \
+  template(trigger_method_signature,                   "(ILjava/lang/management/MemoryUsage;)V")                  \
   template(startAgent_name,                            "startAgent")                                              \
   template(startRemoteAgent_name,                      "startRemoteManagementAgent")                              \
   template(startLocalAgent_name,                       "startLocalManagementAgent")                               \
--- a/src/share/vm/code/codeCache.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/code/codeCache.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1067,8 +1067,11 @@
   int marked = 0;
   {
     MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag);
-    InstanceKlass* call_site_klass = InstanceKlass::cast(call_site->klass());
-    marked = call_site_klass->mark_dependent_nmethods(changes);
+    InstanceKlass* ctxk = MethodHandles::get_call_site_context(call_site());
+    if (ctxk == NULL) {
+      return; // No dependencies to invalidate yet.
+    }
+    marked = ctxk->mark_dependent_nmethods(changes);
   }
   if (marked > 0) {
     // At least one nmethod has been marked for deoptimization
--- a/src/share/vm/code/dependencies.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/code/dependencies.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -117,8 +117,9 @@
 }
 
 void Dependencies::assert_call_site_target_value(ciCallSite* call_site, ciMethodHandle* method_handle) {
-  check_ctxk(call_site->klass());
-  assert_common_2(call_site_target_value, call_site, method_handle);
+  ciKlass* ctxk = call_site->get_context();
+  check_ctxk(ctxk);
+  assert_common_3(call_site_target_value, ctxk, call_site, method_handle);
 }
 
 // Helper function.  If we are adding a new dep. under ctxk2,
@@ -388,7 +389,7 @@
   3, // unique_concrete_subtypes_2 ctxk, k1, k2
   3, // unique_concrete_methods_2 ctxk, m1, m2
   1, // no_finalizable_subclasses ctxk
-  2  // call_site_target_value call_site, method_handle
+  3  // call_site_target_value ctxk, call_site, method_handle
 };
 
 const char* Dependencies::dep_name(Dependencies::DepType dept) {
@@ -594,7 +595,7 @@
   const int nargs = argument_count();
   GrowableArray<DepArgument>* args = new GrowableArray<DepArgument>(nargs);
   for (int j = 0; j < nargs; j++) {
-    if (type() == call_site_target_value) {
+    if (is_oop_argument(j)) {
       args->push(argument_oop(j));
     } else {
       args->push(argument(j));
@@ -614,7 +615,7 @@
   int nargs = argument_count();
   GrowableArray<DepArgument>* args = new GrowableArray<DepArgument>(nargs);
   for (int j = 0; j < nargs; j++) {
-    if (type() == call_site_target_value) {
+    if (is_oop_argument(j)) {
       args->push(argument_oop(j));
     } else {
       args->push(argument(j));
@@ -710,7 +711,7 @@
  * Returns a unique identifier for each dependency argument.
  */
 uintptr_t Dependencies::DepStream::get_identifier(int i) {
-  if (has_oop_argument()) {
+  if (is_oop_argument(i)) {
     return (uintptr_t)(oopDesc*)argument_oop(i);
   } else {
     return (uintptr_t)argument(i);
@@ -737,7 +738,7 @@
   }
 
   // Some dependencies are using the klass of the first object
-  // argument as implicit context type (e.g. call_site_target_value).
+  // argument as implicit context type.
   {
     int ctxkj = dep_implicit_context_arg(type());
     if (ctxkj >= 0) {
@@ -1514,9 +1515,16 @@
   return find_finalizable_subclass(search_at);
 }
 
-Klass* Dependencies::check_call_site_target_value(oop call_site, oop method_handle, CallSiteDepChange* changes) {
-  assert(call_site    ->is_a(SystemDictionary::CallSite_klass()),     "sanity");
-  assert(method_handle->is_a(SystemDictionary::MethodHandle_klass()), "sanity");
+Klass* Dependencies::check_call_site_target_value(Klass* recorded_ctxk, oop call_site, oop method_handle, CallSiteDepChange* changes) {
+  assert(call_site->is_a(SystemDictionary::CallSite_klass()),     "sanity");
+  assert(!oopDesc::is_null(method_handle), "sanity");
+
+  Klass* call_site_ctxk = MethodHandles::get_call_site_context(call_site);
+  assert(!Klass::is_null(call_site_ctxk), "call site context should be initialized already");
+  if (recorded_ctxk != call_site_ctxk) {
+    // Stale context
+    return recorded_ctxk;
+  }
   if (changes == NULL) {
     // Validate all CallSites
     if (java_lang_invoke_CallSite::target(call_site) != method_handle)
@@ -1531,7 +1539,6 @@
   return NULL;  // assertion still valid
 }
 
-
 void Dependencies::DepStream::trace_and_log_witness(Klass* witness) {
   if (witness != NULL) {
     if (TraceDependencies) {
@@ -1592,7 +1599,7 @@
   Klass* witness = NULL;
   switch (type()) {
   case call_site_target_value:
-    witness = check_call_site_target_value(argument_oop(0), argument_oop(1), changes);
+    witness = check_call_site_target_value(context_type(), argument_oop(1), argument_oop(2), changes);
     break;
   default:
     witness = NULL;
--- a/src/share/vm/code/dependencies.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/code/dependencies.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -174,7 +174,7 @@
     klass_types         = all_types & ~non_klass_types,
 
     non_ctxk_types      = (1 << evol_method),
-    implicit_ctxk_types = (1 << call_site_target_value),
+    implicit_ctxk_types = 0,
     explicit_ctxk_types = all_types & ~(non_ctxk_types | implicit_ctxk_types),
 
     max_arg_count = 3,   // current maximum number of arguments (incl. ctxk)
@@ -330,7 +330,7 @@
   static Klass* check_exclusive_concrete_methods(Klass* ctxk, Method* m1, Method* m2,
                                                    KlassDepChange* changes = NULL);
   static Klass* check_has_no_finalizable_subclasses(Klass* ctxk, KlassDepChange* changes = NULL);
-  static Klass* check_call_site_target_value(oop call_site, oop method_handle, CallSiteDepChange* changes = NULL);
+  static Klass* check_call_site_target_value(Klass* recorded_ctxk, oop call_site, oop method_handle, CallSiteDepChange* changes = NULL);
   // A returned Klass* is NULL if the dependency assertion is still
   // valid.  A non-NULL Klass* is a 'witness' to the assertion
   // failure, a point in the class hierarchy where the assertion has
@@ -496,7 +496,7 @@
     bool next();
 
     DepType type()               { return _type; }
-    bool has_oop_argument()      { return type() == call_site_target_value; }
+    bool is_oop_argument(int i)  { return type() == call_site_target_value && i > 0; }
     uintptr_t get_identifier(int i);
 
     int argument_count()         { return dep_args(type()); }
@@ -682,7 +682,7 @@
       _method_handle(method_handle)
   {
     assert(_call_site()    ->is_a(SystemDictionary::CallSite_klass()),     "must be");
-    assert(_method_handle()->is_a(SystemDictionary::MethodHandle_klass()), "must be");
+    assert(_method_handle.is_null() || _method_handle()->is_a(SystemDictionary::MethodHandle_klass()), "must be");
   }
 
   // What kind of DepChange is this?
--- a/src/share/vm/code/nmethod.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/code/nmethod.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -2325,6 +2325,7 @@
             // Dependency checking failed. Print out information about the failed
             // dependency and finally fail with an assert. We can fail here, since
             // dependency checking is never done in a product build.
+            tty->print_cr("Failed dependency:");
             changes.print();
             nm->print();
             nm->print_dependencies();
--- a/src/share/vm/oops/oop.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/oops/oop.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -201,6 +201,7 @@
 
   // Access to fields in a instanceOop through these methods.
   oop obj_field(int offset) const;
+  volatile oop obj_field_volatile(int offset) const;
   void obj_field_put(int offset, oop value);
   void obj_field_put_raw(int offset, oop value);
   void obj_field_put_volatile(int offset, oop value);
--- a/src/share/vm/oops/oop.inline.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/oops/oop.inline.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -284,6 +284,11 @@
     load_decode_heap_oop(obj_field_addr<narrowOop>(offset)) :
     load_decode_heap_oop(obj_field_addr<oop>(offset));
 }
+inline volatile oop oopDesc::obj_field_volatile(int offset) const {
+  volatile oop value = obj_field(offset);
+  OrderAccess::acquire();
+  return value;
+}
 inline void oopDesc::obj_field_put(int offset, oop value) {
   UseCompressedOops ? oop_store(obj_field_addr<narrowOop>(offset), value) :
                       oop_store(obj_field_addr<oop>(offset),       value);
--- a/src/share/vm/opto/bytecodeInfo.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/bytecodeInfo.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -631,11 +631,11 @@
   }
   int max_inline_level_adjust = 0;
   if (caller_jvms->method() != NULL) {
-    if (caller_jvms->method()->is_compiled_lambda_form())
+    if (caller_jvms->method()->is_compiled_lambda_form()) {
       max_inline_level_adjust += 1;  // don't count actions in MH or indy adapter frames
-    else if (callee_method->is_method_handle_intrinsic() ||
-             callee_method->is_compiled_lambda_form()) {
-      max_inline_level_adjust += 1;  // don't count method handle calls from java.lang.invoke implem
+    } else if (callee_method->is_method_handle_intrinsic() ||
+               callee_method->is_compiled_lambda_form()) {
+      max_inline_level_adjust += 1;  // don't count method handle calls from java.lang.invoke implementation
     }
     if (max_inline_level_adjust != 0 && C->print_inlining() && (Verbose || WizardMode)) {
       CompileTask::print_inline_indent(inline_level());
--- a/src/share/vm/opto/c2_globals.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/c2_globals.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -593,9 +593,6 @@
   develop(bool, PoisonOSREntry, true,                                       \
            "Detect abnormal calls to OSR code")                             \
                                                                             \
-  product(bool, UseCondCardMark, false,                                     \
-          "Check for already marked card before updating card table")       \
-                                                                            \
   develop(bool, SoftMatchFailure, trueInProduct,                            \
           "If the DFA fails to match a node, print a message and bail out") \
                                                                             \
--- a/src/share/vm/opto/castnode.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/castnode.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -73,16 +73,6 @@
   return (in(0) && remove_dead_region(phase, can_reshape)) ? this : NULL;
 }
 
-//------------------------------Ideal_DU_postCCP-------------------------------
-// Throw away cast after constant propagation
-Node *ConstraintCastNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
-  const Type *t = ccp->type(in(1));
-  ccp->hash_delete(this);
-  set_type(t);                   // Turn into ID function
-  ccp->hash_insert(this);
-  return this;
-}
-
 uint CastIINode::size_of() const {
   return sizeof(*this);
 }
@@ -164,13 +154,6 @@
   return res;
 }
 
-Node *CastIINode::Ideal_DU_postCCP(PhaseCCP *ccp) {
-  if (_carry_dependency) {
-    return NULL;
-  }
-  return ConstraintCastNode::Ideal_DU_postCCP(ccp);
-}
-
 #ifndef PRODUCT
 void CastIINode::dump_spec(outputStream *st) const {
   TypeNode::dump_spec(st);
@@ -181,20 +164,6 @@
 #endif
 
 //=============================================================================
-
-//------------------------------Ideal_DU_postCCP-------------------------------
-// If not converting int->oop, throw away cast after constant propagation
-Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
-  const Type *t = ccp->type(in(1));
-  if (!t->isa_oop_ptr() || ((in(1)->is_DecodeN()) && Matcher::gen_narrow_oop_implicit_null_checks())) {
-    return NULL; // do not transform raw pointers or narrow oops
-  }
-  return ConstraintCastNode::Ideal_DU_postCCP(ccp);
-}
-
-
-
-//=============================================================================
 //------------------------------Identity---------------------------------------
 // If input is already higher or equal to cast type, then this is an identity.
 Node *CheckCastPPNode::Identity( PhaseTransform *phase ) {
--- a/src/share/vm/opto/castnode.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/castnode.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -42,7 +42,6 @@
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual int Opcode() const;
   virtual uint ideal_reg() const = 0;
-  virtual Node *Ideal_DU_postCCP( PhaseCCP * );
 };
 
 //------------------------------CastIINode-------------------------------------
@@ -63,7 +62,6 @@
   virtual uint ideal_reg() const { return Op_RegI; }
   virtual Node *Identity( PhaseTransform *phase );
   virtual const Type *Value( PhaseTransform *phase ) const;
-  virtual Node *Ideal_DU_postCCP( PhaseCCP * );
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const;
 #endif
@@ -76,7 +74,6 @@
   CastPPNode (Node *n, const Type *t ): ConstraintCastNode(n, t) {}
   virtual int Opcode() const;
   virtual uint ideal_reg() const { return Op_RegP; }
-  virtual Node *Ideal_DU_postCCP( PhaseCCP * );
 };
 
 //------------------------------CheckCastPPNode--------------------------------
@@ -94,9 +91,6 @@
   virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
   virtual int   Opcode() const;
   virtual uint  ideal_reg() const { return Op_RegP; }
-  // No longer remove CheckCast after CCP as it gives me a place to hang
-  // the proper address type - which is required to compute anti-deps.
-  //virtual Node *Ideal_DU_postCCP( PhaseCCP * );
 };
 
 
--- a/src/share/vm/opto/compile.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/compile.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -2811,9 +2811,38 @@
     break;
   }
 
-#ifdef _LP64
-  case Op_CastPP:
-    if (n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) {
+  case Op_CastPP: {
+    // Remove CastPP nodes to gain more freedom during scheduling but
+    // keep the dependency they encode as control or precedence edges
+    // (if control is set already) on memory operations. Some CastPP
+    // nodes don't have a control (don't carry a dependency): skip
+    // those.
+    if (n->in(0) != NULL) {
+      ResourceMark rm;
+      Unique_Node_List wq;
+      wq.push(n);
+      for (uint next = 0; next < wq.size(); ++next) {
+        Node *m = wq.at(next);
+        for (DUIterator_Fast imax, i = m->fast_outs(imax); i < imax; i++) {
+          Node* use = m->fast_out(i);
+          if (use->is_Mem() || use->is_EncodeNarrowPtr()) {
+            use->ensure_control_or_add_prec(n->in(0));
+          } else if (use->in(0) == NULL) {
+            switch(use->Opcode()) {
+            case Op_AddP:
+            case Op_DecodeN:
+            case Op_DecodeNKlass:
+            case Op_CheckCastPP:
+            case Op_CastPP:
+              wq.push(use);
+              break;
+            }
+          }
+        }
+      }
+    }
+    const bool is_LP64 = LP64_ONLY(true) NOT_LP64(false);
+    if (is_LP64 && n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) {
       Node* in1 = n->in(1);
       const Type* t = n->bottom_type();
       Node* new_in1 = in1->clone();
@@ -2846,9 +2875,15 @@
       if (in1->outcnt() == 0) {
         in1->disconnect_inputs(NULL, this);
       }
+    } else {
+      n->subsume_by(n->in(1), this);
+      if (n->outcnt() == 0) {
+        n->disconnect_inputs(NULL, this);
+      }
     }
     break;
-
+  }
+#ifdef _LP64
   case Op_CmpP:
     // Do this transformation here to preserve CmpPNode::sub() and
     // other TypePtr related Ideal optimizations (for example, ptr nullness).
--- a/src/share/vm/opto/gcm.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/gcm.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -100,6 +100,9 @@
   }
 }
 
+static bool is_dominator(Block* d, Block* n) {
+  return d->dom_lca(n) == d;
+}
 
 //------------------------------schedule_pinned_nodes--------------------------
 // Set the basic block for Nodes pinned into blocks
@@ -122,6 +125,42 @@
         schedule_node_into_block(node, block);
       }
 
+      // If the node has precedence edges (added when CastPP nodes are
+      // removed in final_graph_reshaping), fix the control of the
+      // node to cover the precedence edges and remove the
+      // dependencies.
+      Node* n = NULL;
+      for (uint i = node->len()-1; i >= node->req(); i--) {
+        Node* m = node->in(i);
+        if (m == NULL) continue;
+        // Skip the precedence edge if the test that guarded a CastPP:
+        // - was optimized out during escape analysis
+        // (OptimizePtrCompare): the CastPP's control isn't an end of
+        // block.
+        // - is moved in the branch of a dominating If: the control of
+        // the CastPP is then a Region.
+        if (m->is_block_proj() || m->is_block_start()) {
+          node->rm_prec(i);
+          if (n == NULL) {
+            n = m;
+          } else {
+            Block* bn = get_block_for_node(n);
+            Block* bm = get_block_for_node(m);
+            assert(is_dominator(bn, bm) || is_dominator(bm, bn), "one must dominate the other");
+            n = is_dominator(bn, bm) ? m : n;
+          }
+        }
+      }
+      if (n != NULL) {
+        assert(node->in(0), "control should have been set");
+        Block* bn = get_block_for_node(n);
+        Block* bnode = get_block_for_node(node->in(0));
+        assert(is_dominator(bn, bnode) || is_dominator(bnode, bn), "one must dominate the other");
+        if (!is_dominator(bn, bnode)) {
+          node->set_req(0, n);
+        }
+      }
+
       // process all inputs that are non NULL
       for (int i = node->req() - 1; i >= 0; --i) {
         if (node->in(i) != NULL) {
--- a/src/share/vm/opto/ifnode.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/ifnode.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -234,16 +234,24 @@
   // Make a region merging constants and a region merging the rest
   uint req_c = 0;
   Node* predicate_proj = NULL;
+  int nb_predicate_proj = 0;
   for (uint ii = 1; ii < r->req(); ii++) {
     if (phi->in(ii) == con1) {
       req_c++;
     }
     Node* proj = PhaseIdealLoop::find_predicate(r->in(ii));
     if (proj != NULL) {
-      assert(predicate_proj == NULL, "only one predicate entry expected");
+      nb_predicate_proj++;
       predicate_proj = proj;
     }
   }
+  if (nb_predicate_proj > 1) {
+    // Can happen in case of loop unswitching and when the loop is
+    // optimized out: it's not a loop anymore so we don't care about
+    // predicates.
+    assert(!r->is_Loop(), "this must not be a loop anymore");
+    predicate_proj = NULL;
+  }
   Node* predicate_c = NULL;
   Node* predicate_x = NULL;
   bool counted_loop = r->is_CountedLoop();
--- a/src/share/vm/opto/loopPredicate.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/loopPredicate.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -89,7 +89,7 @@
 //
 // We will create a region to guard the uct call if there is no one there.
 // The true projecttion (if_cont) of the new_iff is returned.
-// This code is also used to clone predicates to clonned loops.
+// This code is also used to clone predicates to cloned loops.
 ProjNode* PhaseIdealLoop::create_new_if_for_predicate(ProjNode* cont_proj, Node* new_entry,
                                                       Deoptimization::DeoptReason reason) {
   assert(cont_proj->is_uncommon_trap_if_pattern(reason), "must be a uct if pattern!");
--- a/src/share/vm/opto/matcher.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/matcher.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1049,6 +1049,15 @@
         mstack.push(m, Visit, n, -1);
       }
 
+      // Handle precedence edges for interior nodes
+      for (i = n->len()-1; (uint)i >= n->req(); i--) {
+        Node *m = n->in(i);
+        if (m == NULL || C->node_arena()->contains(m)) continue;
+        n->rm_prec(i);
+        // set -1 to call add_prec() instead of set_req() during Step1
+        mstack.push(m, Visit, n, -1);
+      }
+
       // For constant debug info, I'd rather have unmatched constants.
       int cnt = n->req();
       JVMState* jvms = n->jvms();
@@ -1738,6 +1747,14 @@
   return ex;
 }
 
+void Matcher::handle_precedence_edges(Node* n, MachNode *mach) {
+  for (uint i = n->req(); i < n->len(); i++) {
+    if (n->in(i) != NULL) {
+      mach->add_prec(n->in(i));
+    }
+  }
+}
+
 void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach ) {
   // 'op' is what I am expecting to receive
   int op = _leftOp[rule];
@@ -1772,6 +1789,8 @@
 
 
 uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds ) {
+  handle_precedence_edges(s->_leaf, mach);
+
   if( s->_leaf->is_Load() ) {
     Node *mem2 = s->_leaf->in(MemNode::Memory);
     assert( mem == (Node*)1 || mem == mem2, "multiple Memories being matched at once?" );
@@ -1854,6 +1873,9 @@
     mem = s->_leaf->in(MemNode::Memory);
     debug_only(_mem_node = s->_leaf;)
   }
+
+  handle_precedence_edges(s->_leaf, mach);
+
   if( s->_leaf->in(0) && s->_leaf->req() > 1) {
     if( !mach->in(0) )
       mach->set_req(0,s->_leaf->in(0));
--- a/src/share/vm/opto/matcher.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/matcher.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -124,6 +124,8 @@
   // Mach node for ConP #NULL
   MachNode* _mach_null;
 
+  void handle_precedence_edges(Node* n, MachNode *mach);
+
 public:
   int LabelRootDepth;
   // Convert ideal machine register to a register mask for spill-loads
--- a/src/share/vm/opto/memnode.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/memnode.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -652,216 +652,6 @@
   }
 }
 
-//------------------------adr_phi_is_loop_invariant----------------------------
-// A helper function for Ideal_DU_postCCP to check if a Phi in a counted
-// loop is loop invariant. Make a quick traversal of Phi and associated
-// CastPP nodes, looking to see if they are a closed group within the loop.
-bool MemNode::adr_phi_is_loop_invariant(Node* adr_phi, Node* cast) {
-  // The idea is that the phi-nest must boil down to only CastPP nodes
-  // with the same data. This implies that any path into the loop already
-  // includes such a CastPP, and so the original cast, whatever its input,
-  // must be covered by an equivalent cast, with an earlier control input.
-  ResourceMark rm;
-
-  // The loop entry input of the phi should be the unique dominating
-  // node for every Phi/CastPP in the loop.
-  Unique_Node_List closure;
-  closure.push(adr_phi->in(LoopNode::EntryControl));
-
-  // Add the phi node and the cast to the worklist.
-  Unique_Node_List worklist;
-  worklist.push(adr_phi);
-  if( cast != NULL ){
-    if( !cast->is_ConstraintCast() ) return false;
-    worklist.push(cast);
-  }
-
-  // Begin recursive walk of phi nodes.
-  while( worklist.size() ){
-    // Take a node off the worklist
-    Node *n = worklist.pop();
-    if( !closure.member(n) ){
-      // Add it to the closure.
-      closure.push(n);
-      // Make a sanity check to ensure we don't waste too much time here.
-      if( closure.size() > 20) return false;
-      // This node is OK if:
-      //  - it is a cast of an identical value
-      //  - or it is a phi node (then we add its inputs to the worklist)
-      // Otherwise, the node is not OK, and we presume the cast is not invariant
-      if( n->is_ConstraintCast() ){
-        worklist.push(n->in(1));
-      } else if( n->is_Phi() ) {
-        for( uint i = 1; i < n->req(); i++ ) {
-          worklist.push(n->in(i));
-        }
-      } else {
-        return false;
-      }
-    }
-  }
-
-  // Quit when the worklist is empty, and we've found no offending nodes.
-  return true;
-}
-
-//------------------------------Ideal_DU_postCCP-------------------------------
-// Find any cast-away of null-ness and keep its control.  Null cast-aways are
-// going away in this pass and we need to make this memory op depend on the
-// gating null check.
-Node *MemNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
-  return Ideal_common_DU_postCCP(ccp, this, in(MemNode::Address));
-}
-
-// I tried to leave the CastPP's in.  This makes the graph more accurate in
-// some sense; we get to keep around the knowledge that an oop is not-null
-// after some test.  Alas, the CastPP's interfere with GVN (some values are
-// the regular oop, some are the CastPP of the oop, all merge at Phi's which
-// cannot collapse, etc).  This cost us 10% on SpecJVM, even when I removed
-// some of the more trivial cases in the optimizer.  Removing more useless
-// Phi's started allowing Loads to illegally float above null checks.  I gave
-// up on this approach.  CNC 10/20/2000
-// This static method may be called not from MemNode (EncodePNode calls it).
-// Only the control edge of the node 'n' might be updated.
-Node *MemNode::Ideal_common_DU_postCCP( PhaseCCP *ccp, Node* n, Node* adr ) {
-  Node *skipped_cast = NULL;
-  // Need a null check?  Regular static accesses do not because they are
-  // from constant addresses.  Array ops are gated by the range check (which
-  // always includes a NULL check).  Just check field ops.
-  if( n->in(MemNode::Control) == NULL ) {
-    // Scan upwards for the highest location we can place this memory op.
-    while( true ) {
-      switch( adr->Opcode() ) {
-
-      case Op_AddP:             // No change to NULL-ness, so peek thru AddP's
-        adr = adr->in(AddPNode::Base);
-        continue;
-
-      case Op_DecodeN:         // No change to NULL-ness, so peek thru
-      case Op_DecodeNKlass:
-        adr = adr->in(1);
-        continue;
-
-      case Op_EncodeP:
-      case Op_EncodePKlass:
-        // EncodeP node's control edge could be set by this method
-        // when EncodeP node depends on CastPP node.
-        //
-        // Use its control edge for memory op because EncodeP may go away
-        // later when it is folded with following or preceding DecodeN node.
-        if (adr->in(0) == NULL) {
-          // Keep looking for cast nodes.
-          adr = adr->in(1);
-          continue;
-        }
-        ccp->hash_delete(n);
-        n->set_req(MemNode::Control, adr->in(0));
-        ccp->hash_insert(n);
-        return n;
-
-      case Op_CastPP:
-        // If the CastPP is useless, just peek on through it.
-        if( ccp->type(adr) == ccp->type(adr->in(1)) ) {
-          // Remember the cast that we've peeked though. If we peek
-          // through more than one, then we end up remembering the highest
-          // one, that is, if in a loop, the one closest to the top.
-          skipped_cast = adr;
-          adr = adr->in(1);
-          continue;
-        }
-        // CastPP is going away in this pass!  We need this memory op to be
-        // control-dependent on the test that is guarding the CastPP.
-        ccp->hash_delete(n);
-        n->set_req(MemNode::Control, adr->in(0));
-        ccp->hash_insert(n);
-        return n;
-
-      case Op_Phi:
-        // Attempt to float above a Phi to some dominating point.
-        if (adr->in(0) != NULL && adr->in(0)->is_CountedLoop()) {
-          // If we've already peeked through a Cast (which could have set the
-          // control), we can't float above a Phi, because the skipped Cast
-          // may not be loop invariant.
-          if (adr_phi_is_loop_invariant(adr, skipped_cast)) {
-            adr = adr->in(1);
-            continue;
-          }
-        }
-
-        // Intentional fallthrough!
-
-        // No obvious dominating point.  The mem op is pinned below the Phi
-        // by the Phi itself.  If the Phi goes away (no true value is merged)
-        // then the mem op can float, but not indefinitely.  It must be pinned
-        // behind the controls leading to the Phi.
-      case Op_CheckCastPP:
-        // These usually stick around to change address type, however a
-        // useless one can be elided and we still need to pick up a control edge
-        if (adr->in(0) == NULL) {
-          // This CheckCastPP node has NO control and is likely useless. But we
-          // need check further up the ancestor chain for a control input to keep
-          // the node in place. 4959717.
-          skipped_cast = adr;
-          adr = adr->in(1);
-          continue;
-        }
-        ccp->hash_delete(n);
-        n->set_req(MemNode::Control, adr->in(0));
-        ccp->hash_insert(n);
-        return n;
-
-        // List of "safe" opcodes; those that implicitly block the memory
-        // op below any null check.
-      case Op_CastX2P:          // no null checks on native pointers
-      case Op_Parm:             // 'this' pointer is not null
-      case Op_LoadP:            // Loading from within a klass
-      case Op_LoadN:            // Loading from within a klass
-      case Op_LoadKlass:        // Loading from within a klass
-      case Op_LoadNKlass:       // Loading from within a klass
-      case Op_ConP:             // Loading from a klass
-      case Op_ConN:             // Loading from a klass
-      case Op_ConNKlass:        // Loading from a klass
-      case Op_CreateEx:         // Sucking up the guts of an exception oop
-      case Op_Con:              // Reading from TLS
-      case Op_CMoveP:           // CMoveP is pinned
-      case Op_CMoveN:           // CMoveN is pinned
-        break;                  // No progress
-
-      case Op_Proj:             // Direct call to an allocation routine
-      case Op_SCMemProj:        // Memory state from store conditional ops
-#ifdef ASSERT
-        {
-          assert(adr->as_Proj()->_con == TypeFunc::Parms, "must be return value");
-          const Node* call = adr->in(0);
-          if (call->is_CallJava()) {
-            const CallJavaNode* call_java = call->as_CallJava();
-            const TypeTuple *r = call_java->tf()->range();
-            assert(r->cnt() > TypeFunc::Parms, "must return value");
-            const Type* ret_type = r->field_at(TypeFunc::Parms);
-            assert(ret_type && ret_type->isa_ptr(), "must return pointer");
-            // We further presume that this is one of
-            // new_instance_Java, new_array_Java, or
-            // the like, but do not assert for this.
-          } else if (call->is_Allocate()) {
-            // similar case to new_instance_Java, etc.
-          } else if (!call->is_CallLeaf()) {
-            // Projections from fetch_oop (OSR) are allowed as well.
-            ShouldNotReachHere();
-          }
-        }
-#endif
-        break;
-      default:
-        ShouldNotReachHere();
-      }
-      break;
-    }
-  }
-
-  return  NULL;               // No progress
-}
-
-
 //=============================================================================
 // Should LoadNode::Ideal() attempt to remove control edges?
 bool LoadNode::can_remove_control() const {
--- a/src/share/vm/opto/memnode.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/memnode.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -84,10 +84,6 @@
   // This one should probably be a phase-specific function:
   static bool all_controls_dominate(Node* dom, Node* sub);
 
-  // Find any cast-away of null-ness and keep its control.
-  static  Node *Ideal_common_DU_postCCP( PhaseCCP *ccp, Node* n, Node* adr );
-  virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp );
-
   virtual const class TypePtr *adr_type() const;  // returns bottom_type of address
 
   // Shared code for Ideal methods:
--- a/src/share/vm/opto/narrowptrnode.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/narrowptrnode.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -67,10 +67,6 @@
 }
 
 
-Node *EncodeNarrowPtrNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
-  return MemNode::Ideal_common_DU_postCCP(ccp, this, in(1));
-}
-
 Node* DecodeNKlassNode::Identity(PhaseTransform* phase) {
   const Type *t = phase->type( in(1) );
   if( t == Type::TOP ) return in(1);
--- a/src/share/vm/opto/narrowptrnode.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/narrowptrnode.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -39,7 +39,6 @@
   }
   public:
   virtual uint  ideal_reg() const { return Op_RegN; }
-  virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp );
 };
 
 //------------------------------EncodeP--------------------------------
--- a/src/share/vm/opto/node.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/node.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1387,12 +1387,6 @@
   return false;
 }
 
-//------------------------------Ideal_DU_postCCP-------------------------------
-// Idealize graph, using DU info.  Must clone result into new-space
-Node *Node::Ideal_DU_postCCP( PhaseCCP * ) {
-  return NULL;                 // Default to no change
-}
-
 //------------------------------hash-------------------------------------------
 // Hash function over Nodes.
 uint Node::hash() const {
@@ -2081,6 +2075,14 @@
   return found;
 }
 
+void Node::ensure_control_or_add_prec(Node* c) {
+  if (in(0) == NULL) {
+    set_req(0, c);
+  } else if (in(0) != c) {
+    add_prec(c);
+  }
+}
+
 //=============================================================================
 //------------------------------yank-------------------------------------------
 // Find and remove
--- a/src/share/vm/opto/node.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/node.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -906,9 +906,6 @@
   bool remove_dead_region(PhaseGVN *phase, bool can_reshape);
 public:
 
-  // Idealize graph, using DU info.  Done after constant propagation
-  virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp );
-
   // See if there is valid pipeline info
   static  const Pipeline *pipeline_class();
   virtual const Pipeline *pipeline() const;
@@ -942,6 +939,9 @@
   // Return the unique control out if only one. Null if none or more than one.
   Node* unique_ctrl_out() const;
 
+  // Set control or add control as precedence edge
+  void ensure_control_or_add_prec(Node* c);
+
 //----------------- Code Generation
 
   // Ideal register class for Matching.  Zero means unmatched instruction
--- a/src/share/vm/opto/phaseX.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/phaseX.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1605,21 +1605,6 @@
   C->set_root( transform(C->root())->as_Root() );
   assert( C->top(),  "missing TOP node" );
   assert( C->root(), "missing root" );
-
-  // Eagerly remove castPP nodes here. CastPP nodes might not be
-  // removed in the subsequent IGVN phase if a node that changes
-  // in(1) of a castPP is processed prior to the castPP node.
-  for (uint i = 0; i < _worklist.size(); i++) {
-    Node* n = _worklist.at(i);
-
-    if (n->is_ConstraintCast()) {
-      Node* nn = n->Identity(this);
-      if (nn != n) {
-        replace_node(n, nn);
-        --i;
-      }
-    }
-  }
 }
 
 //------------------------------transform--------------------------------------
@@ -1700,11 +1685,6 @@
     _worklist.push(n);          // n re-enters the hash table via the worklist
   }
 
-  // Idealize graph using DU info.  Must clone() into new-space.
-  // DU info is generally used to show profitability, progress or safety
-  // (but generally not needed for correctness).
-  Node *nn = n->Ideal_DU_postCCP(this);
-
   // TEMPORARY fix to ensure that 2nd GVN pass eliminates NULL checks
   switch( n->Opcode() ) {
   case Op_FastLock:      // Revisit FastLocks for lock coarsening
@@ -1721,12 +1701,6 @@
   default:
     break;
   }
-  if( nn ) {
-    _worklist.push(n);
-    // Put users of 'n' onto worklist for second igvn transform
-    add_users_to_worklist(n);
-    return nn;
-  }
 
   return  n;
 }
--- a/src/share/vm/opto/type.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/opto/type.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1158,11 +1158,11 @@
   // Certain normalizations keep us sane when comparing types.
   // The 'SMALLINT' covers constants and also CC and its relatives.
   if (lo <= hi) {
-    if ((juint)(hi - lo) <= SMALLINT)  w = Type::WidenMin;
-    if ((juint)(hi - lo) >= max_juint) w = Type::WidenMax; // TypeInt::INT
+    if (((juint)hi - lo) <= SMALLINT)  w = Type::WidenMin;
+    if (((juint)hi - lo) >= max_juint) w = Type::WidenMax; // TypeInt::INT
   } else {
-    if ((juint)(lo - hi) <= SMALLINT)  w = Type::WidenMin;
-    if ((juint)(lo - hi) >= max_juint) w = Type::WidenMin; // dual TypeInt::INT
+    if (((juint)lo - hi) <= SMALLINT)  w = Type::WidenMin;
+    if (((juint)lo - hi) >= max_juint) w = Type::WidenMin; // dual TypeInt::INT
   }
   return w;
 }
@@ -1416,11 +1416,11 @@
   // Certain normalizations keep us sane when comparing types.
   // The 'SMALLINT' covers constants.
   if (lo <= hi) {
-    if ((julong)(hi - lo) <= SMALLINT)   w = Type::WidenMin;
-    if ((julong)(hi - lo) >= max_julong) w = Type::WidenMax; // TypeLong::LONG
+    if (((julong)hi - lo) <= SMALLINT)   w = Type::WidenMin;
+    if (((julong)hi - lo) >= max_julong) w = Type::WidenMax; // TypeLong::LONG
   } else {
-    if ((julong)(lo - hi) <= SMALLINT)   w = Type::WidenMin;
-    if ((julong)(lo - hi) >= max_julong) w = Type::WidenMin; // dual TypeLong::LONG
+    if (((julong)lo - hi) <= SMALLINT)   w = Type::WidenMin;
+    if (((julong)lo - hi) >= max_julong) w = Type::WidenMin; // dual TypeLong::LONG
   }
   return w;
 }
--- a/src/share/vm/prims/forte.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/prims/forte.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -171,8 +171,27 @@
   // Now do we have a useful PcDesc?
   if (pc_desc == NULL ||
       pc_desc->scope_decode_offset() == DebugInformationRecorder::serialized_null) {
-    // No debug information available for this pc
-    // vframeStream would explode if we try and walk the frames.
+    // No debug information is available for this PC.
+    //
+    // vframeStreamCommon::fill_from_frame() will decode the frame depending
+    // on the state of the thread.
+    //
+    // Case #1: If the thread is in Java (state == _thread_in_Java), then
+    // the vframeStreamCommon object will be filled as if the frame were a native
+    // compiled frame. Therefore, no debug information is needed.
+    //
+    // Case #2: If the thread is in any other state, then two steps will be performed:
+    // - if asserts are enabled, found_bad_method_frame() will be called and
+    //   the assert in found_bad_method_frame() will be triggered;
+    // - if asserts are disabled, the vframeStreamCommon object will be filled
+    //   as if it were a native compiled frame.
+    //
+    // Case (2) is similar to the way interpreter frames are processed in
+    // vframeStreamCommon::fill_from_interpreter_frame in case no valid BCI
+    // was found for an interpreted frame. If asserts are enabled, the assert
+    // in found_bad_method_frame() will be triggered. If asserts are disabled,
+    // the vframeStreamCommon object will be filled afterwards as if the
+    // interpreter were at the point of entering into the method.
     return false;
   }
 
@@ -229,9 +248,10 @@
     // a valid method. Then again we may have caught an interpreter
     // frame in the middle of construction and the bci field is
     // not yet valid.
-
-    *method_p = method;
     if (!method->is_valid_method()) return false;
+    *method_p = method; // If the Method* found is invalid, it is
+                        // ignored by forte_fill_call_trace_given_top().
+                        // So set method_p only if the Method is valid.
 
     address bcp = fr->interpreter_frame_bcp();
     int bci = method->validate_bci_from_bcp(bcp);
@@ -245,18 +265,33 @@
 }
 
 
-// Determine if 'fr' can be used to find an initial Java frame.
-// Return false if it can not find a fully decipherable Java frame
-// (in other words a frame that isn't safe to use in a vframe stream).
-// Obviously if it can't even find a Java frame false will also be returned.
+// Determine if a Java frame can be found starting with the frame 'fr'.
 //
-// If we find a Java frame decipherable or not then by definition we have
-// identified a method and that will be returned to the caller via method_p.
-// If we can determine a bci that is returned also. (Hmm is it possible
-// to return a method and bci and still return false? )
+// Check the return value of find_initial_Java_frame and the value of
+// 'method_p' to decide on how use the results returned by this method.
 //
-// The initial Java frame we find (if any) is return via initial_frame_p.
+// If 'method_p' is not NULL, an initial Java frame has been found and
+// the stack can be walked starting from that initial frame. In this case,
+// 'method_p' points to the Method that the initial frame belongs to and
+// the initial Java frame is returned in initial_frame_p.
 //
+// find_initial_Java_frame() returns true if a Method has been found (i.e.,
+// 'method_p' is not NULL) and the initial frame that belongs to that Method
+// is decipherable.
+//
+// A frame is considered to be decipherable:
+//
+// - if the frame is a compiled frame and a PCDesc is available;
+//
+// - if the frame is an interpreter frame that is valid or the thread is
+//   state (_thread_in_native || state == _thread_in_vm || state == _thread_blocked).
+//
+// Note that find_initial_Java_frame() can return false even if an initial
+// Java method was found (e.g., there is no PCDesc available for the method).
+//
+// If 'method_p' is NULL, it was not possible to find a Java frame when
+// walking the stack starting from 'fr'. In this case find_initial_Java_frame
+// returns false.
 
 static bool find_initial_Java_frame(JavaThread* thread,
                                     frame* fr,
@@ -276,8 +311,6 @@
   // recognizable to us. This should only happen if we are in a JRT_LEAF
   // or something called by a JRT_LEAF method.
 
-
-
   frame candidate = *fr;
 
   // If the starting frame we were given has no codeBlob associated with
@@ -332,9 +365,11 @@
       nmethod* nm = (nmethod*) candidate.cb();
       *method_p = nm->method();
 
-      // If the frame isn't fully decipherable then the default
-      // value for the bci is a signal that we don't have a bci.
-      // If we have a decipherable frame this bci value will
+      // If the frame is not decipherable, then the value of -1
+      // for the BCI is used to signal that no BCI is available.
+      // Furthermore, the method returns false in this case.
+      //
+      // If a decipherable frame is available, the BCI value will
       // not be used.
 
       *bci_p = -1;
@@ -345,9 +380,9 @@
 
       if (nm->is_native_method()) return true;
 
-      // If it isn't decipherable then we have found a pc that doesn't
-      // have a PCDesc that can get us a bci however we did find
-      // a method
+      // If the frame is not decipherable, then a PC was found
+      // that does not have a PCDesc from which a BCI can be obtained.
+      // Nevertheless, a Method was found.
 
       if (!is_decipherable_compiled_frame(thread, &candidate, nm)) {
         return false;
@@ -356,7 +391,7 @@
       // is_decipherable_compiled_frame may modify candidate's pc
       *initial_frame_p = candidate;
 
-      assert(nm->pc_desc_at(candidate.pc()) != NULL, "if it's decipherable then pc must be valid");
+      assert(nm->pc_desc_at(candidate.pc()) != NULL, "debug information must be available if the frame is decipherable");
 
       return true;
     }
@@ -386,17 +421,17 @@
 
   frame initial_Java_frame;
   Method* method;
-  int bci;
+  int bci = -1; // assume BCI is not available for method
+                // update with correct information if available
   int count;
 
   count = 0;
   assert(trace->frames != NULL, "trace->frames must be non-NULL");
 
-  bool fully_decipherable = find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci);
+  // Walk the stack starting from 'top_frame' and search for an initial Java frame.
+  find_initial_Java_frame(thd, &top_frame, &initial_Java_frame, &method, &bci);
 
-  // The frame might not be walkable but still recovered a method
-  // (e.g. an nmethod with no scope info for the pc)
-
+  // Check if a Java Method has been found.
   if (method == NULL) return;
 
   if (!method->is_valid_method()) {
@@ -404,29 +439,6 @@
     return;
   }
 
-  // We got a Java frame however it isn't fully decipherable
-  // so it won't necessarily be safe to use it for the
-  // initial frame in the vframe stream.
-
-  if (!fully_decipherable) {
-    // Take whatever method the top-frame decoder managed to scrape up.
-    // We look further at the top frame only if non-safepoint
-    // debugging information is available.
-    count++;
-    trace->num_frames = count;
-    trace->frames[0].method_id = method->find_jmethod_id_or_null();
-    if (!method->is_native()) {
-      trace->frames[0].lineno = bci;
-    } else {
-      trace->frames[0].lineno = -3;
-    }
-
-    if (!initial_Java_frame.safe_for_sender(thd)) return;
-
-    RegisterMap map(thd, false);
-    initial_Java_frame = initial_Java_frame.sender(&map);
-  }
-
   vframeStreamForte st(thd, initial_Java_frame, false);
 
   for (; !st.at_end() && count < depth; st.forte_next(), count++) {
--- a/src/share/vm/prims/methodHandles.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/prims/methodHandles.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -940,6 +940,24 @@
   return rfill + overflow;
 }
 
+// Get context class for a CallSite instance: either extract existing context or use default one.
+InstanceKlass* MethodHandles::get_call_site_context(oop call_site) {
+  // In order to extract a context the following traversal is performed:
+  //   CallSite.context => Cleaner.referent => Class._klass => Klass
+  assert(java_lang_invoke_CallSite::is_instance(call_site), "");
+  oop context_oop = java_lang_invoke_CallSite::context_volatile(call_site);
+  if (oopDesc::is_null(context_oop)) {
+    return NULL; // The context hasn't been initialized yet.
+  }
+  oop context_class_oop = java_lang_ref_Reference::referent(context_oop);
+  if (oopDesc::is_null(context_class_oop)) {
+    // The context reference was cleared by GC, so current dependency context
+    // isn't usable anymore. Context should be fetched from CallSite again.
+    return NULL;
+  }
+  return InstanceKlass::cast(java_lang_Class::as_Klass(context_class_oop));
+}
+
 //------------------------------------------------------------------------------
 // MemberNameTable
 //
@@ -1252,7 +1270,7 @@
 
 JVM_ENTRY(void, MHN_setCallSiteTargetNormal(JNIEnv* env, jobject igcls, jobject call_site_jh, jobject target_jh)) {
   Handle call_site(THREAD, JNIHandles::resolve_non_null(call_site_jh));
-  Handle target   (THREAD, JNIHandles::resolve(target_jh));
+  Handle target   (THREAD, JNIHandles::resolve_non_null(target_jh));
   {
     // Walk all nmethods depending on this call site.
     MutexLocker mu(Compile_lock, thread);
@@ -1264,7 +1282,7 @@
 
 JVM_ENTRY(void, MHN_setCallSiteTargetVolatile(JNIEnv* env, jobject igcls, jobject call_site_jh, jobject target_jh)) {
   Handle call_site(THREAD, JNIHandles::resolve_non_null(call_site_jh));
-  Handle target   (THREAD, JNIHandles::resolve(target_jh));
+  Handle target   (THREAD, JNIHandles::resolve_non_null(target_jh));
   {
     // Walk all nmethods depending on this call site.
     MutexLocker mu(Compile_lock, thread);
@@ -1274,6 +1292,33 @@
 }
 JVM_END
 
+JVM_ENTRY(void, MHN_invalidateDependentNMethods(JNIEnv* env, jobject igcls, jobject call_site_jh)) {
+  Handle call_site(THREAD, JNIHandles::resolve_non_null(call_site_jh));
+  {
+    // Walk all nmethods depending on this call site.
+    MutexLocker mu1(Compile_lock, thread);
+
+    CallSiteDepChange changes(call_site(), Handle());
+
+    InstanceKlass* ctxk = MethodHandles::get_call_site_context(call_site());
+    if (ctxk == NULL) {
+      return; // No dependencies to invalidate yet.
+    }
+    int marked = 0;
+    {
+      MutexLockerEx mu2(CodeCache_lock, Mutex::_no_safepoint_check_flag);
+      marked = ctxk->mark_dependent_nmethods(changes);
+    }
+    java_lang_invoke_CallSite::set_context_volatile(call_site(), NULL); // Reset call site to initial state
+    if (marked > 0) {
+      // At least one nmethod has been marked for deoptimization
+      VM_Deoptimize op;
+      VMThread::execute(&op);
+    }
+  }
+}
+JVM_END
+
 /**
  * Throws a java/lang/UnsupportedOperationException unconditionally.
  * This is required by the specification of MethodHandle.invoke if
@@ -1327,6 +1372,7 @@
   {CC"objectFieldOffset",         CC"("MEM")J",                          FN_PTR(MHN_objectFieldOffset)},
   {CC"setCallSiteTargetNormal",   CC"("CS""MH")V",                       FN_PTR(MHN_setCallSiteTargetNormal)},
   {CC"setCallSiteTargetVolatile", CC"("CS""MH")V",                       FN_PTR(MHN_setCallSiteTargetVolatile)},
+  {CC"invalidateDependentNMethods", CC"("CS")V",                         FN_PTR(MHN_invalidateDependentNMethods)},
   {CC"staticFieldOffset",         CC"("MEM")J",                          FN_PTR(MHN_staticFieldOffset)},
   {CC"staticFieldBase",           CC"("MEM")"OBJ,                        FN_PTR(MHN_staticFieldBase)},
   {CC"getMemberVMInfo",           CC"("MEM")"OBJ,                        FN_PTR(MHN_getMemberVMInfo)}
--- a/src/share/vm/prims/methodHandles.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/prims/methodHandles.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -68,6 +68,9 @@
   // bit values for suppress argument to expand_MemberName:
   enum { _suppress_defc = 1, _suppress_name = 2, _suppress_type = 4 };
 
+  // CallSite support
+  static InstanceKlass* get_call_site_context(oop call_site);
+
   // Generate MethodHandles adapters.
   static bool generate_adapters();
 
--- a/src/share/vm/runtime/globals.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/runtime/globals.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -2226,6 +2226,9 @@
           "When +ReduceInitialCardMarks, explicitly defer any that "        \
           "may arise from new_pre_store_barrier")                           \
                                                                             \
+  product(bool, UseCondCardMark, false,                                     \
+          "Check for already marked card before updating card table")       \
+                                                                            \
   diagnostic(bool, VerifyRememberedSets, false,                             \
           "Verify GC remembered sets")                                      \
                                                                             \
@@ -3899,7 +3902,11 @@
           "Use locked-tracing when doing event-based tracing")              \
                                                                             \
   diagnostic(bool, UseUnalignedAccesses, false,                             \
-          "Use unaligned memory accesses in sun.misc.Unsafe")
+          "Use unaligned memory accesses in sun.misc.Unsafe")               \
+                                                                            \
+  product_pd(bool, PreserveFramePointer,                                    \
+             "Use the FP register for holding the frame pointer "           \
+             "and not as a general purpose register.")
 
 /*
  *  Macros for factoring of globals
--- a/src/share/vm/runtime/sharedRuntime.cpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/runtime/sharedRuntime.cpp	Thu Apr 30 18:14:58 2015 -0400
@@ -1179,7 +1179,7 @@
 #endif
 
   // JSR 292 key invariant:
-  // If the resolved method is a MethodHandle invoke target the call
+  // If the resolved method is a MethodHandle invoke target, the call
   // site must be a MethodHandle call site, because the lambda form might tail-call
   // leaving the stack in a state unknown to either caller or callee
   // TODO detune for now but we might need it again
--- a/src/share/vm/runtime/vframe.hpp	Wed Apr 29 02:35:29 2015 +0200
+++ b/src/share/vm/runtime/vframe.hpp	Thu Apr 30 18:14:58 2015 -0400
@@ -389,12 +389,12 @@
       decode_offset < 0 ||
       decode_offset >= nm()->scopes_data_size()) {
     // 6379830 AsyncGetCallTrace sometimes feeds us wild frames.
-    // If we attempt to read nmethod::scopes_data at serialized_null (== 0),
-    // or if we read some at other crazy offset,
-    // we will decode garbage and make wild references into the heap,
-    // leading to crashes in product mode.
-    // (This isn't airtight, of course, since there are internal
-    // offsets which are also crazy.)
+    // If we read nmethod::scopes_data at serialized_null (== 0)
+    // or if read some at other invalid offset, invalid values will be decoded.
+    // Based on these values, invalid heap locations could be referenced
+    // that could lead to crashes in product mode.
+    // Therefore, do not use the decode offset if invalid, but fill the frame
+    // as it were a native compiled frame (no Java-level assumptions).
 #ifdef ASSERT
     if (WizardMode) {
       tty->print_cr("Error in fill_from_frame: pc_desc for "
@@ -514,9 +514,15 @@
   address   bcp    = _frame.interpreter_frame_bcp();
   int       bci    = method->validate_bci_from_bcp(bcp);
   // 6379830 AsyncGetCallTrace sometimes feeds us wild frames.
+  // AsyncGetCallTrace interrupts the VM asynchronously. As a result
+  // it is possible to access an interpreter frame for which
+  // no Java-level information is yet available (e.g., becasue
+  // the frame was being created when the VM interrupted it).
+  // In this scenario, pretend that the interpreter is at the point
+  // of entering the method.
   if (bci < 0) {
     found_bad_method_frame();
-    bci = 0;  // pretend it's on the point of entering
+    bci = 0;
   }
   _mode   = interpreted_mode;
   _method = method;
--- a/test/compiler/arraycopy/TestArrayCopyNoInitDeopt.java	Wed Apr 29 02:35:29 2015 +0200
+++ b/test/compiler/arraycopy/TestArrayCopyNoInitDeopt.java	Thu Apr 30 18:14:58 2015 -0400
@@ -116,44 +116,46 @@
                 throw new RuntimeException("m1 deoptimized again");
             }
 
-            // Same test as above but with speculative types
+            if (WHITE_BOX.getUintxVMFlag("TypeProfileLevel") == 20) {
+                // Same test as above but with speculative types
 
-            // Warm up & make sure we collect type profiling
-            for (int i = 0; i < 20000; i++) {
-                m2(src);
-            }
+                // Warm up & make sure we collect type profiling
+                for (int i = 0; i < 20000; i++) {
+                    m2(src);
+                }
 
-            // And make sure m2 is compiled by C2
-            WHITE_BOX.enqueueMethodForCompilation(method_m2, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION);
+                // And make sure m2 is compiled by C2
+                WHITE_BOX.enqueueMethodForCompilation(method_m2, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION);
 
-            if (!WHITE_BOX.isMethodCompiled(method_m2)) {
-                throw new RuntimeException("m2 not compiled");
-            }
+                if (!WHITE_BOX.isMethodCompiled(method_m2)) {
+                    throw new RuntimeException("m2 not compiled");
+                }
 
-            // should deoptimize for speculative type check
-            if (!deoptimize(method_m2, src_obj)) {
-                throw new RuntimeException("m2 not deoptimized");
-            }
+                // should deoptimize for speculative type check
+                if (!deoptimize(method_m2, src_obj)) {
+                    throw new RuntimeException("m2 not deoptimized");
+                }
 
-            WHITE_BOX.enqueueMethodForCompilation(method_m2, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION);
+                WHITE_BOX.enqueueMethodForCompilation(method_m2, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION);
 
-            if (!WHITE_BOX.isMethodCompiled(method_m2)) {
-                throw new RuntimeException("m2 not recompiled");
-            }
+                if (!WHITE_BOX.isMethodCompiled(method_m2)) {
+                    throw new RuntimeException("m2 not recompiled");
+                }
 
-            // should deoptimize for actual type check
-            if (!deoptimize(method_m2, src_obj)) {
-                throw new RuntimeException("m2 not deoptimized");
-            }
+                // should deoptimize for actual type check
+                if (!deoptimize(method_m2, src_obj)) {
+                    throw new RuntimeException("m2 not deoptimized");
+                }
 
-            WHITE_BOX.enqueueMethodForCompilation(method_m2, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION);
+                WHITE_BOX.enqueueMethodForCompilation(method_m2, CompilerWhiteBoxTest.COMP_LEVEL_FULL_OPTIMIZATION);
 
-            if (!WHITE_BOX.isMethodCompiled(method_m2)) {
-                throw new RuntimeException("m2 not recompiled");
-            }
+                if (!WHITE_BOX.isMethodCompiled(method_m2)) {
+                    throw new RuntimeException("m2 not recompiled");
+                }
 
-            if (deoptimize(method_m2, src_obj)) {
-                throw new RuntimeException("m2 deoptimized again");
+                if (deoptimize(method_m2, src_obj)) {
+                    throw new RuntimeException("m2 deoptimized again");
+                }
             }
         }
     }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/jsr292/CallSiteDepContextTest.java	Thu Apr 30 18:14:58 2015 -0400
@@ -0,0 +1,179 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/**
+ * @test
+ * @bug 8057967
+ * @run main/bootclasspath -Xbatch java.lang.invoke.CallSiteDepContextTest
+ */
+package java.lang.invoke;
+
+import java.lang.ref.*;
+import jdk.internal.org.objectweb.asm.*;
+import sun.misc.Unsafe;
+
+import static jdk.internal.org.objectweb.asm.Opcodes.*;
+
+public class CallSiteDepContextTest {
+    static final Unsafe               UNSAFE = Unsafe.getUnsafe();
+    static final MethodHandles.Lookup LOOKUP = MethodHandles.Lookup.IMPL_LOOKUP;
+    static final String           CLASS_NAME = "java/lang/invoke/Test";
+    static final String          METHOD_NAME = "m";
+    static final MethodType             TYPE = MethodType.methodType(int.class);
+
+    static MutableCallSite mcs;
+    static MethodHandle bsmMH;
+
+    static {
+        try {
+            bsmMH = LOOKUP.findStatic(
+                    CallSiteDepContextTest.class, "bootstrap",
+                    MethodType.methodType(CallSite.class, MethodHandles.Lookup.class, String.class, MethodType.class));
+        } catch(Throwable e) {
+            throw new InternalError(e);
+        }
+    }
+
+    public static CallSite bootstrap(MethodHandles.Lookup caller,
+                                     String invokedName,
+                                     MethodType invokedType) {
+        return mcs;
+    }
+
+    static class T {
+        static int f1() { return 1; }
+        static int f2() { return 2; }
+    }
+
+    static byte[] getClassFile(String suffix) {
+        ClassWriter cw = new ClassWriter(ClassWriter.COMPUTE_FRAMES | ClassWriter.COMPUTE_MAXS);
+        MethodVisitor mv;
+        cw.visit(52, ACC_PUBLIC | ACC_SUPER, CLASS_NAME + suffix, null, "java/lang/Object", null);
+        {
+            mv = cw.visitMethod(ACC_PUBLIC | ACC_STATIC, METHOD_NAME, TYPE.toMethodDescriptorString(), null, null);
+            mv.visitCode();
+            Handle bsm = new Handle(H_INVOKESTATIC,
+                    "java/lang/invoke/CallSiteDepContextTest", "bootstrap",
+                    bsmMH.type().toMethodDescriptorString());
+            mv.visitInvokeDynamicInsn("methodName", TYPE.toMethodDescriptorString(), bsm);
+            mv.visitInsn(IRETURN);
+            mv.visitMaxs(0, 0);
+            mv.visitEnd();
+        }
+        cw.visitEnd();
+        return cw.toByteArray();
+    }
+
+    private static void execute(int expected, MethodHandle... mhs) throws Throwable {
+        for (int i = 0; i < 20_000; i++) {
+            for (MethodHandle mh : mhs) {
+                int r = (int) mh.invokeExact();
+                if (r != expected) {
+                    throw new Error(r + " != " + expected);
+                }
+            }
+        }
+    }
+
+    public static void testSharedCallSite() throws Throwable {
+        Class<?> cls1 = UNSAFE.defineAnonymousClass(Object.class, getClassFile("CS_1"), null);
+        Class<?> cls2 = UNSAFE.defineAnonymousClass(Object.class, getClassFile("CS_2"), null);
+
+        MethodHandle[] mhs = new MethodHandle[] {
+            LOOKUP.findStatic(cls1, METHOD_NAME, TYPE),
+            LOOKUP.findStatic(cls2, METHOD_NAME, TYPE)
+        };
+
+        mcs = new MutableCallSite(LOOKUP.findStatic(T.class, "f1", TYPE));
+        execute(1, mhs);
+        mcs.setTarget(LOOKUP.findStatic(T.class, "f2", TYPE));
+        execute(2, mhs);
+    }
+
+    public static void testNonBoundCallSite() throws Throwable {
+        mcs = new MutableCallSite(LOOKUP.findStatic(T.class, "f1", TYPE));
+
+        // mcs.context == null
+        MethodHandle mh = mcs.dynamicInvoker();
+        execute(1, mh);
+
+        // mcs.context == cls1
+        Class<?> cls1 = UNSAFE.defineAnonymousClass(Object.class, getClassFile("NonBound_1"), null);
+        MethodHandle mh1 = LOOKUP.findStatic(cls1, METHOD_NAME, TYPE);
+
+        execute(1, mh1);
+
+        mcs.setTarget(LOOKUP.findStatic(T.class, "f2", TYPE));
+
+        execute(2, mh, mh1);
+    }
+
+    static ReferenceQueue rq = new ReferenceQueue();
+    static PhantomReference ref;
+
+    public static void testGC() throws Throwable {
+        mcs = new MutableCallSite(LOOKUP.findStatic(T.class, "f1", TYPE));
+
+        Class<?>[] cls = new Class[] {
+                UNSAFE.defineAnonymousClass(Object.class, getClassFile("GC_1"), null),
+                UNSAFE.defineAnonymousClass(Object.class, getClassFile("GC_2"), null),
+        };
+
+        MethodHandle[] mhs = new MethodHandle[] {
+                LOOKUP.findStatic(cls[0], METHOD_NAME, TYPE),
+                LOOKUP.findStatic(cls[1], METHOD_NAME, TYPE),
+        };
+
+        // mcs.context == cls[0]
+        int r = (int) mhs[0].invokeExact();
+
+        execute(1, mhs);
+
+        ref = new PhantomReference<>(cls[0], rq);
+        cls[0] = UNSAFE.defineAnonymousClass(Object.class, getClassFile("GC_3"), null);
+        mhs[0] = LOOKUP.findStatic(cls[0], METHOD_NAME, TYPE);
+
+        do {
+            System.gc();
+            try {
+                Reference ref1 = rq.remove(1000);
+                if (ref1 == ref) {
+                    ref1.clear();
+                    System.gc(); // Ensure that the stale context is cleared
+                    break;
+                }
+            } catch(InterruptedException e) { /* ignore */ }
+        } while (true);
+
+        execute(1, mhs);
+        mcs.setTarget(LOOKUP.findStatic(T.class, "f2", TYPE));
+        execute(2, mhs);
+    }
+
+    public static void main(String[] args) throws Throwable {
+        testSharedCallSite();
+        testNonBoundCallSite();
+        testGC();
+        System.out.println("TEST PASSED");
+    }
+}
--- a/test/compiler/jsr292/MHInlineTest.java	Wed Apr 29 02:35:29 2015 +0200
+++ b/test/compiler/jsr292/MHInlineTest.java	Thu Apr 30 18:14:58 2015 -0400
@@ -48,15 +48,12 @@
         // The test is applicable only to C2 (present in Server VM).
         if (analyzer.getStderr().contains("Server VM")) {
             analyzer.shouldContain("MHInlineTest$B::public_x (3 bytes)   inline (hot)");
-            analyzer.shouldContain(   "MHInlineTest$B::protected_x (3 bytes)   inline (hot)");
-            analyzer.shouldContain(   "MHInlineTest$B::package_x (3 bytes)   inline (hot)");
+            analyzer.shouldContain("MHInlineTest$B::protected_x (3 bytes)   inline (hot)");
+            analyzer.shouldContain("MHInlineTest$B::package_x (3 bytes)   inline (hot)");
             analyzer.shouldContain("MHInlineTest$A::package_final_x (3 bytes)   inline (hot)");
             analyzer.shouldContain("MHInlineTest$B::private_x (3 bytes)   inline (hot)");
             analyzer.shouldContain("MHInlineTest$B::private_static_x (3 bytes)   inline (hot)");
             analyzer.shouldContain("MHInlineTest$A::package_static_x (3 bytes)   inline (hot)");
-
-            analyzer.shouldNotContain("MHInlineTest$A::protected_x (3 bytes)   virtual call");
-            analyzer.shouldNotContain("MHInlineTest$A::package_x (3 bytes)   virtual call");
         }
     }
 
@@ -179,6 +176,7 @@
             throw new Error(throwable);
         }
     }
+
     static class Launcher {
         public static void main(String[] args) throws Exception {
             for (int i = 0; i < 20_000; i++) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/loopopts/TestPredicateLostDependency.java	Thu Apr 30 18:14:58 2015 -0400
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8069191
+ * @summary predicate moved out of loops and CastPP removal causes dependency to be lost
+ * @run main/othervm -Xcomp -XX:CompileOnly=TestPredicateLostDependency.m1 -XX:+IgnoreUnrecognizedVMOptions -XX:+StressGCM TestPredicateLostDependency
+ *
+ */
+
+public class TestPredicateLostDependency {
+    static class A {
+        int i;
+    }
+
+    static class B extends A {
+    }
+
+    static boolean crash = false;
+
+    static boolean m2() {
+        return crash;
+    }
+
+    static int m3(float[] arr) {
+        return 0;
+    }
+
+    static float m1(A aa) {
+        float res = 0;
+        float[] arr = new float[10];
+        for (int i = 0; i < 10; i++) {
+            if (m2()) {
+                arr = null;
+            }
+            m3(arr);
+            int j = arr.length;
+            int k = 0;
+            for (k = 9; k < j; k++) {
+            }
+            if (k == 10) {
+                if (aa instanceof B) {
+                }
+            }
+            res += arr[0];
+            res += arr[1];
+        }
+        return res;
+    }
+
+    static public void main(String args[]) {
+        A a = new A();
+        B b = new B();
+        for (int i = 0; i < 20000; i++) {
+            m1(a);
+        }
+        crash = true;
+        try {
+            m1(a);
+        } catch (NullPointerException npe) {}
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/compiler/loopopts/TestSplitIfUnswitchedLoopsEliminated.java	Thu Apr 30 18:14:58 2015 -0400
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+/**
+ * @test
+ * @bug 8078426
+ * @summary split if finds predicates on several incoming paths when unswitched's loops are optimized out
+ * @run main/othervm -XX:+IgnoreUnrecognizedVMOptions -XX:-UseOnStackReplacement -XX:-BackgroundCompilation -XX:-UseCompressedOops TestSplitIfUnswitchedLoopsEliminated
+ *
+ */
+
+
+public class TestSplitIfUnswitchedLoopsEliminated {
+
+    static class A {
+        int f;
+    }
+
+    static A aa = new A();
+    static A aaa = new A();
+
+    static int test_helper(int stop, boolean unswitch) {
+        A a = null;
+        for (int i = 3; i < 10; i++) {
+            if (unswitch) {
+                a = null;
+            } else {
+                a = aa;
+                int v = a.f;
+            }
+        }
+        if (stop != 4) {
+            a = aaa;
+        }
+        if (a != null) {
+            return a.f;
+        }
+        return 0;
+    }
+
+    static int test(boolean unswitch) {
+        int stop = 1;
+        for (; stop < 3; stop *= 4) {
+        }
+        return test_helper(stop, unswitch);
+    }
+
+    public static void main(String[] args) {
+        for (int i = 0; i < 20000; i++) {
+            test_helper(10, i%2 == 0);
+            test(i%2 == 0);
+        }
+    }
+}