changeset 8490:bf92b8db249c jdk9-b64

Merge
author lana
date Thu, 07 May 2015 20:51:12 -0700
parents d6b3ad413923 d818fe2baeb6
children 8eeff9bf83aa 6e2422a230fd 2b9b174db80d
files agent/src/share/classes/sun/jvm/hotspot/memory/SharedHeap.java agent/src/share/classes/sun/jvm/hotspot/runtime/VirtualSpace.java src/cpu/x86/vm/rtmLocking.cpp src/cpu/x86/vm/templateTable_x86_32.hpp src/cpu/x86/vm/templateTable_x86_64.hpp src/share/vm/gc_implementation/shared/parGCAllocBuffer.cpp src/share/vm/gc_implementation/shared/parGCAllocBuffer.hpp src/share/vm/gc_implementation/shared/parGCAllocBuffer.inline.hpp src/share/vm/memory/sharedHeap.cpp src/share/vm/memory/sharedHeap.hpp src/share/vm/oops/instanceClassLoaderKlass.cpp src/share/vm/oops/klassPS.hpp src/share/vm/oops/oop.pcgc.inline.hpp src/share/vm/oops/oop.psgc.inline.hpp src/share/vm/runtime/virtualspace.cpp src/share/vm/runtime/virtualspace.hpp test/sanity/WhiteBox.java
diffstat 547 files changed, 18383 insertions(+), 10685 deletions(-) [+]
line wrap: on
line diff
--- a/agent/src/os/bsd/libproc_impl.c	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/os/bsd/libproc_impl.c	Thu May 07 20:51:12 2015 -0700
@@ -215,7 +215,12 @@
     return NULL;
   }
 
-  strncpy(newlib->name, libname, sizeof(newlib->name));
+  if (strlen(libname) >= sizeof(newlib->name)) {
+    print_debug("libname %s too long\n", libname);
+    return NULL;
+  }
+  strcpy(newlib->name, libname);
+
   newlib->base = base;
 
   if (fd == -1) {
--- a/agent/src/os/linux/libproc_impl.c	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/os/linux/libproc_impl.c	Thu May 07 20:51:12 2015 -0700
@@ -159,7 +159,12 @@
       return NULL;
    }
 
-   strncpy(newlib->name, libname, sizeof(newlib->name));
+   if (strlen(libname) >= sizeof(newlib->name)) {
+     print_debug("libname %s too long\n", libname);
+     return NULL;
+   }
+   strcpy(newlib->name, libname);
+
    newlib->base = base;
 
    if (fd == -1) {
--- a/agent/src/share/classes/com/sun/java/swing/action/ActionManager.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/com/sun/java/swing/action/ActionManager.java	Thu May 07 20:51:12 2015 -0700
@@ -46,6 +46,11 @@
         return manager;
     }
 
+    protected static void setInstance(ActionManager m)
+    {
+        manager = m;
+    }
+
     protected abstract void addActions();
 
     protected void addAction(String cmdname, Action action)
@@ -90,6 +95,6 @@
 
     private HashMap actions;
     private static ActionUtilities utilities = new ActionUtilities();
-    protected static ActionManager manager;
+    private static ActionManager manager;
 
 }
--- a/agent/src/share/classes/com/sun/java/swing/ui/CommonToolBar.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/com/sun/java/swing/ui/CommonToolBar.java	Thu May 07 20:51:12 2015 -0700
@@ -46,7 +46,7 @@
     {
         this.manager = manager;
         statusBar = status;
-        buttonSize = new Dimension(CommonUI.buttconPrefSize);
+        buttonSize = new Dimension(CommonUI.getButtconPrefSize());
         buttonInsets = new Insets(0, 0, 0, 0);
         addComponents();
     }
--- a/agent/src/share/classes/com/sun/java/swing/ui/CommonUI.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/com/sun/java/swing/ui/CommonUI.java	Thu May 07 20:51:12 2015 -0700
@@ -373,20 +373,25 @@
         comp.setCursor(Cursor.getPredefinedCursor(0));
     }
 
-    public static final int BUTTON_WIDTH = 100;
-    public static final int BUTTON_HEIGHT = 26;
-    public static final int BUTTCON_WIDTH = 28;
-    public static final int BUTTCON_HEIGHT = 28;
-    public static final int SM_BUTTON_WIDTH = 72;
-    public static final int SM_BUTTON_HEIGHT = 26;
-    public static final int LABEL_WIDTH = 100;
-    public static final int LABEL_HEIGHT = 20;
-    public static final int TEXT_WIDTH = 150;
-    public static final int TEXT_HEIGHT = 20;
-    public static Dimension buttonPrefSize = new Dimension(100, 26);
-    public static Dimension buttconPrefSize = new Dimension(28, 28);
-    public static Dimension smbuttonPrefSize = new Dimension(72, 26);
-    public static Dimension labelPrefSize = new Dimension(100, 20);
-    public static Dimension textPrefSize = new Dimension(150, 20);
+    public static Dimension getButtconPrefSize()
+    {
+        return buttconPrefSize;
+    }
+
+    private static final int BUTTON_WIDTH = 100;
+    private static final int BUTTON_HEIGHT = 26;
+    private static final int BUTTCON_WIDTH = 28;
+    private static final int BUTTCON_HEIGHT = 28;
+    private static final int SM_BUTTON_WIDTH = 72;
+    private static final int SM_BUTTON_HEIGHT = 26;
+    private static final int LABEL_WIDTH = 100;
+    private static final int LABEL_HEIGHT = 20;
+    private static final int TEXT_WIDTH = 150;
+    private static final int TEXT_HEIGHT = 20;
+    private static final Dimension buttonPrefSize = new Dimension(100, 26);
+    private static final Dimension buttconPrefSize = new Dimension(28, 28);
+    private static final Dimension smbuttonPrefSize = new Dimension(72, 26);
+    private static final Dimension labelPrefSize = new Dimension(100, 20);
+    private static final Dimension textPrefSize = new Dimension(150, 20);
 
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/ci/ciMethodData.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/ci/ciMethodData.java	Thu May 07 20:51:12 2015 -0700
@@ -148,7 +148,7 @@
   ParametersTypeData<ciKlass,ciMethod> parametersTypeData() {
     Address base = getAddress().addOffsetTo(origField.getOffset());
     int di = (int)parametersTypeDataDi.getValue(base);
-    if (di == -1) {
+    if (di == -1 || di == -2) {
       return null;
     }
     DataLayout dataLayout = new DataLayout(dataField.getValue(getAddress()), di);
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_implementation/g1/G1CollectedHeap.java	Thu May 07 20:51:12 2015 -0700
@@ -29,9 +29,9 @@
 import java.util.Observer;
 
 import sun.jvm.hotspot.debugger.Address;
+import sun.jvm.hotspot.gc_interface.CollectedHeap;
 import sun.jvm.hotspot.gc_interface.CollectedHeapName;
 import sun.jvm.hotspot.memory.MemRegion;
-import sun.jvm.hotspot.memory.SharedHeap;
 import sun.jvm.hotspot.memory.SpaceClosure;
 import sun.jvm.hotspot.runtime.VM;
 import sun.jvm.hotspot.runtime.VMObjectFactory;
@@ -41,7 +41,7 @@
 
 // Mirror class for G1CollectedHeap.
 
-public class G1CollectedHeap extends SharedHeap {
+public class G1CollectedHeap extends CollectedHeap {
     // HeapRegionManager _hrm;
     static private long hrmFieldOffset;
     // MemRegion _g1_reserved;
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_interface/CollectedHeap.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_interface/CollectedHeap.java	Thu May 07 20:51:12 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2005, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,7 +32,7 @@
 import sun.jvm.hotspot.runtime.*;
 import sun.jvm.hotspot.types.*;
 
-public class CollectedHeap extends VMObject {
+public abstract class CollectedHeap extends VMObject {
   private static long         reservedFieldOffset;
 
   static {
@@ -73,9 +73,7 @@
     return reservedRegion().contains(a);
   }
 
-  public CollectedHeapName kind() {
-    return CollectedHeapName.ABSTRACT;
-  }
+  public abstract CollectedHeapName kind();
 
   public void print() { printOn(System.out); }
   public void printOn(PrintStream tty) {
--- a/agent/src/share/classes/sun/jvm/hotspot/gc_interface/CollectedHeapName.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/gc_interface/CollectedHeapName.java	Thu May 07 20:51:12 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,8 +31,6 @@
 
   private CollectedHeapName(String name) { this.name = name; }
 
-  public static final CollectedHeapName ABSTRACT = new CollectedHeapName("abstract");
-  public static final CollectedHeapName SHARED_HEAP = new CollectedHeapName("SharedHeap");
   public static final CollectedHeapName GEN_COLLECTED_HEAP = new CollectedHeapName("GenCollectedHeap");
   public static final CollectedHeapName G1_COLLECTED_HEAP = new CollectedHeapName("G1CollectedHeap");
   public static final CollectedHeapName PARALLEL_SCAVENGE_HEAP = new CollectedHeapName("ParallelScavengeHeap");
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/GenCollectedHeap.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/memory/GenCollectedHeap.java	Thu May 07 20:51:12 2015 -0700
@@ -33,8 +33,7 @@
 import sun.jvm.hotspot.types.*;
 import sun.jvm.hotspot.utilities.*;
 
-public class GenCollectedHeap extends SharedHeap {
-  private static CIntegerField nGensField;
+public class GenCollectedHeap extends CollectedHeap {
   private static AddressField youngGenField;
   private static AddressField oldGenField;
 
@@ -54,7 +53,6 @@
   private static synchronized void initialize(TypeDataBase db) {
     Type type = db.lookupType("GenCollectedHeap");
 
-    nGensField = type.getCIntegerField("_n_gens");
     youngGenField = type.getAddressField("_young_gen");
     oldGenField = type.getAddressField("_old_gen");
 
@@ -70,7 +68,7 @@
   }
 
   public int nGens() {
-    return (int) nGensField.getValue(addr);
+    return 2; // Young + Old
   }
 
   public Generation getGen(int i) {
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/SharedHeap.java	Thu May 07 10:19:31 2015 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-package sun.jvm.hotspot.memory;
-
-import java.io.*;
-import java.util.*;
-
-import sun.jvm.hotspot.debugger.*;
-import sun.jvm.hotspot.gc_interface.*;
-import sun.jvm.hotspot.runtime.*;
-import sun.jvm.hotspot.types.*;
-
-public abstract class SharedHeap extends CollectedHeap {
-  private static VirtualConstructor ctor;
-
-  static {
-    VM.registerVMInitializedObserver(new Observer() {
-        public void update(Observable o, Object data) {
-          initialize(VM.getVM().getTypeDataBase());
-        }
-      });
-  }
-
-  private static synchronized void initialize(TypeDataBase db) {
-    Type type = db.lookupType("SharedHeap");
-    ctor = new VirtualConstructor(db);
-  }
-
-  public SharedHeap(Address addr) {
-    super(addr);
-  }
-
-  public CollectedHeapName kind() {
-    return CollectedHeapName.SHARED_HEAP;
-  }
-  }
--- a/agent/src/share/classes/sun/jvm/hotspot/memory/Universe.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/memory/Universe.java	Thu May 07 20:51:12 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -112,11 +112,7 @@
     return "";
   }
   public CollectedHeap heap() {
-    try {
-      return (CollectedHeap) heapConstructor.instantiateWrapperFor(collectedHeapField.getValue());
-    } catch (WrongTypeException e) {
-      return new CollectedHeap(collectedHeapField.getValue());
-    }
+    return (CollectedHeap) heapConstructor.instantiateWrapperFor(collectedHeapField.getValue());
   }
 
   public static long getNarrowOopBase() {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/share/classes/sun/jvm/hotspot/memory/VirtualSpace.java	Thu May 07 20:51:12 2015 -0700
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2000, 2002, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.memory;
+
+import java.util.*;
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.types.*;
+
+public class VirtualSpace extends VMObject {
+  private static AddressField lowField;
+  private static AddressField highField;
+  private static AddressField lowBoundaryField;
+  private static AddressField highBoundaryField;
+
+  static {
+    VM.registerVMInitializedObserver(new Observer() {
+        public void update(Observable o, Object data) {
+          initialize(VM.getVM().getTypeDataBase());
+        }
+      });
+  }
+
+  private static synchronized void initialize(TypeDataBase db) {
+    Type type = db.lookupType("VirtualSpace");
+
+    lowField          = type.getAddressField("_low");
+    highField         = type.getAddressField("_high");
+    lowBoundaryField  = type.getAddressField("_low_boundary");
+    highBoundaryField = type.getAddressField("_high_boundary");
+  }
+
+  public VirtualSpace(Address addr) {
+    super(addr);
+  }
+
+  public Address low()                          { return lowField.getValue(addr);          }
+  public Address high()                         { return highField.getValue(addr);         }
+  public Address lowBoundary()                  { return lowBoundaryField.getValue(addr);  }
+  public Address highBoundary()                 { return highBoundaryField.getValue(addr); }
+
+  /** Testers (all sizes are byte sizes) */
+  public long committedSize()                   { return high().minus(low());                                    }
+  public long reservedSize()                    { return highBoundary().minus(lowBoundary());                    }
+  public long uncommittedSize()                 { return reservedSize() - committedSize();                       }
+  public boolean contains(Address addr)         { return (low().lessThanOrEqual(addr) && addr.lessThan(high())); }
+}
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/ConstantPool.java	Thu May 07 20:51:12 2015 -0700
@@ -328,7 +328,7 @@
   }
 
   public Symbol getUnresolvedStringAt(int which) {
-    return getSymbolAt(which);
+    return getSlotAt(which).getSymbol();
   }
 
   // returns null, if not resolved.
--- a/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/opto/PhaseCFG.java	Thu May 07 20:51:12 2015 -0700
@@ -42,10 +42,10 @@
 
   private static synchronized void initialize(TypeDataBase db) throws WrongTypeException {
     Type type      = db.lookupType("PhaseCFG");
-    numBlocksField = new CIntField(type.getCIntegerField("_num_blocks"), 0);
+    numBlocksField = new CIntField(type.getCIntegerField("_number_of_blocks"), 0);
     blocksField = type.getAddressField("_blocks");
     bbsField = type.getAddressField("_node_to_block_mapping");
-    brootField = type.getAddressField("_broot");
+    brootField = type.getAddressField("_root_block");
   }
 
   private static CIntField numBlocksField;
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Thu May 07 20:51:12 2015 -0700
@@ -259,8 +259,7 @@
      saProps = new Properties();
      URL url = null;
      try {
-       url = VM.class.getClassLoader().getResource("sa.properties");
-       saProps.load(new BufferedInputStream(url.openStream()));
+       saProps.load(VM.class.getResourceAsStream("/sa.properties"));
      } catch (Exception e) {
        System.err.println("Unable to load properties  " +
                                   (url == null ? "null" : url.toString()) +
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VirtualSpace.java	Thu May 07 10:19:31 2015 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2000, 2002, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-package sun.jvm.hotspot.runtime;
-
-import java.util.*;
-import sun.jvm.hotspot.debugger.*;
-import sun.jvm.hotspot.types.*;
-
-public class VirtualSpace extends VMObject {
-  private static AddressField lowField;
-  private static AddressField highField;
-  private static AddressField lowBoundaryField;
-  private static AddressField highBoundaryField;
-
-  static {
-    VM.registerVMInitializedObserver(new Observer() {
-        public void update(Observable o, Object data) {
-          initialize(VM.getVM().getTypeDataBase());
-        }
-      });
-  }
-
-  private static synchronized void initialize(TypeDataBase db) {
-    Type type = db.lookupType("VirtualSpace");
-
-    lowField          = type.getAddressField("_low");
-    highField         = type.getAddressField("_high");
-    lowBoundaryField  = type.getAddressField("_low_boundary");
-    highBoundaryField = type.getAddressField("_high_boundary");
-  }
-
-  public VirtualSpace(Address addr) {
-    super(addr);
-  }
-
-  public Address low()                          { return lowField.getValue(addr);          }
-  public Address high()                         { return highField.getValue(addr);         }
-  public Address lowBoundary()                  { return lowBoundaryField.getValue(addr);  }
-  public Address highBoundary()                 { return highBoundaryField.getValue(addr); }
-
-  /** Testers (all sizes are byte sizes) */
-  public long committedSize()                   { return high().minus(low());                                    }
-  public long reservedSize()                    { return highBoundary().minus(lowBoundary());                    }
-  public long uncommittedSize()                 { return reservedSize() - committedSize();                       }
-  public boolean contains(Address addr)         { return (low().lessThanOrEqual(addr) && addr.lessThan(high())); }
-}
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Thu May 07 20:51:12 2015 -0700
@@ -314,26 +314,17 @@
   //------------------------------------------------------------------------------
   // frame::adjust_unextended_sp
   private void adjustUnextendedSP() {
-    // If we are returning to a compiled MethodHandle call site, the
-    // saved_fp will in fact be a saved value of the unextended SP.  The
-    // simplest way to tell whether we are returning to such a call site
-    // is as follows:
+    // On x86, sites calling method handle intrinsics and lambda forms are treated
+    // as any other call site. Therefore, no special action is needed when we are
+    // returning to any of these call sites.
 
     CodeBlob cb = cb();
     NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull();
     if (senderNm != null) {
-      // If the sender PC is a deoptimization point, get the original
-      // PC.  For MethodHandle call site the unextended_sp is stored in
-      // saved_fp.
-      if (senderNm.isDeoptMhEntry(getPC())) {
-        // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP()));
-        raw_unextendedSP = getFP();
-      }
-      else if (senderNm.isDeoptEntry(getPC())) {
-        // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp));
-      }
-      else if (senderNm.isMethodHandleReturn(getPC())) {
-        raw_unextendedSP = getFP();
+      // If the sender PC is a deoptimization point, get the original PC.
+      if (senderNm.isDeoptEntry(getPC()) ||
+          senderNm.isDeoptMhEntry(getPC())) {
+        // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp));
       }
     }
   }
--- a/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java	Thu May 07 20:51:12 2015 -0700
@@ -81,53 +81,48 @@
       System.out.println();
       System.out.println("Heap Usage:");
 
-      if (heap instanceof SharedHeap) {
-         SharedHeap sharedHeap = (SharedHeap) heap;
-         if (sharedHeap instanceof GenCollectedHeap) {
-            GenCollectedHeap genHeap = (GenCollectedHeap) sharedHeap;
-            for (int n = 0; n < genHeap.nGens(); n++) {
-               Generation gen = genHeap.getGen(n);
-               if (gen instanceof sun.jvm.hotspot.memory.DefNewGeneration) {
-                  System.out.println("New Generation (Eden + 1 Survivor Space):");
-                  printGen(gen);
+      if (heap instanceof GenCollectedHeap) {
+         GenCollectedHeap genHeap = (GenCollectedHeap) heap;
+         for (int n = 0; n < genHeap.nGens(); n++) {
+            Generation gen = genHeap.getGen(n);
+            if (gen instanceof sun.jvm.hotspot.memory.DefNewGeneration) {
+               System.out.println("New Generation (Eden + 1 Survivor Space):");
+               printGen(gen);
 
-                  ContiguousSpace eden = ((DefNewGeneration)gen).eden();
-                  System.out.println("Eden Space:");
-                  printSpace(eden);
+               ContiguousSpace eden = ((DefNewGeneration)gen).eden();
+               System.out.println("Eden Space:");
+               printSpace(eden);
 
-                  ContiguousSpace from = ((DefNewGeneration)gen).from();
-                  System.out.println("From Space:");
-                  printSpace(from);
+               ContiguousSpace from = ((DefNewGeneration)gen).from();
+               System.out.println("From Space:");
+               printSpace(from);
 
-                  ContiguousSpace to = ((DefNewGeneration)gen).to();
-                  System.out.println("To Space:");
-                  printSpace(to);
-               } else {
-                  System.out.println(gen.name() + ":");
-                  printGen(gen);
-               }
+               ContiguousSpace to = ((DefNewGeneration)gen).to();
+               System.out.println("To Space:");
+               printSpace(to);
+            } else {
+               System.out.println(gen.name() + ":");
+               printGen(gen);
             }
-         } else if (sharedHeap instanceof G1CollectedHeap) {
-             G1CollectedHeap g1h = (G1CollectedHeap) sharedHeap;
-             G1MonitoringSupport g1mm = g1h.g1mm();
-             long edenRegionNum = g1mm.edenRegionNum();
-             long survivorRegionNum = g1mm.survivorRegionNum();
-             HeapRegionSetBase oldSet = g1h.oldSet();
-             HeapRegionSetBase humongousSet = g1h.humongousSet();
-             long oldRegionNum = oldSet.count().length()
-                          + humongousSet.count().capacity() / HeapRegion.grainBytes();
-             printG1Space("G1 Heap:", g1h.n_regions(),
-                          g1h.used(), g1h.capacity());
-             System.out.println("G1 Young Generation:");
-             printG1Space("Eden Space:", edenRegionNum,
-                          g1mm.edenUsed(), g1mm.edenCommitted());
-             printG1Space("Survivor Space:", survivorRegionNum,
-                          g1mm.survivorUsed(), g1mm.survivorCommitted());
-             printG1Space("G1 Old Generation:", oldRegionNum,
-                          g1mm.oldUsed(), g1mm.oldCommitted());
-         } else {
-             throw new RuntimeException("unknown SharedHeap type : " + heap.getClass());
          }
+      } else if (heap instanceof G1CollectedHeap) {
+          G1CollectedHeap g1h = (G1CollectedHeap) heap;
+          G1MonitoringSupport g1mm = g1h.g1mm();
+          long edenRegionNum = g1mm.edenRegionNum();
+          long survivorRegionNum = g1mm.survivorRegionNum();
+          HeapRegionSetBase oldSet = g1h.oldSet();
+          HeapRegionSetBase humongousSet = g1h.humongousSet();
+          long oldRegionNum = oldSet.count().length()
+                       + humongousSet.count().capacity() / HeapRegion.grainBytes();
+          printG1Space("G1 Heap:", g1h.n_regions(),
+                       g1h.used(), g1h.capacity());
+          System.out.println("G1 Young Generation:");
+          printG1Space("Eden Space:", edenRegionNum,
+                       g1mm.edenUsed(), g1mm.edenCommitted());
+          printG1Space("Survivor Space:", survivorRegionNum,
+                       g1mm.survivorUsed(), g1mm.survivorCommitted());
+          printG1Space("G1 Old Generation:", oldRegionNum,
+                       g1mm.oldUsed(), g1mm.oldCommitted());
       } else if (heap instanceof ParallelScavengeHeap) {
          ParallelScavengeHeap psh = (ParallelScavengeHeap) heap;
          PSYoungGen youngGen = psh.youngGen();
--- a/agent/src/share/classes/sun/jvm/hotspot/ui/action/HSDBActionManager.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/ui/action/HSDBActionManager.java	Thu May 07 20:51:12 2015 -0700
@@ -32,10 +32,12 @@
 public class HSDBActionManager extends ActionManager {
 
     public static ActionManager getInstance() {
-        if (manager == null) {
-            manager = new HSDBActionManager();
+        ActionManager m = ActionManager.getInstance();
+        if (m == null) {
+            m = new HSDBActionManager();
+            ActionManager.setInstance(m);
         }
-        return manager;
+        return m;
     }
 
     protected void addActions() {
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/HeapHprofBinWriter.java	Thu May 07 10:19:31 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/HeapHprofBinWriter.java	Thu May 07 20:51:12 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2004, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -799,6 +799,18 @@
         writeObjectID(klass.getJavaMirror());
 
         ClassData cd = (ClassData) classDataCache.get(klass);
+        if (cd == null) {
+            // The class is not present in the system dictionary, probably Lambda.
+            // Add it to cache here
+            if (klass instanceof InstanceKlass) {
+                InstanceKlass ik = (InstanceKlass) klass;
+                List fields = getInstanceFields(ik);
+                int instSize = getSizeForFields(fields);
+                cd = new ClassData(instSize, fields);
+                classDataCache.put(ik, cd);
+            }
+        }
+
         if (Assert.ASSERTS_ENABLED) {
             Assert.that(cd != null, "can not get class data for " + klass.getName().asString() + klass.getAddress());
         }
--- a/make/Makefile	Thu May 07 10:19:31 2015 -0700
+++ b/make/Makefile	Thu May 07 20:51:12 2015 -0700
@@ -98,7 +98,7 @@
 COMMON_VM_OPTIMIZED_TARGETS=optimized optimized1 docs export_optimized
 
 # JDK directory list
-JDK_DIRS=bin include jre lib demo
+JDK_DIRS=bin include lib demo
 
 all:           all_product all_fastdebug
 
@@ -373,33 +373,33 @@
 	$(install-file)
 $(EXPORT_LIB_DIR)/%.lib:			$(C2_BUILD_DIR)/%.lib
 	$(install-file)
-$(EXPORT_JRE_BIN_DIR)/%.diz:			$(C2_BUILD_DIR)/%.diz
+$(EXPORT_BIN_DIR)/%.diz:			$(C2_BUILD_DIR)/%.diz
 	$(install-file)
-$(EXPORT_JRE_BIN_DIR)/%.dll:			$(C2_BUILD_DIR)/%.dll
+$(EXPORT_BIN_DIR)/%.dll:			$(C2_BUILD_DIR)/%.dll
 	$(install-file)
-$(EXPORT_JRE_BIN_DIR)/%.pdb:			$(C2_BUILD_DIR)/%.pdb
+$(EXPORT_BIN_DIR)/%.pdb:			$(C2_BUILD_DIR)/%.pdb
 	$(install-file)
-$(EXPORT_JRE_BIN_DIR)/%.map:			$(C2_BUILD_DIR)/%.map
+$(EXPORT_BIN_DIR)/%.map:			$(C2_BUILD_DIR)/%.map
 	$(install-file)
 # Unix
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(C2_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
+$(EXPORT_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(C2_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
 $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX):       $(C2_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
 $(EXPORT_SERVER_DIR)/64/%.$(LIBRARY_SUFFIX):    $(C2_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo: 	$(C2_BUILD_DIR)/%.debuginfo
+$(EXPORT_LIB_ARCH_DIR)/%.debuginfo: 	$(C2_BUILD_DIR)/%.debuginfo
 	$(install-file)
 $(EXPORT_SERVER_DIR)/%.debuginfo:       	$(C2_BUILD_DIR)/%.debuginfo
 	$(install-file)
 $(EXPORT_SERVER_DIR)/64/%.debuginfo:    	$(C2_BUILD_DIR)/%.debuginfo
 	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz: 		$(C2_BUILD_DIR)/%.diz
+$(EXPORT_LIB_ARCH_DIR)/%.diz: 		$(C2_BUILD_DIR)/%.diz
 	$(install-file)
 $(EXPORT_SERVER_DIR)/64/%.diz:    		$(C2_BUILD_DIR)/%.diz
 	$(install-file)
 # MacOS X
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.dSYM: 		$(C2_BUILD_DIR)/%.dSYM
+$(EXPORT_LIB_ARCH_DIR)/%.dSYM: 		$(C2_BUILD_DIR)/%.dSYM
 	$(install-dir)
 $(EXPORT_SERVER_DIR)/%.dSYM:       		$(C2_BUILD_DIR)/%.dSYM
 	$(install-dir)
@@ -423,33 +423,33 @@
 	$(install-file)
 $(EXPORT_LIB_DIR)/%.lib:			$(C1_BUILD_DIR)/%.lib
 	$(install-file)
-$(EXPORT_JRE_BIN_DIR)/%.diz:			$(C1_BUILD_DIR)/%.diz
+$(EXPORT_BIN_DIR)/%.diz:			$(C1_BUILD_DIR)/%.diz
 	$(install-file)
-$(EXPORT_JRE_BIN_DIR)/%.dll:			$(C1_BUILD_DIR)/%.dll
+$(EXPORT_BIN_DIR)/%.dll:			$(C1_BUILD_DIR)/%.dll
 	$(install-file)
-$(EXPORT_JRE_BIN_DIR)/%.pdb:			$(C1_BUILD_DIR)/%.pdb
+$(EXPORT_BIN_DIR)/%.pdb:			$(C1_BUILD_DIR)/%.pdb
 	$(install-file)
-$(EXPORT_JRE_BIN_DIR)/%.map:			$(C1_BUILD_DIR)/%.map
+$(EXPORT_BIN_DIR)/%.map:			$(C1_BUILD_DIR)/%.map
 	$(install-file)
 # Unix
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(C1_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
+$(EXPORT_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(C1_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
 $(EXPORT_CLIENT_DIR)/%.$(LIBRARY_SUFFIX):       $(C1_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
 $(EXPORT_CLIENT_DIR)/64/%.$(LIBRARY_SUFFIX):    $(C1_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo: 	$(C1_BUILD_DIR)/%.debuginfo
+$(EXPORT_LIB_ARCH_DIR)/%.debuginfo: 	$(C1_BUILD_DIR)/%.debuginfo
 	$(install-file)
 $(EXPORT_CLIENT_DIR)/%.debuginfo:       	$(C1_BUILD_DIR)/%.debuginfo
 	$(install-file)
 $(EXPORT_CLIENT_DIR)/64/%.debuginfo:    	$(C1_BUILD_DIR)/%.debuginfo
 	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz: 		$(C1_BUILD_DIR)/%.diz
+$(EXPORT_LIB_ARCH_DIR)/%.diz: 		$(C1_BUILD_DIR)/%.diz
 	$(install-file)
 $(EXPORT_CLIENT_DIR)/64/%.diz:    		$(C1_BUILD_DIR)/%.diz
 	$(install-file)
 # MacOS X
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.dSYM: 		$(C1_BUILD_DIR)/%.dSYM
+$(EXPORT_LIB_ARCH_DIR)/%.dSYM: 		$(C1_BUILD_DIR)/%.dSYM
 	$(install-dir)
 $(EXPORT_CLIENT_DIR)/%.dSYM:       		$(C1_BUILD_DIR)/%.dSYM
 	$(install-dir)
@@ -473,28 +473,28 @@
 	$(install-file)
 $(EXPORT_LIB_DIR)/%.lib:			$(MINIMAL1_BUILD_DIR)/%.lib
 	$(install-file)
-$(EXPORT_JRE_BIN_DIR)/%.diz:			$(MINIMAL1_BUILD_DIR)/%.diz
+$(EXPORT_BIN_DIR)/%.diz:			$(MINIMAL1_BUILD_DIR)/%.diz
 	$(install-file)
-$(EXPORT_JRE_BIN_DIR)/%.dll:			$(MINIMAL1_BUILD_DIR)/%.dll
+$(EXPORT_BIN_DIR)/%.dll:			$(MINIMAL1_BUILD_DIR)/%.dll
 	$(install-file)
-$(EXPORT_JRE_BIN_DIR)/%.pdb:			$(MINIMAL1_BUILD_DIR)/%.pdb
+$(EXPORT_BIN_DIR)/%.pdb:			$(MINIMAL1_BUILD_DIR)/%.pdb
 	$(install-file)
-$(EXPORT_JRE_BIN_DIR)/%.map:			$(MINIMAL1_BUILD_DIR)/%.map
+$(EXPORT_BIN_DIR)/%.map:			$(MINIMAL1_BUILD_DIR)/%.map
 	$(install-file)
 # Unix
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX):	$(MINIMAL1_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
+$(EXPORT_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX):	$(MINIMAL1_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
 $(EXPORT_MINIMAL_DIR)/%.$(LIBRARY_SUFFIX):	$(MINIMAL1_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
 $(EXPORT_MINIMAL_DIR)/64/%.$(LIBRARY_SUFFIX):	$(MINIMAL1_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo:		$(MINIMAL1_BUILD_DIR)/%.debuginfo
+$(EXPORT_LIB_ARCH_DIR)/%.debuginfo:		$(MINIMAL1_BUILD_DIR)/%.debuginfo
 	$(install-file)
 $(EXPORT_MINIMAL_DIR)/%.debuginfo:		$(MINIMAL1_BUILD_DIR)/%.debuginfo
 	$(install-file)
 $(EXPORT_MINIMAL_DIR)/64/%.debuginfo:		$(MINIMAL1_BUILD_DIR)/%.debuginfo
 	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz:		$(MINIMAL1_BUILD_DIR)/%.diz
+$(EXPORT_LIB_ARCH_DIR)/%.diz:		$(MINIMAL1_BUILD_DIR)/%.diz
 	$(install-file)
 $(EXPORT_MINIMAL_DIR)/64/%.diz:			$(MINIMAL1_BUILD_DIR)/%.diz
 	$(install-file)
@@ -509,11 +509,11 @@
 $(EXPORT_INCLUDE_DIR)/%:			$(ZERO_BUILD_DIR)/../generated/jvmtifiles/%
 	$(install-file)
 # Unix
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(ZERO_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
+$(EXPORT_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(ZERO_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo:		$(ZERO_BUILD_DIR)/%.debuginfo
+$(EXPORT_LIB_ARCH_DIR)/%.debuginfo:		$(ZERO_BUILD_DIR)/%.debuginfo
 	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz:		$(ZERO_BUILD_DIR)/%.diz
+$(EXPORT_LIB_ARCH_DIR)/%.diz:		$(ZERO_BUILD_DIR)/%.diz
 	$(install-file)
 $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX):       $(ZERO_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
@@ -522,7 +522,7 @@
 $(EXPORT_SERVER_DIR)/%.diz:			$(ZERO_BUILD_DIR)/%.diz
 	$(install-file)
 # MacOS X
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.dSYM: 		$(ZERO_BUILD_DIR)/%.dSYM
+$(EXPORT_LIB_ARCH_DIR)/%.dSYM: 		$(ZERO_BUILD_DIR)/%.dSYM
 	$(install-dir)
 $(EXPORT_SERVER_DIR)/%.dSYM:			$(ZERO_BUILD_DIR)/%.dSYM
 	$(install-dir)
@@ -536,11 +536,11 @@
 $(EXPORT_INCLUDE_DIR)/%:			$(CORE_BUILD_DIR)/../generated/jvmtifiles/%
 	$(install-file)
 # Unix
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX):	$(CORE_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
+$(EXPORT_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX):	$(CORE_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo:		$(CORE_BUILD_DIR)/%.debuginfo
+$(EXPORT_LIB_ARCH_DIR)/%.debuginfo:		$(CORE_BUILD_DIR)/%.debuginfo
 	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz:		$(CORE_BUILD_DIR)/%.diz
+$(EXPORT_LIB_ARCH_DIR)/%.diz:		$(CORE_BUILD_DIR)/%.diz
 	$(install-file)
 $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX):	$(CORE_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
@@ -558,11 +558,11 @@
 $(EXPORT_INCLUDE_DIR)/%:			$(SHARK_BUILD_DIR)/../generated/jvmtifiles/%
 	$(install-file)
 # Unix
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(SHARK_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
+$(EXPORT_LIB_ARCH_DIR)/%.$(LIBRARY_SUFFIX): $(SHARK_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.debuginfo):	$(SHARK_BUILD_DIR)/%.debuginfo
+$(EXPORT_LIB_ARCH_DIR)/%.debuginfo:		$(SHARK_BUILD_DIR)/%.debuginfo
 	$(install-file)
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.diz:		$(SHARK_BUILD_DIR)/%.diz
+$(EXPORT_LIB_ARCH_DIR)/%.diz:		$(SHARK_BUILD_DIR)/%.diz
 	$(install-file)
 $(EXPORT_SERVER_DIR)/%.$(LIBRARY_SUFFIX):       $(SHARK_BUILD_DIR)/%.$(LIBRARY_SUFFIX)
 	$(install-file)
@@ -571,7 +571,7 @@
 $(EXPORT_SERVER_DIR)/%.diz:			$(SHARK_BUILD_DIR)/%.diz
 	$(install-file)
 # MacOS X
-$(EXPORT_JRE_LIB_ARCH_DIR)/%.dSYM: 		$(SHARK_BUILD_DIR)/%.dSYM
+$(EXPORT_LIB_ARCH_DIR)/%.dSYM: 		$(SHARK_BUILD_DIR)/%.dSYM
 	$(install-dir)
 $(EXPORT_SERVER_DIR)/%.dSYM:			$(SHARK_BUILD_DIR)/%.dSYM
 	$(install-dir)
--- a/make/aix/makefiles/adlc.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/aix/makefiles/adlc.make	Thu May 07 20:51:12 2015 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -140,13 +140,7 @@
 # Note "+="; it is a hook so flags.make can add more flags, like -g or -DFOO.
 ADLCFLAGS += -q -T
 
-# Normally, debugging is done directly on the ad_<arch>*.cpp files.
-# But -g will put #line directives in those files pointing back to <arch>.ad.
-# Some builds of gcc 3.2 have a bug that gets tickled by the extra #line directives
-# so skip it for 3.2 and ealier.
-ifneq "$(shell expr \( $(CC_VER_MAJOR) \> 3 \) \| \( \( $(CC_VER_MAJOR) = 3 \) \& \( $(CC_VER_MINOR) \>= 3 \) \))" "0"
 ADLCFLAGS += -g
-endif
 
 ifdef LP64
 ADLCFLAGS += -D_LP64
--- a/make/aix/makefiles/defs.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/aix/makefiles/defs.make	Thu May 07 20:51:12 2015 -0700
@@ -184,17 +184,17 @@
 EXPORT_LIST += $(EXPORT_DOCS_DIR)/platform/jvmti/jvmti.html
 
 # client and server subdirectories have symbolic links to ../libjsig.so
-EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX)
+EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX)
 #ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 #  ifeq ($(ZIP_DEBUGINFO_FILES),1)
-#    EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.diz
+#    EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.diz
 #  else
-#    EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.debuginfo
+#    EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.debuginfo
 #  endif
 #endif
-EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server
-EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
-EXPORT_MINIMAL_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/minimal
+EXPORT_SERVER_DIR = $(EXPORT_LIB_ARCH_DIR)/server
+EXPORT_CLIENT_DIR = $(EXPORT_LIB_ARCH_DIR)/client
+EXPORT_MINIMAL_DIR = $(EXPORT_LIB_ARCH_DIR)/minimal
 
 ifeq ($(findstring true, $(JVM_VARIANT_SERVER) $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK) $(JVM_VARIANT_CORE)), true)
   EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
--- a/make/aix/makefiles/mapfile-vers-debug	Thu May 07 10:19:31 2015 -0700
+++ b/make/aix/makefiles/mapfile-vers-debug	Thu May 07 20:51:12 2015 -0700
@@ -19,7 +19,7 @@
 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 # or visit www.oracle.com if you need additional information or have any
 # questions.
-#  
+#
 #
 
 # Define public interface.
@@ -107,6 +107,7 @@
                 JVM_GetClassTypeAnnotations;
                 JVM_GetDeclaredClasses;
                 JVM_GetDeclaringClass;
+                JVM_GetSimpleBinaryName;
                 JVM_GetEnclosingMethodInfo;
                 JVM_GetFieldIxModifiers;
                 JVM_GetFieldTypeAnnotations;
--- a/make/aix/makefiles/mapfile-vers-product	Thu May 07 10:19:31 2015 -0700
+++ b/make/aix/makefiles/mapfile-vers-product	Thu May 07 20:51:12 2015 -0700
@@ -19,7 +19,7 @@
 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 # or visit www.oracle.com if you need additional information or have any
 # questions.
-#  
+#
 #
 
 # Define public interface.
@@ -107,6 +107,7 @@
                 JVM_GetClassTypeAnnotations;
                 JVM_GetDeclaredClasses;
                 JVM_GetDeclaringClass;
+                JVM_GetSimpleBinaryName;
                 JVM_GetEnclosingMethodInfo;
                 JVM_GetFieldIxModifiers;
                 JVM_GetInheritedAccessControlContext;
--- a/make/aix/makefiles/ppc64.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/aix/makefiles/ppc64.make	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 #
-# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2012, 2013 SAP AG. All rights reserved.
+# Copyright (c) 2004, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2012, 2015 SAP AG. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -71,9 +71,6 @@
 OPT_CFLAGS/sharedRuntimeTrig.o = $(OPT_CFLAGS/NOOPT)
 OPT_CFLAGS/sharedRuntimeTrans.o = $(OPT_CFLAGS/NOOPT)
 
-# xlc 10.01 parameters for ipa compile.
-QIPA_COMPILE=$(if $(CXX_IS_V10),-qipa)
-
 # Xlc 10.1 parameters for aggressive optimization:
 # - qhot=level=1: Most aggressive loop optimizations.
 # - qignerrno: Assume errno is not modified by system calls.
@@ -88,7 +85,7 @@
 OPT_CFLAGS/synchronizer.o = $(OPT_CFLAGS) -qnoinline
 
 # Set all the xlC V10.1 options here.
-OPT_CFLAGS += $(QIPA_COMPILE) $(QV10_OPT) $(QV10_OPT_AGGRESSIVE)
+OPT_CFLAGS += $(QV10_OPT) $(QV10_OPT_AGGRESSIVE)
 
 export OBJECT_MODE=64
 
--- a/make/aix/makefiles/vm.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/aix/makefiles/vm.make	Thu May 07 20:51:12 2015 -0700
@@ -122,7 +122,7 @@
 # By default, link the *.o into the library, not the executable.
 LINK_INTO$(LINK_INTO) = LIBJVM
 
-JDK_LIBDIR = $(JAVA_HOME)/jre/lib/$(LIBARCH)
+JDK_LIBDIR = $(JAVA_HOME)/lib/$(LIBARCH)
 
 #----------------------------------------------------------------------
 # jvm_db & dtrace
--- a/make/aix/makefiles/xlc.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/aix/makefiles/xlc.make	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 #
-# Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
-# Copyright (c) 2012, 2013 SAP. All rights reserved.
+# Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2012, 2015 SAP. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -34,13 +34,17 @@
 
 AS  = $(CC) -c
 
-# get xlc version
-CXX_VERSION   := $(shell $(CXX) -qversion 2>&1 | sed -n 's/.*Version: \([0-9.]*\)/\1/p')
+# get xlc version which comes as VV.RR.MMMM.LLLL where 'VV' is the version,
+# 'RR' is the release, 'MMMM' is the modification and 'LLLL' is the level.
+# We only use 'VV.RR.LLLL' to avoid integer overflows in bash when comparing
+# the version numbers (some shells only support 32-bit integer compares!).
+CXX_VERSION := $(shell $(CXX) -qversion 2>&1 | \
+                   sed -n 's/.*Version: \([0-9]\{2\}\).\([0-9]\{2\}\).[0-9]\{4\}.\([0-9]\{4\}\)/\1\2\3/p')
 
 # xlc 08.00.0000.0023 and higher supports -qtune=balanced
-CXX_SUPPORTS_BALANCED_TUNING=$(shell if [ $(subst .,,$(CXX_VERSION)) -ge 080000000023 ] ; then echo "true" ; fi)
+CXX_SUPPORTS_BALANCED_TUNING := $(shell if [ $(CXX_VERSION) -ge 08000023 ] ; then echo "true" ; fi)
 # xlc 10.01 is used with aggressive optimizations to boost performance
-CXX_IS_V10=$(shell if [ $(subst .,,$(CXX_VERSION)) -ge 100100000000 ] ; then echo "true" ; fi)
+CXX_IS_V10 := $(shell if [ $(CXX_VERSION) -ge 10010000 ] ; then echo "true" ; fi)
 
 # check for precompiled headers support
 
--- a/make/bsd/makefiles/defs.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/bsd/makefiles/defs.make	Thu May 07 20:51:12 2015 -0700
@@ -265,23 +265,23 @@
 EXPORT_LIST += $(EXPORT_DOCS_DIR)/platform/jvmti/jvmti.html
 
 # client and server subdirectories have symbolic links to ../libjsig.so
-EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX)
+EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX)
 
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-      EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.diz
+      EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.diz
   else
     ifeq ($(OS_VENDOR), Darwin)
-        EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX).dSYM
+        EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX).dSYM
     else
-        EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.debuginfo
+        EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.debuginfo
     endif
   endif
 endif
 
-EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server
-EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
-EXPORT_MINIMAL_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/minimal
+EXPORT_SERVER_DIR = $(EXPORT_LIB_ARCH_DIR)/server
+EXPORT_CLIENT_DIR = $(EXPORT_LIB_ARCH_DIR)/client
+EXPORT_MINIMAL_DIR = $(EXPORT_LIB_ARCH_DIR)/minimal
 
 ifeq ($(findstring true, $(JVM_VARIANT_SERVER) $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK)), true)
   EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
@@ -324,34 +324,34 @@
 
 # Serviceability Binaries
 # No SA Support for PPC, IA64, ARM or zero
-ADD_SA_BINARIES/x86   = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \
+ADD_SA_BINARIES/x86   = $(EXPORT_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \
                         $(EXPORT_LIB_DIR)/sa-jdi.jar
 
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-      ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz
+      ADD_SA_BINARIES/x86 += $(EXPORT_LIB_ARCH_DIR)/libsaproc.diz
   else
     ifeq ($(OS_VENDOR), Darwin)
-        ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX).dSYM
+        ADD_SA_BINARIES/x86 += $(EXPORT_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX).dSYM
     else
-        ADD_SA_BINARIES/x86 += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo
+        ADD_SA_BINARIES/x86 += $(EXPORT_LIB_ARCH_DIR)/libsaproc.debuginfo
     endif
   endif
 endif
 
-ADD_SA_BINARIES/sparc = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \
+ADD_SA_BINARIES/sparc = $(EXPORT_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \
                         $(EXPORT_LIB_DIR)/sa-jdi.jar
-ADD_SA_BINARIES/universal = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \
+ADD_SA_BINARIES/universal = $(EXPORT_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \
                             $(EXPORT_LIB_DIR)/sa-jdi.jar
 
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-      ADD_SA_BINARIES/universal += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz
+      ADD_SA_BINARIES/universal += $(EXPORT_LIB_ARCH_DIR)/libsaproc.diz
   else
     ifeq ($(OS_VENDOR), Darwin)
-        ADD_SA_BINARIES/universal += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX).dSYM
+        ADD_SA_BINARIES/universal += $(EXPORT_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX).dSYM
     else
-        ADD_SA_BINARIES/universal += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo
+        ADD_SA_BINARIES/universal += $(EXPORT_LIB_ARCH_DIR)/libsaproc.debuginfo
     endif
   endif
 endif
@@ -388,25 +388,25 @@
     endif
 
     # Binaries to 'universalize' if built
-    UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/libjsig.$(LIBRARY_SUFFIX)
-    UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/libsaproc.$(LIBRARY_SUFFIX)
-    UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/server/libjvm.$(LIBRARY_SUFFIX)
-    UNIVERSAL_LIPO_LIST += $(EXPORT_JRE_LIB_DIR)/client/libjvm.$(LIBRARY_SUFFIX)
+    UNIVERSAL_LIPO_LIST += $(EXPORT_LIB_DIR)/libjsig.$(LIBRARY_SUFFIX)
+    UNIVERSAL_LIPO_LIST += $(EXPORT_LIB_DIR)/libsaproc.$(LIBRARY_SUFFIX)
+    UNIVERSAL_LIPO_LIST += $(EXPORT_LIB_DIR)/server/libjvm.$(LIBRARY_SUFFIX)
+    UNIVERSAL_LIPO_LIST += $(EXPORT_LIB_DIR)/client/libjvm.$(LIBRARY_SUFFIX)
 
     # Files to simply copy in place
-    UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/server/Xusage.txt
-    UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/client/Xusage.txt
+    UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/server/Xusage.txt
+    UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/client/Xusage.txt
     ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
       ifeq ($(ZIP_DEBUGINFO_FILES),1)
-          UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/server/libjvm.diz
-          UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/client/libjvm.diz
-          UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/libjsig.diz
-          UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/libsaproc.diz
+          UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/server/libjvm.diz
+          UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/client/libjvm.diz
+          UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/libjsig.diz
+          UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/libsaproc.diz
       else
-          UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/server/libjvm.$(LIBRARY_SUFFIX).dSYM
-          UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/client/libjvm.$(LIBRARY_SUFFIX).dSYM
-          UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/libjsig.$(LIBRARY_SUFFIX).dSYM
-          UNIVERSAL_COPY_LIST += $(EXPORT_JRE_LIB_DIR)/libsaproc.$(LIBRARY_SUFFIX).dSYM
+          UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/server/libjvm.$(LIBRARY_SUFFIX).dSYM
+          UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/client/libjvm.$(LIBRARY_SUFFIX).dSYM
+          UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/libjsig.$(LIBRARY_SUFFIX).dSYM
+          UNIVERSAL_COPY_LIST += $(EXPORT_LIB_DIR)/libsaproc.$(LIBRARY_SUFFIX).dSYM
       endif
     endif
 
--- a/make/bsd/makefiles/gcc.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/bsd/makefiles/gcc.make	Thu May 07 20:51:12 2015 -0700
@@ -313,22 +313,13 @@
 
 # Work around some compiler bugs.
 ifeq ($(USE_CLANG), true)
-  # Clang 4.2
-  ifeq ($(shell expr $(CC_VER_MAJOR) = 4 \& $(CC_VER_MINOR) = 2), 1)
+  # Clang <= 6.1
+  ifeq ($(shell expr \
+      $(CC_VER_MAJOR) \< 6 \| \
+      \( $(CC_VER_MAJOR) = 6 \& $(CC_VER_MINOR) \<= 1 \) \
+    ), 1)
     OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT)
     OPT_CFLAGS/unsafe.o += -O1
-  # Clang 5.0
-  else ifeq ($(shell expr $(CC_VER_MAJOR) = 5 \& $(CC_VER_MINOR) = 0), 1)
-    OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT)
-    OPT_CFLAGS/unsafe.o += -O1
-  # Clang 5.1
-  else ifeq ($(shell expr $(CC_VER_MAJOR) = 5 \& $(CC_VER_MINOR) = 1), 1)
-    OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT)
-    OPT_CFLAGS/unsafe.o += -O1
-  # Clang 6.0 
-  else ifeq ($(shell expr $(CC_VER_MAJOR) = 6 \& $(CC_VER_MINOR) = 0), 1) 
-    OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT) 
-    OPT_CFLAGS/unsafe.o += -O1 
   else
     $(error "Update compiler workarounds for Clang $(CC_VER_MAJOR).$(CC_VER_MINOR)")
   endif
@@ -336,7 +327,7 @@
   # 6835796. Problem in GCC 4.3.0 with mulnode.o optimized compilation.
   ifeq ($(shell expr $(CC_VER_MAJOR) = 4 \& $(CC_VER_MINOR) = 3), 1)
     OPT_CFLAGS/mulnode.o += $(OPT_CFLAGS/NOOPT)
-  endif
+  endif
 endif
 
 # Flags for generating make dependency flags.
--- a/make/bsd/makefiles/mapfile-vers-darwin-debug	Thu May 07 10:19:31 2015 -0700
+++ b/make/bsd/makefiles/mapfile-vers-darwin-debug	Thu May 07 20:51:12 2015 -0700
@@ -105,6 +105,7 @@
                 _JVM_GetClassTypeAnnotations
                 _JVM_GetDeclaredClasses
                 _JVM_GetDeclaringClass
+                _JVM_GetSimpleBinaryName
                 _JVM_GetEnclosingMethodInfo
                 _JVM_GetFieldIxModifiers
                 _JVM_GetFieldTypeAnnotations
--- a/make/bsd/makefiles/mapfile-vers-darwin-product	Thu May 07 10:19:31 2015 -0700
+++ b/make/bsd/makefiles/mapfile-vers-darwin-product	Thu May 07 20:51:12 2015 -0700
@@ -105,6 +105,7 @@
                 _JVM_GetClassTypeAnnotations
                 _JVM_GetDeclaredClasses
                 _JVM_GetDeclaringClass
+                _JVM_GetSimpleBinaryName
                 _JVM_GetEnclosingMethodInfo
                 _JVM_GetFieldIxModifiers
                 _JVM_GetFieldTypeAnnotations
--- a/make/bsd/makefiles/mapfile-vers-debug	Thu May 07 10:19:31 2015 -0700
+++ b/make/bsd/makefiles/mapfile-vers-debug	Thu May 07 20:51:12 2015 -0700
@@ -19,7 +19,7 @@
 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 # or visit www.oracle.com if you need additional information or have any
 # questions.
-#  
+#
 #
 
 # Define public interface.
@@ -107,6 +107,7 @@
                 JVM_GetClassTypeAnnotations;
                 JVM_GetDeclaredClasses;
                 JVM_GetDeclaringClass;
+                JVM_GetSimpleBinaryName;
                 JVM_GetEnclosingMethodInfo;
                 JVM_GetFieldIxModifiers;
                 JVM_GetFieldTypeAnnotations;
--- a/make/bsd/makefiles/mapfile-vers-product	Thu May 07 10:19:31 2015 -0700
+++ b/make/bsd/makefiles/mapfile-vers-product	Thu May 07 20:51:12 2015 -0700
@@ -19,7 +19,7 @@
 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 # or visit www.oracle.com if you need additional information or have any
 # questions.
-#  
+#
 #
 
 # Define public interface.
@@ -107,6 +107,7 @@
                 JVM_GetClassTypeAnnotations;
                 JVM_GetDeclaredClasses;
                 JVM_GetDeclaringClass;
+                JVM_GetSimpleBinaryName;
                 JVM_GetEnclosingMethodInfo;
                 JVM_GetFieldIxModifiers;
                 JVM_GetFieldTypeAnnotations;
--- a/make/bsd/makefiles/universal.gmk	Thu May 07 10:19:31 2015 -0700
+++ b/make/bsd/makefiles/universal.gmk	Thu May 07 20:51:12 2015 -0700
@@ -54,12 +54,12 @@
 
 # Consolidate architecture builds into a single Universal binary
 universalize: $(UNIVERSAL_LIPO_LIST) $(UNIVERSAL_COPY_LIST)
-	$(RM) -r $(EXPORT_PATH)/jre/lib/{i386,amd64}
+	$(RM) -r $(EXPORT_PATH)/lib/{i386,amd64}
 
 
 # Package built libraries in a universal binary
 $(UNIVERSAL_LIPO_LIST):
-	BUILT_LIPO_FILES="`find $(EXPORT_JRE_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_JRE_LIB_DIR)/,,$@) 2>/dev/null`" || test $$? = "1"; \
+	BUILT_LIPO_FILES="`find $(EXPORT_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_LIB_DIR)/,,$@) 2>/dev/null`" || test $$? = "1"; \
 	if [ -n "$${BUILT_LIPO_FILES}" ]; then \
 	  $(MKDIR) -p $(shell dirname $@); \
 	  lipo -create -output $@ $${BUILT_LIPO_FILES}; \
@@ -70,7 +70,7 @@
 # - copies directories; including empty dirs
 # - copies files, symlinks, other non-directory files
 $(UNIVERSAL_COPY_LIST):
-	BUILT_COPY_FILES="`find $(EXPORT_JRE_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_JRE_LIB_DIR)/,,$@) -prune 2>/dev/null`" || test $$? = "1"; \
+	BUILT_COPY_FILES="`find $(EXPORT_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_LIB_DIR)/,,$@) -prune 2>/dev/null`" || test $$? = "1"; \
 	if [ -n "$${BUILT_COPY_FILES}" ]; then \
 	  for i in $${BUILT_COPY_FILES}; do \
 	    $(MKDIR) -p $(shell dirname $@); \
@@ -80,21 +80,21 @@
 
 
 # Replace arch specific binaries with universal binaries
-# Do not touch jre/lib/{client,server}/libjsig.$(LIBRARY_SUFFIX)
+# Do not touch lib/{client,server}/libjsig.$(LIBRARY_SUFFIX)
 # That symbolic link belongs to the 'jdk' build.
 export_universal:
-	$(RM) -r $(EXPORT_PATH)/jre/lib/{i386,amd64}
-	$(RM) -r $(JDK_IMAGE_DIR)/jre/lib/{i386,amd64}
+	$(RM) -r $(EXPORT_PATH)/lib/{i386,amd64}
+	$(RM) -r $(JDK_IMAGE_DIR)/lib/{i386,amd64}
 	($(CD) $(EXPORT_PATH) && \
 	  $(TAR) -cf - *) | \
 	  ($(CD) $(JDK_IMAGE_DIR) && $(TAR) -xpf -)
 
 
 # Overlay universal binaries
-# Do not touch jre/lib/{client,server}/libjsig.$(LIBRARY_SUFFIX)
+# Do not touch lib/{client,server}/libjsig.$(LIBRARY_SUFFIX)
 # That symbolic link belongs to the 'jdk' build.
 copy_universal:
-	$(RM) -r $(JDK_IMAGE_DIR)$(COPY_SUBDIR)/jre/lib/{i386,amd64}
+	$(RM) -r $(JDK_IMAGE_DIR)$(COPY_SUBDIR)/lib/{i386,amd64}
 	($(CD) $(EXPORT_PATH)$(COPY_SUBDIR) && \
 	  $(TAR) -cf - *) | \
 	  ($(CD) $(JDK_IMAGE_DIR)$(COPY_SUBDIR) && $(TAR) -xpf -)
--- a/make/build.sh	Thu May 07 10:19:31 2015 -0700
+++ b/make/build.sh	Thu May 07 20:51:12 2015 -0700
@@ -40,7 +40,7 @@
     exit 1
 fi
 
-if [ "${JAVA_HOME-}" = ""  -o  ! -d "${JAVA_HOME-}" -o ! -d ${JAVA_HOME-}/jre/lib/ ]; then
+if [ "${JAVA_HOME-}" = ""  -o  ! -d "${JAVA_HOME-}" ]; then
     echo "JAVA_HOME needs to be set to a valid JDK path"
     echo "JAVA_HOME: ${JAVA_HOME-}"
     exit 1
--- a/make/defs.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/defs.make	Thu May 07 20:51:12 2015 -0700
@@ -350,15 +350,13 @@
 EXPORT_INCLUDE_DIR = $(EXPORT_PATH)/include
 EXPORT_DOCS_DIR = $(EXPORT_PATH)/docs
 EXPORT_LIB_DIR = $(EXPORT_PATH)/lib
-EXPORT_JRE_DIR = $(EXPORT_PATH)/jre
-EXPORT_JRE_BIN_DIR = $(EXPORT_JRE_DIR)/bin
-EXPORT_JRE_LIB_DIR = $(EXPORT_JRE_DIR)/lib
-EXPORT_JRE_LIB_ARCH_DIR = $(EXPORT_JRE_LIB_DIR)/$(LIBARCH)
+EXPORT_BIN_DIR = $(EXPORT_PATH)/bin
+EXPORT_LIB_ARCH_DIR = $(EXPORT_LIB_DIR)/$(LIBARCH)
 
 # non-universal macosx builds need to appear universal
 ifeq ($(OS_VENDOR), Darwin)
   ifneq ($(MACOSX_UNIVERSAL), true)
-    EXPORT_JRE_LIB_ARCH_DIR = $(EXPORT_JRE_LIB_DIR)
+    EXPORT_LIB_ARCH_DIR = $(EXPORT_LIB_DIR)
   endif
 endif
 
@@ -370,4 +368,3 @@
 EXPORT_LIST += $(EXPORT_INCLUDE_DIR)/jmm.h
 
 .PHONY: $(HS_ALT_MAKE)/defs.make
-
--- a/make/linux/makefiles/defs.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/linux/makefiles/defs.make	Thu May 07 20:51:12 2015 -0700
@@ -244,17 +244,17 @@
 EXPORT_LIST += $(EXPORT_DOCS_DIR)/platform/jvmti/jvmti.html
 
 # client and server subdirectories have symbolic links to ../libjsig.so
-EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX)
+EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX)
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-    EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.diz
+    EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.diz
   else
-    EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.debuginfo
+    EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.debuginfo
   endif
 endif
-EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server
-EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
-EXPORT_MINIMAL_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/minimal
+EXPORT_SERVER_DIR = $(EXPORT_LIB_ARCH_DIR)/server
+EXPORT_CLIENT_DIR = $(EXPORT_LIB_ARCH_DIR)/client
+EXPORT_MINIMAL_DIR = $(EXPORT_LIB_ARCH_DIR)/minimal
 
 ifeq ($(findstring true, $(JVM_VARIANT_SERVER) $(JVM_VARIANT_ZERO) $(JVM_VARIANT_ZEROSHARK) $(JVM_VARIANT_CORE)), true)
   EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
@@ -295,14 +295,14 @@
 
 # Serviceability Binaries
 
-ADD_SA_BINARIES/DEFAULT = $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \
+ADD_SA_BINARIES/DEFAULT = $(EXPORT_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX) \
                           $(EXPORT_LIB_DIR)/sa-jdi.jar
 
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-    ADD_SA_BINARIES/DEFAULT += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz
+    ADD_SA_BINARIES/DEFAULT += $(EXPORT_LIB_ARCH_DIR)/libsaproc.diz
   else
-    ADD_SA_BINARIES/DEFAULT += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo
+    ADD_SA_BINARIES/DEFAULT += $(EXPORT_LIB_ARCH_DIR)/libsaproc.debuginfo
   endif
 endif
 
--- a/make/linux/makefiles/mapfile-vers-debug	Thu May 07 10:19:31 2015 -0700
+++ b/make/linux/makefiles/mapfile-vers-debug	Thu May 07 20:51:12 2015 -0700
@@ -19,7 +19,7 @@
 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 # or visit www.oracle.com if you need additional information or have any
 # questions.
-#  
+#
 #
 
 # Define public interface.
@@ -107,6 +107,7 @@
                 JVM_GetClassTypeAnnotations;
                 JVM_GetDeclaredClasses;
                 JVM_GetDeclaringClass;
+                JVM_GetSimpleBinaryName;
                 JVM_GetEnclosingMethodInfo;
                 JVM_GetFieldIxModifiers;
                 JVM_GetFieldTypeAnnotations;
--- a/make/linux/makefiles/mapfile-vers-product	Thu May 07 10:19:31 2015 -0700
+++ b/make/linux/makefiles/mapfile-vers-product	Thu May 07 20:51:12 2015 -0700
@@ -19,7 +19,7 @@
 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 # or visit www.oracle.com if you need additional information or have any
 # questions.
-#  
+#
 #
 
 # Define public interface.
@@ -107,6 +107,7 @@
                 JVM_GetClassTypeAnnotations;
                 JVM_GetDeclaredClasses;
                 JVM_GetDeclaringClass;
+                JVM_GetSimpleBinaryName;
                 JVM_GetEnclosingMethodInfo;
                 JVM_GetFieldIxModifiers;
                 JVM_GetFieldTypeAnnotations;
--- a/make/linux/makefiles/vm.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/linux/makefiles/vm.make	Thu May 07 20:51:12 2015 -0700
@@ -127,7 +127,7 @@
 # By default, link the *.o into the library, not the executable.
 LINK_INTO$(LINK_INTO) = LIBJVM
 
-JDK_LIBDIR = $(JAVA_HOME)/jre/lib/$(LIBARCH)
+JDK_LIBDIR = $(JAVA_HOME)/lib/$(LIBARCH)
 
 #----------------------------------------------------------------------
 # jvm_db & dtrace
--- a/make/solaris/makefiles/defs.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/solaris/makefiles/defs.make	Thu May 07 20:51:12 2015 -0700
@@ -224,17 +224,17 @@
 EXPORT_LIST += $(EXPORT_DOCS_DIR)/platform/jvmti/jvmti.html
 
 # client and server subdirectories have symbolic links to ../libjsig.$(LIBRARY_SUFFIX)
-EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX)
+EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.$(LIBRARY_SUFFIX)
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-    EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.diz
+    EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.diz
   else
-    EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libjsig.debuginfo
+    EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libjsig.debuginfo
   endif
 endif
 
-EXPORT_SERVER_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/server
-EXPORT_CLIENT_DIR = $(EXPORT_JRE_LIB_ARCH_DIR)/client
+EXPORT_SERVER_DIR = $(EXPORT_LIB_ARCH_DIR)/server
+EXPORT_CLIENT_DIR = $(EXPORT_LIB_ARCH_DIR)/client
 
 ifeq ($(JVM_VARIANT_SERVER),true)
   EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
@@ -295,12 +295,12 @@
   endif
 endif
 
-EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX)
+EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libsaproc.$(LIBRARY_SUFFIX)
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-    EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.diz
+    EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libsaproc.diz
   else
-    EXPORT_LIST += $(EXPORT_JRE_LIB_ARCH_DIR)/libsaproc.debuginfo
+    EXPORT_LIST += $(EXPORT_LIB_ARCH_DIR)/libsaproc.debuginfo
   endif
 endif
 EXPORT_LIST += $(EXPORT_LIB_DIR)/sa-jdi.jar
--- a/make/solaris/makefiles/dtrace.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/solaris/makefiles/dtrace.make	Thu May 07 20:51:12 2015 -0700
@@ -130,8 +130,9 @@
 $(XLIBJVM_DTRACE): $(DTRACE_SRCDIR)/$(JVM_DTRACE).c $(DTRACE_SRCDIR)/$(JVM_DTRACE).h $(LIBJVM_DTRACE_MAPFILE)
 	@echo $(LOG_INFO) Making $@
 	$(QUIETLY) mkdir -p $(XLIBJVM_DIR) ; \
-	$(CC) $(SYMFLAG) $(ARCHFLAG/$(ISA)) -D$(TYPE) -I. \
-		$(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c -lc -lthread -ldoor
+	$(CC) $(SYMFLAG) $(ARCHFLAG/$(ISA)) -D$(TYPE) -I. $(EXTRA_CFLAGS) \
+	    $(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c \
+	    $(EXTRA_LDFLAGS) -lc -lthread -ldoor
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(XLIBJVM_DTRACE_DEBUGINFO)
 # Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not
@@ -216,8 +217,9 @@
 
 $(LIBJVM_DTRACE): $(DTRACE_SRCDIR)/$(JVM_DTRACE).c $(XLIBJVM_DTRACE) $(DTRACE_SRCDIR)/$(JVM_DTRACE).h $(LIBJVM_DTRACE_MAPFILE)
 	@echo $(LOG_INFO) Making $@
-	$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) -D$(TYPE) -I.  \
-		$(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c -lc -lthread -ldoor
+	$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) -D$(TYPE) -I. $(EXTRA_CFLAGS) \
+	    $(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c \
+	    $(EXTRA_LDFLAGS) -lc -lthread -ldoor
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJVM_DTRACE_DEBUGINFO)
 	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJVM_DTRACE_DEBUGINFO) $@
--- a/make/solaris/makefiles/jsig.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/solaris/makefiles/jsig.make	Thu May 07 20:51:12 2015 -0700
@@ -50,7 +50,9 @@
 $(LIBJSIG): $(JSIGSRCDIR)/jsig.c $(LIBJSIG_MAPFILE)
 	@echo $(LOG_INFO) Making signal interposition lib...
 	$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \
-                         $(LFLAGS_JSIG) -o $@ $(JSIGSRCDIR)/jsig.c -ldl
+	                 $(EXTRA_CFLAGS) \
+                         $(LFLAGS_JSIG) $(EXTRA_LDFLAGS) \
+	                 -o $@ $(JSIGSRCDIR)/jsig.c -ldl
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBJSIG_DEBUGINFO)
 	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBJSIG_DEBUGINFO) $@
--- a/make/solaris/makefiles/mapfile-vers	Thu May 07 10:19:31 2015 -0700
+++ b/make/solaris/makefiles/mapfile-vers	Thu May 07 20:51:12 2015 -0700
@@ -19,7 +19,7 @@
 # Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 # or visit www.oracle.com if you need additional information or have any
 # questions.
-#  
+#
 #
 
 # Define public interface.
@@ -30,7 +30,7 @@
                 JNI_CreateJavaVM;
                 JNI_GetCreatedJavaVMs;
                 JNI_GetDefaultJavaVMInitArgs;
-        
+
                 # JVM
                 JVM_ActiveProcessorCount;
                 JVM_ArrayCopy;
@@ -107,6 +107,7 @@
                 JVM_GetClassTypeAnnotations;
                 JVM_GetDeclaredClasses;
                 JVM_GetDeclaringClass;
+                JVM_GetSimpleBinaryName;
                 JVM_GetEnclosingMethodInfo;
                 JVM_GetFieldIxModifiers;
                 JVM_GetFieldTypeAnnotations;
--- a/make/solaris/makefiles/product.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/solaris/makefiles/product.make	Thu May 07 20:51:12 2015 -0700
@@ -37,6 +37,11 @@
 OPT_CFLAGS/ciEnv.o = $(OPT_CFLAGS) -xinline=no%__1cFciEnvbFpost_compiled_method_load_event6MpnHnmethod__v_
 endif
 
+# Need extra inlining to get oop_ps_push_contents functions to perform well enough.
+ifndef USE_GCC
+OPT_CFLAGS/psPromotionManager.o = $(OPT_CFLAGS) -W2,-Ainline:inc=1000
+endif
+
 # (OPT_CFLAGS/SLOWER is also available, to alter compilation of buggy files)
 ifeq ("${Platform_compiler}", "sparcWorks")
 
--- a/make/solaris/makefiles/saproc.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/solaris/makefiles/saproc.make	Thu May 07 20:51:12 2015 -0700
@@ -89,6 +89,17 @@
 # when actually building on Nevada-B158 or earlier:
 #SOLARIS_11_B159_OR_LATER=-DSOLARIS_11_B159_OR_LATER
 
+$(SADISOBJ): $(SADISSRCFILES)
+	           $(QUIETLY) $(CC)                                     \
+	           $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG)     \
+	           -I$(SASRCDIR)                                        \
+	           -I$(GENERATED)                                       \
+	           -I$(BOOT_JAVA_HOME)/include                          \
+	           -I$(BOOT_JAVA_HOME)/include/$(Platform_os_family)    \
+	           $(SOLARIS_11_B159_OR_LATER)                          \
+	           $(EXTRA_CFLAGS)					\
+	           $(SADISSRCFILES)                                     \
+	           -c -o $(SADISOBJ)
 
 $(LIBSAPROC): $(SASRCFILES) $(SADISOBJ) $(SAMAPFILE)
 	$(QUIETLY) if [ "$(BOOT_JAVA_HOME)" = "" ]; then \
@@ -103,23 +114,13 @@
 	           -I$(BOOT_JAVA_HOME)/include                          \
 	           -I$(BOOT_JAVA_HOME)/include/$(Platform_os_family)    \
 	           $(SOLARIS_11_B159_OR_LATER)                          \
+	           $(EXTRA_CXXFLAGS) $(EXTRA_LDFLAGS)			\
+	           $(SADISOBJ)                                          \
 	           $(SASRCFILES)                                        \
-	           $(SADISOBJ)                                          \
 	           $(SA_LFLAGS)                                         \
 	           -o $@                                                \
 	           -ldl -ldemangle -lthread -lc
 
-$(SADISOBJ): $(SADISSRCFILES)
-	           $(QUIETLY) $(CC)                                     \
-	           $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG)     \
-	           -I$(SASRCDIR)                                        \
-	           -I$(GENERATED)                                       \
-	           -I$(BOOT_JAVA_HOME)/include                          \
-	           -I$(BOOT_JAVA_HOME)/include/$(Platform_os_family)    \
-	           $(SOLARIS_11_B159_OR_LATER)                          \
-	           $(SADISSRCFILES)                                     \
-	           -c -o $(SADISOBJ)
-
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(LIBSAPROC_DEBUGINFO)
 	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(LIBSAPROC_DEBUGINFO) $@
--- a/make/solaris/makefiles/vm.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/solaris/makefiles/vm.make	Thu May 07 20:51:12 2015 -0700
@@ -148,7 +148,7 @@
 # By default, link the *.o into the library, not the executable.
 LINK_INTO$(LINK_INTO) = LIBJVM
 
-JDK_LIBDIR = $(JAVA_HOME)/jre/lib/$(LIBARCH)
+JDK_LIBDIR = $(JAVA_HOME)/lib/$(LIBARCH)
 
 #----------------------------------------------------------------------
 # jvm_db & dtrace
@@ -288,6 +288,8 @@
 endif
 endif
 
+LFLAGS_VM += $(EXTRA_LDFLAGS)
+
 ifdef USE_GCC
 LINK_VM = $(LINK_LIB.CC)
 else
--- a/make/windows/makefiles/defs.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/windows/makefiles/defs.make	Thu May 07 20:51:12 2015 -0700
@@ -249,8 +249,8 @@
   endif
 endif
 
-EXPORT_SERVER_DIR = $(EXPORT_JRE_BIN_DIR)/server
-EXPORT_CLIENT_DIR = $(EXPORT_JRE_BIN_DIR)/client
+EXPORT_SERVER_DIR = $(EXPORT_BIN_DIR)/server
+EXPORT_CLIENT_DIR = $(EXPORT_BIN_DIR)/client
 
 ifeq ($(JVM_VARIANT_SERVER),true)
   EXPORT_LIST += $(EXPORT_SERVER_DIR)/Xusage.txt
@@ -280,13 +280,13 @@
 EXPORT_LIST += $(EXPORT_LIB_DIR)/jvm.lib
 
 ifeq ($(BUILD_WIN_SA), 1)
-  EXPORT_LIST += $(EXPORT_JRE_BIN_DIR)/sawindbg.$(LIBRARY_SUFFIX)
+  EXPORT_LIST += $(EXPORT_BIN_DIR)/sawindbg.$(LIBRARY_SUFFIX)
   ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
     ifeq ($(ZIP_DEBUGINFO_FILES),1)
-      EXPORT_LIST += $(EXPORT_JRE_BIN_DIR)/sawindbg.diz
+      EXPORT_LIST += $(EXPORT_BIN_DIR)/sawindbg.diz
     else
-      EXPORT_LIST += $(EXPORT_JRE_BIN_DIR)/sawindbg.pdb
-      EXPORT_LIST += $(EXPORT_JRE_BIN_DIR)/sawindbg.map
+      EXPORT_LIST += $(EXPORT_BIN_DIR)/sawindbg.pdb
+      EXPORT_LIST += $(EXPORT_BIN_DIR)/sawindbg.map
     endif
   endif
   EXPORT_LIST += $(EXPORT_LIB_DIR)/sa-jdi.jar
--- a/make/windows/makefiles/sa.make	Thu May 07 10:19:31 2015 -0700
+++ b/make/windows/makefiles/sa.make	Thu May 07 20:51:12 2015 -0700
@@ -91,6 +91,9 @@
 
 SAWINDBG=sawindbg.dll
 
+# Resource file containing VERSIONINFO
+SA_Res_Files=.\version.sares
+
 checkAndBuildSA:: $(SAWINDBG)
 
 # These do not need to be optimized (don't run a lot of code) and it
@@ -126,10 +129,13 @@
 # Note that we do not keep sawindbj.obj around as it would then
 # get included in the dumpbin command in build_vm_def.sh
 
+# Force resources to be rebuilt every time
+$(SA_Res_Files): FORCE
+
 # In VS2005 or VS2008 the link command creates a .manifest file that we want
 # to insert into the linked artifact so we do not need to track it separately.
 # Use ";#2" for .dll and ";#1" for .exe in the MT command below:
-$(SAWINDBG): $(SASRCFILES)
+$(SAWINDBG): $(SASRCFILES) $(SA_Res_Files)
 	set INCLUDE=$(SA_INCLUDE)$(INCLUDE)
 	$(CXX) @<<
 	  -I"$(BootStrapDir)/include" -I"$(BootStrapDir)/include/win32"
@@ -138,7 +144,7 @@
 	  -out:$*.obj
 <<
 	set LIB=$(SA_LIB)$(LIB)
-	$(LD) -out:$@ -DLL sawindbg.obj sadis.obj dbgeng.lib $(SA_LFLAGS)
+	$(LD) -out:$@ -DLL sawindbg.obj sadis.obj dbgeng.lib $(SA_LFLAGS) $(SA_Res_Files)
 !if "$(MT)" != ""
 	$(MT) -manifest $(@F).manifest -outputresource:$(@F);#2
 !endif
@@ -150,6 +156,9 @@
 !endif
 	-@rm -f $*.obj
 
+{$(COMMONSRC)\os\windows\vm}.rc.sares:
+        @$(RC) $(RC_FLAGS) /D "HS_FNAME=$(SAWINDBG)" /fo"$@" $<
+
 cleanall :
 	rm -rf $(GENERATED)/saclasses
 	rm -rf $(GENERATED)/sa-jdi.jar
--- a/src/cpu/aarch64/vm/aarch64.ad	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/aarch64/vm/aarch64.ad	Thu May 07 20:51:12 2015 -0700
@@ -2614,6 +2614,8 @@
     case INDINDEXSCALEDI2L:
     case INDINDEXSCALEDOFFSETI2LN:
     case INDINDEXSCALEDI2LN:
+    case INDINDEXOFFSETI2L:
+    case INDINDEXOFFSETI2LN:
       scale = Address::sxtw(size);
       break;
     default:
@@ -5060,6 +5062,20 @@
   %}
 %}
 
+operand indIndexOffsetI2L(iRegP reg, iRegI ireg, immLU12 off)
+%{
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP reg (ConvI2L ireg)) off);
+  op_cost(INSN_COST);
+  format %{ "$reg, $ireg, $off I2L" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
 operand indIndexScaledOffsetI2L(iRegP reg, iRegI ireg, immIScale scale, immLU12 off)
 %{
   constraint(ALLOC_IN_RC(ptr_reg));
@@ -5120,7 +5136,7 @@
 %{
   constraint(ALLOC_IN_RC(ptr_reg));
   match(AddP reg off);
-  op_cost(INSN_COST);
+  op_cost(0);
   format %{ "[$reg, $off]" %}
   interface(MEMORY_INTER) %{
     base($reg);
@@ -5190,6 +5206,21 @@
   %}
 %}
 
+operand indIndexOffsetI2LN(iRegN reg, iRegI ireg, immLU12 off)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  constraint(ALLOC_IN_RC(ptr_reg));
+  match(AddP (AddP (DecodeN reg) (ConvI2L ireg)) off);
+  op_cost(INSN_COST);
+  format %{ "$reg, $ireg, $off I2L\t# narrow" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index($ireg);
+    scale(0x0);
+    disp($off);
+  %}
+%}
+
 operand indIndexScaledOffsetI2LN(iRegN reg, iRegI ireg, immIScale scale, immLU12 off)
 %{
   predicate(Universe::narrow_oop_shift() == 0);
@@ -5452,8 +5483,8 @@
 // memory is used to define read/write location for load/store
 // instruction defs. we can turn a memory op into an Address
 
-opclass memory(indirect, indIndexScaledOffsetI,  indIndexScaledOffsetL, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
-               indirectN, indIndexScaledOffsetIN,  indIndexScaledOffsetLN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
+opclass memory(indirect, indIndexScaledOffsetI, indIndexScaledOffsetL, indIndexOffsetI2L, indIndexScaledOffsetI2L, indIndexScaled, indIndexScaledI2L, indIndex, indOffI, indOffL,
+               indirectN, indIndexScaledOffsetIN, indIndexScaledOffsetLN, indIndexOffsetI2LN, indIndexScaledOffsetI2LN, indIndexScaledN, indIndexScaledI2LN, indIndexN, indOffIN, indOffLN);
 
 
 // iRegIorL2I is used for src inputs in rules for 32 bit int (I)
@@ -8346,7 +8377,7 @@
 instruct addP_reg_reg_ext(iRegPNoSp dst, iRegP src1, iRegIorL2I src2) %{
   match(Set dst (AddP src1 (ConvI2L src2)));
 
-  ins_cost(INSN_COST);
+  ins_cost(1.9 * INSN_COST);
   format %{ "add $dst, $src1, $src2, sxtw\t# ptr" %}
 
   ins_encode %{
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1469,7 +1469,7 @@
     f(op, 31, 29);
     f(0b11010000, 28, 21);
     f(0b000000, 15, 10);
-    rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);
+    zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);
   }
 
   #define INSN(NAME, op)                                \
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/aarch64/vm/globals_aarch64.hpp	Thu May 07 20:51:12 2015 -0700
@@ -68,6 +68,8 @@
 
 define_pd_global(bool, UseMembar,            true);
 
+define_pd_global(bool, PreserveFramePointer, false);
+
 // GC Ergo Flags
 define_pd_global(uintx, CMSYoungGenPerWorker, 64*M);  // default max size of CMS young gen, per GC worker thread
 
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.hpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -42,6 +42,8 @@
   using MacroAssembler::call_VM_leaf_base;
 
   // Interpreter specific version of call_VM_base
+  using MacroAssembler::call_VM_leaf_base;
+
   virtual void call_VM_leaf_base(address entry_point,
                                  int number_of_arguments);
 
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2238,6 +2238,341 @@
 }
 
 /**
+ * Helpers for multiply_to_len().
+ */
+void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
+                                     Register src1, Register src2) {
+  adds(dest_lo, dest_lo, src1);
+  adc(dest_hi, dest_hi, zr);
+  adds(dest_lo, dest_lo, src2);
+  adc(final_dest_hi, dest_hi, zr);
+}
+
+// Generate an address for r + (r1 extended per ext) + offset.  "size" is the
+// size of the operand in bytes.  The returned address may use rscratch2 as base.
+Address MacroAssembler::offsetted_address(Register r, Register r1,
+                                          Address::extend ext, int offset, int size) {
+  if (offset || (ext.shift() % size != 0)) {
+    lea(rscratch2, Address(r, r1, ext));
+    return Address(rscratch2, offset);
+  } else {
+    return Address(r, r1, ext);
+  }
+}
+
+/**
+ * Multiply 64 bit by 64 bit first loop.
+ */
+void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
+                                           Register y, Register y_idx, Register z,
+                                           Register carry, Register product,
+                                           Register idx, Register kdx) {
+  //
+  //  jlong carry, x[], y[], z[];
+  //  for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
+  //    huge_128 product = y[idx] * x[xstart] + carry;
+  //    z[kdx] = (jlong)product;
+  //    carry  = (jlong)(product >>> 64);
+  //  }
+  //  z[xstart] = carry;
+  //
+
+  Label L_first_loop, L_first_loop_exit;
+  Label L_one_x, L_one_y, L_multiply;
+
+  subsw(xstart, xstart, 1);
+  br(Assembler::MI, L_one_x);
+
+  lea(rscratch1, Address(x, xstart, Address::lsl(LogBytesPerInt)));
+  ldr(x_xstart, Address(rscratch1));
+  ror(x_xstart, x_xstart, 32); // convert big-endian to little-endian
+
+  bind(L_first_loop);
+  subsw(idx, idx, 1);
+  br(Assembler::MI, L_first_loop_exit);
+  subsw(idx, idx, 1);
+  br(Assembler::MI, L_one_y);
+  lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt)));
+  ldr(y_idx, Address(rscratch1));
+  ror(y_idx, y_idx, 32); // convert big-endian to little-endian
+  bind(L_multiply);
+
+  // AArch64 has a multiply-accumulate instruction that we can't use
+  // here because it has no way to process carries, so we have to use
+  // separate add and adc instructions.  Bah.
+  umulh(rscratch1, x_xstart, y_idx); // x_xstart * y_idx -> rscratch1:product
+  mul(product, x_xstart, y_idx);
+  adds(product, product, carry);
+  adc(carry, rscratch1, zr);   // x_xstart * y_idx + carry -> carry:product
+
+  subw(kdx, kdx, 2);
+  ror(product, product, 32); // back to big-endian
+  str(product, offsetted_address(z, kdx, Address::uxtw(LogBytesPerInt), 0, BytesPerLong));
+
+  b(L_first_loop);
+
+  bind(L_one_y);
+  ldrw(y_idx, Address(y,  0));
+  b(L_multiply);
+
+  bind(L_one_x);
+  ldrw(x_xstart, Address(x,  0));
+  b(L_first_loop);
+
+  bind(L_first_loop_exit);
+}
+
+/**
+ * Multiply 128 bit by 128 bit. Unrolled inner loop.
+ *
+ */
+void MacroAssembler::multiply_128_x_128_loop(Register y, Register z,
+                                             Register carry, Register carry2,
+                                             Register idx, Register jdx,
+                                             Register yz_idx1, Register yz_idx2,
+                                             Register tmp, Register tmp3, Register tmp4,
+                                             Register tmp6, Register product_hi) {
+
+  //   jlong carry, x[], y[], z[];
+  //   int kdx = ystart+1;
+  //   for (int idx=ystart-2; idx >= 0; idx -= 2) { // Third loop
+  //     huge_128 tmp3 = (y[idx+1] * product_hi) + z[kdx+idx+1] + carry;
+  //     jlong carry2  = (jlong)(tmp3 >>> 64);
+  //     huge_128 tmp4 = (y[idx]   * product_hi) + z[kdx+idx] + carry2;
+  //     carry  = (jlong)(tmp4 >>> 64);
+  //     z[kdx+idx+1] = (jlong)tmp3;
+  //     z[kdx+idx] = (jlong)tmp4;
+  //   }
+  //   idx += 2;
+  //   if (idx > 0) {
+  //     yz_idx1 = (y[idx] * product_hi) + z[kdx+idx] + carry;
+  //     z[kdx+idx] = (jlong)yz_idx1;
+  //     carry  = (jlong)(yz_idx1 >>> 64);
+  //   }
+  //
+
+  Label L_third_loop, L_third_loop_exit, L_post_third_loop_done;
+
+  lsrw(jdx, idx, 2);
+
+  bind(L_third_loop);
+
+  subsw(jdx, jdx, 1);
+  br(Assembler::MI, L_third_loop_exit);
+  subw(idx, idx, 4);
+
+  lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt)));
+
+  ldp(yz_idx2, yz_idx1, Address(rscratch1, 0));
+
+  lea(tmp6, Address(z, idx, Address::uxtw(LogBytesPerInt)));
+
+  ror(yz_idx1, yz_idx1, 32); // convert big-endian to little-endian
+  ror(yz_idx2, yz_idx2, 32);
+
+  ldp(rscratch2, rscratch1, Address(tmp6, 0));
+
+  mul(tmp3, product_hi, yz_idx1);  //  yz_idx1 * product_hi -> tmp4:tmp3
+  umulh(tmp4, product_hi, yz_idx1);
+
+  ror(rscratch1, rscratch1, 32); // convert big-endian to little-endian
+  ror(rscratch2, rscratch2, 32);
+
+  mul(tmp, product_hi, yz_idx2);   //  yz_idx2 * product_hi -> carry2:tmp
+  umulh(carry2, product_hi, yz_idx2);
+
+  // propagate sum of both multiplications into carry:tmp4:tmp3
+  adds(tmp3, tmp3, carry);
+  adc(tmp4, tmp4, zr);
+  adds(tmp3, tmp3, rscratch1);
+  adcs(tmp4, tmp4, tmp);
+  adc(carry, carry2, zr);
+  adds(tmp4, tmp4, rscratch2);
+  adc(carry, carry, zr);
+
+  ror(tmp3, tmp3, 32); // convert little-endian to big-endian
+  ror(tmp4, tmp4, 32);
+  stp(tmp4, tmp3, Address(tmp6, 0));
+
+  b(L_third_loop);
+  bind (L_third_loop_exit);
+
+  andw (idx, idx, 0x3);
+  cbz(idx, L_post_third_loop_done);
+
+  Label L_check_1;
+  subsw(idx, idx, 2);
+  br(Assembler::MI, L_check_1);
+
+  lea(rscratch1, Address(y, idx, Address::uxtw(LogBytesPerInt)));
+  ldr(yz_idx1, Address(rscratch1, 0));
+  ror(yz_idx1, yz_idx1, 32);
+  mul(tmp3, product_hi, yz_idx1);  //  yz_idx1 * product_hi -> tmp4:tmp3
+  umulh(tmp4, product_hi, yz_idx1);
+  lea(rscratch1, Address(z, idx, Address::uxtw(LogBytesPerInt)));
+  ldr(yz_idx2, Address(rscratch1, 0));
+  ror(yz_idx2, yz_idx2, 32);
+
+  add2_with_carry(carry, tmp4, tmp3, carry, yz_idx2);
+
+  ror(tmp3, tmp3, 32);
+  str(tmp3, Address(rscratch1, 0));
+
+  bind (L_check_1);
+
+  andw (idx, idx, 0x1);
+  subsw(idx, idx, 1);
+  br(Assembler::MI, L_post_third_loop_done);
+  ldrw(tmp4, Address(y, idx, Address::uxtw(LogBytesPerInt)));
+  mul(tmp3, tmp4, product_hi);  //  tmp4 * product_hi -> carry2:tmp3
+  umulh(carry2, tmp4, product_hi);
+  ldrw(tmp4, Address(z, idx, Address::uxtw(LogBytesPerInt)));
+
+  add2_with_carry(carry2, tmp3, tmp4, carry);
+
+  strw(tmp3, Address(z, idx, Address::uxtw(LogBytesPerInt)));
+  extr(carry, carry2, tmp3, 32);
+
+  bind(L_post_third_loop_done);
+}
+
+/**
+ * Code for BigInteger::multiplyToLen() intrinsic.
+ *
+ * r0: x
+ * r1: xlen
+ * r2: y
+ * r3: ylen
+ * r4: z
+ * r5: zlen
+ * r10: tmp1
+ * r11: tmp2
+ * r12: tmp3
+ * r13: tmp4
+ * r14: tmp5
+ * r15: tmp6
+ * r16: tmp7
+ *
+ */
+void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen,
+                                     Register z, Register zlen,
+                                     Register tmp1, Register tmp2, Register tmp3, Register tmp4,
+                                     Register tmp5, Register tmp6, Register product_hi) {
+
+  assert_different_registers(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6);
+
+  const Register idx = tmp1;
+  const Register kdx = tmp2;
+  const Register xstart = tmp3;
+
+  const Register y_idx = tmp4;
+  const Register carry = tmp5;
+  const Register product  = xlen;
+  const Register x_xstart = zlen;  // reuse register
+
+  // First Loop.
+  //
+  //  final static long LONG_MASK = 0xffffffffL;
+  //  int xstart = xlen - 1;
+  //  int ystart = ylen - 1;
+  //  long carry = 0;
+  //  for (int idx=ystart, kdx=ystart+1+xstart; idx >= 0; idx--, kdx--) {
+  //    long product = (y[idx] & LONG_MASK) * (x[xstart] & LONG_MASK) + carry;
+  //    z[kdx] = (int)product;
+  //    carry = product >>> 32;
+  //  }
+  //  z[xstart] = (int)carry;
+  //
+
+  movw(idx, ylen);      // idx = ylen;
+  movw(kdx, zlen);      // kdx = xlen+ylen;
+  mov(carry, zr);       // carry = 0;
+
+  Label L_done;
+
+  movw(xstart, xlen);
+  subsw(xstart, xstart, 1);
+  br(Assembler::MI, L_done);
+
+  multiply_64_x_64_loop(x, xstart, x_xstart, y, y_idx, z, carry, product, idx, kdx);
+
+  Label L_second_loop;
+  cbzw(kdx, L_second_loop);
+
+  Label L_carry;
+  subw(kdx, kdx, 1);
+  cbzw(kdx, L_carry);
+
+  strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt)));
+  lsr(carry, carry, 32);
+  subw(kdx, kdx, 1);
+
+  bind(L_carry);
+  strw(carry, Address(z, kdx, Address::uxtw(LogBytesPerInt)));
+
+  // Second and third (nested) loops.
+  //
+  // for (int i = xstart-1; i >= 0; i--) { // Second loop
+  //   carry = 0;
+  //   for (int jdx=ystart, k=ystart+1+i; jdx >= 0; jdx--, k--) { // Third loop
+  //     long product = (y[jdx] & LONG_MASK) * (x[i] & LONG_MASK) +
+  //                    (z[k] & LONG_MASK) + carry;
+  //     z[k] = (int)product;
+  //     carry = product >>> 32;
+  //   }
+  //   z[i] = (int)carry;
+  // }
+  //
+  // i = xlen, j = tmp1, k = tmp2, carry = tmp5, x[i] = product_hi
+
+  const Register jdx = tmp1;
+
+  bind(L_second_loop);
+  mov(carry, zr);                // carry = 0;
+  movw(jdx, ylen);               // j = ystart+1
+
+  subsw(xstart, xstart, 1);      // i = xstart-1;
+  br(Assembler::MI, L_done);
+
+  str(z, Address(pre(sp, -4 * wordSize)));
+
+  Label L_last_x;
+  lea(z, offsetted_address(z, xstart, Address::uxtw(LogBytesPerInt), 4, BytesPerInt)); // z = z + k - j
+  subsw(xstart, xstart, 1);       // i = xstart-1;
+  br(Assembler::MI, L_last_x);
+
+  lea(rscratch1, Address(x, xstart, Address::uxtw(LogBytesPerInt)));
+  ldr(product_hi, Address(rscratch1));
+  ror(product_hi, product_hi, 32);  // convert big-endian to little-endian
+
+  Label L_third_loop_prologue;
+  bind(L_third_loop_prologue);
+
+  str(ylen, Address(sp, wordSize));
+  stp(x, xstart, Address(sp, 2 * wordSize));
+  multiply_128_x_128_loop(y, z, carry, x, jdx, ylen, product,
+                          tmp2, x_xstart, tmp3, tmp4, tmp6, product_hi);
+  ldp(z, ylen, Address(post(sp, 2 * wordSize)));
+  ldp(x, xlen, Address(post(sp, 2 * wordSize)));   // copy old xstart -> xlen
+
+  addw(tmp3, xlen, 1);
+  strw(carry, Address(z, tmp3, Address::uxtw(LogBytesPerInt)));
+  subsw(tmp3, tmp3, 1);
+  br(Assembler::MI, L_done);
+
+  lsr(carry, carry, 32);
+  strw(carry, Address(z, tmp3, Address::uxtw(LogBytesPerInt)));
+  b(L_second_loop);
+
+  // Next infrequent code is moved outside loops.
+  bind(L_last_x);
+  ldrw(product_hi, Address(x,  0));
+  b(L_third_loop_prologue);
+
+  bind(L_done);
+}
+
+/**
  * Emits code to update CRC-32 with a byte value according to constants in table
  *
  * @param [in,out]crc   Register containing the crc.
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -167,9 +167,8 @@
 
   // aliases defined in AARCH64 spec
 
-
   template<class T>
-  inline void  cmpw(Register Rd, T imm)  { subsw(zr, Rd, imm); }
+  inline void cmpw(Register Rd, T imm)  { subsw(zr, Rd, imm); }
   inline void cmp(Register Rd, unsigned imm)  { subs(zr, Rd, imm); }
 
   inline void cmnw(Register Rd, unsigned imm) { addsw(zr, Rd, imm); }
@@ -1121,9 +1120,34 @@
                       Register tmp1, Register tmp2,
                       Register tmp3, Register tmp4,
                       int int_cnt1, Register result);
-
+private:
+  void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo,
+                       Register src1, Register src2);
+  void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) {
+    add2_with_carry(dest_hi, dest_hi, dest_lo, src1, src2);
+  }
+  void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart,
+                             Register y, Register y_idx, Register z,
+                             Register carry, Register product,
+                             Register idx, Register kdx);
+  void multiply_128_x_128_loop(Register y, Register z,
+                               Register carry, Register carry2,
+                               Register idx, Register jdx,
+                               Register yz_idx1, Register yz_idx2,
+                               Register tmp, Register tmp3, Register tmp4,
+                               Register tmp7, Register product_hi);
+public:
+  void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z,
+                       Register zlen, Register tmp1, Register tmp2, Register tmp3,
+                       Register tmp4, Register tmp5, Register tmp6, Register tmp7);
   // ISB may be needed because of a safepoint
   void maybe_isb() { isb(); }
+
+private:
+  // Return the effective address r + (r1 << ext) + offset.
+  // Uses rscratch2.
+  Address offsetted_address(Register r, Register r1, Address::extend ext,
+                            int offset, int size);
 };
 
 // Used by aarch64.ad to control code generation
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2356,8 +2356,45 @@
     return start;
   }
 
-#undef __
-#define __ masm->
+  /**
+   *  Arguments:
+   *
+   *  Input:
+   *    c_rarg0   - x address
+   *    c_rarg1   - x length
+   *    c_rarg2   - y address
+   *    c_rarg3   - y length
+   *    c_rarg4   - z address
+   *    c_rarg5   - z length
+   */
+  address generate_multiplyToLen() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "multiplyToLen");
+
+    address start = __ pc();
+    const Register x     = r0;
+    const Register xlen  = r1;
+    const Register y     = r2;
+    const Register ylen  = r3;
+    const Register z     = r4;
+    const Register zlen  = r5;
+
+    const Register tmp1  = r10;
+    const Register tmp2  = r11;
+    const Register tmp3  = r12;
+    const Register tmp4  = r13;
+    const Register tmp5  = r14;
+    const Register tmp6  = r15;
+    const Register tmp7  = r16;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7);
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(lr);
+
+    return start;
+  }
 
   // Continuation point for throwing of implicit exceptions that are
   // not handled in the current activation. Fabricates an exception
@@ -2375,6 +2412,9 @@
   // otherwise assume that stack unwinding will be initiated, so
   // caller saved registers were assumed volatile in the compiler.
 
+#undef __
+#define __ masm->
+
   address generate_throw_exception(const char* name,
                                    address runtime_entry,
                                    Register arg1 = noreg,
@@ -2518,6 +2558,10 @@
     // arraycopy stubs used by compilers
     generate_arraycopy_stubs();
 
+    if (UseMultiplyToLenIntrinsic) {
+      StubRoutines::_multiplyToLen = generate_multiplyToLen();
+    }
+
 #ifndef BUILTIN_SIM
     if (UseAESIntrinsics) {
       StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock();
--- a/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/aarch64/vm/templateTable_aarch64.cpp	Thu May 07 20:51:12 2015 -0700
@@ -502,10 +502,17 @@
   __ neg(reg, reg);
 }
 
-void TemplateTable::iload()
-{
+void TemplateTable::iload() {
+  iload_internal();
+}
+
+void TemplateTable::nofast_iload() {
+  iload_internal(may_not_rewrite);
+}
+
+void TemplateTable::iload_internal(RewriteControl rc) {
   transition(vtos, itos);
-  if (RewriteFrequentPairs) {
+  if (RewriteFrequentPairs && rc == may_rewrite) {
     // TODO : check x86 code for what to do here
     __ call_Unimplemented();
   } else {
@@ -759,8 +766,15 @@
   __ ldr(r0, iaddress(n));
 }
 
-void TemplateTable::aload_0()
-{
+void TemplateTable::aload_0() {
+  aload_0_internal();
+}
+
+void TemplateTable::nofast_aload_0() {
+  aload_0_internal(may_not_rewrite);
+}
+
+void TemplateTable::aload_0_internal(RewriteControl rc) {
   // According to bytecode histograms, the pairs:
   //
   // _aload_0, _fast_igetfield
@@ -782,7 +796,7 @@
   //   aload_0, iload_1
   // These bytecodes with a small amount of code are most profitable
   // to rewrite
-  if (RewriteFrequentPairs) {
+  if (RewriteFrequentPairs && rc == may_rewrite) {
     __ call_Unimplemented();
   } else {
     aload(0);
@@ -2132,37 +2146,21 @@
   assert_different_registers(Rcache, index, temp);
 
   Label resolved;
+
+  Bytecodes::Code code = bytecode();
+  switch (code) {
+  case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
+  case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
+  }
+
   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
   __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
-  __ cmp(temp, (int) bytecode());  // have we resolved this bytecode?
+  __ cmp(temp, (int) code);  // have we resolved this bytecode?
   __ br(Assembler::EQ, resolved);
 
   // resolve first time through
-  address entry;
-  switch (bytecode()) {
-  case Bytecodes::_getstatic:
-  case Bytecodes::_putstatic:
-  case Bytecodes::_getfield:
-  case Bytecodes::_putfield:
-    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);
-    break;
-  case Bytecodes::_invokevirtual:
-  case Bytecodes::_invokespecial:
-  case Bytecodes::_invokestatic:
-  case Bytecodes::_invokeinterface:
-    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);
-    break;
-  case Bytecodes::_invokehandle:
-    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);
-    break;
-  case Bytecodes::_invokedynamic:
-    entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);
-    break;
-  default:
-    fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
-    break;
-  }
-  __ mov(temp, (int) bytecode());
+  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
+  __ mov(temp, (int) code);
   __ call_VM(noreg, entry, temp);
 
   // Update registers with resolved info
@@ -2280,7 +2278,7 @@
   __ verify_oop(r);
 }
 
-void TemplateTable::getfield_or_static(int byte_no, bool is_static)
+void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc)
 {
   const Register cache = r2;
   const Register index = r3;
@@ -2310,11 +2308,14 @@
   assert(btos == 0, "change code, btos != 0");
   __ cbnz(flags, notByte);
 
+  // Don't rewrite getstatic, only getfield
+  if (is_static) rc = may_not_rewrite;
+
   // btos
   __ load_signed_byte(r0, field);
   __ push(btos);
   // Rewrite bytecode to be faster
-  if (!is_static) {
+  if (rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_bgetfield, bc, r1);
   }
   __ b(Done);
@@ -2325,7 +2326,7 @@
   // atos
   __ load_heap_oop(r0, field);
   __ push(atos);
-  if (!is_static) {
+  if (rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_agetfield, bc, r1);
   }
   __ b(Done);
@@ -2337,7 +2338,7 @@
   __ ldrw(r0, field);
   __ push(itos);
   // Rewrite bytecode to be faster
-  if (!is_static) {
+  if (rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_igetfield, bc, r1);
   }
   __ b(Done);
@@ -2349,7 +2350,7 @@
   __ load_unsigned_short(r0, field);
   __ push(ctos);
   // Rewrite bytecode to be faster
-  if (!is_static) {
+  if (rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_cgetfield, bc, r1);
   }
   __ b(Done);
@@ -2361,7 +2362,7 @@
   __ load_signed_short(r0, field);
   __ push(stos);
   // Rewrite bytecode to be faster
-  if (!is_static) {
+  if (rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_sgetfield, bc, r1);
   }
   __ b(Done);
@@ -2373,7 +2374,7 @@
   __ ldr(r0, field);
   __ push(ltos);
   // Rewrite bytecode to be faster
-  if (!is_static) {
+  if (rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_lgetfield, bc, r1);
   }
   __ b(Done);
@@ -2385,7 +2386,7 @@
   __ ldrs(v0, field);
   __ push(ftos);
   // Rewrite bytecode to be faster
-  if (!is_static) {
+  if (rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_fgetfield, bc, r1);
   }
   __ b(Done);
@@ -2399,7 +2400,7 @@
   __ ldrd(v0, field);
   __ push(dtos);
   // Rewrite bytecode to be faster
-  if (!is_static) {
+  if (rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_dgetfield, bc, r1);
   }
 #ifdef ASSERT
@@ -2421,6 +2422,10 @@
   getfield_or_static(byte_no, false);
 }
 
+void TemplateTable::nofast_getfield(int byte_no) {  // getfield (is_static == false) with patching to the _fast_*getfield bytecodes disabled.
+  getfield_or_static(byte_no, false, may_not_rewrite);
+}
+
 void TemplateTable::getstatic(int byte_no)
 {
   getfield_or_static(byte_no, true);
@@ -2484,7 +2489,7 @@
   }
 }
 
-void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
+void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
   transition(vtos, vtos);
 
   const Register cache = r2;
@@ -2521,12 +2526,15 @@
   assert(btos == 0, "change code, btos != 0");
   __ cbnz(flags, notByte);
 
+  // Don't rewrite putstatic, only putfield
+  if (is_static) rc = may_not_rewrite;
+
   // btos
   {
     __ pop(btos);
     if (!is_static) pop_and_check_object(obj);
     __ strb(r0, field);
-    if (!is_static) {
+    if (rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_bputfield, bc, r1, true, byte_no);
     }
     __ b(Done);
@@ -2542,7 +2550,7 @@
     if (!is_static) pop_and_check_object(obj);
     // Store into the field
     do_oop_store(_masm, field, r0, _bs->kind(), false);
-    if (!is_static) {
+    if (rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_aputfield, bc, r1, true, byte_no);
     }
     __ b(Done);
@@ -2557,7 +2565,7 @@
     __ pop(itos);
     if (!is_static) pop_and_check_object(obj);
     __ strw(r0, field);
-    if (!is_static) {
+    if (rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_iputfield, bc, r1, true, byte_no);
     }
     __ b(Done);
@@ -2572,7 +2580,7 @@
     __ pop(ctos);
     if (!is_static) pop_and_check_object(obj);
     __ strh(r0, field);
-    if (!is_static) {
+    if (rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_cputfield, bc, r1, true, byte_no);
     }
     __ b(Done);
@@ -2587,7 +2595,7 @@
     __ pop(stos);
     if (!is_static) pop_and_check_object(obj);
     __ strh(r0, field);
-    if (!is_static) {
+    if (rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_sputfield, bc, r1, true, byte_no);
     }
     __ b(Done);
@@ -2602,7 +2610,7 @@
     __ pop(ltos);
     if (!is_static) pop_and_check_object(obj);
     __ str(r0, field);
-    if (!is_static) {
+    if (rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_lputfield, bc, r1, true, byte_no);
     }
     __ b(Done);
@@ -2617,7 +2625,7 @@
     __ pop(ftos);
     if (!is_static) pop_and_check_object(obj);
     __ strs(v0, field);
-    if (!is_static) {
+    if (rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_fputfield, bc, r1, true, byte_no);
     }
     __ b(Done);
@@ -2634,7 +2642,7 @@
     __ pop(dtos);
     if (!is_static) pop_and_check_object(obj);
     __ strd(v0, field);
-    if (!is_static) {
+    if (rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_dputfield, bc, r1, true, byte_no);
     }
   }
@@ -2661,6 +2669,10 @@
   putfield_or_static(byte_no, false);
 }
 
+void TemplateTable::nofast_putfield(int byte_no) {  // putfield (is_static == false) with patching to the _fast_*putfield bytecodes disabled.
+  putfield_or_static(byte_no, false, may_not_rewrite);
+}
+
 void TemplateTable::putstatic(int byte_no) {
   putfield_or_static(byte_no, true);
 }
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -193,6 +193,15 @@
     }
   }
 
+  // This machine allows unaligned memory accesses
+  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
+    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
+  }
+
+  if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) {
+    UseMultiplyToLenIntrinsic = true;
+  }
+
 #ifdef COMPILER2
   if (FLAG_IS_DEFAULT(OptoScheduling)) {
     OptoScheduling = true;
--- a/src/cpu/ppc/vm/assembler_ppc.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/assembler_ppc.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -85,8 +85,7 @@
 }
 
 // Low-level andi-one-instruction-macro.
-void Assembler::andi(Register a, Register s, const int ui16) {
-  assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
+void Assembler::andi(Register a, Register s, const long ui16) {
   if (is_power_of_2_long(((jlong) ui16)+1)) {
     // pow2minus1
     clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
@@ -97,6 +96,7 @@
     // negpow2
     clrrdi(a, s, log2_long((jlong)-ui16));
   } else {
+    assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
     andi_(a, s, ui16);
   }
 }
@@ -356,7 +356,6 @@
 // 16 bit immediate offset.
 int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
   // Avoid accidentally trying to use R0 for indexed addressing.
-  assert(d != R0, "R0 not allowed");
   assert_different_registers(d, tmp);
 
   short xa, xb, xc, xd; // Four 16-bit chunks of const.
@@ -370,6 +369,58 @@
     return 0;
   }
 
+  int retval = 0;
+  if (return_simm16_rest) {
+    retval = xd;
+    x = rem << 16;
+    xd = 0;
+  }
+
+  if (d == R0) { // Can't use addi.
+    if (is_simm(x, 32)) { // opt 2: simm32
+      lis(d, x >> 16);
+      if (xd) ori(d, d, (unsigned short)xd);
+    } else {
+      // 64-bit value: x = xa xb xc xd
+      xa = (x >> 48) & 0xffff;
+      xb = (x >> 32) & 0xffff;
+      xc = (x >> 16) & 0xffff;
+      bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
+      if (tmp == noreg || (xc == 0 && xd == 0)) {
+        if (xa_loaded) {
+          lis(d, xa);
+          if (xb) { ori(d, d, (unsigned short)xb); }
+        } else {
+          li(d, xb);
+        }
+        sldi(d, d, 32);
+        if (xc) { oris(d, d, (unsigned short)xc); }
+        if (xd) { ori( d, d, (unsigned short)xd); }
+      } else {
+        // Exploit instruction level parallelism if we have a tmp register.
+        bool xc_loaded = (xd & 0x8000) ? (xc != -1) : (xc != 0);
+        if (xa_loaded) {
+          lis(tmp, xa);
+        }
+        if (xc_loaded) {
+          lis(d, xc);
+        }
+        if (xa_loaded) {
+          if (xb) { ori(tmp, tmp, (unsigned short)xb); }
+        } else {
+          li(tmp, xb);
+        }
+        if (xc_loaded) {
+          if (xd) { ori(d, d, (unsigned short)xd); }
+        } else {
+          li(d, xd);
+        }
+        insrdi(d, tmp, 32, 0);
+      }
+    }
+    return retval;
+  }
+
   xc = rem & 0xFFFF; // Next 16-bit chunk.
   rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
 
@@ -377,28 +428,27 @@
     lis(d, xc);
   } else { // High 32 bits needed.
 
-    if (tmp != noreg) { // opt 3: We have a temp reg.
+    if (tmp != noreg  && (int)x != 0) { // opt 3: We have a temp reg.
       // No carry propagation between xc and higher chunks here (use logical instructions).
       xa = (x >> 48) & 0xffff;
       xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
-      bool load_xa = (xa != 0) || (xb < 0);
+      bool xa_loaded = (xb & 0x8000) ? (xa != -1) : (xa != 0);
       bool return_xd = false;
 
-      if (load_xa) { lis(tmp, xa); }
+      if (xa_loaded) { lis(tmp, xa); }
       if (xc) { lis(d, xc); }
-      if (load_xa) {
+      if (xa_loaded) {
         if (xb) { ori(tmp, tmp, (unsigned short)xb); } // No addi, we support tmp == R0.
       } else {
-        li(tmp, xb); // non-negative
+        li(tmp, xb);
       }
       if (xc) {
-        if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi.
-        else if (xd) { addi(d, d, xd); }
+        if (xd) { addi(d, d, xd); }
       } else {
         li(d, xd);
       }
       insrdi(d, tmp, 32, 0);
-      return return_xd ? xd : 0; // non-negative
+      return retval;
     }
 
     xb = rem & 0xFFFF; // Next 16-bit chunk.
@@ -417,11 +467,51 @@
     if (xc) { addis(d, d, xc); }
   }
 
-  // opt 5: Return offset to be inserted into following instruction.
-  if (return_simm16_rest) return xd;
+  if (xd) { addi(d, d, xd); }
+  return retval;
+}
 
-  if (xd) { addi(d, d, xd); }
-  return 0;
+// Emits code for d = s + x using only one addition on s (to optimize latency). If return_simm16_rest is set, a 16-bit rest may be returned instead of being added; the caller must add the return value afterwards.
+int Assembler::add_const_optimized(Register d, Register s, long x, Register tmp, bool return_simm16_rest) {
+  assert(s != R0 && s != tmp, "unsupported");
+  long rem = x;
+
+  // Case 1: Can use mr or addi (x fits into one sign-extended 16-bit immediate).
+  short xd = rem & 0xFFFF; // Lowest 16-bit chunk.
+  rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensation for sign extend of xd.
+  if (rem == 0) {
+    if (xd == 0) {
+      if (d != s) { mr(d, s); }
+      return 0;
+    }
+    if (return_simm16_rest && (d == s)) { // Deferring xd emits nothing, so it is only valid if d already is s.
+      return xd;
+    }
+    addi(d, s, xd);
+    return 0;
+  }
+
+  // Case 2: Can use addis (lowest chunk is zero and the rest fits into one sign-extended 16-bit immediate).
+  if (xd == 0) {
+    short xc = rem & 0xFFFF; // 2nd 16-bit chunk.
+    rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend of xc (xd is 0 here).
+    if (rem == 0) {
+      addis(d, s, xc);
+      return 0;
+    }
+  }
+
+  // Other cases: load & add.
+  Register tmp1 = tmp,
+           tmp2 = noreg;
+  if ((d != tmp) && (d != s)) {
+    // Can use d.
+    tmp1 = d;
+    tmp2 = tmp;
+  }
+  int simm16_rest = load_const_optimized(tmp1, x, tmp2, return_simm16_rest); // Loads x (minus the returned rest, if any) into tmp1.
+  add(d, tmp1, s);
+  return simm16_rest;
 }
 
 #ifndef PRODUCT
--- a/src/cpu/ppc/vm/assembler_ppc.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/assembler_ppc.hpp	Thu May 07 20:51:12 2015 -0700
@@ -224,10 +224,13 @@
     ADDIS_OPCODE  = (15u << OPCODE_SHIFT),
     ADDIC__OPCODE = (13u << OPCODE_SHIFT),
     ADDE_OPCODE   = (31u << OPCODE_SHIFT | 138u << 1),
+    ADDME_OPCODE  = (31u << OPCODE_SHIFT | 234u << 1),
+    ADDZE_OPCODE  = (31u << OPCODE_SHIFT | 202u << 1),
     SUBF_OPCODE   = (31u << OPCODE_SHIFT |  40u << 1),
     SUBFC_OPCODE  = (31u << OPCODE_SHIFT |   8u << 1),
     SUBFE_OPCODE  = (31u << OPCODE_SHIFT | 136u << 1),
     SUBFIC_OPCODE = (8u  << OPCODE_SHIFT),
+    SUBFME_OPCODE = (31u << OPCODE_SHIFT | 232u << 1),
     SUBFZE_OPCODE = (31u << OPCODE_SHIFT | 200u << 1),
     DIVW_OPCODE   = (31u << OPCODE_SHIFT | 491u << 1),
     MULLW_OPCODE  = (31u << OPCODE_SHIFT | 235u << 1),
@@ -657,6 +660,9 @@
     SYNC_OPCODE    = (31u << OPCODE_SHIFT |  598u << 1),
     EIEIO_OPCODE   = (31u << OPCODE_SHIFT |  854u << 1),
 
+    // Wait instructions for polling.
+    WAIT_OPCODE    = (31u << OPCODE_SHIFT |   62u << 1),
+
     // Trap instructions
     TDI_OPCODE     = (2u  << OPCODE_SHIFT),
     TWI_OPCODE     = (3u  << OPCODE_SHIFT),
@@ -666,8 +672,10 @@
     // Atomics.
     LWARX_OPCODE   = (31u << OPCODE_SHIFT |   20u << 1),
     LDARX_OPCODE   = (31u << OPCODE_SHIFT |   84u << 1),
+    LQARX_OPCODE   = (31u << OPCODE_SHIFT |  276u << 1),
     STWCX_OPCODE   = (31u << OPCODE_SHIFT |  150u << 1),
-    STDCX_OPCODE   = (31u << OPCODE_SHIFT |  214u << 1)
+    STDCX_OPCODE   = (31u << OPCODE_SHIFT |  214u << 1),
+    STQCX_OPCODE   = (31u << OPCODE_SHIFT |  182u << 1)
 
   };
 
@@ -1171,6 +1179,14 @@
   inline void adde_(  Register d, Register a, Register b);
   inline void subfe(  Register d, Register a, Register b);
   inline void subfe_( Register d, Register a, Register b);
+  inline void addme(  Register d, Register a);
+  inline void addme_( Register d, Register a);
+  inline void subfme( Register d, Register a);
+  inline void subfme_(Register d, Register a);
+  inline void addze(  Register d, Register a);
+  inline void addze_( Register d, Register a);
+  inline void subfze( Register d, Register a);
+  inline void subfze_(Register d, Register a);
   inline void neg(    Register d, Register a);
   inline void neg_(   Register d, Register a);
   inline void mulli(  Register d, Register a, int si16);
@@ -1189,6 +1205,38 @@
   inline void divw(   Register d, Register a, Register b);
   inline void divw_(  Register d, Register a, Register b);
 
+  // Fixed-Point Arithmetic Instructions with Overflow detection
+  inline void addo(    Register d, Register a, Register b);
+  inline void addo_(   Register d, Register a, Register b);
+  inline void subfo(   Register d, Register a, Register b);
+  inline void subfo_(  Register d, Register a, Register b);
+  inline void addco(   Register d, Register a, Register b);
+  inline void addco_(  Register d, Register a, Register b);
+  inline void subfco(  Register d, Register a, Register b);
+  inline void subfco_( Register d, Register a, Register b);
+  inline void addeo(   Register d, Register a, Register b);
+  inline void addeo_(  Register d, Register a, Register b);
+  inline void subfeo(  Register d, Register a, Register b);
+  inline void subfeo_( Register d, Register a, Register b);
+  inline void addmeo(  Register d, Register a);
+  inline void addmeo_( Register d, Register a);
+  inline void subfmeo( Register d, Register a);
+  inline void subfmeo_(Register d, Register a);
+  inline void addzeo(  Register d, Register a);
+  inline void addzeo_( Register d, Register a);
+  inline void subfzeo( Register d, Register a);
+  inline void subfzeo_(Register d, Register a);
+  inline void nego(    Register d, Register a);
+  inline void nego_(   Register d, Register a);
+  inline void mulldo(  Register d, Register a, Register b);
+  inline void mulldo_( Register d, Register a, Register b);
+  inline void mullwo(  Register d, Register a, Register b);
+  inline void mullwo_( Register d, Register a, Register b);
+  inline void divdo(   Register d, Register a, Register b);
+  inline void divdo_(  Register d, Register a, Register b);
+  inline void divwo(   Register d, Register a, Register b);
+  inline void divwo_(  Register d, Register a, Register b);
+
   // extended mnemonics
   inline void li(   Register d, int si16);
   inline void lis(  Register d, int si16);
@@ -1303,7 +1351,7 @@
   inline void isel_0( Register d, ConditionRegister cr, Condition cc, Register b = noreg);
 
   // PPC 1, section 3.3.11, Fixed-Point Logical Instructions
-         void andi(   Register a, Register s, int ui16);   // optimized version
+         void andi(   Register a, Register s, long ui16);   // optimized version
   inline void andi_(  Register a, Register s, int ui16);
   inline void andis_( Register a, Register s, int ui16);
   inline void ori(    Register a, Register s, int ui16);
@@ -1688,14 +1736,21 @@
   inline void isync();
   inline void elemental_membar(int e); // Elemental Memory Barriers (>=Power 8)
 
+  // Wait instructions for polling. Attention: May result in SIGILL.
+  inline void wait();
+  inline void waitrsv(); // >=Power7
+
   // atomics
   inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
   inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
+  inline void lqarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
   inline bool lxarx_hint_exclusive_access();
   inline void lwarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
   inline void ldarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
+  inline void lqarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
   inline void stwcx_( Register s, Register a, Register b);
   inline void stdcx_( Register s, Register a, Register b);
+  inline void stqcx_( Register s, Register a, Register b);
 
   // Instructions for adjusting thread priority for simultaneous
   // multithreading (SMT) on Power5.
@@ -2054,10 +2109,13 @@
   // Atomics: use ra0mem to disallow R0 as base.
   inline void lwarx_unchecked(Register d, Register b, int eh1);
   inline void ldarx_unchecked(Register d, Register b, int eh1);
+  inline void lqarx_unchecked(Register d, Register b, int eh1);
   inline void lwarx( Register d, Register b, bool hint_exclusive_access);
   inline void ldarx( Register d, Register b, bool hint_exclusive_access);
+  inline void lqarx( Register d, Register b, bool hint_exclusive_access);
   inline void stwcx_(Register s, Register b);
   inline void stdcx_(Register s, Register b);
+  inline void stqcx_(Register s, Register b);
   inline void lfs(   FloatRegister d, int si16);
   inline void lfsx(  FloatRegister d, Register b);
   inline void lfd(   FloatRegister d, int si16);
@@ -2120,6 +2178,20 @@
     return load_const_optimized(d, (long)(unsigned long)a, tmp, return_simm16_rest);
   }
 
+  // Adds the constant x (or the address a) to s, result in d. If return_simm16_rest, the return value needs to get added afterwards.
+         int add_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false);
+  inline int add_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) {
+    return add_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest); // Pointer overload: passes the address as a long.
+  }
+
+  // Subtracts x (or the address a) from s by adding the negated constant. If return_simm16_rest, the return value needs to get added afterwards.
+  inline int sub_const_optimized(Register d, Register s, long x, Register tmp = R0, bool return_simm16_rest = false) {
+    return add_const_optimized(d, s, -x, tmp, return_simm16_rest);
+  }
+  inline int sub_const_optimized(Register d, Register s, void* a, Register tmp = R0, bool return_simm16_rest = false) {
+    return sub_const_optimized(d, s, (long)(unsigned long)a, tmp, return_simm16_rest); // Pointer overload: passes the address as a long.
+  }
+
   // Creation
   Assembler(CodeBuffer* code) : AbstractAssembler(code) {
 #ifdef CHECK_DELAY
--- a/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Thu May 07 20:51:12 2015 -0700
@@ -100,6 +100,14 @@
 inline void Assembler::adde_(  Register d, Register a, Register b) { emit_int32(ADDE_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
 inline void Assembler::subfe(  Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
 inline void Assembler::subfe_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+inline void Assembler::addme(  Register d, Register a)             { emit_int32(ADDME_OPCODE  | rt(d) | ra(a) |         oe(0) | rc(0)); } // addme/subfme/addze/subfze: no rb operand, oe(0); '_' variants set rc(1).
+inline void Assembler::addme_( Register d, Register a)             { emit_int32(ADDME_OPCODE  | rt(d) | ra(a) |         oe(0) | rc(1)); }
+inline void Assembler::subfme( Register d, Register a)             { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) |         oe(0) | rc(0)); }
+inline void Assembler::subfme_(Register d, Register a)             { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) |         oe(0) | rc(1)); }
+inline void Assembler::addze(  Register d, Register a)             { emit_int32(ADDZE_OPCODE  | rt(d) | ra(a) |         oe(0) | rc(0)); }
+inline void Assembler::addze_( Register d, Register a)             { emit_int32(ADDZE_OPCODE  | rt(d) | ra(a) |         oe(0) | rc(1)); }
+inline void Assembler::subfze( Register d, Register a)             { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) |         oe(0) | rc(0)); }
+inline void Assembler::subfze_(Register d, Register a)             { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) |         oe(0) | rc(1)); }
 inline void Assembler::neg(    Register d, Register a)             { emit_int32(NEG_OPCODE    | rt(d) | ra(a) | oe(0) | rc(0)); }
 inline void Assembler::neg_(   Register d, Register a)             { emit_int32(NEG_OPCODE    | rt(d) | ra(a) | oe(0) | rc(1)); }
 inline void Assembler::mulli(  Register d, Register a, int si16)   { emit_int32(MULLI_OPCODE  | rt(d) | ra(a) | simm(si16, 16)); }
@@ -118,6 +126,38 @@
 inline void Assembler::divw(   Register d, Register a, Register b) { emit_int32(DIVW_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
 inline void Assembler::divw_(  Register d, Register a, Register b) { emit_int32(DIVW_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
 
+// Fixed-Point Arithmetic Instructions with Overflow detection: same opcodes as the plain forms, but encoded with oe(1). The '_' variants additionally set rc(1).
+inline void Assembler::addo(    Register d, Register a, Register b) { emit_int32(ADD_OPCODE    | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::addo_(   Register d, Register a, Register b) { emit_int32(ADD_OPCODE    | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::subfo(   Register d, Register a, Register b) { emit_int32(SUBF_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::subfo_(  Register d, Register a, Register b) { emit_int32(SUBF_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::addco(   Register d, Register a, Register b) { emit_int32(ADDC_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::addco_(  Register d, Register a, Register b) { emit_int32(ADDC_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::subfco(  Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::subfco_( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::addeo(   Register d, Register a, Register b) { emit_int32(ADDE_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::addeo_(  Register d, Register a, Register b) { emit_int32(ADDE_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::subfeo(  Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::subfeo_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::addmeo(  Register d, Register a)             { emit_int32(ADDME_OPCODE  | rt(d) | ra(a) |         oe(1) | rc(0)); }
+inline void Assembler::addmeo_( Register d, Register a)             { emit_int32(ADDME_OPCODE  | rt(d) | ra(a) |         oe(1) | rc(1)); }
+inline void Assembler::subfmeo( Register d, Register a)             { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) |         oe(1) | rc(0)); }
+inline void Assembler::subfmeo_(Register d, Register a)             { emit_int32(SUBFME_OPCODE | rt(d) | ra(a) |         oe(1) | rc(1)); }
+inline void Assembler::addzeo(  Register d, Register a)             { emit_int32(ADDZE_OPCODE  | rt(d) | ra(a) |         oe(1) | rc(0)); }
+inline void Assembler::addzeo_( Register d, Register a)             { emit_int32(ADDZE_OPCODE  | rt(d) | ra(a) |         oe(1) | rc(1)); }
+inline void Assembler::subfzeo( Register d, Register a)             { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) |         oe(1) | rc(0)); }
+inline void Assembler::subfzeo_(Register d, Register a)             { emit_int32(SUBFZE_OPCODE | rt(d) | ra(a) |         oe(1) | rc(1)); }
+inline void Assembler::nego(    Register d, Register a)             { emit_int32(NEG_OPCODE    | rt(d) | ra(a) | oe(1) | rc(0)); }
+inline void Assembler::nego_(   Register d, Register a)             { emit_int32(NEG_OPCODE    | rt(d) | ra(a) | oe(1) | rc(1)); }
+inline void Assembler::mulldo(  Register d, Register a, Register b) { emit_int32(MULLD_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::mulldo_( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::mullwo(  Register d, Register a, Register b) { emit_int32(MULLW_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::mullwo_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE  | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::divdo(   Register d, Register a, Register b) { emit_int32(DIVD_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::divdo_(  Register d, Register a, Register b) { emit_int32(DIVD_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+inline void Assembler::divwo(   Register d, Register a, Register b) { emit_int32(DIVW_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(0)); }
+inline void Assembler::divwo_(  Register d, Register a, Register b) { emit_int32(DIVW_OPCODE   | rt(d) | ra(a) | rb(b) | oe(1) | rc(1)); }
+
 // extended mnemonics
 inline void Assembler::li(   Register d, int si16)             { Assembler::addi_r0ok( d, R0, si16); }
 inline void Assembler::lis(  Register d, int si16)             { Assembler::addis_r0ok(d, R0, si16); }
@@ -540,15 +580,22 @@
 inline void Assembler::isync()     { emit_int32( ISYNC_OPCODE); }
 inline void Assembler::elemental_membar(int e) { assert(0 < e && e < 16, "invalid encoding"); emit_int32( SYNC_OPCODE | e1215(e)); }
 
+// Wait instructions for polling. Attention: May result in SIGILL. wait() emits the bare opcode; waitrsv() additionally sets the WC field to 0b01.
+inline void Assembler::wait()    { emit_int32( WAIT_OPCODE); }
+inline void Assembler::waitrsv() { emit_int32( WAIT_OPCODE | 1<<(31-10)); } // WC=0b01 >=Power7
+
 // atomics
 // Use ra0mem to disallow R0 as base.
 inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
 inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
+inline void Assembler::lqarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LQARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
 inline bool Assembler::lxarx_hint_exclusive_access()                                          { return VM_Version::has_lxarxeh(); }
 inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::lqarx( Register d, Register a, Register b, bool hint_exclusive_access) { lqarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::stwcx_(Register s, Register a, Register b)                             { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
 inline void Assembler::stdcx_(Register s, Register a, Register b)                             { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
+inline void Assembler::stqcx_(Register s, Register a, Register b)                             { emit_int32( STQCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
 
 // Instructions for adjusting thread priority
 // for simultaneous multithreading (SMT) on POWER5.
@@ -873,10 +920,13 @@
 // ra0 version
 inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
 inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
+inline void Assembler::lqarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LQARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
 inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::lqarx( Register d, Register b, bool hint_exclusive_access){ lqarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
 inline void Assembler::stwcx_(Register s, Register b)                            { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); }
 inline void Assembler::stdcx_(Register s, Register b)                            { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::stqcx_(Register s, Register b)                            { emit_int32( STQCX_OPCODE | rs(s) | rb(b) | rc(1)); }
 
 // ra0 version
 inline void Assembler::lfs( FloatRegister d, int si16)   { emit_int32( LFS_OPCODE  | frt(d) | simm(si16,16)); }
--- a/src/cpu/ppc/vm/c2_globals_ppc.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/c2_globals_ppc.hpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,7 +47,7 @@
 define_pd_global(intx, FLOATPRESSURE,                28);
 define_pd_global(intx, FreqInlineSize,               175);
 define_pd_global(intx, MinJumpTableSize,             10);
-define_pd_global(intx, INTPRESSURE,                  25);
+define_pd_global(intx, INTPRESSURE,                  26);
 define_pd_global(intx, InteriorEntryAlignment,       16);
 define_pd_global(size_t, NewSizeThreadIncrease,      ScaleForWordSize(4*K));
 define_pd_global(intx, RegisterCostAreaRatio,        16000);
--- a/src/cpu/ppc/vm/globals_ppc.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/globals_ppc.hpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,10 +55,12 @@
 
 define_pd_global(bool, UseMembar,             false);
 
+define_pd_global(bool, PreserveFramePointer,  false);
+
 // GC Ergo Flags
 define_pd_global(size_t, CMSYoungGenPerWorker, 16*M);  // Default max size of CMS young gen, per GC worker thread.
 
-define_pd_global(uintx, TypeProfileLevel, 0);
+define_pd_global(uintx, TypeProfileLevel, 111);
 
 // Platform dependent flag handling: flags only defined on this platform.
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct)  \
@@ -71,14 +73,26 @@
                                                                             \
   product(uintx, PowerArchitecturePPC64, 0,                                 \
           "CPU Version: x for PowerX. Currently recognizes Power5 to "      \
-          "Power7. Default is 0. CPUs newer than Power7 will be "           \
-          "recognized as Power7.")                                          \
+          "Power8. Default is 0. Newer CPUs will be recognized as Power8.") \
                                                                             \
   /* Reoptimize code-sequences of calls at runtime, e.g. replace an */      \
   /* indirect call by a direct call.                                */      \
   product(bool, ReoptimizeCallSequences, true,                              \
           "Reoptimize code-sequences of calls at runtime.")                 \
                                                                             \
+  /* Power 8: Configure Data Stream Control Register. */                    \
+  product(uint64_t,DSCR_PPC64, (uintx)-1,                                   \
+          "Power8 or later: Specify encoded value for Data Stream Control " \
+          "Register")                                                       \
+  product(uint64_t,DSCR_DPFD_PPC64, 8,                                      \
+          "Power8 or later: DPFD (default prefetch depth) value of the "    \
+          "Data Stream Control Register."                                   \
+          " 0: hardware default, 1: none, 2-7: min-max, 8: don't touch")    \
+  product(uint64_t,DSCR_URG_PPC64, 8,                                       \
+          "Power8 or later: URG (depth attainment urgency) value of the "   \
+          "Data Stream Control Register."                                   \
+          " 0: hardware default, 1: none, 2-7: min-max, 8: don't touch")    \
+                                                                            \
   product(bool, UseLoadInstructionsForStackBangingPPC64, false,             \
           "Use load instructions for stack banging.")                       \
                                                                             \
@@ -121,6 +135,41 @@
                                                                             \
   product(bool, ZapMemory, false, "Write 0x0101... to empty memory."        \
           " Use this to ease debugging.")                                   \
-
+                                                                            \
+  /* Use Restricted Transactional Memory for lock eliding */                \
+  product(bool, UseRTMLocking, false,                                       \
+          "Enable RTM lock eliding for inflated locks in compiled code")    \
+                                                                            \
+  experimental(bool, UseRTMForStackLocks, false,                            \
+          "Enable RTM lock eliding for stack locks in compiled code")       \
+                                                                            \
+  product(bool, UseRTMDeopt, false,                                         \
+          "Perform deopt and recompilation based on RTM abort ratio")       \
+                                                                            \
+  product(uintx, RTMRetryCount, 5,                                          \
+          "Number of RTM retries on lock abort or busy")                    \
+                                                                            \
+  experimental(intx, RTMSpinLoopCount, 100,                                 \
+          "Spin count for lock to become free before RTM retry")            \
+                                                                            \
+  experimental(intx, RTMAbortThreshold, 1000,                               \
+          "Calculate abort ratio after this number of aborts")              \
+                                                                            \
+  experimental(intx, RTMLockingThreshold, 10000,                            \
+          "Lock count at which to do RTM lock eliding without "             \
+          "abort ratio calculation")                                        \
+                                                                            \
+  experimental(intx, RTMAbortRatio, 50,                                     \
+          "Lock abort ratio at which to stop use RTM lock eliding")         \
+                                                                            \
+  experimental(intx, RTMTotalCountIncrRate, 64,                             \
+          "Increment total RTM attempted lock count once every n times")    \
+                                                                            \
+  experimental(intx, RTMLockingCalculationDelay, 0,                         \
+          "Number of milliseconds to wait before start calculating aborts " \
+          "for RTM locking")                                                \
+                                                                            \
+  experimental(bool, UseRTMXendForLockBusy, true,                           \
+          "Use RTM Xend instead of Xabort when lock busy")                  \
 
 #endif // CPU_PPC_VM_GLOBALS_PPC_HPP
--- a/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Thu May 07 20:51:12 2015 -0700
@@ -446,7 +446,7 @@
 }
 
 // Load object from cpool->resolved_references(index).
-void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) {
+void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index, Label *is_null) {
   assert_different_registers(result, index);
   get_constant_pool(result);
 
@@ -469,7 +469,7 @@
 #endif
   // Add in the index.
   add(result, tmp, result);
-  load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result);
+  load_heap_oop(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT), result, is_null);
 }
 
 // Generate a subtype check: branch to ok_is_subtype if sub_klass is
@@ -876,7 +876,6 @@
     // If condition is true we are done and hence we can store 0 in the displaced
     // header indicating it is a recursive lock.
     bne(CCR0, slow_case);
-    release();
     std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() +
         BasicLock::displaced_header_offset_in_bytes(), monitor);
     b(done);
@@ -1861,7 +1860,7 @@
     const Register mdp = tmp1;
     add(mdp, tmp1, R28_mdx);
 
-    // Pffset of the current profile entry to update.
+    // Offset of the current profile entry to update.
     const Register entry_offset = tmp2;
     // entry_offset = array len in number of cells
     ld(entry_offset, in_bytes(ArrayData::array_len_offset()), mdp);
--- a/src/cpu/ppc/vm/interp_masm_ppc_64.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/interp_masm_ppc_64.hpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -85,7 +85,7 @@
                          Register tmp1, Register tmp2, Register tmp3, Label &ok_is_subtype);
 
   // Load object from cpool->resolved_references(index).
-  void load_resolved_reference_at_index(Register result, Register index);
+  void load_resolved_reference_at_index(Register result, Register index, Label *is_null = NULL);
 
   void generate_stack_overflow_check_with_compare_and_throw(Register Rmem_frame_size, Register Rscratch1);
   void load_receiver(Register Rparam_count, Register Rrecv_dst);
@@ -137,7 +137,6 @@
   void field_offset_at(int n, Register tmp, Register dest, Register base);
   int  field_offset_at(Register object, address bcp, int offset);
   void fast_iaaccess(int n, address bcp);
-  void fast_iagetfield(address bcp);
   void fast_iaputfield(address bcp, bool do_store_check);
 
   void index_check(Register array, Register index, int index_shift, Register tmp, Register res);
--- a/src/cpu/ppc/vm/interpreter_ppc.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/interpreter_ppc.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -427,7 +427,6 @@
   return entry;
 }
 
-
 // Call an accessor method (assuming it is resolved, otherwise drop into
 // vanilla (slow path) entry.
 address InterpreterGenerator::generate_jump_to_normal_entry(void) {
@@ -473,7 +472,8 @@
 
   // This is not a leaf but we have a JavaFrameAnchor now and we will
   // check (create) exceptions afterward so this is ok.
-  __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError),
+                  R16_thread);
 
   // Pop the C frame and restore LR.
   __ pop_frame();
--- a/src/cpu/ppc/vm/interpreter_ppc.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/interpreter_ppc.hpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -47,4 +47,4 @@
   }
 #endif
 
-#endif // CPU_PPC_VM_INTERPRETER_PPC_PP
+#endif // CPU_PPC_VM_INTERPRETER_PPC_HPP
--- a/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
  * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1455,7 +1455,7 @@
 // Several special cases exist to avoid that unnecessary information is generated.
 //
 void MacroAssembler::cmpxchgd(ConditionRegister flag,
-                              Register dest_current_value, Register compare_value, Register exchange_value,
+                              Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
                               Register addr_base, int semantics, bool cmpxchgx_hint,
                               Register int_flag_success, Label* failed_ext, bool contention_hint) {
   Label retry;
@@ -1465,7 +1465,7 @@
 
   // Save one branch if result is returned via register and result register is different from the other ones.
   bool use_result_reg    = (int_flag_success!=noreg);
-  bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value &&
+  bool preset_result_reg = (int_flag_success!=dest_current_value && int_flag_success!=compare_value.register_or_noreg() &&
                             int_flag_success!=exchange_value && int_flag_success!=addr_base);
   assert(int_flag_success == noreg || failed_ext == NULL, "cannot have both");
 
@@ -1481,7 +1481,7 @@
   // Add simple guard in order to reduce risk of starving under high contention (recommended by IBM).
   if (contention_hint) { // Don't try to reserve if cmp fails.
     ld(dest_current_value, 0, addr_base);
-    cmpd(flag, dest_current_value, compare_value);
+    cmpd(flag, compare_value, dest_current_value);
     bne(flag, failed);
   }
 
@@ -1489,7 +1489,7 @@
   bind(retry);
 
   ldarx(dest_current_value, addr_base, cmpxchgx_hint);
-  cmpd(flag, dest_current_value, compare_value);
+  cmpd(flag, compare_value, dest_current_value);
   if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
     bne_predict_not_taken(flag, failed);
   } else {
@@ -1873,7 +1873,6 @@
   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 
   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
-  fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
            /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
            /*where=*/obj_reg,
@@ -1909,7 +1908,6 @@
   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 
   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
-  fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
                  /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
                  /*where=*/obj_reg,
@@ -1946,7 +1944,6 @@
   assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
 
   // CmpxchgX sets cr_reg to cmpX(temp2_reg, mark_reg).
-  fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
   cmpxchgd(/*flag=*/cr_reg, /*current_value=*/temp2_reg,
                  /*compare_value=*/mark_reg, /*exchange_value=*/temp_reg,
                  /*where=*/obj_reg,
@@ -1987,9 +1984,371 @@
   beq(cr_reg, done);
 }
 
+// TM on PPC64.
+void MacroAssembler::atomic_inc_ptr(Register addr, Register result, int simm16) { // Emits a retry loop that adds simm16 to the value at addr.
+  Label retry;
+  bind(retry);
+  ldarx(result, addr, /*hint*/ false); // Load the current value; no exclusive-access hint requested.
+  addi(result, result, simm16);        // result is left holding the updated value.
+  stdcx_(result, addr);
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
+  } else {
+    bne(                  CCR0, retry); // stXcx_ sets CCR0
+  }
+}
+
+void MacroAssembler::atomic_ori_int(Register addr, Register result, int uimm16) { // Emits a retry loop that ORs uimm16 into the value at addr.
+  Label retry;
+  bind(retry);
+  lwarx(result, addr, /*hint*/ false); // Load the current value; no exclusive-access hint requested.
+  ori(result, result, uimm16);         // result is left holding the updated value.
+  stwcx_(result, addr);
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) {
+    bne_predict_not_taken(CCR0, retry); // stXcx_ sets CCR0
+  } else {
+    bne(                  CCR0, retry); // stXcx_ sets CCR0
+  }
+}
+
+#if INCLUDE_RTM_OPT
+
+// Update rtm_counters based on abort status. R0 is used as address scratch.
+// input: abort_status (only read here)
+//        rtm_counters_Reg (RTMLockingCounters*), used as scratch and rebuilt before returning
+void MacroAssembler::rtm_counters_update(Register abort_status, Register rtm_counters_Reg) {
+  // Mapping to keep PreciseRTMLockingStatistics similar to x86.
+  // x86 ppc (! means inverted, ? means not the same)
+  //  0   31  Set if abort caused by XABORT instruction.
+  //  1  ! 7  If set, the transaction may succeed on a retry. This bit is always clear if bit 0 is set.
+  //  2   13  Set if another logical processor conflicted with a memory address that was part of the transaction that aborted.
+  //  3   10  Set if an internal buffer overflowed.
+  //  4  ?12  Set if a debug breakpoint was hit.
+  //  5  ?32  Set if an abort occurred during execution of a nested transaction.
+  const  int tm_failure_bit[] = {Assembler::tm_tabort, // Note: Seems like signal handler sets this, too.
+                                 Assembler::tm_failure_persistent, // inverted: transient
+                                 Assembler::tm_trans_cf,
+                                 Assembler::tm_footprint_of,
+                                 Assembler::tm_non_trans_cf,
+                                 Assembler::tm_suspended};
+  const bool tm_failure_inv[] = {false, true, false, false, false, false}; // true: count when the bit is clear.
+  assert(sizeof(tm_failure_bit)/sizeof(int) == RTMLockingCounters::ABORT_STATUS_LIMIT, "adapt mapping!");
+
+  const Register addr_Reg = R0;
+  // Keep track of offset to where rtm_counters_Reg had pointed to.
+  int counters_offs = RTMLockingCounters::abort_count_offset();
+  addi(addr_Reg, rtm_counters_Reg, counters_offs);
+  const Register temp_Reg = rtm_counters_Reg; // Alias: the counters pointer now lives only in addr_Reg (plus counters_offs).
+
+  //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
+  ldx(temp_Reg, addr_Reg);
+  addi(temp_Reg, temp_Reg, 1);
+  stdx(temp_Reg, addr_Reg);
+
+  if (PrintPreciseRTMLockingStatistics) {
+    int counters_offs_delta = RTMLockingCounters::abortX_count_offset() - counters_offs; // First step: from abort_count to the abortX counters.
+
+    //mftexasr(abort_status); done by caller
+    for (int i = 0; i < RTMLockingCounters::ABORT_STATUS_LIMIT; i++) {
+      counters_offs += counters_offs_delta; // Accumulate the total displacement so it can be undone below.
+      li(temp_Reg, counters_offs_delta); // can't use addi with R0
+      add(addr_Reg, addr_Reg, temp_Reg); // point to next counter
+      counters_offs_delta = sizeof(uintx); // Every later step advances by one uintx.
+
+      Label check_abort;
+      rldicr_(temp_Reg, abort_status, tm_failure_bit[i], 0); // Test status bit tm_failure_bit[i]; the branches below read CCR0.
+      if (tm_failure_inv[i]) {
+        bne(CCR0, check_abort); // Inverted entry: skip the increment when the bit is set.
+      } else {
+        beq(CCR0, check_abort); // Skip the increment when the bit is clear.
+      }
+      //atomic_inc_ptr(addr_Reg, temp_Reg); We don't increment atomically
+      ldx(temp_Reg, addr_Reg);
+      addi(temp_Reg, temp_Reg, 1);
+      stdx(temp_Reg, addr_Reg);
+      bind(check_abort);
+    }
+  }
+  li(temp_Reg, -counters_offs); // can't use addi with R0
+  add(rtm_counters_Reg, addr_Reg, temp_Reg); // restore
+}
+
+// Branch to brLabel if ((random & (count-1)) != 0), count is 2^n; random is the value read via mftb.
+// tmp and CR0 are killed
+void MacroAssembler::branch_on_random_using_tb(Register tmp, int count, Label& brLabel) {
+  mftb(tmp);
+  andi_(tmp, tmp, count-1); // Keep only the low bits selected by count-1.
+  bne(CCR0, brLabel);
+}
+
+// Perform abort ratio calculation, set no_rtm bit if high ratio. R0 and CCR0 are used as scratch.
+// input:  rtm_counters_Reg (RTMLockingCounters* address) - KILLED
+void MacroAssembler::rtm_abort_ratio_calculation(Register rtm_counters_Reg,
+                                                 RTMLockingCounters* rtm_counters,
+                                                 Metadata* method_data) {
+  Label L_done, L_check_always_rtm1, L_check_always_rtm2;
+
+  if (RTMLockingCalculationDelay > 0) {
+    // Delay calculation: do nothing while the calculation flag is still zero.
+    ld(rtm_counters_Reg, (RegisterOrConstant)(intptr_t)RTMLockingCounters::rtm_calculation_flag_addr());
+    cmpdi(CCR0, rtm_counters_Reg, 0);
+    beq(CCR0, L_done);
+    load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
+  }
+  // Abort ratio calculation only if abort_count >= RTMAbortThreshold.
+  //   Aborted transactions = abort_count * 100
+  //   All transactions = total_count *  RTMTotalCountIncrRate
+  //   Set no_rtm bit if (Aborted transactions >= All transactions * RTMAbortRatio)
+  ld(R0, RTMLockingCounters::abort_count_offset(), rtm_counters_Reg);
+  cmpdi(CCR0, R0, RTMAbortThreshold);
+  blt(CCR0, L_check_always_rtm2); // Below threshold: counters pointer is still intact, so skip the reload.
+  mulli(R0, R0, 100);
+
+  const Register tmpReg = rtm_counters_Reg; // Alias: loading into tmpReg overwrites the counters pointer.
+  ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
+  mulli(tmpReg, tmpReg, RTMTotalCountIncrRate);
+  mulli(tmpReg, tmpReg, RTMAbortRatio);
+  cmpd(CCR0, R0, tmpReg);
+  blt(CCR0, L_check_always_rtm1); // jump to reload
+  if (method_data != NULL) {
+    // Set rtm_state to "no rtm" in MDO.
+    // Not using a metadata relocation. Method and Class Loader are kept alive anyway.
+    // (See nmethod::metadata_do and CodeBuffer::finalize_oop_references.)
+    load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
+    atomic_ori_int(R0, tmpReg, NoRTM);
+  }
+  b(L_done);
+
+  bind(L_check_always_rtm1); // Ratio is low, but the counters pointer was overwritten above.
+  load_const_optimized(rtm_counters_Reg, (address)rtm_counters, R0); // reload
+  bind(L_check_always_rtm2);
+  ld(tmpReg, RTMLockingCounters::total_count_offset(), rtm_counters_Reg);
+  cmpdi(CCR0, tmpReg, RTMLockingThreshold / RTMTotalCountIncrRate);
+  blt(CCR0, L_done);
+  if (method_data != NULL) {
+    // Set rtm_state to "always rtm" in MDO.
+    // Not using a metadata relocation. See above.
+    load_const(R0, (address)method_data + MethodData::rtm_state_offset_in_bytes(), tmpReg);
+    atomic_ori_int(R0, tmpReg, UseRTM);
+  }
+  bind(L_done);
+}
+
+// Update counters and, if profile_rtm is set, perform abort ratio calculation.
+// input: abort_status_Reg (read only); temp_Reg is scratch and must differ from it. R0 is used as scratch.
+void MacroAssembler::rtm_profiling(Register abort_status_Reg, Register temp_Reg,
+                                   RTMLockingCounters* rtm_counters,
+                                   Metadata* method_data,
+                                   bool profile_rtm) {
+
+  assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+  // Update rtm counters based on state at abort.
+  // Reads abort_status_Reg, updates flags.
+  assert_different_registers(abort_status_Reg, temp_Reg);
+  load_const_optimized(temp_Reg, (address)rtm_counters, R0);
+  rtm_counters_update(abort_status_Reg, temp_Reg);
+  if (profile_rtm) {
+    assert(rtm_counters != NULL, "should not be NULL when profiling RTM"); // Repeats the check at the top of this function.
+    rtm_abort_ratio_calculation(temp_Reg, rtm_counters, method_data);
+  }
+}
+
+// Retry on abort if abort's status indicates non-persistent failure. R0 and CCR0 are used as scratch.
+// inputs: retry_count_Reg
+//       : abort_status_Reg
+// output: retry_count_Reg decremented by 1
+void MacroAssembler::rtm_retry_lock_on_abort(Register retry_count_Reg, Register abort_status_Reg,
+                                             Label& retryLabel, Label* checkRetry) {
+  Label doneRetry;
+  rldicr_(R0, abort_status_Reg, tm_failure_persistent, 0); // Test the tm_failure_persistent status bit.
+  bne(CCR0, doneRetry);                  // Bit set: do not retry.
+  if (checkRetry) { bind(*checkRetry); } // Optional entry point that skips the status check above.
+  addic_(retry_count_Reg, retry_count_Reg, -1);
+  blt(CCR0, doneRetry);                  // Count went negative: fall out to the code following this sequence.
+  smt_yield(); // Can't use wait(). No permission (SIGILL).
+  b(retryLabel);
+  bind(doneRetry);
+}
+
+// Spin and retry if lock is busy.
+// inputs: owner_addr_Reg (address that is loaded and compared against 0 while spinning)
+//       : retry_count_Reg
+// output: retry_count_Reg decremented by 1
+// CTR is killed; R0 and CCR0 are used as scratch
+void MacroAssembler::rtm_retry_lock_on_busy(Register retry_count_Reg, Register owner_addr_Reg, Label& retryLabel) {
+  Label SpinLoop, doneRetry;
+  addic_(retry_count_Reg, retry_count_Reg, -1);
+  blt(CCR0, doneRetry);      // Count went negative: give up, fall out to the code following this sequence.
+  li(R0, RTMSpinLoopCount);
+  mtctr(R0);                 // CTR holds the spin budget for the loop below.
+
+  bind(SpinLoop);
+  smt_yield(); // Can't use waitrsv(). No permission (SIGILL).
+  bdz(retryLabel);           // Counter-based exit from the spin loop: retry regardless of the loaded value.
+  ld(R0, 0, owner_addr_Reg);
+  cmpdi(CCR0, R0, 0);
+  bne(CCR0, SpinLoop);       // Loaded value still non-zero: keep spinning.
+  b(retryLabel);
+
+  bind(doneRetry);
+}
+
+// Use RTM for normal stack locks. Falls through when neither DONE_LABEL nor IsInflated is taken.
+// Input: obj (object to lock), mark_word (its mark word; also used as temp and reloaded from obj)
+void MacroAssembler::rtm_stack_locking(ConditionRegister flag,
+                                       Register obj, Register mark_word, Register tmp,
+                                       Register retry_on_abort_count_Reg,
+                                       RTMLockingCounters* stack_rtm_counters,
+                                       Metadata* method_data, bool profile_rtm,
+                                       Label& DONE_LABEL, Label& IsInflated) {
+  assert(UseRTMForStackLocks, "why call this otherwise?");
+  assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+  Label L_rtm_retry, L_decrement_retry, L_on_abort;
+
+  if (RTMRetryCount > 0) {
+    load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort
+    bind(L_rtm_retry);
+  }
+  andi_(R0, mark_word, markOopDesc::monitor_value);  // inflated vs stack-locked|neutral|biased
+  bne(CCR0, IsInflated);
+
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    Label L_noincrement;
+    if (RTMTotalCountIncrRate > 1) {
+      branch_on_random_using_tb(tmp, (int)RTMTotalCountIncrRate, L_noincrement); // Count only a sample of the attempts.
+    }
+    assert(stack_rtm_counters != NULL, "should not be NULL when profiling RTM");
+    load_const_optimized(tmp, (address)stack_rtm_counters->total_count_addr(), R0);
+    //atomic_inc_ptr(tmp, /*temp, will be reloaded*/mark_word); We don't increment atomically
+    ldx(mark_word, tmp);
+    addi(mark_word, mark_word, 1);
+    stdx(mark_word, tmp);
+    bind(L_noincrement);
+  }
+  tbegin_();
+  beq(CCR0, L_on_abort); // CCR0 "eq" after tbegin_ selects the abort handling below.
+  ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);         // Reload in transaction, conflicts need to be tracked.
+  andi(R0, mark_word, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
+  cmpwi(flag, R0, markOopDesc::unlocked_value);                // bits = 001 unlocked
+  beq(flag, DONE_LABEL);                                       // all done if unlocked
+
+  if (UseRTMXendForLockBusy) {
+    tend_();
+    b(L_decrement_retry); // Bypasses the abort-status readout and profiling below.
+  } else {
+    tabort_();
+  }
+  bind(L_on_abort);
+  const Register abort_status_Reg = tmp;
+  mftexasr(abort_status_Reg);
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    rtm_profiling(abort_status_Reg, /*temp*/mark_word, stack_rtm_counters, method_data, profile_rtm);
+  }
+  ld(mark_word, oopDesc::mark_offset_in_bytes(), obj); // reload
+  if (RTMRetryCount > 0) {
+    // Retry on lock abort if abort status is not permanent.
+    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry, &L_decrement_retry);
+  } else {
+    bind(L_decrement_retry);
+  }
+}
+
+// Use RTM for inflated locks
+// inputs: obj       (object to lock)
+//         mark_word (current header - KILLED)
+//         boxReg    (on-stack box address (displaced header location) - KILLED)
+void MacroAssembler::rtm_inflated_locking(ConditionRegister flag,
+                                          Register obj, Register mark_word, Register boxReg,
+                                          Register retry_on_busy_count_Reg, Register retry_on_abort_count_Reg,
+                                          RTMLockingCounters* rtm_counters,
+                                          Metadata* method_data, bool profile_rtm,
+                                          Label& DONE_LABEL) {
+  assert(UseRTMLocking, "why call this otherwise?");
+  Label L_rtm_retry, L_decrement_retry, L_on_abort;
+  // Clean monitor_value bit to get valid pointer.
+  int owner_offset = ObjectMonitor::owner_offset_in_bytes() - markOopDesc::monitor_value;
+
+  // Store non-null, using boxReg instead of (intptr_t)markOopDesc::unused_mark().
+  std(boxReg, BasicLock::displaced_header_offset_in_bytes(), boxReg);
+  const Register tmpReg = boxReg;            // boxReg is reused as scratch from here on.
+  const Register owner_addr_Reg = mark_word; // mark_word is overwritten with the owner field address.
+  addi(owner_addr_Reg, mark_word, owner_offset);
+
+  if (RTMRetryCount > 0) {
+    load_const_optimized(retry_on_busy_count_Reg, RTMRetryCount);  // Retry on lock busy.
+    load_const_optimized(retry_on_abort_count_Reg, RTMRetryCount); // Retry on abort.
+    bind(L_rtm_retry);
+  }
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    Label L_noincrement;
+    if (RTMTotalCountIncrRate > 1) {
+      branch_on_random_using_tb(R0, (int)RTMTotalCountIncrRate, L_noincrement); // Count only a sample of the attempts.
+    }
+    assert(rtm_counters != NULL, "should not be NULL when profiling RTM");
+    load_const(R0, (address)rtm_counters->total_count_addr(), tmpReg);
+    //atomic_inc_ptr(R0, tmpReg); We don't increment atomically
+    ldx(tmpReg, R0);
+    addi(tmpReg, tmpReg, 1);
+    stdx(tmpReg, R0);
+    bind(L_noincrement);
+  }
+  tbegin_();
+  beq(CCR0, L_on_abort); // CCR0 "eq" after tbegin_ selects the abort handling below.
+  // We don't reload mark word. Will only be reset at safepoint.
+  ld(R0, 0, owner_addr_Reg); // Load in transaction, conflicts need to be tracked.
+  cmpdi(flag, R0, 0);
+  beq(flag, DONE_LABEL);     // Owner is null: leave via DONE_LABEL without emitting tend_ here.
+
+  if (UseRTMXendForLockBusy) {
+    tend_();
+    b(L_decrement_retry);    // Bypasses the abort-status readout and profiling below.
+  } else {
+    tabort_();
+  }
+  bind(L_on_abort);
+  const Register abort_status_Reg = tmpReg;
+  mftexasr(abort_status_Reg);
+  if (PrintPreciseRTMLockingStatistics || profile_rtm) {
+    rtm_profiling(abort_status_Reg, /*temp*/ owner_addr_Reg, rtm_counters, method_data, profile_rtm);
+    // Restore owner_addr_Reg
+    ld(mark_word, oopDesc::mark_offset_in_bytes(), obj);
+#ifdef ASSERT
+    andi_(R0, mark_word, markOopDesc::monitor_value);
+    asm_assert_ne("must be inflated", 0xa754); // Deflating only allowed at safepoint.
+#endif
+    addi(owner_addr_Reg, mark_word, owner_offset);
+  }
+  if (RTMRetryCount > 0) {
+    // Retry on lock abort if abort status is not permanent.
+    rtm_retry_lock_on_abort(retry_on_abort_count_Reg, abort_status_Reg, L_rtm_retry);
+  }
+
+  // Appears unlocked - try to swing _owner from null to non-null.
+  cmpxchgd(flag, /*current val*/ R0, (intptr_t)0, /*new val*/ R16_thread, owner_addr_Reg,
+           MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+           MacroAssembler::cmpxchgx_hint_acquire_lock(), noreg, &L_decrement_retry, true); // L_decrement_retry is passed as the failure label.
+
+  if (RTMRetryCount > 0) {
+    // success done else retry
+    b(DONE_LABEL);
+    bind(L_decrement_retry);
+    // Spin and retry if lock is busy.
+    rtm_retry_lock_on_busy(retry_on_busy_count_Reg, owner_addr_Reg, L_rtm_retry);
+  } else {
+    bind(L_decrement_retry);
+  }
+}
+
+#endif //  INCLUDE_RTM_OPT
+
 // "The box" is the space on the stack where we copy the object mark.
 void MacroAssembler::compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
-                                               Register temp, Register displaced_header, Register current_header) {
+                                               Register temp, Register displaced_header, Register current_header,
+                                               bool try_bias,
+                                               RTMLockingCounters* rtm_counters,
+                                               RTMLockingCounters* stack_rtm_counters,
+                                               Metadata* method_data,
+                                               bool use_rtm, bool profile_rtm) {
   assert_different_registers(oop, box, temp, displaced_header, current_header);
   assert(flag != CCR0, "bad condition register");
   Label cont;
@@ -2006,10 +2365,18 @@
     return;
   }
 
-  if (UseBiasedLocking) {
+  if (try_bias) {
     biased_locking_enter(flag, oop, displaced_header, temp, current_header, cont);
   }
 
+#if INCLUDE_RTM_OPT
+  if (UseRTMForStackLocks && use_rtm) {
+    rtm_stack_locking(flag, oop, displaced_header, temp, /*temp*/ current_header,
+                      stack_rtm_counters, method_data, profile_rtm,
+                      cont, object_has_monitor);
+  }
+#endif // INCLUDE_RTM_OPT
+
   // Handle existing monitor.
   if ((EmitSync & 0x02) == 0) {
     // The object has an existing monitor iff (mark & monitor_value) != 0.
@@ -2066,14 +2433,22 @@
     bind(object_has_monitor);
     // The object's monitor m is unlocked iff m->owner == NULL,
     // otherwise m->owner may contain a thread or a stack address.
-    //
+
+#if INCLUDE_RTM_OPT
+    // Use the same RTM locking code in 32- and 64-bit VM.
+    if (use_rtm) {
+      rtm_inflated_locking(flag, oop, displaced_header, box, temp, /*temp*/ current_header,
+                           rtm_counters, method_data, profile_rtm, cont);
+    } else {
+#endif // INCLUDE_RTM_OPT
+
     // Try to CAS m->owner from NULL to current thread.
     addi(temp, displaced_header, ObjectMonitor::owner_offset_in_bytes()-markOopDesc::monitor_value);
     li(displaced_header, 0);
     // CmpxchgX sets flag to cmpX(current, displaced).
     cmpxchgd(/*flag=*/flag,
              /*current_value=*/current_header,
-             /*compare_value=*/displaced_header,
+             /*compare_value=*/(intptr_t)0,
              /*exchange_value=*/R16_thread,
              /*where=*/temp,
              MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
@@ -2095,6 +2470,10 @@
     //asm_assert_mem4_isnot_zero(ObjectMonitor::OwnerIsThread_offset_in_bytes(), temp,
     //                           "monitor->OwnerIsThread shouldn't be 0", -1);
 #   endif
+
+#if INCLUDE_RTM_OPT
+    } // use_rtm()
+#endif
   }
 
   bind(cont);
@@ -2103,7 +2482,8 @@
 }
 
 void MacroAssembler::compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
-                                                 Register temp, Register displaced_header, Register current_header) {
+                                                 Register temp, Register displaced_header, Register current_header,
+                                                 bool try_bias, bool use_rtm) {
   assert_different_registers(oop, box, temp, displaced_header, current_header);
   assert(flag != CCR0, "bad condition register");
   Label cont;
@@ -2115,10 +2495,24 @@
     return;
   }
 
-  if (UseBiasedLocking) {
+  if (try_bias) {
     biased_locking_exit(flag, oop, current_header, cont);
   }
 
+#if INCLUDE_RTM_OPT
+  if (UseRTMForStackLocks && use_rtm) {
+    assert(!UseBiasedLocking, "Biased locking is not supported with RTM locking");
+    Label L_regular_unlock;
+    ld(current_header, oopDesc::mark_offset_in_bytes(), oop);         // fetch markword
+    andi(R0, current_header, markOopDesc::biased_lock_mask_in_place); // look at 3 lock bits
+    cmpwi(flag, R0, markOopDesc::unlocked_value);                     // bits = 001 unlocked
+    bne(flag, L_regular_unlock);                                      // else RegularLock
+    tend_();                                                          // otherwise end...
+    b(cont);                                                          // ... and we're done
+    bind(L_regular_unlock);
+  }
+#endif
+
   // Find the lock address and load the displaced header from the stack.
   ld(displaced_header, BasicLock::displaced_header_offset_in_bytes(), box);
 
@@ -2129,13 +2523,12 @@
   // Handle existing monitor.
   if ((EmitSync & 0x02) == 0) {
     // The object has an existing monitor iff (mark & monitor_value) != 0.
+    RTM_OPT_ONLY( if (!(UseRTMForStackLocks && use_rtm)) ) // skip load if already done
     ld(current_header, oopDesc::mark_offset_in_bytes(), oop);
-    andi(temp, current_header, markOopDesc::monitor_value);
-    cmpdi(flag, temp, 0);
-    bne(flag, object_has_monitor);
+    andi_(R0, current_header, markOopDesc::monitor_value);
+    bne(CCR0, object_has_monitor);
   }
 
-
   // Check if it is still a light weight lock, this is is true if we see
   // the stack address of the basicLock in the markOop of the object.
   // Cmpxchg sets flag to cmpd(current_header, box).
@@ -2158,6 +2551,20 @@
     bind(object_has_monitor);
     addi(current_header, current_header, -markOopDesc::monitor_value); // monitor
     ld(temp,             ObjectMonitor::owner_offset_in_bytes(), current_header);
+
+    // It's inflated.
+#if INCLUDE_RTM_OPT
+    if (use_rtm) {
+      Label L_regular_inflated_unlock;
+      // Owner is NULL: presumably the lock was elided in a transaction, so just end it (tend).
+      cmpdi(flag, temp, 0);
+      bne(flag, L_regular_inflated_unlock);
+      tend_();
+      b(cont);
+      bind(L_regular_inflated_unlock);
+    }
+#endif
+
     ld(displaced_header, ObjectMonitor::recursions_offset_in_bytes(), current_header);
     xorr(temp, R16_thread, temp);      // Will be 0 if we are the owner.
     orr(temp, temp, displaced_header); // Will be 0 if there are 0 recursions.
@@ -2441,6 +2848,8 @@
   //   oop_result
   //   R16_thread->in_bytes(JavaThread::vm_result_offset())
 
+  verify_thread();
+
   ld(oop_result, in_bytes(JavaThread::vm_result_offset()), R16_thread);
   li(R0, 0);
   std(R0, in_bytes(JavaThread::vm_result_offset()), R16_thread);
@@ -2462,26 +2871,24 @@
   std(R0, in_bytes(JavaThread::vm_result_2_offset()), R16_thread);
 }
 
-
-void MacroAssembler::encode_klass_not_null(Register dst, Register src) {
+Register MacroAssembler::encode_klass_not_null(Register dst, Register src) { // Returns the register that holds the encoded klass.
   Register current = (src != noreg) ? src : dst; // Klass is in dst if no src provided.
   if (Universe::narrow_klass_base() != 0) {
     // Use dst as temp if it is free.
-    load_const(R0, Universe::narrow_klass_base(), (dst != current && dst != R0) ? dst : noreg);
-    sub(dst, current, R0);
+    sub_const_optimized(dst, current, Universe::narrow_klass_base(), R0); // dst = current - narrow_klass_base; R0 is handed in as extra register.
     current = dst;
   }
   if (Universe::narrow_klass_shift() != 0) {
     srdi(dst, current, Universe::narrow_klass_shift());
     current = dst;
   }
-  mr_if_needed(dst, current); // Move may be required.
+  return current; // dst if any instruction was emitted above, otherwise the untouched input register (no move is emitted).
 }
 
 void MacroAssembler::store_klass(Register dst_oop, Register klass, Register ck) {
   if (UseCompressedClassPointers) {
-    encode_klass_not_null(ck, klass);
-    stw(ck, oopDesc::klass_offset_in_bytes(), dst_oop);
+    Register compressedKlass = encode_klass_not_null(ck, klass);
+    stw(compressedKlass, oopDesc::klass_offset_in_bytes(), dst_oop);
   } else {
     std(klass, oopDesc::klass_offset_in_bytes(), dst_oop);
   }
@@ -2514,8 +2921,7 @@
     sldi(shifted_src, src, Universe::narrow_klass_shift());
   }
   if (Universe::narrow_klass_base() != 0) {
-    load_const(R0, Universe::narrow_klass_base());
-    add(dst, shifted_src, R0);
+    add_const_optimized(dst, shifted_src, Universe::narrow_klass_base(), R0);
   }
 }
 
--- a/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.hpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
 #define CPU_PPC_VM_MACROASSEMBLER_PPC_HPP
 
 #include "asm/assembler.hpp"
+#include "runtime/rtmLocking.hpp"
 #include "utilities/macros.hpp"
 
 // MacroAssembler extends Assembler by a few frequently used macros.
@@ -432,8 +433,8 @@
                 int semantics, bool cmpxchgx_hint = false,
                 Register int_flag_success = noreg, bool contention_hint = false);
   void cmpxchgd(ConditionRegister flag,
-                Register dest_current_value, Register compare_value, Register exchange_value, Register addr_base,
-                int semantics, bool cmpxchgx_hint = false,
+                Register dest_current_value, RegisterOrConstant compare_value, Register exchange_value,
+                Register addr_base, int semantics, bool cmpxchgx_hint = false,
                 Register int_flag_success = noreg, Label* failed = NULL, bool contention_hint = false);
 
   // interface method calling
@@ -506,8 +507,42 @@
   // biased locking exit case failed.
   void biased_locking_exit(ConditionRegister cr_reg, Register mark_addr, Register temp_reg, Label& done);
 
-  void compiler_fast_lock_object(  ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3);
-  void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box, Register tmp1, Register tmp2, Register tmp3);
+  void atomic_inc_ptr(Register addr, Register result, int simm16 = 1);
+  void atomic_ori_int(Register addr, Register result, int uimm16);
+
+#if INCLUDE_RTM_OPT
+  void rtm_counters_update(Register abort_status, Register rtm_counters);
+  void branch_on_random_using_tb(Register tmp, int count, Label& brLabel);
+  void rtm_abort_ratio_calculation(Register rtm_counters_reg, RTMLockingCounters* rtm_counters,
+                                   Metadata* method_data);
+  void rtm_profiling(Register abort_status_Reg, Register temp_Reg,
+                     RTMLockingCounters* rtm_counters, Metadata* method_data, bool profile_rtm);
+  void rtm_retry_lock_on_abort(Register retry_count, Register abort_status,
+                               Label& retryLabel, Label* checkRetry = NULL);
+  void rtm_retry_lock_on_busy(Register retry_count, Register owner_addr, Label& retryLabel);
+  void rtm_stack_locking(ConditionRegister flag, Register obj, Register mark_word, Register tmp,
+                         Register retry_on_abort_count,
+                         RTMLockingCounters* stack_rtm_counters,
+                         Metadata* method_data, bool profile_rtm,
+                         Label& DONE_LABEL, Label& IsInflated);
+  void rtm_inflated_locking(ConditionRegister flag, Register obj, Register mark_word, Register box,
+                            Register retry_on_busy_count, Register retry_on_abort_count,
+                            RTMLockingCounters* rtm_counters,
+                            Metadata* method_data, bool profile_rtm,
+                            Label& DONE_LABEL);
+#endif
+
+  void compiler_fast_lock_object(ConditionRegister flag, Register oop, Register box,
+                                 Register tmp1, Register tmp2, Register tmp3,
+                                 bool try_bias = UseBiasedLocking, // All parameters from here on are defaulted, so six-argument calls remain valid.
+                                 RTMLockingCounters* rtm_counters = NULL,
+                                 RTMLockingCounters* stack_rtm_counters = NULL,
+                                 Metadata* method_data = NULL,
+                                 bool use_rtm = false, bool profile_rtm = false); // RTM paths are off unless use_rtm is passed as true.
+
+  void compiler_fast_unlock_object(ConditionRegister flag, Register oop, Register box,
+                                   Register tmp1, Register tmp2, Register tmp3,
+                                   bool try_bias = UseBiasedLocking, bool use_rtm = false); // Defaulted trailing parameters keep six-argument calls valid.
 
   // Support for serializing memory accesses between threads
   void serialize_memory(Register thread, Register tmp1, Register tmp2);
@@ -576,7 +611,7 @@
                                       Register tmp = noreg);
 
   // Null allowed.
-  inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg);
+  inline void load_heap_oop(Register d, RegisterOrConstant offs, Register s1 = noreg, Label *is_null = NULL);
 
   // Encode/decode heap oop. Oop may not be null, else en/decoding goes wrong.
   // src == d allowed.
@@ -593,7 +628,7 @@
   void store_klass_gap(Register dst_oop, Register val = noreg); // Will store 0 if val not specified.
   static int instr_size_for_decode_klass_not_null();
   void decode_klass_not_null(Register dst, Register src = noreg);
-  void encode_klass_not_null(Register dst, Register src = noreg);
+  Register encode_klass_not_null(Register dst, Register src = noreg);
 
   // Load common heap base into register.
   void reinit_heapbase(Register d, Register tmp = noreg);
--- a/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.inline.hpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -333,19 +333,29 @@
   }
 }
 
-inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1) {
+inline void MacroAssembler::load_heap_oop(Register d, RegisterOrConstant offs, Register s1, Label *is_null) { // is_null (optional): branched to when the loaded oop is null.
   if (UseCompressedOops) {
     lwz(d, offs, s1);
-    decode_heap_oop(d);
+    if (is_null != NULL) { // Explicit null check requested; this path writes CCR0.
+      cmpwi(CCR0, d, 0);
+      beq(CCR0, *is_null); // Null narrow oop: leave without decoding.
+      decode_heap_oop_not_null(d); // Null was branched away above, so the not-null decode is used.
+    } else {
+      decode_heap_oop(d); // No label given: use the null-tolerant decode.
+    }
   } else {
     ld(d, offs, s1);
+    if (is_null != NULL) { // Uncompressed oop: only the optional null branch is emitted (writes CCR0).
+      cmpdi(CCR0, d, 0);
+      beq(CCR0, *is_null);
+    }
   }
 }
 
 inline Register MacroAssembler::encode_heap_oop_not_null(Register d, Register src) {
   Register current = (src != noreg) ? src : d; // Oop to be compressed is in d if no src provided.
   if (Universe::narrow_oop_base_overlaps()) {
-    sub(d, current, R30);
+    sub_const_optimized(d, current, Universe::narrow_oop_base(), R0);
     current = d;
   }
   if (Universe::narrow_oop_shift() != 0) {
@@ -358,7 +368,7 @@
 inline Register MacroAssembler::decode_heap_oop_not_null(Register d, Register src) {
   if (Universe::narrow_oop_base_disjoint() && src != noreg && src != d &&
       Universe::narrow_oop_shift() != 0) {
-    mr(d, R30);
+    load_const_optimized(d, Universe::narrow_oop_base(), R0);
     rldimi(d, src, Universe::narrow_oop_shift(), 32-Universe::narrow_oop_shift());
     return d;
   }
@@ -369,7 +379,7 @@
     current = d;
   }
   if (Universe::narrow_oop_base() != NULL) {
-    add(d, current, R30);
+    add_const_optimized(d, current, Universe::narrow_oop_base(), R0);
     current = d;
   }
   return current; // Decoded oop is in this register.
@@ -377,11 +387,19 @@
 
 inline void MacroAssembler::decode_heap_oop(Register d) {
   Label isNull;
+  bool use_isel = false; // Set when the null case is fixed up after decoding instead of being branched around.
   if (Universe::narrow_oop_base() != NULL) {
     cmpwi(CCR0, d, 0);
-    beq(CCR0, isNull);
+    if (VM_Version::has_isel()) { // isel available: emit no branch, decode unconditionally.
+      use_isel = true;
+    } else {
+      beq(CCR0, isNull); // No isel: branch over the decode for a null narrow oop.
+    }
   }
   decode_heap_oop_not_null(d);
+  if (use_isel) {
+    isel_0(d, CCR0, Assembler::equal); // Presumably resets d to 0 when the cmpwi above saw null; relies on CCR0 surviving the decode -- TODO confirm.
+  }
   bind(isNull);
 }
 
--- a/src/cpu/ppc/vm/methodHandles_ppc.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/methodHandles_ppc.cpp	Thu May 07 20:51:12 2015 -0700
@@ -466,7 +466,7 @@
                  strstr(adaptername, "linkTo") == NULL);    // static linkers don't have MH
   const char* mh_reg_name = has_mh ? "R23_method_handle" : "G23";
   tty->print_cr("MH %s %s="INTPTR_FORMAT " sp=" INTPTR_FORMAT,
-                adaptername, mh_reg_name, (intptr_t) mh, entry_sp);
+                adaptername, mh_reg_name, p2i(mh), p2i(entry_sp));
 
   if (Verbose) {
     tty->print_cr("Registers:");
--- a/src/cpu/ppc/vm/methodHandles_ppc.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/methodHandles_ppc.hpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,9 +27,6 @@
 // These definitions are inlined into class MethodHandles.
 
 // Adapters
-//static unsigned int adapter_code_size() {
-//  return 32*K DEBUG_ONLY(+ 16*K) + (TraceMethodHandles ? 16*K : 0) + (VerifyMethodHandles ? 32*K : 0);
-//}
 enum /* platform_dependent_constants */ {
   adapter_code_size = NOT_LP64(16000 DEBUG_ONLY(+ 25000)) LP64_ONLY(32000 DEBUG_ONLY(+ 150000))
 };
@@ -45,7 +42,9 @@
 
   static void verify_method_handle(MacroAssembler* _masm, Register mh_reg,
                                    Register temp_reg, Register temp2_reg) {
-    Unimplemented();
+    verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), // Delegates to verify_klass with the well-known java.lang.invoke.MethodHandle klass.
+                 temp_reg, temp2_reg,
+                 "reference is a MH"); // Message string passed through to verify_klass.
   }
 
   static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
--- a/src/cpu/ppc/vm/ppc.ad	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/ppc.ad	Thu May 07 20:51:12 2015 -0700
@@ -447,8 +447,8 @@
   R26,
   R27,
   R28,
-/*R29*/             // global TOC
-/*R30*/             // Narrow Oop Base
+/*R29,*/             // global TOC
+  R30,
   R31
 );
 
@@ -484,58 +484,11 @@
   R26,
   R27,
   R28,
-/*R29*/
-/*R30*/             // Narrow Oop Base
+/*R29,*/
+  R30,
   R31
 );
 
-// Complement-required-in-pipeline operands for narrow oops.
-reg_class bits32_reg_ro_not_complement (
-/*R0*/     // R0
-  R1,      // SP
-  R2,      // TOC
-  R3,
-  R4,
-  R5,
-  R6,
-  R7,
-  R8,
-  R9,
-  R10,
-  R11,
-  R12,
-/*R13,*/   // system thread id
-  R14,
-  R15,
-  R16,    // R16_thread
-  R17,
-  R18,
-  R19,
-  R20,
-  R21,
-  R22,
-/*R23,
-  R24,
-  R25,
-  R26,
-  R27,
-  R28,*/
-/*R29,*/ // TODO: let allocator handle TOC!!
-/*R30,*/
-  R31
-);
-
-// Complement-required-in-pipeline operands for narrow oops.
-// See 64-bit declaration.
-reg_class bits32_reg_ro_complement (
-  R23,
-  R24,
-  R25,
-  R26,
-  R27,
-  R28
-);
-
 reg_class rscratch1_bits32_reg(R11);
 reg_class rscratch2_bits32_reg(R12);
 reg_class rarg1_bits32_reg(R3);
@@ -591,8 +544,8 @@
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
-/*R29_H, R29*/
-/*R30_H, R30*/
+/*R29_H, R29,*/
+  R30_H, R30,
   R31_H, R31
 );
 
@@ -629,8 +582,8 @@
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
-/*R29_H, R29*/
-/*R30_H, R30*/
+/*R29_H, R29,*/
+  R30_H, R30,
   R31_H, R31
 );
 
@@ -667,8 +620,8 @@
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
-/*R29_H, R29*/
-/*R30_H, R30*/
+/*R29_H, R29,*/
+  R30_H, R30,
   R31_H, R31
 );
 
@@ -704,64 +657,11 @@
   R26_H, R26,
   R27_H, R27,
   R28_H, R28,
-/*R29_H, R29*/ // TODO: let allocator handle TOC!!
-/*R30_H, R30,*/
+/*R29_H, R29,*/ // TODO: let allocator handle TOC!!
+  R30_H, R30,
   R31_H, R31
 );
 
-// Complement-required-in-pipeline operands.
-reg_class bits64_reg_ro_not_complement (
-/*R0_H,  R0*/     // R0
-  R1_H,  R1,      // SP
-  R2_H,  R2,      // TOC
-  R3_H,  R3,
-  R4_H,  R4,
-  R5_H,  R5,
-  R6_H,  R6,
-  R7_H,  R7,
-  R8_H,  R8,
-  R9_H,  R9,
-  R10_H, R10,
-  R11_H, R11,
-  R12_H, R12,
-/*R13_H, R13*/   // system thread id
-  R14_H, R14,
-  R15_H, R15,
-  R16_H, R16,    // R16_thread
-  R17_H, R17,
-  R18_H, R18,
-  R19_H, R19,
-  R20_H, R20,
-  R21_H, R21,
-  R22_H, R22,
-/*R23_H, R23,
-  R24_H, R24,
-  R25_H, R25,
-  R26_H, R26,
-  R27_H, R27,
-  R28_H, R28,*/
-/*R29_H, R29*/ // TODO: let allocator handle TOC!!
-/*R30_H, R30,*/
-  R31_H, R31
-);
-
-// Complement-required-in-pipeline operands.
-// This register mask is used for the trap instructions that implement
-// the null checks on AIX. The trap instruction first computes the
-// complement of the value it shall trap on. Because of this, the
-// instruction can not be scheduled in the same cycle as an other
-// instruction reading the normal value of the same register. So we
-// force the value to check into 'bits64_reg_ro_not_complement'
-// and then copy it to 'bits64_reg_ro_complement' for the trap.
-reg_class bits64_reg_ro_complement (
-  R23_H, R23,
-  R24_H, R24,
-  R25_H, R25,
-  R26_H, R26,
-  R27_H, R27,
-  R28_H, R28
-);
-
 
 // ----------------------------
 // Special Class for Condition Code Flags Register
@@ -777,6 +677,17 @@
   CCR7
 );
 
+reg_class int_flags_ro(
+  CCR0, // Class lists all eight condition registers CCR0..CCR7; it is the ALLOC_IN_RC class of operand flagsRegSrc.
+  CCR1,
+  CCR2,
+  CCR3,
+  CCR4,
+  CCR5,
+  CCR6,
+  CCR7
+);
+
 reg_class int_flags_CR0(CCR0);
 reg_class int_flags_CR1(CCR1);
 reg_class int_flags_CR6(CCR6);
@@ -2876,7 +2787,7 @@
 
   // Use release_store for card-marking to ensure that previous
   // oop-stores are visible before the card-mark change.
-  enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr) %{
+  enc_class enc_cms_card_mark(memory mem, iRegLdst releaseFieldAddr, flagsReg crx) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     // FIXME: Implement this as a cmove and use a fixed condition code
     // register which is written on every transition to compiled code,
@@ -2897,8 +2808,8 @@
     // Check CMSCollectorCardTableModRefBSExt::_requires_release and do the
     // StoreStore barrier conditionally.
     __ lwz(R0, 0, $releaseFieldAddr$$Register);
-    __ cmpwi(CCR0, R0, 0);
-    __ beq_predict_taken(CCR0, skip_storestore);
+    __ cmpwi($crx$$CondRegister, R0, 0);
+    __ beq_predict_taken($crx$$CondRegister, skip_storestore);
 #endif
     __ li(R0, 0);
     __ membar(Assembler::StoreStore);
@@ -3108,7 +3019,7 @@
     nodes->push(n2);
   %}
 
-  enc_class enc_cmove_reg(iRegIdst dst, flagsReg crx, iRegIsrc src, cmpOp cmp) %{
+  enc_class enc_cmove_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src, cmpOp cmp) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
 
     MacroAssembler _masm(&cbuf);
@@ -3123,7 +3034,7 @@
     __ bind(done);
   %}
 
-  enc_class enc_cmove_imm(iRegIdst dst, flagsReg crx, immI16 src, cmpOp cmp) %{
+  enc_class enc_cmove_imm(iRegIdst dst, flagsRegSrc crx, immI16 src, cmpOp cmp) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
 
     MacroAssembler _masm(&cbuf);
@@ -3269,7 +3180,7 @@
     __ bind(done);
   %}
 
-  enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL mem ) %{
+  enc_class enc_cmove_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL mem ) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
 
     MacroAssembler _masm(&cbuf);
@@ -3281,7 +3192,7 @@
     __ bind(done);
   %}
 
-  enc_class enc_bc(flagsReg crx, cmpOp cmp, Label lbl) %{
+  enc_class enc_bc(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
     // TODO: PPC port $archOpcode(ppc64Opcode_bc);
 
     MacroAssembler _masm(&cbuf);
@@ -3309,7 +3220,7 @@
           l);
   %}
 
-  enc_class enc_bc_far(flagsReg crx, cmpOp cmp, Label lbl) %{
+  enc_class enc_bc_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
     // The scheduler doesn't know about branch shortening, so we set the opcode
     // to ppc64Opcode_bc in order to hide this detail from the scheduler.
     // TODO: PPC port $archOpcode(ppc64Opcode_bc);
@@ -3341,7 +3252,7 @@
   %}
 
   // Branch used with Power6 scheduling (can be shortened without changing the node).
-  enc_class enc_bc_short_far(flagsReg crx, cmpOp cmp, Label lbl) %{
+  enc_class enc_bc_short_far(flagsRegSrc crx, cmpOp cmp, Label lbl) %{
     // The scheduler doesn't know about branch shortening, so we set the opcode
     // to ppc64Opcode_bc in order to hide this detail from the scheduler.
     // TODO: PPC port $archOpcode(ppc64Opcode_bc);
@@ -4700,6 +4611,15 @@
   interface(REG_INTER);
 %}
 
+operand flagsRegSrc() %{
+  constraint(ALLOC_IN_RC(int_flags_ro)); // Allocated from int_flags_ro; presumably meant for flags that are only read -- TODO confirm.
+  match(RegFlags);
+  match(flagsReg); // Also matches the flagsReg and flagsRegCR0 operands.
+  match(flagsRegCR0);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Condition Code Flag Register CR0
 operand flagsRegCR0() %{
   constraint(ALLOC_IN_RC(int_flags_CR0));
@@ -4783,6 +4703,13 @@
   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
   constraint(ALLOC_IN_RC(bits32_reg_ro));
   match(DecodeN reg);
+  format %{ "$reg" %}
+  interface(REG_INTER)
+%}
+
+operand iRegN2P_klass(iRegNsrc reg) %{
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+  constraint(ALLOC_IN_RC(bits32_reg_ro));
   match(DecodeNKlass reg);
   format %{ "$reg" %}
   interface(REG_INTER)
@@ -4839,6 +4766,19 @@
   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
   constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(DecodeN reg);
+  op_cost(100);
+  format %{ "[$reg]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x0);
+    scale(0x0);
+    disp(0x0);
+  %}
+%}
+
+operand indirectNarrow_klass(iRegNsrc reg) %{
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+  constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(DecodeNKlass reg);
   op_cost(100);
   format %{ "[$reg]" %}
@@ -4855,6 +4795,19 @@
   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
   constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(AddP (DecodeN reg) offset);
+  op_cost(100);
+  format %{ "[$reg + $offset]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x0);
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+operand indOffset16Narrow_klass(iRegNsrc reg, immL16 offset) %{
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+  constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(AddP (DecodeNKlass reg) offset);
   op_cost(100);
   format %{ "[$reg + $offset]" %}
@@ -4871,6 +4824,19 @@
   predicate(false /* TODO: PPC port MatchDecodeNodes*/);
   constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(AddP (DecodeN reg) offset);
+  op_cost(100);
+  format %{ "[$reg + $offset]" %}
+  interface(MEMORY_INTER) %{
+    base($reg);
+    index(0x0);
+    scale(0x0);
+    disp($offset);
+  %}
+%}
+
+operand indOffset16NarrowAlg4_klass(iRegNsrc reg, immL16Alg4 offset) %{
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0);
+  constraint(ALLOC_IN_RC(bits64_reg_ro));
   match(AddP (DecodeNKlass reg) offset);
   op_cost(100);
   format %{ "[$reg + $offset]" %}
@@ -4998,9 +4964,9 @@
 // encoding and format. The classic case of this is memory operands.
 // Indirect is not included since its use is limited to Compare & Swap.
 
-opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indOffset16Narrow);
+opclass memory(indirect, indOffset16 /*, indIndex, tlsReference*/, indirectNarrow, indirectNarrow_klass, indOffset16Narrow, indOffset16Narrow_klass);
 // Memory operand where offsets are 4-aligned. Required for ld, std.
-opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4);
+opclass memoryAlg4(indirect, indOffset16Alg4, indirectNarrow, indOffset16NarrowAlg4, indOffset16NarrowAlg4_klass);
 opclass indirectMemory(indirect, indirectNarrow);
 
 // Special opclass for I and ConvL2I.
@@ -5009,7 +4975,7 @@
 // Operand classes to match encode and decode. iRegN_P2N is only used
 // for storeN. I have never seen an encode node elsewhere.
 opclass iRegN_P2N(iRegNsrc, iRegP2N);
-opclass iRegP_N2P(iRegPsrc, iRegN2P);
+opclass iRegP_N2P(iRegPsrc, iRegN2P, iRegN2P_klass);
 
 //----------PIPELINE-----------------------------------------------------------
 
@@ -5593,6 +5559,19 @@
   ins_pipe(pipe_class_memory);
 %}
 
+instruct loadN2P_klass_unscaled(iRegPdst dst, memory mem) %{
+  match(Set dst (DecodeNKlass (LoadNKlass mem)));
+  // SAPJVM GL 2014-05-21 Differs. Applies only when narrow klass base is NULL and shift is 0 (a plain LWZ is emitted) and the load is unordered.
+  predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0 &&
+            _kids[0]->_leaf->as_Load()->is_unordered());
+  ins_cost(MEMORY_REF_COST);
+
+  format %{ "LWZ     $dst, $mem \t// DecodeN (unscaled)" %}
+  size(4);
+  ins_encode( enc_lwz(dst, mem) );
+  ins_pipe(pipe_class_memory);
+%}
+
 // Load Pointer
 instruct loadP(iRegPdst dst, memoryAlg4 mem) %{
   match(Set dst (LoadP mem));
@@ -5669,8 +5648,9 @@
 %}
 
 // Load Float acquire.
-instruct loadF_ac(regF dst, memory mem) %{
+instruct loadF_ac(regF dst, memory mem, flagsRegCR0 cr0) %{
   match(Set dst (LoadF mem));
+  effect(TEMP cr0);
   ins_cost(3*MEMORY_REF_COST);
 
   format %{ "LFS     $dst, $mem \t// acquire\n\t"
@@ -5705,8 +5685,9 @@
 %}
 
 // Load Double - aligned acquire.
-instruct loadD_ac(regD dst, memory mem) %{
+instruct loadD_ac(regD dst, memory mem, flagsRegCR0 cr0) %{
   match(Set dst (LoadD mem));
+  effect(TEMP cr0);
   ins_cost(3*MEMORY_REF_COST);
 
   format %{ "LFD     $dst, $mem \t// acquire\n\t"
@@ -6034,11 +6015,10 @@
 instruct loadBase(iRegLdst dst) %{
   effect(DEF dst);
 
-  format %{ "MR      $dst, r30_heapbase" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_or);
-    __ mr($dst$$Register, R30);
+  format %{ "LoadConst $dst, heapbase" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ load_const_optimized($dst$$Register, Universe::narrow_oop_base(), R0); // Materializes Universe::narrow_oop_base() into $dst; R0 is presumably a scratch register -- TODO confirm.
   %}
   ins_pipe(pipe_class_default);
 %}
@@ -6114,7 +6094,7 @@
   effect(TEMP src2);
   ins_cost(DEFAULT_COST);
 
-  format %{ "ORI    $dst, $src1, $src2 \t// narrow klass lo" %}
+  format %{ "ORI     $dst, $src1, $src2 \t// narrow klass lo" %}
   size(4);
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_ori);
@@ -6563,8 +6543,9 @@
 // do a releasing store. For this, it gets the address of
 // CMSCollectorCardTableModRefBSExt::_requires_release as input.
 // (Using releaseFieldAddr in the match rule is a hack.)
-instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr) %{
+instruct storeCM_CMS(memory mem, iRegLdst releaseFieldAddr, flagsReg crx) %{
   match(Set mem (StoreCM mem releaseFieldAddr));
+  effect(TEMP crx);
   predicate(false);
   ins_cost(MEMORY_REF_COST);
 
@@ -6572,7 +6553,7 @@
   ins_cannot_rematerialize(true);
 
   format %{ "STB     #0, $mem \t// CMS card-mark byte (must be 0!), checking requires_release in [$releaseFieldAddr]" %}
-  ins_encode( enc_cms_card_mark(mem, releaseFieldAddr) );
+  ins_encode( enc_cms_card_mark(mem, releaseFieldAddr, crx) );
   ins_pipe(pipe_class_memory);
 %}
 
@@ -6589,8 +6570,9 @@
   expand %{
     immL baseImm %{ 0 /* TODO: PPC port (jlong)CMSCollectorCardTableModRefBSExt::requires_release_address() */ %}
     iRegLdst releaseFieldAddress;
+    flagsReg crx;
     loadConL_Ex(releaseFieldAddress, baseImm);
-    storeCM_CMS(mem, releaseFieldAddress);
+    storeCM_CMS(mem, releaseFieldAddress, crx);
   %}
 %}
 
@@ -6639,39 +6621,34 @@
   predicate(false);
 
   format %{ "SUB     $dst, $src, oop_base \t// encode" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_subf);
-    __ subf($dst$$Register, R30, $src$$Register);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ sub_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0);
   %}
   ins_pipe(pipe_class_default);
 %}
 
 // Conditional sub base.
-instruct cond_sub_base(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_sub_base(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
   // The match rule is needed to make it a 'MachTypeNode'!
   match(Set dst (EncodeP (Binary crx src1)));
   predicate(false);
 
-  ins_variable_size_depending_on_alignment(true);
-
   format %{ "BEQ     $crx, done\n\t"
-            "SUB     $dst, $src1, R30 \t// encode: subtract base if != NULL\n"
+            "SUB     $dst, $src1, heapbase \t// encode: subtract base if != NULL\n"
             "done:" %}
-  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling())*/ ? 12 : 8);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     Label done;
     __ beq($crx$$CondRegister, done);
-    __ subf($dst$$Register, R30, $src1$$Register);
-    // TODO PPC port __ endgroup_if_needed(_size == 12);
+    __ sub_const_optimized($dst$$Register, $src1$$Register, Universe::narrow_oop_base(), R0);
     __ bind(done);
   %}
   ins_pipe(pipe_class_default);
 %}
 
 // Power 7 can use isel instruction
-instruct cond_set_0_oop(iRegNdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_set_0_oop(iRegNdst dst, flagsRegSrc crx, iRegPsrc src1) %{
   // The match rule is needed to make it a 'MachTypeNode'!
   match(Set dst (EncodeP (Binary crx src1)));
   predicate(false);
@@ -6777,42 +6754,37 @@
   match(Set dst (DecodeN src));
   predicate(false);
 
-  format %{ "ADD     $dst, $src, R30 \t// DecodeN, add oop base" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_add);
-    __ add($dst$$Register, $src$$Register, R30);
+  format %{ "ADD     $dst, $src, heapbase \t// DecodeN, add oop base" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ add_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0);
   %}
   ins_pipe(pipe_class_default);
 %}
 
 // conditianal add base for expand
-instruct cond_add_base(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_add_base(iRegPdst dst, flagsRegSrc crx, iRegPsrc src) %{
   // The match rule is needed to make it a 'MachTypeNode'!
   // NOTICE that the rule is nonsense - we just have to make sure that:
   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
   //  - we have to match 'crx' to avoid an "illegal USE of non-input: flagsReg crx" error in ADLC.
-  match(Set dst (DecodeN (Binary crx src1)));
+  match(Set dst (DecodeN (Binary crx src)));
   predicate(false);
 
-  ins_variable_size_depending_on_alignment(true);
-
   format %{ "BEQ     $crx, done\n\t"
-            "ADD     $dst, $src1, R30 \t// DecodeN: add oop base if $src1 != NULL\n"
+            "ADD     $dst, $src, heapbase \t// DecodeN: add oop base if $src != NULL\n"
             "done:" %}
-  size(false /* TODO: PPC PORT (InsertEndGroupPPC64 && Compile::current()->do_hb_scheduling()) */? 12 : 8);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_cmove);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     Label done;
     __ beq($crx$$CondRegister, done);
-    __ add($dst$$Register, $src1$$Register, R30);
-    // TODO PPC port  __ endgroup_if_needed(_size == 12);
+    __ add_const_optimized($dst$$Register, $src$$Register, Universe::narrow_oop_base(), R0);
     __ bind(done);
   %}
   ins_pipe(pipe_class_default);
 %}
 
-instruct cond_set_0_ptr(iRegPdst dst, flagsReg crx, iRegPsrc src1) %{
+instruct cond_set_0_ptr(iRegPdst dst, flagsRegSrc crx, iRegPsrc src1) %{
   // The match rule is needed to make it a 'MachTypeNode'!
   // NOTICE that the rule is nonsense - we just have to make sure that:
   //  - _matrule->_rChild->_opType == "DecodeN" (see InstructForm::captures_bottom_type() in formssel.cpp)
@@ -6888,7 +6860,7 @@
             Universe::narrow_oop_base_disjoint());
   ins_cost(DEFAULT_COST);
 
-  format %{ "MOV     $dst, R30 \t\n"
+  format %{ "MOV     $dst, heapbase \t\n"
             "RLDIMI  $dst, $src, shift, 32-shift \t// decode with disjoint base" %}
   postalloc_expand %{
     loadBaseNode *n1 = new loadBaseNode();
@@ -6946,7 +6918,7 @@
 
     assert(ra_->is_oop(this) == true, "A decodeN node must produce an oop!");
     ra_->set_oop(n_cond_set, true);
-    
+
     ra_->set_pair(n1->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
     ra_->set_pair(n_compare->_idx, ra_->get_reg_second(n_crx), ra_->get_reg_first(n_crx));
     ra_->set_pair(n2->_idx, ra_->get_reg_second(this), ra_->get_reg_first(this));
@@ -7303,7 +7275,7 @@
 //----------Conditional Move---------------------------------------------------
 
 // Cmove using isel.
-instruct cmovI_reg_isel(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
+instruct cmovI_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
   predicate(VM_Version::has_isel());
   ins_cost(DEFAULT_COST);
@@ -7321,7 +7293,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_reg(cmpOp cmp, flagsReg crx, iRegIdst dst, iRegIsrc src) %{
+instruct cmovI_reg(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, iRegIsrc src) %{
   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
   predicate(!VM_Version::has_isel());
   ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7335,7 +7307,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_imm(cmpOp cmp, flagsReg crx, iRegIdst dst, immI16 src) %{
+instruct cmovI_imm(cmpOp cmp, flagsRegSrc crx, iRegIdst dst, immI16 src) %{
   match(Set dst (CMoveI (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7349,7 +7321,7 @@
 %}
 
 // Cmove using isel.
-instruct cmovL_reg_isel(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
+instruct cmovL_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
   predicate(VM_Version::has_isel());
   ins_cost(DEFAULT_COST);
@@ -7367,7 +7339,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_reg(cmpOp cmp, flagsReg crx, iRegLdst dst, iRegLsrc src) %{
+instruct cmovL_reg(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, iRegLsrc src) %{
   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
   predicate(!VM_Version::has_isel());
   ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7381,7 +7353,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_imm(cmpOp cmp, flagsReg crx, iRegLdst dst, immL16 src) %{
+instruct cmovL_imm(cmpOp cmp, flagsRegSrc crx, iRegLdst dst, immL16 src) %{
   match(Set dst (CMoveL (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7395,7 +7367,7 @@
 %}
 
 // Cmove using isel.
-instruct cmovN_reg_isel(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
+instruct cmovN_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
   predicate(VM_Version::has_isel());
   ins_cost(DEFAULT_COST);
@@ -7414,7 +7386,7 @@
 %}
 
 // Conditional move for RegN. Only cmov(reg, reg).
-instruct cmovN_reg(cmpOp cmp, flagsReg crx, iRegNdst dst, iRegNsrc src) %{
+instruct cmovN_reg(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, iRegNsrc src) %{
   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
   predicate(!VM_Version::has_isel());
   ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7428,7 +7400,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovN_imm(cmpOp cmp, flagsReg crx, iRegNdst dst, immN_0 src) %{
+instruct cmovN_imm(cmpOp cmp, flagsRegSrc crx, iRegNdst dst, immN_0 src) %{
   match(Set dst (CMoveN (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7442,7 +7414,7 @@
 %}
 
 // Cmove using isel.
-instruct cmovP_reg_isel(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegPsrc src) %{
+instruct cmovP_reg_isel(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegPsrc src) %{
   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
   predicate(VM_Version::has_isel());
   ins_cost(DEFAULT_COST);
@@ -7460,7 +7432,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovP_reg(cmpOp cmp, flagsReg crx, iRegPdst dst, iRegP_N2P src) %{
+instruct cmovP_reg(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, iRegP_N2P src) %{
   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
   predicate(!VM_Version::has_isel());
   ins_cost(DEFAULT_COST+BRANCH_COST);
@@ -7474,7 +7446,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovP_imm(cmpOp cmp, flagsReg crx, iRegPdst dst, immP_0 src) %{
+instruct cmovP_imm(cmpOp cmp, flagsRegSrc crx, iRegPdst dst, immP_0 src) %{
   match(Set dst (CMoveP (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7487,7 +7459,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovF_reg(cmpOp cmp, flagsReg crx, regF dst, regF src) %{
+instruct cmovF_reg(cmpOp cmp, flagsRegSrc crx, regF dst, regF src) %{
   match(Set dst (CMoveF (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7509,7 +7481,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovD_reg(cmpOp cmp, flagsReg crx, regD dst, regD src) %{
+instruct cmovD_reg(cmpOp cmp, flagsRegSrc crx, regD dst, regD src) %{
   match(Set dst (CMoveD (Binary cmp crx) (Binary dst src)));
   ins_cost(DEFAULT_COST+BRANCH_COST);
 
@@ -7542,8 +7514,9 @@
 // Mem_ptr must be a memory operand, else this node does not get
 // Flag_needs_anti_dependence_check set by adlc. If this is not set this node
 // can be rematerialized which leads to errors.
-instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal) %{
+instruct storeLConditional_regP_regL_regL(flagsReg crx, indirect mem_ptr, iRegLsrc oldVal, iRegLsrc newVal, flagsRegCR0 cr0) %{
   match(Set crx (StoreLConditional mem_ptr (Binary oldVal newVal)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
@@ -7560,16 +7533,16 @@
 // Mem_ptr must be a memory operand, else this node does not get
 // Flag_needs_anti_dependence_check set by adlc. If this is not set this node
 // can be rematerialized which leads to errors.
-instruct storePConditional_regP_regP_regP(flagsReg crx, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
-  match(Set crx (StorePConditional mem_ptr (Binary oldVal newVal)));
-  format %{ "CMPXCHGD if ($crx = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
-    __ cmpxchgd($crx$$CondRegister, R0, $oldVal$$Register, $newVal$$Register, $mem_ptr$$Register,
-                MacroAssembler::MemBarNone, MacroAssembler::cmpxchgx_hint_atomic_update(),
-                noreg, NULL, true);
-  %}
-  ins_pipe(pipe_class_default);
+instruct storePConditional_regP_regP_regP(flagsRegCR0 cr0, indirect mem_ptr, iRegPsrc oldVal, iRegPsrc newVal) %{
+  match(Set cr0 (StorePConditional mem_ptr (Binary oldVal newVal)));
+  ins_cost(2*MEMORY_REF_COST);
+
+  format %{ "STDCX_  if ($cr0 = ($oldVal == *$mem_ptr)) *mem_ptr = $newVal; as bool" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_stdcx_);
+    __ stdcx_($newVal$$Register, $mem_ptr$$Register);
+  %}
+  ins_pipe(pipe_class_memory);
 %}
 
 // Implement LoadPLocked. Must be ordered against changes of the memory location
@@ -7577,13 +7550,14 @@
 // Don't know whether this is ever used.
 instruct loadPLocked(iRegPdst dst, memory mem) %{
   match(Set dst (LoadPLocked mem));
-  ins_cost(MEMORY_REF_COST);
-
-  format %{ "LD      $dst, $mem \t// loadPLocked\n\t"
-            "TWI     $dst\n\t"
-            "ISYNC" %}
-  size(12);
-  ins_encode( enc_ld_ac(dst, mem) );
+  ins_cost(2*MEMORY_REF_COST);
+
+  format %{ "LDARX   $dst, $mem \t// loadPLocked\n\t" %}
+  size(4);
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_ldarx);
+    __ ldarx($dst$$Register, $mem$$Register, MacroAssembler::cmpxchgx_hint_atomic_update());
+  %}
   ins_pipe(pipe_class_memory);
 %}
 
@@ -7593,8 +7567,9 @@
 // (CompareAndSwap ...)" or "If (CmpI (CompareAndSwap ..))"  cannot be
 // matched.
 
-instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2) %{
+instruct compareAndSwapI_regP_regI_regI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src1, iRegIsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapI mem_ptr (Binary src1 src2)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
@@ -7607,8 +7582,9 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2) %{
+instruct compareAndSwapN_regP_regN_regN(iRegIdst res, iRegPdst mem_ptr, iRegNsrc src1, iRegNsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapN mem_ptr (Binary src1 src2)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGW $res, $mem_ptr, $src1, $src2; as bool" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
@@ -7621,8 +7597,9 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2) %{
+instruct compareAndSwapL_regP_regL_regL(iRegIdst res, iRegPdst mem_ptr, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapL mem_ptr (Binary src1 src2)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
@@ -7635,8 +7612,9 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2) %{
+instruct compareAndSwapP_regP_regP_regP(iRegIdst res, iRegPdst mem_ptr, iRegPsrc src1, iRegPsrc src2, flagsRegCR0 cr0) %{
   match(Set res (CompareAndSwapP mem_ptr (Binary src1 src2)));
+  effect(TEMP cr0);
   format %{ "CMPXCHGD $res, $mem_ptr, $src1, $src2; as bool; ptr" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode %{
@@ -7649,48 +7627,54 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
+instruct getAndAddI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndAddI mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndAddI $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndAddI(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
+instruct getAndAddL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndAddL mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndAddL $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndAddL(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src) %{
+instruct getAndSetI(iRegIdst res, iRegPdst mem_ptr, iRegIsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetI mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndSetI $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src) %{
+instruct getAndSetL(iRegLdst res, iRegPdst mem_ptr, iRegLsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetL mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndSetL $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src) %{
+instruct getAndSetP(iRegPdst res, iRegPdst mem_ptr, iRegPsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetP mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndSetP $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndSetL(res, mem_ptr, src) );
   ins_pipe(pipe_class_default);
 %}
 
-instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src) %{
+instruct getAndSetN(iRegNdst res, iRegPdst mem_ptr, iRegNsrc src, flagsRegCR0 cr0) %{
   match(Set res (GetAndSetN mem_ptr src));
+  effect(TEMP cr0);
   format %{ "GetAndSetN $res, $mem_ptr, $src" %}
   // Variable size: instruction count smaller if regs are disjoint.
   ins_encode( enc_GetAndSetI(res, mem_ptr, src) );
@@ -7898,18 +7882,8 @@
 %}
 
 // Immediate Subtraction
-// The compiler converts "x-c0" into "x+ -c0" (see SubINode::Ideal),
-// so this rule seems to be unused.
-instruct subI_reg_imm16(iRegIdst dst, iRegIsrc src1, immI16 src2) %{
-  match(Set dst (SubI src1 src2));
-  format %{ "SUBI    $dst, $src1, $src2" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
-    __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1));
-  %}
-  ins_pipe(pipe_class_default);
-%}
+// Immediate Subtraction: The compiler converts "x-c0" into "x+ -c0" (see SubINode::Ideal),
+// so no reg-imm16 rule is needed. Don't use addi with -$src2$$constant: negating it overflows when $src2$$constant == minI16.
 
 // SubI from constant (using subfic).
 instruct subI_imm16_reg(iRegIdst dst, immI16 src1, iRegIsrc src2) %{
@@ -7989,22 +7963,6 @@
   ins_pipe(pipe_class_default);
 %}
 
-// Immediate Subtraction
-// The compiler converts "x-c0" into "x+ -c0" (see SubLNode::Ideal),
-// so this rule seems to be unused.
-// No constant pool entries required.
-instruct subL_reg_imm16(iRegLdst dst, iRegLsrc src1, immL16 src2) %{
-  match(Set dst (SubL src1 src2));
-
-  format %{ "SUBI    $dst, $src1, $src2 \t// long" %}
-  size(4);
-  ins_encode %{
-    // TODO: PPC port $archOpcode(ppc64Opcode_addi);
-    __ addi($dst$$Register, $src1$$Register, ($src2$$constant) * (-1));
-  %}
-  ins_pipe(pipe_class_default);
-%}
-
 // Turn the sign-bit of a long into a 64-bit mask, 0x0...0 for
 // positive longs and 0xF...F for negative ones.
 instruct signmask64I_regL(iRegIdst dst, iRegLsrc src) %{
@@ -8165,7 +8123,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_bne_negI_reg(iRegIdst dst, flagsReg crx, iRegIsrc src1) %{
+instruct cmovI_bne_negI_reg(iRegIdst dst, flagsRegSrc crx, iRegIsrc src1) %{
   effect(USE_DEF dst, USE src1, USE crx);
   predicate(false);
 
@@ -8228,7 +8186,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_bne_negL_reg(iRegLdst dst, flagsReg crx, iRegLsrc src1) %{
+instruct cmovL_bne_negL_reg(iRegLdst dst, flagsRegSrc crx, iRegLsrc src1) %{
   effect(USE_DEF dst, USE src1, USE crx);
   predicate(false);
 
@@ -8281,7 +8239,7 @@
 %}
 
 // Long Remainder with registers
-instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2, flagsRegCR0 cr0) %{
+instruct modL_reg_reg_Ex(iRegLdst dst, iRegLsrc src1, iRegLsrc src2) %{
   match(Set dst (ModL src1 src2));
   ins_cost(10*DEFAULT_COST);
 
@@ -9011,7 +8969,6 @@
 instruct andL_reg_uimm16(iRegLdst dst, iRegLsrc src1, uimmL16 src2, flagsRegCR0 cr0) %{
   match(Set dst (AndL src1 src2));
   effect(KILL cr0);
-  ins_cost(DEFAULT_COST);
 
   format %{ "ANDI    $dst, $src1, $src2 \t// long" %}
   size(4);
@@ -9803,7 +9760,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsReg crx, stackSlotL src) %{
+instruct cmovI_bso_stackSlotL(iRegIdst dst, flagsRegSrc crx, stackSlotL src) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx, USE src);
   predicate(false);
@@ -9817,7 +9774,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsReg crx, stackSlotL mem) %{
+instruct cmovI_bso_stackSlotL_conLvalue0_Ex(iRegIdst dst, flagsRegSrc crx, stackSlotL mem) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx, USE mem);
   predicate(false);
@@ -9972,7 +9929,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsReg crx, stackSlotL src) %{
+instruct cmovL_bso_stackSlotL(iRegLdst dst, flagsRegSrc crx, stackSlotL src) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx, USE src);
   predicate(false);
@@ -9986,7 +9943,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsReg crx, stackSlotL mem) %{
+instruct cmovL_bso_stackSlotL_conLvalue0_Ex(iRegLdst dst, flagsRegSrc crx, stackSlotL mem) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx, USE mem);
   predicate(false);
@@ -10255,7 +10212,6 @@
   size(4);
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
-    // FIXME: avoid andi_ ?
     __ andi_(R0, $src1$$Register, $src2$$constant);
   %}
   ins_pipe(pipe_class_compare);
@@ -10302,13 +10258,12 @@
   size(4);
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_andi_);
-    // FIXME: avoid andi_ ?
     __ andi_(R0, $src1$$Register, $src2$$constant);
   %}
   ins_pipe(pipe_class_compare);
 %}
 
-instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsReg crx) %{
+instruct cmovI_conIvalueMinus1_conIvalue1(iRegIdst dst, flagsRegSrc crx) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx);
   predicate(false);
@@ -10332,7 +10287,7 @@
   ins_pipe(pipe_class_compare);
 %}
 
-instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsReg crx) %{
+instruct cmovI_conIvalueMinus1_conIvalue0_conIvalue1_Ex(iRegIdst dst, flagsRegSrc crx) %{
   // no match-rule, false predicate
   effect(DEF dst, USE crx);
   predicate(false);
@@ -10622,8 +10577,9 @@
 //----------Float Compares----------------------------------------------------
 
 instruct cmpFUnordered_reg_reg(flagsReg crx, regF src1, regF src2) %{
+  // Needs matchrule, see cmpDUnordered.
+  match(Set crx (CmpF src1 src2)); 
   // no match-rule, false predicate
-  effect(DEF crx, USE src1, USE src2);
   predicate(false);
 
   format %{ "cmpFUrd $crx, $src1, $src2" %}
@@ -10731,8 +10687,14 @@
 %}
 
 instruct cmpDUnordered_reg_reg(flagsReg crx, regD src1, regD src2) %{
-  // no match-rule, false predicate
-  effect(DEF crx, USE src1, USE src2);
+  // Needs a match rule so that the ideal opcode is Cmp. This makes GCM place the
+  // node right before the conditional move using it.
+  // In jck test api/java_awt/geom/QuadCurve2DFloat/index.html#SetCurveTesttestCase7,
+  // compilation of java.awt.geom.RectangularShape::getBounds()Ljava/awt/Rectangle
+  // crashed in register allocation where the flags register between cmpDUnordered and a
+  // conditional move was supposed to be spilled.
+  match(Set crx (CmpD src1 src2)); 
+  // False predicate, shall not be matched.
   predicate(false);
 
   format %{ "cmpFUrd $crx, $src1, $src2" %}
@@ -10830,7 +10792,7 @@
 %}
 
 // Conditional Near Branch
-instruct branchCon(cmpOp cmp, flagsReg crx, label lbl) %{
+instruct branchCon(cmpOp cmp, flagsRegSrc crx, label lbl) %{
   // Same match rule as `branchConFar'.
   match(If cmp crx);
   effect(USE lbl);
@@ -10853,7 +10815,7 @@
 // expensive.
 //
 // Conditional Far Branch
-instruct branchConFar(cmpOp cmp, flagsReg crx, label lbl) %{
+instruct branchConFar(cmpOp cmp, flagsRegSrc crx, label lbl) %{
   // Same match rule as `branchCon'.
   match(If cmp crx);
   effect(USE crx, USE lbl);
@@ -10871,7 +10833,7 @@
 %}
 
 // Conditional Branch used with Power6 scheduler (can be far or short).
-instruct branchConSched(cmpOp cmp, flagsReg crx, label lbl) %{
+instruct branchConSched(cmpOp cmp, flagsRegSrc crx, label lbl) %{
   // Same match rule as `branchCon'.
   match(If cmp crx);
   effect(USE crx, USE lbl);
@@ -10890,7 +10852,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct branchLoopEnd(cmpOp cmp, flagsReg crx, label labl) %{
+instruct branchLoopEnd(cmpOp cmp, flagsRegSrc crx, label labl) %{
   match(CountedLoopEnd cmp crx);
   effect(USE labl);
   ins_cost(BRANCH_COST);
@@ -10904,7 +10866,7 @@
   ins_pipe(pipe_class_default);
 %}
 
-instruct branchLoopEndFar(cmpOp cmp, flagsReg crx, label labl) %{
+instruct branchLoopEndFar(cmpOp cmp, flagsRegSrc crx, label labl) %{
   match(CountedLoopEnd cmp crx);
   effect(USE labl);
   predicate(!false /* TODO: PPC port HB_Schedule */);
@@ -10920,7 +10882,7 @@
 %}
 
 // Conditional Branch used with Power6 scheduler (can be far or short).
-instruct branchLoopEndSched(cmpOp cmp, flagsReg crx, label labl) %{
+instruct branchLoopEndSched(cmpOp cmp, flagsRegSrc crx, label labl) %{
   match(CountedLoopEnd cmp crx);
   effect(USE labl);
   predicate(false /* TODO: PPC port HB_Schedule */);
@@ -10969,13 +10931,14 @@
 instruct cmpFastLock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
   match(Set crx (FastLock oop box));
   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
-  // TODO PPC port predicate(!UseNewFastLockPPC64 || UseBiasedLocking);
+  predicate(/*(!UseNewFastLockPPC64 || UseBiasedLocking) &&*/ !Compile::current()->use_rtm());
 
   format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2, $tmp3" %}
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
-                                 $tmp3$$Register, $tmp1$$Register, $tmp2$$Register);
+                                 $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+                                 UseBiasedLocking && !UseOptoBiasInlining); // SAPJVM MD 2014-11-06 UseOptoBiasInlining
     // If locking was successfull, crx should indicate 'EQ'.
     // The compiler generates a branch to the runtime call to
     // _complete_monitor_locking_Java for the case where crx is 'NE'.
@@ -10983,15 +10946,58 @@
   ins_pipe(pipe_class_compare);
 %}
 
+// FastLock variant for transactional memory (TM), selected when the compilation uses RTM. box is bound to a fixed register (rarg2RegP) so that it can be declared USE_KILL.
+instruct cmpFastLock_tm(flagsReg crx, iRegPdst oop, rarg2RegP box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
+  match(Set crx (FastLock oop box));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, USE_KILL box);
+  predicate(Compile::current()->use_rtm());
+
+  format %{ "FASTLOCK  $oop, $box, $tmp1, $tmp2, $tmp3 (TM)" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ compiler_fast_lock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
+                                 $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+                                 /*Biased Locking*/ false,
+                                 _rtm_counters, _stack_rtm_counters,
+                                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
+                                 /*TM*/ true, ra_->C->profile_rtm());
+    // If locking was successful, crx should indicate 'EQ'.
+    // The compiler generates a branch to the runtime call to
+    // _complete_monitor_locking_Java for the case where crx is 'NE'.
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
 instruct cmpFastUnlock(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
   match(Set crx (FastUnlock oop box));
   effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
+  predicate(!Compile::current()->use_rtm());
 
   format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2" %}
   ins_encode %{
     // TODO: PPC port $archOpcode(ppc64Opcode_compound);
     __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
-                                   $tmp3$$Register, $tmp1$$Register, $tmp2$$Register);
+                                   $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+                                   UseBiasedLocking && !UseOptoBiasInlining,
+                                   false);
+    // If unlocking was successful, crx should indicate 'EQ'.
+    // The compiler generates a branch to the runtime call to
+    // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
+  %}
+  ins_pipe(pipe_class_compare);
+%}
+
+instruct cmpFastUnlock_tm(flagsReg crx, iRegPdst oop, iRegPdst box, iRegPdst tmp1, iRegPdst tmp2, iRegPdst tmp3) %{
+  match(Set crx (FastUnlock oop box));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3);
+  predicate(Compile::current()->use_rtm());
+
+  format %{ "FASTUNLOCK  $oop, $box, $tmp1, $tmp2 (TM)" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ compiler_fast_unlock_object($crx$$CondRegister, $oop$$Register, $box$$Register,
+                                   $tmp3$$Register, $tmp1$$Register, $tmp2$$Register,
+                                   /*Biased Locking*/ false, /*TM*/ true);
     // If unlocking was successfull, crx should indicate 'EQ'.
     // The compiler generates a branch to the runtime call to
     // _complete_monitor_unlocking_Java for the case where crx is 'NE'.
@@ -11658,6 +11664,66 @@
   ins_pipe(pipe_class_default);
 %}
 
+
+//----------Overflow Math Instructions-----------------------------------------
+
+// Note that we have to make sure that XER.SO is reset before using overflow instructions.
+// Simple overflow operations can be matched by very few instructions (e.g. addExact: xor, and_, bc).
+// It seems that only the Long intrinsics have an advantage. (The only expensive one is OverflowMulL.)
+
+instruct overflowAddL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
+  match(Set cr0 (OverflowAddL op1 op2));
+
+  format %{ "addo_   R0, $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ li(R0, 0);
+    __ mtxer(R0); // clear XER.SO, which must be reset before an overflow-recording instruction
+    __ addo_(R0, $op1$$Register, $op2$$Register); // sum goes to scratch R0; the node's result is cr0
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct overflowSubL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
+  match(Set cr0 (OverflowSubL op1 op2));
+
+  format %{ "subfo_  R0, $op2, $op1\t# overflow check long" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ li(R0, 0);
+    __ mtxer(R0); // clear XER.SO, which must be reset before an overflow-recording instruction
+    __ subfo_(R0, $op2$$Register, $op1$$Register); // difference goes to scratch R0; the node's result is cr0
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct overflowNegL_reg(flagsRegCR0 cr0, immL_0 zero, iRegLsrc op2) %{
+  match(Set cr0 (OverflowSubL zero op2));
+
+  format %{ "nego_   R0, $op2\t# overflow check long" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ li(R0, 0);
+    __ mtxer(R0); // clear XER.SO, which must be reset before an overflow-recording instruction
+    __ nego_(R0, $op2$$Register); // matched as (0 - op2); negated value goes to scratch R0, the node's result is cr0
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct overflowMulL_reg_reg(flagsRegCR0 cr0, iRegLsrc op1, iRegLsrc op2) %{
+  match(Set cr0 (OverflowMulL op1 op2));
+
+  format %{ "mulldo_ R0, $op1, $op2\t# overflow check long" %}
+  ins_encode %{
+    // TODO: PPC port $archOpcode(ppc64Opcode_compound);
+    __ li(R0, 0);
+    __ mtxer(R0); // clear XER.SO, which must be reset before an overflow-recording instruction
+    __ mulldo_(R0, $op1$$Register, $op2$$Register); // product goes to scratch R0; the node's result is cr0
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+
 // ============================================================================
 // Safepoint Instruction
 
--- a/src/cpu/ppc/vm/register_definitions_ppc.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/register_definitions_ppc.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,19 +23,10 @@
  *
  */
 
-// make sure the defines don't screw up the declarations later on in this file
+// Make sure the defines don't screw up the declarations later on in this file.
 #define DONT_USE_REGISTER_DEFINES
 
-#include "precompiled.hpp"
-#include "asm/macroAssembler.hpp"
 #include "asm/register.hpp"
-#include "register_ppc.hpp"
-#ifdef TARGET_ARCH_MODEL_ppc_32
-# include "interp_masm_ppc_32.hpp"
-#endif
-#ifdef TARGET_ARCH_MODEL_ppc_64
-# include "interp_masm_ppc_64.hpp"
-#endif
 
 REGISTER_DEFINITION(Register, noreg);
 
--- a/src/cpu/ppc/vm/relocInfo_ppc.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/relocInfo_ppc.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,14 +25,12 @@
 
 #include "precompiled.hpp"
 #include "asm/assembler.inline.hpp"
-#include "assembler_ppc.inline.hpp"
 #include "code/relocInfo.hpp"
 #include "nativeInst_ppc.hpp"
 #include "oops/oop.inline.hpp"
 #include "runtime/safepoint.hpp"
 
 void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
-  bool copy_back_to_oop_pool = true;  // TODO: PPC port
   // The following comment is from the declaration of DataRelocation:
   //
   //  "The "o" (displacement) argument is relevant only to split relocations
--- a/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 #include "code/debugInfoRec.hpp"
 #include "code/icBuffer.hpp"
 #include "code/vtableStubs.hpp"
+#include "frame_ppc.hpp"
 #include "interpreter/interpreter.hpp"
 #include "interpreter/interp_masm.hpp"
 #include "oops/compiledICHolder.hpp"
@@ -194,8 +195,8 @@
   RegisterSaver_LiveIntReg(   R27 ),
   RegisterSaver_LiveIntReg(   R28 ),
   RegisterSaver_LiveIntReg(   R29 ),
-  RegisterSaver_LiveIntReg(   R31 ),
-  RegisterSaver_LiveIntReg(   R30 ), // r30 must be the last register
+  RegisterSaver_LiveIntReg(   R30 ),
+  RegisterSaver_LiveIntReg(   R31 ), // must be the last register (see save/restore functions below)
 };
 
 OopMap* RegisterSaver::push_frame_reg_args_and_save_live_registers(MacroAssembler* masm,
@@ -229,29 +230,30 @@
 
   BLOCK_COMMENT("push_frame_reg_args_and_save_live_registers {");
 
-  // Save r30 in the last slot of the not yet pushed frame so that we
+  // Save r31 in the last slot of the not yet pushed frame so that we
   // can use it as scratch reg.
-  __ std(R30, -reg_size, R1_SP);
+  __ std(R31, -reg_size, R1_SP);
   assert(-reg_size == register_save_offset - frame_size_in_bytes + ((regstosave_num-1)*reg_size),
          "consistency check");
 
   // save the flags
   // Do the save_LR_CR by hand and adjust the return pc if requested.
-  __ mfcr(R30);
-  __ std(R30, _abi(cr), R1_SP);
+  __ mfcr(R31);
+  __ std(R31, _abi(cr), R1_SP);
   switch (return_pc_location) {
-    case return_pc_is_lr:    __ mflr(R30);           break;
-    case return_pc_is_r4:    __ mr(R30, R4);     break;
+    case return_pc_is_lr:    __ mflr(R31);           break;
+    case return_pc_is_r4:    __ mr(R31, R4);     break;
     case return_pc_is_thread_saved_exception_pc:
-                                 __ ld(R30, thread_(saved_exception_pc)); break;
+                             __ ld(R31, thread_(saved_exception_pc)); break;
     default: ShouldNotReachHere();
   }
-  if (return_pc_adjustment != 0)
-    __ addi(R30, R30, return_pc_adjustment);
-  __ std(R30, _abi(lr), R1_SP);
+  if (return_pc_adjustment != 0) {
+    __ addi(R31, R31, return_pc_adjustment);
+  }
+  __ std(R31, _abi(lr), R1_SP);
 
   // push a new frame
-  __ push_frame(frame_size_in_bytes, R30);
+  __ push_frame(frame_size_in_bytes, R31);
 
   // save all registers (ints and floats)
   offset = register_save_offset;
@@ -261,7 +263,7 @@
 
     switch (reg_type) {
       case RegisterSaver::int_reg: {
-        if (reg_num != 30) { // We spilled R30 right at the beginning.
+        if (reg_num != 31) { // We spilled R31 right at the beginning.
           __ std(as_Register(reg_num), offset, R1_SP);
         }
         break;
@@ -272,8 +274,8 @@
       }
       case RegisterSaver::special_reg: {
         if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
-          __ mfctr(R30);
-          __ std(R30, offset, R1_SP);
+          __ mfctr(R31);
+          __ std(R31, offset, R1_SP);
         } else {
           Unimplemented();
         }
@@ -321,7 +323,7 @@
 
     switch (reg_type) {
       case RegisterSaver::int_reg: {
-        if (reg_num != 30) // R30 restored at the end, it's the tmp reg!
+        if (reg_num != 31) // R31 restored at the end, it's the tmp reg!
           __ ld(as_Register(reg_num), offset, R1_SP);
         break;
       }
@@ -332,8 +334,8 @@
       case RegisterSaver::special_reg: {
         if (reg_num == SR_CTR_SpecialRegisterEnumValue) {
           if (restore_ctr) { // Nothing to do here if ctr already contains the next address.
-            __ ld(R30, offset, R1_SP);
-            __ mtctr(R30);
+            __ ld(R31, offset, R1_SP);
+            __ mtctr(R31);
           }
         } else {
           Unimplemented();
@@ -350,10 +352,10 @@
   __ pop_frame();
 
   // restore the flags
-  __ restore_LR_CR(R30);
+  __ restore_LR_CR(R31);
 
   // restore scratch register's value
-  __ ld(R30, -reg_size, R1_SP);
+  __ ld(R31, -reg_size, R1_SP);
 
   BLOCK_COMMENT("} restore_live_registers_and_pop_frame");
 }
@@ -2021,6 +2023,8 @@
   __ push_frame(frame_size_in_bytes, r_temp_1);          // Push the c2n adapter's frame.
   frame_done_pc = (intptr_t)__ pc();
 
+  __ verify_thread();
+
   // Native nmethod wrappers never take possesion of the oop arguments.
   // So the caller will gc the arguments.
   // The only thing we need an oopMap for is if the call is static.
@@ -2594,7 +2598,7 @@
 }
 
 uint SharedRuntime::out_preserve_stack_slots() {
-#ifdef COMPILER2
+#if defined(COMPILER1) || defined(COMPILER2)
   return frame::jit_out_preserve_size / VMRegImpl::stack_slot_size;
 #else
   return 0;
@@ -2868,11 +2872,6 @@
   __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
   __ BIND(skip_restore_excp);
 
-  // reload narrro_oop_base
-  if (UseCompressedOops && Universe::narrow_oop_base() != 0) {
-    __ load_const_optimized(R30, Universe::narrow_oop_base());
-  }
-
   __ pop_frame();
 
   // stack: (deoptee, optional i2c, caller of deoptee, ...).
--- a/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Thu May 07 20:51:12 2015 -0700
@@ -261,9 +261,6 @@
       // global toc register
       __ load_const(R29, MacroAssembler::global_toc(), R11_scratch1);
 
-      // Load narrow oop base.
-      __ reinit_heapbase(R30, R11_scratch1);
-
       // Remember the senderSP so we interpreter can pop c2i arguments off of the stack
       // when called via a c2i.
 
@@ -418,6 +415,23 @@
   // or native call stub.  The pending exception in Thread is
   // converted into a Java-level exception.
   //
+  // Read:
+  //
+  //   LR:     The pc the runtime library callee wants to return to.
+  //           Since the exception occurred in the callee, the return pc
+  //           from the point of view of Java is the exception pc.
+  //   thread: Needed for method handles.
+  //
+  // Invalidate:
+  //
+  //   volatile registers (except below).
+  //
+  // Update:
+  //
+  //   R4_ARG2: exception
+  //
+  // (LR is unchanged and is live out).
+  //
   address generate_forward_exception() {
     StubCodeMark mark(this, "StubRoutines", "forward_exception");
     address start = __ pc();
@@ -1256,9 +1270,9 @@
     Register tmp3 = R8_ARG6;
 
 #if defined(ABI_ELFv2)
-     address nooverlap_target = aligned ?
-       StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
-       StubRoutines::jbyte_disjoint_arraycopy();
+    address nooverlap_target = aligned ?
+      StubRoutines::arrayof_jbyte_disjoint_arraycopy() :
+      StubRoutines::jbyte_disjoint_arraycopy();
 #else
     address nooverlap_target = aligned ?
       ((FunctionDescriptor*)StubRoutines::arrayof_jbyte_disjoint_arraycopy())->entry() :
--- a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -264,11 +264,11 @@
       __ cmpdi(CCR0, Rmdo, 0);
       __ beq(CCR0, no_mdo);
 
-      // Increment invocation counter in the MDO.
-      const int mdo_ic_offs = in_bytes(MethodData::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
-      __ lwz(Rscratch2, mdo_ic_offs, Rmdo);
+      // Increment backedge counter in the MDO.
+      const int mdo_bc_offs = in_bytes(MethodData::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
+      __ lwz(Rscratch2, mdo_bc_offs, Rmdo);
       __ addi(Rscratch2, Rscratch2, increment);
-      __ stw(Rscratch2, mdo_ic_offs, Rmdo);
+      __ stw(Rscratch2, mdo_bc_offs, Rmdo);
       __ load_const_optimized(Rscratch1, mask, R0);
       __ and_(Rscratch1, Rscratch2, Rscratch1);
       __ bne(CCR0, done);
@@ -276,12 +276,12 @@
     }
 
     // Increment counter in MethodCounters*.
-    const int mo_ic_offs = in_bytes(MethodCounters::invocation_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
+    const int mo_bc_offs = in_bytes(MethodCounters::backedge_counter_offset()) + in_bytes(InvocationCounter::counter_offset());
     __ bind(no_mdo);
     __ get_method_counters(R19_method, R3_counters, done);
-    __ lwz(Rscratch2, mo_ic_offs, R3_counters);
+    __ lwz(Rscratch2, mo_bc_offs, R3_counters);
     __ addi(Rscratch2, Rscratch2, increment);
-    __ stw(Rscratch2, mo_ic_offs, R3_counters);
+    __ stw(Rscratch2, mo_bc_offs, R3_counters);
     __ load_const_optimized(Rscratch1, mask, R0);
     __ and_(Rscratch1, Rscratch2, Rscratch1);
     __ beq(CCR0, *overflow);
@@ -611,12 +611,7 @@
 // For others we can use a normal (native) entry.
 
 inline bool math_entry_available(AbstractInterpreter::MethodKind kind) {
-  // Provide math entry with debugging on demand.
-  // Note: Debugging changes which code will get executed:
-  // Debugging or disabled InlineIntrinsics: java method will get interpreted and performs a native call.
-  // Not debugging and enabled InlineIntrinics: processor instruction will get used.
-  // Result might differ slightly due to rounding etc.
-  if (!InlineIntrinsics && (!FLAG_IS_ERGO(InlineIntrinsics))) return false; // Generate a vanilla entry.
+  if (!InlineIntrinsics) return false;
 
   return ((kind==Interpreter::java_lang_math_sqrt && VM_Version::has_fsqrt()) ||
           (kind==Interpreter::java_lang_math_abs));
@@ -628,15 +623,8 @@
     return Interpreter::entry_for_kind(Interpreter::zerolocals);
   }
 
-  Label Lslow_path;
-  const Register Rjvmti_mode = R11_scratch1;
   address entry = __ pc();
 
-  // Provide math entry with debugging on demand.
-  __ lwz(Rjvmti_mode, thread_(interp_only_mode));
-  __ cmpwi(CCR0, Rjvmti_mode, 0);
-  __ bne(CCR0, Lslow_path); // jvmti_mode!=0
-
   __ lfd(F1_RET, Interpreter::stackElementSize, R15_esp);
 
   // Pop c2i arguments (if any) off when we return.
@@ -659,9 +647,6 @@
   // And we're done.
   __ blr();
 
-  // Provide slow path for JVMTI case.
-  __ bind(Lslow_path);
-  __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R12_scratch2);
   __ flush();
 
   return entry;
--- a/src/cpu/ppc/vm/templateInterpreter_ppc.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/templateInterpreter_ppc.hpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2013, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2013, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@
   // Run with +PrintInterpreter to get the VM to print out the size.
   // Max size with JVMTI
 
-  const static int InterpreterCodeSize = 210*K;
+  const static int InterpreterCodeSize = 230*K;
 
 #endif // CPU_PPC_VM_TEMPLATEINTERPRETER_PPC_HPP
 
--- a/src/cpu/ppc/vm/templateTable_ppc_64.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/templateTable_ppc_64.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved.
  * Copyright 2013, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -375,23 +375,22 @@
 
   int index_size = wide ? sizeof(u2) : sizeof(u1);
   const Register Rscratch = R11_scratch1;
-  Label resolved;
+  Label is_null;
 
   // We are resolved if the resolved reference cache entry contains a
   // non-null object (CallSite, etc.)
   __ get_cache_index_at_bcp(Rscratch, 1, index_size);  // Load index.
-  __ load_resolved_reference_at_index(R17_tos, Rscratch);
-  __ cmpdi(CCR0, R17_tos, 0);
-  __ bne(CCR0, resolved);
+  __ load_resolved_reference_at_index(R17_tos, Rscratch, &is_null);
+  __ verify_oop(R17_tos);
+  __ dispatch_epilog(atos, Bytecodes::length_for(bytecode()));
+
+  __ bind(is_null);
   __ load_const_optimized(R3_ARG1, (int)bytecode());
 
   address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);
 
   // First time invocation - must resolve first.
   __ call_VM(R17_tos, entry, R3_ARG1);
-
-  __ align(32, 12);
-  __ bind(resolved);
   __ verify_oop(R17_tos);
 }
 
@@ -437,6 +436,14 @@
 }
 
 void TemplateTable::iload() {
+  iload_internal();
+}
+
+void TemplateTable::nofast_iload() {
+  iload_internal(may_not_rewrite);
+}
+
+void TemplateTable::iload_internal(RewriteControl rc) {
   transition(vtos, itos);
 
   // Get the local value into tos
@@ -445,7 +452,7 @@
 
   // Rewrite iload,iload  pair into fast_iload2
   //         iload,caload pair into fast_icaload
-  if (RewriteFrequentPairs) {
+  if (RewriteFrequentPairs && rc == may_rewrite) {
     Label Lrewrite, Ldone;
     Register Rnext_byte  = R3_ARG1,
              Rrewrite_to = R6_ARG4,
@@ -709,6 +716,14 @@
 }
 
 void TemplateTable::aload_0() {
+  aload_0_internal();
+}
+
+void TemplateTable::nofast_aload_0() {
+  aload_0_internal(may_not_rewrite);
+}
+
+void TemplateTable::aload_0_internal(RewriteControl rc) {
   transition(vtos, atos);
   // According to bytecode histograms, the pairs:
   //
@@ -732,7 +747,7 @@
   // These bytecodes with a small amount of code are most profitable
   // to rewrite.
 
-  if (RewriteFrequentPairs) {
+  if (RewriteFrequentPairs && rc == may_rewrite) {
 
     Label Lrewrite, Ldont_rewrite;
     Register Rnext_byte  = R3_ARG1,
@@ -2144,6 +2159,12 @@
   __ get_cache_and_index_at_bcp(Rcache, 1, index_size);
   Label Lresolved, Ldone;
 
+  Bytecodes::Code code = bytecode();
+  switch (code) {
+  case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
+  case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
+  }
+
   assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
   // We are resolved if the indices offset contains the current bytecode.
 #if defined(VM_LITTLE_ENDIAN)
@@ -2152,24 +2173,11 @@
   __ lbz(Rscratch, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (byte_no + 1), Rcache);
 #endif
   // Acquire by cmp-br-isync (see below).
-  __ cmpdi(CCR0, Rscratch, (int)bytecode());
+  __ cmpdi(CCR0, Rscratch, (int)code);
   __ beq(CCR0, Lresolved);
 
-  address entry = NULL;
-  switch (bytecode()) {
-    case Bytecodes::_getstatic      : // fall through
-    case Bytecodes::_putstatic      : // fall through
-    case Bytecodes::_getfield       : // fall through
-    case Bytecodes::_putfield       : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); break;
-    case Bytecodes::_invokevirtual  : // fall through
-    case Bytecodes::_invokespecial  : // fall through
-    case Bytecodes::_invokestatic   : // fall through
-    case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke); break;
-    case Bytecodes::_invokehandle   : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle); break;
-    case Bytecodes::_invokedynamic  : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic); break;
-    default                         : ShouldNotReachHere(); break;
-  }
-  __ li(R4_ARG2, (int)bytecode());
+  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
+  __ li(R4_ARG2, code);
   __ call_VM(noreg, entry, R4_ARG2, true);
 
   // Update registers with resolved info.
@@ -2350,7 +2358,7 @@
 }
 
 // PPC64: implement volatile loads as fence-store-acquire.
-void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
+void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
   transition(vtos, vtos);
 
   Label Lacquire, Lisync;
@@ -2366,7 +2374,7 @@
   static address field_branch_table[number_of_states],
                  static_branch_table[number_of_states];
 
-  address* branch_table = is_static ? static_branch_table : field_branch_table;
+  address* branch_table = (is_static || rc == may_not_rewrite) ? static_branch_table : field_branch_table;
 
   // Get field offset.
   resolve_cache_and_index(byte_no, Rcache, Rscratch, sizeof(u2));
@@ -2417,7 +2425,14 @@
 #ifdef ASSERT
   __ bind(LFlagInvalid);
   __ stop("got invalid flag", 0x654);
-
+#endif
+
+  if (!is_static && rc == may_not_rewrite) {
+    // We reuse the code from is_static.  It's jumped to via the table above.
+    return;
+  }
+
+#ifdef ASSERT
   // __ bind(Lvtos);
   address pc_before_fence = __ pc();
   __ fence(); // Volatile entry point (one instruction before non-volatile_entry point).
@@ -2434,7 +2449,9 @@
   branch_table[dtos] = __ pc(); // non-volatile_entry point
   __ lfdx(F15_ftos, Rclass_or_obj, Roffset);
   __ push(dtos);
-  if (!is_static) patch_bytecode(Bytecodes::_fast_dgetfield, Rbc, Rscratch);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_dgetfield, Rbc, Rscratch);
+  }
   {
     Label acquire_double;
     __ beq(CCR6, acquire_double); // Volatile?
@@ -2453,7 +2470,9 @@
   branch_table[ftos] = __ pc(); // non-volatile_entry point
   __ lfsx(F15_ftos, Rclass_or_obj, Roffset);
   __ push(ftos);
-  if (!is_static) { patch_bytecode(Bytecodes::_fast_fgetfield, Rbc, Rscratch); }
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_fgetfield, Rbc, Rscratch);
+  }
   {
     Label acquire_float;
     __ beq(CCR6, acquire_float); // Volatile?
@@ -2472,7 +2491,9 @@
   branch_table[itos] = __ pc(); // non-volatile_entry point
   __ lwax(R17_tos, Rclass_or_obj, Roffset);
   __ push(itos);
-  if (!is_static) patch_bytecode(Bytecodes::_fast_igetfield, Rbc, Rscratch);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_igetfield, Rbc, Rscratch);
+  }
   __ beq(CCR6, Lacquire); // Volatile?
   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
 
@@ -2483,7 +2504,9 @@
   branch_table[ltos] = __ pc(); // non-volatile_entry point
   __ ldx(R17_tos, Rclass_or_obj, Roffset);
   __ push(ltos);
-  if (!is_static) patch_bytecode(Bytecodes::_fast_lgetfield, Rbc, Rscratch);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_lgetfield, Rbc, Rscratch);
+  }
   __ beq(CCR6, Lacquire); // Volatile?
   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
 
@@ -2495,7 +2518,9 @@
   __ lbzx(R17_tos, Rclass_or_obj, Roffset);
   __ extsb(R17_tos, R17_tos);
   __ push(btos);
-  if (!is_static) patch_bytecode(Bytecodes::_fast_bgetfield, Rbc, Rscratch);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_bgetfield, Rbc, Rscratch);
+  }
   __ beq(CCR6, Lacquire); // Volatile?
   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
 
@@ -2506,7 +2531,9 @@
   branch_table[ctos] = __ pc(); // non-volatile_entry point
   __ lhzx(R17_tos, Rclass_or_obj, Roffset);
   __ push(ctos);
-  if (!is_static) patch_bytecode(Bytecodes::_fast_cgetfield, Rbc, Rscratch);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_cgetfield, Rbc, Rscratch);
+  }
   __ beq(CCR6, Lacquire); // Volatile?
   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
 
@@ -2517,7 +2544,9 @@
   branch_table[stos] = __ pc(); // non-volatile_entry point
   __ lhax(R17_tos, Rclass_or_obj, Roffset);
   __ push(stos);
-  if (!is_static) patch_bytecode(Bytecodes::_fast_sgetfield, Rbc, Rscratch);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_sgetfield, Rbc, Rscratch);
+  }
   __ beq(CCR6, Lacquire); // Volatile?
   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
 
@@ -2530,7 +2559,9 @@
   __ verify_oop(R17_tos);
   __ push(atos);
   //__ dcbt(R17_tos); // prefetch
-  if (!is_static) patch_bytecode(Bytecodes::_fast_agetfield, Rbc, Rscratch);
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_agetfield, Rbc, Rscratch);
+  }
   __ beq(CCR6, Lacquire); // Volatile?
   __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
 
@@ -2553,6 +2584,10 @@
   getfield_or_static(byte_no, false);
 }
 
+void TemplateTable::nofast_getfield(int byte_no) {
+  getfield_or_static(byte_no, false, may_not_rewrite);
+}
+
 void TemplateTable::getstatic(int byte_no) {
   getfield_or_static(byte_no, true);
 }
@@ -2643,7 +2678,7 @@
 }
 
 // PPC64: implement volatile stores as release-store (return bytecode contains an additional release).
-void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
+void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
   Label Lvolatile;
 
   const Register Rcache        = R5_ARG3,  // Do not use ARG1/2 (causes trouble in jvmti_post_field_mod).
@@ -2657,10 +2692,12 @@
                  Rbc           = Rscratch3;
   const ConditionRegister CR_is_vol = CCR2; // Non-volatile condition register (survives runtime call in do_oop_store).
 
-  static address field_branch_table[number_of_states],
+  static address field_rw_branch_table[number_of_states],
+                 field_norw_branch_table[number_of_states],
                  static_branch_table[number_of_states];
 
-  address* branch_table = is_static ? static_branch_table : field_branch_table;
+  address* branch_table = is_static ? static_branch_table :
+    (rc == may_rewrite ? field_rw_branch_table : field_norw_branch_table);
 
   // Stack (grows up):
   //  value
@@ -2688,7 +2725,9 @@
 
   // Load from branch table and dispatch (volatile case: one instruction ahead).
   __ sldi(Rflags, Rflags, LogBytesPerWord);
-  if (!support_IRIW_for_not_multiple_copy_atomic_cpu) { __ cmpwi(CR_is_vol, Rscratch, 1); } // Volatile?
+  if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
+    __ cmpwi(CR_is_vol, Rscratch, 1);  // Volatile?
+  }
   __ sldi(Rscratch, Rscratch, exact_log2(BytesPerInstWord)); // Volatile? size of instruction 1 : 0.
   __ ldx(Rbtable, Rbtable, Rflags);
 
@@ -2715,9 +2754,13 @@
   assert(branch_table[dtos] == 0, "can't compute twice");
   branch_table[dtos] = __ pc(); // non-volatile_entry point
   __ pop(dtos);
-  if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
+  if (!is_static) {
+    pop_and_check_object(Rclass_or_obj);  // Kills R11_scratch1.
+  }
   __ stfdx(F15_ftos, Rclass_or_obj, Roffset);
-  if (!is_static) { patch_bytecode(Bytecodes::_fast_dputfield, Rbc, Rscratch, true, byte_no); }
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_dputfield, Rbc, Rscratch, true, byte_no);
+  }
   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
     __ beq(CR_is_vol, Lvolatile); // Volatile?
   }
@@ -2731,7 +2774,9 @@
   __ pop(ftos);
   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
   __ stfsx(F15_ftos, Rclass_or_obj, Roffset);
-  if (!is_static) { patch_bytecode(Bytecodes::_fast_fputfield, Rbc, Rscratch, true, byte_no); }
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_fputfield, Rbc, Rscratch, true, byte_no);
+  }
   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
     __ beq(CR_is_vol, Lvolatile); // Volatile?
   }
@@ -2745,7 +2790,9 @@
   __ pop(itos);
   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
   __ stwx(R17_tos, Rclass_or_obj, Roffset);
-  if (!is_static) { patch_bytecode(Bytecodes::_fast_iputfield, Rbc, Rscratch, true, byte_no); }
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_iputfield, Rbc, Rscratch, true, byte_no);
+  }
   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
     __ beq(CR_is_vol, Lvolatile); // Volatile?
   }
@@ -2759,7 +2806,9 @@
   __ pop(ltos);
   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
   __ stdx(R17_tos, Rclass_or_obj, Roffset);
-  if (!is_static) { patch_bytecode(Bytecodes::_fast_lputfield, Rbc, Rscratch, true, byte_no); }
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_lputfield, Rbc, Rscratch, true, byte_no);
+  }
   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
     __ beq(CR_is_vol, Lvolatile); // Volatile?
   }
@@ -2773,7 +2822,9 @@
   __ pop(btos);
   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
   __ stbx(R17_tos, Rclass_or_obj, Roffset);
-  if (!is_static) { patch_bytecode(Bytecodes::_fast_bputfield, Rbc, Rscratch, true, byte_no); }
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_bputfield, Rbc, Rscratch, true, byte_no);
+  }
   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
     __ beq(CR_is_vol, Lvolatile); // Volatile?
   }
@@ -2787,7 +2838,9 @@
   __ pop(ctos);
   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1..
   __ sthx(R17_tos, Rclass_or_obj, Roffset);
-  if (!is_static) { patch_bytecode(Bytecodes::_fast_cputfield, Rbc, Rscratch, true, byte_no); }
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_cputfield, Rbc, Rscratch, true, byte_no);
+  }
   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
     __ beq(CR_is_vol, Lvolatile); // Volatile?
   }
@@ -2801,7 +2854,9 @@
   __ pop(stos);
   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // Kills R11_scratch1.
   __ sthx(R17_tos, Rclass_or_obj, Roffset);
-  if (!is_static) { patch_bytecode(Bytecodes::_fast_sputfield, Rbc, Rscratch, true, byte_no); }
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_sputfield, Rbc, Rscratch, true, byte_no);
+  }
   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
     __ beq(CR_is_vol, Lvolatile); // Volatile?
   }
@@ -2815,7 +2870,9 @@
   __ pop(atos);
   if (!is_static) { pop_and_check_object(Rclass_or_obj); } // kills R11_scratch1
   do_oop_store(_masm, Rclass_or_obj, Roffset, R17_tos, Rscratch, Rscratch2, Rscratch3, _bs->kind(), false /* precise */, true /* check null */);
-  if (!is_static) { patch_bytecode(Bytecodes::_fast_aputfield, Rbc, Rscratch, true, byte_no); }
+  if (!is_static && rc == may_rewrite) {
+    patch_bytecode(Bytecodes::_fast_aputfield, Rbc, Rscratch, true, byte_no);
+  }
   if (!support_IRIW_for_not_multiple_copy_atomic_cpu) {
     __ beq(CR_is_vol, Lvolatile); // Volatile?
     __ dispatch_epilog(vtos, Bytecodes::length_for(bytecode()));
@@ -2839,6 +2896,10 @@
   putfield_or_static(byte_no, false);
 }
 
+void TemplateTable::nofast_putfield(int byte_no) {
+  putfield_or_static(byte_no, false, may_not_rewrite);
+}
+
 void TemplateTable::putstatic(int byte_no) {
   putfield_or_static(byte_no, true);
 }
@@ -3259,7 +3320,9 @@
   __ testbitdi(CCR0, R0, Rflags, ConstantPoolCacheEntry::is_vfinal_shift);
   __ bfalse(CCR0, LnotFinal);
 
-  patch_bytecode(Bytecodes::_fast_invokevfinal, Rnew_bc, R12_scratch2);
+  if (RewriteBytecodes && !UseSharedSpaces) {
+    patch_bytecode(Bytecodes::_fast_invokevfinal, Rnew_bc, R12_scratch2);
+  }
   invokevfinal_helper(Rvtableindex_or_method, Rflags, R11_scratch1, R12_scratch2);
 
   __ align(32, 12);
@@ -3795,9 +3858,9 @@
   transition(atos, itos);
 
   Label Ldone, Lis_null, Lquicked, Lresolved;
-  Register Roffset         = R5_ARG3,
+  Register Roffset         = R6_ARG4,
            RobjKlass       = R4_ARG2,
-           RspecifiedKlass = R6_ARG4, // Generate_ClassCastException_verbose_handler will expect the value in this register.
+           RspecifiedKlass = R5_ARG3,
            Rcpool          = R11_scratch1,
            Rtags           = R12_scratch2;
 
--- a/src/cpu/ppc/vm/vm_version_ppc.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/vm_version_ppc.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,12 +32,13 @@
 #include "runtime/os.hpp"
 #include "runtime/stubCodeGenerator.hpp"
 #include "utilities/defaultStream.hpp"
+#include "utilities/globalDefinitions.hpp"
 #include "vm_version_ppc.hpp"
 
 # include <sys/sysinfo.h>
 
 int VM_Version::_features = VM_Version::unknown_m;
-int VM_Version::_measured_cache_line_size = 128; // default value
+int VM_Version::_measured_cache_line_size = 32; // pessimistic init value
 const char* VM_Version::_features_str = "";
 bool VM_Version::_is_determine_features_test_running = false;
 
@@ -55,7 +56,9 @@
 
   // If PowerArchitecturePPC64 hasn't been specified explicitly determine from features.
   if (FLAG_IS_DEFAULT(PowerArchitecturePPC64)) {
-    if (VM_Version::has_popcntw()) {
+    if (VM_Version::has_lqarx()) {
+      FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 8);
+    } else if (VM_Version::has_popcntw()) {
       FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 7);
     } else if (VM_Version::has_cmpb()) {
       FLAG_SET_ERGO(uintx, PowerArchitecturePPC64, 6);
@@ -66,8 +69,14 @@
     }
   }
   guarantee(PowerArchitecturePPC64 == 0 || PowerArchitecturePPC64 == 5 ||
-            PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7,
-            "PowerArchitecturePPC64 should be 0, 5, 6 or 7");
+            PowerArchitecturePPC64 == 6 || PowerArchitecturePPC64 == 7 ||
+            PowerArchitecturePPC64 == 8,
+            "PowerArchitecturePPC64 should be 0, 5, 6, 7, or 8");
+
+  // Power 8: Configure Data Stream Control Register.
+  if (PowerArchitecturePPC64 >= 8) {
+    config_dscr();
+  }
 
   if (!UseSIGTRAP) {
     MSG(TrapBasedICMissChecks);
@@ -97,7 +106,7 @@
   // Create and print feature-string.
   char buf[(num_features+1) * 16]; // Max 16 chars per feature.
   jio_snprintf(buf, sizeof(buf),
-               "ppc64%s%s%s%s%s%s%s%s",
+               "ppc64%s%s%s%s%s%s%s%s%s%s%s%s",
                (has_fsqrt()   ? " fsqrt"   : ""),
                (has_isel()    ? " isel"    : ""),
                (has_lxarxeh() ? " lxarxeh" : ""),
@@ -106,11 +115,17 @@
                (has_popcntb() ? " popcntb" : ""),
                (has_popcntw() ? " popcntw" : ""),
                (has_fcfids()  ? " fcfids"  : ""),
-               (has_vand()    ? " vand"    : "")
+               (has_vand()    ? " vand"    : ""),
+               (has_lqarx()   ? " lqarx"   : ""),
+               (has_vcipher() ? " vcipher" : ""),
+               (has_vpmsumb() ? " vpmsumb" : ""),
+               (has_tcheck()  ? " tcheck"  : "")
                // Make sure number of %s matches num_features!
               );
   _features_str = os::strdup(buf);
-  NOT_PRODUCT(if (Verbose) print_features(););
+  if (Verbose) {
+    print_features();
+  }
 
   // PPC64 supports 8-byte compare-exchange operations (see
   // Atomic::cmpxchg and StubGenerator::generate_atomic_cmpxchg_ptr)
@@ -171,7 +186,86 @@
     FLAG_SET_DEFAULT(UseSHA256Intrinsics, false);
     FLAG_SET_DEFAULT(UseSHA512Intrinsics, false);
   }
+  // Adjust RTM (Restricted Transactional Memory) flags.
+  if (!has_tcheck() && UseRTMLocking) {
+    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
+    // setting during arguments processing. See use_biased_locking().
+    // VM_Version_init() is executed after UseBiasedLocking is used
+    // in Thread::allocate().
+    vm_exit_during_initialization("RTM instructions are not available on this CPU");
+  }
 
+  if (UseRTMLocking) {
+#if INCLUDE_RTM_OPT
+    if (!UnlockExperimentalVMOptions) {
+      vm_exit_during_initialization("UseRTMLocking is only available as experimental option on this platform. "
+                                    "It must be enabled via -XX:+UnlockExperimentalVMOptions flag.");
+    } else {
+      warning("UseRTMLocking is only available as experimental option on this platform.");
+    }
+    if (!FLAG_IS_CMDLINE(UseRTMLocking)) {
+      // RTM locking should be used only for applications with
+      // high lock contention. For now we do not use it by default.
+      vm_exit_during_initialization("UseRTMLocking flag should be only set on command line");
+    }
+    if (!is_power_of_2(RTMTotalCountIncrRate)) {
+      warning("RTMTotalCountIncrRate must be a power of 2, resetting it to 64");
+      FLAG_SET_DEFAULT(RTMTotalCountIncrRate, 64);
+    }
+    if (RTMAbortRatio < 0 || RTMAbortRatio > 100) {
+      warning("RTMAbortRatio must be in the range 0 to 100, resetting it to 50");
+      FLAG_SET_DEFAULT(RTMAbortRatio, 50);
+    }
+    FLAG_SET_ERGO(bool, UseNewFastLockPPC64, false); // Does not implement TM.
+    guarantee(RTMSpinLoopCount > 0, "unsupported");
+#else
+    // Only C2 does RTM locking optimization.
+    // Can't continue because UseRTMLocking affects UseBiasedLocking flag
+    // setting during arguments processing. See use_biased_locking().
+    vm_exit_during_initialization("RTM locking optimization is not supported in this VM");
+#endif
+  } else { // !UseRTMLocking
+    if (UseRTMForStackLocks) {
+      if (!FLAG_IS_DEFAULT(UseRTMForStackLocks)) {
+        warning("UseRTMForStackLocks flag should be off when UseRTMLocking flag is off");
+      }
+      FLAG_SET_DEFAULT(UseRTMForStackLocks, false);
+    }
+    if (UseRTMDeopt) {
+      FLAG_SET_DEFAULT(UseRTMDeopt, false);
+    }
+    if (PrintPreciseRTMLockingStatistics) {
+      FLAG_SET_DEFAULT(PrintPreciseRTMLockingStatistics, false);
+    }
+  }
+
+  // This machine does not allow unaligned memory accesses
+  if (UseUnalignedAccesses) {
+    if (!FLAG_IS_DEFAULT(UseUnalignedAccesses))
+      warning("Unaligned memory access is not available on this CPU");
+    FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
+  }
+}
+
+bool VM_Version::use_biased_locking() {
+#if INCLUDE_RTM_OPT
+  // RTM locking is most useful when there is high lock contention and
+  // low data contention. With high lock contention the lock is usually
+  // inflated and biased locking is not suitable for that case.
+  // RTM locking code requires that biased locking is off.
+  // Note: we can't switch off UseBiasedLocking during the flag adjustments
+  // done in VM_Version::initialize() because the flag is already used by
+  // Thread::allocate(), which is called before VM_Version::initialize().
+  if (UseRTMLocking && UseBiasedLocking) {
+    if (FLAG_IS_DEFAULT(UseBiasedLocking)) {
+      FLAG_SET_DEFAULT(UseBiasedLocking, false);  // Flag still at its default: turn it off without a warning.
+    } else {
+      warning("Biased locking is not supported with RTM locking; ignoring UseBiasedLocking flag." );
+      UseBiasedLocking = false;  // Flag was set explicitly: warn (above) and override it.
+    }
+  }
+#endif
+  return UseBiasedLocking;  // Possibly adjusted above when RTM locking is in use.
 }
 
 void VM_Version::print_features() {
@@ -437,16 +531,19 @@
   // Don't use R0 in ldarx.
   // Keep R3_ARG1 unmodified, it contains &field (see below).
   // Keep R4_ARG2 unmodified, it contains offset = 0 (see below).
-  a->fsqrt(F3, F4);                            // code[0] -> fsqrt_m
-  a->fsqrts(F3, F4);                           // code[1] -> fsqrts_m
-  a->isel(R7, R5, R6, 0);                      // code[2] -> isel_m
-  a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3] -> lxarx_m
-  a->cmpb(R7, R5, R6);                         // code[4] -> bcmp
-  //a->mftgpr(R7, F3);                         // code[5] -> mftgpr
-  a->popcntb(R7, R5);                          // code[6] -> popcntb
-  a->popcntw(R7, R5);                          // code[7] -> popcntw
-  a->fcfids(F3, F4);                           // code[8] -> fcfids
-  a->vand(VR0, VR0, VR0);                      // code[9] -> vand
+  a->fsqrt(F3, F4);                            // code[0]  -> fsqrt_m
+  a->fsqrts(F3, F4);                           // code[1]  -> fsqrts_m
+  a->isel(R7, R5, R6, 0);                      // code[2]  -> isel_m
+  a->ldarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[3]  -> lxarx_m
+  a->cmpb(R7, R5, R6);                         // code[4]  -> cmpb
+  a->popcntb(R7, R5);                          // code[5]  -> popcntb
+  a->popcntw(R7, R5);                          // code[6]  -> popcntw
+  a->fcfids(F3, F4);                           // code[7]  -> fcfids
+  a->vand(VR0, VR0, VR0);                      // code[8]  -> vand
+  a->lqarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[9]  -> lqarx_m
+  a->vcipher(VR0, VR1, VR2);                   // code[10] -> vcipher
+  a->vpmsumb(VR0, VR1, VR2);                   // code[11] -> vpmsumb
+  a->tcheck(0);                                // code[12] -> tcheck
   a->blr();
 
   // Emit function to set one cache line to zero. Emit function descriptor and get pointer to it.
@@ -485,11 +582,14 @@
   if (code[feature_cntr++]) features |= isel_m;
   if (code[feature_cntr++]) features |= lxarxeh_m;
   if (code[feature_cntr++]) features |= cmpb_m;
-  //if(code[feature_cntr++])features |= mftgpr_m;
   if (code[feature_cntr++]) features |= popcntb_m;
   if (code[feature_cntr++]) features |= popcntw_m;
   if (code[feature_cntr++]) features |= fcfids_m;
   if (code[feature_cntr++]) features |= vand_m;
+  if (code[feature_cntr++]) features |= lqarx_m;
+  if (code[feature_cntr++]) features |= vcipher_m;
+  if (code[feature_cntr++]) features |= vpmsumb_m;
+  if (code[feature_cntr++]) features |= tcheck_m;
 
   // Print the detection code.
   if (PrintAssembly) {
@@ -501,6 +601,69 @@
   _features = features;
 }
 
+// Power 8: Configure the Data Stream Control Register (DSCR) from DSCR_PPC64, DSCR_DPFD_PPC64 and DSCR_URG_PPC64.
+void VM_Version::config_dscr() {
+  assert(has_tcheck(), "Only execute on Power 8 or later!");
+
+  // 7 InstWords for each call (function descriptor + blr instruction).
+  const int code_size = (2+2*7)*BytesPerInstWord;
+
+  // Allocate space for the code.
+  ResourceMark rm;
+  CodeBuffer cb("config_dscr", code_size, 0);
+  MacroAssembler* a = new MacroAssembler(&cb);
+
+  // Emit code: get_dscr() reads the DSCR into R3, set_dscr(v) writes R3 to the DSCR.
+  uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->emit_fd();
+  uint32_t *code = (uint32_t *)a->pc();
+  a->mfdscr(R3);
+  a->blr();
+
+  void (*set_dscr)(long) = (void(*)(long))(void *)a->emit_fd();
+  a->mtdscr(R3);
+  a->blr();
+
+  uint32_t *code_end = (uint32_t *)a->pc();
+  a->flush();
+
+  // Print the generated dscr stubs.
+  if (PrintAssembly) {
+    ttyLocker ttyl;
+    tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", code);
+    Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
+  }
+
+  // Apply the configuration if needed. The register is only written when a flag requests a change.
+  uint64_t dscr_val = (*get_dscr)();
+  if (Verbose) {
+    tty->print_cr("dscr value was 0x%lx" , dscr_val);
+  }
+  bool change_requested = false;
+  if (DSCR_PPC64 != (uintx)-1) {  // (uintx)-1: no complete DSCR value was requested.
+    dscr_val = DSCR_PPC64;
+    change_requested = true;
+  }
+  if (DSCR_DPFD_PPC64 <= 7) {  // Values above 7 leave the 3-bit field in bits 0..2 untouched.
+    uint64_t mask = 0x7;
+    if ((dscr_val & mask) != DSCR_DPFD_PPC64) {
+      dscr_val = (dscr_val & ~mask) | (DSCR_DPFD_PPC64);
+      change_requested = true;
+    }
+  }
+  if (DSCR_URG_PPC64 <= 7) {  // Values above 7 leave the 3-bit field in bits 6..8 untouched.
+    uint64_t mask = 0x7 << 6;
+    if ((dscr_val & mask) != (DSCR_URG_PPC64 << 6)) {  // Compare with the URG request (was DPFD by mistake).
+      dscr_val = (dscr_val & ~mask) | (DSCR_URG_PPC64 << 6);
+      change_requested = true;
+    }
+  }
+  if (change_requested) {
+    (*set_dscr)(dscr_val);
+    if (Verbose) {
+      tty->print_cr("dscr was set to 0x%lx" , (*get_dscr)());
+    }
+  }
+}
 
 static int saved_features = 0;
 
--- a/src/cpu/ppc/vm/vm_version_ppc.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/vm_version_ppc.hpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -41,7 +41,10 @@
     popcntw,
     fcfids,
     vand,
-    dcba,
+    lqarx,
+    vcipher,
+    vpmsumb,
+    tcheck,
     num_features // last entry to count features
   };
   enum Feature_Flag_Set {
@@ -55,7 +58,10 @@
     popcntw_m             = (1 << popcntw),
     fcfids_m              = (1 << fcfids ),
     vand_m                = (1 << vand   ),
-    dcba_m                = (1 << dcba   ),
+    lqarx_m               = (1 << lqarx  ),
+    vcipher_m             = (1 << vcipher),
+    vpmsumb_m             = (1 << vpmsumb),
+    tcheck_m              = (1 << tcheck ),
     all_features_m        = -1
   };
   static int  _features;
@@ -65,12 +71,16 @@
 
   static void print_features();
   static void determine_features(); // also measures cache line size
+  static void config_dscr(); // Power 8: Configure Data Stream Control Register.
   static void determine_section_size();
   static void power6_micro_bench();
 public:
   // Initialization
   static void initialize();
 
+  // Override Abstract_VM_Version implementation
+  static bool use_biased_locking();
+
   static bool is_determine_features_test_running() { return _is_determine_features_test_running; }
   // CPU instruction support
   static bool has_fsqrt()   { return (_features & fsqrt_m) != 0; }
@@ -82,7 +92,10 @@
   static bool has_popcntw() { return (_features & popcntw_m) != 0; }
   static bool has_fcfids()  { return (_features & fcfids_m) != 0; }
   static bool has_vand()    { return (_features & vand_m) != 0; }
-  static bool has_dcba()    { return (_features & dcba_m) != 0; }
+  static bool has_lqarx()   { return (_features & lqarx_m) != 0; }
+  static bool has_vcipher() { return (_features & vcipher_m) != 0; }
+  static bool has_vpmsumb() { return (_features & vpmsumb_m) != 0; }
+  static bool has_tcheck()  { return (_features & tcheck_m) != 0; }
 
   static const char* cpu_features() { return _features_str; }
 
--- a/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/ppc/vm/vtableStubs_ppc_64.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
- * Copyright 2012, 2014 SAP AG. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,7 +24,6 @@
  */
 
 #include "precompiled.hpp"
-#include "asm/assembler.hpp"
 #include "asm/macroAssembler.inline.hpp"
 #include "code/vtableStubs.hpp"
 #include "interp_masm_ppc_64.hpp"
--- a/src/cpu/sparc/vm/globals_sparc.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/sparc/vm/globals_sparc.hpp	Thu May 07 20:51:12 2015 -0700
@@ -74,6 +74,8 @@
 
 define_pd_global(bool, UseMembar,            false);
 
+define_pd_global(bool, PreserveFramePointer, false);
+
 // GC Ergo Flags
 define_pd_global(size_t, CMSYoungGenPerWorker, 16*M);  // default max size of CMS young gen, per GC worker thread
 
--- a/src/cpu/sparc/vm/interp_masm_sparc.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/sparc/vm/interp_masm_sparc.hpp	Thu May 07 20:51:12 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -203,7 +203,6 @@
   void field_offset_at(int n, Register tmp, Register dest, Register base);
   int  field_offset_at(Register object, address bcp, int offset);
   void fast_iaaccess(int n, address bcp);
-  void fast_iagetfield(address bcp);
   void fast_iaputfield(address bcp, bool do_store_check );
 
   void index_check(Register array, Register index, int index_shift, Register tmp, Register res);
--- a/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Thu May 07 20:51:12 2015 -0700
@@ -3019,44 +3019,107 @@
    // past the store that releases the lock.  But TSO is a strong memory model
    // and that particular flavor of barrier is a noop, so we can safely elide it.
    // Note that we use 1-0 locking by default for the inflated case.  We
-   // close the resultant (and rare) race by having contented threads in
+   // close the resultant (and rare) race by having contended threads in
    // monitorenter periodically poll _owner.
-   ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rscratch);
-   ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions), Rbox);
-   xor3(Rscratch, G2_thread, Rscratch);
-   orcc(Rbox, Rscratch, Rbox);
-   brx(Assembler::notZero, false, Assembler::pn, done);
-   delayed()->
-   ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList), Rscratch);
-   ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq), Rbox);
-   orcc(Rbox, Rscratch, G0);
-   if (EmitSync & 65536) {
-      Label LSucc ;
-      brx(Assembler::notZero, false, Assembler::pn, LSucc);
-      delayed()->nop();
-      ba(done);
-      delayed()->st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner));
-
-      bind(LSucc);
-      st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner));
-      if (os::is_MP()) { membar (StoreLoad); }
-      ld_ptr(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ), Rscratch);
-      andcc(Rscratch, Rscratch, G0);
-      brx(Assembler::notZero, false, Assembler::pt, done);
-      delayed()->andcc(G0, G0, G0);
-      add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
-      mov(G2_thread, Rscratch);
-      cas_ptr(Rmark, G0, Rscratch);
-      // invert icc.zf and goto done
-      br_notnull(Rscratch, false, Assembler::pt, done);
-      delayed()->cmp(G0, G0);
-      ba(done);
-      delayed()->cmp(G0, 1);
+
+   if (EmitSync & 1024) {
+     // Emit code to check that _owner == Self
+     // We could fold the _owner test into subsequent code more efficiently
+     // than using a stand-alone check, but since _owner checking is off by
+     // default we don't bother. We also might consider predicating the
+     // _owner==Self check on Xcheck:jni or running on a debug build.
+     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), Rscratch);
+     orcc(Rscratch, G0, G0);
+     brx(Assembler::notZero, false, Assembler::pn, done);
+     delayed()->nop();
+   }
+
+   if (EmitSync & 512) {
+     // classic lock release code absent 1-0 locking
+     //   m->Owner = null;
+     //   membar #storeload
+     //   if (m->cxq|m->EntryList) == null goto Success
+     //   if (m->succ != null) goto Success
+     //   if CAS (&m->Owner,0,Self) != 0 goto Success
+     //   goto SlowPath
+     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
+     orcc(Rbox, G0, G0);
+     brx(Assembler::notZero, false, Assembler::pn, done);
+     delayed()->nop();
+     st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+     if (os::is_MP()) { membar(StoreLoad); }
+     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
+     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
+     orcc(Rbox, Rscratch, G0);
+     brx(Assembler::zero, false, Assembler::pt, done);
+     delayed()->
+     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
+     andcc(Rscratch, Rscratch, G0);
+     brx(Assembler::notZero, false, Assembler::pt, done);
+     delayed()->andcc(G0, G0, G0);
+     add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
+     mov(G2_thread, Rscratch);
+     cas_ptr(Rmark, G0, Rscratch);
+     cmp(Rscratch, G0);
+     // invert icc.zf and goto done
+     brx(Assembler::notZero, false, Assembler::pt, done);
+     delayed()->cmp(G0, G0);
+     br(Assembler::always, false, Assembler::pt, done);
+     delayed()->cmp(G0, 1);
    } else {
-      brx(Assembler::notZero, false, Assembler::pn, done);
-      delayed()->nop();
-      ba(done);
-      delayed()->st_ptr(G0, Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner));
+     // 1-0 form : avoids CAS and MEMBAR in the common case
+     // Do not bother to ratify that m->Owner == Self.
+     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)), Rbox);
+     orcc(Rbox, G0, G0);
+     brx(Assembler::notZero, false, Assembler::pn, done);
+     delayed()->
+     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)), Rscratch);
+     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)), Rbox);
+     orcc(Rbox, Rscratch, G0);
+     if (EmitSync & 16384) {
+       // As an optional optimization, if (EntryList|cxq) != null and _succ is null then
+       // we should transfer control directly to the slow-path.
+       // This test makes the reacquire operation below very infrequent.
+       // The logic is equivalent to :
+       //   if (cxq|EntryList) == null : Owner=null; goto Success
+       //   if succ == null : goto SlowPath
+       //   Owner=null; membar #storeload
+       //   if succ != null : goto Success
+       //   if CAS(&Owner,null,Self) != null goto Success
+       //   goto SlowPath
+       brx(Assembler::zero, true, Assembler::pt, done);
+       delayed()->
+       st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+       ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
+       andcc(Rscratch, Rscratch, G0) ;
+       brx(Assembler::zero, false, Assembler::pt, done);
+       delayed()->orcc(G0, 1, G0);
+       st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+     } else {
+       brx(Assembler::zero, false, Assembler::pt, done);
+       delayed()->
+       st_ptr(G0, Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+     }
+     if (os::is_MP()) { membar(StoreLoad); }
+     // Check that _succ is (or remains) non-zero
+     ld_ptr(Address(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), Rscratch);
+     andcc(Rscratch, Rscratch, G0);
+     brx(Assembler::notZero, false, Assembler::pt, done);
+     delayed()->andcc(G0, G0, G0);
+     add(Rmark, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner), Rmark);
+     mov(G2_thread, Rscratch);
+     cas_ptr(Rmark, G0, Rscratch);
+     cmp(Rscratch, G0);
+     // invert icc.zf and goto done
+     // A slightly better v8+/v9 idiom would be the following:
+     //   movrnz Rscratch,1,Rscratch
+     //   ba done
+     //   xorcc Rscratch,1,G0
+     // In v8+ mode the idiom would be valid IFF Rscratch was a G or O register
+     brx(Assembler::notZero, false, Assembler::pt, done);
+     delayed()->cmp(G0, G0);
+     br(Assembler::always, false, Assembler::pt, done);
+     delayed()->cmp(G0, 1);
    }
 
    bind   (LStacked);
@@ -3632,23 +3695,11 @@
     if (satb_log_enqueue_with_frame == 0) {
       generate_satb_log_enqueue(with_frame);
       assert(satb_log_enqueue_with_frame != 0, "postcondition.");
-      if (G1SATBPrintStubs) {
-        tty->print_cr("Generated with-frame satb enqueue:");
-        Disassembler::decode((u_char*)satb_log_enqueue_with_frame,
-                             satb_log_enqueue_with_frame_end,
-                             tty);
-      }
     }
   } else {
     if (satb_log_enqueue_frameless == 0) {
       generate_satb_log_enqueue(with_frame);
       assert(satb_log_enqueue_frameless != 0, "postcondition.");
-      if (G1SATBPrintStubs) {
-        tty->print_cr("Generated frameless satb enqueue:");
-        Disassembler::decode((u_char*)satb_log_enqueue_frameless,
-                             satb_log_enqueue_frameless_end,
-                             tty);
-      }
     }
   }
 }
@@ -3841,12 +3892,6 @@
   if (dirty_card_log_enqueue == 0) {
     generate_dirty_card_log_enqueue(byte_map_base);
     assert(dirty_card_log_enqueue != 0, "postcondition.");
-    if (G1SATBPrintStubs) {
-      tty->print_cr("Generated dirty_card enqueue:");
-      Disassembler::decode((u_char*)dirty_card_log_enqueue,
-                           dirty_card_log_enqueue_end,
-                           tty);
-    }
   }
 }
 
--- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2664,6 +2664,9 @@
     // disallows any pending_exception.
     __ mov(L3_box, O1);
 
+    // Pass in current thread pointer
+    __ mov(G2_thread, O2);
+
     __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), relocInfo::runtime_call_type);
     __ delayed()->mov(L4, O0);              // Need oop in O0
 
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/sparc/vm/templateTable_sparc.cpp	Thu May 07 20:51:12 2015 -0700
@@ -385,7 +385,6 @@
   __ verify_oop(Otos_i);
 }
 
-
 void TemplateTable::ldc2_w() {
   transition(vtos, vtos);
   Label Long, exit;
@@ -430,22 +429,28 @@
   __ bind(exit);
 }
 
-
 void TemplateTable::locals_index(Register reg, int offset) {
   __ ldub( at_bcp(offset), reg );
 }
 
-
 void TemplateTable::locals_index_wide(Register reg) {
   // offset is 2, not 1, because Lbcp points to wide prefix code
   __ get_2_byte_integer_at_bcp(2, G4_scratch, reg, InterpreterMacroAssembler::Unsigned);
 }
 
 void TemplateTable::iload() {
+  iload_internal();
+}
+
+void TemplateTable::nofast_iload() {
+  iload_internal(may_not_rewrite);
+}
+
+void TemplateTable::iload_internal(RewriteControl rc) {
   transition(vtos, itos);
   // Rewrite iload,iload  pair into fast_iload2
   //         iload,caload pair into fast_icaload
-  if (RewriteFrequentPairs) {
+  if (RewriteFrequentPairs && rc == may_rewrite) {
     Label rewrite, done;
 
     // get next byte
@@ -672,8 +677,15 @@
   __ ld_ptr( Llocals, Interpreter::local_offset_in_bytes(n), Otos_i );
 }
 
-
 void TemplateTable::aload_0() {
+  aload_0_internal();
+}
+
+void TemplateTable::nofast_aload_0() {
+  aload_0_internal(may_not_rewrite);
+}
+
+void TemplateTable::aload_0_internal(RewriteControl rc) {
   transition(vtos, atos);
 
   // According to bytecode histograms, the pairs:
@@ -687,7 +699,7 @@
   // bytecode into a pair bytecode; otherwise it rewrites the current
   // bytecode into _fast_aload_0 that doesn't do the pair check anymore.
   //
-  if (RewriteFrequentPairs) {
+  if (RewriteFrequentPairs && rc == may_rewrite) {
     Label rewrite, done;
 
     // get next byte
@@ -731,7 +743,6 @@
   }
 }
 
-
 void TemplateTable::istore() {
   transition(itos, vtos);
   locals_index(G3_scratch);
@@ -2045,30 +2056,21 @@
                                             Register index,
                                             size_t index_size) {
   // Depends on cpCacheOop layout!
+
   Label resolved;
-
-    assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
-    __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, Lbyte_code, byte_no, 1, index_size);
-    __ cmp(Lbyte_code, (int) bytecode());  // have we resolved this bytecode?
-    __ br(Assembler::equal, false, Assembler::pt, resolved);
-    __ delayed()->set((int)bytecode(), O1);
-
-  address entry;
-  switch (bytecode()) {
-    case Bytecodes::_getstatic      : // fall through
-    case Bytecodes::_putstatic      : // fall through
-    case Bytecodes::_getfield       : // fall through
-    case Bytecodes::_putfield       : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put); break;
-    case Bytecodes::_invokevirtual  : // fall through
-    case Bytecodes::_invokespecial  : // fall through
-    case Bytecodes::_invokestatic   : // fall through
-    case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);  break;
-    case Bytecodes::_invokehandle   : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);  break;
-    case Bytecodes::_invokedynamic  : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);  break;
-    default:
-      fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
-      break;
+  Bytecodes::Code code = bytecode();
+  switch (code) {
+  case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
+  case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
   }
+
+  assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+  __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, Lbyte_code, byte_no, 1, index_size);
+  __ cmp(Lbyte_code, code);  // have we resolved this bytecode?
+  __ br(Assembler::equal, false, Assembler::pt, resolved);
+  __ delayed()->set(code, O1);
+
+  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
   // first time invocation - must resolve first
   __ call_VM(noreg, entry, O1);
   // Update registers with resolved info
@@ -2183,7 +2185,7 @@
   }
 }
 
-void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
+void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
   transition(vtos, vtos);
 
   Register Rcache = G3_scratch;
@@ -2231,7 +2233,7 @@
   __ load_heap_oop(Rclass, Roffset, Otos_i);
   __ verify_oop(Otos_i);
   __ push(atos);
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_agetfield, G3_scratch, G4_scratch);
   }
   __ ba(checkVolatile);
@@ -2246,7 +2248,7 @@
   // itos
   __ ld(Rclass, Roffset, Otos_i);
   __ push(itos);
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_igetfield, G3_scratch, G4_scratch);
   }
   __ ba(checkVolatile);
@@ -2262,7 +2264,7 @@
   // load must be atomic
   __ ld_long(Rclass, Roffset, Otos_l);
   __ push(ltos);
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_lgetfield, G3_scratch, G4_scratch);
   }
   __ ba(checkVolatile);
@@ -2277,7 +2279,7 @@
   // btos
   __ ldsb(Rclass, Roffset, Otos_i);
   __ push(itos);
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_bgetfield, G3_scratch, G4_scratch);
   }
   __ ba(checkVolatile);
@@ -2292,7 +2294,7 @@
   // ctos
   __ lduh(Rclass, Roffset, Otos_i);
   __ push(itos);
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_cgetfield, G3_scratch, G4_scratch);
   }
   __ ba(checkVolatile);
@@ -2307,7 +2309,7 @@
   // stos
   __ ldsh(Rclass, Roffset, Otos_i);
   __ push(itos);
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_sgetfield, G3_scratch, G4_scratch);
   }
   __ ba(checkVolatile);
@@ -2323,7 +2325,7 @@
   // ftos
   __ ldf(FloatRegisterImpl::S, Rclass, Roffset, Ftos_f);
   __ push(ftos);
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_fgetfield, G3_scratch, G4_scratch);
   }
   __ ba(checkVolatile);
@@ -2335,7 +2337,7 @@
   // dtos
   __ ldf(FloatRegisterImpl::D, Rclass, Roffset, Ftos_d);
   __ push(dtos);
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_dgetfield, G3_scratch, G4_scratch);
   }
 
@@ -2350,16 +2352,18 @@
   __ bind(exit);
 }
 
-
 void TemplateTable::getfield(int byte_no) {
   getfield_or_static(byte_no, false);
 }
 
+void TemplateTable::nofast_getfield(int byte_no) {
+  getfield_or_static(byte_no, false, may_not_rewrite);
+}
+
 void TemplateTable::getstatic(int byte_no) {
   getfield_or_static(byte_no, true);
 }
 
-
 void TemplateTable::fast_accessfield(TosState state) {
   transition(atos, state);
   Register Rcache  = G3_scratch;
@@ -2544,7 +2548,7 @@
   __ verify_oop(r);
 }
 
-void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
+void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
   transition(vtos, vtos);
   Register Rcache = G3_scratch;
   Register index  = G4_scratch;
@@ -2620,7 +2624,7 @@
       __ pop_i();
       pop_and_check_object(Rclass);
       __ st(Otos_i, Rclass, Roffset);
-      patch_bytecode(Bytecodes::_fast_iputfield, G3_scratch, G4_scratch, true, byte_no);
+      if (rc == may_rewrite) patch_bytecode(Bytecodes::_fast_iputfield, G3_scratch, G4_scratch, true, byte_no);
       __ ba(checkVolatile);
       __ delayed()->tst(Lscratch);
     }
@@ -2636,7 +2640,7 @@
       pop_and_check_object(Rclass);
       __ verify_oop(Otos_i);
       do_oop_store(_masm, Rclass, Roffset, 0, Otos_i, G1_scratch, _bs->kind(), false);
-      patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch, true, byte_no);
+      if (rc == may_rewrite) patch_bytecode(Bytecodes::_fast_aputfield, G3_scratch, G4_scratch, true, byte_no);
       __ ba(checkVolatile);
       __ delayed()->tst(Lscratch);
     }
@@ -2653,7 +2657,7 @@
     __ pop_i();
     if (!is_static) pop_and_check_object(Rclass);
     __ stb(Otos_i, Rclass, Roffset);
-    if (!is_static) {
+    if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_bputfield, G3_scratch, G4_scratch, true, byte_no);
     }
     __ ba(checkVolatile);
@@ -2670,7 +2674,7 @@
     __ pop_l();
     if (!is_static) pop_and_check_object(Rclass);
     __ st_long(Otos_l, Rclass, Roffset);
-    if (!is_static) {
+    if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_lputfield, G3_scratch, G4_scratch, true, byte_no);
     }
     __ ba(checkVolatile);
@@ -2687,7 +2691,7 @@
     __ pop_i();
     if (!is_static) pop_and_check_object(Rclass);
     __ sth(Otos_i, Rclass, Roffset);
-    if (!is_static) {
+    if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_cputfield, G3_scratch, G4_scratch, true, byte_no);
     }
     __ ba(checkVolatile);
@@ -2704,7 +2708,7 @@
     __ pop_i();
     if (!is_static) pop_and_check_object(Rclass);
     __ sth(Otos_i, Rclass, Roffset);
-    if (!is_static) {
+    if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_sputfield, G3_scratch, G4_scratch, true, byte_no);
     }
     __ ba(checkVolatile);
@@ -2721,7 +2725,7 @@
     __ pop_f();
     if (!is_static) pop_and_check_object(Rclass);
     __ stf(FloatRegisterImpl::S, Ftos_f, Rclass, Roffset);
-    if (!is_static) {
+    if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_fputfield, G3_scratch, G4_scratch, true, byte_no);
     }
     __ ba(checkVolatile);
@@ -2735,7 +2739,7 @@
     __ pop_d();
     if (!is_static) pop_and_check_object(Rclass);
     __ stf(FloatRegisterImpl::D, Ftos_d, Rclass, Roffset);
-    if (!is_static) {
+    if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_dputfield, G3_scratch, G4_scratch, true, byte_no);
     }
   }
@@ -2809,16 +2813,18 @@
   }
 }
 
-
 void TemplateTable::putfield(int byte_no) {
   putfield_or_static(byte_no, false);
 }
 
+void TemplateTable::nofast_putfield(int byte_no) {
+  putfield_or_static(byte_no, false, may_not_rewrite);
+}
+
 void TemplateTable::putstatic(int byte_no) {
   putfield_or_static(byte_no, true);
 }
 
-
 void TemplateTable::fast_xaccess(TosState state) {
   transition(vtos, state);
   Register Rcache = G3_scratch;
@@ -2971,7 +2977,9 @@
   __ br(Assembler::zero, false, Assembler::pt, notFinal);
   __ delayed()->and3(Rret, 0xFF, G4_scratch);      // gets number of parameters
 
-  patch_bytecode(Bytecodes::_fast_invokevfinal, Rscratch, Rtemp);
+  if (RewriteBytecodes && !UseSharedSpaces) {
+    patch_bytecode(Bytecodes::_fast_invokevfinal, Rscratch, Rtemp);
+  }
 
   invokevfinal_helper(Rscratch, Rret);
 
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,7 +32,7 @@
 
 int VM_Version::_features = VM_Version::unknown_m;
 const char* VM_Version::_features_str = "";
-unsigned int VM_Version::_L2_cache_line_size = 0;
+unsigned int VM_Version::_L2_data_cache_line_size = 0;
 
 void VM_Version::initialize() {
   _features = determine_features();
@@ -356,10 +356,17 @@
     (cache_line_size > ContendedPaddingWidth))
     ContendedPaddingWidth = cache_line_size;
 
+  // This machine does not allow unaligned memory accesses
+  if (UseUnalignedAccesses) {
+    if (!FLAG_IS_DEFAULT(UseUnalignedAccesses))
+      warning("Unaligned memory access is not available on this CPU");
+    FLAG_SET_DEFAULT(UseUnalignedAccesses, false);
+  }
+
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
     tty->print_cr("L1 data cache line size: %u", L1_data_cache_line_size());
-    tty->print_cr("L2 cache line size: %u", L2_cache_line_size());
+    tty->print_cr("L2 data cache line size: %u", L2_data_cache_line_size());
     tty->print("Allocation");
     if (AllocatePrefetchStyle <= 0) {
       tty->print_cr(": no prefetching");
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Thu May 07 20:51:12 2015 -0700
@@ -96,8 +96,8 @@
   static int  _features;
   static const char* _features_str;
 
-  static unsigned int _L2_cache_line_size;
-  static unsigned int L2_cache_line_size() { return _L2_cache_line_size; }
+  static unsigned int _L2_data_cache_line_size;
+  static unsigned int L2_data_cache_line_size() { return _L2_data_cache_line_size; }
 
   static void print_features();
   static int  determine_features();
@@ -171,7 +171,7 @@
   static const char* cpu_features()     { return _features_str; }
 
   // default prefetch block size on sparc
-  static intx prefetch_data_size()      { return L2_cache_line_size();  }
+  static intx prefetch_data_size()      { return L2_data_cache_line_size();  }
 
   // Prefetch
   static intx prefetch_copy_interval_in_bytes() {
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu May 07 20:51:12 2015 -0700
@@ -3359,6 +3359,20 @@
 
 
 // Integer vector arithmetic
+void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+  emit_int8(0x01);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+  emit_int8(0x02);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 void Assembler::paddb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
@@ -3379,6 +3393,20 @@
   emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
 }
 
+void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse3(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_int8(0x01);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse3(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_int8(0x02);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
   assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
   emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
@@ -3804,6 +3832,17 @@
   emit_int8(0x01);
 }
 
+void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  bool vector256 = true;
+  int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+  emit_int8(0x19);
+  emit_int8((unsigned char)(0xC0 | encode));
+  // 0x00 - extract from lower 128 bits
+  // 0x01 - extract from upper 128 bits
+  emit_int8(0x01);
+}
+
 void Assembler::vextractf128h(Address dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Thu May 07 20:51:12 2015 -0700
@@ -142,8 +142,10 @@
 
 #endif // _LP64
 
-// JSR 292 fixed register usages:
-REGISTER_DECLARATION(Register, rbp_mh_SP_save, rbp);
+// JSR 292
+// On x86, the SP does not have to be saved when invoking method handle intrinsics
+// or compiled lambda forms. We indicate that by setting rbp_mh_SP_save to noreg.
+REGISTER_DECLARATION(Register, rbp_mh_SP_save, noreg);
 
 // Address is an abstraction used to represent a memory location
 // using any of the amd64 addressing modes with one object.
@@ -1777,6 +1779,12 @@
   void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
   void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
 
+  // Add horizontal packed integers
+  void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void phaddw(XMMRegister dst, XMMRegister src);
+  void phaddd(XMMRegister dst, XMMRegister src);
+
   // Add packed integers
   void paddb(XMMRegister dst, XMMRegister src);
   void paddw(XMMRegister dst, XMMRegister src);
@@ -1869,6 +1877,7 @@
   // Copy low 128bit into high 128bit of YMM registers.
   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vextractf128h(XMMRegister dst, XMMRegister src);
 
   // Load/store high 128bit of YMM registers which does not destroy other half.
   void vinsertf128h(XMMRegister dst, Address src);
--- a/src/cpu/x86/vm/c1_FrameMap_x86.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/c1_FrameMap_x86.cpp	Thu May 07 20:51:12 2015 -0700
@@ -343,14 +343,13 @@
   return FrameMap::rsp_opr;
 }
 
-
 // JSR 292
+// On x86, there is no need to save the SP, because neither
+// method handle intrinsics, nor compiled lambda forms modify it.
 LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
-  assert(rbp == rbp_mh_SP_save, "must be same register");
-  return rbp_opr;
+  return LIR_OprFact::illegalOpr;
 }
 
-
 bool FrameMap::validate_frame() {
   return true;
 }
--- a/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/c1_MacroAssembler_x86.cpp	Thu May 07 20:51:12 2015 -0700
@@ -360,6 +360,9 @@
   generate_stack_overflow_check(bang_size_in_bytes);
 
   push(rbp);
+  if (PreserveFramePointer) {
+    mov(rbp, rsp);
+  }
 #ifdef TIERED
   // c2 leaves fpu stack dirty. Clean it on entry
   if (UseSSE < 2 ) {
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu May 07 20:51:12 2015 -0700
@@ -754,14 +754,9 @@
     // WIN64_ONLY: No need to add frame::arg_reg_save_area_bytes to SP
     // since we do a leave anyway.
 
-    // Pop the return address since we are possibly changing SP (restoring from BP).
+    // Pop the return address.
     __ leave();
     __ pop(rcx);
-
-    // Restore SP from BP if the exception PC is a method handle call site.
-    NOT_LP64(__ get_thread(thread);)
-    __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
-    __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
     __ jmp(rcx);  // jump to exception handler
     break;
   default:  ShouldNotReachHere();
@@ -832,11 +827,6 @@
   // the pop is also necessary to simulate the effect of a ret(0)
   __ pop(exception_pc);
 
-  // Restore SP from BP if the exception PC is a method handle call site.
-  NOT_LP64(__ get_thread(thread);)
-  __ cmpl(Address(thread, JavaThread::is_method_handle_return_offset()), 0);
-  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
   // continue at exception handler (return address removed)
   // note: do *not* remove arguments when unwinding the
   //       activation since the caller assumes having
--- a/src/cpu/x86/vm/frame_x86.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/frame_x86.cpp	Thu May 07 20:51:12 2015 -0700
@@ -224,7 +224,8 @@
     if (sender_blob->is_nmethod()) {
         nmethod* nm = sender_blob->as_nmethod_or_null();
         if (nm != NULL) {
-            if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) {
+            if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) ||
+                nm->method()->is_method_handle_intrinsic()) {
                 return false;
             }
         }
@@ -391,10 +392,9 @@
 // frame::verify_deopt_original_pc
 //
 // Verifies the calculated original PC of a deoptimization PC for the
-// given unextended SP.  The unextended SP might also be the saved SP
-// for MethodHandle call sites.
+// given unextended SP.
 #ifdef ASSERT
-void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) {
+void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp) {
   frame fr;
 
   // This is ugly but it's better than to change {get,set}_original_pc
@@ -404,33 +404,23 @@
 
   address original_pc = nm->get_original_pc(&fr);
   assert(nm->insts_contains(original_pc), "original PC must be in nmethod");
-  assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be");
 }
 #endif
 
 //------------------------------------------------------------------------------
 // frame::adjust_unextended_sp
 void frame::adjust_unextended_sp() {
-  // If we are returning to a compiled MethodHandle call site, the
-  // saved_fp will in fact be a saved value of the unextended SP.  The
-  // simplest way to tell whether we are returning to such a call site
-  // is as follows:
+  // On x86, sites calling method handle intrinsics and lambda forms are treated
+  // as any other call site. Therefore, no special action is needed when we are
+  // returning to any of these call sites.
 
   nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null();
   if (sender_nm != NULL) {
-    // If the sender PC is a deoptimization point, get the original
-    // PC.  For MethodHandle call site the unextended_sp is stored in
-    // saved_fp.
-    if (sender_nm->is_deopt_mh_entry(_pc)) {
-      DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp));
-      _unextended_sp = _fp;
-    }
-    else if (sender_nm->is_deopt_entry(_pc)) {
+    // If the sender PC is a deoptimization point, get the original PC.
+    if (sender_nm->is_deopt_entry(_pc) ||
+        sender_nm->is_deopt_mh_entry(_pc)) {
       DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp));
     }
-    else if (sender_nm->is_method_handle_return(_pc)) {
-      _unextended_sp = _fp;
-    }
   }
 }
 
--- a/src/cpu/x86/vm/frame_x86.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/frame_x86.hpp	Thu May 07 20:51:12 2015 -0700
@@ -76,11 +76,11 @@
 //    [locals and parameters   ]
 //                               <- sender sp
 
-// [1] When the c++ interpreter calls a new method it returns to the frame
+// [1] When the C++ interpreter calls a new method it returns to the frame
 //     manager which allocates a new frame on the stack. In that case there
 //     is no real callee of this newly allocated frame. The frame manager is
-//     aware of the  additional frame(s) and will pop them as nested calls
-//     complete. Howevers tTo make it look good in the debugger the frame
+//     aware of the additional frame(s) and will pop them as nested calls
+//     complete. However, to make it look good in the debugger the frame
 //     manager actually installs a dummy pc pointing to RecursiveInterpreterActivation
 //     with a fake interpreter_state* parameter to make it easy to debug
 //     nested calls.
@@ -88,7 +88,7 @@
 // Note that contrary to the layout for the assembly interpreter the
 // expression stack allocated for the C++ interpreter is full sized.
 // However this is not as bad as it seems as the interpreter frame_manager
-// will truncate the unused space on succesive method calls.
+// will truncate the unused space on successive method calls.
 //
 // ------------------------------ C++ interpreter ----------------------------------------
 
@@ -167,10 +167,7 @@
 
 #ifdef ASSERT
   // Used in frame::sender_for_{interpreter,compiled}_frame
-  static void verify_deopt_original_pc(   nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false);
-  static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) {
-    verify_deopt_original_pc(nm, unextended_sp, true);
-  }
+  static void verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp);
 #endif
 
  public:
--- a/src/cpu/x86/vm/frame_x86.inline.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/frame_x86.inline.hpp	Thu May 07 20:51:12 2015 -0700
@@ -94,7 +94,7 @@
   // find_blob call. This is also why we can have no asserts on the validity
   // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
   // -> pd_last_frame should use a specialized version of pd_last_frame which could
-  // call a specilaized frame constructor instead of this one.
+  // call a specialized frame constructor instead of this one.
   // Then we could use the assert below. However this assert is of somewhat dubious
   // value.
   // assert(_pc != NULL, "no pc?");
--- a/src/cpu/x86/vm/globalDefinitions_x86.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/globalDefinitions_x86.hpp	Thu May 07 20:51:12 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -58,4 +58,9 @@
   #endif
 #endif
 
+#if defined(COMPILER2) && !defined(JAVASE_EMBEDDED)
+// Include Restricted Transactional Memory lock eliding optimization
+#define INCLUDE_RTM_OPT 1
+#endif
+
 #endif // CPU_X86_VM_GLOBALDEFINITIONS_X86_HPP
--- a/src/cpu/x86/vm/globals_x86.hpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/globals_x86.hpp	Thu May 07 20:51:12 2015 -0700
@@ -82,14 +82,13 @@
 
 define_pd_global(uintx, TypeProfileLevel, 111);
 
+define_pd_global(bool, PreserveFramePointer, false);
+
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
                                                                             \
   develop(bool, IEEEPrecision, true,                                        \
           "Enables IEEE precision (for INTEL only)")                        \
                                                                             \
-  product(intx, FenceInstruction, 0,                                        \
-          "(Unsafe,Unstable) Experimental")                                 \
-                                                                            \
   product(bool, UseStoreImmI16, true,                                       \
           "Use store immediate 16-bits value instruction on x86")           \
                                                                             \
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1958,6 +1958,11 @@
 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
 // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
+// Arguably, given that the spec legislates the JNI case as undefined, our implementation
+// could reasonably *avoid* checking owner in Fast_Unlock().
+// In the interest of performance we elide the m->Owner==Self check in unlock.
+// A perfectly viable alternative is to elide the owner check except when
+// -Xcheck:jni is enabled.
 
 void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {
   assert(boxReg == rax, "");
@@ -1966,24 +1971,6 @@
   if (EmitSync & 4) {
     // Disable - inhibit all inlining.  Force control through the slow-path
     cmpptr (rsp, 0);
-  } else
-  if (EmitSync & 8) {
-    Label DONE_LABEL;
-    if (UseBiasedLocking) {
-       biased_locking_exit(objReg, tmpReg, DONE_LABEL);
-    }
-    // Classic stack-locking code ...
-    // Check whether the displaced header is 0
-    //(=> recursive unlock)
-    movptr(tmpReg, Address(boxReg, 0));
-    testptr(tmpReg, tmpReg);
-    jccb(Assembler::zero, DONE_LABEL);
-    // If not recursive lock, reset the header to displaced header
-    if (os::is_MP()) {
-      lock();
-    }
-    cmpxchgptr(tmpReg, Address(objReg, 0));   // Uses RAX which is box
-    bind(DONE_LABEL);
   } else {
     Label DONE_LABEL, Stacked, CheckSucc;
 
@@ -2060,9 +2047,9 @@
     // the number of loads below (currently 4) to just 2 or 3.
     // Refer to the comments in synchronizer.cpp.
     // In practice the chain of fetches doesn't seem to impact performance, however.
+    xorptr(boxReg, boxReg);
     if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
        // Attempt to reduce branch density - AMD's branch predictor.
-       xorptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
@@ -2070,7 +2057,6 @@
        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
        jmpb  (DONE_LABEL);
     } else {
-       xorptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
        orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
        jccb  (Assembler::notZero, DONE_LABEL);
        movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList)));
@@ -2093,10 +2079,8 @@
        bind  (CheckSucc);
 
        // Optional pre-test ... it's safe to elide this
-       if ((EmitSync & 16) == 0) {
-          cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
-          jccb  (Assembler::zero, LGoSlowPath);
-       }
+       cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
+       jccb(Assembler::zero, LGoSlowPath);
 
        // We have a classic Dekker-style idiom:
        //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
@@ -2109,7 +2093,8 @@
        //     In older IA32 processors MFENCE is slower than lock:add or xchg
        //     particularly if the write-buffer is full as might be the case if
        //     if stores closely precede the fence or fence-equivalent instruction.
-       //     In more modern implementations MFENCE appears faster, however.
+       //     See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
+       //     as the situation has changed with Nehalem and Shanghai.
        // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
        //     The $lines underlying the top-of-stack should be in M-state.
        //     The locked add instruction is serializing, of course.
@@ -2126,11 +2111,7 @@
 
        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), NULL_WORD);
        if (os::is_MP()) {
-          if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
-            mfence();
-          } else {
-            lock (); addptr(Address(rsp, 0), 0);
-          }
+         lock(); addptr(Address(rsp, 0), 0);
        }
        // Ratify _succ remains non-null
        cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), 0);
@@ -2179,8 +2160,17 @@
     }
 #else // _LP64
     // It's inflated
-    movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
-    xorptr(boxReg, r15_thread);
+    if (EmitSync & 1024) {
+      // Emit code to check that _owner == Self
+      // We could fold the _owner test into subsequent code more efficiently
+      // than using a stand-alone check, but since _owner checking is off by
+      // default we don't bother. We also might consider predicating the
+      // _owner==Self check on Xcheck:jni or running on a debug build.
+      movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+      xorptr(boxReg, r15_thread);
+    } else {
+      xorptr(boxReg, boxReg);
+    }
     orptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions)));
     jccb  (Assembler::notZero, DONE_LABEL);
     movptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq)));
@@ -2190,23 +2180,51 @@
     jmpb  (DONE_LABEL);
 
     if ((EmitSync & 65536) == 0) {
+      // Try to avoid passing control into the slow_path ...
       Label LSuccess, LGoSlowPath ;
       bind  (CheckSucc);
+
+      // The following optional optimization can be elided if necessary
+      // Effectively: if (succ == null) goto SlowPath
+      // The code reduces the window for a race, however,
+      // and thus benefits performance.
       cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
       jccb  (Assembler::zero, LGoSlowPath);
 
-      // I'd much rather use lock:andl m->_owner, 0 as it's faster than the
-      // the explicit ST;MEMBAR combination, but masm doesn't currently support
-      // "ANDQ M,IMM".  Don't use MFENCE here.  lock:add to TOS, xchg, etc
-      // are all faster when the write buffer is populated.
-      movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
-      if (os::is_MP()) {
-         lock (); addl (Address(rsp, 0), 0);
+      if ((EmitSync & 16) && os::is_MP()) {
+        orptr(boxReg, boxReg);
+        xchgptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
+      } else {
+        movptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), (int32_t)NULL_WORD);
+        if (os::is_MP()) {
+          // Memory barrier/fence
+          // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ
+          // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack.
+          // This is faster on Nehalem and AMD Shanghai/Barcelona.
+          // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences
+          // We might also restructure (ST Owner=0;barrier;LD _Succ) to
+          // (mov box,0; xchgq box, &m->Owner; LD _succ) .
+          lock(); addl(Address(rsp, 0), 0);
+        }
       }
       cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), (int32_t)NULL_WORD);
       jccb  (Assembler::notZero, LSuccess);
 
-      movptr (boxReg, (int32_t)NULL_WORD);                   // box is really EAX
+      // Rare inopportune interleaving - race.
+      // The successor vanished in the small window above.
+      // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor.
+      // We need to ensure progress and succession.
+      // Try to reacquire the lock.
+      // If that fails then the new owner is responsible for succession and this
+      // thread needs to take no further action and can exit via the fast path (success).
+      // If the re-acquire succeeds then pass control into the slow path.
+      // As implemented, this latter mode is horrible because we generated more
+      // coherence traffic on the lock *and* artificially extended the critical section
+      // length by virtue of passing control into the slow path.
+
+      // box is really RAX -- the following CMPXCHG depends on that binding
+      // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R)
+      movptr(boxReg, (int32_t)NULL_WORD);
       if (os::is_MP()) { lock(); }
       cmpxchgptr(r15_thread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)));
       jccb  (Assembler::notEqual, LSuccess);
@@ -2231,10 +2249,6 @@
     }
 #endif
     bind(DONE_LABEL);
-    // Avoid branch to branch on AMD processors
-    if (EmitSync & 32768) {
-       nop();
-    }
   }
 }
 #endif // COMPILER2
@@ -6090,6 +6104,10 @@
     // We always push rbp, so that on return to interpreter rbp, will be
     // restored correctly and we can correct the stack.
     push(rbp);
+    // Save caller's stack pointer into RBP if the frame pointer is preserved.
+    if (PreserveFramePointer) {
+      mov(rbp, rsp);
+    }
     // Remove word for ebp
     framesize -= wordSize;
 
@@ -6104,6 +6122,11 @@
     // Save RBP register now.
     framesize -= wordSize;
     movptr(Address(rsp, framesize), rbp);
+    // Save caller's stack pointer into RBP if the frame pointer is preserved.
+    if (PreserveFramePointer) {
+      movptr(rbp, rsp);
+      addptr(rbp, framesize + wordSize);
+    }
   }
 
   if (VerifyStackAtCalls) { // Majik cookie to verify stack depth
@@ -6657,7 +6680,7 @@
     subl(cnt2, stride2);
     jccb(Assembler::notZero, COMPARE_WIDE_VECTORS_LOOP);
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
 
     // compare wide vectors tail
     bind(COMPARE_WIDE_TAIL);
@@ -6672,7 +6695,7 @@
     // Identifies the mismatching (higher or lower)16-bytes in the 32-byte vectors.
     bind(VECTOR_NOT_EQUAL);
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
     lea(str1, Address(str1, result, scale));
     lea(str2, Address(str2, result, scale));
     jmp(COMPARE_16_CHARS);
@@ -6931,7 +6954,8 @@
   bind(DONE);
   if (UseAVX >= 2) {
     // clean upper bits of YMM registers
-    vzeroupper();
+    vpxor(vec1, vec1);
+    vpxor(vec2, vec2);
   }
 }
 
@@ -7065,7 +7089,8 @@
 
         BIND(L_check_fill_8_bytes);
         // clean upper bits of YMM registers
-        vzeroupper();
+        movdl(xtmp, value);
+        pshufd(xtmp, xtmp, 0);
       } else {
         // Fill 32-byte chunks
         pshufd(xtmp, xtmp, 0);
@@ -7228,7 +7253,11 @@
     bind(L_copy_16_chars_exit);
     if (UseAVX >= 2) {
       // clean upper bits of YMM registers
-      vzeroupper();
+      vpxor(tmp2Reg, tmp2Reg);
+      vpxor(tmp3Reg, tmp3Reg);
+      vpxor(tmp4Reg, tmp4Reg);
+      movdl(tmp1Reg, tmp5);
+      pshufd(tmp1Reg, tmp1Reg, 0);
     }
     subptr(len, 8);
     jccb(Assembler::greater, L_copy_8_chars_exit);
--- a/src/cpu/x86/vm/methodHandles_x86.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/methodHandles_x86.cpp	Thu May 07 20:51:12 2015 -0700
@@ -374,7 +374,7 @@
     //  member_reg - MemberName that was the trailing argument
     //  temp1_recv_klass - klass of stacked receiver, if needed
     //  rsi/r13 - interpreter linkage (if interpreted)
-    //  rcx, rdx, rsi, rdi, r8, r8 - compiler arguments (if compiled)
+    //  rcx, rdx, rsi, rdi, r8 - compiler arguments (if compiled)
 
     Label L_incompatible_class_change_error;
     switch (iid) {
--- a/src/cpu/x86/vm/rtmLocking.cpp	Thu May 07 10:19:31 2015 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-/*
- * Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "memory/allocation.inline.hpp"
-#include "runtime/task.hpp"
-#include "runtime/rtmLocking.hpp"
-
-// One-shot PeriodicTask subclass for enabling RTM locking
-uintx RTMLockingCounters::_calculation_flag = 0;
-
-class RTMLockingCalculationTask : public PeriodicTask {
- public:
-  RTMLockingCalculationTask(size_t interval_time) : PeriodicTask(interval_time){  }
-
-  virtual void task() {
-    RTMLockingCounters::_calculation_flag = 1;
-    // Reclaim our storage and disenroll ourself
-    delete this;
-  }
-};
-
-void RTMLockingCounters::init() {
-  if (UseRTMLocking && RTMLockingCalculationDelay > 0) {
-    RTMLockingCalculationTask* task = new RTMLockingCalculationTask(RTMLockingCalculationDelay);
-    task->enroll();
-  } else {
-    _calculation_flag = 1;
-  }
-}
-
-//------------------------------print_on-------------------------------
-void RTMLockingCounters::print_on(outputStream* st) {
-  tty->print_cr("# rtm locks total (estimated): " UINTX_FORMAT, _total_count * RTMTotalCountIncrRate);
-  tty->print_cr("# rtm lock aborts  : " UINTX_FORMAT, _abort_count);
-  for (int i = 0; i < ABORT_STATUS_LIMIT; i++) {
-    tty->print_cr("# rtm lock aborts %d: " UINTX_FORMAT, i, _abortX_count[i]);
-  }
-}
--- a/src/cpu/x86/vm/runtime_x86_32.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/runtime_x86_32.cpp	Thu May 07 20:51:12 2015 -0700
@@ -126,10 +126,6 @@
 
   // rax: exception handler for given <exception oop/exception pc>
 
-  // Restore SP from BP if the exception PC is a MethodHandle call site.
-  __ cmpl(Address(rcx, JavaThread::is_method_handle_return_offset()), 0);
-  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
   // We have a handler in rax, (could be deopt blob)
   // rdx - throwing pc, deopt blob will need it.
 
--- a/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_32.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2343,12 +2343,14 @@
 
     // should be a peal
     // +wordSize because of the push above
+    // args are (oop obj, BasicLock* lock, JavaThread* thread)
+    __ push(thread);
     __ lea(rax, Address(rbp, lock_slot_rbp_offset));
     __ push(rax);
 
     __ push(obj_reg);
     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
-    __ addptr(rsp, 2*wordSize);
+    __ addptr(rsp, 3*wordSize);
 #ifdef ASSERT
     {
       Label L;
--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2581,6 +2581,7 @@
     __ lea(c_rarg1, Address(rsp, lock_slot_offset * VMRegImpl::stack_slot_size));
 
     __ mov(c_rarg0, obj_reg);
+    __ mov(c_rarg2, r15_thread);
     __ mov(r12, rsp); // remember sp
     __ subptr(rsp, frame::arg_reg_save_area_bytes); // windows
     __ andptr(rsp, -16); // align stack as required by ABI
@@ -2590,6 +2591,7 @@
     __ movptr(rbx, Address(r15_thread, in_bytes(Thread::pending_exception_offset())));
     __ movptr(Address(r15_thread, in_bytes(Thread::pending_exception_offset())), (int32_t)NULL_WORD);
 
+    // args are (oop obj, BasicLock* lock, JavaThread* thread)
     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C)));
     __ mov(rsp, r12); // restore sp
     __ reinit_heapbase();
@@ -3393,8 +3395,8 @@
 
   // Save callee-saved registers.  See x86_64.ad.
 
-  // rbp is an implicitly saved callee saved register (i.e. the calling
-  // convention will save restore it in prolog/epilog) Other than that
+  // rbp is an implicitly saved callee-saved register (i.e., the calling
+  // convention will save/restore it in the prolog/epilog). Other than that
   // there are no callee save registers now that adapter frames are gone.
 
   __ movptr(Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt), rbp);
@@ -3436,9 +3438,9 @@
 
   // Restore callee-saved registers
 
-  // rbp is an implicitly saved callee saved register (i.e. the calling
+  // rbp is an implicitly saved callee-saved register (i.e., the calling
   // convention will save restore it in prolog/epilog) Other than that
-  // there are no callee save registers no that adapter frames are gone.
+  // there are no callee save registers now that adapter frames are gone.
 
   __ movptr(rbp, Address(rsp, SimpleRuntimeFrame::rbp_off << LogBytesPerInt));
 
@@ -3447,10 +3449,6 @@
 
   // rax: exception handler
 
-  // Restore SP from BP if the exception PC is a MethodHandle call site.
-  __ cmpl(Address(r15_thread, JavaThread::is_method_handle_return_offset()), 0);
-  __ cmovptr(Assembler::notEqual, rsp, rbp_mh_SP_save);
-
   // We have a handler in rax (could be deopt blob).
   __ mov(r8, rax);
 
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu May 07 20:51:12 2015 -0700
@@ -835,7 +835,8 @@
 
     if (UseUnalignedLoadStores && (UseAVX >= 2)) {
       // clean upper bits of YMM registers
-      __ vzeroupper();
+      __ vpxor(xmm0, xmm0);
+      __ vpxor(xmm1, xmm1);
     }
     __ addl(qword_count, 8);
     __ jccb(Assembler::zero, L_exit);
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu May 07 20:51:12 2015 -0700
@@ -1352,7 +1352,8 @@
       __ BIND(L_end);
       if (UseAVX >= 2) {
         // clean upper bits of YMM registers
-        __ vzeroupper();
+        __ vpxor(xmm0, xmm0);
+        __ vpxor(xmm1, xmm1);
       }
     } else {
       // Copy 32-bytes per iteration
@@ -1429,7 +1430,8 @@
       __ BIND(L_end);
       if (UseAVX >= 2) {
         // clean upper bits of YMM registers
-        __ vzeroupper();
+        __ vpxor(xmm0, xmm0);
+        __ vpxor(xmm1, xmm1);
       }
     } else {
       // Copy 32-bytes per iteration
--- a/src/cpu/x86/vm/templateTable_x86.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/templateTable_x86.cpp	Thu May 07 20:51:12 2015 -0700
@@ -543,8 +543,16 @@
 }
 
 void TemplateTable::iload() {
+  iload_internal();
+}
+
+void TemplateTable::nofast_iload() {
+  iload_internal(may_not_rewrite);
+}
+
+void TemplateTable::iload_internal(RewriteControl rc) {
   transition(vtos, itos);
-  if (RewriteFrequentPairs) {
+  if (RewriteFrequentPairs && rc == may_rewrite) {
     Label rewrite, done;
     const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
     LP64_ONLY(assert(rbx != bc, "register damaged"));
@@ -815,6 +823,14 @@
 }
 
 void TemplateTable::aload_0() {
+  aload_0_internal();
+}
+
+void TemplateTable::nofast_aload_0() {
+  aload_0_internal(may_not_rewrite);
+}
+
+void TemplateTable::aload_0_internal(RewriteControl rc) {
   transition(vtos, atos);
   // According to bytecode histograms, the pairs:
   //
@@ -837,7 +853,7 @@
   //   aload_0, iload_1
   // These bytecodes with a small amount of code are most profitable
   // to rewrite
-  if (RewriteFrequentPairs) {
+  if (RewriteFrequentPairs && rc == may_rewrite) {
     Label rewrite, done;
 
     const Register bc = LP64_ONLY(c_rarg3) NOT_LP64(rcx);
@@ -2491,29 +2507,21 @@
   assert_different_registers(Rcache, index, temp);
 
   Label resolved;
-    assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
-    __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
-    __ cmpl(temp, (int) bytecode());  // have we resolved this bytecode?
-    __ jcc(Assembler::equal, resolved);
+
+  Bytecodes::Code code = bytecode();
+  switch (code) {
+  case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break;
+  case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break;
+  }
+
+  assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
+  __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size);
+  __ cmpl(temp, code);  // have we resolved this bytecode?
+  __ jcc(Assembler::equal, resolved);
 
   // resolve first time through
-  address entry;
-  switch (bytecode()) {
-    case Bytecodes::_getstatic      : // fall through
-    case Bytecodes::_putstatic      : // fall through
-    case Bytecodes::_getfield       : // fall through
-    case Bytecodes::_putfield       : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_get_put);        break;
-    case Bytecodes::_invokevirtual  : // fall through
-    case Bytecodes::_invokespecial  : // fall through
-    case Bytecodes::_invokestatic   : // fall through
-    case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);         break;
-    case Bytecodes::_invokehandle   : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);   break;
-    case Bytecodes::_invokedynamic  : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);  break;
-    default:
-      fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
-      break;
-  }
-  __ movl(temp, (int)bytecode());
+  address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache);
+  __ movl(temp, code);
   __ call_VM(noreg, entry, temp);
   // Update registers with resolved info
   __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
@@ -2628,7 +2636,7 @@
   __ verify_oop(r);
 }
 
-void TemplateTable::getfield_or_static(int byte_no, bool is_static) {
+void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
   transition(vtos, vtos);
 
   const Register cache = rcx;
@@ -2660,7 +2668,7 @@
   __ load_signed_byte(rax, field);
   __ push(btos);
   // Rewrite bytecode to be faster
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_bgetfield, bc, rbx);
   }
   __ jmp(Done);
@@ -2671,7 +2679,7 @@
   // atos
   __ load_heap_oop(rax, field);
   __ push(atos);
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_agetfield, bc, rbx);
   }
   __ jmp(Done);
@@ -2683,7 +2691,7 @@
   __ movl(rax, field);
   __ push(itos);
   // Rewrite bytecode to be faster
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_igetfield, bc, rbx);
   }
   __ jmp(Done);
@@ -2695,7 +2703,7 @@
   __ load_unsigned_short(rax, field);
   __ push(ctos);
   // Rewrite bytecode to be faster
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_cgetfield, bc, rbx);
   }
   __ jmp(Done);
@@ -2707,7 +2715,7 @@
   __ load_signed_short(rax, field);
   __ push(stos);
   // Rewrite bytecode to be faster
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_sgetfield, bc, rbx);
   }
   __ jmp(Done);
@@ -2731,7 +2739,7 @@
 
   __ push(ltos);
   // Rewrite bytecode to be faster
-  LP64_ONLY(if (!is_static) patch_bytecode(Bytecodes::_fast_lgetfield, bc, rbx));
+  LP64_ONLY(if (!is_static && rc == may_rewrite) patch_bytecode(Bytecodes::_fast_lgetfield, bc, rbx));
   __ jmp(Done);
 
   __ bind(notLong);
@@ -2743,7 +2751,7 @@
   NOT_LP64(__ fld_s(field));
   __ push(ftos);
   // Rewrite bytecode to be faster
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_fgetfield, bc, rbx);
   }
   __ jmp(Done);
@@ -2758,7 +2766,7 @@
   NOT_LP64(__ fld_d(field));
   __ push(dtos);
   // Rewrite bytecode to be faster
-  if (!is_static) {
+  if (!is_static && rc == may_rewrite) {
     patch_bytecode(Bytecodes::_fast_dgetfield, bc, rbx);
   }
 #ifdef ASSERT
@@ -2779,6 +2787,10 @@
   getfield_or_static(byte_no, false);
 }
 
+void TemplateTable::nofast_getfield(int byte_no) {
+  getfield_or_static(byte_no, false, may_not_rewrite);
+}
+
 void TemplateTable::getstatic(int byte_no) {
   getfield_or_static(byte_no, true);
 }
@@ -2870,7 +2882,7 @@
   }
 }
 
-void TemplateTable::putfield_or_static(int byte_no, bool is_static) {
+void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {
   transition(vtos, vtos);
 
   const Register cache = rcx;
@@ -2911,7 +2923,7 @@
     __ pop(btos);
     if (!is_static) pop_and_check_object(obj);
     __ movb(field, rax);
-    if (!is_static) {
+    if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_bputfield, bc, rbx, true, byte_no);
     }
     __ jmp(Done);
@@ -2927,7 +2939,7 @@
     if (!is_static) pop_and_check_object(obj);
     // Store into the field
     do_oop_store(_masm, field, rax, _bs->kind(), false);
-    if (!is_static) {
+    if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_aputfield, bc, rbx, true, byte_no);
     }
     __ jmp(Done);
@@ -2942,7 +2954,7 @@
     __ pop(itos);
     if (!is_static) pop_and_check_object(obj);
     __ movl(field, rax);
-    if (!is_static) {
+    if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_iputfield, bc, rbx, true, byte_no);
     }
     __ jmp(Done);
@@ -2957,7 +2969,7 @@
     __ pop(ctos);
     if (!is_static) pop_and_check_object(obj);
     __ movw(field, rax);
-    if (!is_static) {
+    if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_cputfield, bc, rbx, true, byte_no);
     }
     __ jmp(Done);
@@ -2972,7 +2984,7 @@
     __ pop(stos);
     if (!is_static) pop_and_check_object(obj);
     __ movw(field, rax);
-    if (!is_static) {
+    if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_sputfield, bc, rbx, true, byte_no);
     }
     __ jmp(Done);
@@ -2988,7 +3000,7 @@
     __ pop(ltos);
     if (!is_static) pop_and_check_object(obj);
     __ movq(field, rax);
-    if (!is_static) {
+    if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_lputfield, bc, rbx, true, byte_no);
     }
     __ jmp(Done);
@@ -3035,7 +3047,7 @@
     if (!is_static) pop_and_check_object(obj);
     NOT_LP64( __ fstp_s(field);)
     LP64_ONLY( __ movflt(field, xmm0);)
-    if (!is_static) {
+    if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_fputfield, bc, rbx, true, byte_no);
     }
     __ jmp(Done);
@@ -3053,7 +3065,7 @@
     if (!is_static) pop_and_check_object(obj);
     NOT_LP64( __ fstp_d(field);)
     LP64_ONLY( __ movdbl(field, xmm0);)
-    if (!is_static) {
+    if (!is_static && rc == may_rewrite) {
       patch_bytecode(Bytecodes::_fast_dputfield, bc, rbx, true, byte_no);
     }
   }
@@ -3079,6 +3091,10 @@
   putfield_or_static(byte_no, false);
 }
 
+void TemplateTable::nofast_putfield(int byte_no) {
+  putfield_or_static(byte_no, false, may_not_rewrite);
+}
+
 void TemplateTable::putstatic(int byte_no) {
   putfield_or_static(byte_no, true);
 }
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/templateTable_x86.hpp	Thu May 07 20:51:12 2015 -0700
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 1998, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_X86_VM_TEMPLATETABLE_X86_HPP
+#define CPU_X86_VM_TEMPLATETABLE_X86_HPP
+
+  static void prepare_invoke(int byte_no,
+                             Register method,         // linked method (or i-klass)
+                             Register index = noreg,  // itable index, MethodType, etc.
+                             Register recv  = noreg,  // if caller wants to see it
+                             Register flags = noreg   // if caller wants to test it
+                             );
+  static void invokevirtual_helper(Register index, Register recv,
+                                   Register flags);
+  static void volatile_barrier(Assembler::Membar_mask_bits order_constraint);
+
+  // Helpers
+  static void index_check(Register array, Register index);
+  static void index_check_without_pop(Register array, Register index);
+
+#endif // CPU_X86_VM_TEMPLATETABLE_X86_HPP
--- a/src/cpu/x86/vm/templateTable_x86_32.hpp	Thu May 07 10:19:31 2015 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef CPU_X86_VM_TEMPLATETABLE_X86_32_HPP
-#define CPU_X86_VM_TEMPLATETABLE_X86_32_HPP
-
-  static void prepare_invoke(int byte_no,
-                             Register method,         // linked method (or i-klass)
-                             Register index = noreg,  // itable index, MethodType, etc.
-                             Register recv  = noreg,  // if caller wants to see it
-                             Register flags = noreg   // if caller wants to test it
-                             );
-  static void invokevirtual_helper(Register index, Register recv,
-                                   Register flags);
-  static void volatile_barrier(Assembler::Membar_mask_bits order_constraint);
-
-  // Helpers
-  static void index_check(Register array, Register index);
-  static void index_check_without_pop(Register array, Register index);
-
-#endif // CPU_X86_VM_TEMPLATETABLE_X86_32_HPP
--- a/src/cpu/x86/vm/templateTable_x86_64.hpp	Thu May 07 10:19:31 2015 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,42 +0,0 @@
-/*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef CPU_X86_VM_TEMPLATETABLE_X86_64_HPP
-#define CPU_X86_VM_TEMPLATETABLE_X86_64_HPP
-
-  static void prepare_invoke(int byte_no,
-                             Register method,         // linked method (or i-klass)
-                             Register index = noreg,  // itable index, MethodType, etc.
-                             Register recv  = noreg,  // if caller wants to see it
-                             Register flags = noreg   // if caller wants to test it
-                             );
-  static void invokevirtual_helper(Register index, Register recv,
-                                   Register flags);
-  static void volatile_barrier(Assembler::Membar_mask_bits order_constraint);
-
-  // Helpers
-  static void index_check(Register array, Register index);
-  static void index_check_without_pop(Register array, Register index);
-
-#endif // CPU_X86_VM_TEMPLATETABLE_X86_64_HPP
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Thu May 07 10:19:31 2015 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Thu May 07 20:51:12 2015 -0700
@@ -379,15 +379,6 @@
   };
 };
 
-
-void VM_Version::get_cpu_info_wrapper() {
-  get_cpu_info_stub(&_cpuid_info);
-}
-
-#ifndef CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED
-  #define CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED(f) f()
-#endif
-
 void VM_Version::get_processor_features() {
 
   _cpu = 4; // 486 by default
@@ -401,9 +392,7 @@
   if (!Use486InstrsOnly) {
     // Get raw processor info
 
-    // Some platforms (like Win*) need a wrapper around here
-    // in order to properly handle SEGV for YMM registers test.
-    CALL_TEST_FUNC_WITH_WRAPPER_IF_NEEDED(get_cpu_info_wrapper);
+    get_cpu_info_stub(&_cpuid_info);
 
     assert_is_initialized();
     _cpu = extended_cpu_family();
@@ -980,6 +969,11 @@
      (cache_line_size > ContendedPaddingWidth))
      ContendedPaddingWidth = cache_line_size;
 
+  // This machine allows unaligned memory accesses
+  if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) {
+    FLAG_SET_DEFAULT(UseUnalignedAccesses, true);
+  }
+
 #ifndef PRODUCT
   if (PrintMiscellaneous && Verbose) {
     tty->print_cr("Logical CPUs per core: %u",