changeset 9488:846276b97202

Merge
author amurillo
date Mon, 19 Oct 2015 12:30:17 -0700
parents d7ffd16382fe a8a8604f890f
children 4be1d228e368 29c399fbbf25
files test/compiler/TestMoveStoresOutOfLoopsStoreNoCtrl.java test/runtime/6888954/vmerrors.sh
diffstat 942 files changed, 66600 insertions(+), 6657 deletions(-)
line diff
--- a/agent/src/share/classes/sun/jvm/hotspot/compiler/ImmutableOopMapSet.java	Thu Oct 15 15:15:17 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/compiler/ImmutableOopMapSet.java	Mon Oct 19 12:30:17 2015 -0700
@@ -67,9 +67,6 @@
       }
     }
 
-    public void visitValueLocation(Address valueAddr) {
-    }
-
     public void visitNarrowOopLocation(Address narrowOopAddr) {
       addressVisitor.visitCompOopAddress(narrowOopAddr);
     }
@@ -216,9 +213,9 @@
       }
     }
 
-    // We want narow oop, value and oop oop_types
-    OopMapValue.OopTypes[] values = new OopMapValue.OopTypes[]{
-        OopMapValue.OopTypes.OOP_VALUE, OopMapValue.OopTypes.VALUE_VALUE, OopMapValue.OopTypes.NARROWOOP_VALUE
+    // We want narrow oop and oop oop_types
+    OopMapValue.OopTypes[] values = new OopMapValue.OopTypes[] {
+        OopMapValue.OopTypes.OOP_VALUE, OopMapValue.OopTypes.NARROWOOP_VALUE
     };
 
     {
@@ -231,8 +228,6 @@
             // to detect in the debugging system
             // assert(Universe::is_heap_or_null(*loc), "found non oop pointer");
             visitor.visitOopLocation(loc);
-          } else if (omv.getType() == OopMapValue.OopTypes.VALUE_VALUE) {
-            visitor.visitValueLocation(loc);
           } else if (omv.getType() == OopMapValue.OopTypes.NARROWOOP_VALUE) {
             visitor.visitNarrowOopLocation(loc);
           }
--- a/agent/src/share/classes/sun/jvm/hotspot/compiler/OopMapValue.java	Thu Oct 15 15:15:17 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/compiler/OopMapValue.java	Mon Oct 19 12:30:17 2015 -0700
@@ -49,7 +49,6 @@
   // Types of OopValues
   static int UNUSED_VALUE;
   static int OOP_VALUE;
-  static int VALUE_VALUE;
   static int NARROWOOP_VALUE;
   static int CALLEE_SAVED_VALUE;
   static int DERIVED_OOP_VALUE;
@@ -73,7 +72,6 @@
     REGISTER_MASK_IN_PLACE = db.lookupIntConstant("OopMapValue::register_mask_in_place").intValue();
     UNUSED_VALUE           = db.lookupIntConstant("OopMapValue::unused_value").intValue();
     OOP_VALUE              = db.lookupIntConstant("OopMapValue::oop_value").intValue();
-    VALUE_VALUE            = db.lookupIntConstant("OopMapValue::value_value").intValue();
     NARROWOOP_VALUE        = db.lookupIntConstant("OopMapValue::narrowoop_value").intValue();
     CALLEE_SAVED_VALUE     = db.lookupIntConstant("OopMapValue::callee_saved_value").intValue();
     DERIVED_OOP_VALUE      = db.lookupIntConstant("OopMapValue::derived_oop_value").intValue();
@@ -82,7 +80,6 @@
   public static abstract class OopTypes {
     public static final OopTypes UNUSED_VALUE       = new OopTypes() { int getValue() { return OopMapValue.UNUSED_VALUE;       }};
     public static final OopTypes OOP_VALUE          = new OopTypes() { int getValue() { return OopMapValue.OOP_VALUE;          }};
-    public static final OopTypes VALUE_VALUE        = new OopTypes() { int getValue() { return OopMapValue.VALUE_VALUE;        }};
     public static final OopTypes NARROWOOP_VALUE    = new OopTypes() { int getValue() { return OopMapValue.NARROWOOP_VALUE;         }};
     public static final OopTypes CALLEE_SAVED_VALUE = new OopTypes() { int getValue() { return OopMapValue.CALLEE_SAVED_VALUE; }};
     public static final OopTypes DERIVED_OOP_VALUE  = new OopTypes() { int getValue() { return OopMapValue.DERIVED_OOP_VALUE;  }};
@@ -105,7 +102,6 @@
 
   // Querying
   public boolean isOop()         { return (getValue() & TYPE_MASK_IN_PLACE) == OOP_VALUE;          }
-  public boolean isValue()       { return (getValue() & TYPE_MASK_IN_PLACE) == VALUE_VALUE;        }
   public boolean isNarrowOop()   { return (getValue() & TYPE_MASK_IN_PLACE) == NARROWOOP_VALUE;    }
   public boolean isCalleeSaved() { return (getValue() & TYPE_MASK_IN_PLACE) == CALLEE_SAVED_VALUE; }
   public boolean isDerivedOop()  { return (getValue() & TYPE_MASK_IN_PLACE) == DERIVED_OOP_VALUE;  }
@@ -117,7 +113,6 @@
     int which = (getValue() & TYPE_MASK_IN_PLACE);
          if (which == UNUSED_VALUE) return OopTypes.UNUSED_VALUE;
     else if (which == OOP_VALUE)    return OopTypes.OOP_VALUE;
-    else if (which == VALUE_VALUE)  return OopTypes.VALUE_VALUE;
     else if (which == NARROWOOP_VALUE)   return OopTypes.NARROWOOP_VALUE;
     else if (which == CALLEE_SAVED_VALUE) return OopTypes.CALLEE_SAVED_VALUE;
     else if (which == DERIVED_OOP_VALUE)  return OopTypes.DERIVED_OOP_VALUE;
--- a/agent/src/share/classes/sun/jvm/hotspot/compiler/OopMapVisitor.java	Thu Oct 15 15:15:17 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/compiler/OopMapVisitor.java	Mon Oct 19 12:30:17 2015 -0700
@@ -31,6 +31,5 @@
 public interface OopMapVisitor {
   public void visitOopLocation(Address oopAddr);
   public void visitDerivedOopLocation(Address baseOopAddr, Address derivedOopAddr);
-  public void visitValueLocation(Address valueAddr);
   public void visitNarrowOopLocation(Address narrowOopAddr);
 }
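
Purely as an aside (not part of the changeset): with visitValueLocation removed, an OopMapVisitor implementation in the serviceability agent only has to handle oop, derived-oop and narrow-oop locations. A minimal sketch, with the class name and printing behaviour invented for illustration:

```java
import sun.jvm.hotspot.compiler.OopMapVisitor;
import sun.jvm.hotspot.debugger.Address;

// Hypothetical visitor: after this change the interface has exactly the
// three methods implemented below; there is no value-location callback.
public class PrintingOopMapVisitor implements OopMapVisitor {
  public void visitOopLocation(Address oopAddr) {
    System.out.println("oop at " + oopAddr);
  }

  public void visitDerivedOopLocation(Address baseOopAddr, Address derivedOopAddr) {
    System.out.println("derived oop at " + derivedOopAddr + " (base " + baseOopAddr + ")");
  }

  public void visitNarrowOopLocation(Address narrowOopAddr) {
    System.out.println("narrow oop at " + narrowOopAddr);
  }
}
```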
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java	Thu Oct 15 15:15:17 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java	Mon Oct 19 12:30:17 2015 -0700
@@ -536,9 +536,6 @@
       }
     }
 
-    public void visitValueLocation(Address valueAddr) {
-    }
-
     public void visitNarrowOopLocation(Address compOopAddr) {
       addressVisitor.visitCompOopAddress(compOopAddr);
     }
--- a/agent/src/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java	Thu Oct 15 15:15:17 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java	Mon Oct 19 12:30:17 2015 -0700
@@ -1220,9 +1220,6 @@
       oms = new OopMapStream(map, OopMapValue.OopTypes.NARROWOOP_VALUE);
       buf.append(omvIterator.iterate(oms, "NarrowOops:", false));
 
-      oms = new OopMapStream(map, OopMapValue.OopTypes.VALUE_VALUE);
-      buf.append(omvIterator.iterate(oms, "Values:", false));
-
       oms = new OopMapStream(map, OopMapValue.OopTypes.CALLEE_SAVED_VALUE);
       buf.append(omvIterator.iterate(oms, "Callee saved:",  true));
 
--- a/make/bsd/makefiles/compiler1.make	Thu Oct 15 15:15:17 2015 -0700
+++ b/make/bsd/makefiles/compiler1.make	Mon Oct 19 12:30:17 2015 -0700
@@ -28,4 +28,7 @@
 
 VM_SUBDIR = client
 
+# We don't support the JVMCI in a client VM.
+INCLUDE_JVMCI := false
+
 CFLAGS += -DCOMPILER1
--- a/make/bsd/makefiles/gcc.make	Thu Oct 15 15:15:17 2015 -0700
+++ b/make/bsd/makefiles/gcc.make	Mon Oct 19 12:30:17 2015 -0700
@@ -149,6 +149,7 @@
     PCH_FLAG/sharedRuntimeTrig.o = $(PCH_FLAG/NO_PCH)
     PCH_FLAG/sharedRuntimeTrans.o = $(PCH_FLAG/NO_PCH)
     PCH_FLAG/unsafe.o = $(PCH_FLAG/NO_PCH)
+    PCH_FLAG/jvmciCompilerToVM.o = $(PCH_FLAG/NO_PCH)
 
   endif
 else # ($(USE_CLANG), true)
@@ -313,10 +314,11 @@
 
 # Work around some compiler bugs.
 ifeq ($(USE_CLANG), true)
-  # Clang <= 6.1
+  # Clang < 6, Clang 6.0 or 6.1, or Clang 7.0
   ifeq ($(shell expr \
       $(CC_VER_MAJOR) \< 6 \| \
-      \( $(CC_VER_MAJOR) = 6 \& $(CC_VER_MINOR) \<= 1 \) \
+      \( $(CC_VER_MAJOR) = 6 \& $(CC_VER_MINOR) \<= 1 \) \| \
+      \( $(CC_VER_MAJOR) = 7 \& $(CC_VER_MINOR) \<= 0 \) \
     ), 1)
     OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT)
     OPT_CFLAGS/unsafe.o += -O1
--- a/make/bsd/makefiles/jsig.make	Thu Oct 15 15:15:17 2015 -0700
+++ b/make/bsd/makefiles/jsig.make	Mon Oct 19 12:30:17 2015 -0700
@@ -62,7 +62,7 @@
 $(LIBJSIG): $(JSIGSRCDIR)/jsig.c $(LIBJSIG_MAPFILE)
 	@echo $(LOG_INFO) Making signal interposition lib...
 	$(QUIETLY) $(CC) $(SYMFLAG) $(ARCHFLAG) $(SHARED_FLAG) $(PICFLAG) \
-                         $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) -o $@ $<
+                         $(LFLAGS_JSIG) $(JSIG_DEBUG_CFLAGS) $(EXTRA_CFLAGS) -o $@ $<
 ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
   ifeq ($(OS_VENDOR), Darwin)
 	$(DSYMUTIL) $@
--- a/make/bsd/makefiles/minimal1.make	Thu Oct 15 15:15:17 2015 -0700
+++ b/make/bsd/makefiles/minimal1.make	Mon Oct 19 12:30:17 2015 -0700
@@ -38,6 +38,7 @@
 INCLUDE_NMT := false
 INCLUDE_TRACE := false
 INCLUDE_CDS := false
+INCLUDE_JVMCI := false
 
 CXXFLAGS += -DMINIMAL_JVM -DCOMPILER1 -DVMTYPE=\"Minimal\"
 CFLAGS += -DMINIMAL_JVM -DCOMPILER1 -DVMTYPE=\"Minimal\"
--- a/make/excludeSrc.make	Thu Oct 15 15:15:17 2015 -0700
+++ b/make/excludeSrc.make	Mon Oct 19 12:30:17 2015 -0700
@@ -106,6 +106,25 @@
 	 memTracker.cpp nmtDCmd.cpp mallocSiteTable.cpp
 endif
 
+ifneq (,$(findstring $(Platform_arch_model), x86_64, sparc))
+      # JVMCI is supported only on x86_64 and SPARC.
+else
+      INCLUDE_JVMCI := false
+endif
+
+ifeq ($(INCLUDE_JVMCI), false)
+      CXXFLAGS += -DINCLUDE_JVMCI=0
+      CFLAGS += -DINCLUDE_JVMCI=0
+
+      jvmci_dir := $(HS_COMMON_SRC)/share/vm/jvmci
+      jvmci_dir_alt := $(HS_ALT_SRC)/share/vm/jvmci
+      jvmci_exclude := $(notdir $(wildcard $(jvmci_dir)/*.cpp))	\
+			$(notdir $(wildcard $(jvmci_dir_alt)/*.cpp))
+      Src_Files_EXCLUDE += $(jvmci_exclude) \
+	jvmciCodeInstaller_aarch64.cpp jvmciCodeInstaller_ppc.cpp jvmciCodeInstaller_sparc.cpp \
+	jvmciCodeInstaller_x86.cpp
+endif
+
 -include $(HS_ALT_MAKE)/excludeSrc.make
 
 .PHONY: $(HS_ALT_MAKE)/excludeSrc.make
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/gensrc/Gensrc-jdk.vm.ci.gmk	Mon Oct 19 12:30:17 2015 -0700
@@ -0,0 +1,121 @@
+#
+# Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.  Oracle designates this
+# particular file as subject to the "Classpath" exception as provided
+# by Oracle in the LICENSE file that accompanied this code.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#
+
+default: all
+
+include $(SPEC)
+include MakeBase.gmk
+include JavaCompilation.gmk
+include SetupJavaCompilers.gmk
+
+GENSRC_DIR := $(SUPPORT_OUTPUTDIR)/gensrc/jdk.vm.ci
+SRC_DIR := $(HOTSPOT_TOPDIR)/src/jdk.vm.ci/share/classes
+
+################################################################################
+# Compile the annotation processor
+
+$(eval $(call SetupJavaCompilation, BUILD_JVMCI_OPTIONS, \
+    SETUP := GENERATE_OLDBYTECODE, \
+    SRC := $(SRC_DIR)/jdk.vm.ci.options/src \
+        $(SRC_DIR)/jdk.vm.ci.options.processor/src \
+        $(SRC_DIR)/jdk.vm.ci.inittimer/src, \
+    BIN := $(BUILDTOOLS_OUTPUTDIR)/jvmci_options, \
+    JAR := $(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.options.jar, \
+))
+
+$(eval $(call SetupJavaCompilation, BUILD_JVMCI_SERVICE, \
+    SETUP := GENERATE_OLDBYTECODE, \
+    SRC := $(SRC_DIR)/jdk.vm.ci.service/src \
+        $(SRC_DIR)/jdk.vm.ci.service.processor/src, \
+    BIN := $(BUILDTOOLS_OUTPUTDIR)/jvmci_service, \
+    JAR := $(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.service.jar, \
+))
+
+################################################################################
+
+PROC_SRC_SUBDIRS := \
+    jdk.vm.ci.compiler \
+    jdk.vm.ci.hotspot \
+    jdk.vm.ci.hotspot.amd64 \
+    jdk.vm.ci.hotspot.sparc \
+    #
+
+PROC_SRC_DIRS := $(patsubst %, $(SRC_DIR)/%/src, $(PROC_SRC_SUBDIRS))
+
+PROC_SRCS := $(filter %.java, $(call CacheFind, $(PROC_SRC_DIRS)))
+
+ALL_SRC_DIRS := $(wildcard $(SRC_DIR)/*/src)
+SOURCEPATH := $(call PathList, $(ALL_SRC_DIRS))
+PROCESSOR_PATH := $(call PathList, \
+    $(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.options.jar \
+    $(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.service.jar)
+
+$(GENSRC_DIR)/_gensrc_proc_done: $(PROC_SRCS) \
+    $(BUILD_JVMCI_OPTIONS) $(BUILD_JVMCI_SERVICE)
+	$(MKDIR) -p $(@D)
+	$(eval $(call ListPathsSafely,PROC_SRCS,$(@D)/_gensrc_proc_files))
+	$(JAVA_SMALL) $(NEW_JAVAC) \
+	    -sourcepath $(SOURCEPATH) \
+	    -implicit:none \
+	    -proc:only \
+	    -processorpath $(PROCESSOR_PATH) \
+	    -d $(GENSRC_DIR) \
+	    -s $(GENSRC_DIR) \
+	    @$(@D)/_gensrc_proc_files
+	$(TOUCH) $@
+
+TARGETS += $(GENSRC_DIR)/_gensrc_proc_done
+
+################################################################################
+
+$(GENSRC_DIR)/META-INF/services/jdk.vm.ci.options.OptionDescriptors: \
+    $(GENSRC_DIR)/_gensrc_proc_done
+	$(MKDIR) -p $(@D)
+	($(CD) $(GENSRC_DIR)/META-INF/jvmci.options && \
+	    $(RM) -f $@; \
+	    for i in $$(ls); do \
+	      echo $${i}_OptionDescriptors >> $@; \
+	    done)
+
+TARGETS += $(GENSRC_DIR)/META-INF/services/jdk.vm.ci.options.OptionDescriptors
+
+################################################################################
+
+$(GENSRC_DIR)/_providers_converted: $(GENSRC_DIR)/_gensrc_proc_done
+	$(MKDIR) -p $(GENSRC_DIR)/META-INF/services
+	($(CD) $(GENSRC_DIR)/META-INF/jvmci.providers && \
+	    for i in $$($(LS)); do \
+	      c=$$($(CAT) $$i | $(TR) -d '\n\r'); \
+	      $(ECHO) $$i >> $(GENSRC_DIR)/META-INF/services/$$c; \
+	    done)
+	$(TOUCH) $@
+
+TARGETS += $(GENSRC_DIR)/_providers_converted
+
+################################################################################
+
+all: $(TARGETS)
+
+.PHONY: default all
--- a/make/linux/makefiles/compiler1.make	Thu Oct 15 15:15:17 2015 -0700
+++ b/make/linux/makefiles/compiler1.make	Mon Oct 19 12:30:17 2015 -0700
@@ -28,4 +28,7 @@
 
 VM_SUBDIR = client
 
+# We don't support the JVMCI in a client VM.
+INCLUDE_JVMCI := false
+
 CFLAGS += -DCOMPILER1
--- a/make/linux/makefiles/gcc.make	Thu Oct 15 15:15:17 2015 -0700
+++ b/make/linux/makefiles/gcc.make	Mon Oct 19 12:30:17 2015 -0700
@@ -213,12 +213,16 @@
   # Since GCC 4.3, -Wconversion has changed its meanings to warn these implicit
   # conversions which might affect the values. Only enable it in earlier versions.
   ifeq "$(shell expr \( $(CC_VER_MAJOR) \> 4 \) \| \( \( $(CC_VER_MAJOR) = 4 \) \& \( $(CC_VER_MINOR) \>= 3 \) \))" "0"
+    # GCC < 4.3
     WARNING_FLAGS += -Wconversion
   endif  
   ifeq "$(shell expr \( $(CC_VER_MAJOR) \> 4 \) \| \( \( $(CC_VER_MAJOR) = 4 \) \& \( $(CC_VER_MINOR) \>= 8 \) \))" "1"
+    # GCC >= 4.8
     # This flag is only known since GCC 4.3. Gcc 4.8 contains a fix so that with templates no
     # warnings are issued: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=11856
     WARNING_FLAGS += -Wtype-limits
+    # GCC < 4.8 doesn't accept this flag for C++.
+    WARNING_FLAGS += -Wno-format-zero-length
   endif
 endif
 
--- a/make/linux/makefiles/minimal1.make	Thu Oct 15 15:15:17 2015 -0700
+++ b/make/linux/makefiles/minimal1.make	Mon Oct 19 12:30:17 2015 -0700
@@ -38,6 +38,7 @@
 INCLUDE_NMT := false
 INCLUDE_TRACE := false
 INCLUDE_CDS := false
+INCLUDE_JVMCI := false
 
 CXXFLAGS += -DMINIMAL_JVM -DCOMPILER1 -DVMTYPE=\"Minimal\"
 CFLAGS += -DMINIMAL_JVM -DCOMPILER1 -DVMTYPE=\"Minimal\"
--- a/make/solaris/makefiles/compiler1.make	Thu Oct 15 15:15:17 2015 -0700
+++ b/make/solaris/makefiles/compiler1.make	Mon Oct 19 12:30:17 2015 -0700
@@ -28,4 +28,7 @@
 
 VM_SUBDIR = client
 
+# We don't support the JVMCI in a client VM.
+INCLUDE_JVMCI := false
+
 CFLAGS += -DCOMPILER1
--- a/make/windows/build_vm_def.sh	Thu Oct 15 15:15:17 2015 -0700
+++ b/make/windows/build_vm_def.sh	Mon Oct 19 12:30:17 2015 -0700
@@ -52,6 +52,7 @@
 CAT="$MKS_HOME/cat.exe"
 RM="$MKS_HOME/rm.exe"
 DUMPBIN="link.exe /dump"
+export VS_UNICODE_OUTPUT= 
 
 if [ "$1" = "-nosa" ]; then
     echo EXPORTS > vm.def
--- a/make/windows/create_obj_files.sh	Thu Oct 15 15:15:17 2015 -0700
+++ b/make/windows/create_obj_files.sh	Mon Oct 19 12:30:17 2015 -0700
@@ -111,6 +111,7 @@
 
 COMPILER2_SPECIFIC_FILES="opto libadt bcEscapeAnalyzer.cpp c2_* runtime_*"
 COMPILER1_SPECIFIC_FILES="c1_*"
+JVMCI_SPECIFIC_FILES="*jvmci* *JVMCI*"
 SHARK_SPECIFIC_FILES="shark"
 ZERO_SPECIFIC_FILES="zero"
 
@@ -119,11 +120,11 @@
 
 # Exclude per type.
 case "${TYPE}" in
-    "compiler1") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER2_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES} ciTypeFlow.cpp" ;;
+    "compiler1") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER2_SPECIFIC_FILES} ${JVMCI_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES} ciTypeFlow.cpp" ;;
     "compiler2") Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES}" ;;
     "tiered")    Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${ZERO_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES}" ;;
-    "zero")      Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${COMPILER2_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES} ciTypeFlow.cpp" ;;
-    "shark")     Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${COMPILER2_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES}" ;;
+    "zero")      Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${COMPILER2_SPECIFIC_FILES} ${JVMCI_SPECIFIC_FILES} ${SHARK_SPECIFIC_FILES} ciTypeFlow.cpp" ;;
+    "shark")     Src_Files_EXCLUDE="${Src_Files_EXCLUDE} ${COMPILER1_SPECIFIC_FILES} ${COMPILER2_SPECIFIC_FILES} ${JVMCI_SPECIFIC_FILES} ${ZERO_SPECIFIC_FILES}" ;;
 esac
 
 # Special handling of arch model.
--- a/make/windows/makefiles/compile.make	Thu Oct 15 15:15:17 2015 -0700
+++ b/make/windows/makefiles/compile.make	Mon Oct 19 12:30:17 2015 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -31,6 +31,7 @@
 #   /nologo   Supress copyright message at every cl.exe startup
 #   /W3       Warning level 3
 #   /Zi       Include debugging information
+#   /d2Zi+    Extended debugging symbols for optimized code (/Zo in VS2013 Update 3 and later)
 #   /WX       Treat any warning error as a fatal error
 #   /MD       Use dynamic multi-threaded runtime (msvcrt.dll or msvc*NN.dll)
 #   /MTd      Use static multi-threaded runtime debug versions
@@ -57,7 +58,7 @@
 
 # Let's add debug information when Full Debug Symbols is enabled
 !if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1"
-CXX_FLAGS=$(CXX_FLAGS) /Zi
+CXX_FLAGS=$(CXX_FLAGS) /Zi /d2Zi+
 !endif
 
 # Based on BUILDARCH we add some flags and select the default compiler name
--- a/make/windows/makefiles/projectcreator.make	Thu Oct 15 15:15:17 2015 -0700
+++ b/make/windows/makefiles/projectcreator.make	Mon Oct 19 12:30:17 2015 -0700
@@ -145,6 +145,10 @@
  -ignorePath_TARGET tiered \
  -ignorePath_TARGET c1_
 
+ProjectCreatorIDEOptionsIgnoreJVMCI=\
+ -ignorePath_TARGET src/share/vm/jvmci \
+ -ignorePath_TARGET vm/jvmci
+
 ProjectCreatorIDEOptionsIgnoreCompiler2=\
  -ignorePath_TARGET compiler2 \
  -ignorePath_TARGET tiered \
@@ -165,6 +169,8 @@
 ##################################################
 ProjectCreatorIDEOptions=$(ProjectCreatorIDEOptions) \
  -define_compiler1 COMPILER1 \
+ -define_compiler1 INCLUDE_JVMCI=0 \
+$(ProjectCreatorIDEOptionsIgnoreJVMCI:TARGET=compiler1) \
 $(ProjectCreatorIDEOptionsIgnoreCompiler2:TARGET=compiler1)
 
 ##################################################
--- a/make/windows/makefiles/vm.make	Thu Oct 15 15:15:17 2015 -0700
+++ b/make/windows/makefiles/vm.make	Mon Oct 19 12:30:17 2015 -0700
@@ -40,7 +40,7 @@
 !endif
 
 !if "$(Variant)" == "compiler1"
-CXX_FLAGS=$(CXX_FLAGS) /D "COMPILER1"
+CXX_FLAGS=$(CXX_FLAGS) /D "COMPILER1" /D INCLUDE_JVMCI=0
 !endif
 
 !if "$(Variant)" == "compiler2"
@@ -152,6 +152,7 @@
 VM_PATH=$(VM_PATH);../generated/jvmtifiles
 VM_PATH=$(VM_PATH);../generated/tracefiles
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/c1
+VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/jvmci
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/compiler
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/code
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/interpreter
@@ -163,6 +164,7 @@
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc/cms
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/gc/g1
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/asm
+VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/logging
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/memory
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/oops
 VM_PATH=$(VM_PATH);$(WorkSpace)/src/share/vm/prims
@@ -232,6 +234,9 @@
 {$(COMMONSRC)\share\vm\classfile}.cpp.obj::
         $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
+{$(COMMONSRC)\share\vm\jvmci}.cpp.obj::
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
+
 {$(COMMONSRC)\share\vm\gc\parallel}.cpp.obj::
         $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
@@ -250,6 +255,9 @@
 {$(COMMONSRC)\share\vm\asm}.cpp.obj::
         $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
+{$(COMMONSRC)\share\vm\logging}.cpp.obj::
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
+
 {$(COMMONSRC)\share\vm\memory}.cpp.obj::
         $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
@@ -330,6 +338,9 @@
 {$(ALTSRC)\share\vm\asm}.cpp.obj::
         $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
+{$(ALTSRC)\share\vm\logging}.cpp.obj::
+        $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
+
 {$(ALTSRC)\share\vm\memory}.cpp.obj::
         $(CXX) $(CXX_FLAGS) $(CXX_USE_PCH) /c $<
 
--- a/src/cpu/aarch64/vm/aarch64.ad	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/aarch64.ad	Mon Oct 19 12:30:17 2015 -0700
@@ -1039,6 +1039,7 @@
   bool leading_membar(const MemBarNode *barrier);
 
   bool is_card_mark_membar(const MemBarNode *barrier);
+  bool is_CAS(int opcode);
 
   MemBarNode *leading_to_normal(MemBarNode *leading);
   MemBarNode *normal_to_leading(const MemBarNode *barrier);
@@ -1057,6 +1058,9 @@
   bool unnecessary_volatile(const Node *barrier);
   bool needs_releasing_store(const Node *store);
 
+  // predicate controlling translation of CompareAndSwapX
+  bool needs_acquiring_load_exclusive(const Node *load);
+
   // predicate controlling translation of StoreCM
   bool unnecessary_storestore(const Node *storecm);
 %}
@@ -1088,15 +1092,58 @@
   //   str<x>
   //   dmb ish
   //
+  // We can also use ldaxr and stlxr to implement compare and swap (CAS)
+  // sequences. These are normally translated to an instruction
+  // sequence like the following
+  //
+  //   dmb      ish
+  // retry:
+  //   ldxr<x>   rval raddr
+  //   cmp       rval rold
+  //   b.ne done
+  //   stlxr<x>  rval, rnew, raddr
+  //   cbnz      rval retry
+  // done:
+  //   cset      r0, eq
+  //   dmb ishld
+  //
+  // Note that the exclusive store is already using an stlxr
+  // instruction. That is required to ensure visibility to other
+  // threads of the exclusive write (assuming it succeeds) before that
+  // of any subsequent writes.
+  //
+  // The following instruction sequence is an improvement on the above
+  //
+  // retry:
+  //   ldaxr<x>  rval raddr
+  //   cmp       rval rold
+  //   b.ne done
+  //   stlxr<x>  rval, rnew, raddr
+  //   cbnz      rval retry
+  // done:
+  //   cset      r0, eq
+  //
+  // We don't need the leading dmb ish since the stlxr guarantees
+  // visibility of prior writes in the case that the swap is
+  // successful. Crucially we don't have to worry about the case where
+  // the swap is not successful since no valid program should be
+  // relying on visibility of prior changes by the attempting thread
+  // in the case where the CAS fails.
+  //
+  // Similarly, we don't need the trailing dmb ishld if we substitute
+  // an ldaxr instruction since that will provide all the guarantees we
+  // require regarding observation of changes made by other threads
+  // before any change to the CAS address observed by the load.
+  //
   // In order to generate the desired instruction sequence we need to
   // be able to identify specific 'signature' ideal graph node
   // sequences which i) occur as a translation of a volatile reads or
-  // writes and ii) do not occur through any other translation or
-  // graph transformation. We can then provide alternative aldc
-  // matching rules which translate these node sequences to the
-  // desired machine code sequences. Selection of the alternative
-  // rules can be implemented by predicates which identify the
-  // relevant node sequences.
+  // writes or CAS operations and ii) do not occur through any other
+  // translation or graph transformation. We can then provide
+  // alternative adlc matching rules which translate these node
+  // sequences to the desired machine code sequences. Selection of the
+  // alternative rules can be implemented by predicates which identify
+  // the relevant node sequences.
   //
   // The ideal graph generator translates a volatile read to the node
   // sequence
@@ -1163,6 +1210,15 @@
   // get if it is fed and feeds a cpuorder membar and if its feed
   // membar also feeds an acquiring load.
   //
+  // Finally an inlined (Unsafe) CAS operation is translated to the
+  // following ideal graph
+  //
+  //   MemBarRelease
+  //   MemBarCPUOrder
+  //   CompareAndSwapX {CardMark}-optional
+  //   MemBarCPUOrder
+  //   MemBarAcquire
+  //
   // So, where we can identify these volatile read and write
   // signatures we can choose to plant either of the above two code
   // sequences. For a volatile read we can simply plant a normal
@@ -1177,6 +1233,14 @@
   // and MemBarVolatile and instead plant a simple stlr<x>
   // instruction.
   //
+  // when we recognise a CAS signature we can choose to plant a dmb
+  // ish as a translation for the MemBarRelease, the conventional
+  // macro-instruction sequence for the CompareAndSwap node (which
+  // uses ldxr<x>) and then a dmb ishld for the MemBarAcquire.
+  // Alternatively, we can elide generation of the dmb instructions
+  // and plant the alternative CompareAndSwap macro-instruction
+  // sequence (which uses ldaxr<x>).
+  // 
   // Of course, the above only applies when we see these signature
   // configurations. We still want to plant dmb instructions in any
   // other cases where we may see a MemBarAcquire, MemBarRelease or
@@ -1194,7 +1258,8 @@
   // relevant dmb instructions.
   //
 
-  // graph traversal helpers used for volatile put/get optimization
+  // graph traversal helpers used for volatile put/get and CAS
+  // optimization
 
   // 1) general purpose helpers
 
@@ -1220,16 +1285,19 @@
 	return NULL;
     }
 
-    if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj())
+    if (!ctl || !mem || !ctl->is_Proj() || !mem->is_Proj()) {
       return NULL;
+    }
 
     membar = ctl->lookup(0);
 
-    if (!membar || !membar->is_MemBar())
+    if (!membar || !membar->is_MemBar()) {
       return NULL;
-
-    if (mem->lookup(0) != membar)
+    }
+
+    if (mem->lookup(0) != membar) {
       return NULL;
+    }
 
     return membar->as_MemBar();
   }
@@ -1259,8 +1327,9 @@
       }
     }
 
-    if (child == NULL)
+    if (child == NULL) {
       return NULL;
+    }
 
     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
       x = mem->fast_out(i);
@@ -1283,15 +1352,18 @@
   {
     int opcode = barrier->Opcode();
     // if this is a release membar we are ok
-    if (opcode == Op_MemBarRelease)
+    if (opcode == Op_MemBarRelease) {
       return true;
+    }
     // if its a cpuorder membar . . .
-    if (opcode != Op_MemBarCPUOrder)
+    if (opcode != Op_MemBarCPUOrder) {
       return false;
+    }
     // then the parent has to be a release membar
     MemBarNode *parent = parent_membar(barrier);
-    if (!parent)
+    if (!parent) {
       return false;
+    }
     opcode = parent->Opcode();
     return opcode == Op_MemBarRelease;
   }
@@ -1314,11 +1386,13 @@
   
   bool is_card_mark_membar(const MemBarNode *barrier)
   {
-    if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark))
+    if (!UseG1GC && !(UseConcMarkSweepGC && UseCondCardMark)) {
       return false;
-
-    if (barrier->Opcode() != Op_MemBarVolatile)
+    }
+
+    if (barrier->Opcode() != Op_MemBarVolatile) {
       return false;
+    }
 
     ProjNode *mem = barrier->proj_out(TypeFunc::Memory);
 
@@ -1333,8 +1407,8 @@
   }
 
 
-  // 3) helper predicates to traverse volatile put graphs which may
-  // contain GC barrier subgraphs
+  // 3) helper predicates to traverse volatile put or CAS graphs which
+  // may contain GC barrier subgraphs
 
   // Preamble
   // --------
@@ -1404,8 +1478,7 @@
   // currently being unmarked in which case the volatile put graph
   // will look slightly different
   //
-  //   MemBarRelease
-  //   MemBarCPUOrder___________________________________________
+  //   MemBarRelease____________________________________________
   //         ||    \\               Ctl \     Ctl \     \\  Mem \
   //         ||    StoreN/P[mo_release] CastP2X   If   LoadB     |
   //         | \     /                              \            |
@@ -1419,7 +1492,7 @@
   // memory flow includes the following subgraph:
   //
   //   MemBarRelease
-  //   MemBarCPUOrder
+  //  {MemBarCPUOrder}
   //          |  \      . . .
   //          |  StoreX[mo_release]  . . .
   //          |   /
@@ -1431,8 +1504,48 @@
   // detected starting from any candidate MemBarRelease,
   // StoreX[mo_release] or MemBarVolatile.
   //
+  // A simple variation on this normal case occurs for an unsafe CAS
+  // operation. The basic graph for a non-object CAS is
+  //
+  //   MemBarRelease
+  //         ||
+  //   MemBarCPUOrder
+  //         ||     \\   . . .
+  //         ||     CompareAndSwapX
+  //         ||       |
+  //         ||     SCMemProj
+  //         | \     /
+  //         | MergeMem
+  //         | /
+  //   MemBarCPUOrder
+  //         ||
+  //   MemBarAcquire
+  //
+  // The same basic variations on this arrangement (mutatis mutandis)
+  // occur when a card mark is introduced. i.e. we see the same basic
+  // shape but the StoreP/N is replaced with CompareAndSwapP/N and the
+  // tail of the graph is a pair comprising a MemBarCPUOrder +
+  // MemBarAcquire.
+  //
+  // So, in the case of a CAS the normal graph has the variant form
+  //
+  //   MemBarRelease
+  //   MemBarCPUOrder
+  //          |   \      . . .
+  //          |  CompareAndSwapX  . . .
+  //          |    |
+  //          |   SCMemProj
+  //          |   /  . . .
+  //         MergeMem
+  //          |
+  //   MemBarCPUOrder
+  //   MemBarAcquire
+  //
+  // This graph can also easily be detected starting from any
+  // candidate MemBarRelease, CompareAndSwapX or MemBarAcquire.
+  //
   // the code below uses two helper predicates, leading_to_normal and
-  // normal_to_leading to identify this configuration, one validating
+  // normal_to_leading to identify these normal graphs, one validating
   // the layout starting from the top membar and searching down and
   // the other validating the layout starting from the lower membar
   // and searching up.
@@ -1450,7 +1563,9 @@
   // they are only inserted for object puts. This significantly
   // complicates the task of identifying whether a MemBarRelease,
   // StoreX[mo_release] or MemBarVolatile forms part of a volatile put
-  // when using these GC configurations (see below).
+  // when using these GC configurations (see below). It adds similar
+  // complexity to the task of identifying whether a MemBarRelease,
+  // CompareAndSwapX or MemBarAcquire forms part of a CAS.
   //
   // In both cases the post-write subtree includes an auxiliary
   // MemBarVolatile (StoreLoad barrier) separating the object put and
@@ -1489,7 +1604,8 @@
   // (LoadB) from the card. Ctl and Mem are fed to the If via an
   // intervening StoreLoad barrier (MemBarVolatile).
   //
-  // So, with CMS we may see a node graph which looks like this
+  // So, with CMS we may see a node graph for a volatile object store
+  // which looks like this
   //
   //   MemBarRelease
   //   MemBarCPUOrder_(leading)__________________
@@ -1524,6 +1640,55 @@
   // from the StoreCM into the trailing membar (n.b. the latter
   // proceeds via a Phi associated with the If region).
   //
+  // The graph for a CAS varies slightly, the obvious difference being
+  // that the StoreN/P node is replaced by a CompareAndSwapP/N node
+  // and the trailing MemBarVolatile by a MemBarCPUOrder +
+  // MemBarAcquire pair. The other important difference is that the
+  // CompareAndSwap node's SCMemProj is not merged into the card mark
+  // membar - it still feeds the trailing MergeMem. This also means
+  // that the card mark membar receives its Mem feed directly from the
+  // leading membar rather than via a MergeMem.
+  //
+  //   MemBarRelease
+  //   MemBarCPUOrder__(leading)_________________________
+  //       ||                       \\                 C \
+  //   MemBarVolatile (card mark)  CompareAndSwapN/P  CastP2X
+  //     C |  ||    M |              |
+  //       | LoadB    |       ______/|
+  //       |   |      |      /       |
+  //       | Cmp      |     /      SCMemProj
+  //       | /        |    /         |
+  //       If         |   /         /
+  //       | \        |  /         /
+  // IfFalse  IfTrue  | /         /
+  //       \     / \  |/ prec    /
+  //        \   / StoreCM       /
+  //         \ /      |        /
+  //        Region   . . .    /
+  //          | \            /
+  //          |  . . .  \   / Bot
+  //          |       MergeMem
+  //          |          |
+  //        MemBarCPUOrder
+  //        MemBarAcquire (trailing)
+  //
+  // This has a slightly different memory subgraph to the one seen
+  // previously but the core of it is the same as for the CAS normal
+  // subgraph
+  //
+  //   MemBarRelease
+  //   MemBarCPUOrder____
+  //      ||             \      . . .
+  //   MemBarVolatile  CompareAndSwapX  . . .
+  //      |  \            |
+  //        . . .   SCMemProj
+  //          |     /  . . .
+  //         MergeMem
+  //          |
+  //   MemBarCPUOrder
+  //   MemBarAcquire
+  //
+  //
   // G1 is quite a lot more complicated. The nodes inserted on behalf
   // of G1 may comprise: a pre-write graph which adds the old value to
   // the SATB queue; the releasing store itself; and, finally, a
@@ -1575,12 +1740,16 @@
   // n.b. the LoadB in this subgraph is not the card read -- it's a
   // read of the SATB queue active flag.
   //
+  // Once again the CAS graph is a minor variant on the above with the
+  // expected substitutions of CompareAndSwapX for StoreN/P and
+  // MemBarCPUOrder + MemBarAcquire for trailing MemBarVolatile.
+  //
   // The G1 post-write subtree is also optional, this time when the
   // new value being written is either null or can be identified as a
   // newly allocated (young gen) object with no intervening control
   // flow. The latter cannot happen but the former may, in which case
-  // the card mark membar is omitted and the memory feeds from the
-  // leading membar and the StoreN/P are merged direct into the
+  // the card mark membar is omitted and the memory feeds from the
+  // leading membar and the StoreN/P are merged direct into the
   // trailing membar as per the normal subgraph. So, the only special
   // case which arises is when the post-write subgraph is generated.
   //
@@ -1668,113 +1837,53 @@
   // value check has been elided the total number of Phis is 2
   // otherwise it is 3.
   //
+  // The CAS graph when using G1GC also includes a pre-write subgraph
+  // and an optional post-write subgraph. The same variations are
+  // introduced as for CMS with conditional card marking i.e. the
+  // StoreP/N is swapped for a CompareAndSwapP/N, the trailing
+  // MemBarVolatile for a MemBarCPUOrder + MemBarAcquire pair and the
+  // Mem feed from the CompareAndSwapP/N includes a precedence
+  // dependency feed to the StoreCM and a feed via an SCMemProj to the
+  // trailing membar. So, as before the configuration includes the
+  // normal CAS graph as a subgraph of the memory flow.
+  //
   // So, the upshot is that in all cases the volatile put graph will
   // include a *normal* memory subgraph betwen the leading membar and
-  // its child membar. When that child is not a card mark membar then
-  // it marks the end of a volatile put subgraph. If the child is a
-  // card mark membar then the normal subgraph will form part of a
-  // volatile put subgraph if and only if the child feeds an
-  // AliasIdxBot Mem feed to a trailing barrier via a MergeMem. That
-  // feed is either direct (for CMS) or via 2 or 3 Phi nodes merging
-  // the leading barrier memory flow (for G1).
+  // its child membar, either a volatile put graph (including a
+  // releasing StoreX) or a CAS graph (including a CompareAndSwapX).
+  // When that child is not a card mark membar then it marks the end
+  // of the volatile put or CAS subgraph. If the child is a card mark
+  // membar then the normal subgraph will form part of a volatile put
+  // subgraph if and only if the child feeds an AliasIdxBot Mem feed
+  // to a trailing barrier via a MergeMem. That feed is either direct
+  // (for CMS) or via 2 or 3 Phi nodes merging the leading barrier
+  // memory flow (for G1).
   // 
   // The predicates controlling generation of instructions for store
   // and barrier nodes employ a few simple helper functions (described
-  // below) which identify the presence or absence of these subgraph
-  // configurations and provide a means of traversing from one node in
-  // the subgraph to another.
+  // below) which identify the presence or absence of all these
+  // subgraph configurations and provide a means of traversing from
+  // one node in the subgraph to another.
+
+  // is_CAS(int opcode)
+  //
+  // return true if opcode is one of the possible CompareAndSwapX
+  // values otherwise false.
+
+  bool is_CAS(int opcode)
+  {
+    return (opcode == Op_CompareAndSwapI ||
+	    opcode == Op_CompareAndSwapL ||
+	    opcode == Op_CompareAndSwapN ||
+	    opcode == Op_CompareAndSwapP);
+  }
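
For orientation only (not part of the changeset): the CompareAndSwapX nodes that is_CAS recognises, and the volatile load/store shapes discussed in the comments above, come from ordinary Java-level constructs. A rough sketch, with class and field names invented for illustration:

```java
import java.util.concurrent.atomic.AtomicLong;

class VolatileAndCasExample {
  volatile int flag;                        // volatile accesses produce the LoadX[mo_acquire]/StoreX[mo_release] shapes
  final AtomicLong counter = new AtomicLong();

  void publish()  { flag = 1; }             // releasing store: candidate for stlr<x> rather than dmb ish; str<x>; dmb ish
  int  consume()  { return flag; }          // acquiring load: candidate for ldar<x> rather than ldr<x>; dmb ishld

  boolean bump(long expected, long next) {
    // Inlined Unsafe CAS: the compiler sees a CompareAndSwapL bracketed by
    // MemBarRelease/MemBarCPUOrder ... MemBarCPUOrder/MemBarAcquire, the
    // signature these predicates match so that the dmb instructions can be
    // elided in favour of the ldaxr/stlxr sequence.
    return counter.compareAndSet(expected, next);
  }
}
```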
 
   // leading_to_normal
   //
-  //graph traversal helper which detects the normal case Mem feed
-  // from a release membar (or, optionally, its cpuorder child) to a
-  // dependent volatile membar i.e. it ensures that the following Mem
-  // flow subgraph is present.
-  //
-  //   MemBarRelease
-  //   MemBarCPUOrder
-  //          |  \      . . .
-  //          |  StoreN/P[mo_release]  . . .
-  //          |   /
-  //         MergeMem
-  //          |
-  //   MemBarVolatile
-  //
-  // if the correct configuration is present returns the volatile
-  // membar otherwise NULL.
-  //
-  // the input membar is expected to be either a cpuorder membar or a
-  // release membar. in the latter case it should not have a cpu membar
-  // child.
-  //
-  // the returned membar may be a card mark membar rather than a
-  // trailing membar.
-
-  MemBarNode *leading_to_normal(MemBarNode *leading)
-  {
-    assert((leading->Opcode() == Op_MemBarRelease ||
-	    leading->Opcode() == Op_MemBarCPUOrder),
-	   "expecting a volatile or cpuroder membar!");
-
-    // check the mem flow
-    ProjNode *mem = leading->proj_out(TypeFunc::Memory);
-
-    if (!mem)
-      return NULL;
-
-    Node *x = NULL;
-    StoreNode * st = NULL;
-    MergeMemNode *mm = NULL;
-
-    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
-      x = mem->fast_out(i);
-      if (x->is_MergeMem()) {
-	if (mm != NULL)
-	  return NULL;
-	// two merge mems is one too many
-	mm = x->as_MergeMem();
-      } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
-	// two releasing stores is one too many
-	if (st != NULL)
-	  return NULL;
-	st = x->as_Store();
-      }
-    }
-
-    if (!mm || !st)
-      return NULL;
-
-    bool found = false;
-    // ensure the store feeds the merge
-    for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
-      if (st->fast_out(i) == mm) {
-	found = true;
-	break;
-      }
-    }
-
-    if (!found)
-      return NULL;
-
-    MemBarNode *mbvol = NULL;
-    // ensure the merge feeds a volatile membar
-    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
-      x = mm->fast_out(i);
-      if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
-	mbvol = x->as_MemBar();
-	break;
-      }
-    }
-
-    return mbvol;
-  }
-
-  // normal_to_leading
-  //
-  // graph traversal helper which detects the normal case Mem feed
-  // from either a card mark or a trailing membar to a preceding
-  // release membar (optionally its cpuorder child) i.e. it ensures
-  // that the following Mem flow subgraph is present.
+  // graph traversal helper which detects the normal case Mem feed from
+  // a release membar (or, optionally, its cpuorder child) to a
+  // dependent volatile membar i.e. it ensures that one or other of
+  // the following Mem flow subgraphs is present.
   //
   //   MemBarRelease
   //   MemBarCPUOrder {leading}
@@ -1783,7 +1892,165 @@
   //          |   /
   //         MergeMem
   //          |
-  //   MemBarVolatile
+  //   MemBarVolatile {trailing or card mark}
+  //
+  //   MemBarRelease
+  //   MemBarCPUOrder {leading}
+  //      |       \      . . .
+  //      |     CompareAndSwapX  . . .
+  //               |
+  //     . . .    SCMemProj
+  //           \   |
+  //      |    MergeMem
+  //      |       /
+  //    MemBarCPUOrder
+  //    MemBarAcquire {trailing}
+  //
+  // if the correct configuration is present returns the trailing
+  // membar otherwise NULL.
+  //
+  // the input membar is expected to be either a cpuorder membar or a
+  // release membar. in the latter case it should not have a cpu membar
+  // child.
+  //
+  // the returned value may be a card mark or trailing membar
+  //
+
+  MemBarNode *leading_to_normal(MemBarNode *leading)
+  {
+    assert((leading->Opcode() == Op_MemBarRelease ||
+	    leading->Opcode() == Op_MemBarCPUOrder),
+	   "expecting a volatile or cpuorder membar!");
+
+    // check the mem flow
+    ProjNode *mem = leading->proj_out(TypeFunc::Memory);
+
+    if (!mem) {
+      return NULL;
+    }
+
+    Node *x = NULL;
+    StoreNode * st = NULL;
+    LoadStoreNode *cas = NULL;
+    MergeMemNode *mm = NULL;
+
+    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
+      x = mem->fast_out(i);
+      if (x->is_MergeMem()) {
+	if (mm != NULL) {
+	  return NULL;
+	}
+	// two merge mems is one too many
+	mm = x->as_MergeMem();
+      } else if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
+	// two releasing stores/CAS nodes is one too many
+	if (st != NULL || cas != NULL) {
+	  return NULL;
+	}
+	st = x->as_Store();
+      } else if (is_CAS(x->Opcode())) {
+	if (st != NULL || cas != NULL) {
+	  return NULL;
+	}
+	cas = x->as_LoadStore();
+      }
+    }
+
+    // must have a store or a cas
+    if (!st && !cas) {
+      return NULL;
+    }
+
+    // must have a merge if we also have st
+    if (st && !mm) {
+      return NULL;
+    }
+
+    Node *y = NULL;
+    if (cas) {
+      // look for an SCMemProj
+      for (DUIterator_Fast imax, i = cas->fast_outs(imax); i < imax; i++) {
+	x = cas->fast_out(i);
+	if (x->is_Proj()) {
+	  y = x;
+	  break;
+	}
+      }
+      if (y == NULL) {
+	return NULL;
+      }
+      // the proj must feed a MergeMem
+      for (DUIterator_Fast imax, i = y->fast_outs(imax); i < imax; i++) {
+	x = y->fast_out(i);
+	if (x->is_MergeMem()) {
+	  mm = x->as_MergeMem();
+	  break;
+	}
+      }
+      if (mm == NULL)
+	return NULL;
+    } else {
+      // ensure the store feeds the existing mergemem;
+      for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
+	if (st->fast_out(i) == mm) {
+	  y = st;
+	  break;
+	}
+      }
+      if (y == NULL) {
+	return NULL;
+      }
+    }
+
+    MemBarNode *mbar = NULL;
+    // ensure the merge feeds to the expected type of membar
+    for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
+      x = mm->fast_out(i);
+      if (x->is_MemBar()) {
+	int opcode = x->Opcode();
+	if (opcode == Op_MemBarVolatile && st) {
+	  mbar = x->as_MemBar();
+	} else if (cas && opcode == Op_MemBarCPUOrder) {
+	  MemBarNode *y =  x->as_MemBar();
+	  y = child_membar(y);
+	  if (y != NULL && y->Opcode() == Op_MemBarAcquire) {
+	    mbar = y;
+	  }
+	}
+	break;
+      }
+    }
+
+    return mbar;
+  }
+
+  // normal_to_leading
+  //
+  // graph traversal helper which detects the normal case Mem feed
+  // from either a card mark or a trailing membar to a preceding
+  // release membar (optionally its cpuorder child) i.e. it ensures
+  // that one or other of the following Mem flow subgraphs is present.
+  //
+  //   MemBarRelease
+  //   MemBarCPUOrder {leading}
+  //          |  \      . . .
+  //          |  StoreN/P[mo_release]  . . .
+  //          |   /
+  //         MergeMem
+  //          |
+  //   MemBarVolatile {card mark or trailing}
+  //
+  //   MemBarRelease
+  //   MemBarCPUOrder {leading}
+  //      |       \      . . .
+  //      |     CompareAndSwapX  . . .
+  //               |
+  //     . . .    SCMemProj
+  //           \   |
+  //      |    MergeMem
+  //      |        /
+  //    MemBarCPUOrder
+  //    MemBarAcquire {trailing}
   //
   // this predicate checks for the same flow as the previous predicate
   // but starting from the bottom rather than the top.
@@ -1797,51 +2064,116 @@
   MemBarNode *normal_to_leading(const MemBarNode *barrier)
   {
     // input must be a volatile membar
-    assert(barrier->Opcode() == Op_MemBarVolatile, "expecting a volatile membar");
+    assert((barrier->Opcode() == Op_MemBarVolatile ||
+	    barrier->Opcode() == Op_MemBarAcquire),
+	   "expecting a volatile or an acquire membar");
     Node *x;
+    bool is_cas = barrier->Opcode() == Op_MemBarAcquire;
+
+    // if we have an acquire membar then it must be fed via a CPUOrder
+    // membar
+
+    if (is_cas) {
+      // skip to parent barrier which must be a cpuorder
+      x = parent_membar(barrier);
+      if (x->Opcode() != Op_MemBarCPUOrder)
+	return NULL;
+    } else {
+      // start from the supplied barrier
+      x = (Node *)barrier;
+    }
 
     // the Mem feed to the membar should be a merge
-    x = barrier->in(TypeFunc::Memory);
+    x = x->in(TypeFunc::Memory);
     if (!x->is_MergeMem())
       return NULL;
 
     MergeMemNode *mm = x->as_MergeMem();
 
-    // the AliasIdxBot slice should be another MemBar projection
-    x = mm->in(Compile::AliasIdxBot);
+    if (is_cas) {
+      // the merge should be fed from the CAS via an SCMemProj node
+      x = NULL;
+      for (uint idx = 1; idx < mm->req(); idx++) {
+	if (mm->in(idx)->Opcode() == Op_SCMemProj) {
+	  x = mm->in(idx);
+	  break;
+	}
+      }
+      if (x == NULL) {
+	return NULL;
+      }
+      // check for a CAS feeding this proj
+      x = x->in(0);
+      int opcode = x->Opcode();
+      if (!is_CAS(opcode)) {
+	return NULL;
+      }
+      // the CAS should get its mem feed from the leading membar
+      x = x->in(MemNode::Memory);
+    } else {
+      // the merge should get its Bottom mem feed from the leading membar
+      x = mm->in(Compile::AliasIdxBot);      
+    } 
+
     // ensure this is a non control projection
-    if (!x->is_Proj() || x->is_CFG())
+    if (!x->is_Proj() || x->is_CFG()) {
       return NULL;
+    }
     // if it is fed by a membar that's the one we want
     x = x->in(0);
 
-    if (!x->is_MemBar())
+    if (!x->is_MemBar()) {
       return NULL;
+    }
 
     MemBarNode *leading = x->as_MemBar();
     // reject invalid candidates
-    if (!leading_membar(leading))
+    if (!leading_membar(leading)) {
       return NULL;
-
-    // ok, we have a leading ReleaseMembar, now for the sanity clauses
-
-    // the leading membar must feed Mem to a releasing store
+    }
+
+    // ok, we have a leading membar, now for the sanity clauses
+
+    // the leading membar must feed Mem to a releasing store or CAS
     ProjNode *mem = leading->proj_out(TypeFunc::Memory);
     StoreNode *st = NULL;
+    LoadStoreNode *cas = NULL;
     for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
       x = mem->fast_out(i);
       if (x->is_Store() && x->as_Store()->is_release() && x->Opcode() != Op_StoreCM) {
+	// two stores or CASes is one too many
+	if (st != NULL || cas != NULL) {
+	  return NULL;
+	}
 	st = x->as_Store();
-	break;
+      } else if (is_CAS(x->Opcode())) {
+	if (st != NULL || cas != NULL) {
+	  return NULL;
+	}
+	cas = x->as_LoadStore();
       }
     }
-    if (st == NULL)
+
+    // we must have at least one of a store or a cas
+    if (st == NULL && cas == NULL) {
       return NULL;
-
-    // the releasing store has to feed the same merge
-    for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
-      if (st->fast_out(i) == mm)
-	return leading;
+    }
+
+    if (st == NULL) {
+      // nothing more to check
+      return leading;
+    } else {
+      // we should not have a store if we started from an acquire
+      if (is_cas) {
+	return NULL;
+      }
+
+      // the store should feed the merge we used to get here
+      for (DUIterator_Fast imax, i = st->fast_outs(imax); i < imax; i++) {
+	if (st->fast_out(i) == mm) {
+	  return leading;
+	}
+      }
     }
 
     return NULL;
@@ -1865,8 +2197,8 @@
   //  Bot |  / 
   //   MergeMem 
   //      |
-  //   MemBarVolatile (trailing)
-  //
+  //      |
+  //    MemBarVolatile {trailing}
   //
   // 2)
   //   MemBarRelease/CPUOrder (leading)
@@ -1884,7 +2216,8 @@
   //     Bot |   /
   //       MergeMem
   //         |
-  //   MemBarVolatile (trailing)
+  //    MemBarVolatile {trailing}
+  //
   //
   // 3)
   //   MemBarRelease/CPUOrder (leading)
@@ -1905,7 +2238,8 @@
   //     Bot |   /
   //       MergeMem
   //         |
-  //   MemBarVolatile (trailing)
+  //         |
+  //    MemBarVolatile {trailing}
   //
   // configuration 1 is only valid if UseConcMarkSweepGC &&
   // UseCondCardMark
@@ -1955,8 +2289,9 @@
 	    break;
 	  }
 	}
-	if (!phi)
+	if (!phi) {
 	  return NULL;
+	}
 	// look for another merge below this phi
 	feed = phi;
       } else {
@@ -1969,7 +2304,7 @@
     assert(mm->as_MergeMem()->in(Compile::AliasIdxBot) == feed, "expecting membar to feed AliasIdxBot slice to Merge");
 
     MemBarNode *trailing = NULL;
-    // be sure we have a volatile membar below the merge
+    // be sure we have a trailing membar below the merge
     for (DUIterator_Fast imax, i = mm->fast_outs(imax); i < imax; i++) {
       x = mm->fast_out(i);
       if (x->is_MemBar() && x->Opcode() == Op_MemBarVolatile) {
@@ -1984,24 +2319,32 @@
   // trailing_to_card_mark
   //
   // graph traversal helper which detects extra, non-normal Mem feed
-  // from a trailing membar to a preceding card mark volatile membar
-  // i.e. it identifies whether one of the three possible extra GC
-  // post-write Mem flow subgraphs is present
+  // from a trailing volatile membar to a preceding card mark volatile
+  // membar i.e. it identifies whether one of the three possible extra
+  // GC post-write Mem flow subgraphs is present
   //
   // this predicate checks for the same flow as the previous predicate
   // but starting from the bottom rather than the top.
   //
-  // if the configurationis present returns the card mark membar
+  // if the configuration is present returns the card mark membar
   // otherwise NULL
+  //
+  // n.b. the supplied membar is expected to be a trailing
+  // MemBarVolatile i.e. the caller must ensure the input node has the
+  // correct opcode
 
   MemBarNode *trailing_to_card_mark(const MemBarNode *trailing)
   {
-    assert(!is_card_mark_membar(trailing), "not expecting a card mark membar");
-
+    assert(trailing->Opcode() == Op_MemBarVolatile,
+	   "expecting a volatile membar");
+    assert(!is_card_mark_membar(trailing),
+	   "not expecting a card mark membar");
+
+    // the Mem feed to the membar should be a merge
     Node *x = trailing->in(TypeFunc::Memory);
-    // the Mem feed to the membar should be a merge
-    if (!x->is_MergeMem())
+    if (!x->is_MergeMem()) {
       return NULL;
+    }
 
     MergeMemNode *mm = x->as_MergeMem();
 
@@ -2054,13 +2397,15 @@
     }
     // the proj has to come from the card mark membar
     x = x->in(0);
-    if (!x->is_MemBar())
+    if (!x->is_MemBar()) {
       return NULL;
+    }
 
     MemBarNode *card_mark_membar = x->as_MemBar();
 
-    if (!is_card_mark_membar(card_mark_membar))
+    if (!is_card_mark_membar(card_mark_membar)) {
       return NULL;
+    }
 
     return card_mark_membar;
   }
@@ -2068,7 +2413,7 @@
   // trailing_to_leading
   //
   // graph traversal helper which checks the Mem flow up the graph
-  // from a (non-card mark) volatile membar attempting to locate and
+  // from a (non-card mark) trailing membar attempting to locate and
   // return an associated leading membar. it first looks for a
   // subgraph in the normal configuration (relying on helper
   // normal_to_leading). failing that it then looks for one of the
@@ -2081,22 +2426,35 @@
   // if the configuration is valid returns the cpuorder member for
   // preference or when absent the release membar otherwise NULL.
   //
-  // n.b. the input membar is expected to be a volatile membar but
-  // must *not* be a card mark membar.
+  // n.b. the input membar is expected to be either a volatile or
+  // acquire membar but in the former case must *not* be a card mark
+  // membar.
 
   MemBarNode *trailing_to_leading(const MemBarNode *trailing)
   {
-    assert(!is_card_mark_membar(trailing), "not expecting a card mark membar");
+    assert((trailing->Opcode() == Op_MemBarAcquire ||
+	    trailing->Opcode() == Op_MemBarVolatile),
+	   "expecting an acquire or volatile membar");
+    assert((trailing->Opcode() != Op_MemBarVolatile ||
+	    !is_card_mark_membar(trailing)),
+	   "not expecting a card mark membar");
 
     MemBarNode *leading = normal_to_leading(trailing);
 
-    if (leading)
+    if (leading) {
       return leading;
+    }
+
+    // nothing more to do if this is an acquire
+    if (trailing->Opcode() == Op_MemBarAcquire) {
+      return NULL;
+    }
 
     MemBarNode *card_mark_membar = trailing_to_card_mark(trailing);
 
-    if (!card_mark_membar)
+    if (!card_mark_membar) {
       return NULL;
+    }
 
     return normal_to_leading(card_mark_membar);
   }
@@ -2105,10 +2463,12 @@
 
 bool unnecessary_acquire(const Node *barrier)
 {
-  // assert barrier->is_MemBar();
-  if (UseBarriersForVolatile)
+  assert(barrier->is_MemBar(), "expecting a membar");
+
+  if (UseBarriersForVolatile) {
     // we need to plant a dmb
     return false;
+  }
 
   // a volatile read derived from bytecode (or also from an inlined
   // SHA field read via LibraryCallKit::load_field_from_object)
@@ -2140,8 +2500,9 @@
     //
     // where * tags node we were passed
     // and |k means input k
-    if (x->is_DecodeNarrowPtr())
+    if (x->is_DecodeNarrowPtr()) {
       x = x->in(1);
+    }
 
     return (x->is_Load() && x->as_Load()->is_acquire());
   }
@@ -2167,8 +2528,9 @@
     return false;
   ctl = parent->proj_out(TypeFunc::Control);
   mem = parent->proj_out(TypeFunc::Memory);
-  if (!ctl || !mem)
+  if (!ctl || !mem) {
     return false;
+  }
   // ensure the proj nodes both feed a LoadX[mo_acquire]
   LoadNode *ld = NULL;
   for (DUIterator_Fast imax, i = ctl->fast_outs(imax); i < imax; i++) {
@@ -2180,38 +2542,46 @@
     }
   }
   // it must be an acquiring load
-  if (! ld || ! ld->is_acquire())
-    return false;
-  for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
-    x = mem->fast_out(i);
-    // if we see the same load we drop it and stop searching
-    if (x == ld) {
-      ld = NULL;
-      break;
-    }
-  }
-  // we must have dropped the load
-  if (ld)
-    return false;
-  // check for a child cpuorder membar
-  MemBarNode *child  = child_membar(barrier->as_MemBar());
-  if (!child || child->Opcode() != Op_MemBarCPUOrder)
-    return false;
-
-  return true;
+  if (ld && ld->is_acquire()) {
+
+    for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
+      x = mem->fast_out(i);
+      // if we see the same load we drop it and stop searching
+      if (x == ld) {
+	ld = NULL;
+	break;
+      }
+    }
+    // we must have dropped the load
+    if (ld == NULL) {
+      // check for a child cpuorder membar
+      MemBarNode *child  = child_membar(barrier->as_MemBar());
+      if (child && child->Opcode() == Op_MemBarCPUOrder)
+	return true;
+    }
+  }
+
+  // final option for an unnecessary membar is that it is a trailing node
+  // belonging to a CAS
+
+  MemBarNode *leading = trailing_to_leading(barrier->as_MemBar());
+
+  return leading != NULL;
 }
 
 bool needs_acquiring_load(const Node *n)
 {
-  // assert n->is_Load();
-  if (UseBarriersForVolatile)
+  assert(n->is_Load(), "expecting a load");
+  if (UseBarriersForVolatile) {
     // we use a normal load and a dmb
     return false;
+  }
 
   LoadNode *ld = n->as_Load();
 
-  if (!ld->is_acquire())
+  if (!ld->is_acquire()) {
     return false;
+  }
 
   // check if this load is feeding an acquire membar
   //
@@ -2261,20 +2631,23 @@
 
   membar = parent_membar(ld);
 
-  if (!membar || !membar->Opcode() == Op_MemBarCPUOrder)
+  if (!membar || membar->Opcode() != Op_MemBarCPUOrder) {
     return false;
+  }
 
   // ensure that there is a CPUOrder->Acquire->CPUOrder membar chain
 
   membar = child_membar(membar);
 
-  if (!membar || !membar->Opcode() == Op_MemBarAcquire)
+  if (!membar || membar->Opcode() != Op_MemBarAcquire) {
     return false;
+  }
 
   membar = child_membar(membar);
   
-  if (!membar || !membar->Opcode() == Op_MemBarCPUOrder)
+  if (!membar || membar->Opcode() != Op_MemBarCPUOrder) {
     return false;
+  }
 
   return true;
 }
@@ -2285,9 +2658,10 @@
 	  n->Opcode() == Op_MemBarRelease),
 	 "expecting a release membar");
 
-  if (UseBarriersForVolatile)
+  if (UseBarriersForVolatile) {
     // we need to plant a dmb
     return false;
+  }
 
   // if there is a dependent CPUOrder barrier then use that as the
   // leading
@@ -2303,12 +2677,14 @@
   // must start with a normal feed
   MemBarNode *child_barrier = leading_to_normal(barrier);
 
-  if (!child_barrier)
+  if (!child_barrier) {
     return false;
-
-  if (!is_card_mark_membar(child_barrier))
+  }
+
+  if (!is_card_mark_membar(child_barrier)) {
     // this is the trailing membar and we are done
     return true;
+  }
 
   // must be sure this card mark feeds a trailing membar
   MemBarNode *trailing = card_mark_to_trailing(child_barrier);
@@ -2318,17 +2694,19 @@
 bool unnecessary_volatile(const Node *n)
 {
   // assert n->is_MemBar();
-  if (UseBarriersForVolatile)
+  if (UseBarriersForVolatile) {
     // we need to plant a dmb
     return false;
+  }
 
   MemBarNode *mbvol = n->as_MemBar();
 
   // first we check if this is part of a card mark. if so then we have
   // to generate a StoreLoad barrier
   
-  if (is_card_mark_membar(mbvol))
+  if (is_card_mark_membar(mbvol)) {
       return false;
+  }
 
   // ok, if it's not a card mark then we still need to check if it is
   // a trailing membar of a volatile put graph.
@@ -2341,29 +2719,33 @@
 bool needs_releasing_store(const Node *n)
 {
   // assert n->is_Store();
-  if (UseBarriersForVolatile)
+  if (UseBarriersForVolatile) {
     // we use a normal store and dmb combination
     return false;
+  }
 
   StoreNode *st = n->as_Store();
 
   // the store must be marked as releasing
-  if (!st->is_release())
+  if (!st->is_release()) {
     return false;
+  }
 
   // the store must be fed by a membar
 
   Node *x = st->lookup(StoreNode::Memory);
 
-  if (! x || !x->is_Proj())
+  if (! x || !x->is_Proj()) {
     return false;
+  }
 
   ProjNode *proj = x->as_Proj();
 
   x = proj->lookup(0);
 
-  if (!x || !x->is_MemBar())
+  if (!x || !x->is_MemBar()) {
     return false;
+  }
 
   MemBarNode *barrier = x->as_MemBar();
 
@@ -2372,24 +2754,76 @@
   // volatile put graph.
 
   // reject invalid candidates
-  if (!leading_membar(barrier))
+  if (!leading_membar(barrier)) {
     return false;
+  }
 
   // does this lead a normal subgraph?
   MemBarNode *mbvol = leading_to_normal(barrier);
 
-  if (!mbvol)
+  if (!mbvol) {
     return false;
+  }
 
   // all done unless this is a card mark
-  if (!is_card_mark_membar(mbvol))
+  if (!is_card_mark_membar(mbvol)) {
     return true;
+  }
   
   // we found a card mark -- just make sure we have a trailing barrier
 
   return (card_mark_to_trailing(mbvol) != NULL);
 }
 
+// predicate controlling translation of CAS
+//
+// returns true if CAS needs to use an acquiring load otherwise false
+
+bool needs_acquiring_load_exclusive(const Node *n)
+{
+  assert(is_CAS(n->Opcode()), "expecting a compare and swap");
+  if (UseBarriersForVolatile) {
+    return false;
+  }
+
+  // CAS nodes only ought to turn up in inlined unsafe CAS operations
+#ifdef ASSERT
+  LoadStoreNode *st = n->as_LoadStore();
+
+  // the store must be fed by a membar
+
+  Node *x = st->lookup(StoreNode::Memory);
+
+  assert (x && x->is_Proj(), "CAS not fed by memory proj!");
+
+  ProjNode *proj = x->as_Proj();
+
+  x = proj->lookup(0);
+
+  assert (x && x->is_MemBar(), "CAS not fed by membar!");
+
+  MemBarNode *barrier = x->as_MemBar();
+
+  // the barrier must be a cpuorder membar fed by a release membar
+
+  assert(barrier->Opcode() == Op_MemBarCPUOrder,
+	 "CAS not fed by cpuorder membar!");
+      
+  MemBarNode *b = parent_membar(barrier);
+  assert ((b != NULL && b->Opcode() == Op_MemBarRelease),
+	  "CAS not fed by cpuorder+release membar pair!");
+
+  // does this lead a normal subgraph?
+  MemBarNode *mbar = leading_to_normal(barrier);
+
+  assert(mbar != NULL, "CAS not embedded in normal graph!");
+
+  assert(mbar->Opcode() == Op_MemBarAcquire, "trailing membar should be an acquire");
+#endif // ASSERT
+  // so we can just return true here
+  return true;
+}
+
 // predicate controlling translation of StoreCM
 //
 // returns true if a StoreStore must precede the card write otherwise
@@ -2403,14 +2837,16 @@
   // and the associated card mark when we are using CMS without
   // conditional card marking
 
-  if (!UseConcMarkSweepGC || UseCondCardMark)
+  if (!UseConcMarkSweepGC || UseCondCardMark) {
     return true;
+  }
 
   // if we are implementing volatile puts using barriers then the
   // object put is implemented as an str so we must insert the dmb ishst
 
-  if (UseBarriersForVolatile)
+  if (UseBarriersForVolatile) {
     return false;
+  }
 
   // we can omit the dmb ishst if this StoreCM is part of a volatile
   // put because in that case the put will be implemented by stlr
@@ -2422,19 +2858,22 @@
 
   Node *x = storecm->in(StoreNode::Memory);
 
-  if (!x->is_Proj())
+  if (!x->is_Proj()) {
     return false;
+  }
 
   x = x->in(0);
 
-  if (!x->is_MemBar())
+  if (!x->is_MemBar()) {
     return false;
+  }
 
   MemBarNode *leading = x->as_MemBar();
 
   // reject invalid candidates
-  if (!leading_membar(leading))
+  if (!leading_membar(leading)) {
     return false;
+  }
 
   // we can omit the StoreStore if it is the head of a normal subgraph
   return (leading_to_normal(leading) != NULL);
@@ -3024,6 +3463,10 @@
   return true;  // Per default match rules are supported.
 }
 
+const int Matcher::float_pressure(int default_pressure_threshold) {
+  return default_pressure_threshold;
+}
+
 int Matcher::regnum_to_fpu_offset(int regnum)
 {
   Unimplemented();
@@ -8365,9 +8808,13 @@
 // XXX No flag versions for CompareAndSwap{I,L,P,N} because matcher
 // can't match them
 
+// standard CompareAndSwapX when we are using barriers
+// these have higher priority than the rules selected by a predicate
+
 instruct compareAndSwapI(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
 
   match(Set res (CompareAndSwapI mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
 
   effect(KILL cr);
 
@@ -8385,6 +8832,7 @@
 instruct compareAndSwapL(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
 
   match(Set res (CompareAndSwapL mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
 
   effect(KILL cr);
 
@@ -8402,6 +8850,7 @@
 instruct compareAndSwapP(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
 
   match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
 
   effect(KILL cr);
 
@@ -8419,6 +8868,7 @@
 instruct compareAndSwapN(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
 
   match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+  ins_cost(2 * VOLATILE_REF_COST);
 
   effect(KILL cr);
 
@@ -8433,6 +8883,84 @@
   ins_pipe(pipe_slow);
 %}
 
+// alternative CompareAndSwapX when we are eliding barriers
+
+instruct compareAndSwapIAcq(iRegINoSp res, indirect mem, iRegINoSp oldval, iRegINoSp newval, rFlagsReg cr) %{
+
+  predicate(needs_acquiring_load_exclusive(n));
+  match(Set res (CompareAndSwapI mem (Binary oldval newval)));
+  ins_cost(VOLATILE_REF_COST);
+
+  effect(KILL cr);
+
+ format %{
+    "cmpxchgw_acq $mem, $oldval, $newval\t# (int) if $mem == $oldval then $mem <-- $newval"
+    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
+            aarch64_enc_cset_eq(res));
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapLAcq(iRegINoSp res, indirect mem, iRegLNoSp oldval, iRegLNoSp newval, rFlagsReg cr) %{
+
+  predicate(needs_acquiring_load_exclusive(n));
+  match(Set res (CompareAndSwapL mem (Binary oldval newval)));
+  ins_cost(VOLATILE_REF_COST);
+
+  effect(KILL cr);
+
+ format %{
+    "cmpxchg_acq $mem, $oldval, $newval\t# (long) if $mem == $oldval then $mem <-- $newval"
+    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
+            aarch64_enc_cset_eq(res));
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapPAcq(iRegINoSp res, indirect mem, iRegP oldval, iRegP newval, rFlagsReg cr) %{
+
+  predicate(needs_acquiring_load_exclusive(n));
+  match(Set res (CompareAndSwapP mem (Binary oldval newval)));
+  ins_cost(VOLATILE_REF_COST);
+
+  effect(KILL cr);
+
+ format %{
+    "cmpxchg_acq $mem, $oldval, $newval\t# (ptr) if $mem == $oldval then $mem <-- $newval"
+    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchg_acq(mem, oldval, newval),
+            aarch64_enc_cset_eq(res));
+
+  ins_pipe(pipe_slow);
+%}
+
+instruct compareAndSwapNAcq(iRegINoSp res, indirect mem, iRegNNoSp oldval, iRegNNoSp newval, rFlagsReg cr) %{
+
+  predicate(needs_acquiring_load_exclusive(n));
+  match(Set res (CompareAndSwapN mem (Binary oldval newval)));
+  ins_cost(VOLATILE_REF_COST);
+
+  effect(KILL cr);
+
+ format %{
+    "cmpxchgw_acq $mem, $oldval, $newval\t# (narrow oop) if $mem == $oldval then $mem <-- $newval"
+    "cset $res, EQ\t# $res <-- (EQ ? 1 : 0)"
+ %}
+
+ ins_encode(aarch64_enc_cmpxchgw_acq(mem, oldval, newval),
+            aarch64_enc_cset_eq(res));
+
+  ins_pipe(pipe_slow);
+%}
+
 
 instruct get_and_setI(indirect mem, iRegINoSp newv, iRegI prev) %{
   match(Set prev (GetAndSetI mem newv));
@@ -13286,6 +13814,25 @@
   ins_pipe(pipe_cmp_branch);
 %}
 
+instruct cmpP_narrowOop_imm0_branch(cmpOp cmp, iRegN oop, immP0 zero, label labl, rFlagsReg cr) %{
+  match(If cmp (CmpP (DecodeN oop) zero));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $oop, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    if (cond == Assembler::EQ)
+      __ cbzw($oop$$Register, *L);
+    else
+      __ cbnzw($oop$$Register, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
 // Conditional Far Branch
 // Conditional Far Branch Unsigned
 // TODO: fixme
@@ -14662,6 +15209,102 @@
   ins_pipe(pipe_class_default);
 %}
 
+// --------------------------------- SQRT -------------------------------------
+
+instruct vsqrt2D(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SqrtVD src));
+  format %{ "fsqrt  $dst, $src\t# vector (2D)" %}
+  ins_encode %{
+    __ fsqrt(as_FloatRegister($dst$$reg), __ T2D,
+             as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// --------------------------------- ABS --------------------------------------
+
+instruct vabs2F(vecD dst, vecD src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AbsVF src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fabs  $dst,$src\t# vector (2S)" %}
+  ins_encode %{
+    __ fabs(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vabs4F(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AbsVF src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fabs  $dst,$src\t# vector (4S)" %}
+  ins_encode %{
+    __ fabs(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vabs2D(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AbsVD src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fabs  $dst,$src\t# vector (2D)" %}
+  ins_encode %{
+    __ fabs(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// --------------------------------- NEG --------------------------------------
+
+instruct vneg2F(vecD dst, vecD src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (NegVF src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fneg  $dst,$src\t# vector (2S)" %}
+  ins_encode %{
+    __ fneg(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vneg4F(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (NegVF src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fneg  $dst,$src\t# vector (4S)" %}
+  ins_encode %{
+    __ fneg(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vneg2D(vecX dst, vecX src)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (NegVD src));
+  ins_cost(INSN_COST * 3);
+  format %{ "fneg  $dst,$src\t# vector (2D)" %}
+  ins_encode %{
+    __ fneg(as_FloatRegister($dst$$reg), __ T2D,
+            as_FloatRegister($src$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 // --------------------------------- AND --------------------------------------
 
 instruct vand8B(vecD dst, vecD src1, vecD src2)
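
The aarch64.ad changes above add predicates (unnecessary_acquire, needs_acquiring_load, needs_releasing_store, needs_acquiring_load_exclusive) and the compareAndSwapXAcq rules so that, when UseBarriersForVolatile is off, C2 can implement volatile accesses and inlined CAS operations with ldar/stlr and ldaxr/stlxr rather than explicit dmb barriers. The following is a minimal, hypothetical Java sketch of the source shapes those rules target; the class and member names are illustrative and not part of this changeset.

import java.util.concurrent.atomic.AtomicInteger;

// Illustrative only: bytecode patterns whose ideal-graph shapes the new
// predicates recognize when UseBarriersForVolatile is false.
class BarrierElisionSketch {
    volatile int flag;                                 // volatile load -> ldar, volatile store -> stlr
    final AtomicInteger counter = new AtomicInteger();

    int readFlag()        { return flag; }             // candidate for needs_acquiring_load
    void writeFlag(int v) { flag = v; }                // candidate for needs_releasing_store

    boolean bump(int expected) {
        // compareAndSet is intrinsified to a CompareAndSwapI node; when
        // needs_acquiring_load_exclusive holds it matches compareAndSwapIAcq
        // (ldaxr/stlxr) instead of the dmb-bracketed variant.
        return counter.compareAndSet(expected, expected + 1);
    }
}
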
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -2311,6 +2311,12 @@
 
 #define MSG "invalid arrangement"
 
+#define ASSERTION (T == T2S || T == T4S || T == T2D)
+  INSN(fsqrt, 1, 0b11111);
+  INSN(fabs,  0, 0b01111);
+  INSN(fneg,  1, 0b01111);
+#undef ASSERTION
+
 #define ASSERTION (T == T8B || T == T16B || T == T4H || T == T8H || T == T2S || T == T4S)
   INSN(rev64, 0, 0b00000);
 #undef ASSERTION
--- a/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/c2_globals_aarch64.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -68,10 +68,11 @@
 
 // Peephole and CISC spilling both break the graph, and so make the
 // scheduler sick.
-define_pd_global(bool, OptoPeephole,                 true);
+define_pd_global(bool, OptoPeephole,                 false);
 define_pd_global(bool, UseCISCSpill,                 true);
 define_pd_global(bool, OptoScheduling,               false);
 define_pd_global(bool, OptoBundling,                 false);
+define_pd_global(bool, OptoRegScheduling,            false);
 
 define_pd_global(intx, ReservedCodeCacheSize,        48*M);
 define_pd_global(intx, NonProfiledCodeHeapSize,      21*M);
--- a/src/cpu/aarch64/vm/compiledIC_aarch64.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/compiledIC_aarch64.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -51,13 +51,15 @@
 // ----------------------------------------------------------------------------
 
 #define __ _masm.
-address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
   // Stub is fixed up when the corresponding call is converted from
   // calling compiled code to calling interpreted code.
   // mov rmethod, 0
   // jmp -4 # to self
 
-  address mark = cbuf.insts_mark();  // Get mark within main instrs section.
+  if (mark == NULL) {
+    mark = cbuf.insts_mark();  // Get mark within main instrs section.
+  }
 
   // Note that the code buffer's insts_mark is always relative to insts.
   // That's why we must use the macroassembler to generate a stub.
--- a/src/cpu/aarch64/vm/cppInterpreterGenerator_aarch64.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/cppInterpreterGenerator_aarch64.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -30,5 +30,6 @@
 
   void generate_more_monitors();
   void generate_deopt_handling();
+  void lock_method(void);
 
 #endif // CPU_AARCH64_VM_CPPINTERPRETERGENERATOR_AARCH64_HPP
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -42,6 +42,11 @@
 
 // Implementation of InterpreterMacroAssembler
 
+void InterpreterMacroAssembler::jump_to_entry(address entry) {
+  assert(entry, "Entry must have been generated by now");
+  b(entry);
+}
+
 #ifndef CC_INTERP
 
 void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) {
@@ -1542,14 +1547,14 @@
     if (MethodData::profile_arguments()) {
       Label done;
       int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset());
-      add(mdp, mdp, off_to_args);
 
       for (int i = 0; i < TypeProfileArgsLimit; i++) {
         if (i > 0 || MethodData::profile_return()) {
           // If return value type is profiled we may have no argument to profile
-          ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args));
+          ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())));
           sub(tmp, tmp, i*TypeStackSlotEntries::per_arg_count());
           cmp(tmp, TypeStackSlotEntries::per_arg_count());
+          add(rscratch1, mdp, off_to_args);
           br(Assembler::LT, done);
         }
         ldr(tmp, Address(callee, Method::const_offset()));
@@ -1557,26 +1562,27 @@
         // stack offset o (zero based) from the start of the argument
         // list, for n arguments translates into offset n - o - 1 from
         // the end of the argument list
-        ldr(rscratch1, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args));
+        ldr(rscratch1, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))));
         sub(tmp, tmp, rscratch1);
         sub(tmp, tmp, 1);
         Address arg_addr = argument_address(tmp);
         ldr(tmp, arg_addr);
 
-        Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args);
+        Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i)));
         profile_obj_type(tmp, mdo_arg_addr);
 
         int to_add = in_bytes(TypeStackSlotEntries::per_arg_size());
-        add(mdp, mdp, to_add);
         off_to_args += to_add;
       }
 
       if (MethodData::profile_return()) {
-        ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args));
+        ldr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())));
         sub(tmp, tmp, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count());
       }
 
+      add(rscratch1, mdp, off_to_args);
       bind(done);
+      mov(mdp, rscratch1);
 
       if (MethodData::profile_return()) {
         // We're right after the type profile for the last
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -66,6 +66,8 @@
 
   void load_earlyret_value(TosState state);
 
+  void jump_to_entry(address entry);
+
 #ifdef CC_INTERP
   void save_bcp()                                          { /*  not needed in c++ interpreter and harmless */ }
   void restore_bcp()                                       { /*  not needed in c++ interpreter and harmless */ }
--- a/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -41,14 +41,13 @@
   address generate_native_entry(bool synchronized);
   address generate_abstract_entry(void);
   address generate_math_entry(AbstractInterpreter::MethodKind kind);
-  address generate_jump_to_normal_entry(void);
-  address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); }
-  address generate_empty_entry(void) { return generate_jump_to_normal_entry(); }
+  address generate_accessor_entry(void) { return NULL; }
+  address generate_empty_entry(void) { return NULL; }
   void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs);
   address generate_Reference_get_entry();
   address generate_CRC32_update_entry();
   address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
-  void lock_method(void);
+  address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
   void generate_stack_overflow_check(void);
 
   void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);
--- a/src/cpu/aarch64/vm/interpreter_aarch64.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/interpreter_aarch64.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -236,17 +236,6 @@
   __ blrt(rscratch1, gpargs, fpargs, rtype);
 }
 
-// Jump into normal path for accessor and empty entry to jump to normal entry
-// The "fast" optimization don't update compilation count therefore can disable inlining
-// for these functions that should be inlined.
-address InterpreterGenerator::generate_jump_to_normal_entry(void) {
-  address entry_point = __ pc();
-
-  assert(Interpreter::entry_for_kind(Interpreter::zerolocals) != NULL, "should already be generated");
-  __ b(Interpreter::entry_for_kind(Interpreter::zerolocals));
-  return entry_point;
-}
-
 // Abstract method entry
 // Attempt to execute abstract method. Throw exception
 address InterpreterGenerator::generate_abstract_entry(void) {
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "jvmci/jvmciCodeInstaller.hpp"
+#include "jvmci/jvmciRuntime.hpp"
+#include "jvmci/jvmciCompilerToVM.hpp"
+#include "jvmci/jvmciJavaClasses.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_aarch64.inline.hpp"
+
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+  Unimplemented();
+  return 0;
+}
+
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+  Unimplemented();
+}
+
+void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
+  Unimplemented();
+}
+
+void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
+  Unimplemented();
+}
+
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+  Unimplemented();
+}
+
+void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+  Unimplemented();
+}
+
+void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+  Unimplemented();
+}
+
+// convert JVMCI register indices (as used in oop maps) to HotSpot registers
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+  return NULL;
+}
+
+bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
+  return false;
+}
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -1709,6 +1709,20 @@
   return idivq_offset;
 }
 
+void MacroAssembler::membar(Membar_mask_bits order_constraint) {
+  address prev = pc() - NativeMembar::instruction_size;
+  if (prev == code()->last_membar()) {
+    NativeMembar *bar = NativeMembar_at(prev);
+    // We are merging two memory barrier instructions.  On AArch64 we
+    // can do this simply by ORing them together.
+    bar->set_kind(bar->get_kind() | order_constraint);
+    BLOCK_COMMENT("merged membar");
+  } else {
+    code()->set_last_membar(pc());
+    dmb(Assembler::barrier(order_constraint));
+  }
+}
+
 // MacroAssembler routines found actually to be needed
 
 void MacroAssembler::push(Register src)
@@ -2238,7 +2252,7 @@
     ttyLocker ttyl;
     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                     msg);
-    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
+    assert(false, "DEBUG MESSAGE: %s", msg);
   }
 }
 
@@ -2286,18 +2300,30 @@
 }
 #endif
 
-void MacroAssembler::push_CPU_state() {
-    push(0x3fffffff, sp);         // integer registers except lr & sp
-
+void MacroAssembler::push_CPU_state(bool save_vectors) {
+  push(0x3fffffff, sp);         // integer registers except lr & sp
+
+  if (!save_vectors) {
     for (int i = 30; i >= 0; i -= 2)
       stpd(as_FloatRegister(i), as_FloatRegister(i+1),
            Address(pre(sp, -2 * wordSize)));
+  } else {
+    for (int i = 30; i >= 0; i -= 2)
+      stpq(as_FloatRegister(i), as_FloatRegister(i+1),
+           Address(pre(sp, -4 * wordSize)));
+  }
 }
 
-void MacroAssembler::pop_CPU_state() {
-  for (int i = 0; i < 32; i += 2)
-    ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
-         Address(post(sp, 2 * wordSize)));
+void MacroAssembler::pop_CPU_state(bool restore_vectors) {
+  if (!restore_vectors) {
+    for (int i = 0; i < 32; i += 2)
+      ldpd(as_FloatRegister(i), as_FloatRegister(i+1),
+           Address(post(sp, 2 * wordSize)));
+  } else {
+    for (int i = 0; i < 32; i += 2)
+      ldpq(as_FloatRegister(i), as_FloatRegister(i+1),
+           Address(post(sp, 4 * wordSize)));
+  }
 
   pop(0x3fffffff, sp);         // integer registers except lr & sp
 }
@@ -3027,6 +3053,24 @@
   _masm->bind(_label);
 }
 
+void MacroAssembler::addptr(const Address &dst, int32_t src) {
+  Address adr;
+  switch(dst.getMode()) {
+  case Address::base_plus_offset:
+    // This is the expected mode, although we allow all the other
+    // forms below.
+    adr = form_address(rscratch2, dst.base(), dst.offset(), LogBytesPerWord);
+    break;
+  default:
+    lea(rscratch2, dst);
+    adr = Address(rscratch2);
+    break;
+  }
+  ldr(rscratch1, adr);
+  add(rscratch1, rscratch1, src);
+  str(rscratch1, adr);
+}
+
 void MacroAssembler::cmpptr(Register src1, Address src2) {
   unsigned long offset;
   adrp(rscratch1, src2, offset);
@@ -3063,11 +3107,15 @@
 
   if (UseCondCardMark) {
     Label L_already_dirty;
+    membar(StoreLoad);
     ldrb(rscratch2,  Address(obj, rscratch1));
     cbz(rscratch2, L_already_dirty);
     strb(zr, Address(obj, rscratch1));
     bind(L_already_dirty);
   } else {
+    if (UseConcMarkSweepGC && CMSPrecleaningEnabled) {
+      membar(StoreStore);
+    }
     strb(zr, Address(obj, rscratch1));
   }
 }
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -152,6 +152,13 @@
     strw(scratch, a);
   }
 
+  void bind(Label& L) {
+    Assembler::bind(L);
+    code()->clear_last_membar();
+  }
+
+  void membar(Membar_mask_bits order_constraint);
+
   // Frame creation and destruction shared between JITs.
   void build_frame(int framesize);
   void remove_frame(int framesize);
@@ -777,8 +784,8 @@
 
   DEBUG_ONLY(void verify_heapbase(const char* msg);)
 
-  void push_CPU_state();
-  void pop_CPU_state() ;
+  void push_CPU_state(bool save_vectors = false);
+  void pop_CPU_state(bool restore_vectors = false) ;
 
   // Round up to a power of two
   void round_to(Register reg, int modulus);
@@ -908,13 +915,7 @@
 
   // Arithmetics
 
-  void addptr(Address dst, int32_t src) {
-    lea(rscratch2, dst);
-    ldr(rscratch1, Address(rscratch2));
-    add(rscratch1, rscratch1, src);
-    str(rscratch1, Address(rscratch2));
-  }
-
+  void addptr(const Address &dst, int32_t src);
   void cmpptr(Register src1, Address src2);
 
   // Various forms of CAS
--- a/src/cpu/aarch64/vm/methodHandles_aarch64.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/methodHandles_aarch64.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -50,7 +50,7 @@
 
 #ifdef ASSERT
 static int check_nonzero(const char* xname, int x) {
-  assert(x != 0, err_msg("%s should be nonzero", xname));
+  assert(x != 0, "%s should be nonzero", xname);
   return x;
 }
 #define NONZERO(x) check_nonzero(#x, x)
@@ -407,7 +407,7 @@
     }
 
     default:
-      fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
+      fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
       break;
     }
 
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/nativeInst_aarch64.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -101,6 +101,12 @@
   static bool maybe_cpool_ref(address instr) {
     return is_adrp_at(instr) || is_ldr_literal_at(instr);
   }
+
+  bool is_Membar() {
+    unsigned int insn = uint_at(0);
+    return Instruction_aarch64::extract(insn, 31, 12) == 0b11010101000000110011 &&
+      Instruction_aarch64::extract(insn, 7, 0) == 0b10111111;
+  }
 };
 
 inline NativeInstruction* nativeInstruction_at(address address) {
@@ -487,4 +493,15 @@
   return (NativeCallTrampolineStub*)addr;
 }
 
+class NativeMembar : public NativeInstruction {
+public:
+  unsigned int get_kind() { return Instruction_aarch64::extract(uint_at(0), 11, 8); }
+  void set_kind(int order_kind) { Instruction_aarch64::patch(addr_at(0), 11, 8, order_kind); }
+};
+
+inline NativeMembar *NativeMembar_at(address addr) {
+  assert(nativeInstruction_at(addr)->is_Membar(), "no membar found");
+  return (NativeMembar*)addr;
+}
+
 #endif // CPU_AARCH64_VM_NATIVEINST_AARCH64_HPP
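
The NativeMembar accessors added above read and patch the barrier kind held in bits 11..8 of the instruction word, while is_Membar() matches the fixed bits of the dmb encoding. A Java sketch of the same bit manipulation, assuming 0xd5033bbf as the encoding of dmb ish:

// Sketch of NativeMembar::get_kind()/set_kind() as plain bit manipulation.
class NativeMembarSketch {
    static int getKind(int insn)           { return (insn >>> 8) & 0xF; }
    static int setKind(int insn, int kind) { return (insn & ~(0xF << 8)) | (kind << 8); }

    public static void main(String[] args) {
        int dmbIsh = 0xd5033bbf;                                      // dmb ish (assumed encoding)
        System.out.println(Integer.toBinaryString(getKind(dmbIsh)));  // 1011 = ISH
        int dmbSy = setKind(dmbIsh, 0b1111);                          // widen to dmb sy
        System.out.println(Integer.toHexString(dmbSy));               // d5033fbf
    }
}
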
--- a/src/cpu/aarch64/vm/relocInfo_aarch64.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/relocInfo_aarch64.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -102,12 +102,5 @@
   }
 }
 
-void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest)  {
-  if (NativeInstruction::maybe_cpool_ref(addr())) {
-    address old_addr = old_addr_for(addr(), src, dest);
-    MacroAssembler::pd_patch_instruction(addr(), MacroAssembler::target_addr_for_insn(old_addr));
-  }
-}
-
 void metadata_Relocation::pd_fix_value(address x) {
 }
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -75,8 +75,8 @@
 // FIXME -- this is used by C1
 class RegisterSaver {
  public:
-  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words);
-  static void restore_live_registers(MacroAssembler* masm);
+  static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false);
+  static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false);
 
   // Offsets into the register save area
   // Used by deoptimization when it is managing result register
@@ -108,7 +108,17 @@
 
 };
 
-OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words) {
+OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
+#ifdef COMPILER2
+  if (save_vectors) {
+    // Save upper half of vector registers
+    int vect_words = 32 * 8 / wordSize;
+    additional_frame_words += vect_words;
+  }
+#else
+  assert(!save_vectors, "vectors are generated only by C2");
+#endif
+
   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
                                      reg_save_size*BytesPerInt, 16);
   // OopMap frame size is in compiler stack slots (jint's) not bytes or words
@@ -122,7 +132,7 @@
   // Save registers, fpu state, and flags.
 
   __ enter();
-  __ push_CPU_state();
+  __ push_CPU_state(save_vectors);
 
   // Set an oopmap for the call site.  This oopmap will map all
   // oop-registers and debug-info registers as callee-saved.  This
@@ -139,14 +149,14 @@
                                     // register slots are 8 bytes
                                     // wide, 32 floating-point
                                     // registers
-      oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
+      oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset + additional_frame_slots),
                                 r->as_VMReg());
     }
   }
 
   for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) {
     FloatRegister r = as_FloatRegister(i);
-    int sp_offset = 2 * i;
+    int sp_offset = save_vectors ? (4 * i) : (2 * i);
     oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset),
                               r->as_VMReg());
   }
@@ -154,8 +164,11 @@
   return oop_map;
 }
 
-void RegisterSaver::restore_live_registers(MacroAssembler* masm) {
-  __ pop_CPU_state();
+void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
+#ifndef COMPILER2
+  assert(!restore_vectors, "vectors are generated only by C2");
+#endif
+  __ pop_CPU_state(restore_vectors);
   __ leave();
 }
 
@@ -177,9 +190,9 @@
 }
 
 // Is vector's size (in bytes) bigger than a size saved by default?
-// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions.
+// Only 8 bytes per vector register are saved by default on AArch64.
 bool SharedRuntime::is_wide_vector(int size) {
-  return size > 16;
+  return size > 8;
 }
 // The java_calling_convention describes stack locations as ideal slots on
 // a frame with no abi restrictions. Since we must observe abi restrictions
@@ -460,11 +473,11 @@
 }
 
 
-static void gen_i2c_adapter(MacroAssembler *masm,
-                            int total_args_passed,
-                            int comp_args_on_stack,
-                            const BasicType *sig_bt,
-                            const VMRegPair *regs) {
+void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
+                                    int total_args_passed,
+                                    int comp_args_on_stack,
+                                    const BasicType *sig_bt,
+                                    const VMRegPair *regs) {
 
   // Note: r13 contains the senderSP on entry. We must preserve it since
   // we may do a i2c -> c2i transition if we lose a race where compiled
@@ -1146,7 +1159,7 @@
     assert((unsigned)gpargs < 256, "eek!");
     assert((unsigned)fpargs < 32, "eek!");
     __ lea(rscratch1, RuntimeAddress(dest));
-    __ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type);
+    if (UseBuiltinSim)   __ mov(rscratch2, (gpargs << 6) | (fpargs << 2) | type);
     __ blrt(rscratch1, rscratch2);
     __ maybe_isb();
   }
@@ -1194,7 +1207,7 @@
   } else if (iid == vmIntrinsics::_invokeBasic) {
     has_receiver = true;
   } else {
-    fatal(err_msg_res("unexpected intrinsic id %d", iid));
+    fatal("unexpected intrinsic id %d", iid);
   }
 
   if (member_reg != noreg) {
@@ -1521,14 +1534,13 @@
 
   int vep_offset = ((intptr_t)__ pc()) - start;
 
-  // Generate stack overflow check
-
   // If we have to make this method not-entrant we'll overwrite its
   // first instruction with a jump.  For this action to be legal we
   // must ensure that this first instruction is a B, BL, NOP, BKPT,
   // SVC, HVC, or SMC.  Make it a NOP.
   __ nop();
 
+  // Generate stack overflow check
   if (UseStackBanging) {
     __ bang_stack_with_offset(StackShadowPages*os::vm_page_size());
   } else {
@@ -1709,23 +1721,20 @@
   // need to spill before we call out
   int c_arg = total_c_args - total_in_args;
 
-  // Pre-load a static method's oop into r20.  Used both by locking code and
-  // the normal JNI call code.
+  // Pre-load a static method's oop into c_rarg1.
   if (method->is_static() && !is_critical_native) {
 
     //  load oop into a register
-    __ movoop(oop_handle_reg,
+    __ movoop(c_rarg1,
               JNIHandles::make_local(method->method_holder()->java_mirror()),
               /*immediate*/true);
 
     // Now handlize the static class mirror it's known not-null.
-    __ str(oop_handle_reg, Address(sp, klass_offset));
+    __ str(c_rarg1, Address(sp, klass_offset));
     map->set_oop(VMRegImpl::stack2reg(klass_slot_offset));
 
     // Now get the handle
-    __ lea(oop_handle_reg, Address(sp, klass_offset));
-    // store the klass handle as second argument
-    __ mov(c_rarg1, oop_handle_reg);
+    __ lea(c_rarg1, Address(sp, klass_offset));
     // and protect the arg if we must spill
     c_arg--;
   }
@@ -1740,19 +1749,13 @@
 
   __ set_last_Java_frame(sp, noreg, (address)the_pc, rscratch1);
 
-
-  // We have all of the arguments setup at this point. We must not touch any register
-  // argument registers at this point (what if we save/restore them there are no oop?
-
+  Label dtrace_method_entry, dtrace_method_entry_done;
   {
-    SkipIfEqual skip(masm, &DTraceMethodProbes, false);
-    // protect the args we've loaded
-    save_args(masm, total_c_args, c_arg, out_regs);
-    __ mov_metadata(c_rarg1, method());
-    __ call_VM_leaf(
-      CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
-      rthread, c_rarg1);
-    restore_args(masm, total_c_args, c_arg, out_regs);
+    unsigned long offset;
+    __ adrp(rscratch1, ExternalAddress((address)&DTraceMethodProbes), offset);
+    __ ldrb(rscratch1, Address(rscratch1, offset));
+    __ cbnzw(rscratch1, dtrace_method_entry);
+    __ bind(dtrace_method_entry_done);
   }
 
   // RedefineClasses() tracing support for obsolete method entry
@@ -1782,7 +1785,6 @@
   if (method->is_synchronized()) {
     assert(!is_critical_native, "unhandled");
 
-
     const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes();
 
     // Get the handle (the 2nd argument)
@@ -1838,7 +1840,6 @@
 
   // Finally just about ready to make the JNI call
 
-
   // get JNIEnv* which is first argument to native
   if (!is_critical_native) {
     __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset())));
@@ -1879,9 +1880,9 @@
 
   // Unpack native results.
   switch (ret_type) {
-  case T_BOOLEAN: __ ubfx(r0, r0, 0, 8);            break;
+  case T_BOOLEAN: __ ubfx(r0, r0, 0, 8);             break;
   case T_CHAR   : __ ubfx(r0, r0, 0, 16);            break;
-  case T_BYTE   : __ sbfx(r0, r0, 0, 8);            break;
+  case T_BYTE   : __ sbfx(r0, r0, 0, 8);             break;
   case T_SHORT  : __ sbfx(r0, r0, 0, 16);            break;
   case T_INT    : __ sbfx(r0, r0, 0, 32);            break;
   case T_DOUBLE :
@@ -1904,14 +1905,17 @@
   //     Thread A is resumed to finish this native method, but doesn't block here since it
   //     didn't see any synchronization in progress, and escapes.
   __ mov(rscratch1, _thread_in_native_trans);
-  __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
-  __ stlrw(rscratch1, rscratch2);
 
   if(os::is_MP()) {
     if (UseMembar) {
+      __ strw(rscratch1, Address(rthread, JavaThread::thread_state_offset()));
+
       // Force this write out before the read below
       __ dmb(Assembler::SY);
     } else {
+      __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
+      __ stlrw(rscratch1, rscratch2);
+
       // Write serialization page so VM thread can do a pseudo remote membar.
       // We use the current thread pointer to calculate a thread specific
       // offset to write to within the page. This minimizes bus traffic
@@ -1920,25 +1924,220 @@
     }
   }
 
+  // check for safepoint operation in progress and/or pending suspend requests
+  Label safepoint_in_progress, safepoint_in_progress_done;
+  {
+    assert(SafepointSynchronize::_not_synchronized == 0, "fix this code");
+    unsigned long offset;
+    __ adrp(rscratch1,
+            ExternalAddress((address)SafepointSynchronize::address_of_state()),
+            offset);
+    __ ldrw(rscratch1, Address(rscratch1, offset));
+    __ cbnzw(rscratch1, safepoint_in_progress);
+    __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
+    __ cbnzw(rscratch1, safepoint_in_progress);
+    __ bind(safepoint_in_progress_done);
+  }
+
+  // change thread state
   Label after_transition;
-
-  // check for safepoint operation in progress and/or pending suspend requests
+  __ mov(rscratch1, _thread_in_Java);
+  __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
+  __ stlrw(rscratch1, rscratch2);
+  __ bind(after_transition);
+
+  Label reguard;
+  Label reguard_done;
+  __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
+  __ cmpw(rscratch1, JavaThread::stack_guard_yellow_disabled);
+  __ br(Assembler::EQ, reguard);
+  __ bind(reguard_done);
+
+  // native result if any is live
+
+  // Unlock
+  Label unlock_done;
+  Label slow_path_unlock;
+  if (method->is_synchronized()) {
+
+    // Get locked oop from the handle we passed to jni
+    __ ldr(obj_reg, Address(oop_handle_reg, 0));
+
+    Label done;
+
+    if (UseBiasedLocking) {
+      __ biased_locking_exit(obj_reg, old_hdr, done);
+    }
+
+    // Simple recursive lock?
+
+    __ ldr(rscratch1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+    __ cbz(rscratch1, done);
+
+    // Must save r0 if it is live now because cmpxchg must use it
+    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
+      save_native_result(masm, ret_type, stack_slots);
+    }
+
+
+    // get address of the stack lock
+    __ lea(r0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+    //  get old displaced header
+    __ ldr(old_hdr, Address(r0, 0));
+
+    // Atomic swap old header if oop still contains the stack lock
+    Label succeed;
+    __ cmpxchgptr(r0, old_hdr, obj_reg, rscratch1, succeed, &slow_path_unlock);
+    __ bind(succeed);
+
+    // slow path re-enters here
+    __ bind(unlock_done);
+    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
+      restore_native_result(masm, ret_type, stack_slots);
+    }
+
+    __ bind(done);
+  }
+
+  Label dtrace_method_exit, dtrace_method_exit_done;
   {
-    Label Continue;
-
-    { unsigned long offset;
-      __ adrp(rscratch1,
-              ExternalAddress((address)SafepointSynchronize::address_of_state()),
-              offset);
-      __ ldrw(rscratch1, Address(rscratch1, offset));
+    unsigned long offset;
+    __ adrp(rscratch1, ExternalAddress((address)&DTraceMethodProbes), offset);
+    __ ldrb(rscratch1, Address(rscratch1, offset));
+    __ cbnzw(rscratch1, dtrace_method_exit);
+    __ bind(dtrace_method_exit_done);
+  }
+
+  __ reset_last_Java_frame(false, true);
+
+  // Unpack oop result
+  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
+      Label L;
+      __ cbz(r0, L);
+      __ ldr(r0, Address(r0, 0));
+      __ bind(L);
+      __ verify_oop(r0);
+  }
+
+  if (!is_critical_native) {
+    // reset handle block
+    __ ldr(r2, Address(rthread, JavaThread::active_handles_offset()));
+    __ str(zr, Address(r2, JNIHandleBlock::top_offset_in_bytes()));
+  }
+
+  __ leave();
+
+  if (!is_critical_native) {
+    // Any exception pending?
+    __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+    __ cbnz(rscratch1, exception_pending);
+  }
+
+  // record exit from native wrapper code
+  if (NotifySimulator) {
+    __ notify(Assembler::method_reentry);
+  }
+
+  // We're done
+  __ ret(lr);
+
+  // Unexpected paths are out of line and go here
+
+  if (!is_critical_native) {
+    // forward the exception
+    __ bind(exception_pending);
+
+    // and forward the exception
+    __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
+  }
+
+  // Slow path locking & unlocking
+  if (method->is_synchronized()) {
+
+    __ block_comment("Slow path lock {");
+    __ bind(slow_path_lock);
+
+    // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
+    // args are (oop obj, BasicLock* lock, JavaThread* thread)
+
+    // protect the args we've loaded
+    save_args(masm, total_c_args, c_arg, out_regs);
+
+    __ mov(c_rarg0, obj_reg);
+    __ mov(c_rarg1, lock_reg);
+    __ mov(c_rarg2, rthread);
+
+    // Not a leaf but we have last_Java_frame setup as we want
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
+    restore_args(masm, total_c_args, c_arg, out_regs);
+
+#ifdef ASSERT
+    { Label L;
+      __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+      __ cbz(rscratch1, L);
+      __ stop("no pending exception allowed on exit from monitorenter");
+      __ bind(L);
     }
-    __ cmpw(rscratch1, SafepointSynchronize::_not_synchronized);
-
-    Label L;
-    __ br(Assembler::NE, L);
-    __ ldrw(rscratch1, Address(rthread, JavaThread::suspend_flags_offset()));
-    __ cbz(rscratch1, Continue);
-    __ bind(L);
+#endif
+    __ b(lock_done);
+
+    __ block_comment("} Slow path lock");
+
+    __ block_comment("Slow path unlock {");
+    __ bind(slow_path_unlock);
+
+    // If we haven't already saved the native result we must save it now as the
+    // float/vector registers are still exposed.
+
+    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
+      save_native_result(masm, ret_type, stack_slots);
+    }
+
+    __ mov(c_rarg2, rthread);
+    __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
+    __ mov(c_rarg0, obj_reg);
+
+    // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
+    // NOTE that obj_reg == r19 currently
+    __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+    __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+
+    rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 3, 0, 1);
+
+#ifdef ASSERT
+    {
+      Label L;
+      __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+      __ cbz(rscratch1, L);
+      __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
+      __ bind(L);
+    }
+#endif /* ASSERT */
+
+    __ str(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
+
+    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
+      restore_native_result(masm, ret_type, stack_slots);
+    }
+    __ b(unlock_done);
+
+    __ block_comment("} Slow path unlock");
+
+  } // synchronized
+
+  // SLOW PATH Reguard the stack if needed
+
+  __ bind(reguard);
+  save_native_result(masm, ret_type, stack_slots);
+  rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), 0, 0, 0);
+  restore_native_result(masm, ret_type, stack_slots);
+  // and continue
+  __ b(reguard_done);
+
+  // SLOW PATH safepoint
+  {
+    __ block_comment("safepoint {");
+    __ bind(safepoint_in_progress);
 
     // Don't use call_VM as it will see a possible pending exception and forward it
     // and never return here preventing us from clearing _last_native_pc down below.
@@ -1960,209 +2159,45 @@
 
     if (is_critical_native) {
       // The call above performed the transition to thread_in_Java so
-      // skip the transition logic below.
+      // skip the transition logic above.
       __ b(after_transition);
     }
 
-    __ bind(Continue);
+    __ b(safepoint_in_progress_done);
+    __ block_comment("} safepoint");
   }
 
-  // change thread state
-  __ mov(rscratch1, _thread_in_Java);
-  __ lea(rscratch2, Address(rthread, JavaThread::thread_state_offset()));
-  __ stlrw(rscratch1, rscratch2);
-  __ bind(after_transition);
-
-  Label reguard;
-  Label reguard_done;
-  __ ldrb(rscratch1, Address(rthread, JavaThread::stack_guard_state_offset()));
-  __ cmpw(rscratch1, JavaThread::stack_guard_yellow_disabled);
-  __ br(Assembler::EQ, reguard);
-  __ bind(reguard_done);
-
-  // native result if any is live
-
-  // Unlock
-  Label unlock_done;
-  Label slow_path_unlock;
-  if (method->is_synchronized()) {
-
-    // Get locked oop from the handle we passed to jni
-    __ ldr(obj_reg, Address(oop_handle_reg, 0));
-
-    Label done;
-
-    if (UseBiasedLocking) {
-      __ biased_locking_exit(obj_reg, old_hdr, done);
-    }
-
-    // Simple recursive lock?
-
-    __ ldr(rscratch1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
-    __ cbz(rscratch1, done);
-
-    // Must save r0 if if it is live now because cmpxchg must use it
-    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
-      save_native_result(masm, ret_type, stack_slots);
-    }
-
-
-    // get address of the stack lock
-    __ lea(r0, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
-    //  get old displaced header
-    __ ldr(old_hdr, Address(r0, 0));
-
-    // Atomic swap old header if oop still contains the stack lock
-    Label succeed;
-    __ cmpxchgptr(r0, old_hdr, obj_reg, rscratch1, succeed, &slow_path_unlock);
-    __ bind(succeed);
-
-    // slow path re-enters here
-    __ bind(unlock_done);
-    if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) {
-      restore_native_result(masm, ret_type, stack_slots);
-    }
-
-    __ bind(done);
-
+  // SLOW PATH dtrace support
+  {
+    __ block_comment("dtrace entry {");
+    __ bind(dtrace_method_entry);
+
+    // We have all of the arguments set up at this point. We must not touch any
+    // argument registers here (what if we save/restore them and there are no oops?)
+
+    save_args(masm, total_c_args, c_arg, out_regs);
+    __ mov_metadata(c_rarg1, method());
+    __ call_VM_leaf(
+      CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry),
+      rthread, c_rarg1);
+    restore_args(masm, total_c_args, c_arg, out_regs);
+    __ b(dtrace_method_entry_done);
+    __ block_comment("} dtrace entry");
   }
+
   {
-    SkipIfEqual skip(masm, &DTraceMethodProbes, false);
+    __ block_comment("dtrace exit {");
+    __ bind(dtrace_method_exit);
     save_native_result(masm, ret_type, stack_slots);
     __ mov_metadata(c_rarg1, method());
     __ call_VM_leaf(
          CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit),
          rthread, c_rarg1);
     restore_native_result(masm, ret_type, stack_slots);
+    __ b(dtrace_method_exit_done);
+    __ block_comment("} dtrace exit");
   }
 
-  __ reset_last_Java_frame(false, true);
-
-  // Unpack oop result
-  if (ret_type == T_OBJECT || ret_type == T_ARRAY) {
-      Label L;
-      __ cbz(r0, L);
-      __ ldr(r0, Address(r0, 0));
-      __ bind(L);
-      __ verify_oop(r0);
-  }
-
-  if (!is_critical_native) {
-    // reset handle block
-    __ ldr(r2, Address(rthread, JavaThread::active_handles_offset()));
-    __ str(zr, Address(r2, JNIHandleBlock::top_offset_in_bytes()));
-  }
-
-  __ leave();
-
-  if (!is_critical_native) {
-    // Any exception pending?
-    __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
-    __ cbnz(rscratch1, exception_pending);
-  }
-
-  // record exit from native wrapper code
-  if (NotifySimulator) {
-    __ notify(Assembler::method_reentry);
-  }
-
-  // We're done
-  __ ret(lr);
-
-  // Unexpected paths are out of line and go here
-
-  if (!is_critical_native) {
-    // forward the exception
-    __ bind(exception_pending);
-
-    // and forward the exception
-    __ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry()));
-  }
-
-  // Slow path locking & unlocking
-  if (method->is_synchronized()) {
-
-    // BEGIN Slow path lock
-    __ bind(slow_path_lock);
-
-    // has last_Java_frame setup. No exceptions so do vanilla call not call_VM
-    // args are (oop obj, BasicLock* lock, JavaThread* thread)
-
-    // protect the args we've loaded
-    save_args(masm, total_c_args, c_arg, out_regs);
-
-    __ mov(c_rarg0, obj_reg);
-    __ mov(c_rarg1, lock_reg);
-    __ mov(c_rarg2, rthread);
-
-    // Not a leaf but we have last_Java_frame setup as we want
-    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3);
-    restore_args(masm, total_c_args, c_arg, out_regs);
-
-#ifdef ASSERT
-    { Label L;
-      __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
-      __ cbz(rscratch1, L);
-      __ stop("no pending exception allowed on exit from monitorenter");
-      __ bind(L);
-    }
-#endif
-    __ b(lock_done);
-
-    // END Slow path lock
-
-    // BEGIN Slow path unlock
-    __ bind(slow_path_unlock);
-
-    // If we haven't already saved the native result we must save it now as xmm registers
-    // are still exposed.
-
-    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
-      save_native_result(masm, ret_type, stack_slots);
-    }
-
-    __ mov(c_rarg2, rthread);
-    __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
-    __ mov(c_rarg0, obj_reg);
-
-    // Save pending exception around call to VM (which contains an EXCEPTION_MARK)
-    // NOTE that obj_reg == r19 currently
-    __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
-    __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset())));
-
-    rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 3, 0, 1);
-
-#ifdef ASSERT
-    {
-      Label L;
-      __ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset())));
-      __ cbz(rscratch1, L);
-      __ stop("no pending exception allowed on exit complete_monitor_unlocking_C");
-      __ bind(L);
-    }
-#endif /* ASSERT */
-
-    __ str(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
-
-    if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) {
-      restore_native_result(masm, ret_type, stack_slots);
-    }
-    __ b(unlock_done);
-
-    // END Slow path unlock
-
-  } // synchronized
-
-  // SLOW PATH Reguard the stack if needed
-
-  __ bind(reguard);
-  save_native_result(masm, ret_type, stack_slots);
-  rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), 0, 0, 0);
-  restore_native_result(masm, ret_type, stack_slots);
-  // and continue
-  __ b(reguard_done);
-
-
 
   __ flush();
 
@@ -2742,7 +2777,7 @@
   bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP);
 
   // Save registers, fpu state, and flags
-  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
+  map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors);
 
   // The following is basically a call_VM.  However, we need the precise
   // address of the call in order to generate an oopmap. Hence, we do all the
@@ -2793,7 +2828,7 @@
   __ bind(noException);
 
   // Normal exit, restore registers and exit.
-  RegisterSaver::restore_live_registers(masm);
+  RegisterSaver::restore_live_registers(masm, save_vectors);
 
   __ ret(lr);
 
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -746,6 +746,9 @@
           const Register count = end; // 'end' register contains bytes count now
           __ mov(scratch, (address)ct->byte_map_base);
           __ add(start, start, scratch);
+          if (UseConcMarkSweepGC) {
+            __ membar(__ StoreStore);
+          }
           __ BIND(L_loop);
           __ strb(zr, Address(start, count));
           __ subs(count, count, 1);
@@ -2395,6 +2398,274 @@
     return start;
   }
 
+  /**
+   *  Arguments:
+   *
+   *  Inputs:
+   *   c_rarg0   - int   adler
+   *   c_rarg1   - byte* buff
+   *   c_rarg2   - int   len
+   *
+   * Output:
+   *   c_rarg0   - int adler result
+   */
+  address generate_updateBytesAdler32() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "updateBytesAdler32");
+    address start = __ pc();
+
+    Label L_simple_by1_loop, L_nmax, L_nmax_loop, L_by16, L_by16_loop, L_by1_loop, L_do_mod, L_combine, L_by1;
+
+    // Aliases
+    Register adler  = c_rarg0;
+    Register s1     = c_rarg0;
+    Register s2     = c_rarg3;
+    Register buff   = c_rarg1;
+    Register len    = c_rarg2;
+    Register nmax   = r4;
+    Register base   = r5;
+    Register count  = r6;
+    Register temp0  = rscratch1;
+    Register temp1  = rscratch2;
+    Register temp2  = r7;
+
+    // Max number of bytes we can process before having to take the mod
+    // 0x15B0 is 5552 in decimal, the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1
+    unsigned long BASE = 0xfff1;
+    unsigned long NMAX = 0x15B0;
+
+    __ mov(base, BASE);
+    __ mov(nmax, NMAX);
+
+    // s1 is initialized to the lower 16 bits of adler
+    // s2 is initialized to the upper 16 bits of adler
+    __ ubfx(s2, adler, 16, 16);  // s2 = ((adler >> 16) & 0xffff)
+    __ uxth(s1, adler);          // s1 = (adler & 0xffff)
+
+    // The pipelined loop needs at least 16 elements for 1 iteration
+    // The loop checks this itself, but for short inputs it is cheaper to branch straight to the cleanup loop
+    __ cmp(len, 16);
+    __ br(Assembler::HS, L_nmax);
+    __ cbz(len, L_combine);
+
+    __ bind(L_simple_by1_loop);
+    __ ldrb(temp0, Address(__ post(buff, 1)));
+    __ add(s1, s1, temp0);
+    __ add(s2, s2, s1);
+    __ subs(len, len, 1);
+    __ br(Assembler::HI, L_simple_by1_loop);
+
+    // s1 = s1 % BASE
+    __ subs(temp0, s1, base);
+    __ csel(s1, temp0, s1, Assembler::HS);
+
+    // s2 = s2 % BASE
+    __ lsr(temp0, s2, 16);
+    __ lsl(temp1, temp0, 4);
+    __ sub(temp1, temp1, temp0);
+    __ add(s2, temp1, s2, ext::uxth);
+
+    __ subs(temp0, s2, base);
+    __ csel(s2, temp0, s2, Assembler::HS);
+
+    __ b(L_combine);
+
+    __ bind(L_nmax);
+    __ subs(len, len, nmax);
+    __ sub(count, nmax, 16);
+    __ br(Assembler::LO, L_by16);
+
+    __ bind(L_nmax_loop);
+
+    __ ldp(temp0, temp1, Address(__ post(buff, 16)));
+
+    __ add(s1, s1, temp0, ext::uxtb);
+    __ ubfx(temp2, temp0, 8, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp0, 16, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp0, 24, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp0, 32, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp0, 40, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp0, 48, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp0, Assembler::LSR, 56);
+    __ add(s2, s2, s1);
+
+    __ add(s1, s1, temp1, ext::uxtb);
+    __ ubfx(temp2, temp1, 8, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp1, 16, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp1, 24, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp1, 32, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp1, 40, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp1, 48, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp1, Assembler::LSR, 56);
+    __ add(s2, s2, s1);
+
+    __ subs(count, count, 16);
+    __ br(Assembler::HS, L_nmax_loop);
+
+    // s1 = s1 % BASE
+    __ lsr(temp0, s1, 16);
+    __ lsl(temp1, temp0, 4);
+    __ sub(temp1, temp1, temp0);
+    __ add(temp1, temp1, s1, ext::uxth);
+
+    __ lsr(temp0, temp1, 16);
+    __ lsl(s1, temp0, 4);
+    __ sub(s1, s1, temp0);
+    __ add(s1, s1, temp1, ext::uxth);
+
+    __ subs(temp0, s1, base);
+    __ csel(s1, temp0, s1, Assembler::HS);
+
+    // s2 = s2 % BASE
+    __ lsr(temp0, s2, 16);
+    __ lsl(temp1, temp0, 4);
+    __ sub(temp1, temp1, temp0);
+    __ add(temp1, temp1, s2, ext::uxth);
+
+    __ lsr(temp0, temp1, 16);
+    __ lsl(s2, temp0, 4);
+    __ sub(s2, s2, temp0);
+    __ add(s2, s2, temp1, ext::uxth);
+
+    __ subs(temp0, s2, base);
+    __ csel(s2, temp0, s2, Assembler::HS);
+
+    __ subs(len, len, nmax);
+    __ sub(count, nmax, 16);
+    __ br(Assembler::HS, L_nmax_loop);
+
+    __ bind(L_by16);
+    __ adds(len, len, count);
+    __ br(Assembler::LO, L_by1);
+
+    __ bind(L_by16_loop);
+
+    __ ldp(temp0, temp1, Address(__ post(buff, 16)));
+
+    __ add(s1, s1, temp0, ext::uxtb);
+    __ ubfx(temp2, temp0, 8, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp0, 16, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp0, 24, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp0, 32, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp0, 40, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp0, 48, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp0, Assembler::LSR, 56);
+    __ add(s2, s2, s1);
+
+    __ add(s1, s1, temp1, ext::uxtb);
+    __ ubfx(temp2, temp1, 8, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp1, 16, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp1, 24, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp1, 32, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp1, 40, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ ubfx(temp2, temp1, 48, 8);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp2);
+    __ add(s2, s2, s1);
+    __ add(s1, s1, temp1, Assembler::LSR, 56);
+    __ add(s2, s2, s1);
+
+    __ subs(len, len, 16);
+    __ br(Assembler::HS, L_by16_loop);
+
+    __ bind(L_by1);
+    __ adds(len, len, 15);
+    __ br(Assembler::LO, L_do_mod);
+
+    __ bind(L_by1_loop);
+    __ ldrb(temp0, Address(__ post(buff, 1)));
+    __ add(s1, temp0, s1);
+    __ add(s2, s2, s1);
+    __ subs(len, len, 1);
+    __ br(Assembler::HS, L_by1_loop);
+
+    __ bind(L_do_mod);
+    // s1 = s1 % BASE
+    __ lsr(temp0, s1, 16);
+    __ lsl(temp1, temp0, 4);
+    __ sub(temp1, temp1, temp0);
+    __ add(temp1, temp1, s1, ext::uxth);
+
+    __ lsr(temp0, temp1, 16);
+    __ lsl(s1, temp0, 4);
+    __ sub(s1, s1, temp0);
+    __ add(s1, s1, temp1, ext::uxth);
+
+    __ subs(temp0, s1, base);
+    __ csel(s1, temp0, s1, Assembler::HS);
+
+    // s2 = s2 % BASE
+    __ lsr(temp0, s2, 16);
+    __ lsl(temp1, temp0, 4);
+    __ sub(temp1, temp1, temp0);
+    __ add(temp1, temp1, s2, ext::uxth);
+
+    __ lsr(temp0, temp1, 16);
+    __ lsl(s2, temp0, 4);
+    __ sub(s2, s2, temp0);
+    __ add(s2, s2, temp1, ext::uxth);
+
+    __ subs(temp0, s2, base);
+    __ csel(s2, temp0, s2, Assembler::HS);
+
+    // Combine lower bits and higher bits
+    __ bind(L_combine);
+    __ orr(s1, s1, s2, Assembler::LSL, 16); // adler = s1 | (s2 << 16)
+
+    __ ret(lr);
+
+    return start;
+  }
+
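The stub follows the standard scalar Adler-32 scheme: s1 accumulates the byte sum, s2 the running sum of s1, with BASE = 65521 (0xfff1) and NMAX = 5552 (0x15b0) bounding how many bytes can be accumulated before a 32-bit overflow. The lsr/lsl/sub/add sequences above are a division-free reduction that relies on 2^16 mod 65521 == 15, applied twice and finished with one conditional subtract. A compact sketch of the same computation in plain C++ (not part of the patch; reduce and adler32_ref are illustrative names):

    #include <cstddef>
    #include <cstdint>

    static inline uint64_t reduce(uint64_t x) {            // x % 65521 without a divide
      x = (x & 0xffff) + 15 * (x >> 16);                   // 2^16 == 15 (mod 65521)
      x = (x & 0xffff) + 15 * (x >> 16);                   // now x < 2 * 65521
      return x >= 65521 ? x - 65521 : x;                   // final conditional subtract
    }

    uint32_t adler32_ref(uint32_t adler, const uint8_t* buf, size_t len) {
      uint64_t s1 = adler & 0xffff;                        // low half of the incoming checksum
      uint64_t s2 = (adler >> 16) & 0xffff;                // high half
      while (len > 0) {
        size_t n = len < 5552 ? len : 5552;                // at most NMAX bytes per round
        len -= n;
        for (size_t i = 0; i < n; i++) {
          s1 += buf[i];                                    // s1 = sum of bytes
          s2 += s1;                                        // s2 = sum of the running s1 values
        }
        buf += n;
        s1 = reduce(s1);
        s2 = reduce(s2);
      }
      return (uint32_t)(s1 | (s2 << 16));                  // adler = s1 | (s2 << 16), as at L_combine
    }

The stub is only installed when UseAdler32Intrinsics is enabled; the vm_version_aarch64.cpp hunk later in this changeset turns that flag on by default.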
   /**
    *  Arguments:
    *
@@ -3613,6 +3884,11 @@
       StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C();
     }
 
+    // generate Adler32 intrinsics code
+    if (UseAdler32Intrinsics) {
+      StubRoutines::_updateBytesAdler32 = generate_updateBytesAdler32();
+    }
+
     // Safefetch stubs.
     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
                                                        &StubRoutines::_safefetch32_fault_pc,
--- a/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -535,7 +535,7 @@
 //      r0
 //      c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs)
 //      rscratch1, rscratch2 (scratch regs)
-void InterpreterGenerator::lock_method(void) {
+void TemplateInterpreterGenerator::lock_method() {
   // synchronize method
   const Address access_flags(rmethod, Method::access_flags_offset());
   const Address monitor_block_top(
@@ -721,8 +721,7 @@
 
     // generate a vanilla interpreter entry as the slow path
     __ bind(slow_path);
-    (void) generate_normal_entry(false);
-
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
     return entry;
   }
 #endif // INCLUDE_ALL_GCS
@@ -779,12 +778,10 @@
 
     // generate a vanilla native entry as the slow path
     __ bind(slow_path);
-
-    (void) generate_native_entry(false);
-
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
     return entry;
   }
-  return generate_native_entry(false);
+  return NULL;
 }
 
 /**
@@ -841,12 +838,10 @@
 
     // generate a vanilla native entry as the slow path
     __ bind(slow_path);
-
-    (void) generate_native_entry(false);
-
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native));
     return entry;
   }
-  return generate_native_entry(false);
+  return NULL;
 }
 
 void InterpreterGenerator::bang_stack_shadow_pages(bool native_call) {
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -178,9 +178,8 @@
     warning("UseCRC32 specified, but not supported on this CPU");
   }
 
-  if (UseAdler32Intrinsics) {
-    warning("Adler32Intrinsics not available on this CPU.");
-    FLAG_SET_DEFAULT(UseAdler32Intrinsics, false);
+  if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) {
+    FLAG_SET_DEFAULT(UseAdler32Intrinsics, true);
   }
 
   if (auxv & HWCAP_AES) {
--- a/src/cpu/ppc/vm/c2_globals_ppc.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/ppc/vm/c2_globals_ppc.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -60,6 +60,7 @@
 define_pd_global(bool, OptoPeephole,                 false);
 define_pd_global(bool, UseCISCSpill,                 false);
 define_pd_global(bool, OptoBundling,                 false);
+define_pd_global(bool, OptoRegScheduling,            false);
 // GL:
 // Detected a problem with unscaled compressed oops and
 // narrow_oop_use_complex_address() == false.
--- a/src/cpu/ppc/vm/compiledIC_ppc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/ppc/vm/compiledIC_ppc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -94,10 +94,12 @@
 
 const int IC_pos_in_java_to_interp_stub = 8;
 #define __ _masm.
-address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark/* = NULL*/) {
 #ifdef COMPILER2
-  // Get the mark within main instrs section which is set to the address of the call.
-  address call_addr = cbuf.insts_mark();
+  if (mark == NULL) {
+    // Get the mark within main instrs section which is set to the address of the call.
+    mark = cbuf.insts_mark();
+  }
 
   // Note that the code buffer's insts_mark is always relative to insts.
   // That's why we must use the macroassembler to generate a stub.
@@ -117,7 +119,7 @@
   // Create a static stub relocation which relates this stub
   // with the call instruction at insts_call_instruction_offset in the
   // instructions code-section.
-  __ relocate(static_stub_Relocation::spec(call_addr));
+  __ relocate(static_stub_Relocation::spec(mark));
   const int stub_start_offset = __ offset();
 
   // Now, create the stub's code:
--- a/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -46,7 +46,7 @@
   MacroAssembler::null_check_throw(a, offset, temp_reg, exception_entry);
 }
 
-void InterpreterMacroAssembler::branch_to_entry(address entry, Register Rscratch) {
+void InterpreterMacroAssembler::jump_to_entry(address entry, Register Rscratch) {
   assert(entry, "Entry must have been generated by now");
   if (is_within_range_of_b(entry, pc())) {
     b(entry);
--- a/src/cpu/ppc/vm/interp_masm_ppc_64.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/ppc/vm/interp_masm_ppc_64.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -39,7 +39,7 @@
 
   void null_check_throw(Register a, int offset, Register temp_reg);
 
-  void branch_to_entry(address entry, Register Rscratch);
+  void jump_to_entry(address entry, Register Rscratch);
 
   // Handy address generation macros.
 #define thread_(field_name) in_bytes(JavaThread::field_name ## _offset()), R16_thread
--- a/src/cpu/ppc/vm/interpreterGenerator_ppc.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/ppc/vm/interpreterGenerator_ppc.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -31,12 +31,12 @@
  private:
 
   address generate_abstract_entry(void);
-  address generate_jump_to_normal_entry(void);
-  address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); }
-  address generate_empty_entry(void) { return generate_jump_to_normal_entry(); }
+  address generate_accessor_entry(void) { return NULL; }
+  address generate_empty_entry(void) { return NULL; }
   address generate_Reference_get_entry(void);
 
   address generate_CRC32_update_entry();
   address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
+  address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
 
 #endif // CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP
--- a/src/cpu/ppc/vm/interpreter_ppc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/ppc/vm/interpreter_ppc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -427,18 +427,6 @@
   return entry;
 }
 
-// Call an accessor method (assuming it is resolved, otherwise drop into
-// vanilla (slow path) entry.
-address InterpreterGenerator::generate_jump_to_normal_entry(void) {
-  address entry = __ pc();
-  address normal_entry = Interpreter::entry_for_kind(Interpreter::zerolocals);
-  assert(normal_entry != NULL, "should already be generated.");
-  __ branch_to_entry(normal_entry, R11_scratch1);
-  __ flush();
-
-  return entry;
-}
-
 // Abstract method entry.
 //
 address InterpreterGenerator::generate_abstract_entry(void) {
@@ -529,13 +517,13 @@
   //   regular method entry code to generate the NPE.
   //
 
-  address entry = __ pc();
+  if (UseG1GC) {
+    address entry = __ pc();
 
-  const int referent_offset = java_lang_ref_Reference::referent_offset;
-  guarantee(referent_offset > 0, "referent offset not initialized");
+    const int referent_offset = java_lang_ref_Reference::referent_offset;
+    guarantee(referent_offset > 0, "referent offset not initialized");
 
-  if (UseG1GC) {
-     Label slow_path;
+    Label slow_path;
 
     // Debugging not possible, so can't use __ skip_if_jvmti_mode(slow_path, GR31_SCRATCH);
 
@@ -577,13 +565,11 @@
 
     // Generate regular method entry.
     __ bind(slow_path);
-    __ branch_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R11_scratch1);
-    __ flush();
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals), R11_scratch1);
+    return entry;
+  }
 
-    return entry;
-  } else {
-    return generate_jump_to_normal_entry();
-  }
+  return NULL;
 }
 
 void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
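Restructuring the entry this way makes the G1 dependence explicit: the special Reference.get entry exists only to apply G1's SATB pre-barrier to the referent as it is read, which the normal getfield path does not do for loads, so with G1 off there is nothing to generate and returning NULL lets the interpreter fall back to the standard entry. A minimal sketch of what the generated entry amounts to, using placeholder names (kReferentOffset and satb_enqueue are illustrative, not HotSpot identifiers):

    #include <cstddef>

    struct oopDesc;
    typedef oopDesc* oop;

    static const int kReferentOffset = 16;                 // placeholder for java_lang_ref_Reference::referent_offset

    static void satb_enqueue(oop referent) {
      // stands in for the G1 pre-barrier: record the referent in the SATB mark queue
      (void)referent;
    }

    oop reference_get(oop reference) {
      oop referent = *reinterpret_cast<oop*>(
          reinterpret_cast<char*>(reference) + kReferentOffset);
      if (referent != NULL) {
        satb_enqueue(referent);                            // keep the weakly-reachable referent live for marking
      }
      return referent;
    }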
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/jvmciCodeInstaller_ppc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "jvmci/jvmciCodeInstaller.hpp"
+#include "jvmci/jvmciRuntime.hpp"
+#include "jvmci/jvmciCompilerToVM.hpp"
+#include "jvmci/jvmciJavaClasses.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_ppc.inline.hpp"
+
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+  Unimplemented();
+  return 0;
+}
+
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+  Unimplemented();
+}
+
+void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
+  Unimplemented();
+}
+
+void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
+  Unimplemented();
+}
+
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+  Unimplemented();
+}
+
+void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+  Unimplemented();
+}
+
+void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+  Unimplemented();
+}
+
+// convert JVMCI register indices (as used in oop maps) to HotSpot registers
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+  return NULL;
+}
+
+bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
+  return false;
+}
--- a/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -594,13 +594,6 @@
            "can't identify emitted call");
   } else {
     // variant 1:
-#if defined(ABI_ELFv2)
-    nop();
-    calculate_address_from_global_toc(R12, dest, true, true, false);
-    mtctr(R12);
-    nop();
-    nop();
-#else
     mr(R0, R11);  // spill R11 -> R0.
 
     // Load the destination address into CTR,
@@ -610,7 +603,6 @@
     mtctr(R11);
     mr(R11, R0);  // spill R11 <- R0.
     nop();
-#endif
 
     // do the call/jump
     if (link) {
@@ -4292,7 +4284,7 @@
 
 static void stop_on_request(int tp, const char* msg) {
   tty->print("PPC assembly code requires stop: (%s) %s\n", stop_types[tp%/*stop_end*/4], msg);
-  guarantee(false, err_msg("PPC assembly code requires stop: %s", msg));
+  guarantee(false, "PPC assembly code requires stop: %s", msg);
 }
 
 // Call a C-function that prints output.
--- a/src/cpu/ppc/vm/methodHandles_ppc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/ppc/vm/methodHandles_ppc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -60,7 +60,7 @@
 
 #ifdef ASSERT
 static int check_nonzero(const char* xname, int x) {
-  assert(x != 0, err_msg("%s should be nonzero", xname));
+  assert(x != 0, "%s should be nonzero", xname);
   return x;
 }
 #define NONZERO(x) check_nonzero(#x, x)
@@ -434,7 +434,7 @@
     }
 
     default:
-      fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
+      fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
       break;
     }
 
--- a/src/cpu/ppc/vm/nativeInst_ppc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/ppc/vm/nativeInst_ppc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -149,7 +149,7 @@
   if (!NativeCall::is_call_at(addr)) {
     tty->print_cr("not a NativeCall at " PTR_FORMAT, p2i(addr));
     // TODO: PPC port: Disassembler::decode(addr - 20, addr + 20, tty);
-    fatal(err_msg("not a NativeCall at " PTR_FORMAT, p2i(addr)));
+    fatal("not a NativeCall at " PTR_FORMAT, p2i(addr));
   }
 }
 #endif // ASSERT
@@ -162,7 +162,7 @@
   if (!NativeFarCall::is_far_call_at(addr)) {
     tty->print_cr("not a NativeFarCall at " PTR_FORMAT, p2i(addr));
     // TODO: PPC port: Disassembler::decode(addr, 20, 20, tty);
-    fatal(err_msg("not a NativeFarCall at " PTR_FORMAT, p2i(addr)));
+    fatal("not a NativeFarCall at " PTR_FORMAT, p2i(addr));
   }
 }
 #endif // ASSERT
@@ -308,7 +308,7 @@
         ! MacroAssembler::is_bl(*((int*) addr))) {
       tty->print_cr("not a NativeMovConstReg at " PTR_FORMAT, p2i(addr));
       // TODO: PPC port: Disassembler::decode(addr, 20, 20, tty);
-      fatal(err_msg("not a NativeMovConstReg at " PTR_FORMAT, p2i(addr)));
+      fatal("not a NativeMovConstReg at " PTR_FORMAT, p2i(addr));
     }
   }
 }
@@ -346,7 +346,7 @@
   if (!NativeJump::is_jump_at(addr)) {
     tty->print_cr("not a NativeJump at " PTR_FORMAT, p2i(addr));
     // TODO: PPC port: Disassembler::decode(addr, 20, 20, tty);
-    fatal(err_msg("not a NativeJump at " PTR_FORMAT, p2i(addr)));
+    fatal("not a NativeJump at " PTR_FORMAT, p2i(addr));
   }
 }
 #endif // ASSERT
--- a/src/cpu/ppc/vm/ppc.ad	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/ppc/vm/ppc.ad	Mon Oct 19 12:30:17 2015 -0700
@@ -2064,6 +2064,10 @@
   return true;  // Per default match rules are supported.
 }
 
+const int Matcher::float_pressure(int default_pressure_threshold) {
+  return default_pressure_threshold;
+}
+
 int Matcher::regnum_to_fpu_offset(int regnum) {
   // No user for this method?
   Unimplemented();
--- a/src/cpu/ppc/vm/relocInfo_ppc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/ppc/vm/relocInfo_ppc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -125,8 +125,5 @@
 void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
 }
 
-void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
-}
-
 void metadata_Relocation::pd_fix_value(address x) {
 }
--- a/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -475,9 +475,8 @@
 
 // Is vector's size (in bytes) bigger than a size saved by default?
 bool SharedRuntime::is_wide_vector(int size) {
-  ResourceMark rm;
   // Note, MaxVectorSize == 8 on PPC64.
-  assert(size <= 8, err_msg_res("%d bytes vectors are not supported", size));
+  assert(size <= 8, "%d bytes vectors are not supported", size);
   return size > 8;
 }
 #ifdef COMPILER2
@@ -957,11 +956,11 @@
   return c2i_entrypoint;
 }
 
-static void gen_i2c_adapter(MacroAssembler *masm,
-                            int total_args_passed,
-                            int comp_args_on_stack,
-                            const BasicType *sig_bt,
-                            const VMRegPair *regs) {
+void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
+                                    int total_args_passed,
+                                    int comp_args_on_stack,
+                                    const BasicType *sig_bt,
+                                    const VMRegPair *regs) {
 
   // Load method's entry-point from method.
   __ ld(R12_scratch2, in_bytes(Method::from_compiled_offset()), R19_method);
@@ -1631,7 +1630,7 @@
   } else if (iid == vmIntrinsics::_invokeBasic) {
     has_receiver = true;
   } else {
-    fatal(err_msg_res("unexpected intrinsic id %d", iid));
+    fatal("unexpected intrinsic id %d", iid);
   }
 
   if (member_reg != noreg) {
--- a/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/ppc/vm/stubGenerator_ppc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -841,7 +841,7 @@
   // Only called by MacroAssembler::verify_oop
   static void verify_oop_helper(const char* message, oop o) {
     if (!o->is_oop_or_null()) {
-      fatal(message);
+      fatal("%s", message);
     }
     ++ StubRoutines::_verify_oop_count;
   }
--- a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -620,7 +620,7 @@
 address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {
   if (!math_entry_available(kind)) {
     NOT_PRODUCT(__ should_not_reach_here();)
-    return Interpreter::entry_for_kind(Interpreter::zerolocals);
+    return NULL;
   }
 
   address entry = __ pc();
@@ -1126,14 +1126,6 @@
 
   generate_fixed_frame(false, Rsize_of_parameters, Rsize_of_locals);
 
-#ifdef FAST_DISPATCH
-  __ unimplemented("Fast dispatch in generate_normal_entry");
-#if 0
-  __ set((intptr_t)Interpreter::dispatch_table(), IdispatchTables);
-  // Set bytecode dispatch table base.
-#endif
-#endif
-
   // --------------------------------------------------------------------------
   // Zero out non-parameter locals.
   // Note: *Always* zero out non-parameter locals as Sparc does. It's not
@@ -1266,9 +1258,8 @@
  *   int java.util.zip.CRC32.update(int crc, int b)
  */
 address InterpreterGenerator::generate_CRC32_update_entry() {
-  address start = __ pc();  // Remember stub start address (is rtn value).
-
   if (UseCRC32Intrinsics) {
+    address start = __ pc();  // Remember stub start address (is rtn value).
     Label slow_path;
 
     // Safepoint check
@@ -1313,11 +1304,11 @@
     // Generate a vanilla native entry as the slow path.
     BLOCK_COMMENT("} CRC32_update");
     BIND(slow_path);
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), R11_scratch1);
+    return start;
   }
 
-  (void) generate_native_entry(false);
-
-  return start;
+  return NULL;
 }
 
 // CRC32 Intrinsics.
@@ -1327,9 +1318,8 @@
  *   int java.util.zip.CRC32.updateByteBuffer(int crc, long* buf, int off, int len)
  */
 address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {
-  address start = __ pc();  // Remember stub start address (is rtn value).
-
   if (UseCRC32Intrinsics) {
+    address start = __ pc();  // Remember stub start address (is rtn value).
     Label slow_path;
 
     // Safepoint check
@@ -1406,11 +1396,11 @@
     // Generate a vanilla native entry as the slow path.
     BLOCK_COMMENT("} CRC32_updateBytes(Buffer)");
     BIND(slow_path);
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native), R11_scratch1);
+    return start;
   }
 
-  (void) generate_native_entry(false);
-
-  return start;
+  return NULL;
 }
 
 // These should never be compiled since the interpreter will prefer
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -389,7 +389,7 @@
 
   static void assert_signed_range(intptr_t x, int nbits) {
     assert(nbits == 32 || (-(1 << nbits-1) <= x  &&  x < ( 1 << nbits-1)),
-           err_msg("value out of range: x=" INTPTR_FORMAT ", nbits=%d", x, nbits));
+           "value out of range: x=" INTPTR_FORMAT ", nbits=%d", x, nbits);
   }
 
   static void assert_signed_word_disp_range(intptr_t x, int nbits) {
--- a/src/cpu/sparc/vm/c2_globals_sparc.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/c2_globals_sparc.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -64,6 +64,7 @@
 define_pd_global(bool, UseCISCSpill,                 false);
 define_pd_global(bool, OptoBundling,                 false);
 define_pd_global(bool, OptoScheduling,               true);
+define_pd_global(bool, OptoRegScheduling,            false);
 
 #ifdef _LP64
 // We need to make sure that all generated code is within
--- a/src/cpu/sparc/vm/compiledIC_sparc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/compiledIC_sparc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -53,14 +53,15 @@
 // ----------------------------------------------------------------------------
 
 #define __ _masm.
-address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
-#ifdef COMPILER2
+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
   // Stub is fixed up when the corresponding call is converted from calling
   // compiled code to calling interpreted code.
   // set (empty), G5
   // jmp -1
 
-  address mark = cbuf.insts_mark();  // Get mark within main instrs section.
+  if (mark == NULL) {
+    mark = cbuf.insts_mark();  // Get mark within main instrs section.
+  }
 
   MacroAssembler _masm(&cbuf);
 
@@ -80,12 +81,11 @@
 
   __ delayed()->nop();
 
+  assert(__ pc() - base <= to_interp_stub_size(), "wrong stub size");
+
   // Update current stubs pointer and restore code_end.
   __ end_a_stub();
   return base;
-#else
-  ShouldNotReachHere();
-#endif
 }
 #undef __
 
--- a/src/cpu/sparc/vm/cppInterpreterGenerator_sparc.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/cppInterpreterGenerator_sparc.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -31,6 +31,7 @@
 
   void generate_more_monitors();
   void generate_deopt_handling();
+  void lock_method(void);
   void adjust_callers_stack(Register args);
   void generate_compute_interpreter_state(const Register state,
                                           const Register prev_state,
--- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -468,7 +468,7 @@
 
   // If G1 is not enabled then attempt to go through the accessor entry point
   // Reference.get is an accessor
-  return generate_jump_to_normal_entry();
+  return NULL;
 }
 
 //
@@ -1164,7 +1164,7 @@
 }
 // Find preallocated  monitor and lock method (C++ interpreter)
 //
-void InterpreterGenerator::lock_method(void) {
+void CppInterpreterGenerator::lock_method() {
 // Lock the current method.
 // Destroys registers L2_scratch, L3_scratch, O0
 //
--- a/src/cpu/sparc/vm/globals_sparc.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/globals_sparc.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -82,6 +82,7 @@
                                                                             \
   product(intx, UseVIS, 99,                                                 \
           "Highest supported VIS instructions set on Sparc")                \
+          range(0, 99)                                                      \
                                                                             \
   product(bool, UseCBCond, false,                                           \
           "Use compare and branch instruction on SPARC")                    \
@@ -91,12 +92,14 @@
                                                                             \
   product(intx, BlockZeroingLowLimit, 2048,                                 \
           "Minimum size in bytes when block zeroing will be used")          \
+          range(1, max_jint)                                                \
                                                                             \
   product(bool, UseBlockCopy, false,                                        \
           "Use special cpu instructions for block copy")                    \
                                                                             \
   product(intx, BlockCopyLowLimit, 2048,                                    \
           "Minimum size in bytes when block copy will be used")             \
+          range(1, max_jint)                                                \
                                                                             \
   develop(bool, UseV8InstrsOnly, false,                                     \
           "Use SPARC-V8 Compliant instruction subset")                      \
@@ -108,9 +111,11 @@
           "Do not use swap instructions, but only CAS (in a loop) on SPARC")\
                                                                             \
   product(uintx,  ArraycopySrcPrefetchDistance, 0,                          \
-          "Distance to prefetch source array in arracopy")                  \
+          "Distance to prefetch source array in arraycopy")                 \
+          constraint(ArraycopySrcPrefetchDistanceConstraintFunc, AfterErgo) \
                                                                             \
   product(uintx,  ArraycopyDstPrefetchDistance, 0,                          \
-          "Distance to prefetch destination array in arracopy")             \
+          "Distance to prefetch destination array in arraycopy")            \
+          constraint(ArraycopyDstPrefetchDistanceConstraintFunc, AfterErgo) \
 
 #endif // CPU_SPARC_VM_GLOBALS_SPARC_HPP
--- a/src/cpu/sparc/vm/interp_masm_sparc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -59,6 +59,13 @@
 
 #endif // CC_INTERP
 
+void InterpreterMacroAssembler::jump_to_entry(address entry) {
+  assert(entry, "Entry must have been generated by now");
+  AddressLiteral al(entry);
+  jump_to(al, G3_scratch);
+  delayed()->nop();
+}
+
 void InterpreterMacroAssembler::compute_extra_locals_size_in_bytes(Register args_size, Register locals_size, Register delta) {
   // Note: this algorithm is also used by C1's OSR entry sequence.
   // Any changes should also be applied to CodeEmitter::emit_osr_entry().
@@ -1643,26 +1650,73 @@
     bind(skip_receiver_profile);
 
     // The method data pointer needs to be updated to reflect the new target.
+#if INCLUDE_JVMCI
+    if (MethodProfileWidth == 0) {
+      update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size()));
+    }
+#else
     update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size()));
-    bind (profile_continue);
+#endif
+    bind(profile_continue);
   }
 }
 
-void InterpreterMacroAssembler::record_klass_in_profile_helper(
-                                        Register receiver, Register scratch,
-                                        int start_row, Label& done, bool is_virtual_call) {
+#if INCLUDE_JVMCI
+void InterpreterMacroAssembler::profile_called_method(Register method, Register scratch) {
+  assert_different_registers(method, scratch);
+  if (ProfileInterpreter && MethodProfileWidth > 0) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(profile_continue);
+
+    Label done;
+    record_item_in_profile_helper(method, scratch, 0, done, MethodProfileWidth,
+      &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset()));
+    bind(done);
+
+    update_mdp_by_constant(in_bytes(VirtualCallData::virtual_call_data_size()));
+    bind(profile_continue);
+  }
+}
+#endif // INCLUDE_JVMCI
+
+void InterpreterMacroAssembler::record_klass_in_profile_helper(Register receiver, Register scratch,
+                                                               Label& done, bool is_virtual_call) {
   if (TypeProfileWidth == 0) {
     if (is_virtual_call) {
       increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch);
     }
-    return;
+#if INCLUDE_JVMCI
+    else if (EnableJVMCI) {
+      increment_mdp_data_at(in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()), scratch);
+    }
+#endif
+  } else {
+    int non_profiled_offset = -1;
+    if (is_virtual_call) {
+      non_profiled_offset = in_bytes(CounterData::count_offset());
+    }
+#if INCLUDE_JVMCI
+    else if (EnableJVMCI) {
+      non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset());
+    }
+#endif
+
+    record_item_in_profile_helper(receiver, scratch, 0, done, TypeProfileWidth,
+      &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset);
   }
-
-  int last_row = VirtualCallData::row_limit() - 1;
+}
+
+void InterpreterMacroAssembler::record_item_in_profile_helper(Register item,
+                                          Register scratch, int start_row, Label& done, int total_rows,
+                                          OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
+                                          int non_profiled_offset) {
+  int last_row = total_rows - 1;
   assert(start_row <= last_row, "must be work left to do");
-  // Test this row for both the receiver and for null.
+  // Test this row for both the item and for null.
   // Take any of three different outcomes:
-  //   1. found receiver => increment count and goto done
+  //   1. found item => increment count and goto done
   //   2. found null => keep looking for case 1, maybe allocate this cell
   //   3. found something else => keep looking for cases 1 and 2
   // Case 3 is handled by a recursive call.
@@ -1670,28 +1724,28 @@
     Label next_test;
     bool test_for_null_also = (row == start_row);
 
-    // See if the receiver is receiver[n].
-    int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
-    test_mdp_data_at(recvr_offset, receiver, next_test, scratch);
+    // See if the item is item[n].
+    int item_offset = in_bytes(item_offset_fn(row));
+    test_mdp_data_at(item_offset, item, next_test, scratch);
     // delayed()->tst(scratch);
 
-    // The receiver is receiver[n].  Increment count[n].
-    int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
+    // The item is item[n].  Increment count[n].
+    int count_offset = in_bytes(item_count_offset_fn(row));
     increment_mdp_data_at(count_offset, scratch);
     ba_short(done);
     bind(next_test);
 
     if (test_for_null_also) {
       Label found_null;
-      // Failed the equality check on receiver[n]...  Test for null.
+      // Failed the equality check on item[n]...  Test for null.
       if (start_row == last_row) {
         // The only thing left to do is handle the null case.
-        if (is_virtual_call) {
+        if (non_profiled_offset >= 0) {
           brx(Assembler::zero, false, Assembler::pn, found_null);
           delayed()->nop();
-          // Receiver did not match any saved receiver and there is no empty row for it.
+          // Item did not match any saved item and there is no empty row for it.
           // Increment total counter to indicate polymorphic case.
-          increment_mdp_data_at(in_bytes(CounterData::count_offset()), scratch);
+          increment_mdp_data_at(non_profiled_offset, scratch);
           ba_short(done);
           bind(found_null);
         } else {
@@ -1705,21 +1759,22 @@
       delayed()->nop();
 
       // Put all the "Case 3" tests here.
-      record_klass_in_profile_helper(receiver, scratch, start_row + 1, done, is_virtual_call);
-
-      // Found a null.  Keep searching for a matching receiver,
+      record_item_in_profile_helper(item, scratch, start_row + 1, done, total_rows,
+        item_offset_fn, item_count_offset_fn, non_profiled_offset);
+
+      // Found a null.  Keep searching for a matching item,
       // but remember that this is an empty (unused) slot.
       bind(found_null);
     }
   }
 
-  // In the fall-through case, we found no matching receiver, but we
-  // observed the receiver[start_row] is NULL.
-
-  // Fill in the receiver field and increment the count.
-  int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
-  set_mdp_data_at(recvr_offset, receiver);
-  int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
+  // In the fall-through case, we found no matching item, but we
+  // observed the item[start_row] is NULL.
+
+  // Fill in the item field and increment the count.
+  int item_offset = in_bytes(item_offset_fn(start_row));
+  set_mdp_data_at(item_offset, item);
+  int count_offset = in_bytes(item_count_offset_fn(start_row));
   mov(DataLayout::counter_increment, scratch);
   set_mdp_data_at(count_offset, scratch);
   if (start_row > 0) {
@@ -1732,7 +1787,7 @@
   assert(ProfileInterpreter, "must be profiling");
   Label done;
 
-  record_klass_in_profile_helper(receiver, scratch, 0, done, is_virtual_call);
+  record_klass_in_profile_helper(receiver, scratch, done, is_virtual_call);
 
   bind (done);
 }
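With the helper generalized, receiver-type rows and (under JVMCI) method rows are filled by the same scan; the recursion in the assembler just unrolls a bounded loop over the rows. The effect, written as plain C++ for clarity (illustrative only; ProfileRow and record_item are not HotSpot names):

    #include <cstdint>

    struct ProfileRow { intptr_t item; intptr_t count; };  // item == 0 marks an empty row

    // total_rows is TypeProfileWidth or MethodProfileWidth; non_profiled_count is passed
    // only when a polymorphic overflow counter should be bumped (virtual calls, or
    // JVMCI's non-profiled count), matching non_profiled_offset >= 0 above.
    void record_item(ProfileRow* rows, int total_rows, intptr_t item,
                     intptr_t* non_profiled_count) {
      for (int row = 0; row < total_rows; row++) {
        if (rows[row].item == item) { rows[row].count++; return; }  // case 1: found the item
        if (rows[row].item == 0) {                                  // case 2: first empty row
          rows[row].item  = item;                                   //   claim it for this item
          rows[row].count = 1;                                      //   DataLayout::counter_increment
          return;
        }
        // case 3: the row holds some other item -- keep scanning
      }
      if (non_profiled_count != NULL) {
        (*non_profiled_count)++;   // table full, no match: only note that the site is polymorphic
      }
    }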
@@ -1788,7 +1843,7 @@
     // The method data pointer needs to be updated.
     int mdp_delta = in_bytes(BitData::bit_data_size());
     if (TypeProfileCasts) {
-      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+      mdp_delta = in_bytes(ReceiverTypeData::receiver_type_data_size());
     }
     update_mdp_by_constant(mdp_delta);
 
@@ -1806,7 +1861,7 @@
 
     int mdp_delta = in_bytes(BitData::bit_data_size());
     if (TypeProfileCasts) {
-      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+      mdp_delta = in_bytes(ReceiverTypeData::receiver_type_data_size());
 
       // Record the object type.
       record_klass_in_profile(klass, scratch, false);
@@ -1828,7 +1883,7 @@
 
     int count_offset = in_bytes(CounterData::count_offset());
     // Back up the address, since we have already bumped the mdp.
-    count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());
+    count_offset -= in_bytes(ReceiverTypeData::receiver_type_data_size());
 
     // *Decrement* the counter.  We expect to see zero or small negatives.
     increment_mdp_data_at(count_offset, scratch, true);
--- a/src/cpu/sparc/vm/interp_masm_sparc.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/interp_masm_sparc.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -30,6 +30,8 @@
 
 // This file specializes the assember with interpreter-specific macros
 
+typedef ByteSize (*OffsetFunction)(uint);
+
 REGISTER_DECLARATION(     Register, Otos_i , O0); // tos for ints, etc
 REGISTER_DECLARATION(     Register, Otos_l , O0); // for longs
 REGISTER_DECLARATION(     Register, Otos_l1, O0); // for 1st part of longs
@@ -80,6 +82,8 @@
   InterpreterMacroAssembler(CodeBuffer* c)
     : MacroAssembler(c) {}
 
+  void jump_to_entry(address entry);
+
 #ifndef CC_INTERP
   virtual void load_earlyret_value(TosState state);
 
@@ -299,7 +303,11 @@
 
   void record_klass_in_profile(Register receiver, Register scratch, bool is_virtual_call);
   void record_klass_in_profile_helper(Register receiver, Register scratch,
-                                      int start_row, Label& done, bool is_virtual_call);
+                                      Label& done, bool is_virtual_call);
+  void record_item_in_profile_helper(Register item,
+                                     Register scratch, int start_row, Label& done, int total_rows,
+                                     OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
+                                     int non_profiled_offset);
 
   void update_mdp_by_offset(int offset_of_disp, Register scratch);
   void update_mdp_by_offset(Register reg, int offset_of_disp,
@@ -312,6 +320,7 @@
   void profile_call(Register scratch);
   void profile_final_call(Register scratch);
   void profile_virtual_call(Register receiver, Register scratch, bool receiver_can_be_null = false);
+  void profile_called_method(Register method, Register scratch) NOT_JVMCI_RETURN;
   void profile_ret(TosState state, Register return_bci, Register scratch);
   void profile_null_seen(Register scratch);
   void profile_typecheck(Register klass, Register scratch);
--- a/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -34,11 +34,9 @@
   address generate_abstract_entry(void);
   // there are no math intrinsics on sparc
   address generate_math_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
-  address generate_jump_to_normal_entry(void);
-  address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); }
-  address generate_empty_entry(void) { return generate_jump_to_normal_entry(); }
+  address generate_accessor_entry(void) { return NULL; }
+  address generate_empty_entry(void) { return NULL; }
   address generate_Reference_get_entry(void);
-  void lock_method(void);
   void save_native_result(void);
   void restore_native_result(void);
 
@@ -48,4 +46,5 @@
   // Not supported
   address generate_CRC32_update_entry() { return NULL; }
   address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
+  address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { return NULL; }
 #endif // CPU_SPARC_VM_INTERPRETERGENERATOR_SPARC_HPP
--- a/src/cpu/sparc/vm/interpreter_sparc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/interpreter_sparc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -241,15 +241,6 @@
 
 // Various method entries
 
-address InterpreterGenerator::generate_jump_to_normal_entry(void) {
-  address entry = __ pc();
-  assert(Interpreter::entry_for_kind(Interpreter::zerolocals) != NULL, "should already be generated");
-  AddressLiteral al(Interpreter::entry_for_kind(Interpreter::zerolocals));
-  __ jump_to(al, G3_scratch);
-  __ delayed()->nop();
-  return entry;
-}
-
 // Abstract method entry
 // Attempt to execute abstract method. Throw exception
 //
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/sparc/vm/jvmciCodeInstaller_sparc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "jvmci/jvmciCodeInstaller.hpp"
+#include "jvmci/jvmciRuntime.hpp"
+#include "jvmci/jvmciCompilerToVM.hpp"
+#include "jvmci/jvmciJavaClasses.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_sparc.inline.hpp"
+
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+  if (inst->is_call() || inst->is_jump()) {
+    return pc_offset + NativeCall::instruction_size;
+  } else if (inst->is_call_reg()) {
+    return pc_offset + NativeCallReg::instruction_size;
+  } else if (inst->is_sethi()) {
+    return pc_offset + NativeFarCall::instruction_size;
+  } else {
+    fatal("unsupported type of instruction for call site");
+    return 0;
+  }
+}
+
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+  address pc = _instructions->start() + pc_offset;
+  Handle obj = HotSpotObjectConstantImpl::object(constant);
+  jobject value = JNIHandles::make_local(obj());
+  if (HotSpotObjectConstantImpl::compressed(constant)) {
+#ifdef _LP64
+    int oop_index = _oop_recorder->find_index(value);
+    RelocationHolder rspec = oop_Relocation::spec(oop_index);
+    _instructions->relocate(pc, rspec, 1);
+#else
+    fatal("compressed oop on 32bit");
+#endif
+  } else {
+    NativeMovConstReg* move = nativeMovConstReg_at(pc);
+    move->set_data((intptr_t) value);
+
+    // We need two relocations:  one on the sethi and one on the add.
+    int oop_index = _oop_recorder->find_index(value);
+    RelocationHolder rspec = oop_Relocation::spec(oop_index);
+    _instructions->relocate(pc + NativeMovConstReg::sethi_offset, rspec);
+    _instructions->relocate(pc + NativeMovConstReg::add_offset, rspec);
+  }
+}
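Two relocations are needed because SPARC builds a 32-bit constant from a sethi/add pair: sethi supplies bits 31..10 and the add supplies the low 10 bits, so both instructions have to be rewritten if the oop is ever repatched. The split, for reference (hi22 and lo10 are illustrative helper names):

    #include <cstdint>

    inline uint32_t hi22(uint32_t value) { return value >> 10;  }  // sethi immediate, bits 31..10
    inline uint32_t lo10(uint32_t value) { return value & 0x3ff; } // add immediate, bits 9..0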
+
+void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
+  address pc = _instructions->start() + pc_offset;
+  NativeInstruction* inst = nativeInstruction_at(pc);
+  NativeInstruction* inst1 = nativeInstruction_at(pc + 4);
+  if (inst->is_sethi() && inst1->is_nop()) {
+    address const_start = _constants->start();
+    address dest = _constants->start() + data_offset;
+    if (_constants_size > 0) {
+      _instructions->relocate(pc + NativeMovConstReg::sethi_offset, internal_word_Relocation::spec((address) dest));
+      _instructions->relocate(pc + NativeMovConstReg::add_offset, internal_word_Relocation::spec((address) dest));
+    }
+    TRACE_jvmci_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset);
+  } else {
+    int const_size = align_size_up(_constants->end() - _constants->start(), CodeEntryAlignment);
+    NativeMovRegMem* load = nativeMovRegMem_at(pc);
+    // This offset must match with SPARCLoadConstantTableBaseOp.emitCode
+    load->set_offset(- (const_size - data_offset + Assembler::min_simm13()));
+    TRACE_jvmci_3("relocating ld at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset);
+  }
+}
+
+void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
+  fatal("CodeInstaller::pd_relocate_CodeBlob - sparc unimp");
+}
+
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+  address pc = (address) inst;
+  if (inst->is_call()) {
+    NativeCall* call = nativeCall_at(pc);
+    call->set_destination((address) foreign_call_destination);
+    _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec());
+  } else if (inst->is_sethi()) {
+    NativeJump* jump = nativeJump_at(pc);
+    jump->set_jump_destination((address) foreign_call_destination);
+    _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec());
+  } else {
+    fatal("unknown call or jump instruction at " PTR_FORMAT, p2i(pc));
+  }
+  TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst));
+}
+
+void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+#ifdef ASSERT
+  Method* method = NULL;
+  // we need to check, this might also be an unresolved method
+  if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) {
+    method = getMethodFromHotSpotMethod(hotspot_method);
+  }
+#endif
+  switch (_next_call_type) {
+    case INLINE_INVOKE:
+      break;
+    case INVOKEVIRTUAL:
+    case INVOKEINTERFACE: {
+      assert(method == NULL || !method->is_static(), "cannot call static method with invokeinterface");
+      NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
+      call->set_destination(SharedRuntime::get_resolve_virtual_call_stub());
+      _instructions->relocate(call->instruction_address(), virtual_call_Relocation::spec(_invoke_mark_pc));
+      break;
+    }
+    case INVOKESTATIC: {
+      assert(method == NULL || method->is_static(), "cannot call non-static method with invokestatic");
+      NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
+      call->set_destination(SharedRuntime::get_resolve_static_call_stub());
+      _instructions->relocate(call->instruction_address(), relocInfo::static_call_type);
+      break;
+    }
+    case INVOKESPECIAL: {
+      assert(method == NULL || !method->is_static(), "cannot call static method with invokespecial");
+      NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
+      call->set_destination(SharedRuntime::get_resolve_opt_virtual_call_stub());
+      _instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type);
+      break;
+    }
+    default:
+      fatal("invalid _next_call_type value");
+      break;
+  }
+}
+
+void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+  switch (mark) {
+    case POLL_NEAR:
+      fatal("unimplemented");
+      break;
+    case POLL_FAR:
+      _instructions->relocate(pc, relocInfo::poll_type);
+      break;
+    case POLL_RETURN_NEAR:
+      fatal("unimplemented");
+      break;
+    case POLL_RETURN_FAR:
+      _instructions->relocate(pc, relocInfo::poll_return_type);
+      break;
+    default:
+      fatal("invalid mark value");
+      break;
+  }
+}
+
+// convert JVMCI register indices (as used in oop maps) to HotSpot registers
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+  if (jvmci_reg < RegisterImpl::number_of_registers) {
+    return as_Register(jvmci_reg)->as_VMReg();
+  } else {
+    jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers;
+    floatRegisterNumber += MAX2(0, floatRegisterNumber-32); // Beginning with f32, only every second register is going to be addressed
+    if (floatRegisterNumber < FloatRegisterImpl::number_of_registers) {
+      return as_FloatRegister(floatRegisterNumber)->as_VMReg();
+    }
+    ShouldNotReachHere();
+    return NULL;
+  }
+}
+
+bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
+  return !hotspotRegister->is_FloatRegister();
+}
--- a/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -1596,7 +1596,7 @@
   else {
      ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
   }
-  assert(false, err_msg("DEBUG MESSAGE: %s", msg));
+  assert(false, "DEBUG MESSAGE: %s", msg);
 }
 
 
--- a/src/cpu/sparc/vm/memset_with_concurrent_readers_sparc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/memset_with_concurrent_readers_sparc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -61,8 +61,8 @@
     " sub %[offset], %[end], %[offset]\n\t" // offset := start - end
     " sllx %[offset], 2, %[offset]\n\t" // scale offset for instruction size of 4
     " add %[offset], 40, %[offset]\n\t" // offset += 10 * instruction size
-    " rd %pc, %[pc]\n\t"                // dispatch on scaled offset
-    " jmpl %[pc]+%[offset], %g0\n\t"
+    " rd %%pc, %[pc]\n\t"               // dispatch on scaled offset
+    " jmpl %[pc]+%[offset], %%g0\n\t"
     "  nop\n\t"
     // DISPATCH: no direct reference, but without it the store block may be elided.
     "1:\n\t"
@@ -108,7 +108,7 @@
       // Unroll loop x8.
       " sub %[aend], %[ato], %[temp]\n\t"
       " cmp %[temp], 56\n\t"           // cc := (aligned_end - aligned_to) > 7 words
-      " ba %xcc, 2f\n\t"               // goto TEST always
+      " ba %%xcc, 2f\n\t"              // goto TEST always
       "  sub %[aend], 56, %[temp]\n\t" // limit := aligned_end - 7 words
       // LOOP:
       "1:\n\t"                         // unrolled x8 store loop top
@@ -123,7 +123,7 @@
       " stx %[xvalue], [%[ato]-8]\n\t"
       // TEST:
       "2:\n\t"
-      " bgu,a %xcc, 1b\n\t"            // goto LOOP if more than 7 words remaining
+      " bgu,a %%xcc, 1b\n\t"           // goto LOOP if more than 7 words remaining
       "  add %[ato], 64, %[ato]\n\t"   // aligned_to += 8, for next iteration
       // Fill remaining < 8 full words.
       // Dispatch on (aligned_end - aligned_to).
@@ -132,8 +132,8 @@
       " sub %[ato], %[aend], %[ato]\n\t" // offset := aligned_to - aligned_end
       " srax %[ato], 1, %[ato]\n\t"      // scale offset for instruction size of 4
       " add %[ato], 40, %[ato]\n\t"      // offset += 10 * instruction size
-      " rd %pc, %[temp]\n\t"             // dispatch on scaled offset
-      " jmpl %[temp]+%[ato], %g0\n\t"
+      " rd %%pc, %[temp]\n\t"            // dispatch on scaled offset
+      " jmpl %[temp]+%[ato], %%g0\n\t"
       "  nop\n\t"
       // DISPATCH: no direct reference, but without it the store block may be elided.
       "3:\n\t"
--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -56,7 +56,7 @@
 
 #ifdef ASSERT
 static int check_nonzero(const char* xname, int x) {
-  assert(x != 0, err_msg("%s should be nonzero", xname));
+  assert(x != 0, "%s should be nonzero", xname);
   return x;
 }
 #define NONZERO(x) check_nonzero(#x, x)
@@ -453,7 +453,7 @@
     }
 
     default:
-      fatal(err_msg_res("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
+      fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid));
       break;
     }
 
--- a/src/cpu/sparc/vm/nativeInst_sparc.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/nativeInst_sparc.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -53,6 +53,7 @@
 
   bool is_nop()                        { return long_at(0) == nop_instruction(); }
   bool is_call()                       { return is_op(long_at(0), Assembler::call_op); }
+  bool is_call_reg()                   { return is_op(long_at(0), Assembler::arith_op); }
   bool is_sethi()                      { return (is_op2(long_at(0), Assembler::sethi_op2)
                                           && inv_rd(long_at(0)) != G0); }
 
@@ -415,6 +416,19 @@
   return call;
 }
 
+class NativeCallReg: public NativeInstruction {
+ public:
+  enum Sparc_specific_constants {
+    instruction_size      = 8,
+    return_address_offset = 8,
+    instruction_offset    = 0
+  };
+
+  address next_instruction_address() const {
+    return addr_at(instruction_size);
+  }
+};
+
 // The NativeFarCall is an abstraction for accessing/manipulating native call-anywhere
 // instructions in the sparcv9 vm.  Used to call native methods which may be loaded
 // anywhere in the address space, possibly out of reach of a call instruction.
--- a/src/cpu/sparc/vm/relocInfo_sparc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/relocInfo_sparc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -197,8 +197,5 @@
 void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
 }
 
-void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) {
-}
-
 void metadata_Relocation::pd_fix_value(address x) {
 }
--- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -43,6 +43,9 @@
 #include "compiler/compileBroker.hpp"
 #include "shark/sharkCompiler.hpp"
 #endif
+#if INCLUDE_JVMCI
+#include "jvmci/jvmciJavaClasses.hpp"
+#endif
 
 #define __ masm->
 
@@ -316,7 +319,7 @@
 // 8 bytes FP registers are saved by default on SPARC.
 bool SharedRuntime::is_wide_vector(int size) {
   // Note, MaxVectorSize == 8 on SPARC.
-  assert(size <= 8, err_msg_res("%d bytes vectors are not supported", size));
+  assert(size <= 8, "%d bytes vectors are not supported", size);
   return size > 8;
 }
 
@@ -464,7 +467,7 @@
       break;
 
     default:
-      fatal(err_msg_res("unknown basic type %d", sig_bt[i]));
+      fatal("unknown basic type %d", sig_bt[i]);
       break;
     }
   }
@@ -513,10 +516,10 @@
                               const VMRegPair *regs,
                               Label& skip_fixup);
   void gen_i2c_adapter(int total_args_passed,
-                              // VMReg max_arg,
-                              int comp_args_on_stack, // VMRegStackSlots
-                              const BasicType *sig_bt,
-                              const VMRegPair *regs);
+                       // VMReg max_arg,
+                       int comp_args_on_stack, // VMRegStackSlots
+                       const BasicType *sig_bt,
+                       const VMRegPair *regs);
 
   AdapterGenerator(MacroAssembler *_masm) : masm(_masm) {}
 };
@@ -760,13 +763,11 @@
   __ bind(L_fail);
 }
 
-void AdapterGenerator::gen_i2c_adapter(
-                            int total_args_passed,
-                            // VMReg max_arg,
-                            int comp_args_on_stack, // VMRegStackSlots
-                            const BasicType *sig_bt,
-                            const VMRegPair *regs) {
-
+void AdapterGenerator::gen_i2c_adapter(int total_args_passed,
+                                       // VMReg max_arg,
+                                       int comp_args_on_stack, // VMRegStackSlots
+                                       const BasicType *sig_bt,
+                                       const VMRegPair *regs) {
   // Generate an I2C adapter: adjust the I-frame to make space for the C-frame
   // layout.  Lesp was saved by the calling I-frame and will be restored on
   // return.  Meanwhile, outgoing arg space is all owned by the callee
@@ -990,6 +991,21 @@
 
   // Jump to the compiled code just as if compiled code was doing it.
   __ ld_ptr(G5_method, in_bytes(Method::from_compiled_offset()), G3);
+#if INCLUDE_JVMCI
+  if (EnableJVMCI) {
+    // check if this call should be routed towards a specific entry point
+    __ ld(Address(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), G1);
+    __ cmp(G0, G1);
+    Label no_alternative_target;
+    __ br(Assembler::equal, false, Assembler::pn, no_alternative_target);
+    __ delayed()->nop();
+
+    __ ld_ptr(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()), G3);
+    __ st_ptr(G0, Address(G2_thread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
+
+    __ bind(no_alternative_target);
+  }
+#endif // INCLUDE_JVMCI
 
   // 6243940 We might end up in handle_wrong_method if
   // the callee is deoptimized as we race thru here. If that
@@ -1006,6 +1022,15 @@
   __ delayed()->nop();
 }
 
+void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm,
+                                    int total_args_passed,
+                                    int comp_args_on_stack,
+                                    const BasicType *sig_bt,
+                                    const VMRegPair *regs) {
+  AdapterGenerator agen(masm);
+  agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
+}
+
 // ---------------------------------------------------------------
 AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm,
                                                             int total_args_passed,
@@ -1016,9 +1041,7 @@
                                                             AdapterFingerPrint* fingerprint) {
   address i2c_entry = __ pc();
 
-  AdapterGenerator agen(masm);
-
-  agen.gen_i2c_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs);
+  gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs);
 
 
   // -------------------------------------------------------------------------
@@ -1063,7 +1086,7 @@
   }
 
   address c2i_entry = __ pc();
-
+  AdapterGenerator agen(masm);
   agen.gen_c2i_adapter(total_args_passed, comp_args_on_stack, sig_bt, regs, L_skip_fixup);
 
   __ flush();
@@ -1859,7 +1882,7 @@
   } else if (iid == vmIntrinsics::_invokeBasic) {
     has_receiver = true;
   } else {
-    fatal(err_msg_res("unexpected intrinsic id %d", iid));
+    fatal("unexpected intrinsic id %d", iid);
   }
 
   if (member_reg != noreg) {
@@ -2916,6 +2939,11 @@
     pad += StackShadowPages*16 + 32;
   }
 #endif
+#if INCLUDE_JVMCI
+  if (EnableJVMCI) {
+    pad += 1000; // Increase the buffer size when compiling for JVMCI
+  }
+#endif
 #ifdef _LP64
   CodeBuffer buffer("deopt_blob", 2100+pad, 512);
 #else
@@ -2982,6 +3010,45 @@
   __ ba(cont);
   __ delayed()->mov(Deoptimization::Unpack_deopt, L0deopt_mode);
 
+
+#if INCLUDE_JVMCI
+  Label after_fetch_unroll_info_call;
+  int implicit_exception_uncommon_trap_offset = 0;
+  int uncommon_trap_offset = 0;
+
+  if (EnableJVMCI) {
+    masm->block_comment("BEGIN implicit_exception_uncommon_trap");
+    implicit_exception_uncommon_trap_offset = __ offset() - start;
+
+    __ ld_ptr(G2_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()), O7);
+    __ st_ptr(G0, Address(G2_thread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
+    __ add(O7, -8, O7);
+
+    uncommon_trap_offset = __ offset() - start;
+
+    // Save everything in sight.
+    (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
+    __ set_last_Java_frame(SP, NULL);
+
+    __ ld(G2_thread, in_bytes(JavaThread::pending_deoptimization_offset()), O1);
+    __ sub(G0, 1, L1);
+    __ st(L1, G2_thread, in_bytes(JavaThread::pending_deoptimization_offset()));
+
+    __ mov((int32_t)Deoptimization::Unpack_reexecute, L0deopt_mode);
+    __ mov(G2_thread, O0);
+    __ call(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap));
+    __ delayed()->nop();
+    oop_maps->add_gc_map( __ offset()-start, map->deep_copy());
+    __ get_thread();
+    __ add(O7, 8, O7);
+    __ reset_last_Java_frame();
+
+    __ ba(after_fetch_unroll_info_call);
+    __ delayed()->nop(); // Delay slot
+    masm->block_comment("END implicit_exception_uncommon_trap");
+  } // EnableJVMCI
+#endif // INCLUDE_JVMCI
+
   int exception_offset = __ offset() - start;
 
   // restore G2, the trampoline destroyed it
@@ -3004,6 +3071,7 @@
   int exception_in_tls_offset = __ offset() - start;
 
   // No need to update oop_map  as each call to save_live_registers will produce identical oopmap
+  // Opens a new stack frame
   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
 
   // Restore G2_thread
@@ -3035,7 +3103,12 @@
   // Reexecute entry, similar to c2 uncommon trap
   //
   int reexecute_offset = __ offset() - start;
-
+#if INCLUDE_JVMCI && !defined(COMPILER1)
+  if (EnableJVMCI && UseJVMCICompiler) {
+    // JVMCI does not use this kind of deoptimization
+    __ should_not_reach_here();
+  }
+#endif
   // No need to update oop_map  as each call to save_live_registers will produce identical oopmap
   (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_words);
 
@@ -3059,6 +3132,11 @@
 
   __ reset_last_Java_frame();
 
+#if INCLUDE_JVMCI
+  if (EnableJVMCI) {
+    __ bind(after_fetch_unroll_info_call);
+  }
+#endif
   // NOTE: we know that only O0/O1 will be reloaded by restore_result_registers
   // so this move will survive
 
@@ -3124,6 +3202,12 @@
   masm->flush();
   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_words);
   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
+#if INCLUDE_JVMCI
+  if (EnableJVMCI) {
+    _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
+    _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
+  }
+#endif
 }
 
 #ifdef COMPILER2
--- a/src/cpu/sparc/vm/sparc.ad	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Mon Oct 19 12:30:17 2015 -0700
@@ -1098,7 +1098,7 @@
   Register r = as_Register(ra_->get_encode(this));
   CodeSection* consts_section = __ code()->consts();
   int consts_size = consts_section->align_at_start(consts_section->size());
-  assert(constant_table.size() == consts_size, err_msg("must be: %d == %d", constant_table.size(), consts_size));
+  assert(constant_table.size() == consts_size, "must be: %d == %d", constant_table.size(), consts_size);
 
   if (UseRDPCForConstantTableBase) {
     // For the following RDPC logic to work correctly the consts
@@ -1860,6 +1860,10 @@
   return true;  // Per default match rules are supported.
 }
 
+const int Matcher::float_pressure(int default_pressure_threshold) {
+  return default_pressure_threshold;
+}
+
 int Matcher::regnum_to_fpu_offset(int regnum) {
   return regnum - 32; // The FP registers are in the second chunk
 }
--- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -204,6 +204,20 @@
 address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step) {
   address entry = __ pc();
   __ get_constant_pool_cache(LcpoolCache); // load LcpoolCache
+#if INCLUDE_JVMCI
+  // Check if we need to take lock at entry of synchronized method.
+  if (UseJVMCICompiler) {
+    Label L;
+    Address pending_monitor_enter_addr(G2_thread, JavaThread::pending_monitorenter_offset());
+    __ ldbool(pending_monitor_enter_addr, Gtemp);  // Load if pending monitor enter
+    __ cmp_and_br_short(Gtemp, G0, Assembler::equal, Assembler::pn, L);
+    // Clear flag.
+    __ stbool(G0, pending_monitor_enter_addr);
+    // Take lock.
+    lock_method();
+    __ bind(L);
+  }
+#endif
   { Label L;
     Address exception_addr(G2_thread, Thread::pending_exception_offset());
     __ ld_ptr(exception_addr, Gtemp);  // Load pending exception.
@@ -349,7 +363,7 @@
 // Allocate monitor and lock method (asm interpreter)
 // ebx - Method*
 //
-void InterpreterGenerator::lock_method(void) {
+void TemplateInterpreterGenerator::lock_method() {
   __ ld(Lmethod, in_bytes(Method::access_flags_offset()), O0);  // Load access flags.
 
 #ifdef ASSERT
@@ -779,14 +793,14 @@
 
     // Generate regular method entry
     __ bind(slow_path);
-    (void) generate_normal_entry(false);
+    __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals));
     return entry;
   }
 #endif // INCLUDE_ALL_GCS
 
   // If G1 is not enabled then attempt to go through the accessor entry point
   // Reference.get is an accessor
-  return generate_jump_to_normal_entry();
+  return NULL;
 }
 
 //
--- a/src/cpu/sparc/vm/templateInterpreter_sparc.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/templateInterpreter_sparc.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -37,9 +37,9 @@
 #ifdef _LP64
   // The sethi() instruction generates lots more instructions when shell
   // stack limit is unlimited, so that's why this is much bigger.
-  const static int InterpreterCodeSize = 210 * K;
+  const static int InterpreterCodeSize = 260 * K;
 #else
-  const static int InterpreterCodeSize = 180 * K;
+  const static int InterpreterCodeSize = 230 * K;
 #endif
 
 #endif // CPU_SPARC_VM_TEMPLATEINTERPRETER_SPARC_HPP
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/templateTable_sparc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -2949,12 +2949,14 @@
 
 
 void TemplateTable::generate_vtable_call(Register Rrecv, Register Rindex, Register Rret) {
+  Register Rtemp = G4_scratch;
   Register Rcall = Rindex;
   assert_different_registers(Rcall, G5_method, Gargs, Rret);
 
   // get target Method* & entry point
   __ lookup_virtual_method(Rrecv, Rindex, G5_method);
   __ profile_arguments_type(G5_method, Rcall, Gargs, true);
+  __ profile_called_method(G5_method, Rtemp);
   __ call_from_interpreter(Rcall, Gargs, Rret);
 }
 
@@ -3211,6 +3213,7 @@
   assert_different_registers(Rcall, G5_method, Gargs, Rret);
 
   __ profile_arguments_type(G5_method, Rcall, Gargs, true);
+  __ profile_called_method(G5_method, Rscratch);
   __ call_from_interpreter(Rcall, Gargs, Rret);
 }
 
@@ -3486,7 +3489,8 @@
   Register RspecifiedKlass = O4;
 
   // Check for casting a NULL
-  __ br_null_short(Otos_i, Assembler::pn, is_null);
+  __ br_null(Otos_i, false, Assembler::pn, is_null);
+  __ delayed()->nop();
 
   // Get value klass in RobjKlass
   __ load_klass(Otos_i, RobjKlass); // get value klass
@@ -3542,7 +3546,8 @@
   Register RspecifiedKlass = O4;
 
   // Check for casting a NULL
-  __ br_null_short(Otos_i, Assembler::pt, is_null);
+  __ br_null(Otos_i, false, Assembler::pt, is_null);
+  __ delayed()->nop();
 
   // Get value klass in RobjKlass
   __ load_klass(Otos_i, RobjKlass); // get value klass
--- a/src/cpu/sparc/vm/vmStructs_sparc.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/vmStructs_sparc.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -37,10 +37,11 @@
   /******************************/                                                                                                   \
   /* JavaFrameAnchor            */                                                                                                   \
   /******************************/                                                                                                   \
-  volatile_nonstatic_field(JavaFrameAnchor,     _flags,                                          int)
+  volatile_nonstatic_field(JavaFrameAnchor,     _flags,                                          int)                                \
+  static_field(VM_Version, _features, int)
 
-#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
-
+#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \
+  declare_toplevel_type(VM_Version)
 
 #define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)                                                              \
   /******************************/                                        \
@@ -78,7 +79,11 @@
   declare_c2_constant(R_G4_num)                                           \
   declare_c2_constant(R_G5_num)                                           \
   declare_c2_constant(R_G6_num)                                           \
-  declare_c2_constant(R_G7_num)
+  declare_c2_constant(R_G7_num)                                           \
+  declare_constant(VM_Version::vis1_instructions_m)                       \
+  declare_constant(VM_Version::vis2_instructions_m)                       \
+  declare_constant(VM_Version::vis3_instructions_m)                       \
+  declare_constant(VM_Version::cbcond_instructions_m)
 
 #define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -40,10 +40,6 @@
   PrefetchScanIntervalInBytes = prefetch_scan_interval_in_bytes();
   PrefetchFieldsAhead         = prefetch_fields_ahead();
 
-  assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 1, "invalid value");
-  if( AllocatePrefetchInstr < 0 ) AllocatePrefetchInstr = 0;
-  if( AllocatePrefetchInstr > 1 ) AllocatePrefetchInstr = 0;
-
   // Allocation prefetch settings
   intx cache_line_size = prefetch_data_size();
   if( cache_line_size > AllocatePrefetchStepSize )
@@ -59,13 +55,6 @@
   AllocatePrefetchDistance = allocate_prefetch_distance();
   AllocatePrefetchStyle    = allocate_prefetch_style();
 
-  assert((AllocatePrefetchDistance % AllocatePrefetchStepSize) == 0 &&
-         (AllocatePrefetchDistance > 0), "invalid value");
-  if ((AllocatePrefetchDistance % AllocatePrefetchStepSize) != 0 ||
-      (AllocatePrefetchDistance <= 0)) {
-    AllocatePrefetchDistance = AllocatePrefetchStepSize;
-  }
-
   if (AllocatePrefetchStyle == 3 && !has_blk_init()) {
     warning("BIS instructions are not available on this CPU");
     FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1);
@@ -73,13 +62,6 @@
 
   guarantee(VM_Version::has_v9(), "only SPARC v9 is supported");
 
-  assert(ArraycopySrcPrefetchDistance < 4096, "invalid value");
-  if (ArraycopySrcPrefetchDistance >= 4096)
-    ArraycopySrcPrefetchDistance = 4064;
-  assert(ArraycopyDstPrefetchDistance < 4096, "invalid value");
-  if (ArraycopyDstPrefetchDistance >= 4096)
-    ArraycopyDstPrefetchDistance = 4064;
-
   UseSSE = 0; // Only on x86 and x64
 
   _supports_cx8 = has_v9();
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -29,6 +29,7 @@
 #include "runtime/vm_version.hpp"
 
 class VM_Version: public Abstract_VM_Version {
+  friend class VMStructs;
 protected:
   enum Feature_Flag {
     v8_instructions      = 0,
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -733,11 +733,11 @@
     // these asserts are somewhat nonsensical
 #ifndef _LP64
     assert(which == imm_operand || which == disp32_operand,
-           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
+           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
 #else
     assert((which == call32_operand || which == imm_operand) && is_64bit ||
            which == narrow_oop_operand && !is_64bit,
-           err_msg("which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip)));
+           "which %d is_64_bit %d ip " INTPTR_FORMAT, which, is_64bit, p2i(ip));
 #endif // _LP64
     return ip;
 
@@ -770,6 +770,7 @@
     case 0x55: // andnps
     case 0x56: // orps
     case 0x57: // xorps
+    case 0x59: // mulpd
     case 0x6E: // movd
     case 0x7E: // movd
     case 0xAE: // ldmxcsr, stmxcsr, fxrstor, fxsave, clflush
@@ -877,21 +878,35 @@
     // Check second byte
     NOT_LP64(assert((0xC0 & *ip) == 0xC0, "shouldn't have LDS and LES instructions"));
 
+    int vex_opcode;
     // First byte
     if ((0xFF & *inst) == VEX_3bytes) {
+      vex_opcode = VEX_OPCODE_MASK & *ip;
       ip++; // third byte
       is_64bit = ((VEX_W & *ip) == VEX_W);
+    } else {
+      vex_opcode = VEX_OPCODE_0F;
     }
     ip++; // opcode
     // To find the end of instruction (which == end_pc_operand).
-    switch (0xFF & *ip) {
-    case 0x61: // pcmpestri r, r/a, #8
-    case 0x70: // pshufd r, r/a, #8
-    case 0x73: // psrldq r, #8
-      tail_size = 1;  // the imm8
-      break;
-    default:
-      break;
+    switch (vex_opcode) {
+      case VEX_OPCODE_0F:
+        switch (0xFF & *ip) {
+        case 0x70: // pshufd r, r/a, #8
+        case 0x71: // ps[rl|ra|ll]w r, #8
+        case 0x72: // ps[rl|ra|ll]d r, #8
+        case 0x73: // ps[rl|ra|ll]q r, #8
+        case 0xC2: // cmp[ps|pd|ss|sd] r, r, r/a, #8
+        case 0xC4: // pinsrw r, r, r/a, #8
+        case 0xC5: // pextrw r/a, r, #8
+        case 0xC6: // shufp[s|d] r, r, r/a, #8
+          tail_size = 1;  // the imm8
+          break;
+        }
+        break;
+      case VEX_OPCODE_0F_3A:
+        tail_size = 1;
+        break;
     }
     ip++; // skip opcode
     debug_only(has_disp32 = true); // has both kinds of operands!
@@ -1604,6 +1619,85 @@
   emit_int8((unsigned char)0xA2);
 }
 
+// Opcode / Instruction                      Op /  En  64 - Bit Mode     Compat / Leg Mode Description                  Implemented
+// F2 0F 38 F0 / r       CRC32 r32, r / m8   RM        Valid             Valid             Accumulate CRC32 on r / m8.  v
+// F2 REX 0F 38 F0 / r   CRC32 r32, r / m8*  RM        Valid             N.E.              Accumulate CRC32 on r / m8.  -
+// F2 REX.W 0F 38 F0 / r CRC32 r64, r / m8   RM        Valid             N.E.              Accumulate CRC32 on r / m8.  -
+//
+// F2 0F 38 F1 / r       CRC32 r32, r / m16  RM        Valid             Valid             Accumulate CRC32 on r / m16. v
+//
+// F2 0F 38 F1 / r       CRC32 r32, r / m32  RM        Valid             Valid             Accumulate CRC32 on r / m32. v
+//
+// F2 REX.W 0F 38 F1 / r CRC32 r64, r / m64  RM        Valid             N.E.              Accumulate CRC32 on r / m64. v
+void Assembler::crc32(Register crc, Register v, int8_t sizeInBytes) {
+  assert(VM_Version::supports_sse4_2(), "");
+  int8_t w = 0x01;
+  Prefix p = Prefix_EMPTY;
+
+  emit_int8((int8_t)0xF2);
+  switch (sizeInBytes) {
+  case 1:
+    w = 0;
+    break;
+  case 2:
+  case 4:
+    break;
+  LP64_ONLY(case 8:)
+    // This instruction is not valid in 32-bit mode.
+    // Note:
+    // http://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+    //
+    // Page B - 72   Vol. 2C says
+    // qwreg2 to qwreg            1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : 11 qwreg1 qwreg2
+    // mem64 to qwreg             1111 0010 : 0100 1R0B : 0000 1111 : 0011 1000 : 1111 0000 : mod qwreg r / m
+    //                                                                            F0!!!
+    // while page 3-208 of Vol. 2A says
+    // F2 REX.W 0F 38 F1 / r       CRC32 r64, r / m64             RM         Valid      N.E.Accumulate CRC32 on r / m64.
+    //
+    // the 0 in the last bit is reserved for a different flavor of this instruction:
+    // F2 REX.W 0F 38 F0 / r       CRC32 r64, r / m8              RM         Valid      N.E.Accumulate CRC32 on r / m8.
+    p = REX_W;
+    break;
+  default:
+    assert(0, "Unsupported value for a sizeInBytes argument");
+    break;
+  }
+  LP64_ONLY(prefix(crc, v, p);)
+  emit_int8((int8_t)0x0F);
+  emit_int8(0x38);
+  emit_int8((int8_t)(0xF0 | w));
+  emit_int8(0xC0 | ((crc->encoding() & 0x7) << 3) | (v->encoding() & 7));
+}
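A quick worked example of the register form above (editor's sketch, not part of the changeset; the call is hypothetical): a 4-byte accumulate with crc in rax and v in rdx leaves w == 1 and p == Prefix_EMPTY, so the emitted byte sequence is:

// crc32(rax, rdx, 4)  ->  CRC32 eax, edx
//   0xF2                    mandatory prefix
//   0x0F 0x38 0xF1          escape bytes plus (0xF0 | w) with w == 1
//   0xC2                    ModRM: 0xC0 | (crc->encoding() << 3) | (v->encoding() & 7) = 0xC0 | (0 << 3) | 2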
+
+void Assembler::crc32(Register crc, Address adr, int8_t sizeInBytes) {
+  assert(VM_Version::supports_sse4_2(), "");
+  InstructionMark im(this);
+  int8_t w = 0x01;
+  Prefix p = Prefix_EMPTY;
+
+  emit_int8((int8_t)0xF2);
+  switch (sizeInBytes) {
+  case 1:
+    w = 0;
+    break;
+  case 2:
+  case 4:
+    break;
+  LP64_ONLY(case 8:)
+    // This instruction is not valid in 32-bit mode.
+    p = REX_W;
+    break;
+  default:
+    assert(0, "Unsupported value for a sizeInBytes argument");
+    break;
+  }
+  LP64_ONLY(prefix(crc, adr, p);)
+  emit_int8((int8_t)0x0F);
+  emit_int8(0x38);
+  emit_int8((int8_t)(0xF0 | w));
+  emit_operand(crc, adr);
+}
+
 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, /* legacy_mode */ true);
@@ -2399,7 +2493,7 @@
 
 void Assembler::movsbl(Register dst, Register src) { // movsxb
   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
-  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
+  int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
   emit_int8(0x0F);
   emit_int8((unsigned char)0xBE);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -2516,7 +2610,7 @@
 
 void Assembler::movzbl(Register dst, Register src) { // movzxb
   NOT_LP64(assert(src->has_byte_register(), "must have byte register"));
-  int encode = prefix_and_encode(dst->encoding(), src->encoding(), true);
+  int encode = prefix_and_encode(dst->encoding(), false, src->encoding(), true);
   emit_int8(0x0F);
   emit_int8((unsigned char)0xB6);
   emit_int8(0xC0 | encode);
@@ -2951,6 +3045,15 @@
   emit_int8(imm8);
 }
 
+void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
+  assert(VM_Version::supports_sse2(), "");
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
+                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  emit_int8((unsigned char)0xC5);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
+}
+
 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
@@ -2969,6 +3072,15 @@
   emit_int8(imm8);
 }
 
+void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
+  assert(VM_Version::supports_sse2(), "");
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, /* no_mask_reg */ true,
+                                      VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ _legacy_mode_bw);
+  emit_int8((unsigned char)0xC4);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(imm8);
+}
+
 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
   if (VM_Version::supports_evex()) {
@@ -3984,6 +4096,16 @@
   }
 }
 
+void Assembler::mulpd(XMMRegister dst, Address src) {
+  _instruction_uses_vl = true;
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
+  }
+}
+
 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
   _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -4172,6 +4294,26 @@
   emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, /* no_mask_reg */ false, /* legacy_mode */ _legacy_mode_dq);
 }
 
+void Assembler::unpckhpd(XMMRegister dst, XMMRegister src) {
+  _instruction_uses_vl = true;
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x15, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0x15, dst, src, VEX_SIMD_66);
+  }
+}
+
+void Assembler::unpcklpd(XMMRegister dst, XMMRegister src) {
+  _instruction_uses_vl = true;
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x14, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0x14, dst, src, VEX_SIMD_66);
+  }
+}
+
 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   if (VM_Version::supports_avx512dq()) {
@@ -4792,8 +4934,9 @@
 }
 
 
-// AND packed integers
+// Logical operations on packed integers
 void Assembler::pand(XMMRegister dst, XMMRegister src) {
+  _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
 }
@@ -4814,6 +4957,17 @@
   emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
 }
 
+void Assembler::pandn(XMMRegister dst, XMMRegister src) {
+  _instruction_uses_vl = true;
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0xDF, dst, src, VEX_SIMD_66);
+  }
+  else {
+    emit_simd_arith(0xDF, dst, src, VEX_SIMD_66);
+  }
+}
+
 void Assembler::por(XMMRegister dst, XMMRegister src) {
   _instruction_uses_vl = true;
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -6223,6 +6377,14 @@
   emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
 }
 
+// 0F A4 / r ib
+void Assembler::shldl(Register dst, Register src, int8_t imm8) {
+  emit_int8(0x0F);
+  emit_int8((unsigned char)0xA4);
+  emit_int8((unsigned char)(0xC0 | src->encoding() << 3 | dst->encoding()));
+  emit_int8(imm8);
+}
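For reference (editor's sketch, hypothetical call), the immediate form follows the 0F A4 /r ib pattern noted above:

// shldl(rdx, rax, 4)  ->  shld edx, eax, 4
//   0x0F 0xA4               opcode
//   0xC2                    ModRM: 0xC0 | (src->encoding() << 3) | dst->encoding() = 0xC0 | (0 << 3) | 2
//   0x04                    imm8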
+
 void Assembler::shrdl(Register dst, Register src) {
   emit_int8(0x0F);
   emit_int8((unsigned char)0xAD);
@@ -6362,12 +6524,12 @@
   return reg_enc;
 }
 
-int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) {
+int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) {
   if (dst_enc < 8) {
     if (src_enc >= 8) {
       prefix(REX_B);
       src_enc -= 8;
-    } else if (byteinst && src_enc >= 4) {
+    } else if ((src_is_byte && src_enc >= 4) || (dst_is_byte && dst_enc >= 4)) {
       prefix(REX);
     }
   } else {
@@ -6408,6 +6570,40 @@
   }
 }
 
+void Assembler::prefix(Register dst, Register src, Prefix p) {
+  if (src->encoding() >= 8) {
+    p = (Prefix)(p | REX_B);
+  }
+  if (dst->encoding() >= 8) {
+    p = (Prefix)( p | REX_R);
+  }
+  if (p != Prefix_EMPTY) {
+    // do not generate an empty prefix
+    prefix(p);
+  }
+}
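A short illustration of how the register/register form above combines its bits (editor's sketch; the calls are hypothetical):

// prefix(r10, rax, REX_W)        -> emits REX_WR, since only dst->encoding() >= 8 sets REX_R
// prefix(rax, rdx, Prefix_EMPTY) -> emits nothing, because empty prefixes are suppressed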
+
+void Assembler::prefix(Register dst, Address adr, Prefix p) {
+  if (adr.base_needs_rex()) {
+    if (adr.index_needs_rex()) {
+      assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
+    } else {
+      prefix(REX_B);
+    }
+  } else {
+    if (adr.index_needs_rex()) {
+      assert(false, "prefix(Register dst, Address adr, Prefix p) does not support handling of an X");
+    }
+  }
+  if (dst->encoding() >= 8) {
+    p = (Prefix)(p | REX_R);
+  }
+  if (p != Prefix_EMPTY) {
+    // do not generate an empty prefix
+    prefix(p);
+  }
+}
+
 void Assembler::prefix(Address adr) {
   if (adr.base_needs_rex()) {
     if (adr.index_needs_rex()) {
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -506,7 +506,8 @@
 
     VEX_3bytes = 0xC4,
     VEX_2bytes = 0xC5,
-    EVEX_4bytes = 0x62
+    EVEX_4bytes = 0x62,
+    Prefix_EMPTY = 0x0
   };
 
   enum VexPrefix {
@@ -535,7 +536,8 @@
     VEX_OPCODE_NONE  = 0x0,
     VEX_OPCODE_0F    = 0x1,
     VEX_OPCODE_0F_38 = 0x2,
-    VEX_OPCODE_0F_3A = 0x3
+    VEX_OPCODE_0F_3A = 0x3,
+    VEX_OPCODE_MASK  = 0x1F
   };
 
   enum AvxVectorLen {
@@ -611,10 +613,15 @@
   int prefix_and_encode(int reg_enc, bool byteinst = false);
   int prefixq_and_encode(int reg_enc);
 
-  int prefix_and_encode(int dst_enc, int src_enc, bool byteinst = false);
+  int prefix_and_encode(int dst_enc, int src_enc) {
+    return prefix_and_encode(dst_enc, false, src_enc, false);
+  }
+  int prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte);
   int prefixq_and_encode(int dst_enc, int src_enc);
 
   void prefix(Register reg);
+  void prefix(Register dst, Register src, Prefix p);
+  void prefix(Register dst, Address adr, Prefix p);
   void prefix(Address adr);
   void prefixq(Address adr);
 
@@ -1177,6 +1184,10 @@
   // Identify processor type and features
   void cpuid();
 
+  // CRC32C
+  void crc32(Register crc, Register v, int8_t sizeInBytes);
+  void crc32(Register crc, Address adr, int8_t sizeInBytes);
+
   // Convert Scalar Double-Precision Floating-Point Value to Scalar Single-Precision Floating-Point Value
   void cvtsd2ss(XMMRegister dst, XMMRegister src);
   void cvtsd2ss(XMMRegister dst, Address src);
@@ -1672,10 +1683,14 @@
   // SSE 4.1 extract
   void pextrd(Register dst, XMMRegister src, int imm8);
   void pextrq(Register dst, XMMRegister src, int imm8);
+  // SSE 2 extract
+  void pextrw(Register dst, XMMRegister src, int imm8);
 
   // SSE 4.1 insert
   void pinsrd(XMMRegister dst, Register src, int imm8);
   void pinsrq(XMMRegister dst, Register src, int imm8);
+  // SSE 2 insert
+  void pinsrw(XMMRegister dst, Register src, int imm8);
 
   // SSE4.1 packed move
   void pmovzxbw(XMMRegister dst, XMMRegister src);
@@ -1783,6 +1798,7 @@
   void setb(Condition cc, Register dst);
 
   void shldl(Register dst, Register src);
+  void shldl(Register dst, Register src, int8_t imm8);
 
   void shll(Register dst, int imm8);
   void shll(Register dst);
@@ -1925,6 +1941,7 @@
 
   // Multiply Packed Floating-Point Values
   void mulpd(XMMRegister dst, XMMRegister src);
+  void mulpd(XMMRegister dst, Address src);
   void mulps(XMMRegister dst, XMMRegister src);
   void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
   void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
@@ -1951,6 +1968,9 @@
   void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
   void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
+  void unpckhpd(XMMRegister dst, XMMRegister src);
+  void unpcklpd(XMMRegister dst, XMMRegister src);
+
   // Bitwise Logical XOR of Packed Floating-Point Values
   void xorpd(XMMRegister dst, XMMRegister src);
   void xorps(XMMRegister dst, XMMRegister src);
@@ -2046,6 +2066,9 @@
   void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
   void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
+  // Andn packed integers
+  void pandn(XMMRegister dst, XMMRegister src);
+
   // Or packed integers
   void por(XMMRegister dst, XMMRegister src);
   void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
--- a/src/cpu/x86/vm/assembler_x86.inline.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/assembler_x86.inline.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -33,10 +33,12 @@
 inline int Assembler::prefix_and_encode(int reg_enc, bool byteinst) { return reg_enc; }
 inline int Assembler::prefixq_and_encode(int reg_enc) { return reg_enc; }
 
-inline int Assembler::prefix_and_encode(int dst_enc, int src_enc, bool byteinst) { return dst_enc << 3 | src_enc; }
+inline int Assembler::prefix_and_encode(int dst_enc, bool dst_is_byte, int src_enc, bool src_is_byte) { return dst_enc << 3 | src_enc; }
 inline int Assembler::prefixq_and_encode(int dst_enc, int src_enc) { return dst_enc << 3 | src_enc; }
 
 inline void Assembler::prefix(Register reg) {}
+inline void Assembler::prefix(Register dst, Register src, Prefix p) {}
+inline void Assembler::prefix(Register dst, Address adr, Prefix p) {}
 inline void Assembler::prefix(Address adr) {}
 inline void Assembler::prefixq(Address adr) {}
 
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -2457,9 +2457,6 @@
         // Should consider not saving rbx, if not necessary
         __ trigfunc('t', op->as_Op2()->fpu_stack_size());
         break;
-      case lir_exp :
-        __ exp_with_fallback(op->as_Op2()->fpu_stack_size());
-        break;
       case lir_pow :
         __ pow_with_fallback(op->as_Op2()->fpu_stack_size());
         break;
@@ -2684,7 +2681,7 @@
 #endif // _LP64
         }
       } else {
-        fatal(err_msg("unexpected type: %s", basictype_to_str(c->type())));
+        fatal("unexpected type: %s", basictype_to_str(c->type()));
       }
       // cpu register - address
     } else if (opr2->is_address()) {
--- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -808,6 +808,12 @@
 
 void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
   assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type");
+
+  if (x->id() == vmIntrinsics::_dexp) {
+    do_ExpIntrinsic(x);
+    return;
+  }
+
   LIRItem value(x->argument_at(0), this);
 
   bool use_fpu = false;
@@ -818,7 +824,6 @@
       case vmIntrinsics::_dtan:
       case vmIntrinsics::_dlog:
       case vmIntrinsics::_dlog10:
-      case vmIntrinsics::_dexp:
       case vmIntrinsics::_dpow:
         use_fpu = true;
     }
@@ -870,7 +875,6 @@
     case vmIntrinsics::_dtan:   __ tan  (calc_input, calc_result, tmp1, tmp2);              break;
     case vmIntrinsics::_dlog:   __ log  (calc_input, calc_result, tmp1);                    break;
     case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1);                    break;
-    case vmIntrinsics::_dexp:   __ exp  (calc_input, calc_result,              tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
     case vmIntrinsics::_dpow:   __ pow  (calc_input, calc_input2, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
     default:                    ShouldNotReachHere();
   }
@@ -880,6 +884,32 @@
   }
 }
 
+void LIRGenerator::do_ExpIntrinsic(Intrinsic* x) {
+  LIRItem value(x->argument_at(0), this);
+  value.set_destroys_register();
+
+  LIR_Opr calc_result = rlock_result(x);
+  LIR_Opr result_reg = result_register_for(x->type());
+
+  BasicTypeList signature(1);
+  signature.append(T_DOUBLE);
+  CallingConvention* cc = frame_map()->c_calling_convention(&signature);
+
+  value.load_item_force(cc->at(0));
+
+#ifndef _LP64
+  LIR_Opr tmp = FrameMap::fpu0_double_opr;
+  result_reg = tmp;
+  if (VM_Version::supports_sse2()) {
+    __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
+  } else {
+    __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
+  }
+#else
+  __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
+#endif
+  __ move(result_reg, calc_result);
+}
 
 void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
   assert(x->number_of_arguments() == 5, "wrong type");
--- a/src/cpu/x86/vm/c1_LinearScan_x86.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/c1_LinearScan_x86.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -814,8 +814,7 @@
 
     case lir_tan:
     case lir_sin:
-    case lir_cos:
-    case lir_exp: {
+    case lir_cos: {
       // sin, cos and exp need two temporary fpu stack slots, so there are two temporary
       // registers (stored in right and temp of the operation).
       // the stack allocator must guarantee that the stack slots are really free,
--- a/src/cpu/x86/vm/c2_globals_x86.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/c2_globals_x86.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -48,11 +48,11 @@
 
 define_pd_global(intx, OnStackReplacePercentage,     140);
 define_pd_global(intx, ConditionalMoveLimit,         3);
-define_pd_global(intx, FLOATPRESSURE,                6);
 define_pd_global(intx, FreqInlineSize,               325);
 define_pd_global(intx, MinJumpTableSize,             10);
 #ifdef AMD64
 define_pd_global(intx, INTPRESSURE,                  13);
+define_pd_global(intx, FLOATPRESSURE,                14);
 define_pd_global(intx, InteriorEntryAlignment,       16);
 define_pd_global(size_t, NewSizeThreadIncrease,      ScaleForWordSize(4*K));
 define_pd_global(intx, LoopUnrollLimit,              60);
@@ -64,6 +64,7 @@
 define_pd_global(uint64_t, MaxRAM,                   128ULL*G);
 #else
 define_pd_global(intx, INTPRESSURE,                  6);
+define_pd_global(intx, FLOATPRESSURE,                6);
 define_pd_global(intx, InteriorEntryAlignment,       4);
 define_pd_global(size_t, NewSizeThreadIncrease,      4*K);
 define_pd_global(intx, LoopUnrollLimit,              50);     // Design center runs on 1.3.1
@@ -82,6 +83,7 @@
 define_pd_global(bool, UseCISCSpill,                 true);
 define_pd_global(bool, OptoScheduling,               false);
 define_pd_global(bool, OptoBundling,                 false);
+define_pd_global(bool, OptoRegScheduling,            true);
 
 define_pd_global(intx, ReservedCodeCacheSize,        48*M);
 define_pd_global(intx, NonProfiledCodeHeapSize,      21*M);
--- a/src/cpu/x86/vm/compiledIC_x86.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/compiledIC_x86.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -50,13 +50,15 @@
 // ----------------------------------------------------------------------------
 
 #define __ _masm.
-address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
+address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) {
   // Stub is fixed up when the corresponding call is converted from
   // calling compiled code to calling interpreted code.
   // movq rbx, 0
   // jmp -5 # to self
 
-  address mark = cbuf.insts_mark();  // Get mark within main instrs section.
+  if (mark == NULL) {
+    mark = cbuf.insts_mark();  // Get mark within main instrs section.
+  }
 
   // Note that the code buffer's insts_mark is always relative to insts.
   // That's why we must use the macroassembler to generate a stub.
@@ -73,6 +75,8 @@
   // This is recognized as unresolved by relocs/nativeinst/ic code.
   __ jump(RuntimeAddress(__ pc()));
 
+  assert(__ pc() - base <= to_interp_stub_size(), "wrong stub size");
+
   // Update current stubs pointer and restore insts_end.
   __ end_a_stub();
   return base;
@@ -104,10 +108,15 @@
   NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
   NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
 
-  assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(),
+#ifdef ASSERT
+  // read the value once
+  intptr_t data = method_holder->data();
+  address destination = jump->jump_destination();
+  assert(data == 0 || data == (intptr_t)callee(),
          "a) MT-unsafe modification of inline cache");
-  assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry,
+  assert(destination == (address)-1 || destination == entry,
          "b) MT-unsafe modification of inline cache");
+#endif
 
   // Update stub.
   method_holder->set_data((intptr_t)callee());
@@ -124,11 +133,12 @@
   assert(stub != NULL, "stub not found");
   // Creation also verifies the object.
   NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
-  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
   method_holder->set_data(0);
+  NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
   jump->set_jump_destination((address)-1);
 }
 
+
 //-----------------------------------------------------------------------------
 // Non-product mode code
 #ifndef PRODUCT
@@ -150,5 +160,4 @@
   // Verify state.
   assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check");
 }
-
 #endif // !PRODUCT
--- a/src/cpu/x86/vm/cppInterpreterGenerator_x86.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/cppInterpreterGenerator_x86.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -29,6 +29,7 @@
 
   void generate_more_monitors();
   void generate_deopt_handling();
+  void lock_method(void);
   address generate_interpreter_frame_manager(bool synchronized); // C++ interpreter only
   void generate_compute_interpreter_state(const Register state,
                                           const Register prev_state,
--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -741,7 +741,7 @@
 // Find preallocated  monitor and lock method (C++ interpreter)
 // rbx - Method*
 //
-void InterpreterGenerator::lock_method(void) {
+void CppInterpreterGenerator::lock_method() {
   // assumes state == rsi/r13 == pointer to current interpreterState
   // minimally destroys rax, rdx|c_rarg1, rdi
   //
@@ -807,7 +807,7 @@
 
   // If G1 is not enabled then attempt to go through the accessor entry point
   // Reference.get is an accessor
-  return generate_jump_to_normal_entry();
+  return NULL;
 }
 
 //
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/crc32c.h	Mon Oct 19 12:30:17 2015 -0700
@@ -0,0 +1,66 @@
+/*
+* Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+*
+* This code is free software; you can redistribute it and/or modify it
+* under the terms of the GNU General Public License version 2 only, as
+* published by the Free Software Foundation.
+*
+* This code is distributed in the hope that it will be useful, but WITHOUT
+* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+* FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+* version 2 for more details (a copy is included in the LICENSE file that
+* accompanied this code).
+*
+* You should have received a copy of the GNU General Public License version
+* 2 along with this work; if not, write to the Free Software Foundation,
+* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+*
+* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+* or visit www.oracle.com if you need additional information or have any
+* questions.
+*
+*/
+
+enum {
+  // S. Gueron / Information Processing Letters 112 (2012) 184
+  // shows that anything above 6K and below 32K is a good choice
+  // 32K does not deliver any further performance gains
+  // 6K=8*256 (*3 as we compute 3 blocks together)
+  //
+  // Thus we select the smallest value, so that it applies to the largest number
+  // of buffer sizes.
+  CRC32C_HIGH = 8 * 256,
+
+  // empirical
+  // based on ubench study using methodology described in
+  // V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 8
+  //
+  // arbitrary value between 27 and 256
+  CRC32C_MIDDLE = 8 * 86,
+
+  // V. Gopal et al. / Fast CRC Computation for iSCSI Polynomial Using CRC32 Instruction April 2011 9
+  // shows that 240 and 1024 are as good a choice as 216 == 8*27
+  //
+  // We select the smallest value which resulted in a significant performance improvement over
+  // the sequential version.
+  CRC32C_LOW = 8 * 27,
+
+  CRC32C_NUM_ChunkSizeInBytes = 3,
+
+  // We need to compute powers of 64N and 128N for each "chunk" size
+  CRC32C_NUM_PRECOMPUTED_CONSTANTS = ( 2 * CRC32C_NUM_ChunkSizeInBytes )
+};
+// Notes:
+// 1. Why do we need a "chunk" approach?
+// The overhead of computing the powers for an arbitrary buffer of size N is significant
+// (the implementation would approach plain library performance).
+// 2. Why only 3 "chunks"?
+// Performance experiments showed that HIGH+LOW alone did not deliver a stable speedup
+// curve.
+//
+// Disclaimer:
+// If you ever decide to increase/decrease number of "chunks" be sure to modify
+// a) constants table generation (hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp)
+// b) constant fetch from that table (macroAssembler_x86.cpp)
+// c) unrolled for loop (macroAssembler_x86.cpp)
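For concreteness (editor's note, not part of the changeset), the thresholds above work out to:

//   CRC32C_HIGH   = 8 * 256 = 2048 bytes per block; 3 blocks processed together give 6144 bytes = 6K
//   CRC32C_MIDDLE = 8 * 86  = 688 bytes
//   CRC32C_LOW    = 8 * 27  = 216 bytes
//   CRC32C_NUM_PRECOMPUTED_CONSTANTS = 2 * 3 = 6 (one 64N and one 128N power per chunk size)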
--- a/src/cpu/x86/vm/frame_x86.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/frame_x86.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -48,8 +48,6 @@
 }
 #endif
 
-PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
-
 // Profiling/safepoint support
 
 bool frame::safe_for_sender(JavaThread *thread) {
@@ -280,7 +278,7 @@
   address* pc_addr = &(((address*) sp())[-1]);
   if (TracePcPatching) {
     tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]",
-                  pc_addr, *pc_addr, pc);
+                  p2i(pc_addr), p2i(*pc_addr), p2i(pc));
   }
   // Either the return address is the original one or we are going to
   // patch in the same address that's already there.
@@ -458,11 +456,11 @@
   // This is the sp before any possible extension (adapter/locals).
   intptr_t* unextended_sp = interpreter_frame_sender_sp();
 
-#ifdef COMPILER2
+#if defined(COMPILER2) || INCLUDE_JVMCI
   if (map->update_map()) {
     update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset));
   }
-#endif // COMPILER2
+#endif // COMPILER2 || INCLUDE_JVMCI
 
   return frame(sender_sp, unextended_sp, link(), sender_pc());
 }
@@ -683,10 +681,19 @@
     DESCRIBE_FP_OFFSET(interpreter_frame_locals);
     DESCRIBE_FP_OFFSET(interpreter_frame_bcp);
     DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp);
+#ifdef AMD64
+  } else if (is_entry_frame()) {
+    // This could be more descriptive if we use the enum in
+    // stubGenerator to map to real names but it's most important to
+    // claim these frame slots so the error checking works.
+    for (int i = 0; i < entry_frame_after_call_words; i++) {
+      values.describe(frame_no, fp() - i, err_msg("call_stub word fp - %d", i));
+    }
+#endif // AMD64
+  }
 #endif
-  }
 }
-#endif
+#endif // !PRODUCT
 
 intptr_t *frame::initial_deoptimization_info() {
   // used to reset the saved FP
--- a/src/cpu/x86/vm/frame_x86.inline.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/frame_x86.inline.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -78,7 +78,11 @@
     assert(((nmethod*)_cb)->insts_contains(_pc), "original PC must be in nmethod");
     _deopt_state = is_deoptimized;
   } else {
-    _deopt_state = not_deoptimized;
+    if (_cb->is_deoptimization_stub()) {
+      _deopt_state = is_deoptimized;
+    } else {
+      _deopt_state = not_deoptimized;
+    }
   }
 }
 
--- a/src/cpu/x86/vm/globals_x86.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/globals_x86.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -46,7 +46,7 @@
 // the vep is aligned at CodeEntryAlignment whereas c2 only aligns
 // the uep and the vep doesn't get real alignment but just slops on by
 // only assured that the entry instruction meets the 5 byte size requirement.
-#ifdef COMPILER2
+#if defined(COMPILER2) || INCLUDE_JVMCI
 define_pd_global(intx, CodeEntryAlignment,       32);
 #else
 define_pd_global(intx, CodeEntryAlignment,       16);
@@ -91,6 +91,7 @@
                                                                             \
   product(intx, UseAVX, 99,                                                 \
           "Highest supported AVX instructions set on x86/x64")              \
+          range(0, 99)                                                      \
                                                                             \
   product(bool, UseCLMUL, false,                                            \
           "Control whether CLMUL instructions can be used on x86/x64")      \
--- a/src/cpu/x86/vm/interp_masm_x86.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/interp_masm_x86.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -40,6 +40,11 @@
 
 // Implementation of InterpreterMacroAssembler
 
+void InterpreterMacroAssembler::jump_to_entry(address entry) {
+  assert(entry, "Entry must have been generated by now");
+  jump(RuntimeAddress(entry));
+}
+
 #ifndef CC_INTERP
 void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) {
   Label update, next, none;
@@ -1497,13 +1502,39 @@
     bind(skip_receiver_profile);
 
     // The method data pointer needs to be updated to reflect the new target.
+#if INCLUDE_JVMCI
+    if (MethodProfileWidth == 0) {
+      update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
+    }
+#else // INCLUDE_JVMCI
     update_mdp_by_constant(mdp,
                            in_bytes(VirtualCallData::
                                     virtual_call_data_size()));
+#endif // INCLUDE_JVMCI
     bind(profile_continue);
   }
 }
 
+#if INCLUDE_JVMCI
+void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) {
+  assert_different_registers(method, mdp, reg2);
+  if (ProfileInterpreter && MethodProfileWidth > 0) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    Label done;
+    record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth,
+      &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset()));
+    bind(done);
+
+    update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
+    bind(profile_continue);
+  }
+}
+#endif // INCLUDE_JVMCI
+
 // This routine creates a state machine for updating the multi-row
 // type profile at a virtual call site (or other type-sensitive bytecode).
 // The machine visits each row (of receiver/count) until the receiver type
@@ -1523,14 +1554,36 @@
     if (is_virtual_call) {
       increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
     }
-    return;
+#if INCLUDE_JVMCI
+    else if (EnableJVMCI) {
+      increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()));
+    }
+#endif // INCLUDE_JVMCI
+  } else {
+    int non_profiled_offset = -1;
+    if (is_virtual_call) {
+      non_profiled_offset = in_bytes(CounterData::count_offset());
+    }
+#if INCLUDE_JVMCI
+    else if (EnableJVMCI) {
+      non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset());
+    }
+#endif // INCLUDE_JVMCI
+
+    record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth,
+        &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset);
   }
+}
 
-  int last_row = VirtualCallData::row_limit() - 1;
+void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp,
+                                        Register reg2, int start_row, Label& done, int total_rows,
+                                        OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
+                                        int non_profiled_offset) {
+  int last_row = total_rows - 1;
   assert(start_row <= last_row, "must be work left to do");
-  // Test this row for both the receiver and for null.
+  // Test this row for both the item and for null.
   // Take any of three different outcomes:
-  //   1. found receiver => increment count and goto done
+  //   1. found item => increment count and goto done
   //   2. found null => keep looking for case 1, maybe allocate this cell
   //   3. found something else => keep looking for cases 1 and 2
   // Case 3 is handled by a recursive call.
@@ -1538,30 +1591,30 @@
     Label next_test;
     bool test_for_null_also = (row == start_row);
 
-    // See if the receiver is receiver[n].
-    int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
-    test_mdp_data_at(mdp, recvr_offset, receiver,
+    // See if the item is item[n].
+    int item_offset = in_bytes(item_offset_fn(row));
+    test_mdp_data_at(mdp, item_offset, item,
                      (test_for_null_also ? reg2 : noreg),
                      next_test);
-    // (Reg2 now contains the receiver from the CallData.)
+    // (Reg2 now contains the item from the CallData.)
 
-    // The receiver is receiver[n].  Increment count[n].
-    int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
+    // The item is item[n].  Increment count[n].
+    int count_offset = in_bytes(item_count_offset_fn(row));
     increment_mdp_data_at(mdp, count_offset);
     jmp(done);
     bind(next_test);
 
     if (test_for_null_also) {
       Label found_null;
-      // Failed the equality check on receiver[n]...  Test for null.
+      // Failed the equality check on item[n]...  Test for null.
       testptr(reg2, reg2);
       if (start_row == last_row) {
         // The only thing left to do is handle the null case.
-        if (is_virtual_call) {
+        if (non_profiled_offset >= 0) {
           jccb(Assembler::zero, found_null);
-          // Receiver did not match any saved receiver and there is no empty row for it.
+          // Item did not match any saved item and there is no empty row for it.
           // Increment total counter to indicate polymorphic case.
-          increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+          increment_mdp_data_at(mdp, non_profiled_offset);
           jmp(done);
           bind(found_null);
         } else {
@@ -1573,21 +1626,22 @@
       jcc(Assembler::zero, found_null);
 
       // Put all the "Case 3" tests here.
-      record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call);
+      record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows,
+        item_offset_fn, item_count_offset_fn, non_profiled_offset);
 
-      // Found a null.  Keep searching for a matching receiver,
+      // Found a null.  Keep searching for a matching item,
       // but remember that this is an empty (unused) slot.
       bind(found_null);
     }
   }
 
-  // In the fall-through case, we found no matching receiver, but we
-  // observed the receiver[start_row] is NULL.
+  // In the fall-through case, we found no matching item, but we
+  // observed that item[start_row] is NULL.
 
-  // Fill in the receiver field and increment the count.
-  int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
-  set_mdp_data_at(mdp, recvr_offset, receiver);
-  int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
+  // Fill in the item field and increment the count.
+  int item_offset = in_bytes(item_offset_fn(start_row));
+  set_mdp_data_at(mdp, item_offset, item);
+  int count_offset = in_bytes(item_count_offset_fn(start_row));
   movl(reg2, DataLayout::counter_increment);
   set_mdp_data_at(mdp, count_offset, reg2);
   if (start_row > 0) {
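Stripped of register allocation and label plumbing, the generated scan behaves roughly like the data-side sketch below. It is illustrative only: the real code tests for null just once per recursion level and recurses instead of looping, but the three outcomes and the fall-through fill are the same.

#include <cstdint>

typedef int (*OffsetFunctionSketch)(unsigned row);   // stands in for ByteSize (*)(uint)

// Hypothetical helper mirroring record_item_in_profile_helper's effect on the MDO.
static void record_item_sketch(char* mdp, intptr_t item, unsigned total_rows,
                               OffsetFunctionSketch item_off, OffsetFunctionSketch count_off,
                               int non_profiled_offset) {
  int first_empty = -1;
  for (unsigned row = 0; row < total_rows; row++) {
    intptr_t* slot = (intptr_t*)(mdp + item_off(row));
    if (*slot == item) {                                        // case 1: found item
      ++*(intptr_t*)(mdp + count_off(row));
      return;
    }
    if (*slot == 0 && first_empty < 0) first_empty = (int)row;  // case 2: remember empty slot
  }                                                             // case 3: other item, next row
  if (first_empty >= 0) {                                       // fall-through: claim the empty row
    *(intptr_t*)(mdp + item_off(first_empty)) = item;
    *(intptr_t*)(mdp + count_off(first_empty)) = 1;             // DataLayout::counter_increment
  } else if (non_profiled_offset >= 0) {                        // every row taken: bump overflow counter
    ++*(intptr_t*)(mdp + non_profiled_offset);
  }
}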
--- a/src/cpu/x86/vm/interp_masm_x86.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/interp_masm_x86.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -32,6 +32,7 @@
 
 // This file specializes the assembler with interpreter-specific macros
 
+typedef ByteSize (*OffsetFunction)(uint);
 
 class InterpreterMacroAssembler: public MacroAssembler {
 
@@ -60,6 +61,8 @@
     _locals_register(LP64_ONLY(r14) NOT_LP64(rdi)),
     _bcp_register(LP64_ONLY(r13) NOT_LP64(rsi)) {}
 
+  void jump_to_entry(address entry);
+
   void load_earlyret_value(TosState state);
 
 #ifdef CC_INTERP
@@ -249,6 +252,10 @@
   void record_klass_in_profile_helper(Register receiver, Register mdp,
                                       Register reg2, int start_row,
                                       Label& done, bool is_virtual_call);
+  void record_item_in_profile_helper(Register item, Register mdp,
+                                     Register reg2, int start_row, Label& done, int total_rows,
+                                     OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
+                                     int non_profiled_offset);
 
   void update_mdp_by_offset(Register mdp_in, int offset_of_offset);
   void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp);
@@ -262,6 +269,7 @@
   void profile_virtual_call(Register receiver, Register mdp,
                             Register scratch2,
                             bool receiver_can_be_null = false);
+  void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN;
   void profile_ret(Register return_bci, Register mdp);
   void profile_null_seen(Register mdp);
   void profile_typecheck(Register mdp, Register klass, Register scratch);
--- a/src/cpu/x86/vm/interpreterGenerator_x86.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/interpreterGenerator_x86.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -31,17 +31,6 @@
 
 #define __ _masm->
 
-// Jump into normal path for accessor and empty entry to jump to normal entry
-// The "fast" optimization don't update compilation count therefore can disable inlining
-// for these functions that should be inlined.
-address InterpreterGenerator::generate_jump_to_normal_entry(void) {
-  address entry_point = __ pc();
-
-  assert(Interpreter::entry_for_kind(Interpreter::zerolocals) != NULL, "should already be generated");
-  __ jump(RuntimeAddress(Interpreter::entry_for_kind(Interpreter::zerolocals)));
-  return entry_point;
-}
-
 // Abstract method entry
 // Attempt to execute abstract method. Throw exception
 address InterpreterGenerator::generate_abstract_entry(void) {
--- a/src/cpu/x86/vm/interpreterGenerator_x86.hpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/interpreterGenerator_x86.hpp	Mon Oct 19 12:30:17 2015 -0700
@@ -36,19 +36,18 @@
   address generate_native_entry(bool synchronized);
   address generate_abstract_entry(void);
   address generate_math_entry(AbstractInterpreter::MethodKind kind);
-  address generate_jump_to_normal_entry(void);
-  address generate_accessor_entry(void) { return generate_jump_to_normal_entry(); }
-  address generate_empty_entry(void) { return generate_jump_to_normal_entry(); }
+  address generate_accessor_entry(void) { return NULL; }
+  address generate_empty_entry(void) { return NULL; }
   address generate_Reference_get_entry();
   address generate_CRC32_update_entry();
   address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind);
+  address generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind);
 #ifndef _LP64
   address generate_Float_intBitsToFloat_entry();
   address generate_Float_floatToRawIntBits_entry();
   address generate_Double_longBitsToDouble_entry();
   address generate_Double_doubleToRawLongBits_entry();
 #endif
-  void lock_method(void);
   void generate_stack_overflow_check(void);
 
   void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue);
--- a/src/cpu/x86/vm/interpreter_x86_32.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/interpreter_x86_32.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -151,11 +151,15 @@
       __ pop_fTOS();
       break;
     case Interpreter::java_lang_math_exp:
-      __ exp_with_fallback(0);
-      // Store to stack to convert 80bit precision back to 64bits
-      __ push_fTOS();
-      __ pop_fTOS();
-      break;
+      __ subptr(rsp, 2*wordSize);
+      __ fstp_d(Address(rsp, 0));
+      if (VM_Version::supports_sse2()) {
+        __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
+      } else {
+        __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)));
+      }
+      __ addptr(rsp, 2*wordSize);
+      break;
     default                              :
         ShouldNotReachHere();
   }
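With exp_with_fallback gone, the 32-bit math entry for Math.exp simply spills the x87 argument to memory and calls a double-precision routine: the SSE2 stub when available, otherwise SharedRuntime::dexp. In effect the entry now behaves like the sketch below; dexp_routine is a stand-in name, not a HotSpot symbol.

#include <cmath>

// Stand-in for StubRoutines::dexp() / SharedRuntime::dexp; both take and return
// a C double, which is why a spill-and-call sequence is all the entry needs.
static double dexp_routine(double x) { return std::exp(x); }

static double math_exp_entry_sketch(double x) {
  return dexp_routine(x);   // the hunk above does this via subptr/fstp_d, call, addptr
}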
--- a/src/cpu/x86/vm/interpreter_x86_64.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/interpreter_x86_64.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -52,8 +52,6 @@
 
 #define __ _masm->
 
-PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
-
 #ifdef _WIN64
 address AbstractInterpreterGenerator::generate_slow_signature_handler() {
   address entry = __ pc();
@@ -252,6 +250,9 @@
 
   if (kind == Interpreter::java_lang_math_sqrt) {
     __ sqrtsd(xmm0, Address(rsp, wordSize));
+  } else if (kind == Interpreter::java_lang_math_exp) {
+    __ movdbl(xmm0, Address(rsp, wordSize));
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
   } else {
     __ fld_d(Address(rsp, wordSize));
     switch (kind) {
@@ -278,9 +279,6 @@
                                               // empty stack slot)
           __ pow_with_fallback(0);
           break;
-      case Interpreter::java_lang_math_exp:
-          __ exp_with_fallback(0);
-           break;
       default                              :
           ShouldNotReachHere();
     }
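The 64-bit variant is shorter still because both the SysV and Win64 calling conventions pass a lone double argument and return the double result in xmm0, so loading the operand and calling the dexp stub is the whole entry. A one-line illustration of that ABI fact, not interpreter code:

#include <cmath>

// Typically compiles to little more than: movsd xmm0, [arg]; call exp -- the
// argument arrives in xmm0 and the result leaves in xmm0.
extern "C" double exp_abi_demo(double x) { return std::exp(x); }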
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/x86/vm/jvmciCodeInstaller_x86.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#include "precompiled.hpp"
+#include "compiler/disassembler.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "jvmci/jvmciEnv.hpp"
+#include "jvmci/jvmciCodeInstaller.hpp"
+#include "jvmci/jvmciJavaClasses.hpp"
+#include "jvmci/jvmciCompilerToVM.hpp"
+#include "jvmci/jvmciRuntime.hpp"
+#include "asm/register.hpp"
+#include "classfile/vmSymbols.hpp"
+#include "code/vmreg.hpp"
+#include "vmreg_x86.inline.hpp"
+
+jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, oop method) {
+  if (inst->is_call() || inst->is_jump()) {
+    assert(NativeCall::instruction_size == (int)NativeJump::instruction_size, "unexpected size");
+    return (pc_offset + NativeCall::instruction_size);
+  } else if (inst->is_mov_literal64()) {
+    // mov+call instruction pair
+    jint offset = pc_offset + NativeMovConstReg::instruction_size;
+    u_char* call = (u_char*) (_instructions->start() + offset);
+    if (call[0] == Assembler::REX_B) {
+      offset += 1; /* prefix byte for extended register R8-R15 */
+      call++;
+    }
+    assert(call[0] == 0xFF, "expected call");
+    offset += 2; /* opcode byte + modrm byte */
+    return (offset);
+  } else if (inst->is_call_reg()) {
+    // the inlined vtable stub contains a "call register" instruction
+    assert(method != NULL, "only valid for virtual calls");
+    return (pc_offset + ((NativeCallReg *) inst)->next_instruction_offset());
+  } else if (inst->is_cond_jump()) {
+    address pc = (address) (inst);
+    return pc_offset + (jint) (Assembler::locate_next_instruction(pc) - pc);
+  } else {
+    fatal("unsupported type of instruction for call site");
+    return 0;
+  }
+}
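The size bookkeeping in pd_next_offset is easiest to check against concrete encodings: a direct call or jump is five bytes, while the mov+call pair is a ten-byte movabs followed by a two- or three-byte indirect call, the extra byte being the REX.B prefix needed for r8-r15. Illustrative byte patterns taken from the x86-64 encoding rules, not from this file:

// movabs rax, imm64 -> 48 B8 <8-byte imm>   (10 bytes, the NativeMovConstReg part)
// call   rax        -> FF D0                (2 bytes: opcode + ModRM)
// call   r8         -> 41 FF D0             (3 bytes: REX.B prefix first)
static const unsigned char kCallRax[] = { 0xFF, 0xD0 };
static const unsigned char kCallR8[]  = { 0x41, 0xFF, 0xD0 };

static int call_after_mov_size(const unsigned char* call) {
  return (call[0] == 0x41 /* REX.B */) ? 3 : 2;   // mirrors the "+1, then +2" logic above
}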
+
+void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle& constant) {
+  address pc = _instructions->start() + pc_offset;
+  Handle obj = HotSpotObjectConstantImpl::object(constant);
+  jobject value = JNIHandles::make_local(obj());
+  if (HotSpotObjectConstantImpl::compressed(constant)) {
+#ifdef _LP64
+    address operand = Assembler::locate_operand(pc, Assembler::narrow_oop_operand);
+    int oop_index = _oop_recorder->find_index(value);
+    _instructions->relocate(pc, oop_Relocation::spec(oop_index), Assembler::narrow_oop_operand);
+    TRACE_jvmci_3("relocating (narrow oop constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
+#else
+    fatal("compressed oop on 32bit");
+#endif
+  } else {
+    address operand = Assembler::locate_operand(pc, Assembler::imm_operand);
+    *((jobject*) operand) = value;
+    _instructions->relocate(pc, oop_Relocation::spec_for_immediate(), Assembler::imm_operand);
+    TRACE_jvmci_3("relocating (oop constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
+  }
+}
+
+void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
+  address pc = _instructions->start() + pc_offset;
+
+  address operand = Assembler::locate_operand(pc, Assembler::disp32_operand);
+  address next_instruction = Assembler::locate_next_instruction(pc);
+  address dest = _constants->start() + data_offset;
+
+  long disp = dest - next_instruction;
+  assert(disp == (jint) disp, "disp doesn't fit in 32 bits");
+  *((jint*) operand) = (jint) disp;
+
+  _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS), Assembler::disp32_operand);
+  TRACE_jvmci_3("relocating at " PTR_FORMAT "/" PTR_FORMAT " with destination at " PTR_FORMAT " (%d)", p2i(pc), p2i(operand), p2i(dest), data_offset);
+}
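pd_patch_DataSectionReference rewrites a rip-relative disp32 so that the instruction reaches its constant in the data section; the displacement is measured from the end of the instruction and must fit in 32 bits. The arithmetic in isolation, as a self-contained sketch:

#include <cassert>
#include <cstdint>

// Illustrative only: patch a rip-relative 32-bit displacement in place.
static void patch_disp32(uint8_t* operand, uint8_t* next_instruction, uint8_t* dest) {
  int64_t disp = dest - next_instruction;           // x86 measures from the next instruction
  assert(disp == (int32_t)disp && "disp doesn't fit in 32 bits");
  *reinterpret_cast<int32_t*>(operand) = (int32_t)disp;
}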
+
+void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
+  if (cb->is_nmethod()) {
+    nmethod* nm = (nmethod*) cb;
+    nativeJump_at((address)inst)->set_jump_destination(nm->verified_entry_point());
+  } else {
+    nativeJump_at((address)inst)->set_jump_destination(cb->code_begin());
+  }
+  _instructions->relocate((address)inst, runtime_call_Relocation::spec(), Assembler::call32_operand);
+}
+
+void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
+  address pc = (address) inst;
+  if (inst->is_call()) {
+    // NOTE: for call without a mov, the offset must fit a 32-bit immediate
+    //       see also CompilerToVM.getMaxCallTargetOffset()
+    NativeCall* call = nativeCall_at(pc);
+    call->set_destination((address) foreign_call_destination);
+    _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec(), Assembler::call32_operand);
+  } else if (inst->is_mov_literal64()) {
+    NativeMovConstReg* mov = nativeMovConstReg_at(pc);
+    mov->set_data((intptr_t) foreign_call_destination);
+    _instructions->relocate(mov->instruction_address(), runtime_call_Relocation::spec(), Assembler::imm_operand);
+  } else if (inst->is_jump()) {
+    NativeJump* jump = nativeJump_at(pc);
+    jump->set_jump_destination((address) foreign_call_destination);
+    _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec(), Assembler::call32_operand);
+  } else if (inst->is_cond_jump()) {
+    address old_dest = nativeGeneralJump_at(pc)->jump_destination();
+    address disp = Assembler::locate_operand(pc, Assembler::call32_operand);
+    *(jint*) disp += ((address) foreign_call_destination) - old_dest;
+    _instructions->relocate(pc, runtime_call_Relocation::spec(), Assembler::call32_operand);
+  } else {
+    fatal("unsupported relocation for foreign call");
+  }
+
+  TRACE_jvmci_3("relocating (foreign call)  at " PTR_FORMAT, p2i(inst));
+}
+
+void CodeInstaller::pd_relocate_JavaMethod(oop hotspot_method, jint pc_offset) {
+#ifdef ASSERT
+  Method* method = NULL;
+  // we need to check because this might also be an unresolved method
+  if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) {
+    method = getMethodFromHotSpotMethod(hotspot_method);
+  }
+#endif
+  switch (_next_call_type) {
+    case INLINE_INVOKE:
+      break;
+    case INVOKEVIRTUAL:
+    case INVOKEINTERFACE: {
+      assert(method == NULL || !method->is_static(), "cannot call static method with invokeinterface");
+
+      NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
+      call->set_destination(SharedRuntime::get_resolve_virtual_call_stub());
+      _instructions->relocate(call->instruction_address(),
+                                             virtual_call_Relocation::spec(_invoke_mark_pc),
+                                             Assembler::call32_operand);
+      break;
+    }
+    case INVOKESTATIC: {
+      assert(method == NULL || method->is_static(), "cannot call non-static method with invokestatic");
+
+      NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
+      call->set_destination(SharedRuntime::get_resolve_static_call_stub());
+      _instructions->relocate(call->instruction_address(),
+                                             relocInfo::static_call_type, Assembler::call32_operand);
+      break;
+    }
+    case INVOKESPECIAL: {
+      assert(method == NULL || !method->is_static(), "cannot call static method with invokespecial");
+      NativeCall* call = nativeCall_at(_instructions->start() + pc_offset);
+      call->set_destination(SharedRuntime::get_resolve_opt_virtual_call_stub());
+      _instructions->relocate(call->instruction_address(),
+                              relocInfo::opt_virtual_call_type, Assembler::call32_operand);
+      break;
+    }
+    default:
+      break;
+  }
+}
+
+static void relocate_poll_near(address pc) {
+  NativeInstruction* ni = nativeInstruction_at(pc);
+  int32_t* disp = (int32_t*) Assembler::locate_operand(pc, Assembler::disp32_operand);
+  int32_t offset = *disp; // The Java code installed the polling page offset into the disp32 operand
+  intptr_t new_disp = (intptr_t) (os::get_polling_page() + offset) - (intptr_t) ni;
+  *disp = (int32_t)new_disp;
+}
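relocate_poll_near turns the placeholder displacement, which the JVMCI compiler fills with an offset into the polling page, into a real instruction-relative displacement at install time. The same arithmetic in isolation; names are illustrative:

#include <cstdint>

// placeholder_offset is what the compiler left in the disp32 slot; polling_page
// and instruction_pc are absolute addresses known only at installation.
static int32_t poll_near_disp(int32_t placeholder_offset,
                              intptr_t polling_page, intptr_t instruction_pc) {
  intptr_t new_disp = (polling_page + placeholder_offset) - instruction_pc;
  return (int32_t)new_disp;   // assumed to fit, mirroring the cast in the code above
}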
+
+
+void CodeInstaller::pd_relocate_poll(address pc, jint mark) {
+  switch (mark) {
+    case POLL_NEAR: {
+      relocate_poll_near(pc);
+      _instructions->relocate(pc, relocInfo::poll_type, Assembler::disp32_operand);
+      break;
+    }
+    case POLL_FAR:
+      // This is a load from a register so there is no relocatable operand.
+      // We just have to ensure that the format is not disp32_operand
+      // so that poll_Relocation::fix_relocation_after_move does the right
+      // thing (i.e. ignores this relocation record)
+      _instructions->relocate(pc, relocInfo::poll_type, Assembler::imm_operand);
+      break;
+    case POLL_RETURN_NEAR: {
+      relocate_poll_near(pc);
+      _instructions->relocate(pc, relocInfo::poll_return_type, Assembler::disp32_operand);
+      break;
+    }
+    case POLL_RETURN_FAR:
+      // see comment above for POLL_FAR
+      _instructions->relocate(pc, relocInfo::poll_return_type, Assembler::imm_operand);
+      break;
+    default:
+      fatal("invalid mark value");
+      break;
+  }
+}
+
+// convert JVMCI register indices (as used in oop maps) to HotSpot registers
+VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
+  if (jvmci_reg < RegisterImpl::number_of_registers) {
+    return as_Register(jvmci_reg)->as_VMReg();
+  } else {
+    jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers;
+    if (floatRegisterNumber < XMMRegisterImpl::number_of_registers) {
+      return as_XMMRegister(floatRegisterNumber)->as_VMReg();
+    }
+    ShouldNotReachHere();
+    return NULL;
+  }
+}
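The register mapping is a two-range split: indices below the general-purpose register count name rax..r15, and the rest name XMM registers, so with sixteen general-purpose registers a JVMCI index of 16 is xmm0. A small illustration of the index math; the register count is an assumption about x86_64, not a value read from VMStructs:

// Illustrative only.
static const int kNumGpRegs = 16;   // RegisterImpl::number_of_registers on x86_64 (assumed)

static int jvmci_to_xmm_index(int jvmci_reg) {
  // valid only for jvmci_reg >= kNumGpRegs; smaller indices are GP registers
  return jvmci_reg - kNumGpRegs;
}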
+
+bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) {
+  return !(hotspotRegister->is_FloatRegister() || hotspotRegister->is_XMMRegister());
+}
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Oct 15 15:15:17 2015 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Mon Oct 19 12:30:17 2015 -0700
@@ -45,6 +45,7 @@
 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc/g1/heapRegion.hpp"
 #endif // INCLUDE_ALL_GCS
+#include "crc32c.h"
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
@@ -56,8 +57,6 @@
 
 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
 
-PRAGMA_FORMAT_MUTE_WARNINGS_FOR_GCC
-
 #ifdef ASSERT
 bool AbstractAssembler::pd_check_instruction_mark() { return true; }
 #endif
@@ -417,7 +416,7 @@
     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg);
   }
   // Don't assert holding the ttyLock
-    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
+    assert(false, "DEBUG MESSAGE: %s", msg);
   ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
 }
 
@@ -883,7 +882,7 @@
     ttyLocker ttyl;
     ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n",
                     msg);
-    assert(false, err_msg("DEBUG MESSAGE: %s", msg));
+    assert(false, "DEBUG MESSAGE: %s", msg);
   }
 }
 
@@ -2888,7 +2887,7 @@
 }
 
 // !defined(COMPILER2) is because of stupid core builds
-#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2)
+#if !defined(_LP64) || defined(COMPILER1) || !defined(COMPILER2) || INCLUDE_JVMCI
 void MacroAssembler::empty_FPU_stack() {
   if (VM_Version::supports_mmx()) {
     emms();
@@ -2896,7 +2895,7 @@
     for (int i = 8; i-- > 0; ) ffree(i);
   }
 }
-#endif // !LP64 || C1 || !C2
+#endif // !LP64 || C1 || !C2 || INCLUDE_JVMCI
 
 
 // Defines obj, preserves var_size_in_bytes
@@ -3032,6 +3031,15 @@
   Assembler::fldcw(as_Address(src));
 }
 
+void MacroAssembler::mulpd(XMMRegister dst, AddressLiteral src) {
+  if (reachable(src)) {
+    Assembler::mulpd(dst, as_Address(src));
+  } else {
+    lea(rscratch1, src);
+    Assembler::mulpd(dst, Address(rscratch1, 0));
+  }
+}
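The new mulpd overload follows the usual AddressLiteral pattern: use the literal as a rip-relative operand when it is within disp32 range of the code being emitted, otherwise materialize the address into rscratch1 first. The reachability question it hinges on is just a 32-bit range check, sketched here on its own with illustrative names:

#include <cstdint>

// Can 'target' be encoded as a rip-relative disp32 from code ending at 'next_pc'?
static bool fits_in_disp32(intptr_t target, intptr_t next_pc) {
  intptr_t disp = target - next_pc;
  return disp == (int32_t)disp;   // roughly a +/-2GB window around the instruction
}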
+
 void MacroAssembler::pow_exp_core_encoding() {
   // kills rax, rcx, rdx
   subptr(rsp,sizeof(jdouble));
@@ -3104,19 +3112,7 @@
   BLOCK_COMMENT("} fast_pow");
 }
 
-void MacroAssembler::fast_exp() {
-  // computes exp(X) = 2^(X * log2(e))
-  // if fast computation is not possible, result is NaN. Requires
-  // fallback from user of this macro.
-  // increase precision for intermediate steps of the computation
-  increase_precision();
-  fldl2e();                // Stack: log2(e) X ...
-  fmulp(1);                // Stack: (X*log2(e)) ...
-  pow_exp_core_encoding(); // Stack: exp(X) ...
-  restore_precision();
-}
-
-void MacroAssembler::pow_or_exp(bool is_exp, int num_fpu_regs_in_use) {
+void MacroAssembler::pow_or_exp(int num_fpu_regs_in_use) {
   // kills rax, rcx, rdx
   // pow and exp needs 2 extra registers on the fpu stack.
   Label slow_case, done;
@@ -3128,182 +3124,164 @@
   Register tmp2 = rax;
   Register tmp3 = rcx;
 
-  if (is_exp) {
-    // Stack: X
-    fld_s(0);                   // duplicate argument for runtime call. Stack: X X
-    fast_exp();                 // Stack: exp(X) X
-    fcmp(tmp, 0, false, false); // Stack: exp(X) X
-    // exp(X) not equal to itself: exp(X) is NaN go to slow case.
-    jcc(Assembler::parity, slow_case);
-    // get rid of duplicate argument. Stack: exp(X)
-    if (num_fpu_regs_in_use > 0) {
-      fxch();
-      fpop();
-    } else {
-      ffree(1);
-    }
-    jmp(done);
-  } else {
-    // Stack: X Y
-    Label x_negative, y_not_2;
-
-    static double two = 2.0;
-    ExternalAddress two_addr((address)&two);
-
-    // constant maybe too far on 64 bit
-    lea(tmp2, two_addr);
-    fld_d(Address(tmp2, 0));    // Stack: 2 X Y
-    fcmp(tmp, 2, true, false);  // Stack: X Y
-    jcc(Assembler::parity, y_not_2);
-    jcc(Assembler::notEqual, y_not_2);
-
-    fxch(); fpop();             // Stack: X
-    fmul(0);                    // Stack: X*X
-
-    jmp(done);
-
-    bind(y_not_2);
-
-    fldz();                     // Stack: 0 X Y
-    fcmp(tmp, 1, true, false);  // Stack: X Y
-    jcc(Assembler::above, x_negative);
-
-    // X >= 0
-
-    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
-    fld_s(1);                   // Stack: X Y X Y
-    fast_pow();                 // Stack: X^Y X Y
-    fcmp(tmp, 0, false, false); // Stack: X^Y X Y
-    // X^Y not equal to itself: X^Y is NaN go to slow case.
-    jcc(Assembler::parity, slow_case);
-    // get rid of duplicate arguments. Stack: X^Y
-    if (num_fpu_regs_in_use > 0) {
-      fxch(); fpop();
-      fxch(); fpop();
-    } else {
-      ffree(2);
-      ffree(1);
-    }
-    jmp(done);
-
-    // X <= 0
-    bind(x_negative);
-
-    fld_s(1);                   // Stack: Y X Y
-    frndint();                  // Stack: int(Y) X Y
-    fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
-    jcc(Assembler::notEqual, slow_case);
-
-    subptr(rsp, 8);
-
-    // For X^Y, when X < 0, Y has to be an integer and the final
-    // result depends on whether it's odd or even. We just checked
-    // that int(Y) == Y.  We move int(Y) to gp registers as a 64 bit
-    // integer to test its parity. If int(Y) is huge and doesn't fit
-    // in the 64 bit integer range, the integer indefinite value will
-    // end up in the gp registers. Huge numbers are all even, the
-    // integer indefinite number is even so it's fine.
+  // Stack: X Y
+  Label x_negative, y_not_2;
+
+  static double two = 2.0;
+  ExternalAddress two_addr((address)&two);
+
+  // constant may be too far on 64 bit
+  lea(tmp2, two_addr);
+  fld_d(Address(tmp2, 0));    // Stack: 2 X Y
+  fcmp(tmp, 2, true, false);  // Stack: X Y
+  jcc(Assembler::parity, y_not_2);
+  jcc(Assembler::notEqual, y_not_2);
+
+  fxch(); fpop();             // Stack: X
+  fmul(0);                    // Stack: X*X
+
+  jmp(done);
+
+  bind(y_not_2);
+
+  fldz();                     // Stack: 0 X Y
+  fcmp(tmp, 1, true, false);  // Stack: X Y
+  jcc(Assembler::above, x_negative);
+
+  // X >= 0
+
+  fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
+  fld_s(1);                   // Stack: X Y X Y
+  fast_pow();                 // Stack: X^Y X Y
+  fcmp(tmp, 0, false, false); // Stack: X^Y X Y
+  // X^Y not equal to itself: X^Y is NaN, go to slow case.
+  jcc(Assembler::parity, slow_case);
+  // get rid of duplicate arguments. Stack: X^Y
+  if (num_fpu_regs_in_use > 0) {
+    fxch(); fpop();
+    fxch(); fpop();
+  } else {
+    ffree(2);
+    ffree(1);
+  }
+  jmp(done);
+
+  // X <= 0
+  bind(x_negative);
+
+  fld_s(1);                   // Stack: Y X Y
+  frndint();                  // Stack: int(Y) X Y
+  fcmp(tmp, 2, false, false); // Stack: int(Y) X Y
+  jcc(Assembler::notEqual, slow_case);
+
+  subptr(rsp, 8);
+
+  // For X^Y, when X < 0, Y has to be an integer and the final
+  // result depends on whether it's odd or even. We just checked
+  // that int(Y) == Y.  We move int(Y) to gp registers as a 64 bit
+  // integer to test its parity. If int(Y) is huge and doesn't fit
+  // in the 64 bit integer range, the integer indefinite value will
+  // end up in the gp registers. Huge numbers are all even, and the
+  // integer indefinite number is even, so it's fine.
 
 #ifdef ASSERT
-    // Let's check we don't end up with an integer indefinite number
-    // when not expected. First test for huge numbers: check whether
-    // int(Y)+1 == int(Y) which is true for very large numbers and
-    // those are all even. A 64 bit integer is guaranteed to not
-    // overflow for numbers where y+1 != y (when precision is set to
-    // double precision).
-    Label y_not_huge;
-
-    fld1();                     // Stack: 1 int(Y) X Y
-    fadd(1);                    // Stack: 1+int(Y) int(Y) X Y
+  // Let's check we don't end up with an integer indefinite number
+  // when not expected. First test for huge numbers: check whether
+  // int(Y)+1 == int(Y) which is true for very large numbers and
+  // those are all even. A 64 bit integer is guaranteed to not
+  // overflow for numbers where y+1 != y (when precision is set to
+  // double precision).
+  Label y_not_huge;
+
+  fld1();                     // Stack: 1 int(Y) X Y
+  fadd(1);                    // Stack: 1+int(Y) int(Y) X Y
 
 #ifdef _LP64
-    // trip to memory to force the precision down from double extended
-    // precision
-    fstp_d(Address(rsp, 0));
-    fld_d(Address(rsp, 0));
+  // trip to memory to force the precision down from double extended
+  // precision
+  fstp_d(Address(rsp, 0));
+  fld_d(Address(rsp, 0));
 #endif
 
-    fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
+  fcmp(tmp, 1, true, false);  // Stack: int(Y) X Y
 #endif
 
-    // move int(Y) as 64 bit integer to thread's stack
-    fistp_d(Address(rsp,0));    // Stack: X Y
+  // move int(Y) as 64 bit integer to thread's stack
+  fistp_d(Address(rsp,0));    // Stack: X Y
 
 #ifdef ASSERT
-    jcc(Assembler::notEqual, y_not_huge);
-
-    // Y is huge so we know it's even. It may not fit in a 64 bit
-    // integer and we don't want the debug code below to see the
-    // integer indefinite value so overwrite int(Y) on the thread's
-    // stack with 0.
-    movl(Address(rsp, 0), 0);
-    movl(Address(rsp, 4), 0);
-
-    bind(y_not_huge);
+  jcc(Assembler::notEqual, y_not_huge);
+
+  // Y is huge so we know it's even. It may not fit in a 64 bit
+  // integer and we don't want the debug code below to see the
+  // integer indefinite value so overwrite int(Y) on the thread's
+  // stack with 0.
+  movl(Address(rsp, 0), 0);
+  movl(Address(rsp, 4), 0);
+
+  bind(y_not_huge);
 #endif
 
-    fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
-    fld_s(1);                   // Stack: X Y X Y
-    fabs();                     // Stack: abs(X) Y X Y
-    fast_pow();                 // Stack: abs(X)^Y X Y
-    fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
-    // abs(X)^Y not equal to itself: abs(X)^Y is NaN go to slow case.
-
-    pop(tmp2);
-    NOT_LP64(pop(tmp3));
-    jcc(Assembler::parity, slow_case);
+  fld_s(1);                   // duplicate arguments for runtime call. Stack: Y X Y
+  fld_s(1);                   // Stack: X Y X Y
+  fabs();                     // Stack: abs(X) Y X Y
+  fast_pow();                 // Stack: abs(X)^Y X Y
+  fcmp(tmp, 0, false, false); // Stack: abs(X)^Y X Y
+  // abs(X)^Y not equal to itself: abs(X)^Y is NaN, go to slow case.
+
+  pop(tmp2);
+  NOT_LP64(pop(tmp3));
+  jcc(Assembler::parity, slow_case);
 
 #ifdef ASSERT
-    // Check that int(Y) is not integer indefinite value (int
-    // overflow). Shouldn't happen because for values that would
-    // overflow, 1+int(Y)==Y which was tested earlier.
+  // Check that int(Y) is not integer indefinite value (int
+  // overflow). Shouldn't happen because for values that would
+  // overflow, 1+int(Y)==Y which was tested earlier.
 #ifndef _LP64
-    {
-      Label integer;
-      testl(tmp2, tmp2);
-      jcc(Assembler::notZero, integer);
-      cmpl(tmp3, 0x80000000);
-      jcc(Assembler::notZero, integer);
-      STOP("integer indefinite value shouldn't be seen here");
-      bind(integer);
-    }
+  {
+    Label integer;
+    testl(tmp2, tmp2);
+    jcc(Assembler::notZero, integer);
+    cmpl(tmp3, 0x80000000);
+    jcc(Assembler::notZero, integer);
+    STOP("integer indefinite value shouldn't be seen here");
+    bind(integer);
+  }
 #else
-    {
-      Label integer;
-      mov(tmp3, tmp2); // preserve tmp2 for parity check below
-      shlq(tmp3, 1);
-      jcc(Assembler::carryClear, integer);
-      jcc(Assembler::notZero, integer);
-      STOP("integer indefinite value shouldn't be seen here");
-      bind(integer);
-    }
+  {
+    Label integer;
+    mov(tmp3, tmp2); // preserve tmp2 for parity check below
+    shlq(tmp3, 1);
+    jcc(Assembler::carryClear, integer);
+    jcc(Assembler::notZero, integer);
+    STOP("integer indefinite value shouldn't be seen here");
+    bind(integer);
+  }
 #endif
 #endif
 
-    // get rid of duplicate arguments. Stack: X^Y
-    if (num_fpu_regs_in_use > 0) {
-      fxch(); fpop();
-      fxch(); fpop();
-    } else {
-      ffree(2);
-      ffree(1);
-    }
-
-    testl(tmp2, 1);
-    jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
-    // X <= 0, Y even: X^Y = -abs(X)^Y
-
-    fchs();                     // Stack: -abs(X)^Y Y
-    jmp(done);
-  }
+  // get rid of duplicate arguments. Stack: X^Y
+  if (num_fpu_regs_in_use > 0) {
+    fxch(); fpop();
+    fxch(); fpop();
+  } else {
+    ffree(2);
+    ffree(1);
+  }
+
+  testl(tmp2, 1);
+  jcc(Assembler::zero, done); // X <= 0, Y even: X^Y = abs(X)^Y
+  // X <= 0, Y odd: X^Y = -abs(X)^Y
+
+  fchs();                     // Stack: -abs(X)^Y Y
+  jmp(done);
 
   // slow case: runtime call
   bind(slow_case);
 
   fpop();                       // pop incorrect result or int(Y)
 
-  fp_runtime_fallback(is_exp ? CAST_FROM_FN_PTR(address, SharedRuntime::dexp) : CAST_FROM_FN_PTR(address, SharedRuntime::dpow),
-                      is_exp ? 1 : 2, num_fpu_regs_in_use);
+  fp_runtime_fallback(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), 2, num_fpu_regs_in_use);
 
   // Come here with result in F-TOS
   bind(done);
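For a negative base the generated code relies on the identity spelled out in the comments: X^Y is only defined when Y is an integer, and then X^Y equals abs(X)^Y with the sign given by Y's parity; values of Y so large that Y+1 == Y in double precision are necessarily even. The same rule stated at the C level, for reference only:

#include <cmath>

// Reference semantics of the X < 0 branch above; not interpreter code.
static double pow_negative_base(double x, double y) {   // requires x < 0 and y == rint(y)
  double r = std::pow(std::fabs(x), y);
  if (std::fabs(y) >= 9007199254740992.0)   // >= 2^53: y + 1 == y, so y is certainly even
    return r;
  return (((long long)y) & 1) ? -r : r;     // otherwise the exponent's parity decides the sign
}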
@@ -6267,7 +6245,9 @@
     // Save caller's stack pointer into RBP if the frame pointer is preserved.
     if (PreserveFramePointer) {
       movptr(rbp, rsp);
-      addptr(rbp, framesize + wordSize);