changeset 31508:3c15616b82c2

Merge
author duke
date Wed, 05 Jul 2017 20:40:50 +0200
parents 1f783cf1d945 1f5244fe53ab
children 61d2d0629b6d
files jaxp/src/java.xml/share/classes/com/sun/org/apache/bcel/internal/util/JavaWrapper.java jaxp/src/java.xml/share/classes/com/sun/org/apache/xalan/internal/xsltc/trax/SmartTransformerFactoryImpl.java jdk/src/java.desktop/windows/native/libawt/windows/hand.cur jdk/src/jdk.accessibility/windows/conf/accessibility.properties nashorn/test/script/basic/NASHORN-627.js nashorn/test/script/basic/NASHORN-627.js.EXPECTED
diffstat 1156 files changed, 19866 insertions(+), 8446 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags-top-repo	Thu Jul 02 16:08:17 2015 -0700
+++ b/.hgtags-top-repo	Wed Jul 05 20:40:50 2017 +0200
@@ -313,3 +313,4 @@
 70e4272790b6199e9ca89df2758ff9cb58ec4125 jdk9-b68
 1bcfd6b8726582cff5a42dbfc75903e36f9dd4fe jdk9-b69
 eed77fcd77711fcdba05f18fc22f37d86efb243c jdk9-b70
+c706ef5ea5da00078dc5e4334660315f7d99c15b jdk9-b71
--- a/common/autoconf/generated-configure.sh	Thu Jul 02 16:08:17 2015 -0700
+++ b/common/autoconf/generated-configure.sh	Wed Jul 05 20:40:50 2017 +0200
@@ -4364,7 +4364,7 @@
 #CUSTOM_AUTOCONF_INCLUDE
 
 # Do not change or remove the following line, it is needed for consistency checks:
-DATE_WHEN_GENERATED=1434614912
+DATE_WHEN_GENERATED=1435822080
 
 ###############################################################################
 #
@@ -42961,7 +42961,7 @@
         if test "x$x_libraries" = xNONE; then
           if test -f "$SYSROOT/usr/X11R6/lib/libX11.so"; then
             x_libraries="$SYSROOT/usr/X11R6/lib"
-          elif test "$SYSROOT/usr/lib64/libX11.so" && test "x$OPENJDK_TARGET_CPU_BITS" = x64; then
+          elif test -f "$SYSROOT/usr/lib64/libX11.so" && test "x$OPENJDK_TARGET_CPU_BITS" = x64; then
             x_libraries="$SYSROOT/usr/lib64"
           elif test -f "$SYSROOT/usr/lib/libX11.so"; then
             x_libraries="$SYSROOT/usr/lib"
--- a/common/autoconf/libraries.m4	Thu Jul 02 16:08:17 2015 -0700
+++ b/common/autoconf/libraries.m4	Wed Jul 05 20:40:50 2017 +0200
@@ -113,7 +113,7 @@
         if test "x$x_libraries" = xNONE; then
           if test -f "$SYSROOT/usr/X11R6/lib/libX11.so"; then
             x_libraries="$SYSROOT/usr/X11R6/lib"
-          elif test "$SYSROOT/usr/lib64/libX11.so" && test "x$OPENJDK_TARGET_CPU_BITS" = x64; then
+          elif test -f "$SYSROOT/usr/lib64/libX11.so" && test "x$OPENJDK_TARGET_CPU_BITS" = x64; then
             x_libraries="$SYSROOT/usr/lib64"
           elif test -f "$SYSROOT/usr/lib/libX11.so"; then
             x_libraries="$SYSROOT/usr/lib"
--- a/common/bin/compare.sh	Thu Jul 02 16:08:17 2015 -0700
+++ b/common/bin/compare.sh	Wed Jul 05 20:40:50 2017 +0200
@@ -1188,28 +1188,11 @@
         OTHER_JDK="$OTHER/install/jdk"
         OTHER_JRE="$OTHER/install/jre"
         echo "Selecting install images for compare"
-    elif [ -d "$THIS/deploy/jdk" -o -d "$THIS/deploy/images/jdk" ] \
-	     && [ -d "$OTHER/deploy/jdk" -o -d "$OTHER/deploy/images/jdk" ]; then
-	if [ -d "$THIS/deploy/images/jdk" ]; then
-            THIS_JDK="$THIS/deploy/images/jdk"
-            THIS_JRE="$THIS/deploy/images/jre"
-	else
-            THIS_JDK="$THIS/deploy/jdk"
-            THIS_JRE="$THIS/deploy/jre"
-	fi
-	if [ -d "$OTHER/deploy/images/jdk" ]; then
-            OTHER_JDK="$OTHER/deploy/images/jdk"
-            OTHER_JRE="$OTHER/deploy/images/jre"
-	else
-            OTHER_JDK="$OTHER/deploy/jdk"
-            OTHER_JRE="$OTHER/deploy/jre"
-	fi
-        echo "Selecting deploy images for compare"
-    elif [ -d "$THIS/deploy/images/jdk" ] && [ -d "$OTHER/deploy/jdk" ]; then
-        THIS_JDK="$THIS/deploy/jdk"
-        THIS_JRE="$THIS/deploy/jre"
-        OTHER_JDK="$OTHER/deploy/jdk"
-        OTHER_JRE="$OTHER/deploy/jre"
+    elif [ -d "$THIS/images/jdk" ] && [ -d "$OTHER/deploy/images/jdk" ]; then
+        THIS_JDK="$THIS/images/jdk"
+        THIS_JRE="$THIS/images/jre"
+        OTHER_JDK="$OTHER/deploy/images/jdk"
+        OTHER_JRE="$OTHER/deploy/images/jre"
         echo "Selecting deploy images for compare"
     elif [ -d "$THIS/images/jdk" ] && [ -d "$OTHER/images/jdk" ]; then
         THIS_JDK="$THIS/images/jdk"
@@ -1221,30 +1204,28 @@
         echo "No common images found."
         exit 1
     fi
+    echo "  $THIS_JDK"
+    echo "  $OTHER_JDK"
 
-    if [ -d "$THIS/deploy/jdk-bundle" -o -d "$THIS/deploy/images/jdk-bundle" ] \
-	     && [ -d "$OTHER/deploy/jdk-bundle" -o -d "$OTHER/deploy/images/jdk-bundle" ]; then
+    if [ -d "$THIS/images/jdk-bundle" -o -d "$THIS/deploy/images/jdk-bundle" ] \
+	     && [ -d "$OTHER/images/jdk-bundle" -o -d "$OTHER/deploy/images/jdk-bundle" ]; then
 	if [ -d "$THIS/deploy/images/jdk-bundle" ]; then
             THIS_JDK_BUNDLE="$THIS/deploy/images/jdk-bundle"
             THIS_JRE_BUNDLE="$THIS/deploy/images/jre-bundle"
 	else
-            THIS_JDK_BUNDLE="$THIS/deploy/jdk-bundle"
-            THIS_JRE_BUNDLE="$THIS/deploy/jre-bundle"
+            THIS_JDK_BUNDLE="$THIS/images/jdk-bundle"
+            THIS_JRE_BUNDLE="$THIS/images/jre-bundle"
 	fi
 	if [ -d "$OTHER/deploy/images/jdk-bundle" ]; then
             OTHER_JDK_BUNDLE="$OTHER/deploy/images/jdk-bundle"
             OTHER_JRE_BUNDLE="$OTHER/deploy/images/jre-bundle"
 	else
-            OTHER_JDK_BUNDLE="$OTHER/deploy/jdk-bundle"
-            OTHER_JRE_BUNDLE="$OTHER/deploy/jre-bundle"
+            OTHER_JDK_BUNDLE="$OTHER/images/jdk-bundle"
+            OTHER_JRE_BUNDLE="$OTHER/images/jre-bundle"
 	fi
-        echo "Also comparing deploy macosx bundles"
-    elif [ -d "$THIS/images/jdk-bundle" ] && [ -d "$OTHER/images/jdk-bundle" ]; then
-        THIS_JDK_BUNDLE="$THIS/images/jdk-bundle"
-        THIS_JRE_BUNDLE="$THIS/images/jre-bundle"
-        OTHER_JDK_BUNDLE="$OTHER/images/jdk-bundle"
-        OTHER_JRE_BUNDLE="$OTHER/images/jre-bundle"
         echo "Also comparing macosx bundles"
+        echo "  $THIS_JDK_BUNDLE"
+        echo "  $OTHER_JDK_BUNDLE"
     fi
 
     if [ -d "$THIS/deploy/bundles" -o -d "$THIS/deploy/images/bundles" ] \
@@ -1262,19 +1243,21 @@
         echo "Also comparing deploy javadoc bundles"
     fi
 
-    if [ -d "$THIS/deploy/JavaAppletPlugin.plugin" -o -d "$THIS/deploy/images/JavaAppletPlugin.plugin" ] \
-	     && [ -d "$OTHER/deploy/JavaAppletPlugin.plugin" -o -d "$OTHER/deploy/images/JavaAppletPlugin.plugin" ]; then
-	if [ -d "$THIS/deploy/images/bundles" ]; then
+    if [ -d "$THIS/images/JavaAppletPlugin.plugin" ] \
+	     && [ -d "$OTHER/images/JavaAppletPlugin.plugin" -o -d "$OTHER/deploy/images/JavaAppletPlugin.plugin" ]; then
+	if [ -d "$THIS/images/JavaAppletPlugin.plugin" ]; then
+            THIS_DEPLOY_APPLET_PLUGIN_DIR="$THIS/images/JavaAppletPlugin.plugin"
+	else
             THIS_DEPLOY_APPLET_PLUGIN_DIR="$THIS/deploy/images/JavaAppletPlugin.plugin"
+	fi
+	if [ -d "$OTHER/images/JavaAppletPlugin.plugin" ]; then
+            OTHER_DEPLOY_APPLET_PLUGIN_DIR="$OTHER/images/JavaAppletPlugin.plugin"
 	else
-            THIS_DEPLOY_APPLET_PLUGIN_DIR="$THIS/deploy/JavaAppletPlugin.plugin"
-	fi
-	if [ -d "$OTHER/deploy/images/bundles" ]; then
             OTHER_DEPLOY_APPLET_PLUGIN_DIR="$OTHER/deploy/images/JavaAppletPlugin.plugin"
-	else
-            OTHER_DEPLOY_APPLET_PLUGIN_DIR="$OTHER/deploy/JavaAppletPlugin.plugin"
 	fi
         echo "Also comparing deploy applet image"
+        echo "  $THIS_DEPLOY_APPLET_PLUGIN_DIR"
+        echo "  $OTHER_DEPLOY_APPLET_PLUGIN_DIR"
     fi
 
     if [ -d "$OTHER/images" ]; then
--- a/corba/.hgtags	Thu Jul 02 16:08:17 2015 -0700
+++ b/corba/.hgtags	Wed Jul 05 20:40:50 2017 +0200
@@ -313,3 +313,4 @@
 8efad64f40eb8cd4df376c0a5275892eeb396bbd jdk9-b68
 de8acedcb5b5870f1dc54cba575aaa5d33897ea2 jdk9-b69
 e7cf01990ed366bd493080663259281e91ce223b jdk9-b70
+cd39ed501fb0504554a7f58ac6cf3dd2b64afec0 jdk9-b71
--- a/corba/src/java.corba/share/classes/com/sun/corba/se/impl/io/ObjectStreamClass.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/corba/src/java.corba/share/classes/com/sun/corba/se/impl/io/ObjectStreamClass.java	Wed Jul 05 20:40:50 2017 +0200
@@ -65,7 +65,7 @@
 import sun.corba.Bridge;
 
 /**
- * A ObjectStreamClass describes a class that can be serialized to a stream
+ * An ObjectStreamClass describes a class that can be serialized to a stream
  * or a class that was serialized to a stream.  It contains the name
  * and the serialVersionUID of the class.
  * <br>
@@ -788,9 +788,9 @@
     /* Compare the base class names of streamName and localName.
      *
      * @return  Return true iff the base class name compare.
-     * @parameter streamName    Fully qualified class name.
-     * @parameter localName     Fully qualified class name.
-     * @parameter pkgSeparator  class names use either '.' or '/'.
+     * @param streamName    Fully qualified class name.
+     * @param localName     Fully qualified class name.
+     * @param pkgSeparator  class names use either '.' or '/'.
      *
      * Only compare base class name to allow package renaming.
      */
--- a/corba/src/java.corba/share/classes/com/sun/corba/se/impl/orbutil/ObjectStreamClass_1_3_1.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/corba/src/java.corba/share/classes/com/sun/corba/se/impl/orbutil/ObjectStreamClass_1_3_1.java	Wed Jul 05 20:40:50 2017 +0200
@@ -656,9 +656,9 @@
     /* Compare the base class names of streamName and localName.
      *
      * @return  Return true iff the base class name compare.
-     * @parameter streamName    Fully qualified class name.
-     * @parameter localName     Fully qualified class name.
-     * @parameter pkgSeparator  class names use either '.' or '/'.
+     * @param streamName    Fully qualified class name.
+     * @param localName     Fully qualified class name.
+     * @param pkgSeparator  class names use either '.' or '/'.
      *
      * Only compare base class name to allow package renaming.
      */
--- a/corba/src/java.corba/share/classes/org/omg/CORBA/BoundsHelper.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/corba/src/java.corba/share/classes/org/omg/CORBA/BoundsHelper.java	Wed Jul 05 20:40:50 2017 +0200
@@ -27,10 +27,10 @@
 
 
 /**
- * This Helper class is used to facilitate the marshalling of <tt>Bounds</tt>.
+ * This Helper class is used to facilitate the marshalling of {@code Bounds}.
  * For more information on Helper files, see
  * <a href="doc-files/generatedfiles.html#helper">
- * "Generated Files: Helper Files"</a>.<P>
+ * "Generated Files: Helper Files"</a>.
  */
 
 abstract public class BoundsHelper
--- a/corba/src/java.corba/share/classes/org/omg/CORBA/ORBPackage/InvalidNameHelper.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/corba/src/java.corba/share/classes/org/omg/CORBA/ORBPackage/InvalidNameHelper.java	Wed Jul 05 20:40:50 2017 +0200
@@ -28,10 +28,10 @@
 
 /**
  * This Helper class is used to facilitate the marshalling of
- * <tt>ORBPackage/InvalidName</tt>.
+ * {@code ORBPackage/InvalidName}.
  * For more information on Helper files, see
  * <a href="doc-files/generatedfiles.html#helper">
- * "Generated Files: Helper Files"</a>.<P>
+ * "Generated Files: Helper Files"</a>.
  */
 
 abstract public class InvalidNameHelper
--- a/corba/src/java.corba/share/classes/org/omg/CORBA/TypeCodePackage/BadKindHelper.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/corba/src/java.corba/share/classes/org/omg/CORBA/TypeCodePackage/BadKindHelper.java	Wed Jul 05 20:40:50 2017 +0200
@@ -28,10 +28,10 @@
 
 /**
  * This Helper class is used to facilitate the marshalling of
- * <tt>TypeCodePackage/BadKind</tt>.
+ * {@code TypeCodePackage/BadKind}.
  * For more information on Helper files, see
  * <a href="doc-files/generatedfiles.html#helper">
- * "Generated Files: Helper Files"</a>.<P>
+ * "Generated Files: Helper Files"</a>.
  */
 
 abstract public class BadKindHelper
--- a/corba/src/java.corba/share/classes/org/omg/CORBA/TypeCodePackage/BoundsHelper.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/corba/src/java.corba/share/classes/org/omg/CORBA/TypeCodePackage/BoundsHelper.java	Wed Jul 05 20:40:50 2017 +0200
@@ -28,10 +28,10 @@
 
 /**
  * This Helper class is used to facilitate the marshalling of
- * <tt>TypeCodePackage/Bounds</tt>.
+ * {@code TypeCodePackage/Bounds}.
  * For more information on Helper files, see
  * <a href="doc-files/generatedfiles.html#helper">
- * "Generated Files: Helper Files"</a>.<P>
+ * "Generated Files: Helper Files"</a>.
  */
 
 abstract public class BoundsHelper
--- a/hotspot/.hgtags	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/.hgtags	Wed Jul 05 20:40:50 2017 +0200
@@ -473,3 +473,4 @@
 11af3990d56c97b40318bc1f20608e86f051a3f7 jdk9-b68
 ff0929a59ced0e144201aa05819ae2e47d6f2c61 jdk9-b69
 8672e9264db30c21504063932dbc374eabc287a1 jdk9-b70
+07c6b035d68b0c41b1dcd442157b50b41a2551e9 jdk9-b71
--- a/hotspot/agent/make/Makefile	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/agent/make/Makefile	Wed Jul 05 20:40:50 2017 +0200
@@ -58,6 +58,7 @@
 sun.jvm.hotspot.debugger.dummy \
 sun.jvm.hotspot.debugger.linux \
 sun.jvm.hotspot.debugger.linux.amd64 \
+sun.jvm.hotspot.debugger.linux.aarch64 \
 sun.jvm.hotspot.debugger.linux.ppc64 \
 sun.jvm.hotspot.debugger.linux.x86 \
 sun.jvm.hotspot.debugger.posix \
@@ -65,6 +66,7 @@
 sun.jvm.hotspot.debugger.ppc64 \
 sun.jvm.hotspot.debugger.proc \
 sun.jvm.hotspot.debugger.proc.amd64 \
+sun.jvm.hotspot.debugger.proc.aarch64 \
 sun.jvm.hotspot.debugger.proc.ppc64 \
 sun.jvm.hotspot.debugger.proc.sparc \
 sun.jvm.hotspot.debugger.proc.x86 \
@@ -91,11 +93,13 @@
 sun.jvm.hotspot.prims \
 sun.jvm.hotspot.runtime \
 sun.jvm.hotspot.runtime.amd64 \
+sun.jvm.hotspot.runtime.aarch64 \
 sun.jvm.hotspot.runtime.bsd \
 sun.jvm.hotspot.runtime.bsd_amd64 \
 sun.jvm.hotspot.runtime.bsd_x86 \
 sun.jvm.hotspot.runtime.linux \
 sun.jvm.hotspot.runtime.linux_amd64 \
+sun.jvm.hotspot.runtime.linux_aarch64 \
 sun.jvm.hotspot.runtime.linux_ppc64 \
 sun.jvm.hotspot.runtime.linux_sparc \
 sun.jvm.hotspot.runtime.linux_x86 \
@@ -149,16 +153,19 @@
 sun/jvm/hotspot/debugger/linux/*.java \
 sun/jvm/hotspot/debugger/linux/ppc64/*.java \
 sun/jvm/hotspot/debugger/linux/x86/*.java \
+sun/jvm/hotspot/debugger/linux/aarch64/*.java \
 sun/jvm/hotspot/debugger/posix/*.java \
 sun/jvm/hotspot/debugger/posix/elf/*.java \
 sun/jvm/hotspot/debugger/ppc64/*.java \
 sun/jvm/hotspot/debugger/proc/*.java \
 sun/jvm/hotspot/debugger/proc/amd64/*.java \
+sun/jvm/hotspot/debugger/proc/aarch64/*.java \
 sun/jvm/hotspot/debugger/proc/ppc64/*.java \
 sun/jvm/hotspot/debugger/proc/sparc/*.java \
 sun/jvm/hotspot/debugger/proc/x86/*.java \
 sun/jvm/hotspot/debugger/remote/*.java \
 sun/jvm/hotspot/debugger/remote/amd64/*.java \
+sun/jvm/hotspot/debugger/remote/aarch64/*.java \
 sun/jvm/hotspot/debugger/remote/ppc64/*.java \
 sun/jvm/hotspot/debugger/remote/sparc/*.java \
 sun/jvm/hotspot/debugger/remote/x86/*.java \
@@ -178,11 +185,13 @@
 sun/jvm/hotspot/prims/*.java \
 sun/jvm/hotspot/runtime/*.java \
 sun/jvm/hotspot/runtime/amd64/*.java \
+sun/jvm/hotspot/runtime/aarch64/*.java \
 sun/jvm/hotspot/runtime/bsd/*.java \
 sun/jvm/hotspot/runtime/bsd_amd64/*.java \
 sun/jvm/hotspot/runtime/bsd_x86/*.java \
 sun/jvm/hotspot/runtime/linux/*.java \
 sun/jvm/hotspot/runtime/linux_amd64/*.java \
+sun/jvm/hotspot/runtime/linux_aarch64/*.java \
 sun/jvm/hotspot/runtime/linux_ppc64/*.java \
 sun/jvm/hotspot/runtime/linux_sparc/*.java \
 sun/jvm/hotspot/runtime/linux_x86/*.java \
--- a/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/agent/src/os/linux/LinuxDebuggerLocal.c	Wed Jul 05 20:40:50 2017 +0200
@@ -53,6 +53,10 @@
 #include "sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext.h"
 #endif
 
+#ifdef aarch64
+#include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h"
+#endif
+
 static jfieldID p_ps_prochandle_ID = 0;
 static jfieldID threadList_ID = 0;
 static jfieldID loadObjectList_ID = 0;
@@ -368,7 +372,7 @@
 #define NPRGREG sun_jvm_hotspot_debugger_amd64_AMD64ThreadContext_NPRGREG
 #endif
 #ifdef aarch64
-#define NPRGREG 32
+#define NPRGREG sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_NPRGREG
 #endif
 #if defined(sparc) || defined(sparcv9)
 #define NPRGREG sun_jvm_hotspot_debugger_sparc_SPARCThreadContext_NPRGREG
@@ -473,6 +477,13 @@
 
 #define REG_INDEX(reg) sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext_##reg
 
+  {
+    int i;
+    for (i = 0; i < 31; i++)
+      regs[i] = gregs.regs[i];
+    regs[REG_INDEX(SP)] = gregs.sp;
+    regs[REG_INDEX(PC)] = gregs.pc;
+  }
 #endif /* aarch64 */
 
 #ifdef ppc64
--- a/hotspot/agent/src/os/linux/Makefile	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/agent/src/os/linux/Makefile	Wed Jul 05 20:40:50 2017 +0200
@@ -53,14 +53,15 @@
         $(JAVAH) -jni -classpath ../../../build/classes -d $(ARCH) \
 		sun.jvm.hotspot.debugger.x86.X86ThreadContext \
 		sun.jvm.hotspot.debugger.sparc.SPARCThreadContext \
-		sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext 
+		sun.jvm.hotspot.debugger.amd64.AMD64ThreadContext \
+		sun.jvm.hotspot.debugger.aarch64.AARCH64ThreadContext 
         $(GCC) $(CFLAGS) $< -o $@
 
 $(ARCH)/sadis.o:  ../../share/native/sadis.c
         $(JAVAH) -jni -classpath ../../../build/classes -d $(ARCH) \
                 sun.jvm.hotspot.asm.Disassembler
         $(GCC) $(CFLAGS) $< -o $@
- 
+
 $(ARCH)/%.o: %.c
         $(GCC) $(CFLAGS) $< -o $@
 
--- a/hotspot/agent/src/os/linux/libproc.h	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/agent/src/os/linux/libproc.h	Wed Jul 05 20:40:50 2017 +0200
@@ -72,6 +72,7 @@
 #define user_regs_struct  pt_regs
 #endif
 #if defined(aarch64)
+#include <asm/ptrace.h>
 #define user_regs_struct user_pt_regs
 #endif
 
--- a/hotspot/agent/src/os/linux/proc_service.h	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/agent/src/os/linux/proc_service.h	Wed Jul 05 20:40:50 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -30,7 +30,7 @@
 
 // Linux does not have the proc service library, though it does provide the
 // thread_db library which can be used to manipulate threads without having
-// to know the details of LinuxThreads or NPTL
+// to know the details of NPTL
 
 // copied from Solaris "proc_service.h"
 typedef enum {
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/HSDB.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/HSDB.java	Wed Jul 05 20:40:50 2017 +0200
@@ -983,19 +983,15 @@
                                                      curFrame.getFP(),
                                                      anno));
             } else {
-              if (VM.getVM().getCPU().equals("x86") || VM.getVM().getCPU().equals("amd64")) {
-                // For C2, which has null frame pointers on x86/amd64
-                CodeBlob cb = VM.getVM().getCodeCache().findBlob(curFrame.getPC());
-                Address sp = curFrame.getSP();
-                if (Assert.ASSERTS_ENABLED) {
-                  Assert.that(cb.getFrameSize() > 0, "CodeBlob must have non-zero frame size");
-                }
-                annoPanel.addAnnotation(new Annotation(sp,
-                                                       sp.addOffsetTo(cb.getFrameSize()),
-                                                       anno));
-              } else {
-                Assert.that(VM.getVM().getCPU().equals("ia64"), "only ia64 should reach here");
+              // For C2, which has null frame pointers on x86/amd64/aarch64
+              CodeBlob cb = VM.getVM().getCodeCache().findBlob(curFrame.getPC());
+              Address sp = curFrame.getSP();
+              if (Assert.ASSERTS_ENABLED) {
+                Assert.that(cb.getFrameSize() > 0, "CodeBlob must have non-zero frame size");
               }
+              annoPanel.addAnnotation(new Annotation(sp,
+                                                     sp.addOffsetTo(cb.getFrameSize()),
+                                                     anno));
             }
 
             // Add interpreter frame annotations
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/aarch64/AARCH64ThreadContext.java	Wed Jul 05 20:40:50 2017 +0200
@@ -0,0 +1,123 @@
+/*
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.aarch64;
+
+import java.lang.annotation.Native;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.cdbg.*;
+
+/** Specifies the thread context on aarch64 platforms; only a sub-portion
+ * of the context is guaranteed to be present on all operating
+ * systems. */
+
+public abstract class AARCH64ThreadContext implements ThreadContext {
+    // Taken from /usr/include/asm/sigcontext.h on Linux/AARCH64.
+
+    // NOTE: the indices for the various registers must be maintained as
+    // listed across various operating systems. However, only a small
+    // subset of the registers' values are guaranteed to be present (and
+    // must be present for the SA's stack walking to work)
+
+    // One instance of the Native annotation is enough to trigger header generation
+    // for this file.
+    @Native
+    public static final int R0 = 0;
+    public static final int R1 = 1;
+    public static final int R2 = 2;
+    public static final int R3 = 3;
+    public static final int R4 = 4;
+    public static final int R5 = 5;
+    public static final int R6 = 6;
+    public static final int R7 = 7;
+    public static final int R8 = 8;
+    public static final int R9 = 9;
+    public static final int R10 = 10;
+    public static final int R11 = 11;
+    public static final int R12 = 12;
+    public static final int R13 = 13;
+    public static final int R14 = 14;
+    public static final int R15 = 15;
+    public static final int R16 = 16;
+    public static final int R17 = 17;
+    public static final int R18 = 18;
+    public static final int R19 = 19;
+    public static final int R20 = 20;
+    public static final int R21 = 21;
+    public static final int R22 = 22;
+    public static final int R23 = 23;
+    public static final int R24 = 24;
+    public static final int R25 = 25;
+    public static final int R26 = 26;
+    public static final int R27 = 27;
+    public static final int R28 = 28;
+    public static final int FP = 29;
+    public static final int LR = 30;
+    public static final int SP = 31;
+    public static final int PC = 32;
+
+    public static final int NPRGREG = 33;
+
+    private long[] data;
+
+    public AARCH64ThreadContext() {
+        data = new long[NPRGREG];
+    }
+
+    public int getNumRegisters() {
+        return NPRGREG;
+    }
+
+    public String getRegisterName(int index) {
+        switch (index) {
+        case LR: return "lr";
+        case SP: return "sp";
+        case PC: return "pc";
+        default:
+            return "r" + index;
+        }
+    }
+
+    public void setRegister(int index, long value) {
+        data[index] = value;
+    }
+
+    public long getRegister(int index) {
+        return data[index];
+    }
+
+    public CFrame getTopFrame(Debugger dbg) {
+        return null;
+    }
+
+    /** This can't be implemented in this class since we would have to
+     * tie the implementation to, for example, the debugging system */
+    public abstract void setRegisterAsAddress(int index, Address value);
+
+    /** This can't be implemented in this class since we would have to
+     * tie the implementation to, for example, the debugging system */
+    public abstract Address getRegisterAsAddress(int index);
+}
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java	Wed Jul 05 20:40:50 2017 +0200
@@ -1,5 +1,6 @@
 /*
  * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -31,12 +32,14 @@
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.x86.*;
 import sun.jvm.hotspot.debugger.amd64.*;
+import sun.jvm.hotspot.debugger.aarch64.*;
 import sun.jvm.hotspot.debugger.sparc.*;
 import sun.jvm.hotspot.debugger.ppc64.*;
 import sun.jvm.hotspot.debugger.linux.x86.*;
 import sun.jvm.hotspot.debugger.linux.amd64.*;
 import sun.jvm.hotspot.debugger.linux.sparc.*;
 import sun.jvm.hotspot.debugger.linux.ppc64.*;
+import sun.jvm.hotspot.debugger.linux.aarch64.*;
 import sun.jvm.hotspot.utilities.*;
 
 class LinuxCDebugger implements CDebugger {
@@ -106,6 +109,13 @@
         Address pc  = context.getRegisterAsAddress(PPC64ThreadContext.PC);
         if (pc == null) return null;
         return new LinuxPPC64CFrame(dbg, sp, pc, LinuxDebuggerLocal.getAddressSize());
+    } else if (cpu.equals("aarch64")) {
+       AARCH64ThreadContext context = (AARCH64ThreadContext) thread.getContext();
+       Address fp = context.getRegisterAsAddress(AARCH64ThreadContext.FP);
+       if (fp == null) return null;
+       Address pc  = context.getRegisterAsAddress(AARCH64ThreadContext.PC);
+       if (pc == null) return null;
+       return new LinuxAARCH64CFrame(dbg, fp, pc);
      } else {
        // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu
        ThreadContext context = (ThreadContext) thread.getContext();
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/aarch64/LinuxAARCH64CFrame.java	Wed Jul 05 20:40:50 2017 +0200
@@ -0,0 +1,86 @@
+/*
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.linux.aarch64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.aarch64.*;
+import sun.jvm.hotspot.debugger.linux.*;
+import sun.jvm.hotspot.debugger.cdbg.*;
+import sun.jvm.hotspot.debugger.cdbg.basic.*;
+
+final public class LinuxAARCH64CFrame extends BasicCFrame {
+   public LinuxAARCH64CFrame(LinuxDebugger dbg, Address fp, Address pc) {
+      super(dbg.getCDebugger());
+      this.fp = fp;
+      this.pc = pc;
+      this.dbg = dbg;
+   }
+
+   // override base class impl to avoid ELF parsing
+   public ClosestSymbol closestSymbolToPC() {
+      // try native lookup in debugger.
+      return dbg.lookup(dbg.getAddressValue(pc()));
+   }
+
+   public Address pc() {
+      return pc;
+   }
+
+   public Address localVariableBase() {
+      return fp;
+   }
+
+   public CFrame sender(ThreadProxy thread) {
+      AARCH64ThreadContext context = (AARCH64ThreadContext) thread.getContext();
+      Address rsp = context.getRegisterAsAddress(AARCH64ThreadContext.SP);
+
+      if ((fp == null) || fp.lessThan(rsp)) {
+        return null;
+      }
+
+      // Check alignment of fp
+      if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) {
+        return null;
+      }
+
+      Address nextFP = fp.getAddressAt(0 * ADDRESS_SIZE);
+      if (nextFP == null || nextFP.lessThanOrEqual(fp)) {
+        return null;
+      }
+      Address nextPC  = fp.getAddressAt(1 * ADDRESS_SIZE);
+      if (nextPC == null) {
+        return null;
+      }
+      return new LinuxAARCH64CFrame(dbg, nextFP, nextPC);
+   }
+
+   // package/class internals only
+   private static final int ADDRESS_SIZE = 8;
+   private Address pc;
+   private Address sp;
+   private Address fp;
+   private LinuxDebugger dbg;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/aarch64/LinuxAARCH64ThreadContext.java	Wed Jul 05 20:40:50 2017 +0200
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.linux.aarch64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.aarch64.*;
+import sun.jvm.hotspot.debugger.linux.*;
+
+public class LinuxAARCH64ThreadContext extends AARCH64ThreadContext {
+  private LinuxDebugger debugger;
+
+  public LinuxAARCH64ThreadContext(LinuxDebugger debugger) {
+    super();
+    this.debugger = debugger;
+  }
+
+  public void setRegisterAsAddress(int index, Address value) {
+    setRegister(index, debugger.getAddressValue(value));
+  }
+
+  public Address getRegisterAsAddress(int index) {
+    return debugger.newAddress(getRegister(index));
+  }
+}
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java	Wed Jul 05 20:40:50 2017 +0200
@@ -31,11 +31,13 @@
 import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.proc.amd64.*;
+import sun.jvm.hotspot.debugger.proc.aarch64.*;
 import sun.jvm.hotspot.debugger.proc.sparc.*;
 import sun.jvm.hotspot.debugger.proc.ppc64.*;
 import sun.jvm.hotspot.debugger.proc.x86.*;
 import sun.jvm.hotspot.debugger.ppc64.*;
 import sun.jvm.hotspot.debugger.amd64.*;
+import sun.jvm.hotspot.debugger.aarch64.*;
 import sun.jvm.hotspot.debugger.sparc.*;
 import sun.jvm.hotspot.debugger.x86.*;
 import sun.jvm.hotspot.utilities.*;
@@ -88,6 +90,10 @@
             threadFactory = new ProcAMD64ThreadFactory(this);
             pcRegIndex = AMD64ThreadContext.RIP;
             fpRegIndex = AMD64ThreadContext.RBP;
+        } else if (cpu.equals("aarch64")) {
+            threadFactory = new ProcAARCH64ThreadFactory(this);
+            pcRegIndex = AARCH64ThreadContext.PC;
+            fpRegIndex = AARCH64ThreadContext.FP;
         } else if (cpu.equals("ppc64")) {
             threadFactory = new ProcPPC64ThreadFactory(this);
             pcRegIndex = PPC64ThreadContext.PC;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/aarch64/ProcAARCH64Thread.java	Wed Jul 05 20:40:50 2017 +0200
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.proc.aarch64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.aarch64.*;
+import sun.jvm.hotspot.debugger.proc.*;
+import sun.jvm.hotspot.utilities.*;
+
+public class ProcAARCH64Thread implements ThreadProxy {
+    private ProcDebugger debugger;
+    private int         id;
+
+    public ProcAARCH64Thread(ProcDebugger debugger, Address addr) {
+        this.debugger = debugger;
+
+        // FIXME: the size here should be configurable. However, making it
+        // so would produce a dependency on the "types" package from the
+        // debugger package, which is not desired.
+        this.id       = (int) addr.getCIntegerAt(0, 4, true);
+    }
+
+    public ProcAARCH64Thread(ProcDebugger debugger, long id) {
+        this.debugger = debugger;
+        this.id = (int) id;
+    }
+
+    public ThreadContext getContext() throws IllegalThreadStateException {
+        ProcAARCH64ThreadContext context = new ProcAARCH64ThreadContext(debugger);
+        long[] regs = debugger.getThreadIntegerRegisterSet(id);
+        if (Assert.ASSERTS_ENABLED) {
+            Assert.that(regs.length == AARCH64ThreadContext.NPRGREG, "size mismatch");
+        }
+        for (int i = 0; i < regs.length; i++) {
+            context.setRegister(i, regs[i]);
+        }
+        return context;
+    }
+
+    public boolean canSetContext() throws DebuggerException {
+        return false;
+    }
+
+    public void setContext(ThreadContext context)
+    throws IllegalThreadStateException, DebuggerException {
+        throw new DebuggerException("Unimplemented");
+    }
+
+    public String toString() {
+        return "t@" + id;
+    }
+
+    public boolean equals(Object obj) {
+        if ((obj == null) || !(obj instanceof ProcAARCH64Thread)) {
+            return false;
+        }
+
+        return (((ProcAARCH64Thread) obj).id == id);
+    }
+
+    public int hashCode() {
+        return id;
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/aarch64/ProcAARCH64ThreadContext.java	Wed Jul 05 20:40:50 2017 +0200
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.proc.aarch64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.aarch64.*;
+import sun.jvm.hotspot.debugger.proc.*;
+
+public class ProcAARCH64ThreadContext extends AARCH64ThreadContext {
+    private ProcDebugger debugger;
+
+    public ProcAARCH64ThreadContext(ProcDebugger debugger) {
+        super();
+        this.debugger = debugger;
+    }
+
+    public void setRegisterAsAddress(int index, Address value) {
+        setRegister(index, debugger.getAddressValue(value));
+    }
+
+    public Address getRegisterAsAddress(int index) {
+        return debugger.newAddress(getRegister(index));
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/aarch64/ProcAARCH64ThreadFactory.java	Wed Jul 05 20:40:50 2017 +0200
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.proc.aarch64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.proc.*;
+
+public class ProcAARCH64ThreadFactory implements ProcThreadFactory {
+    private ProcDebugger debugger;
+
+    public ProcAARCH64ThreadFactory(ProcDebugger debugger) {
+        this.debugger = debugger;
+    }
+
+    public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) {
+        return new ProcAARCH64Thread(debugger, threadIdentifierAddr);
+    }
+
+    public ThreadProxy createThreadWrapper(long id) {
+        return new ProcAARCH64Thread(debugger, id);
+    }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/aarch64/RemoteAARCH64Thread.java	Wed Jul 05 20:40:50 2017 +0200
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.remote.aarch64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.aarch64.*;
+import sun.jvm.hotspot.debugger.remote.*;
+import sun.jvm.hotspot.utilities.*;
+
+public class RemoteAARCH64Thread extends RemoteThread  {
+  public RemoteAARCH64Thread(RemoteDebuggerClient debugger, Address addr) {
+     super(debugger, addr);
+  }
+
+  public RemoteAARCH64Thread(RemoteDebuggerClient debugger, long id) {
+     super(debugger, id);
+  }
+
+  public ThreadContext getContext() throws IllegalThreadStateException {
+    RemoteAARCH64ThreadContext context = new RemoteAARCH64ThreadContext(debugger);
+    long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) :
+                                  debugger.getThreadIntegerRegisterSet(id);
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(regs.length == AARCH64ThreadContext.NPRGREG, "size of register set must match");
+    }
+    for (int i = 0; i < regs.length; i++) {
+      context.setRegister(i, regs[i]);
+    }
+    return context;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/aarch64/RemoteAARCH64ThreadContext.java	Wed Jul 05 20:40:50 2017 +0200
@@ -0,0 +1,47 @@
+/*
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.remote.aarch64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.aarch64.*;
+import sun.jvm.hotspot.debugger.remote.*;
+
+public class RemoteAARCH64ThreadContext extends AARCH64ThreadContext {
+  private RemoteDebuggerClient debugger;
+
+  public RemoteAARCH64ThreadContext(RemoteDebuggerClient debugger) {
+    super();
+    this.debugger = debugger;
+  }
+
+  public void setRegisterAsAddress(int index, Address value) {
+    setRegister(index, debugger.getAddressValue(value));
+  }
+
+  public Address getRegisterAsAddress(int index) {
+    return debugger.newAddress(getRegister(index));
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/aarch64/RemoteAARCH64ThreadFactory.java	Wed Jul 05 20:40:50 2017 +0200
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.debugger.remote.aarch64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.remote.*;
+
+public class RemoteAARCH64ThreadFactory implements RemoteThreadFactory {
+  private RemoteDebuggerClient debugger;
+
+  public RemoteAARCH64ThreadFactory(RemoteDebuggerClient debugger) {
+    this.debugger = debugger;
+  }
+
+  public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) {
+    return new RemoteAARCH64Thread(debugger, threadIdentifierAddr);
+  }
+
+  public ThreadProxy createThreadWrapper(long id) {
+    return new RemoteAARCH64Thread(debugger, id);
+  }
+}
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/gc/shared/Generation.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/gc/shared/Generation.java	Wed Jul 05 20:40:50 2017 +0200
@@ -49,7 +49,6 @@
 public abstract class Generation extends VMObject {
   private static long          reservedFieldOffset;
   private static long          virtualSpaceFieldOffset;
-  private static CIntegerField levelField;
   protected static final int  K = 1024;
   // Fields for class StatRecord
   private static Field         statRecordField;
@@ -75,7 +74,6 @@
 
     reservedFieldOffset     = type.getField("_reserved").getOffset();
     virtualSpaceFieldOffset = type.getField("_virtual_space").getOffset();
-    levelField              = type.getCIntegerField("_level");
     // StatRecord
     statRecordField         = type.getField("_stat_record");
     type                    = db.lookupType("Generation::StatRecord");
@@ -130,14 +128,6 @@
      }
   }
 
-  public GenerationSpec spec() {
-    return ((GenCollectedHeap) VM.getVM().getUniverse().heap()).spec(level());
-  }
-
-  public int level() {
-    return (int) levelField.getValue(addr);
-  }
-
   public int invocations() {
     return getStatRecord().getInvocations();
   }
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java	Wed Jul 05 20:40:50 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -357,12 +357,6 @@
   // FIXME: avoiding implementing this for now if possible
   //  public void interpreter_frame_set_monitor_end(BasicObjectLock* value);
   //  public void interpreter_frame_verify_monitor(BasicObjectLock* value) const;
-  //
-  // Tells whether the current interpreter_frame frame pointer
-  // corresponds to the old compiled/deoptimized fp
-  // The receiver used to be a top level frame
-  // public boolean interpreter_frame_equals_unpacked_fp(intptr_t* fp);
-
   //--------------------------------------------------------------------------------
   // Method and constant pool cache:
   //
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java	Wed Jul 05 20:40:50 2017 +0200
@@ -35,6 +35,7 @@
 import sun.jvm.hotspot.runtime.win32_x86.Win32X86JavaThreadPDAccess;
 import sun.jvm.hotspot.runtime.linux_x86.LinuxX86JavaThreadPDAccess;
 import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess;
+import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess;
 import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess;
 import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess;
 import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess;
@@ -91,6 +92,8 @@
                 access = new LinuxSPARCJavaThreadPDAccess();
             } else if (cpu.equals("ppc64")) {
                 access = new LinuxPPC64JavaThreadPDAccess();
+            } else if (cpu.equals("aarch64")) {
+                access = new LinuxAARCH64JavaThreadPDAccess();
             } else {
               try {
                 access = (JavaThreadPDAccess)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64CurrentFrameGuess.java	Wed Jul 05 20:40:50 2017 +0200
@@ -0,0 +1,244 @@
+/*
+ * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.runtime.aarch64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.aarch64.*;
+import sun.jvm.hotspot.code.*;
+import sun.jvm.hotspot.interpreter.*;
+import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.runtime.aarch64.*;
+
+/** <P> Should be able to be used on all aarch64 platforms we support
+    (Linux/aarch64) to implement JavaThread's "currentFrameGuess()"
+    functionality. Input is an AARCH64ThreadContext; output is SP, FP,
+    and PC for an AARCH64Frame. Instantiation of the AARCH64Frame is
+    left to the caller, since we may need to subclass AARCH64Frame to
+    support signal handler frames on Unix platforms. </P>
+
+    <P> Algorithm is to walk up the stack within a given range (say,
+    512K at most) looking for a plausible PC and SP for a Java frame,
+    also considering those coming in from the context. If we find a PC
+    that belongs to the VM (i.e., in generated code like the
+    interpreter or CodeCache) then we try to find an associated FP.
+    We repeat this until we either find a complete frame or run out of
+    stack to look at. </P> */
+
+public class AARCH64CurrentFrameGuess {
+  private AARCH64ThreadContext context;
+  private JavaThread       thread;
+  private Address          spFound;
+  private Address          fpFound;
+  private Address          pcFound;
+
+  private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.aarch64.AARCH64Frame.DEBUG")
+                                       != null;
+
+  public AARCH64CurrentFrameGuess(AARCH64ThreadContext context,
+                              JavaThread thread) {
+    this.context = context;
+    this.thread  = thread;
+  }
+
+  /** Returns false if not able to find a frame within a reasonable range. */
+  public boolean run(long regionInBytesToSearch) {
+    Address sp  = context.getRegisterAsAddress(AARCH64ThreadContext.SP);
+    Address pc  = context.getRegisterAsAddress(AARCH64ThreadContext.PC);
+    Address fp  = context.getRegisterAsAddress(AARCH64ThreadContext.FP);
+    if (sp == null) {
+      // Bail out if no last java frame either
+      if (thread.getLastJavaSP() != null) {
+        setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null);
+        return true;
+      }
+      return false;
+    }
+    Address end = sp.addOffsetTo(regionInBytesToSearch);
+    VM vm       = VM.getVM();
+
+    setValues(null, null, null); // Assume we're not going to find anything
+
+    if (vm.isJavaPCDbg(pc)) {
+      if (vm.isClientCompiler()) {
+        // If the topmost frame is a Java frame, we are (pretty much)
+        // guaranteed to have a viable FP. We should be more robust
+        // than this (we have the potential for losing entire threads'
+        // stack traces) but need to see how much work we really have
+        // to do here. Searching the stack for an (SP, FP) pair is
+        // hard since it's easy to misinterpret inter-frame stack
+        // pointers as base-of-frame pointers; we also don't know the
+        // sizes of C1 frames (not registered in the nmethod) so can't
+        // derive them from SP.
+
+        setValues(sp, fp, pc);
+        return true;
+      } else {
+        if (vm.getInterpreter().contains(pc)) {
+          if (DEBUG) {
+            System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " +
+                               sp + ", fp = " + fp + ", pc = " + pc);
+          }
+          setValues(sp, fp, pc);
+          return true;
+        }
+
+        // For the server compiler, FP is not guaranteed to be valid
+        // for compiled code. In addition, an earlier attempt at a
+        // non-searching algorithm (see below) failed because the
+        // stack pointer from the thread context was pointing
+        // (considerably) beyond the ostensible end of the stack, into
+        // garbage; walking from the topmost frame back caused a crash.
+        //
+        // This algorithm takes the current PC as a given and tries to
+        // find the correct corresponding SP by walking up the stack
+        // and repeatedly performing stackwalks (very inefficient).
+        //
+        // FIXME: there is something wrong with stackwalking across
+        // adapter frames...this is likely to be the root cause of the
+        // failure with the simpler algorithm below.
+
+        for (long offset = 0;
+             offset < regionInBytesToSearch;
+             offset += vm.getAddressSize()) {
+          try {
+            Address curSP = sp.addOffsetTo(offset);
+            Frame frame = new AARCH64Frame(curSP, null, pc);
+            RegisterMap map = thread.newRegisterMap(false);
+            while (frame != null) {
+              if (frame.isEntryFrame() && frame.entryFrameIsFirst()) {
+                // We were able to traverse all the way to the
+                // bottommost Java frame.
+                // This sp looks good. Keep it.
+                if (DEBUG) {
+                  System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc);
+                }
+                setValues(curSP, null, pc);
+                return true;
+              }
+              frame = frame.sender(map);
+            }
+          } catch (Exception e) {
+            if (DEBUG) {
+              System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset);
+            }
+            // Bad SP. Try another.
+          }
+        }
+
+        // Were not able to find a plausible SP to go with this PC.
+        // Bail out.
+        return false;
+
+        /*
+        // Original algorithm which does not work because SP was
+        // pointing beyond where it should have:
+
+        // For the server compiler, FP is not guaranteed to be valid
+        // for compiled code. We see whether the PC is in the
+        // interpreter and take care of that, otherwise we run code
+        // (unfortunately) duplicated from AARCH64Frame.senderForCompiledFrame.
+
+        CodeCache cc = vm.getCodeCache();
+        if (cc.contains(pc)) {
+          CodeBlob cb = cc.findBlob(pc);
+
+          // See if we can derive a frame pointer from SP and PC
+          // NOTE: This is the code duplicated from AARCH64Frame
+          Address saved_fp = null;
+          int llink_offset = cb.getLinkOffset();
+          if (llink_offset >= 0) {
+            // Restore base-pointer, since next frame might be an interpreter frame.
+            Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset);
+            saved_fp = fp_addr.getAddressAt(0);
+          }
+
+          setValues(sp, saved_fp, pc);
+          return true;
+        }
+        */
+      }
+    } else {
+      // If the current program counter was not known to us as a Java
+      // PC, we currently assume that we are in the run-time system
+      // and attempt to look to thread-local storage for saved SP and
+      // FP. Note that if these are null (because we were, in fact,
+      // in Java code, i.e., vtable stubs or similar, and the SA
+      // didn't have enough insight into the target VM to understand
+      // that) then we are going to lose the entire stack trace for
+      // the thread, which is sub-optimal. FIXME.
+
+      if (DEBUG) {
+        System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " +
+                           thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP());
+      }
+      if (thread.getLastJavaSP() == null) {
+        return false; // No known Java frames on stack
+      }
+
+      // The runtime has a nasty habit of not saving fp in the frame
+      // anchor, leaving us to grovel about in the stack to find a
+      // plausible address.  Fortunately, this only happens in
+      // compiled code; there we always have a valid PC, and we always
+      // push LR and FP onto the stack as a pair, with FP at the lower
+      // address.
+      pc = thread.getLastJavaPC();
+      fp = thread.getLastJavaFP();
+      sp = thread.getLastJavaSP();
+
+      if (fp == null) {
+        CodeCache cc = vm.getCodeCache();
+        if (cc.contains(pc)) {
+          CodeBlob cb = cc.findBlob(pc);
+          if (DEBUG) {
+            System.out.println("FP is null.  Found blob frame size " + cb.getFrameSize());
+          }
+          // See if we can derive a frame pointer from SP and PC
+          long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize();
+          if (link_offset >= 0) {
+            fp = sp.addOffsetTo(link_offset);
+          }
+        }
+      }
+
+      setValues(sp, fp, null);
+
+      return true;
+    }
+  }
+
+  public Address getSP() { return spFound; }
+  public Address getFP() { return fpFound; }
+  /** May be null if getting values from thread-local storage; take
+      care to call the correct AARCH64Frame constructor to recover this if
+      necessary */
+  public Address getPC() { return pcFound; }
+
+  private void setValues(Address sp, Address fp, Address pc) {
+    spFound = sp;
+    fpFound = fp;
+    pcFound = pc;
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64Frame.java	Wed Jul 05 20:40:50 2017 +0200
@@ -0,0 +1,555 @@
+/*
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.runtime.aarch64;
+
+import java.util.*;
+import sun.jvm.hotspot.code.*;
+import sun.jvm.hotspot.compiler.*;
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.oops.*;
+import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.types.*;
+import sun.jvm.hotspot.utilities.*;
+
+/** Specialization of and implementation of abstract methods of the
+    Frame class for the aarch64 family of CPUs. */
+
+public class AARCH64Frame extends Frame {
+  private static final boolean DEBUG;
+  static {
+    DEBUG = System.getProperty("sun.jvm.hotspot.runtime.aarch64.AARCH64Frame.DEBUG") != null;
+  }
+
+  // All frames
+  private static final int LINK_OFFSET                =  0;
+  private static final int RETURN_ADDR_OFFSET         =  1;
+  private static final int SENDER_SP_OFFSET           =  2;
+
+  // Interpreter frames
+  private static final int INTERPRETER_FRAME_MIRROR_OFFSET    =  2; // for native calls only
+  private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1;
+  private static final int INTERPRETER_FRAME_LAST_SP_OFFSET   = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1;
+  private static final int INTERPRETER_FRAME_METHOD_OFFSET    = INTERPRETER_FRAME_LAST_SP_OFFSET - 1;
+  private static       int INTERPRETER_FRAME_MDX_OFFSET;         // Non-core builds only
+  private static       int INTERPRETER_FRAME_CACHE_OFFSET;
+  private static       int INTERPRETER_FRAME_LOCALS_OFFSET;
+  private static       int INTERPRETER_FRAME_BCX_OFFSET;
+  private static       int INTERPRETER_FRAME_INITIAL_SP_OFFSET;
+  private static       int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET;
+  private static       int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET;
+
+  // Entry frames
+  private static       int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -8;
+
+  // Native frames
+  private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET =  2;
+
+  private static VMReg fp = new VMReg(29);
+
+  static {
+    VM.registerVMInitializedObserver(new Observer() {
+        public void update(Observable o, Object data) {
+          initialize(VM.getVM().getTypeDataBase());
+        }
+      });
+  }
+
+  private static synchronized void initialize(TypeDataBase db) {
+    INTERPRETER_FRAME_MDX_OFFSET                  = INTERPRETER_FRAME_METHOD_OFFSET - 1;
+    INTERPRETER_FRAME_CACHE_OFFSET                = INTERPRETER_FRAME_MDX_OFFSET - 1;
+    INTERPRETER_FRAME_LOCALS_OFFSET               = INTERPRETER_FRAME_CACHE_OFFSET - 1;
+    INTERPRETER_FRAME_BCX_OFFSET                  = INTERPRETER_FRAME_LOCALS_OFFSET - 1;
+    INTERPRETER_FRAME_INITIAL_SP_OFFSET           = INTERPRETER_FRAME_BCX_OFFSET - 1;
+    INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET    = INTERPRETER_FRAME_INITIAL_SP_OFFSET;
+    INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET;
+  }
+
+
+  // an additional field beyond sp and pc:
+  Address raw_fp; // frame pointer
+  private Address raw_unextendedSP;
+
+  private AARCH64Frame() {
+  }
+
+  private void adjustForDeopt() {
+    if ( pc != null) {
+      // Look for a deopt pc and if it is deopted convert to original pc
+      CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc);
+      if (cb != null && cb.isJavaMethod()) {
+        NMethod nm = (NMethod) cb;
+        if (pc.equals(nm.deoptHandlerBegin())) {
+          if (Assert.ASSERTS_ENABLED) {
+            Assert.that(this.getUnextendedSP() != null, "null SP in Java frame");
+          }
+          // adjust pc if frame is deoptimized.
+          pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset());
+          deoptimized = true;
+        }
+      }
+    }
+  }
+
+  public AARCH64Frame(Address raw_sp, Address raw_fp, Address pc) {
+    this.raw_sp = raw_sp;
+    this.raw_unextendedSP = raw_sp;
+    this.raw_fp = raw_fp;
+    this.pc = pc;
+    adjustUnextendedSP();
+
+    // Frame must be fully constructed before this call
+    adjustForDeopt();
+
+    if (DEBUG) {
+      System.out.println("AARCH64Frame(sp, fp, pc): " + this);
+      dumpStack();
+    }
+  }
+
+  public AARCH64Frame(Address raw_sp, Address raw_fp) {
+    this.raw_sp = raw_sp;
+    this.raw_unextendedSP = raw_sp;
+    this.raw_fp = raw_fp;
+    this.pc = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize());
+    adjustUnextendedSP();
+
+    // Frame must be fully constructed before this call
+    adjustForDeopt();
+
+    if (DEBUG) {
+      System.out.println("AARCH64Frame(sp, fp): " + this);
+      dumpStack();
+    }
+  }
+
+  public AARCH64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) {
+    this.raw_sp = raw_sp;
+    this.raw_unextendedSP = raw_unextendedSp;
+    this.raw_fp = raw_fp;
+    this.pc = pc;
+    adjustUnextendedSP();
+
+    // Frame must be fully constructed before this call
+    adjustForDeopt();
+
+    if (DEBUG) {
+      System.out.println("AARCH64Frame(sp, unextendedSP, fp, pc): " + this);
+      dumpStack();
+    }
+
+  }
+
+  public Object clone() {
+    AARCH64Frame frame = new AARCH64Frame();
+    frame.raw_sp = raw_sp;
+    frame.raw_unextendedSP = raw_unextendedSP;
+    frame.raw_fp = raw_fp;
+    frame.pc = pc;
+    frame.deoptimized = deoptimized;
+    return frame;
+  }
+
+  public boolean equals(Object arg) {
+    if (arg == null) {
+      return false;
+    }
+
+    if (!(arg instanceof AARCH64Frame)) {
+      return false;
+    }
+
+    AARCH64Frame other = (AARCH64Frame) arg;
+
+    return (AddressOps.equal(getSP(), other.getSP()) &&
+            AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) &&
+            AddressOps.equal(getFP(), other.getFP()) &&
+            AddressOps.equal(getPC(), other.getPC()));
+  }
+
+  public int hashCode() {
+    if (raw_sp == null) {
+      return 0;
+    }
+
+    return raw_sp.hashCode();
+  }
+
+  public String toString() {
+    return "sp: " + (getSP() == null? "null" : getSP().toString()) +
+         ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) +
+         ", fp: " + (getFP() == null? "null" : getFP().toString()) +
+         ", pc: " + (pc == null? "null" : pc.toString());
+  }
+
+  // accessors for the instance variables
+  public Address getFP() { return raw_fp; }
+  public Address getSP() { return raw_sp; }
+  public Address getID() { return raw_sp; }
+
+  // FIXME: not implemented yet
+  public boolean isSignalHandlerFrameDbg() { return false; }
+  public int     getSignalNumberDbg()      { return 0;     }
+  public String  getSignalNameDbg()        { return null;  }
+
+  public boolean isInterpretedFrameValid() {
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(isInterpretedFrame(), "Not an interpreted frame");
+    }
+
+    // These are reasonable sanity checks
+    if (getFP() == null || getFP().andWithMask(0x3) != null) {
+      return false;
+    }
+
+    if (getSP() == null || getSP().andWithMask(0x3) != null) {
+      return false;
+    }
+
+    if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) {
+      return false;
+    }
+
+    // These are hacks to keep us out of trouble.
+    // The problem with these is that they mask other problems
+    if (getFP().lessThanOrEqual(getSP())) {
+      // this attempts to deal with unsigned comparison above
+      return false;
+    }
+
+    if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) {
+      // stack frames shouldn't be large.
+      return false;
+    }
+
+    return true;
+  }
+
+  // FIXME: not applicable in current system
+  //  void    patch_pc(Thread* thread, address pc);
+
+  public Frame sender(RegisterMap regMap, CodeBlob cb) {
+    AARCH64RegisterMap map = (AARCH64RegisterMap) regMap;
+
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(map != null, "map must be set");
+    }
+
+    // Default is we done have to follow them. The sender_for_xxx will
+    // update it accordingly
+    map.setIncludeArgumentOops(false);
+
+    if (isEntryFrame())       return senderForEntryFrame(map);
+    if (isInterpretedFrame()) return senderForInterpreterFrame(map);
+
+    if(cb == null) {
+      cb = VM.getVM().getCodeCache().findBlob(getPC());
+    } else {
+      if (Assert.ASSERTS_ENABLED) {
+        Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same");
+      }
+    }
+
+    if (cb != null) {
+      return senderForCompiledFrame(map, cb);
+    }
+
+    // Must be native-compiled frame, i.e. the marshaling code for native
+    // methods that exists in the core system.
+    return new AARCH64Frame(getSenderSP(), getLink(), getSenderPC());
+  }
+
+  private Frame senderForEntryFrame(AARCH64RegisterMap map) {
+    if (DEBUG) {
+      System.out.println("senderForEntryFrame");
+    }
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(map != null, "map must be set");
+    }
+    // Java frame called from C; skip all C frames and return top C
+    // frame of that chunk as the sender
+    AARCH64JavaCallWrapper jcw = (AARCH64JavaCallWrapper) getEntryFrameCallWrapper();
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero");
+      Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack");
+    }
+    AARCH64Frame fr;
+    if (jcw.getLastJavaPC() != null) {
+      fr = new AARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC());
+    } else {
+      fr = new AARCH64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP());
+    }
+    map.clear();
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(map.getIncludeArgumentOops(), "should be set by clear");
+    }
+    return fr;
+  }
+
+  //------------------------------------------------------------------------------
+  // frame::adjust_unextended_sp
+  private void adjustUnextendedSP() {
+    // If we are returning to a compiled MethodHandle call site, the
+    // saved_fp will in fact be a saved value of the unextended SP.  The
+    // simplest way to tell whether we are returning to such a call site
+    // is as follows:
+
+    CodeBlob cb = cb();
+    NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull();
+    if (senderNm != null) {
+      // If the sender PC is a deoptimization point, get the original
+      // PC.  For MethodHandle call site the unextended_sp is stored in
+      // saved_fp.
+      if (senderNm.isDeoptMhEntry(getPC())) {
+        // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP()));
+        raw_unextendedSP = getFP();
+      }
+      else if (senderNm.isDeoptEntry(getPC())) {
+        // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp));
+      }
+      else if (senderNm.isMethodHandleReturn(getPC())) {
+        raw_unextendedSP = getFP();
+      }
+    }
+  }
+
+  private Frame senderForInterpreterFrame(AARCH64RegisterMap map) {
+    if (DEBUG) {
+      System.out.println("senderForInterpreterFrame");
+    }
+    Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0);
+    Address sp = addressOfStackSlot(SENDER_SP_OFFSET);
+    // We do not need to update the callee-save register mapping because above
+    // us is either another interpreter frame or a converter-frame, but never
+    // directly a compiled frame.
+    // 11/24/04 SFG. With the removal of adapter frames this is no longer true.
+    // However c2 no longer uses callee save register for java calls so there
+    // are no callee register to find.
+
+    if (map.getUpdateMap())
+      updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET));
+
+    return new AARCH64Frame(sp, unextendedSP, getLink(), getSenderPC());
+  }
+
+  private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) {
+    map.setLocation(fp, savedFPAddr);
+  }
+
+  private Frame senderForCompiledFrame(AARCH64RegisterMap map, CodeBlob cb) {
+    if (DEBUG) {
+      System.out.println("senderForCompiledFrame");
+    }
+
+    //
+    // NOTE: some of this code is (unfortunately) duplicated  AARCH64CurrentFrameGuess
+    //
+
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(map != null, "map must be set");
+    }
+
+    // frame owned by optimizing compiler
+    if (Assert.ASSERTS_ENABLED) {
+        Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size");
+    }
+    Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize());
+
+    // The return_address is always the word on the stack
+    Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize());
+
+    // This is the saved value of FP which may or may not really be an FP.
+    // It is only an FP if the sender is an interpreter frame.
+    Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize());
+
+    if (map.getUpdateMap()) {
+      // Tell GC to use argument oopmaps for some runtime stubs that need it.
+      // For C1, the runtime stub might not have oop maps, so set this flag
+      // outside of update_register_map.
+      map.setIncludeArgumentOops(cb.callerMustGCArguments());
+
+      if (cb.getOopMaps() != null) {
+        ImmutableOopMapSet.updateRegisterMap(this, cb, map, true);
+      }
+
+      // Since the prolog does the save and restore of FP there is no oopmap
+      // for it so we must fill in its location as if there was an oopmap entry
+      // since if our caller was compiled code there could be live jvm state in it.
+      updateMapWithSavedLink(map, savedFPAddr);
+    }
+
+    return new AARCH64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC);
+  }
+
+  protected boolean hasSenderPD() {
+    return true;
+  }
+
+  public long frameSize() {
+    return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize());
+  }
+
+    public Address getLink() {
+        try {
+            if (DEBUG) {
+                System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET)
+                        + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0));
+            }
+            return addressOfStackSlot(LINK_OFFSET).getAddressAt(0);
+        } catch (Exception e) {
+            if (DEBUG)
+                System.out.println("Returning null");
+            return null;
+        }
+    }
+
+  // FIXME: not implementable yet
+  //inline void      frame::set_link(intptr_t* addr)  { *(intptr_t **)addr_at(link_offset) = addr; }
+
+  public Address getUnextendedSP() { return raw_unextendedSP; }
+
+  // Return address:
+  public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); }
+  public Address getSenderPC()     { return getSenderPCAddr().getAddressAt(0);      }
+
+  // return address of param, zero origin index.
+  public Address getNativeParamAddr(int idx) {
+    return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx);
+  }
+
+  public Address getSenderSP()     { return addressOfStackSlot(SENDER_SP_OFFSET); }
+
+  public Address addressOfInterpreterFrameLocals() {
+    return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET);
+  }
+
+  private Address addressOfInterpreterFrameBCX() {
+    return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET);
+  }
+
+  public int getInterpreterFrameBCI() {
+    // FIXME: this is not atomic with respect to GC and is unsuitable
+    // for use in a non-debugging, or reflective, system. Need to
+    // figure out how to express this.
+    Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0);
+    Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0);
+    Method method = (Method)Metadata.instantiateWrapperFor(methodHandle);
+    return bcpToBci(bcp, method);
+  }
+
+  public Address addressOfInterpreterFrameMDX() {
+    return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET);
+  }
+
+  // FIXME
+  //inline int frame::interpreter_frame_monitor_size() {
+  //  return BasicObjectLock::size();
+  //}
+
+  // expression stack
+  // (the max_stack arguments are used by the GC; see class FrameClosure)
+
+  public Address addressOfInterpreterFrameExpressionStack() {
+    Address monitorEnd = interpreterFrameMonitorEnd().address();
+    return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize());
+  }
+
+  public int getInterpreterFrameExpressionStackDirection() { return -1; }
+
+  // top of expression stack
+  public Address addressOfInterpreterFrameTOS() {
+    return getSP();
+  }
+
+  /** Expression stack from top down */
+  public Address addressOfInterpreterFrameTOSAt(int slot) {
+    return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize());
+  }
+
+  public Address getInterpreterFrameSenderSP() {
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(isInterpretedFrame(), "interpreted frame expected");
+    }
+    return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0);
+  }
+
+  // Monitors
+  public BasicObjectLock interpreterFrameMonitorBegin() {
+    return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET));
+  }
+
+  public BasicObjectLock interpreterFrameMonitorEnd() {
+    Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0);
+    if (Assert.ASSERTS_ENABLED) {
+      // make sure the pointer points inside the frame
+      Assert.that(AddressOps.gt(getFP(), result), "result must <  than frame pointer");
+      Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer");
+    }
+    return new BasicObjectLock(result);
+  }
+
+  public int interpreterFrameMonitorSize() {
+    return BasicObjectLock.size();
+  }
+
+  // Method
+  public Address addressOfInterpreterFrameMethod() {
+    return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET);
+  }
+
+  // Constant pool cache
+  public Address addressOfInterpreterFrameCPCache() {
+    return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET);
+  }
+
+  // Entry frames
+  public JavaCallWrapper getEntryFrameCallWrapper() {
+    return new AARCH64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0));
+  }
+
+  protected Address addressOfSavedOopResult() {
+    // offset is 2 for compiler2 and 3 for compiler1
+    return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 2 : 3) *
+                               VM.getVM().getAddressSize());
+  }
+
+  protected Address addressOfSavedReceiver() {
+    return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize());
+  }
+
+  private void dumpStack() {
+    for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize());
+         AddressOps.lt(addr, getSP());
+         addr = addr.addOffsetTo(VM.getVM().getAddressSize())) {
+      System.out.println(addr + ": " + addr.getAddressAt(0));
+    }
+    System.out.println("-----------------------");
+    for (Address addr = getSP();
+         AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize()));
+         addr = addr.addOffsetTo(VM.getVM().getAddressSize())) {
+      System.out.println(addr + ": " + addr.getAddressAt(0));
+    }
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64JavaCallWrapper.java	Wed Jul 05 20:40:50 2017 +0200
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.runtime.aarch64;
+
+import java.util.*;
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.types.*;
+import sun.jvm.hotspot.runtime.*;
+
+public class AARCH64JavaCallWrapper extends JavaCallWrapper {
+  private static AddressField lastJavaFPField;
+
+  static {
+    VM.registerVMInitializedObserver(new Observer() {
+        public void update(Observable o, Object data) {
+          initialize(VM.getVM().getTypeDataBase());
+        }
+      });
+  }
+
+  private static synchronized void initialize(TypeDataBase db) {
+    Type type = db.lookupType("JavaFrameAnchor");
+
+    lastJavaFPField  = type.getAddressField("_last_Java_fp");
+  }
+
+  public AARCH64JavaCallWrapper(Address addr) {
+    super(addr);
+  }
+
+  public Address getLastJavaFP() {
+    return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset()));
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/aarch64/AARCH64RegisterMap.java	Wed Jul 05 20:40:50 2017 +0200
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.runtime.aarch64;
+
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.runtime.*;
+
+public class AARCH64RegisterMap extends RegisterMap {
+
+  /** This is the only public constructor */
+  public AARCH64RegisterMap(JavaThread thread, boolean updateMap) {
+    super(thread, updateMap);
+  }
+
+  protected AARCH64RegisterMap(RegisterMap map) {
+    super(map);
+  }
+
+  public Object clone() {
+    AARCH64RegisterMap retval = new AARCH64RegisterMap(this);
+    return retval;
+  }
+
+  // no PD state to clear or copy:
+  protected void clearPD() {}
+  protected void initializePD() {}
+  protected void initializeFromPD(RegisterMap map) {}
+  protected Address getLocationPD(VMReg reg) { return null; }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/runtime/linux_aarch64/LinuxAARCH64JavaThreadPDAccess.java	Wed Jul 05 20:40:50 2017 +0200
@@ -0,0 +1,132 @@
+/*
+ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.runtime.linux_aarch64;
+
+import java.io.*;
+import java.util.*;
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.aarch64.*;
+import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.runtime.aarch64.*;
+import sun.jvm.hotspot.types.*;
+import sun.jvm.hotspot.utilities.*;
+
+public class LinuxAARCH64JavaThreadPDAccess implements JavaThreadPDAccess {
+  private static AddressField  lastJavaFPField;
+  private static AddressField  osThreadField;
+
+  // Field from OSThread
+  private static CIntegerField osThreadThreadIDField;
+
+  // This is currently unneeded but is being kept in case we change
+  // the currentFrameGuess algorithm
+  private static final long GUESS_SCAN_RANGE = 128 * 1024;
+
+  static {
+    VM.registerVMInitializedObserver(new Observer() {
+        public void update(Observable o, Object data) {
+          initialize(VM.getVM().getTypeDataBase());
+        }
+      });
+  }
+
+  private static synchronized void initialize(TypeDataBase db) {
+    Type type = db.lookupType("JavaThread");
+    osThreadField           = type.getAddressField("_osthread");
+
+    Type anchorType = db.lookupType("JavaFrameAnchor");
+    lastJavaFPField         = anchorType.getAddressField("_last_Java_fp");
+
+    Type osThreadType = db.lookupType("OSThread");
+    osThreadThreadIDField   = osThreadType.getCIntegerField("_thread_id");
+  }
+
+  public Address getLastJavaFP(Address addr) {
+    return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset()));
+  }
+
+  public Address getLastJavaPC(Address addr) {
+    return null;
+  }
+
+  public Address getBaseOfStackPointer(Address addr) {
+    return null;
+  }
+
+  public Frame getLastFramePD(JavaThread thread, Address addr) {
+    Address fp = thread.getLastJavaFP();
+    if (fp == null) {
+      return null; // no information
+    }
+    return new AARCH64Frame(thread.getLastJavaSP(), fp);
+  }
+
+  public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) {
+    return new AARCH64RegisterMap(thread, updateMap);
+  }
+
+  public Frame getCurrentFrameGuess(JavaThread thread, Address addr) {
+    ThreadProxy t = getThreadProxy(addr);
+    AARCH64ThreadContext context = (AARCH64ThreadContext) t.getContext();
+    AARCH64CurrentFrameGuess guesser = new AARCH64CurrentFrameGuess(context, thread);
+    if (!guesser.run(GUESS_SCAN_RANGE)) {
+      return null;
+    }
+    if (guesser.getPC() == null) {
+      return new AARCH64Frame(guesser.getSP(), guesser.getFP());
+    } else {
+      return new AARCH64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC());
+    }
+  }
+
+  public void printThreadIDOn(Address addr, PrintStream tty) {
+    tty.print(getThreadProxy(addr));
+  }
+
+  public void printInfoOn(Address threadAddr, PrintStream tty) {
+    tty.print("Thread id: ");
+    printThreadIDOn(threadAddr, tty);
+//    tty.println("\nPostJavaState: " + getPostJavaState(threadAddr));
+  }
+
+  public Address getLastSP(Address addr) {
+    ThreadProxy t = getThreadProxy(addr);
+    AARCH64ThreadContext context = (AARCH64ThreadContext) t.getContext();
+    return context.getRegisterAsAddress(AARCH64ThreadContext.SP);
+  }
+
+  public ThreadProxy getThreadProxy(Address addr) {
+    // Addr is the address of the JavaThread.
+    // Fetch the OSThread (for now and for simplicity, not making a
+    // separate "OSThread" class in this package)
+    Address osThreadAddr = osThreadField.getValue(addr);
+    // Get the address of the _thread_id from the OSThread
+    Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset());
+
+    JVMDebugger debugger = VM.getVM().getDebugger();
+    return debugger.getThreadForIdentifierAddress(threadIdAddr);
+  }
+}
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/AltPlatformInfo.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/AltPlatformInfo.java	Wed Jul 05 20:40:50 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,10 @@
 package sun.jvm.hotspot.utilities;
 
 public interface AltPlatformInfo {
+
   // Additional cpu types can be tested via this interface
+  public boolean knownCPU(String cpu);
 
-  public boolean knownCPU(String cpu);
-}
\ No newline at end of file
+  // Mangle a cpu name if necessary
+  public String getCPU(String cpu);
+}
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java	Wed Jul 05 20:40:50 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -52,27 +52,54 @@
     }
   }
 
-  /* Returns "sparc" for SPARC based platforms and "x86" for x86 based
-     platforms. Otherwise returns the value of os.arch.  If the value
-     is not recognized as supported, an exception is thrown instead. */
+  public static boolean knownCPU(String cpu) {
+    final String[] KNOWN =
+        new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "aarch64"};
+
+    for(String s : KNOWN) {
+      if(s.equals(cpu))
+        return true;
+    }
+
+    return false;
+  }
+
+  /* Returns "sparc" for SPARC based platforms "x86" for x86 based
+     platforms and x86_64 for 64bit x86 based platform. Otherwise
+     returns the value of os.arch. If the value is not recognized as supported,
+     an exception is thrown instead. */
+
   public static String getCPU() throws UnsupportedPlatformException {
     String cpu = System.getProperty("os.arch");
-    if (cpu.equals("i386") || cpu.equals("x86")) {
+
+    // Let any additional CPU mangling fire first
+    try {
+      Class pic = Class.forName("sun.jvm.hotspot.utilities.PlatformInfoClosed");
+      AltPlatformInfo api = (AltPlatformInfo) pic.newInstance();
+      if (api.knownCPU(cpu)) {
+        return api.getCPU(cpu);
+      }
+    } catch (Exception e) {
+       // Ignored
+    }
+
+    // Check that CPU is supported
+    if (!knownCPU(cpu)) {
+       throw new UnsupportedPlatformException("CPU type " + cpu + " not yet supported");
+    }
+
+    // Tweeks
+    if (cpu.equals("i386"))
       return "x86";
-    } else if (cpu.equals("sparc") || cpu.equals("sparcv9")) {
+
+    if (cpu.equals("sparcv9"))
       return "sparc";
-    } else if (cpu.equals("ia64") || cpu.equals("amd64") || cpu.equals("x86_64") || cpu.equals("ppc64") || cpu.equals("aarch64")) {
-      return cpu;
-    } else {
-      try {
-        Class pic = Class.forName("sun.jvm.hotspot.utilities.PlatformInfoClosed");
-        AltPlatformInfo api = (AltPlatformInfo)pic.newInstance();
-        if (api.knownCPU(cpu)) {
-          return cpu;
-        }
-      } catch (Exception e) {}
-      throw new UnsupportedPlatformException("CPU type " + cpu + " not yet supported");
-    }
+
+    if (cpu.equals("x86_64"))
+      return "amd64";
+
+    return cpu;
+
   }
 
   // this main is invoked from Makefile to make platform specific agent Makefile(s).
--- a/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PointerLocation.java	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/agent/src/share/classes/sun/jvm/hotspot/utilities/PointerLocation.java	Wed Jul 05 20:40:50 2017 +0200
@@ -84,11 +84,11 @@
   }
 
   public boolean isInNewGen() {
-    return ((gen != null) && (gen.level() == 0));
+    return ((gen != null) && (gen == ((GenCollectedHeap)heap).getGen(0)));
   }
 
   public boolean isInOldGen() {
-    return ((gen != null) && (gen.level() == 1));
+    return ((gen != null) && (gen == ((GenCollectedHeap)heap).getGen(1)));
   }
 
   public boolean inOtherGen() {
@@ -207,8 +207,6 @@
           tty.print("In new generation ");
         } else if (isInOldGen()) {
           tty.print("In old generation ");
-        } else if (gen != null) {
-          tty.print("In Generation " + getGeneration().level());
         } else {
           tty.print("In unknown section of Java heap");
         }
--- a/hotspot/make/bsd/makefiles/dtrace.make	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/make/bsd/makefiles/dtrace.make	Wed Jul 05 20:40:50 2017 +0200
@@ -263,14 +263,19 @@
 $(DtraceOutDir):
 	mkdir $(DtraceOutDir)
 
+# When building using a devkit, dtrace cannot find the correct preprocessor so
+# we run it explicitly before runing dtrace.
 $(DtraceOutDir)/hotspot.h: $(DTRACE_COMMON_SRCDIR)/hotspot.d | $(DtraceOutDir)
-	$(QUIETLY) $(DTRACE_PROG) $(DTRACE_OPTS) -C -I. -h -o $@ -s $(DTRACE_COMMON_SRCDIR)/hotspot.d
+	$(QUIETLY) $(CC) -E $(DTRACE_OPTS) -I. -x c $(DTRACE_COMMON_SRCDIR)/hotspot.d > $(DtraceOutDir)/hotspot.d
+	$(QUIETLY) $(DTRACE_PROG) -h -o $@ -s $(DtraceOutDir)/hotspot.d
 
 $(DtraceOutDir)/hotspot_jni.h: $(DTRACE_COMMON_SRCDIR)/hotspot_jni.d | $(DtraceOutDir)
-	$(QUIETLY) $(DTRACE_PROG) $(DTRACE_OPTS) -C -I. -h -o $@ -s $(DTRACE_COMMON_SRCDIR)/hotspot_jni.d
+	$(QUIETLY) $(CC) -E $(DTRACE_OPTS) -I. -x c $(DTRACE_COMMON_SRCDIR)/hotspot_jni.d > $(DtraceOutDir)/hotspot_jni.d
+	$(QUIETLY) $(DTRACE_PROG) -h -o $@ -s $(DtraceOutDir)/hotspot_jni.d
 
 $(DtraceOutDir)/hs_private.h: $(DTRACE_COMMON_SRCDIR)/hs_private.d | $(DtraceOutDir)
-	$(QUIETLY) $(DTRACE_PROG) $(DTRACE_OPTS) -C -I. -h -o $@ -s $(DTRACE_COMMON_SRCDIR)/hs_private.d
+	$(QUIETLY) $(CC) -E $(DTRACE_OPTS) -I. -x c $(DTRACE_COMMON_SRCDIR)/hs_private.d > $(DtraceOutDir)/hs_private.d
+	$(QUIETLY) $(DTRACE_PROG) -h -o $@ -s $(DtraceOutDir)/hs_private.d
 
 dtrace_gen_headers: $(DtraceOutDir)/hotspot.h $(DtraceOutDir)/hotspot_jni.h $(DtraceOutDir)/hs_private.h 
 
--- a/hotspot/make/bsd/makefiles/universal.gmk	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/make/bsd/makefiles/universal.gmk	Wed Jul 05 20:40:50 2017 +0200
@@ -56,13 +56,14 @@
 universalize: $(UNIVERSAL_LIPO_LIST) $(UNIVERSAL_COPY_LIST)
 	$(RM) -r $(EXPORT_PATH)/lib/{i386,amd64}
 
+LIPO ?= lipo
 
 # Package built libraries in a universal binary
 $(UNIVERSAL_LIPO_LIST):
 	BUILT_LIPO_FILES="`find $(EXPORT_LIB_DIR)/{i386,amd64}/$(subst $(EXPORT_LIB_DIR)/,,$@) 2>/dev/null`" || test $$? = "1"; \
 	if [ -n "$${BUILT_LIPO_FILES}" ]; then \
 	  $(MKDIR) -p $(shell dirname $@); \
-	  lipo -create -output $@ $${BUILT_LIPO_FILES}; \
+	  $(LIPO) -create -output $@ $${BUILT_LIPO_FILES}; \
 	fi
 
 
--- a/hotspot/make/sa.files	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/make/sa.files	Wed Jul 05 20:40:50 2017 +0200
@@ -44,6 +44,7 @@
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/compiler/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/amd64/*.java \
+$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/aarch64/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/bsd/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/bsd/amd64/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/bsd/x86/*.java \
@@ -55,6 +56,7 @@
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/amd64/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/ia64/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/ppc64/*.java \
+$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/aarch64/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/x86/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/linux/sparc/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/posix/*.java \
@@ -63,6 +65,7 @@
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/amd64/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/ppc64/*.java \
+$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/aarch64/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/sparc/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/proc/x86/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/*.java \
@@ -70,6 +73,7 @@
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/ppc64/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/sparc/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/x86/*.java \
+$(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/remote/aarch64/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/sparc/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/win32/coff/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/debugger/windbg/*.java \
@@ -92,11 +96,13 @@
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/prims/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/amd64/*.java \
+$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/aarch64/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd_amd64/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/bsd_x86/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_amd64/*.java \
+$(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_aarch64/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_x86/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_sparc/*.java \
 $(AGENT_SRC_DIR)/sun/jvm/hotspot/runtime/linux_ppc64/*.java \
--- a/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -526,16 +526,6 @@
   return frame(sender_sp(), link(), sender_pc());
 }
 
-bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) {
-  assert(is_interpreted_frame(), "must be interpreter frame");
-  Method* method = interpreter_frame_method();
-  // When unpacking an optimized frame the frame pointer is
-  // adjusted with:
-  int diff = (method->max_locals() - method->size_of_parameters()) *
-             Interpreter::stackElementWords;
-  return _fp == (fp - diff);
-}
-
 bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
 // QQQ
 #ifdef CC_INTERP
--- a/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/globals_aarch64.hpp	Wed Jul 05 20:40:50 2017 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -84,7 +84,7 @@
 
 #ifdef BUILTIN_SIM
 #define UseBuiltinSim           true
-#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
                                                                         \
   product(bool, NotifySimulator, UseBuiltinSim,                         \
          "tell the AArch64 sim where we are in method code")            \
@@ -112,7 +112,7 @@
 #define NotifySimulator         false
 #define UseSimulatorCache       false
 #define DisableBCCheck          true
-#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
                                                                         \
   product(bool, NearCpool, true,                                        \
          "constant pool is close to instructions")                      \
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -2934,41 +2934,40 @@
   cmp(src1, rscratch1);
 }
 
+void MacroAssembler::store_check(Register obj, Address dst) {
+  store_check(obj);
+}
+
 void MacroAssembler::store_check(Register obj) {
   // Does a store check for the oop in register obj. The content of
   // register obj is destroyed afterwards.
-  store_check_part_1(obj);
-  store_check_part_2(obj);
-}
-
-void MacroAssembler::store_check(Register obj, Address dst) {
-  store_check(obj);
-}
-
-
-// split the store check operation so that other instructions can be scheduled inbetween
-void MacroAssembler::store_check_part_1(Register obj) {
+
   BarrierSet* bs = Universe::heap()->barrier_set();
   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
+
+  CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
+  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
+
   lsr(obj, obj, CardTableModRefBS::card_shift);
-}
-
-void MacroAssembler::store_check_part_2(Register obj) {
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
-  CardTableModRefBS* ct = (CardTableModRefBS*)bs;
-  assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
-
-  // The calculation for byte_map_base is as follows:
-  // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
-  // So this essentially converts an address to a displacement and
-  // it will never need to be relocated.
-
-  // FIXME: It's not likely that disp will fit into an offset so we
-  // don't bother to check, but it could save an instruction.
-  intptr_t disp = (intptr_t) ct->byte_map_base;
-  mov(rscratch1, disp);
-  strb(zr, Address(obj, rscratch1));
+
+  assert(CardTableModRefBS::dirty_card_val() == 0, "must be");
+
+  {
+    ExternalAddress cardtable((address) ct->byte_map_base);
+    unsigned long offset;
+    adrp(rscratch1, cardtable, offset);
+    assert(offset == 0, "byte_map_base is misaligned");
+  }
+
+  if (UseCondCardMark) {
+    Label L_already_dirty;
+    ldrb(rscratch2,  Address(obj, rscratch1));
+    cbz(rscratch2, L_already_dirty);
+    strb(zr, Address(obj, rscratch1));
+    bind(L_already_dirty);
+  } else {
+    strb(zr, Address(obj, rscratch1));
+  }
 }
 
 void MacroAssembler::load_klass(Register dst, Register src) {
--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Wed Jul 05 20:40:50 2017 +0200
@@ -719,10 +719,6 @@
 
 #endif // INCLUDE_ALL_GCS
 
-  // split store_check(Register obj) to enhance instruction interleaving
-  void store_check_part_1(Register obj);
-  void store_check_part_2(Register obj);
-
   // oop manipulations
   void load_klass(Register dst, Register src);
   void store_klass(Register dst, Register src);
--- a/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -1,6 +1,6 @@
 /*
- * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * Copyright (c) 2003, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2120,6 +2120,7 @@
       save_native_result(masm, ret_type, stack_slots);
     }
 
+    __ mov(c_rarg2, rthread);
     __ lea(c_rarg1, Address(sp, lock_slot_offset * VMRegImpl::stack_slot_size));
     __ mov(c_rarg0, obj_reg);
 
@@ -2128,7 +2129,7 @@
     __ ldr(r19, Address(rthread, in_bytes(Thread::pending_exception_offset())));
     __ str(zr, Address(rthread, in_bytes(Thread::pending_exception_offset())));
 
-    rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 2, 0, 1);
+    rt_call(masm, CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), 3, 0, 1);
 
 #ifdef ASSERT
     {
--- a/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2015, Red Hat Inc. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -190,6 +190,11 @@
     }
   }
 
+  if (UseGHASHIntrinsics) {
+    warning("GHASH intrinsics are not available on this CPU");
+    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
+  }
+
   if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
     UseCRC32Intrinsics = true;
   }
--- a/hotspot/src/cpu/ppc/vm/globals_ppc.hpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/globals_ppc.hpp	Wed Jul 05 20:40:50 2017 +0200
@@ -63,7 +63,7 @@
 define_pd_global(uintx, TypeProfileLevel, 111);
 
 // Platform dependent flag handling: flags only defined on this platform.
-#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct)  \
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint)  \
                                                                             \
   /* Load poll address from thread. This is used to implement per-thread */ \
   /* safepoints on platforms != IA64. */                                    \
--- a/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/sharedRuntime_ppc.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -2475,7 +2475,8 @@
 
     // Slow case of monitor enter.
     // Inline a special case of call_VM that disallows any pending_exception.
-    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box);
+    // Arguments are (oop obj, BasicLock* lock, JavaThread* thread).
+    __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), r_oop, r_box, R16_thread);
 
     __ asm_assert_mem8_is_zero(thread_(pending_exception),
        "no pending exception allowed on exit from SharedRuntime::complete_monitor_unlocking_C", 0);
--- a/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/ppc/vm/vm_version_ppc.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -176,6 +176,11 @@
     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
   }
 
+  if (UseGHASHIntrinsics) {
+    warning("GHASH intrinsics are not available on this CPU");
+    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
+  }
+
   if (UseSHA) {
     warning("SHA instructions are not available on this CPU");
     FLAG_SET_DEFAULT(UseSHA, false);
@@ -505,7 +510,8 @@
 
 void VM_Version::determine_features() {
 #if defined(ABI_ELFv2)
-  const int code_size = (num_features+1+2*7)*BytesPerInstWord; // TODO(asmundak): calculation is incorrect.
+  // 1 InstWord per call for the blr instruction.
+  const int code_size = (num_features+1+2*1)*BytesPerInstWord;
 #else
   // 7 InstWords for each call (function descriptor + blr instruction).
   const int code_size = (num_features+1+2*7)*BytesPerInstWord;
@@ -540,7 +546,8 @@
   a->popcntw(R7, R5);                          // code[6]  -> popcntw
   a->fcfids(F3, F4);                           // code[7]  -> fcfids
   a->vand(VR0, VR0, VR0);                      // code[8]  -> vand
-  a->lqarx_unchecked(R7, R3_ARG1, R4_ARG2, 1); // code[9]  -> lqarx_m
+  // arg0 of lqarx must be an even register, (arg1 + arg2) must be a multiple of 16
+  a->lqarx_unchecked(R6, R3_ARG1, R4_ARG2, 1); // code[9]  -> lqarx_m
   a->vcipher(VR0, VR1, VR2);                   // code[10] -> vcipher
   a->vpmsumb(VR0, VR1, VR2);                   // code[11] -> vpmsumb
   a->tcheck(0);                                // code[12] -> tcheck
@@ -572,7 +579,8 @@
 
   // Execute code. Illegal instructions will be replaced by 0 in the signal handler.
   VM_Version::_is_determine_features_test_running = true;
-  (*test)((address)mid_of_test_area, (uint64_t)0);
+  // We must align the first argument to 16 bytes because of the lqarx check.
+  (*test)((address)align_size_up((intptr_t)mid_of_test_area, 16), (uint64_t)0);
   VM_Version::_is_determine_features_test_running = false;
 
   // determine which instructions are legal.
@@ -614,12 +622,12 @@
   MacroAssembler* a = new MacroAssembler(&cb);
 
   // Emit code.
-  uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->emit_fd();
+  uint64_t (*get_dscr)() = (uint64_t(*)())(void *)a->function_entry();
   uint32_t *code = (uint32_t *)a->pc();
   a->mfdscr(R3);
   a->blr();
 
-  void (*set_dscr)(long) = (void(*)(long))(void *)a->emit_fd();
+  void (*set_dscr)(long) = (void(*)(long))(void *)a->function_entry();
   a->mtdscr(R3);
   a->blr();
 
--- a/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Jul 05 20:40:50 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -129,6 +129,7 @@
     flog3_op3    = 0x36,
     edge_op3     = 0x36,
     fsrc_op3     = 0x36,
+    xmulx_op3    = 0x36,
     impdep2_op3  = 0x37,
     stpartialf_op3 = 0x37,
     jmpl_op3     = 0x38,
@@ -220,6 +221,8 @@
     mdtox_opf          = 0x110,
     mstouw_opf         = 0x111,
     mstosw_opf         = 0x113,
+    xmulx_opf          = 0x115,
+    xmulxhi_opf        = 0x116,
     mxtod_opf          = 0x118,
     mwtos_opf          = 0x119,
 
@@ -1212,6 +1215,9 @@
   void movwtos( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::S) | op3(mftoi_op3) | opf(mwtos_opf) | rs2(s)); }
   void movxtod( Register s, FloatRegister d ) { vis3_only();  emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(mftoi_op3) | opf(mxtod_opf) | rs2(s)); }
 
+  void xmulx(Register s1, Register s2, Register d) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulx_opf) | rs2(s2)); }
+  void xmulxhi(Register s1, Register s2, Register d) { vis3_only(); emit_int32( op(arith_op) | rd(d) | op3(xmulx_op3) | rs1(s1) | opf(xmulxhi_opf) | rs2(s2)); }
+
   // Crypto SHA instructions
 
   void sha1()   { sha1_only();    emit_int32( op(arith_op) | op3(sha_op3) | opf(sha1_opf)); }
--- a/hotspot/src/cpu/sparc/vm/frame_sparc.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/sparc/vm/frame_sparc.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -599,12 +599,6 @@
   return next_younger_sp_or_null(valid_sp, sp) != NULL;
 }
 
-
-bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) {
-  assert(is_interpreted_frame(), "must be interpreter frame");
-  return this->fp() == fp;
-}
-
 bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
 #ifdef CC_INTERP
   // Is there anything to do?
--- a/hotspot/src/cpu/sparc/vm/globals_sparc.hpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/sparc/vm/globals_sparc.hpp	Wed Jul 05 20:40:50 2017 +0200
@@ -81,7 +81,7 @@
 
 define_pd_global(uintx, TypeProfileLevel, 111);
 
-#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
                                                                             \
   product(intx, UseVIS, 99,                                                 \
           "Highest supported VIS instructions set on Sparc")                \
--- a/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -4786,6 +4786,130 @@
     return start;
   }
 
+  /* Single and multi-block ghash operations */
+  address generate_ghash_processBlocks() {
+      __ align(CodeEntryAlignment);
+      Label L_ghash_loop, L_aligned, L_main;
+      StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
+      address start = __ pc();
+
+      Register state = I0;
+      Register subkeyH = I1;
+      Register data = I2;
+      Register len = I3;
+
+      __ save_frame(0);
+
+      __ ldx(state, 0, O0);
+      __ ldx(state, 8, O1);
+
+      // Loop label for multiblock operations
+      __ BIND(L_ghash_loop);
+
+      // Check if 'data' is unaligned
+      __ andcc(data, 7, G1);
+      __ br(Assembler::zero, false, Assembler::pt, L_aligned);
+      __ delayed()->nop();
+
+      Register left_shift = L1;
+      Register right_shift = L2;
+      Register data_ptr = L3;
+
+      // Get left and right shift values in bits
+      __ sll(G1, LogBitsPerByte, left_shift);
+      __ mov(64, right_shift);
+      __ sub(right_shift, left_shift, right_shift);
+
+      // Align to read 'data'
+      __ sub(data, G1, data_ptr);
+
+      // Load first 8 bytes of 'data'
+      __ ldx(data_ptr, 0, O4);
+      __ sllx(O4, left_shift, O4);
+      __ ldx(data_ptr, 8, O5);
+      __ srlx(O5, right_shift, G4);
+      __ bset(G4, O4);
+
+      // Load second 8 bytes of 'data'
+      __ sllx(O5, left_shift, O5);
+      __ ldx(data_ptr, 16, G4);
+      __ srlx(G4, right_shift, G4);
+      __ ba(L_main);
+      __ delayed()->bset(G4, O5);
+
+      // If 'data' is aligned, load normally
+      __ BIND(L_aligned);
+      __ ldx(data, 0, O4);
+      __ ldx(data, 8, O5);
+
+      __ BIND(L_main);
+      __ ldx(subkeyH, 0, O2);
+      __ ldx(subkeyH, 8, O3);
+
+      __ xor3(O0, O4, O0);
+      __ xor3(O1, O5, O1);
+
+      __ xmulxhi(O0, O3, G3);
+      __ xmulx(O0, O2, O5);
+      __ xmulxhi(O1, O2, G4);
+      __ xmulxhi(O1, O3, G5);
+      __ xmulx(O0, O3, G1);
+      __ xmulx(O1, O3, G2);
+      __ xmulx(O1, O2, O3);
+      __ xmulxhi(O0, O2, O4);
+
+      __ mov(0xE1, O0);
+      __ sllx(O0, 56, O0);
+
+      __ xor3(O5, G3, O5);
+      __ xor3(O5, G4, O5);
+      __ xor3(G5, G1, G1);
+      __ xor3(G1, O3, G1);
+      __ srlx(G2, 63, O1);
+      __ srlx(G1, 63, G3);
+      __ sllx(G2, 63, O3);
+      __ sllx(G2, 58, O2);
+      __ xor3(O3, O2, O2);
+
+      __ sllx(G1, 1, G1);
+      __ or3(G1, O1, G1);
+
+      __ xor3(G1, O2, G1);
+
+      __ sllx(G2, 1, G2);
+
+      __ xmulxhi(G1, O0, O1);
+      __ xmulx(G1, O0, O2);
+      __ xmulxhi(G2, O0, O3);
+      __ xmulx(G2, O0, G1);
+
+      __ xor3(O4, O1, O4);
+      __ xor3(O5, O2, O5);
+      __ xor3(O5, O3, O5);
+
+      __ sllx(O4, 1, O2);
+      __ srlx(O5, 63, O3);
+
+      __ or3(O2, O3, O0);
+
+      __ sllx(O5, 1, O1);
+      __ srlx(G1, 63, O2);
+      __ or3(O1, O2, O1);
+      __ xor3(O1, G3, O1);
+
+      __ deccc(len);
+      __ br(Assembler::notZero, true, Assembler::pt, L_ghash_loop);
+      __ delayed()->add(data, 16, data);
+
+      __ stx(O0, I0, 0);
+      __ stx(O1, I0, 8);
+
+      __ ret();
+      __ delayed()->restore();
+
+      return start;
+  }
+
   void generate_initial() {
     // Generates all stubs and initializes the entry points
 
@@ -4859,6 +4983,10 @@
       StubRoutines::_cipherBlockChaining_encryptAESCrypt = generate_cipherBlockChaining_encryptAESCrypt();
       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
     }
+    // generate GHASH intrinsics code
+    if (UseGHASHIntrinsics) {
+      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+    }
 
     // generate SHA1/SHA256/SHA512 intrinsics code
     if (UseSHA1Intrinsics) {
--- a/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/sparc/vm/vm_version_sparc.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -300,6 +300,17 @@
     }
   }
 
+  // GHASH/GCM intrinsics
+  if (has_vis3() && (UseVIS > 2)) {
+    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
+      UseGHASHIntrinsics = true;
+    }
+  } else if (UseGHASHIntrinsics) {
+    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
+      warning("GHASH intrinsics require VIS3 insructions support. Intriniscs will be disabled");
+    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
+  }
+
   // SHA1, SHA256, and SHA512 instructions were added to SPARC T-series at different times
   if (has_sha1() || has_sha256() || has_sha512()) {
     if (UseVIS > 0) { // SHA intrinsics use VIS1 instructions
--- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -1347,7 +1347,7 @@
 
 void Assembler::andnl(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode(dst, src1, src2, false);
+  int encode = vex_prefix_0F38_and_encode_legacy(dst, src1, src2, false);
   emit_int8((unsigned char)0xF2);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1355,7 +1355,7 @@
 void Assembler::andnl(Register dst, Register src1, Address src2) {
   InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38(dst, src1, src2, false);
+  vex_prefix_0F38_legacy(dst, src1, src2, false);
   emit_int8((unsigned char)0xF2);
   emit_operand(dst, src2);
 }
@@ -1382,7 +1382,7 @@
 
 void Assembler::blsil(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode(rbx, dst, src, false);
+  int encode = vex_prefix_0F38_and_encode_legacy(rbx, dst, src, false);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1390,14 +1390,14 @@
 void Assembler::blsil(Register dst, Address src) {
   InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38(rbx, dst, src, false);
+  vex_prefix_0F38_legacy(rbx, dst, src, false);
   emit_int8((unsigned char)0xF3);
   emit_operand(rbx, src);
 }
 
 void Assembler::blsmskl(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode(rdx, dst, src, false);
+  int encode = vex_prefix_0F38_and_encode_legacy(rdx, dst, src, false);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1412,7 +1412,7 @@
 
 void Assembler::blsrl(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode(rcx, dst, src, false);
+  int encode = vex_prefix_0F38_and_encode_legacy(rcx, dst, src, false);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1420,7 +1420,7 @@
 void Assembler::blsrl(Register dst, Address src) {
   InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38(rcx, dst, src, false);
+  vex_prefix_0F38_legacy(rcx, dst, src, false);
   emit_int8((unsigned char)0xF3);
   emit_operand(rcx, src);
 }
@@ -3095,26 +3095,35 @@
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F,
-                                      false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
+  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift);
 }
 
+void Assembler::pslldq(XMMRegister dst, int shift) {
+  // Shift left 128 bit value in xmm register by number of bytes.
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
+  emit_int8(0x73);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8(shift);
+}
+
 void Assembler::ptest(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, false,
+              VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8(0x17);
   emit_operand(dst, src);
 }
 
 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
-                                      false, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false,
+                                      VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8(0x17);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -3126,7 +3135,7 @@
   assert(dst != xnoreg, "sanity");
   int dst_enc = dst->encoding();
   // swap src<->dst for encoding
-  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len);
+  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len, true, false);
   emit_int8(0x17);
   emit_operand(dst, src);
 }
@@ -3135,7 +3144,7 @@
   assert(VM_Version::supports_avx(), "");
   int vector_len = AVX_256bit;
   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
-                                     vector_len, VEX_OPCODE_0F_38);
+                                     vector_len, VEX_OPCODE_0F_38, true, false);
   emit_int8(0x17);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -3146,12 +3155,12 @@
   if (VM_Version::supports_evex()) {
     tuple_type = EVEX_FVM;
   }
-  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
+  emit_simd_arith(0x60, dst, src, VEX_SIMD_66, false, (VM_Version::supports_avx512vlbw() == false));
 }
 
 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
+  emit_simd_arith(0x60, dst, src, VEX_SIMD_66, false, (VM_Version::supports_avx512vlbw() == false));
 }
 
 void Assembler::punpckldq(XMMRegister dst, Address src) {
@@ -4979,7 +4988,51 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-// duplicate 4-bytes integer data from src into 8 locations in dest
+// duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL
+void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
+                                     vector_len, VEX_OPCODE_0F_38, false);
+  emit_int8(0x78);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  tuple_type = EVEX_T1S;
+  input_size_in_bits = EVEX_8bit;
+  InstructionMark im(this);
+  assert(dst != xnoreg, "sanity");
+  int dst_enc = dst->encoding();
+  // swap src<->dst for encoding
+  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len);
+  emit_int8(0x78);
+  emit_operand(dst, src);
+}
+
+// duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL
+void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
+                                     vector_len, VEX_OPCODE_0F_38, false);
+  emit_int8(0x79);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  tuple_type = EVEX_T1S;
+  input_size_in_bits = EVEX_16bit;
+  InstructionMark im(this);
+  assert(dst != xnoreg, "sanity");
+  int dst_enc = dst->encoding();
+  // swap src<->dst for encoding
+  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len);
+  emit_int8(0x79);
+  emit_operand(dst, src);
+}
+
+// duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
 void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_evex(), "");
   int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
@@ -4988,6 +5041,121 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  tuple_type = EVEX_T1S;
+  input_size_in_bits = EVEX_32bit;
+  InstructionMark im(this);
+  assert(dst != xnoreg, "sanity");
+  int dst_enc = dst->encoding();
+  // swap src<->dst for encoding
+  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len);
+  emit_int8(0x58);
+  emit_operand(dst, src);
+}
+
+// duplicate 8-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
+void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66,
+                                     VEX_OPCODE_0F_38, true, vector_len, false, false);
+  emit_int8(0x59);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  tuple_type = EVEX_T1S;
+  input_size_in_bits = EVEX_64bit;
+  InstructionMark im(this);
+  assert(dst != xnoreg, "sanity");
+  int dst_enc = dst->encoding();
+  // swap src<->dst for encoding
+  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len);
+  emit_int8(0x59);
+  emit_operand(dst, src);
+}
+
+// duplicate single precision fp from src into 4|8|16 locations in dest : requires AVX512VL
+void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66,
+                                     VEX_OPCODE_0F_38, false, vector_len, false, false);
+  emit_int8(0x18);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  tuple_type = EVEX_T1S;
+  input_size_in_bits = EVEX_32bit;
+  InstructionMark im(this);
+  assert(dst != xnoreg, "sanity");
+  int dst_enc = dst->encoding();
+  // swap src<->dst for encoding
+  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len);
+  emit_int8(0x18);
+  emit_operand(dst, src);
+}
+
+// duplicate double precision fp from src into 2|4|8 locations in dest : requires AVX512VL
+void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66,
+                                     VEX_OPCODE_0F_38, true, vector_len, false, false);
+  emit_int8(0x19);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  tuple_type = EVEX_T1S;
+  input_size_in_bits = EVEX_64bit;
+  InstructionMark im(this);
+  assert(dst != xnoreg, "sanity");
+  int dst_enc = dst->encoding();
+  // swap src<->dst for encoding
+  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len);
+  emit_int8(0x19);
+  emit_operand(dst, src);
+}
+
+// duplicate 1-byte integer data from src into 16||32|64 locations in dest : requires AVX512BW and AVX512VL
+void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66,
+                                     VEX_OPCODE_0F_38, false, vector_len, false, false);
+  emit_int8(0x7A);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// duplicate 2-byte integer data from src into 8|16||32 locations in dest : requires AVX512BW and AVX512VL
+void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66,
+                                     VEX_OPCODE_0F_38, false, vector_len, false, false);
+  emit_int8(0x7B);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// duplicate 4-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
+void Assembler::evpbroadcastd(XMMRegister dst, Register src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66,
+                                     VEX_OPCODE_0F_38, false, vector_len, false, false);
+  emit_int8(0x7C);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// duplicate 8-byte integer data from src into 4|8|16 locations in dest : requires AVX512VL
+void Assembler::evpbroadcastq(XMMRegister dst, Register src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66,
+                                     VEX_OPCODE_0F_38, true, vector_len, false, false);
+  emit_int8(0x7C);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 // Carry-Less Multiplication Quadword
 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
   assert(VM_Version::supports_clmul(), "");
@@ -5598,7 +5766,7 @@
 
 void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre,
                            VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) {
-  bool vex_r = (xreg_enc >= 8);
+  bool vex_r = ((xreg_enc & 8) == 8) ? 1 : 0;
   bool vex_b = adr.base_needs_rex();
   bool vex_x = adr.index_needs_rex();
   avx_vector_len = vector_len;
@@ -5626,8 +5794,8 @@
 
 int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
                                      bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) {
-  bool vex_r = (dst_enc >= 8);
-  bool vex_b = (src_enc >= 8);
+  bool vex_r = ((dst_enc & 8) == 8) ? 1 : 0;
+  bool vex_b = ((src_enc & 8) == 8) ? 1 : 0;
   bool vex_x = false;
   avx_vector_len = vector_len;
 
@@ -6272,19 +6440,15 @@
 
 void Assembler::andnq(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q(dst, src1, src2);
+  int encode = vex_prefix_0F38_and_encode_q_legacy(dst, src1, src2);
   emit_int8((unsigned char)0xF2);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::andnq(Register dst, Register src1, Address src2) {
-  if (VM_Version::supports_evex()) {
-    tuple_type = EVEX_T1S;
-    input_size_in_bits = EVEX_64bit;
-  }
   InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q(dst, src1, src2);
+  vex_prefix_0F38_q_legacy(dst, src1, src2);
   emit_int8((unsigned char)0xF2);
   emit_operand(dst, src2);
 }
@@ -6311,7 +6475,7 @@
 
 void Assembler::blsiq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q(rbx, dst, src);
+  int encode = vex_prefix_0F38_and_encode_q_legacy(rbx, dst, src);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -6319,14 +6483,14 @@
 void Assembler::blsiq(Register dst, Address src) {
   InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q(rbx, dst, src);
+  vex_prefix_0F38_q_legacy(rbx, dst, src);
   emit_int8((unsigned char)0xF3);
   emit_operand(rbx, src);
 }
 
 void Assembler::blsmskq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q(rdx, dst, src);
+  int encode = vex_prefix_0F38_and_encode_q_legacy(rdx, dst, src);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -6334,14 +6498,14 @@
 void Assembler::blsmskq(Register dst, Address src) {
   InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q(rdx, dst, src);
+  vex_prefix_0F38_q_legacy(rdx, dst, src);
   emit_int8((unsigned char)0xF3);
   emit_operand(rdx, src);
 }
 
 void Assembler::blsrq(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode_q(rcx, dst, src);
+  int encode = vex_prefix_0F38_and_encode_q_legacy(rcx, dst, src);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -6349,7 +6513,7 @@
 void Assembler::blsrq(Register dst, Address src) {
   InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38_q(rcx, dst, src);
+  vex_prefix_0F38_q_legacy(rcx, dst, src);
   emit_int8((unsigned char)0xF3);
   emit_operand(rcx, src);
 }
--- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp	Wed Jul 05 20:40:50 2017 +0200
@@ -661,6 +661,14 @@
                vector_len, no_mask_reg);
   }
 
+  void vex_prefix_0F38_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
+    bool vex_w = false;
+    int vector_len = AVX_128bit;
+    vex_prefix(src, nds->encoding(), dst->encoding(),
+               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
+               vector_len, true, no_mask_reg);
+  }
+
   void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
     bool vex_w = true;
     int vector_len = AVX_128bit;
@@ -668,6 +676,15 @@
                VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
                vector_len, no_mask_reg);
   }
+
+  void vex_prefix_0F38_q_legacy(Register dst, Register nds, Address src, bool no_mask_reg = false) {
+    bool vex_w = true;
+    int vector_len = AVX_128bit;
+    vex_prefix(src, nds->encoding(), dst->encoding(),
+               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
+               vector_len, true, no_mask_reg);
+  }
+
   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
                              VexSimdPrefix pre, VexOpcode opc,
                              bool vex_w, int vector_len,
@@ -680,6 +697,15 @@
                                  VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
                                  false, no_mask_reg);
   }
+
+  int  vex_prefix_0F38_and_encode_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
+    bool vex_w = false;
+    int vector_len = AVX_128bit;
+    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
+      VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
+      true, no_mask_reg);
+  }
+
   int  vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
     bool vex_w = true;
     int vector_len = AVX_128bit;
@@ -687,6 +713,15 @@
                                  VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
                                  false, no_mask_reg);
   }
+
+  int  vex_prefix_0F38_and_encode_q_legacy(Register dst, Register nds, Register src, bool no_mask_reg = false) {
+    bool vex_w = true;
+    int vector_len = AVX_128bit;
+    return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
+                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
+                                 true, no_mask_reg);
+  }
+
   int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
                              VexSimdPrefix pre, int vector_len = AVX_128bit,
                              VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
@@ -1666,6 +1701,8 @@
 
   // Shift Right by bytes Logical DoubleQuadword Immediate
   void psrldq(XMMRegister dst, int shift);
+  // Shift Left by bytes Logical DoubleQuadword Immediate
+  void pslldq(XMMRegister dst, int shift);
 
   // Logical Compare 128bit
   void ptest(XMMRegister dst, XMMRegister src);
@@ -2024,8 +2061,25 @@
   // duplicate 4-bytes integer data from src into 8 locations in dest
   void vpbroadcastd(XMMRegister dst, XMMRegister src);
 
-  // duplicate 4-bytes integer data from src into vector_len locations in dest
+  // duplicate n-bytes integer data from src into vector_len locations in dest
+  void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
+  void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
+  void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
+  void evpbroadcastw(XMMRegister dst, Address src, int vector_len);
   void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
+  void evpbroadcastd(XMMRegister dst, Address src, int vector_len);
+  void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
+  void evpbroadcastq(XMMRegister dst, Address src, int vector_len);
+
+  void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
+  void evpbroadcastss(XMMRegister dst, Address src, int vector_len);
+  void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
+  void evpbroadcastsd(XMMRegister dst, Address src, int vector_len);
+
+  void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
+  void evpbroadcastw(XMMRegister dst, Register src, int vector_len);
+  void evpbroadcastd(XMMRegister dst, Register src, int vector_len);
+  void evpbroadcastq(XMMRegister dst, Register src, int vector_len);
 
   // Carry-Less Multiplication Quadword
   void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
--- a/hotspot/src/cpu/x86/vm/c2_init_x86.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/c2_init_x86.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -58,4 +58,6 @@
       OptoReg::invalidate(i);
     }
   }
+
+  SuperWordLoopUnrollAnalysis = true;
 }
--- a/hotspot/src/cpu/x86/vm/frame_x86.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/frame_x86.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -524,17 +524,6 @@
   return frame(sender_sp(), link(), sender_pc());
 }
 
-
-bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) {
-  assert(is_interpreted_frame(), "must be interpreter frame");
-  Method* method = interpreter_frame_method();
-  // When unpacking an optimized frame the frame pointer is
-  // adjusted with:
-  int diff = (method->max_locals() - method->size_of_parameters()) *
-             Interpreter::stackElementWords;
-  return _fp == (fp - diff);
-}
-
 bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
 // QQQ
 #ifdef CC_INTERP
--- a/hotspot/src/cpu/x86/vm/globals_x86.hpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/globals_x86.hpp	Wed Jul 05 20:40:50 2017 +0200
@@ -84,7 +84,7 @@
 
 define_pd_global(bool, PreserveFramePointer, false);
 
-#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
                                                                             \
   develop(bool, IEEEPrecision, true,                                        \
           "Enables IEEE precision (for INTEL only)")                        \
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -4260,31 +4260,24 @@
 //////////////////////////////////////////////////////////////////////////////////
 
 
+void MacroAssembler::store_check(Register obj, Address dst) {
+  store_check(obj);
+}
+
 void MacroAssembler::store_check(Register obj) {
   // Does a store check for the oop in register obj. The content of
   // register obj is destroyed afterwards.
-  store_check_part_1(obj);
-  store_check_part_2(obj);
-}
-
-void MacroAssembler::store_check(Register obj, Address dst) {
-  store_check(obj);
-}
-
-
-// split the store check operation so that other instructions can be scheduled inbetween
-void MacroAssembler::store_check_part_1(Register obj) {
+
   BarrierSet* bs = Universe::heap()->barrier_set();
   assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
-  shrptr(obj, CardTableModRefBS::card_shift);
-}
-
-void MacroAssembler::store_check_part_2(Register obj) {
-  BarrierSet* bs = Universe::heap()->barrier_set();
-  assert(bs->kind() == BarrierSet::CardTableModRef, "Wrong barrier set kind");
+
   CardTableModRefBS* ct = barrier_set_cast<CardTableModRefBS>(bs);
   assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
 
+  shrptr(obj, CardTableModRefBS::card_shift);
+
+  Address card_addr;
+
   // The calculation for byte_map_base is as follows:
   // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift);
   // So this essentially converts an address to a displacement and it will
@@ -4292,8 +4285,7 @@
   // large for a 32bit displacement.
   intptr_t disp = (intptr_t) ct->byte_map_base;
   if (is_simm32(disp)) {
-    Address cardtable(noreg, obj, Address::times_1, disp);
-    movb(cardtable, 0);
+    card_addr = Address(noreg, obj, Address::times_1, disp);
   } else {
     // By doing it as an ExternalAddress 'disp' could be converted to a rip-relative
     // displacement and done in a single instruction given favorable mapping and a
@@ -4301,7 +4293,21 @@
     // entry and that entry is not properly handled by the relocation code.
     AddressLiteral cardtable((address)ct->byte_map_base, relocInfo::none);
     Address index(noreg, obj, Address::times_1);
-    movb(as_Address(ArrayAddress(cardtable, index)), 0);
+    card_addr = as_Address(ArrayAddress(cardtable, index));
+  }
+
+  int dirty = CardTableModRefBS::dirty_card_val();
+  if (UseCondCardMark) {
+    Label L_already_dirty;
+    if (UseConcMarkSweepGC) {
+      membar(Assembler::StoreLoad);
+    }
+    cmpb(card_addr, dirty);
+    jcc(Assembler::equal, L_already_dirty);
+    movb(card_addr, dirty);
+    bind(L_already_dirty);
+  } else {
+    movb(card_addr, dirty);
   }
 }
 
--- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed Jul 05 20:40:50 2017 +0200
@@ -315,10 +315,6 @@
 
 #endif // INCLUDE_ALL_GCS
 
-  // split store_check(Register obj) to enhance instruction interleaving
-  void store_check_part_1(Register obj);
-  void store_check_part_2(Register obj);
-
   // C 'boolean' to Java boolean: x == 0 ? 0 : 1
   void c2bool(Register x);
 
--- a/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/sharedRuntime_x86_64.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -365,22 +365,22 @@
     map->set_callee_saved(STACK_OFFSET(xmm14H_off), xmm14->as_VMReg()->next());
     map->set_callee_saved(STACK_OFFSET(xmm15H_off), xmm15->as_VMReg()->next());
     if (UseAVX > 2) {
-      map->set_callee_saved(STACK_OFFSET(xmm16H_off), xmm16->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm17H_off), xmm17->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm18H_off), xmm18->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm19H_off), xmm19->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm20H_off), xmm20->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm21H_off), xmm21->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm22H_off), xmm22->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm23H_off), xmm23->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm24H_off), xmm24->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm25H_off), xmm25->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm26H_off), xmm26->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm27H_off), xmm27->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm28H_off), xmm28->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm29H_off), xmm29->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm30H_off), xmm30->as_VMReg());
-      map->set_callee_saved(STACK_OFFSET(xmm31H_off), xmm31->as_VMReg());
+      map->set_callee_saved(STACK_OFFSET(xmm16H_off), xmm16->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm17H_off), xmm17->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm18H_off), xmm18->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm19H_off), xmm19->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm20H_off), xmm20->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm21H_off), xmm21->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm22H_off), xmm22->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm23H_off), xmm23->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm24H_off), xmm24->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm25H_off), xmm25->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm26H_off), xmm26->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm27H_off), xmm27->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm28H_off), xmm28->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm29H_off), xmm29->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm30H_off), xmm30->as_VMReg()->next());
+      map->set_callee_saved(STACK_OFFSET(xmm31H_off), xmm31->as_VMReg()->next());
     }
   }
 
@@ -466,7 +466,7 @@
       __ vinsertf64x4h(xmm29, Address(rsp, 928));
       __ vinsertf64x4h(xmm30, Address(rsp, 960));
       __ vinsertf64x4h(xmm31, Address(rsp, 992));
-      __ subptr(rsp, 1024);
+      __ addptr(rsp, 1024);
     }
   }
 #else
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -2727,6 +2727,167 @@
     return start;
   }
 
+  // byte swap x86 long
+  address generate_ghash_long_swap_mask() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
+    address start = __ pc();
+    __ emit_data(0x0b0a0908, relocInfo::none, 0);
+    __ emit_data(0x0f0e0d0c, relocInfo::none, 0);
+    __ emit_data(0x03020100, relocInfo::none, 0);
+    __ emit_data(0x07060504, relocInfo::none, 0);
+
+  return start;
+  }
+
+  // byte swap x86 byte array
+  address generate_ghash_byte_swap_mask() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
+    address start = __ pc();
+    __ emit_data(0x0c0d0e0f, relocInfo::none, 0);
+    __ emit_data(0x08090a0b, relocInfo::none, 0);
+    __ emit_data(0x04050607, relocInfo::none, 0);
+    __ emit_data(0x00010203, relocInfo::none, 0);
+  return start;
+  }
+
+  /* Single and multi-block ghash operations */
+  address generate_ghash_processBlocks() {
+    assert(UseGHASHIntrinsics, "need GHASH intrinsics and CLMUL support");
+    __ align(CodeEntryAlignment);
+    Label L_ghash_loop, L_exit;
+    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
+    address start = __ pc();
+
+    const Register state        = rdi;
+    const Register subkeyH      = rsi;
+    const Register data         = rdx;
+    const Register blocks       = rcx;
+
+    const Address  state_param(rbp, 8+0);
+    const Address  subkeyH_param(rbp, 8+4);
+    const Address  data_param(rbp, 8+8);
+    const Address  blocks_param(rbp, 8+12);
+
+    const XMMRegister xmm_temp0 = xmm0;
+    const XMMRegister xmm_temp1 = xmm1;
+    const XMMRegister xmm_temp2 = xmm2;
+    const XMMRegister xmm_temp3 = xmm3;
+    const XMMRegister xmm_temp4 = xmm4;
+    const XMMRegister xmm_temp5 = xmm5;
+    const XMMRegister xmm_temp6 = xmm6;
+    const XMMRegister xmm_temp7 = xmm7;
+
+    __ enter();
+
+    __ movptr(state, state_param);
+    __ movptr(subkeyH, subkeyH_param);
+    __ movptr(data, data_param);
+    __ movptr(blocks, blocks_param);
+
+    __ movdqu(xmm_temp0, Address(state, 0));
+    __ pshufb(xmm_temp0, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+
+    __ movdqu(xmm_temp1, Address(subkeyH, 0));
+    __ pshufb(xmm_temp1, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+
+    __ BIND(L_ghash_loop);
+    __ movdqu(xmm_temp2, Address(data, 0));
+    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
+
+    __ pxor(xmm_temp0, xmm_temp2);
+
+    //
+    // Multiply with the hash key
+    //
+    __ movdqu(xmm_temp3, xmm_temp0);
+    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
+    __ movdqu(xmm_temp4, xmm_temp0);
+    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1
+
+    __ movdqu(xmm_temp5, xmm_temp0);
+    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
+    __ movdqu(xmm_temp6, xmm_temp0);
+    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1
+
+    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0
+
+    __ movdqu(xmm_temp5, xmm_temp4);    // move the contents of xmm4 to xmm5
+    __ psrldq(xmm_temp4, 8);    // shift by xmm4 64 bits to the right
+    __ pslldq(xmm_temp5, 8);    // shift by xmm5 64 bits to the left
+    __ pxor(xmm_temp3, xmm_temp5);
+    __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
+                                        // of the carry-less multiplication of
+                                        // xmm0 by xmm1.
+
+    // We shift the result of the multiplication by one bit position
+    // to the left to cope for the fact that the bits are reversed.
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp4, xmm_temp6);
+    __ pslld (xmm_temp3, 1);
+    __ pslld(xmm_temp6, 1);
+    __ psrld(xmm_temp7, 31);
+    __ psrld(xmm_temp4, 31);
+    __ movdqu(xmm_temp5, xmm_temp7);
+    __ pslldq(xmm_temp4, 4);
+    __ pslldq(xmm_temp7, 4);
+    __ psrldq(xmm_temp5, 12);
+    __ por(xmm_temp3, xmm_temp7);
+    __ por(xmm_temp6, xmm_temp4);
+    __ por(xmm_temp6, xmm_temp5);
+
+    //
+    // First phase of the reduction
+    //
+    // Move xmm3 into xmm4, xmm5, xmm7 in order to perform the shifts
+    // independently.
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp4, xmm_temp3);
+    __ movdqu(xmm_temp5, xmm_temp3);
+    __ pslld(xmm_temp7, 31);    // packed right shift shifting << 31
+    __ pslld(xmm_temp4, 30);    // packed right shift shifting << 30
+    __ pslld(xmm_temp5, 25);    // packed right shift shifting << 25
+    __ pxor(xmm_temp7, xmm_temp4);      // xor the shifted versions
+    __ pxor(xmm_temp7, xmm_temp5);
+    __ movdqu(xmm_temp4, xmm_temp7);
+    __ pslldq(xmm_temp7, 12);
+    __ psrldq(xmm_temp4, 4);
+    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete
+
+    //
+    // Second phase of the reduction
+    //
+    // Make 3 copies of xmm3 in xmm2, xmm5, xmm7 for doing these
+    // shift operations.
+    __ movdqu(xmm_temp2, xmm_temp3);
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp5, xmm_temp3);
+    __ psrld(xmm_temp2, 1);     // packed left shifting >> 1
+    __ psrld(xmm_temp7, 2);     // packed left shifting >> 2
+    __ psrld(xmm_temp5, 7);     // packed left shifting >> 7
+    __ pxor(xmm_temp2, xmm_temp7);      // xor the shifted versions
+    __ pxor(xmm_temp2, xmm_temp5);
+    __ pxor(xmm_temp2, xmm_temp4);
+    __ pxor(xmm_temp3, xmm_temp2);
+    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
+
+    __ decrement(blocks);
+    __ jcc(Assembler::zero, L_exit);
+    __ movdqu(xmm_temp0, xmm_temp6);
+    __ addptr(data, 16);
+    __ jmp(L_ghash_loop);
+
+    __ BIND(L_exit);
+       // Byte swap 16-byte result
+    __ pshufb(xmm_temp6, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+    __ movdqu(Address(state, 0), xmm_temp6);   // store the result
+
+    __ leave();
+    __ ret(0);
+    return start;
+  }
+
   /**
    *  Arguments:
    *
@@ -3026,6 +3187,13 @@
       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt();
     }
 
+    // Generate GHASH intrinsics code
+    if (UseGHASHIntrinsics) {
+      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
+      StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
+      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+    }
+
     // Safefetch stubs.
     generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry,
                                                    &StubRoutines::_safefetch32_fault_pc,
--- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -382,7 +382,12 @@
 
     // restore regs belonging to calling function
 #ifdef _WIN64
-    for (int i = 15; i >= 6; i--) {
+    int xmm_ub = 15;
+    if (UseAVX > 2) {
+      xmm_ub = 31;
+    }
+    // emit the restores for xmm regs
+    for (int i = 6; i <= xmm_ub; i++) {
       __ movdqu(as_XMMRegister(i), xmm_save(i));
     }
 #endif
@@ -3681,6 +3686,175 @@
     return start;
   }
 
+
+  // byte swap x86 long
+  address generate_ghash_long_swap_mask() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask");
+    address start = __ pc();
+    __ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none );
+    __ emit_data64(0x0706050403020100, relocInfo::none );
+  return start;
+  }
+
+  // byte swap x86 byte array
+  address generate_ghash_byte_swap_mask() {
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask");
+    address start = __ pc();
+    __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none );
+    __ emit_data64(0x0001020304050607, relocInfo::none );
+  return start;
+  }
+
+  /* Single and multi-block ghash operations */
+  address generate_ghash_processBlocks() {
+    __ align(CodeEntryAlignment);
+    Label L_ghash_loop, L_exit;
+    StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");
+    address start = __ pc();
+
+    const Register state        = c_rarg0;
+    const Register subkeyH      = c_rarg1;
+    const Register data         = c_rarg2;
+    const Register blocks       = c_rarg3;
+
+#ifdef _WIN64
+    const int XMM_REG_LAST  = 10;
+#endif
+
+    const XMMRegister xmm_temp0 = xmm0;
+    const XMMRegister xmm_temp1 = xmm1;
+    const XMMRegister xmm_temp2 = xmm2;
+    const XMMRegister xmm_temp3 = xmm3;
+    const XMMRegister xmm_temp4 = xmm4;
+    const XMMRegister xmm_temp5 = xmm5;
+    const XMMRegister xmm_temp6 = xmm6;
+    const XMMRegister xmm_temp7 = xmm7;
+    const XMMRegister xmm_temp8 = xmm8;
+    const XMMRegister xmm_temp9 = xmm9;
+    const XMMRegister xmm_temp10 = xmm10;
+
+    __ enter();
+
+#ifdef _WIN64
+    // save the xmm registers which must be preserved 6-10
+    __ subptr(rsp, -rsp_after_call_off * wordSize);
+    for (int i = 6; i <= XMM_REG_LAST; i++) {
+      __ movdqu(xmm_save(i), as_XMMRegister(i));
+    }
+#endif
+
+    __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr()));
+
+    __ movdqu(xmm_temp0, Address(state, 0));
+    __ pshufb(xmm_temp0, xmm_temp10);
+
+
+    __ BIND(L_ghash_loop);
+    __ movdqu(xmm_temp2, Address(data, 0));
+    __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr()));
+
+    __ movdqu(xmm_temp1, Address(subkeyH, 0));
+    __ pshufb(xmm_temp1, xmm_temp10);
+
+    __ pxor(xmm_temp0, xmm_temp2);
+
+    //
+    // Multiply with the hash key
+    //
+    __ movdqu(xmm_temp3, xmm_temp0);
+    __ pclmulqdq(xmm_temp3, xmm_temp1, 0);      // xmm3 holds a0*b0
+    __ movdqu(xmm_temp4, xmm_temp0);
+    __ pclmulqdq(xmm_temp4, xmm_temp1, 16);     // xmm4 holds a0*b1
+
+    __ movdqu(xmm_temp5, xmm_temp0);
+    __ pclmulqdq(xmm_temp5, xmm_temp1, 1);      // xmm5 holds a1*b0
+    __ movdqu(xmm_temp6, xmm_temp0);
+    __ pclmulqdq(xmm_temp6, xmm_temp1, 17);     // xmm6 holds a1*b1
+
+    __ pxor(xmm_temp4, xmm_temp5);      // xmm4 holds a0*b1 + a1*b0
+
+    __ movdqu(xmm_temp5, xmm_temp4);    // move the contents of xmm4 to xmm5
+    __ psrldq(xmm_temp4, 8);    // shift by xmm4 64 bits to the right
+    __ pslldq(xmm_temp5, 8);    // shift by xmm5 64 bits to the left
+    __ pxor(xmm_temp3, xmm_temp5);
+    __ pxor(xmm_temp6, xmm_temp4);      // Register pair <xmm6:xmm3> holds the result
+                                        // of the carry-less multiplication of
+                                        // xmm0 by xmm1.
+
+    // We shift the result of the multiplication by one bit position
+    // to the left to cope for the fact that the bits are reversed.
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp8, xmm_temp6);
+    __ pslld(xmm_temp3, 1);
+    __ pslld(xmm_temp6, 1);
+    __ psrld(xmm_temp7, 31);
+    __ psrld(xmm_temp8, 31);
+    __ movdqu(xmm_temp9, xmm_temp7);
+    __ pslldq(xmm_temp8, 4);
+    __ pslldq(xmm_temp7, 4);
+    __ psrldq(xmm_temp9, 12);
+    __ por(xmm_temp3, xmm_temp7);
+    __ por(xmm_temp6, xmm_temp8);
+    __ por(xmm_temp6, xmm_temp9);
+
+    //
+    // First phase of the reduction
+    //
+    // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts
+    // independently.
+    __ movdqu(xmm_temp7, xmm_temp3);
+    __ movdqu(xmm_temp8, xmm_temp3);
+    __ movdqu(xmm_temp9, xmm_temp3);
+    __ pslld(xmm_temp7, 31);    // packed right shift shifting << 31
+    __ pslld(xmm_temp8, 30);    // packed right shift shifting << 30
+    __ pslld(xmm_temp9, 25);    // packed right shift shifting << 25
+    __ pxor(xmm_temp7, xmm_temp8);      // xor the shifted versions
+    __ pxor(xmm_temp7, xmm_temp9);
+    __ movdqu(xmm_temp8, xmm_temp7);
+    __ pslldq(xmm_temp7, 12);
+    __ psrldq(xmm_temp8, 4);
+    __ pxor(xmm_temp3, xmm_temp7);      // first phase of the reduction complete
+
+    //
+    // Second phase of the reduction
+    //
+    // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these
+    // shift operations.
+    __ movdqu(xmm_temp2, xmm_temp3);
+    __ movdqu(xmm_temp4, xmm_temp3);
+    __ movdqu(xmm_temp5, xmm_temp3);
+    __ psrld(xmm_temp2, 1);     // packed left shifting >> 1
+    __ psrld(xmm_temp4, 2);     // packed left shifting >> 2
+    __ psrld(xmm_temp5, 7);     // packed left shifting >> 7
+    __ pxor(xmm_temp2, xmm_temp4);      // xor the shifted versions
+    __ pxor(xmm_temp2, xmm_temp5);
+    __ pxor(xmm_temp2, xmm_temp8);
+    __ pxor(xmm_temp3, xmm_temp2);
+    __ pxor(xmm_temp6, xmm_temp3);      // the result is in xmm6
+
+    __ decrement(blocks);
+    __ jcc(Assembler::zero, L_exit);
+    __ movdqu(xmm_temp0, xmm_temp6);
+    __ addptr(data, 16);
+    __ jmp(L_ghash_loop);
+
+    __ BIND(L_exit);
+    __ pshufb(xmm_temp6, xmm_temp10);          // Byte swap 16-byte result
+    __ movdqu(Address(state, 0), xmm_temp6);   // store the result
+
+#ifdef _WIN64
+    // restore xmm regs belonging to calling function
+    for (int i = 6; i <= XMM_REG_LAST; i++) {
+      __ movdqu(as_XMMRegister(i), xmm_save(i));
+    }
+#endif
+    __ leave();
+    __ ret(0);
+    return start;
+  }
+
   /**
    *  Arguments:
    *
@@ -4120,6 +4294,13 @@
       StubRoutines::_cipherBlockChaining_decryptAESCrypt = generate_cipherBlockChaining_decryptAESCrypt_Parallel();
     }
 
+    // Generate GHASH intrinsics code
+    if (UseGHASHIntrinsics) {
+      StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask();
+      StubRoutines::x86::_ghash_byte_swap_mask_addr = generate_ghash_byte_swap_mask();
+      StubRoutines::_ghash_processBlocks = generate_ghash_processBlocks();
+    }
+
     // Safefetch stubs.
     generate_safefetch("SafeFetch32", sizeof(int),     &StubRoutines::_safefetch32_entry,
                                                        &StubRoutines::_safefetch32_fault_pc,
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -33,6 +33,8 @@
 
 address StubRoutines::x86::_verify_mxcsr_entry = NULL;
 address StubRoutines::x86::_key_shuffle_mask_addr = NULL;
+address StubRoutines::x86::_ghash_long_swap_mask_addr = NULL;
+address StubRoutines::x86::_ghash_byte_swap_mask_addr = NULL;
 
 uint64_t StubRoutines::x86::_crc_by128_masks[] =
 {
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86.hpp	Wed Jul 05 20:40:50 2017 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -36,10 +36,15 @@
   // masks and table for CRC32
   static uint64_t _crc_by128_masks[];
   static juint    _crc_table[];
+  // swap mask for ghash
+  static address _ghash_long_swap_mask_addr;
+  static address _ghash_byte_swap_mask_addr;
 
  public:
   static address verify_mxcsr_entry()    { return _verify_mxcsr_entry; }
   static address key_shuffle_mask_addr() { return _key_shuffle_mask_addr; }
   static address crc_by128_masks_addr()  { return (address)_crc_by128_masks; }
+  static address ghash_long_swap_mask_addr() { return _ghash_long_swap_mask_addr; }
+  static address ghash_byte_swap_mask_addr() { return _ghash_byte_swap_mask_addr; }
 
 #endif // CPU_X86_VM_STUBROUTINES_X86_32_HPP
--- a/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.hpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/stubRoutines_x86_64.hpp	Wed Jul 05 20:40:50 2017 +0200
@@ -33,7 +33,7 @@
 
 enum platform_dependent_constants {
   code_size1 = 19000,          // simply increase if too small (assembler will crash if too small)
-  code_size2 = 23000           // simply increase if too small (assembler will crash if too small)
+  code_size2 = 24000           // simply increase if too small (assembler will crash if too small)
 };
 
 class x86 {
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp	Wed Jul 05 20:40:50 2017 +0200
@@ -677,6 +677,17 @@
     FLAG_SET_DEFAULT(UseAESIntrinsics, false);
   }
 
+  // GHASH/GCM intrinsics
+  if (UseCLMUL && (UseSSE > 2)) {
+    if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) {
+      UseGHASHIntrinsics = true;
+    }
+  } else if (UseGHASHIntrinsics) {
+    if (!FLAG_IS_DEFAULT(UseGHASHIntrinsics))
+      warning("GHASH intrinsic requires CLMUL and SSE2 instructions on this CPU");
+    FLAG_SET_DEFAULT(UseGHASHIntrinsics, false);
+  }
+
   if (UseSHA) {
     warning("SHA instructions are not available on this CPU");
     FLAG_SET_DEFAULT(UseSHA, false);
--- a/hotspot/src/cpu/x86/vm/vm_version_x86.hpp	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/vm_version_x86.hpp	Wed Jul 05 20:40:50 2017 +0200
@@ -702,6 +702,7 @@
   static bool supports_avx512cd() { return (_cpuFeatures & CPU_AVX512CD) != 0; }
   static bool supports_avx512bw() { return (_cpuFeatures & CPU_AVX512BW) != 0; }
   static bool supports_avx512vl() { return (_cpuFeatures & CPU_AVX512VL) != 0; }
+  static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); }
   // Intel features
   static bool is_intel_family_core() { return is_intel() &&
                                        extended_cpu_family() == CPU_FAMILY_INTEL_CORE; }
--- a/hotspot/src/cpu/x86/vm/x86.ad	Thu Jul 02 16:08:17 2015 -0700
+++ b/hotspot/src/cpu/x86/vm/x86.ad	Wed Jul 05 20:40:50 2017 +0200
@@ -2894,6 +2894,457 @@
   ins_pipe( pipe_slow );
 %}
 
+// ====================LEGACY REPLICATE=======================================
+
+instruct Repl4B_mem(vecS dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB (LoadB mem)));
+  format %{ "punpcklbw $dst,$mem\n\t"
+            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
+  ins_encode %{
+    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8B_mem(vecD dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB (LoadB mem)));
+  format %{ "punpcklbw $dst,$mem\n\t"
+            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
+  ins_encode %{
+    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16B(vecX dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB src));
+  format %{ "movd    $dst,$src\n\t"
+            "punpcklbw $dst,$dst\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\t! replicate16B" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16B_mem(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB (LoadB mem)));
+  format %{ "punpcklbw $dst,$mem\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\t! replicate16B" %}
+  ins_encode %{
+    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32B(vecY dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB src));
+  format %{ "movd    $dst,$src\n\t"
+            "punpcklbw $dst,$dst\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32B_mem(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB (LoadB mem)));
+  format %{ "punpcklbw $dst,$mem\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
+  ins_encode %{
+    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16B_imm(vecX dst, immI con) %{
+  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32B_imm(vecY dst, immI con) %{
+  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4S(vecD dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4S_mem(vecD dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS (LoadS mem)));
+  format %{ "pshuflw $dst,$mem,0x00\t! replicate4S" %}
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8S(vecX dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\t! replicate8S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8S_mem(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS (LoadS mem)));
+  format %{ "pshuflw $dst,$mem,0x00\n\t"
+            "punpcklqdq $dst,$dst\t! replicate8S" %}
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8S_imm(vecX dst, immI con) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16S(vecY dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16S_mem(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS (LoadS mem)));
+  format %{ "pshuflw $dst,$mem,0x00\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16S_imm(vecY dst, immI con) %{
+  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4I(vecX dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4I_mem(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI (LoadI mem)));
+  format %{ "pshufd  $dst,$mem,0x00\t! replicate4I" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I(vecY dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshufd  $dst,$dst,0x00\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I_mem(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI (LoadI mem)));
+  format %{ "pshufd  $dst,$mem,0x00\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4I_imm(vecX dst, immI con) %{
+  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI con));
+  format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
+            "punpcklqdq $dst,$dst" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I_imm(vecY dst, immI con) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI con));
+  format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Long could be loaded into xmm register directly from memory.
+instruct Repl2L_mem(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateL (LoadL mem)));
+  format %{ "movq    $dst,$mem\n\t"
+            "punpcklqdq $dst,$dst\t! replicate2L" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate long (8 byte) scalar to be vector
+#ifdef _LP64
+instruct Repl4L(vecY dst, rRegL src) %{
+  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateL src));
+  format %{ "movdq   $dst,$src\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+  ins_encode %{
+    __ movdq($dst$$XMMRegister, $src$$Register);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+#else // _LP64
+instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
+  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateL src));
+  effect(TEMP dst, USE src, TEMP tmp);
+  format %{ "movdl   $dst,$src.lo\n\t"
+            "movdl   $tmp,$src.hi\n\t"
+            "punpckldq $dst,$tmp\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+#endif // _LP64
+
+instruct Repl4L_imm(vecY dst, immL con) %{
+  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateL con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress($con));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4L_mem(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateL (LoadL mem)));
+  format %{ "movq    $dst,$mem\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl2F_mem(vecD dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateF (LoadF mem)));
+  format %{ "pshufd  $dst,$mem,0x00\t! replicate2F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4F_mem(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateF (LoadF mem)));
+  format %{ "pshufd  $dst,$mem,0x00\t! replicate4F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8F(vecY dst, regF src) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateF src));
+  format %{ "pshufd  $dst,$src,0x00\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8F_mem(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateF (LoadF mem)));
+  format %{ "pshufd  $dst,$mem,0x00\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl2D_mem(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateD (LoadD mem)));
+  format %{ "pshufd  $dst,$mem,0x44\t! replicate2D" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4D(vecY dst, regD src) %{
+  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateD src));
+  format %{ "pshufd  $dst,$src,0x44\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4D_mem(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateD (LoadD mem)));
+  format %{ "pshufd  $dst,$mem,0x44\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// ====================GENERIC REPLICATE==========================================
+
 // Replicate byte scalar to be vector
 instruct Repl4B(vecS dst, rRegI src) %{
   predicate(n->as_Vector()->length() == 4);
@@ -2923,60 +3374,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct Repl16B(vecX dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 16);
-  match(Set dst (ReplicateB src));
-  format %{ "movd    $dst,$src\n\t"
-            "punpcklbw $dst,$dst\n\t"
-            "pshuflw $dst,$dst,0x00\n\t"
-            "punpcklqdq $dst,$dst\t! replicate16B" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl32B(vecY dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 32);
-  match(Set dst (ReplicateB src));
-  format %{ "movd    $dst,$src\n\t"
-            "punpcklbw $dst,$dst\n\t"
-            "pshuflw $dst,$dst,0x00\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate32B" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl64B(vecZ dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 64);
-  match(Set dst (ReplicateB src));
-  format %{ "movd    $dst,$src\n\t"
-            "punpcklbw $dst,$dst\n\t"
-            "pshuflw $dst,$dst,0x00\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! lower replicate32B\n\t"
-            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate632B" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Replicate byte scalar immediate to be vector by loading from const table.
 instruct Repl4B_imm(vecS dst, immI con) %{
   predicate(n->as_Vector()->length() == 4);
@@ -2998,48 +3395,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct Repl16B_imm(vecX dst, immI con) %{
-  predicate(n->as_Vector()->length() == 16);
-  match(Set dst (ReplicateB con));
-  format %{ "movq    $dst,[$constantaddress]\n\t"
-            "punpcklqdq $dst,$dst\t! replicate16B($con)" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl32B_imm(vecY dst, immI con) %{
-  predicate(n->as_Vector()->length() == 32);
-  match(Set dst (ReplicateB con));
-  format %{ "movq    $dst,[$constantaddress]\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! lreplicate32B($con)" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl64B_imm(vecZ dst, immI con) %{
-  predicate(n->as_Vector()->length() == 64);
-  match(Set dst (ReplicateB con));
-  format %{ "movq    $dst,[$constantaddress]\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! lower replicate32B($con)\n\t"
-            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate32B($con)" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Replicate byte scalar zero to be vector
 instruct Repl4B_zero(vecS dst, immI0 zero) %{
   predicate(n->as_Vector()->length() == 4);
@@ -3083,18 +3438,6 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct Repl64B_zero(vecZ dst, immI0 zero) %{
-  predicate(n->as_Vector()->length() == 64);
-  match(Set dst (ReplicateB zero));
-  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate64B zero" %}
-  ins_encode %{
-    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
-    int vector_len = 2;
-    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
 // Replicate char/short (2 byte) scalar to be vector
 instruct Repl2S(vecS dst, rRegI src) %{
   predicate(n->as_Vector()->length() == 2);
@@ -3108,66 +3451,6 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct Repl4S(vecD dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateS src));
-  format %{ "movd    $dst,$src\n\t"
-            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct Repl8S(vecX dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateS src));
-  format %{ "movd    $dst,$src\n\t"
-            "pshuflw $dst,$dst,0x00\n\t"
-            "punpcklqdq $dst,$dst\t! replicate8S" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl16S(vecY dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 16);
-  match(Set dst (ReplicateS src));
-  format %{ "movd    $dst,$src\n\t"
-            "pshuflw $dst,$dst,0x00\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate16S" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl32S(vecZ dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 32);
-  match(Set dst (ReplicateS src));
-  format %{ "movd    $dst,$src\n\t"
-            "pshuflw $dst,$dst,0x00\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! lower replicate16S\n\t"
-            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
 instruct Repl2S_imm(vecS dst, immI con) %{
   predicate(n->as_Vector()->length() == 2);
@@ -3189,48 +3472,6 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct Repl8S_imm(vecX dst, immI con) %{
-  predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateS con));
-  format %{ "movq    $dst,[$constantaddress]\n\t"
-            "punpcklqdq $dst,$dst\t! replicate8S($con)" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl16S_imm(vecY dst, immI con) %{
-  predicate(n->as_Vector()->length() == 16);
-  match(Set dst (ReplicateS con));
-  format %{ "movq    $dst,[$constantaddress]\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate16S($con)" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl32S_imm(vecZ dst, immI con) %{
-  predicate(n->as_Vector()->length() == 32);
-  match(Set dst (ReplicateS con));
-  format %{ "movq    $dst,[$constantaddress]\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! lower replicate16S($con)\n\t"
-            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate16S($con)" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Replicate char/short (2 byte) scalar zero to be vector
 instruct Repl2S_zero(vecS dst, immI0 zero) %{
   predicate(n->as_Vector()->length() == 2);
@@ -3274,18 +3515,6 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct Repl32S_zero(vecZ dst, immI0 zero) %{
-  predicate(n->as_Vector()->length() == 32);
-  match(Set dst (ReplicateS zero));
-  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate32S zero" %}
-  ins_encode %{
-    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
-    int vector_len = 2;
-    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
 // Replicate integer (4 byte) scalar to be vector
 instruct Repl2I(vecD dst, rRegI src) %{
   predicate(n->as_Vector()->length() == 2);
@@ -3299,101 +3528,6 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct Repl4I(vecX dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateI src));
-  format %{ "movd    $dst,$src\n\t"
-            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl8I(vecY dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateI src));
-  format %{ "movd    $dst,$src\n\t"
-            "pshufd  $dst,$dst,0x00\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl16I(vecZ dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 16);
-  match(Set dst (ReplicateI src));
-  format %{ "movd    $dst,$src\n\t"
-            "pshufd  $dst,$dst,0x00\n\t"
-            "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
-            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
-instruct Repl2I_imm(vecD dst, immI con) %{
-  predicate(n->as_Vector()->length() == 2);
-  match(Set dst (ReplicateI con));
-  format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-instruct Repl4I_imm(vecX dst, immI con) %{
-  predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateI con));
-  format %{ "movq    $dst,[$constantaddress]\t! replicate4I($con)\n\t"
-            "punpcklqdq $dst,$dst" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl8I_imm(vecY dst, immI con) %{
-  predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateI con));
-  format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl16I_imm(vecZ dst, immI con) %{
-  predicate(n->as_Vector()->length() == 16);
-  match(Set dst (ReplicateI con));
-  format %{ "movq    $dst,[$constantaddress]\t! replicate16I($con)\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\n\t"
-            "vinserti64x4h $dst k0,$dst,$dst" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Integer could be loaded into xmm register directly from memory.
 instruct Repl2I_mem(vecD dst, memory mem) %{
   predicate(n->as_Vector()->length() == 2);
@@ -3407,46 +3541,15 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct Repl4I_mem(vecX dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateI (LoadI mem)));
-  format %{ "movd    $dst,$mem\n\t"
-            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $mem$$Address);
-    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl8I_mem(vecY dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateI (LoadI mem)));
-  format %{ "movd    $dst,$mem\n\t"
-            "pshufd  $dst,$dst,0x00\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate8I" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $mem$$Address);
-    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl16I_mem(vecZ dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 16);
-  match(Set dst (ReplicateI (LoadI mem)));
-  format %{ "movd    $dst,$mem\n\t"
-            "pshufd  $dst,$dst,0x00\n\t"
-            "vinserti128h $dst,$dst,$dst\t! lower replicate8I\n\t"
-            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate8I" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $mem$$Address);
-    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
+// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
+instruct Repl2I_imm(vecD dst, immI con) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI con));
+  format %{ "movq    $dst,[$constantaddress]\t! replicate2I($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+  %}
+  ins_pipe( fpu_reg_reg );
 %}
 
 // Replicate integer (4 byte) scalar zero to be vector
@@ -3482,18 +3585,6 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct Repl16I_zero(vecZ dst, immI0 zero) %{
-  predicate(n->as_Vector()->length() == 16);
-  match(Set dst (ReplicateI zero));
-  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate16I zero" %}
-  ins_encode %{
-    // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it).
-    int vector_len = 2;
-    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
 // Replicate long (8 byte) scalar to be vector
 #ifdef _LP64
 instruct Repl2L(vecX dst, rRegL src) %{
@@ -3507,36 +3598,6 @@
   %}
   ins_pipe( pipe_slow );
 %}
-
-instruct Repl4L(vecY dst, rRegL src) %{
-  predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateL src));
-  format %{ "movdq   $dst,$src\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
-  ins_encode %{
-    __ movdq($dst$$XMMRegister, $src$$Register);
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl8L(vecZ dst, rRegL src) %{
-  predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateL src));
-  format %{ "movdq   $dst,$src\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
-            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
-  ins_encode %{
-    __ movdq($dst$$XMMRegister, $src$$Register);
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
 #else // _LP64
 instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
   predicate(n->as_Vector()->length() == 2);
@@ -3554,45 +3615,6 @@
   %}
   ins_pipe( pipe_slow );
 %}
-
-instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
-  predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateL src));
-  effect(TEMP dst, USE src, TEMP tmp);
-  format %{ "movdl   $dst,$src.lo\n\t"
-            "movdl   $tmp,$src.hi\n\t"
-            "punpckldq $dst,$tmp\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
-    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl8L(vecZ dst, eRegL src, regD tmp) %{
-  predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateL src));
-  effect(TEMP dst, USE src, TEMP tmp);
-  format %{ "movdl   $dst,$src.lo\n\t"
-            "movdl   $tmp,$src.hi\n\t"
-            "punpckldq $dst,$tmp\n\t"
-            "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
-            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
-    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
 #endif // _LP64
 
 // Replicate long (8 byte) scalar immediate to be vector by loading from const table.
@@ -3608,79 +3630,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct Repl4L_imm(vecY dst, immL con) %{
-  predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateL con));
-  format %{ "movq    $dst,[$constantaddress]\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate4L($con)" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $constantaddress($con));
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl8L_imm(vecZ dst, immL con) %{
-  predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateL con));
-  format %{ "movq    $dst,[$constantaddress]\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! lower replicate4L($con)\n\t"
-            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L($con)" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $constantaddress($con));
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Long could be loaded into xmm register directly from memory.
-instruct Repl2L_mem(vecX dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 2);
-  match(Set dst (ReplicateL (LoadL mem)));
-  format %{ "movq    $dst,$mem\n\t"
-            "punpcklqdq $dst,$dst\t! replicate2L" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl4L_mem(vecY dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateL (LoadL mem)));
-  format %{ "movq    $dst,$mem\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! replicate4L" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl8L_mem(vecZ dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateL (LoadL mem)));
-  format %{ "movq    $dst,$mem\n\t"
-            "punpcklqdq $dst,$dst\n\t"
-            "vinserti128h $dst,$dst,$dst\t! lower replicate4L\n\t"
-            "vinserti64x4h $dst k0,$dst,$dst\t! upper replicate4L" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinserti64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Replicate long (8 byte) scalar zero to be vector
 instruct Repl2L_zero(vecX dst, immL0 zero) %{
   predicate(n->as_Vector()->length() == 2);
@@ -3704,18 +3653,6 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct Repl8L_zero(vecZ dst, immL0 zero) %{
-  predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateL zero));
-  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate8L zero" %}
-  ins_encode %{
-    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
-    int vector_len = 2;
-    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
 // Replicate float (4 byte) scalar to be vector
 instruct Repl2F(vecD dst, regF src) %{
   predicate(n->as_Vector()->length() == 2);
@@ -3737,32 +3674,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct Repl8F(vecY dst, regF src) %{
-  predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateF src));
-  format %{ "pshufd  $dst,$src,0x00\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl16F(vecZ dst, regF src) %{
-  predicate(n->as_Vector()->length() == 16);
-  match(Set dst (ReplicateF src));
-  format %{ "pshufd  $dst,$src,0x00\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! lower replicate8F\n\t"
-            "vinsertf64x4h $dst k0,$dst,$dst\t! lower replicate8F" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Replicate float (4 byte) scalar zero to be vector
 instruct Repl2F_zero(vecD dst, immF0 zero) %{
   predicate(n->as_Vector()->length() == 2);
@@ -3795,17 +3706,6 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct Repl16F_zero(vecZ dst, immF0 zero) %{
-  predicate(n->as_Vector()->length() == 16);
-  match(Set dst (ReplicateF zero));
-  format %{ "vxorps  $dst k0,$dst,$dst\t! replicate16F zero" %}
-  ins_encode %{
-    int vector_len = 2;
-    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
 // Replicate double (8 bytes) scalar to be vector
 instruct Repl2D(vecX dst, regD src) %{
   predicate(n->as_Vector()->length() == 2);
@@ -3817,32 +3717,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct Repl4D(vecY dst, regD src) %{
-  predicate(n->as_Vector()->length() == 4);
-  match(Set dst (ReplicateD src));
-  format %{ "pshufd  $dst,$src,0x44\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl8D(vecZ dst, regD src) %{
-  predicate(n->as_Vector()->length() == 8);
-  match(Set dst (ReplicateD src));
-  format %{ "pshufd  $dst,$src,0x44\n\t"
-            "vinsertf128h $dst,$dst,$dst\t! lower replicate4D\n\t"
-            "vinsertf64x4h $dst k0,$dst,$dst\t! upper replicate4D" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
-    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-    __ vinsertf64x4h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Replicate double (8 byte) scalar zero to be vector
 instruct Repl2D_zero(vecX dst, immD0 zero) %{
   predicate(n->as_Vector()->length() == 2);
@@ -3865,8 +3739,636 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct Repl8D_zero(vecZ dst, immD0 zero) %{
-  predicate(n->as_Vector()->length() == 8);
+// ====================EVEX REPLICATE=============================================
+
+instruct Repl4B_mem_evex(vecS dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB (LoadB mem)));
+  format %{ "vpbroadcastb  $dst,$mem\t! replicate4B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8B_mem_evex(vecD dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB (LoadB mem)));
+  format %{ "vpbroadcastb  $dst,$mem\t! replicate8B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16B_evex(vecX dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB src));
+  format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
+  ins_encode %{
+   int vector_len = 0;
+    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16B_mem_evex(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB (LoadB mem)));
+  format %{ "vpbroadcastb  $dst,$mem\t! replicate16B" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32B_evex(vecY dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB src));
+  format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
+  ins_encode %{
+   int vector_len = 1;
+    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32B_mem_evex(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB (LoadB mem)));
+  format %{ "vpbroadcastb  $dst,$mem\t! replicate32B" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl64B_evex(vecZ dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
+  match(Set dst (ReplicateB src));
+  format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %}
+  ins_encode %{
+   int vector_len = 2;
+    __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB (LoadB mem)));
+  format %{ "vpbroadcastb  $dst,$mem\t! replicate64B" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16B_imm_evex(vecX dst, immI con) %{
+  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "vpbroadcastb $dst,$dst\t! replicate16B" %}
+  ins_encode %{
+   int vector_len = 0;
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32B_imm_evex(vecY dst, immI con) %{
+  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "vpbroadcastb $dst,$dst\t! replicate32B" %}
+  ins_encode %{
+   int vector_len = 1;
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl64B_imm_evex(vecZ dst, immI con) %{
+  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
+  match(Set dst (ReplicateB con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "vpbroadcastb $dst,$dst\t! upper replicate64B" %}
+  ins_encode %{
+   int vector_len = 2;
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl64B_zero_evex(vecZ dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 64 && UseAVX > 2);
+  match(Set dst (ReplicateB zero));
+  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate64B zero" %}
+  ins_encode %{
+    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
+    int vector_len = 2;
+    __ vpxor($dst$$XMMRegister,$dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4S_evex(vecD dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS src));
+  format %{ "vpbroadcastw $dst,$src\t! replicate4S" %}
+  ins_encode %{
+   int vector_len = 0;
+    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4S_mem_evex(vecD dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS (LoadS mem)));
+  format %{ "vpbroadcastw  $dst,$mem\t! replicate4S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8S_evex(vecX dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS src));
+  format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
+  ins_encode %{
+   int vector_len = 0;
+    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8S_mem_evex(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS (LoadS mem)));
+  format %{ "vpbroadcastw  $dst,$mem\t! replicate8S" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16S_evex(vecY dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS src));
+  format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
+  ins_encode %{
+   int vector_len = 1;
+    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16S_mem_evex(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS (LoadS mem)));
+  format %{ "vpbroadcastw  $dst,$mem\t! replicate16S" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32S_evex(vecZ dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
+  match(Set dst (ReplicateS src));
+  format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
+  ins_encode %{
+   int vector_len = 2;
+    __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
+  match(Set dst (ReplicateS (LoadS mem)));
+  format %{ "vpbroadcastw  $dst,$mem\t! replicate32S" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8S_imm_evex(vecX dst, immI con) %{
+  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "vpbroadcastw $dst,$dst\t! replicate8S" %}
+  ins_encode %{
+   int vector_len = 0;
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16S_imm_evex(vecY dst, immI con) %{
+  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "vpbroadcastw $dst,$dst\t! replicate16S" %}
+  ins_encode %{
+   int vector_len = 1;
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32S_imm_evex(vecZ dst, immI con) %{
+  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
+  match(Set dst (ReplicateS con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "vpbroadcastw $dst,$dst\t! replicate32S" %}
+  ins_encode %{
+   int vector_len = 2;
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32S_zero_evex(vecZ dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 32 && UseAVX > 2);
+  match(Set dst (ReplicateS zero));
+  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate32S zero" %}
+  ins_encode %{
+    // Use vxorpd since AVX does not have vpxor for 512-bit (EVEX will have it).
+    int vector_len = 2;
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4I_evex(vecX dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI src));
+  format %{ "vpbroadcastd  $dst,$src\t! replicate4I" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4I_mem_evex(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI (LoadI mem)));
+  format %{ "vpbroadcastd  $dst,$mem\t! replicate4I" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I_evex(vecY dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI src));
+  format %{ "vpbroadcastd  $dst,$src\t! replicate8I" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I_mem_evex(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI (LoadI mem)));
+  format %{ "vpbroadcastd  $dst,$mem\t! replicate8I" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16I_evex(vecZ dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
+  match(Set dst (ReplicateI src));
+  format %{ "vpbroadcastd  $dst,$src\t! replicate16I" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16I_mem_evex(vecZ dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
+  match(Set dst (ReplicateI (LoadI mem)));
+  format %{ "vpbroadcastd  $dst,$mem\t! replicate16I" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4I_imm_evex(vecX dst, immI con) %{
+  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI con));
+  format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
+            "vpbroadcastd  $dst,$dst\t! replicate4I" %}
+  ins_encode %{
+    int vector_len = 0;
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I_imm_evex(vecY dst, immI con) %{
+  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI con));
+  format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
+            "vpbroadcastd  $dst,$dst\t! replicate8I" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16I_imm_evex(vecZ dst, immI con) %{
+  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
+  match(Set dst (ReplicateI con));
+  format %{ "movq    $dst,[$constantaddress]\t! replicate16I($con)\n\t"
+            "vpbroadcastd  $dst,$dst\t! replicate16I" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16I_zero_evex(vecZ dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
+  match(Set dst (ReplicateI zero));
+  format %{ "vpxor   $dst k0,$dst,$dst\t! replicate16I zero" %}
+  ins_encode %{
+    // Use vxorpd since AVX does not have vpxor for 512-bit (AVX2 will have it).
+    int vector_len = 2;
+    __ vpxor($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate long (8 byte) scalar to be vector
+#ifdef _LP64
+instruct Repl4L_evex(vecY dst, rRegL src) %{
+  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateL src));
+  format %{ "vpbroadcastq  $dst,$src\t! replicate4L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8L_evex(vecZ dst, rRegL src) %{
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
+  match(Set dst (ReplicateL src));
+  format %{ "vpbroadcastq  $dst,$src\t! replicate8L" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+#else // _LP64
+instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
+  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateL src));
+  effect(TEMP dst, USE src, TEMP tmp);
+  format %{ "movdl   $dst,$src.lo\n\t"
+            "movdl   $tmp,$src.hi\n\t"
+            "punpckldq $dst,$tmp\n\t"
+            "vpbroadcastq  $dst,$dst\t! replicate4L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
+  match(Set dst (ReplicateL src));
+  effect(TEMP dst, USE src, TEMP tmp);
+  format %{ "movdl   $dst,$src.lo\n\t"
+            "movdl   $tmp,$src.hi\n\t"
+            "punpckldq $dst,$tmp\n\t"
+            "vpbroadcastq  $dst,$dst\t! replicate8L" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+#endif // _LP64
+
+instruct Repl4L_imm_evex(vecY dst, immL con) %{
+  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateL con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "vpbroadcastq  $dst,$dst\t! replicate4L" %}
+  ins_encode %{
+    int vector_len = 1;
+    __ movq($dst$$XMMRegister, $constantaddress($con));