changeset 52741:a748603468bb jep-334

Automatic merge with default
author mcimadamore
date Thu, 27 Sep 2018 22:04:49 +0200
parents 8d50dbdbffed dade6dd87bb4
children 2e17f5315d59 0bcf1413c14f
files src/java.base/share/classes/module-info.java src/java.desktop/share/classes/com/sun/awt/SecurityWarning.java src/java.desktop/share/classes/sun/applet/AppletClassLoader.java src/java.desktop/share/classes/sun/applet/AppletSecurity.java src/java.desktop/share/classes/sun/applet/AppletThreadGroup.java src/java.desktop/share/classes/sun/awt/HToolkit.java src/java.desktop/unix/native/libawt_xawt/awt/HPkeysym.h test/jdk/com/sun/awt/SecurityWarning/CustomSecurityManager.java test/jdk/com/sun/awt/SecurityWarning/GetSizeShouldNotReturnZero.java test/jdk/com/sun/awt/TEST.properties test/jdk/com/sun/jdi/RedefineException.sh test/jdk/com/sun/jdi/RedefineFinal.sh test/jdk/com/sun/jdi/RedefineIntConstantToLong.sh test/jdk/com/sun/jdi/RedefineMulti.sh test/jdk/com/sun/jdi/RedefinePop.sh test/jdk/com/sun/jdi/RedefineStep.sh test/jdk/com/sun/jdi/RedefineTTYLineNumber.sh test/jdk/com/sun/jdi/StringConvertTest.sh test/jdk/com/sun/jdi/WatchFramePop.sh test/jdk/java/awt/dnd/BadSerializaionTest/BadSerializationTest.java test/jdk/java/awt/dnd/BadSerializaionTest/badAction test/jdk/java/awt/dnd/BadSerializaionTest/good test/jdk/java/awt/dnd/BadSerializaionTest/noEvents test/jdk/java/awt/dnd/BadSerializaionTest/nullComponent test/jdk/java/awt/dnd/BadSerializaionTest/nullDragSource test/jdk/java/awt/dnd/BadSerializaionTest/nullOrigin test/jdk/lib/testlibrary/jdk/testlibrary/Asserts.java test/langtools/tools/javadoc/api/basic/IsSupportedOptionTest.java
diffstat 684 files changed, 11606 insertions(+), 11230 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Thu Sep 20 22:05:07 2018 +0200
+++ b/.hgtags	Thu Sep 27 22:04:49 2018 +0200
@@ -513,3 +513,4 @@
 8f594f75e0547d4ca16649cb3501659e3155e81b jdk-12+10
 f0f5d23449d31f1b3580c8a73313918cafeaefd7 jdk-12+11
 15094d12a632f452a2064318a4e416d0c7a9ce0c jdk-12+12
+511a9946f83e3e3c7b9dbe1840367063fb39b4e1 jdk-12+13
--- a/.jcheck/conf	Thu Sep 20 22:05:07 2018 +0200
+++ b/.jcheck/conf	Thu Sep 27 22:04:49 2018 +0200
@@ -1,2 +1,2 @@
-project=jdk10
+project=jdk
 bugids=dup
--- a/make/RunTests.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/RunTests.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -531,8 +531,8 @@
     $1_JTREG_BASIC_OPTIONS += $$(addprefix -exclude:, $$($1_JTREG_PROBLEM_LIST))
   endif
 
-  ifneq ($$(JIB_JAR), )
-    $1_JTREG_BASIC_OPTIONS += -cpa:$$(JIB_JAR)
+  ifneq ($$(JIB_HOME), )
+    $1_JTREG_BASIC_OPTIONS += -e:JIB_HOME=$$(JIB_HOME)
   endif
 
   $1_JTREG_BASIC_OPTIONS += -e:TEST_IMAGE_GRAAL_DIR=${TEST_IMAGE_DIR}/hotspot/jtreg/graal
--- a/make/autoconf/flags-cflags.m4	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/autoconf/flags-cflags.m4	Thu Sep 27 22:04:49 2018 +0200
@@ -161,14 +161,35 @@
     microsoft)
       DISABLE_WARNING_PREFIX="-wd"
       CFLAGS_WARNINGS_ARE_ERRORS="-WX"
+
+      WARNINGS_ENABLE_ALL="-W3"
+      DISABLED_WARNINGS="4800"
       ;;
+
     solstudio)
       DISABLE_WARNING_PREFIX="-erroff="
-      CFLAGS_WARNINGS_ARE_ERRORS="-errtags -errwarn=%all"
+      CFLAGS_WARNINGS_ARE_ERRORS="-errwarn=%all"
+
+      WARNINGS_ENABLE_ALL_CFLAGS="-v"
+      WARNINGS_ENABLE_ALL_CXXFLAGS="+w"
+
+      DISABLED_WARNINGS_C=""
+      DISABLED_WARNINGS_CXX=""
       ;;
+
     gcc)
       DISABLE_WARNING_PREFIX="-Wno-"
       CFLAGS_WARNINGS_ARE_ERRORS="-Werror"
+
+      # Additional warnings that are not activated by -Wall and -Wextra
+      WARNINGS_ENABLE_ADDITIONAL="-Wpointer-arith -Wsign-compare \
+          -Wunused-function -Wundef -Wunused-value -Wreturn-type"
+      WARNINGS_ENABLE_ADDITIONAL_CXX="-Woverloaded-virtual -Wreorder"
+      WARNINGS_ENABLE_ALL_CFLAGS="-Wall -Wextra -Wformat=2 $WARNINGS_ENABLE_ADDITIONAL"
+      WARNINGS_ENABLE_ALL_CXXFLAGS="$WARNINGS_ENABLE_ALL_CFLAGS $WARNINGS_ENABLE_ADDITIONAL_CXX"
+
+      DISABLED_WARNINGS="unused-parameter unused"
+
       # Repeate the check for the BUILD_CC and BUILD_CXX. Need to also reset
       # CFLAGS since any target specific flags will likely not work with the
       # build compiler
@@ -183,18 +204,40 @@
       CXX="$CXX_OLD"
       CFLAGS="$CFLAGS_OLD"
       ;;
+
     clang)
       DISABLE_WARNING_PREFIX="-Wno-"
       CFLAGS_WARNINGS_ARE_ERRORS="-Werror"
+
+      # Additional warnings that are not activated by -Wall and -Wextra
+      WARNINGS_ENABLE_ADDITIONAL="-Wpointer-arith -Wsign-compare -Wreorder \
+          -Wunused-function -Wundef -Wunused-value -Woverloaded-virtual"
+      WARNINGS_ENABLE_ALL="-Wall -Wextra -Wformat=2 $WARNINGS_ENABLE_ADDITIONAL"
+
+      DISABLED_WARNINGS="unused-parameter unused"
+
+      if test "x$OPENJDK_TARGET_OS" = xmacosx; then
+        # missing-method-return-type triggers in JavaNativeFoundation framework
+        DISABLED_WARNINGS="$DISABLED_WARNINGS missing-method-return-type"
+      fi
+
       ;;
+
     xlc)
       DISABLE_WARNING_PREFIX="-qsuppress="
       CFLAGS_WARNINGS_ARE_ERRORS="-qhalt=w"
+
+      # Possibly a better subset than "all" is "lan:trx:ret:zea:cmp:ret"
+      WARNINGS_ENABLE_ALL="-qinfo=all -qformat=all"
+      DISABLED_WARNINGS=""
       ;;
   esac
   AC_SUBST(DISABLE_WARNING_PREFIX)
   AC_SUBST(BUILD_CC_DISABLE_WARNING_PREFIX)
   AC_SUBST(CFLAGS_WARNINGS_ARE_ERRORS)
+  AC_SUBST(DISABLED_WARNINGS)
+  AC_SUBST(DISABLED_WARNINGS_C)
+  AC_SUBST(DISABLED_WARNINGS_CXX)
 ])
 
 AC_DEFUN([FLAGS_SETUP_QUALITY_CHECKS],
@@ -512,11 +555,14 @@
       TOOLCHAIN_CFLAGS_JDK_CONLY="-fno-strict-aliasing" # technically NOT for CXX
     fi
   elif test "x$TOOLCHAIN_TYPE" = xsolstudio; then
-    TOOLCHAIN_CFLAGS_JDK="-mt"
-    TOOLCHAIN_CFLAGS_JDK_CONLY="-xc99=%none -xCC -Xa -v -W0,-noglobal" # C only
+    TOOLCHAIN_FLAGS="-errtags -errfmt"
+    TOOLCHAIN_CFLAGS="-errshort=tags"
+
+    TOOLCHAIN_CFLAGS_JDK="-mt $TOOLCHAIN_FLAGS"
+    TOOLCHAIN_CFLAGS_JDK_CONLY="-xc99=%none -xCC -Xa -W0,-noglobal $TOOLCHAIN_CFLAGS" # C only
     TOOLCHAIN_CFLAGS_JDK_CXXONLY="-features=no%except -norunpath -xnolib" # CXX only
     TOOLCHAIN_CFLAGS_JVM="-template=no%extdef -features=no%split_init \
-        -library=stlport4 -mt -features=no%except"
+        -library=stlport4 -mt -features=no%except $TOOLCHAIN_FLAGS"
     if test "x$DEBUG_LEVEL" = xslowdebug; then
       # Previously -g was used instead of -g0 for slowdebug; this is equivalent
       # to setting +d.
@@ -524,6 +570,7 @@
     fi
 
   elif test "x$TOOLCHAIN_TYPE" = xxlc; then
+    # Suggested additions: -qsrcmsg to get improved error reporting
     TOOLCHAIN_CFLAGS_JDK="-qchars=signed -qfullpath -qsaveopt"  # add on both CFLAGS
     TOOLCHAIN_CFLAGS_JVM="-qtune=balanced \
         -qalias=noansi -qstrict -qtls=default -qlanglvl=c99vla \
@@ -535,37 +582,24 @@
 
   # CFLAGS WARNINGS STUFF
   # Set JVM_CFLAGS warning handling
-  if test "x$TOOLCHAIN_TYPE" = xgcc || test "x$TOOLCHAIN_TYPE" = xclang; then
-    # COMMON to gcc and clang
-    WARNING_CFLAGS_JVM="-Wpointer-arith -Wsign-compare -Wunused-function"
-    if ! HOTSPOT_CHECK_JVM_VARIANT(zero); then
-      # Non-zero builds have stricter warnings
-      WARNING_CFLAGS_JVM="$WARNING_CFLAGS_JVM -Wundef -Wformat=2"
-    fi
+  if test "x$TOOLCHAIN_TYPE" = xgcc; then
+    WARNING_CFLAGS_JDK_CONLY="$WARNINGS_ENABLE_ALL_CFLAGS"
+    WARNING_CFLAGS_JDK_CXXONLY="$WARNINGS_ENABLE_ALL_CXXFLAGS"
+    WARNING_CFLAGS_JVM="$WARNINGS_ENABLE_ALL_CXXFLAGS"
 
-  fi
-  if test "x$TOOLCHAIN_TYPE" = xgcc; then
-    WARNING_CFLAGS_JDK="-Wall -Wextra -Wno-unused -Wno-unused-parameter -Wformat=2"
-    WARNING_CFLAGS_JVM="$WARNING_CFLAGS_JVM -Wunused-value -Woverloaded-virtual -Wreorder"
+  elif test "x$TOOLCHAIN_TYPE" = xclang; then
+    WARNING_CFLAGS="$WARNINGS_ENABLE_ALL"
 
-    if ! HOTSPOT_CHECK_JVM_VARIANT(zero); then
-      # Non-zero builds have stricter warnings
-      WARNING_CFLAGS_JVM="$WARNING_CFLAGS_JVM -Wreturn-type"
-    fi
-  elif test "x$TOOLCHAIN_TYPE" = xclang; then
-    WARNING_CFLAGS_JVM="$WARNING_CFLAGS_JVM -Wno-deprecated -Wreorder"
-    if test "x$OPENJDK_TARGET_OS" = xlinux; then
-      WARNING_CFLAGS_JVM="$WARNING_CFLAGS_JVM -Wno-sometimes-uninitialized"
-      WARNING_CFLAGS_JDK="-Wall -Wextra -Wno-unused -Wno-unused-parameter -Wformat=2"
-    fi
   elif test "x$TOOLCHAIN_TYPE" = xsolstudio; then
-    WARNING_CFLAGS_JDK_CONLY="-errshort=tags"
-    WARNING_CFLAGS_JDK_CXXONLY="+w"
-    WARNING_CFLAGS_JDK="-errtags=yes -errfmt"
+    WARNING_CFLAGS_JDK_CONLY="$WARNINGS_ENABLE_ALL_CFLAGS"
+    WARNING_CFLAGS_JDK_CXXONLY="$WARNINGS_ENABLE_ALL_CXXFLAGS"
+    WARNING_CFLAGS_JVM="$WARNINGS_ENABLE_ALL_CXXFLAGS"
+
   elif test "x$TOOLCHAIN_TYPE" = xmicrosoft; then
-    WARNING_CFLAGS="-W3"
-    WARNING_CFLAGS_JDK="-wd4800"
-    WARNING_CFLAGS_JVM="-wd4800"
+    WARNING_CFLAGS="$WARNINGS_ENABLE_ALL"
+
+  elif test "x$TOOLCHAIN_TYPE" = xxlc; then
+    WARNING_CFLAGS=""  # currently left empty
   fi
 
   # Set some additional per-OS defines.
--- a/make/autoconf/flags-ldflags.m4	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/autoconf/flags-ldflags.m4	Thu Sep 27 22:04:49 2018 +0200
@@ -139,6 +139,14 @@
     fi
   fi
 
+  # Setup warning flags
+  if test "x$TOOLCHAIN_TYPE" = xsolstudio; then
+    LDFLAGS_WARNINGS_ARE_ERRORS="-Wl,-z,fatal-warnings"
+  else
+    LDFLAGS_WARNINGS_ARE_ERRORS=""
+  fi
+  AC_SUBST(LDFLAGS_WARNINGS_ARE_ERRORS)
+
   # Setup LDFLAGS for linking executables
   if test "x$TOOLCHAIN_TYPE" = xgcc; then
     EXECUTABLE_LDFLAGS="$EXECUTABLE_LDFLAGS -Wl,--allow-shlib-undefined"
--- a/make/autoconf/spec.gmk.in	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/autoconf/spec.gmk.in	Thu Sep 27 22:04:49 2018 +0200
@@ -423,6 +423,10 @@
 
 DISABLE_WARNING_PREFIX := @DISABLE_WARNING_PREFIX@
 CFLAGS_WARNINGS_ARE_ERRORS:=@CFLAGS_WARNINGS_ARE_ERRORS@
+LDFLAGS_WARNINGS_ARE_ERRORS:=@LDFLAGS_WARNINGS_ARE_ERRORS@
+DISABLED_WARNINGS := @DISABLED_WARNINGS@
+DISABLED_WARNINGS_C := @DISABLED_WARNINGS_C@
+DISABLED_WARNINGS_CXX := @DISABLED_WARNINGS_CXX@
 
 # A global flag (true or false) determining if native warnings are considered errors.
 WARNINGS_AS_ERRORS := @WARNINGS_AS_ERRORS@
@@ -719,7 +723,7 @@
 XATTR:=@XATTR@
 JT_HOME:=@JT_HOME@
 JTREGEXE:=@JTREGEXE@
-JIB_JAR:=@JIB_JAR@
+JIB_HOME:=@JIB_HOME@
 XCODEBUILD=@XCODEBUILD@
 DTRACE := @DTRACE@
 FIXPATH:=@FIXPATH@
--- a/make/autoconf/toolchain.m4	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/autoconf/toolchain.m4	Thu Sep 27 22:04:49 2018 +0200
@@ -1144,5 +1144,5 @@
     fi
   fi
 
-  AC_SUBST(JIB_JAR)
+  AC_SUBST(JIB_HOME)
 ])
--- a/make/common/NativeCompilation.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/common/NativeCompilation.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -594,9 +594,13 @@
   # Pick up disabled warnings, if possible on this platform.
   ifneq ($(DISABLE_WARNING_PREFIX), )
     $1_EXTRA_CFLAGS += $$(addprefix $(DISABLE_WARNING_PREFIX), \
+        $$(DISABLED_WARNINGS) \
+        $$(DISABLED_WARNINGS_C) \
         $$($1_DISABLED_WARNINGS_$(TOOLCHAIN_TYPE)) \
         $$($1_DISABLED_WARNINGS_C_$(TOOLCHAIN_TYPE)))
     $1_EXTRA_CXXFLAGS += $$(addprefix $(DISABLE_WARNING_PREFIX), \
+        $$(DISABLED_WARNINGS) \
+        $$(DISABLED_WARNINGS_CXX) \
         $$($1_DISABLED_WARNINGS_$(TOOLCHAIN_TYPE)) \
         $$($1_DISABLED_WARNINGS_CXX_$(TOOLCHAIN_TYPE)))
   endif
@@ -614,6 +618,7 @@
   ifeq ($$($1_WARNINGS_AS_ERRORS_$(TOOLCHAIN_TYPE)), true)
     $1_EXTRA_CFLAGS += $(CFLAGS_WARNINGS_ARE_ERRORS)
     $1_EXTRA_CXXFLAGS += $(CFLAGS_WARNINGS_ARE_ERRORS)
+    $1_EXTRA_LDFLAGS += $(LDFLAGS_WARNINGS_ARE_ERRORS)
   endif
 
   ifeq (NONE, $$($1_OPTIMIZATION))
@@ -780,8 +785,8 @@
 
   # Pickup extra OPENJDK_TARGET_OS_TYPE and/or OPENJDK_TARGET_OS dependent variables
   # for LDFLAGS and LIBS
-  $1_EXTRA_LDFLAGS := $$($1_LDFLAGS_$(OPENJDK_TARGET_OS_TYPE)) $$($1_LDFLAGS_$(OPENJDK_TARGET_OS))
-  $1_EXTRA_LIBS := $$($1_LIBS_$(OPENJDK_TARGET_OS_TYPE)) $$($1_LIBS_$(OPENJDK_TARGET_OS))
+  $1_EXTRA_LDFLAGS += $$($1_LDFLAGS_$(OPENJDK_TARGET_OS_TYPE)) $$($1_LDFLAGS_$(OPENJDK_TARGET_OS))
+  $1_EXTRA_LIBS += $$($1_LIBS_$(OPENJDK_TARGET_OS_TYPE)) $$($1_LIBS_$(OPENJDK_TARGET_OS))
   ifneq ($$($1_REAL_MAPFILE), )
     $1_EXTRA_LDFLAGS += $(call SET_SHARED_LIBRARY_MAPFILE,$$($1_REAL_MAPFILE))
   endif
--- a/make/common/TestFilesCompilation.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/common/TestFilesCompilation.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -97,8 +97,10 @@
         CFLAGS := $$($1_BASE_CFLAGS) $$($1_CFLAGS) $$($1_CFLAGS_$$(name)), \
         CXXFLAGS := $$($1_BASE_CXXFLAGS) $$($1_CFLAGS) $$($1_CFLAGS_$$(name)), \
         LDFLAGS := $$($1_LDFLAGS) $$($1_LDFLAGS_$$(name)), \
+        DISABLED_WARNINGS_gcc := format undef unused-function unused-value, \
+        DISABLED_WARNINGS_clang := undef format-nonliteral \
+            missing-field-initializers sometimes-uninitialized, \
         DISABLED_WARNINGS_CXX_solstudio := wvarhidenmem, \
-        DISABLED_WARNINGS_CXX_gcc := format, \
         LIBS := $$($1_LIBS_$$(name)), \
         TOOLCHAIN := $(if $$(filter %.cpp, $$(file)), TOOLCHAIN_LINK_CXX, TOOLCHAIN_DEFAULT), \
         OPTIMIZATION := $$(if $$($1_OPTIMIZATION_$$(name)),$$($1_OPTIMIZATION_$$(name)),LOW), \
--- a/make/conf/jib-profiles.js	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/conf/jib-profiles.js	Thu Sep 27 22:04:49 2018 +0200
@@ -840,7 +840,7 @@
         linux_x64: "gcc7.3.0-OEL6.4+1.0",
         macosx_x64: "Xcode9.4-MacOSX10.13+1.0",
         solaris_x64: "SS12u4-Solaris11u1+1.0",
-        solaris_sparcv9: "SS12u4-Solaris11u1+1.1",
+        solaris_sparcv9: "SS12u6-Solaris11u3+1.0",
         windows_x64: "VS2017-15.5.5+1.0",
         linux_aarch64: (input.profile != null && input.profile.indexOf("arm64") >= 0
                     ? "gcc-linaro-aarch64-linux-gnu-4.8-2013.11_linux+1.0"
@@ -961,9 +961,9 @@
             ext: "zip",
             classifier: "distribution",
             revision: "3.0-SNAPSHOT",
-            environment_name: "JIB_JAR",
+            environment_name: "JIB_HOME",
             environment_value: input.get("jib", "install_path")
-                + "/jib-3.0-SNAPSHOT-distribution/lib/jib-3.0-SNAPSHOT.jar"
+                + "/jib-3.0-SNAPSHOT-distribution"
         },
 
         ant: {
--- a/make/devkit/createSolarisDevkit12.6.sh	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/devkit/createSolarisDevkit12.6.sh	Thu Sep 27 22:04:49 2018 +0200
@@ -34,18 +34,19 @@
 # install in a separate temporary image.
 #
 # The Solaris Studio installation must contain at least these packages:
-# developer/developerstudio-126/backend               12.6-1.0.0.0               i--
-# developer/developerstudio-126/c++                   12.6-1.0.0.0               i--
-# developer/developerstudio-126/cc                    12.6-1.0.0.0               i--
-# developer/developerstudio-126/dbx (solarisstudio)   12.6-1.0.0.0               i--
-# developer/developerstudio-126/library/c++-libs      12.6-1.0.0.0               i--
-# developer/developerstudio-126/library/math-libs     12.6-1.0.0.0               i--
-# developer/developerstudio-126/library/c-libs        12.6-1.0.0.0               i--
-# developer/developerstudio-126/library/studio-gccrt  12.6-1.0.0.0               i--
-# developer/developerstudio-126/studio-common         12.6-1.0.0.0               i--
-# developer/developerstudio-126/studio-ja             12.6-1.0.0.0               i--
-# developer/developerstudio-126/studio-legal          12.6-1.0.0.0               i--
-# developer/developerstudio-126/studio-zhCN           12.6-1.0.0.0               i--
+#developer/developerstudio-126/backend                12.6-1.0.0.1
+#developer/developerstudio-126/c++                    12.6-1.0.2.0
+#developer/developerstudio-126/cc                     12.6-1.0.1.0
+#developer/developerstudio-126/dbx                    12.6-1.0.0.1
+#developer/developerstudio-126/library/c++-libs       12.6-1.0.2.0
+#developer/developerstudio-126/library/c-libs         12.6-1.0.0.1
+#developer/developerstudio-126/library/f90-libs       12.6-1.0.0.1
+#developer/developerstudio-126/library/math-libs      12.6-1.0.0.1
+#developer/developerstudio-126/library/studio-gccrt   12.6-1.0.0.1
+#developer/developerstudio-126/studio-common          12.6-1.0.0.1
+#developer/developerstudio-126/studio-ja              12.6-1.0.0.1
+#developer/developerstudio-126/studio-legal           12.6-1.0.0.1
+#developer/developerstudio-126/studio-zhCN            12.6-1.0.0.1
 #
 # erik.joelsson@oracle.com
 
@@ -93,7 +94,7 @@
   pkg -R $INSTALL_ROOT set-publisher -P -g ${PUBLISHER_URI} solaris
   sudo pkg -R $INSTALL_ROOT install --accept entire@$SOLARIS_ENTIRE_VERSION \
       system/install developer/gnu-binutils system/library/mmheap system/picl \
-      developer/assembler
+      developer/assembler system/library/freetype-2
 else
   echo "Skipping installing packages"
 fi
--- a/make/hotspot/gensrc/GensrcDtrace.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/hotspot/gensrc/GensrcDtrace.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -74,6 +74,8 @@
         TOOLCHAIN := $(TOOLCHAIN_BUILD), \
         LDFLAGS := -m64, \
         CFLAGS := -m64 $(JVM_CFLAGS), \
+        DISABLED_WARNINGS_solstudio := hidef w_novirtualdescr unknownpragma \
+            doubunder nokeyworddefine wunreachable, \
         EXTRA_DEPS := $(JVMTI_H), \
         OBJECT_DIR := $(JVM_VARIANT_OUTPUTDIR)/tools/dtrace-gen-offsets/objs, \
         OUTPUT_DIR := $(JVM_VARIANT_OUTPUTDIR)/tools/dtrace-gen-offsets, \
--- a/make/hotspot/lib/CompileGtest.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/hotspot/lib/CompileGtest.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -73,11 +73,14 @@
     CFLAGS_windows := -EHsc, \
     CFLAGS_solaris := -DGTEST_HAS_EXCEPTIONS=0 -library=stlport4, \
     CFLAGS_macosx := -DGTEST_OS_MAC=1, \
-    DISABLED_WARNINGS_gcc := undef, \
-    DISABLED_WARNINGS_clang := undef switch format-nonliteral \
-        tautological-undefined-compare $(BUILD_LIBJVM_DISABLED_WARNINGS_clang), \
-    DISABLED_WARNINGS_solstudio := identexpected, \
-    DISABLED_WARNINGS_CXX_microsoft := 4996, \
+    DISABLED_WARNINGS_gcc := $(DISABLED_WARNINGS_gcc) \
+        undef, \
+    DISABLED_WARNINGS_clang := $(DISABLED_WARNINGS_clang) \
+        undef switch format-nonliteral tautological-undefined-compare, \
+    DISABLED_WARNINGS_solstudio := $(DISABLED_WARNINGS_solstudio) \
+        identexpected, \
+    DISABLED_WARNINGS_microsoft := $(DISABLED_WARNINGS_microsoft) \
+        4996, \
     LDFLAGS := $(JVM_LDFLAGS), \
     LDFLAGS_solaris := -library=stlport4 $(call SET_SHARED_LIBRARY_ORIGIN), \
     LIBS := $(JVM_LIBS), \
--- a/make/hotspot/lib/CompileJvm.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/hotspot/lib/CompileJvm.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -80,6 +80,35 @@
     #
 
 ################################################################################
+# Disabled warnings
+
+DISABLED_WARNINGS_gcc := extra parentheses comment unknown-pragmas address \
+    delete-non-virtual-dtor char-subscripts array-bounds int-in-bool-context \
+    ignored-qualifiers  missing-field-initializers implicit-fallthrough \
+    empty-body strict-overflow sequence-point maybe-uninitialized \
+    misleading-indentation
+
+ifeq ($(call check-jvm-feature, zero), true)
+  DISABLED_WARNINGS_gcc += return-type switch
+endif
+
+DISABLED_WARNINGS_clang := tautological-compare deprecated-declarations \
+    undefined-var-template sometimes-uninitialized unknown-pragmas \
+    delete-non-virtual-dtor missing-braces char-subscripts \
+    ignored-qualifiers missing-field-initializers mismatched-tags
+
+DISABLED_WARNINGS_solstudio := labelnotused hidef w_novirtualdescr inlafteruse \
+    unknownpragma doubunder w_enumnotused w_toomanyenumnotused \
+    wvarhidenmem wunreachable wnoretvalue notemsource
+
+DISABLED_WARNINGS_xlc := 1540-0216 1540-0198 1540-1090 1540-1639 1540-1088 \
+    1500-010
+
+DISABLED_WARNINGS_microsoft :=
+
+
+
+################################################################################
 # Platform specific setup
 
 # ARM source selection
@@ -115,7 +144,7 @@
   endif
   # Exclude warnings in devstudio 12.6
   ifeq ($(CC_VERSION_NUMBER), 5.15)
-    DISABLED_WARNINGS_solstudio := SEC_ARR_OUTSIDE_BOUND_READ \
+    DISABLED_WARNINGS_solstudio += SEC_ARR_OUTSIDE_BOUND_READ \
       SEC_ARR_OUTSIDE_BOUND_WRITE
   endif
 endif
@@ -158,10 +187,11 @@
     CFLAGS := $(JVM_CFLAGS), \
     vm_version.cpp_CXXFLAGS := $(CFLAGS_VM_VERSION), \
     arguments.cpp_CXXFLAGS := $(CFLAGS_VM_VERSION), \
-    DISABLED_WARNINGS_clang := tautological-compare, \
+    DISABLED_WARNINGS_gcc := $(DISABLED_WARNINGS_gcc), \
+    DISABLED_WARNINGS_clang := $(DISABLED_WARNINGS_clang), \
     DISABLED_WARNINGS_solstudio := $(DISABLED_WARNINGS_solstudio), \
-    DISABLED_WARNINGS_xlc := 1540-0216 1540-0198 1540-1090 1540-1639 \
-        1540-1088 1500-010, \
+    DISABLED_WARNINGS_xlc := $(DISABLED_WARNINGS_xlc), \
+    DISABLED_WARNINGS_microsoft := $(DISABLED_WARNINGS_microsoft), \
     ASFLAGS := $(JVM_ASFLAGS), \
     LDFLAGS := $(JVM_LDFLAGS), \
     LIBS := $(JVM_LIBS), \
--- a/make/langtools/src/classes/build/tools/symbolgenerator/CreateSymbols.java	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/langtools/src/classes/build/tools/symbolgenerator/CreateSymbols.java	Thu Sep 27 22:04:49 2018 +0200
@@ -209,13 +209,15 @@
      * {@code ctDescriptionFile}, using the file as a recipe to create the sigfiles.
      */
     @SuppressWarnings("unchecked")
-    public void createSymbols(String ctDescriptionFileExtra, String ctDescriptionFile, String ctSymLocation, CtSymKind ctSymKind) throws IOException {
+    public void createSymbols(String ctDescriptionFileExtra, String ctDescriptionFile, String ctSymLocation) throws IOException {
         LoadDescriptions data = load(ctDescriptionFileExtra != null ? Paths.get(ctDescriptionFileExtra)
                                                                     : null,
                                      Paths.get(ctDescriptionFile), null);
 
         splitHeaders(data.classes);
 
+        Map<String, Map<Character, String>> package2Version2Module = new HashMap<>();
+
         for (ModuleDescription md : data.modules.values()) {
             for (ModuleHeaderDescription mhd : md.header) {
                 List<String> versionsList =
@@ -224,26 +226,41 @@
                                         md,
                                         mhd,
                                         versionsList);
+                mhd.exports.stream().forEach(pkg -> {
+                    for (char v : mhd.versions.toCharArray()) {
+                        package2Version2Module.computeIfAbsent(pkg, dummy -> new HashMap<>()).put(v, md.name);
+                    }
+                });
             }
         }
 
         for (ClassDescription classDescription : data.classes) {
+            Map<Character, String> version2Module = package2Version2Module.getOrDefault(classDescription.packge().replace('.', '/'), Collections.emptyMap());
             for (ClassHeaderDescription header : classDescription.header) {
-                switch (ctSymKind) {
-                    case JOINED_VERSIONS:
-                        Set<String> jointVersions = new HashSet<>();
-                        jointVersions.add(header.versions);
-                        limitJointVersion(jointVersions, classDescription.fields);
-                        limitJointVersion(jointVersions, classDescription.methods);
-                        writeClassesForVersions(ctSymLocation, classDescription, header, jointVersions);
-                        break;
-                    case SEPARATE:
-                        Set<String> versions = new HashSet<>();
-                        for (char v : header.versions.toCharArray()) {
-                            versions.add("" + v);
+                Set<String> jointVersions = new HashSet<>();
+                jointVersions.add(header.versions);
+                limitJointVersion(jointVersions, classDescription.fields);
+                limitJointVersion(jointVersions, classDescription.methods);
+                Map<String, StringBuilder> module2Versions = new HashMap<>();
+                for (char v : header.versions.toCharArray()) {
+                    String module = version2Module.get(v);
+                    if (module == null) {
+                        if (v >= '9') {
+                            throw new AssertionError("No module for " + classDescription.name +
+                                                     " and version " + v);
                         }
-                        writeClassesForVersions(ctSymLocation, classDescription, header, versions);
-                        break;
+                        module = version2Module.get('9');
+                        if (module == null) {
+                            module = "java.base";
+                        }
+                    }
+                    module2Versions.computeIfAbsent(module, dummy -> new StringBuilder()).append(v);
+                }
+                for (Entry<String, StringBuilder> e : module2Versions.entrySet()) {
+                    Set<String> currentVersions = new HashSet<>(jointVersions);
+                    limitJointVersion(currentVersions, e.getValue().toString());
+                    currentVersions = currentVersions.stream().filter(vers -> !disjoint(vers, e.getValue().toString())).collect(Collectors.toSet());
+                    writeClassesForVersions(ctSymLocation, classDescription, header, e.getKey(), currentVersions);
                 }
             }
         }
@@ -591,7 +608,7 @@
                     newHeader.innerClasses = header.innerClasses;
                     newHeader.runtimeAnnotations = header.runtimeAnnotations;
                     newHeader.signature = header.signature;
-                    newHeader.versions = reduce(versions, header.versions);
+                    newHeader.versions = reduce(header.versions, versions);
 
                     newHeaders.add(newHeader);
                 }
@@ -603,26 +620,30 @@
 
     void limitJointVersion(Set<String> jointVersions, List<? extends FeatureDescription> features) {
         for (FeatureDescription feature : features) {
-            for (String version : jointVersions) {
-                if (!containsAll(feature.versions, version) &&
-                    !disjoint(feature.versions, version)) {
-                    StringBuilder featurePart = new StringBuilder();
-                    StringBuilder otherPart = new StringBuilder();
-                    for (char v : version.toCharArray()) {
-                        if (feature.versions.indexOf(v) != (-1)) {
-                            featurePart.append(v);
-                        } else {
-                            otherPart.append(v);
-                        }
+            limitJointVersion(jointVersions, feature.versions);
+        }
+    }
+
+    void limitJointVersion(Set<String> jointVersions, String versions) {
+        for (String version : jointVersions) {
+            if (!containsAll(versions, version) &&
+                !disjoint(versions, version)) {
+                StringBuilder featurePart = new StringBuilder();
+                StringBuilder otherPart = new StringBuilder();
+                for (char v : version.toCharArray()) {
+                    if (versions.indexOf(v) != (-1)) {
+                        featurePart.append(v);
+                    } else {
+                        otherPart.append(v);
                     }
-                    jointVersions.remove(version);
-                    if (featurePart.length() == 0 || otherPart.length() == 0) {
-                        throw new AssertionError();
-                    }
-                    jointVersions.add(featurePart.toString());
-                    jointVersions.add(otherPart.toString());
-                    break;
                 }
+                jointVersions.remove(version);
+                if (featurePart.length() == 0 || otherPart.length() == 0) {
+                    throw new AssertionError();
+                }
+                jointVersions.add(featurePart.toString());
+                jointVersions.add(otherPart.toString());
+                break;
             }
         }
     }
@@ -646,10 +667,11 @@
     void writeClassesForVersions(String ctSymLocation,
                                  ClassDescription classDescription,
                                  ClassHeaderDescription header,
+                                 String module,
                                  Iterable<String> versions)
             throws IOException {
         for (String ver : versions) {
-            writeClass(ctSymLocation, classDescription, header, ver);
+            writeClass(ctSymLocation, classDescription, header, module, ver);
         }
     }
 
@@ -663,11 +685,6 @@
         }
     }
 
-    public enum CtSymKind {
-        JOINED_VERSIONS,
-        SEPARATE;
-    }
-
     //<editor-fold defaultstate="collapsed" desc="Class Writing">
     void writeModule(String ctSymLocation,
                     ModuleDescription moduleDescription,
@@ -697,7 +714,7 @@
                 attributes);
 
         Path outputClassFile = Paths.get(ctSymLocation,
-                                         version + "-modules",
+                                         version,
                                          moduleDescription.name,
                                          "module-info" + EXTENSION);
 
@@ -713,6 +730,7 @@
     void writeClass(String ctSymLocation,
                     ClassDescription classDescription,
                     ClassHeaderDescription header,
+                    String module,
                     String version) throws IOException {
         List<CPInfo> constantPool = new ArrayList<>();
         constantPool.add(null);
@@ -765,7 +783,13 @@
                 methods.toArray(new Method[0]),
                 attributes);
 
-        Path outputClassFile = Paths.get(ctSymLocation, version, classDescription.name + EXTENSION);
+        Path outputClassFile = Paths.get(ctSymLocation, version);
+
+        if (module != null) {
+            outputClassFile = outputClassFile.resolve(module);
+        }
+
+        outputClassFile = outputClassFile.resolve(classDescription.name + EXTENSION);
 
         Files.createDirectories(outputClassFile.getParent());
 
@@ -3652,8 +3676,7 @@
 
                 new CreateSymbols().createSymbols(ctDescriptionFileExtra,
                                                   ctDescriptionFile,
-                                                  ctSymLocation,
-                                                  CtSymKind.JOINED_VERSIONS);
+                                                  ctSymLocation);
                 break;
         }
     }
--- a/make/launcher/Launcher-jdk.pack.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/launcher/Launcher-jdk.pack.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -90,6 +90,7 @@
     CFLAGS_linux := -fPIC, \
     CFLAGS_solaris := -KPIC, \
     CFLAGS_macosx := -fPIC, \
+    DISABLED_WARNINGS_clang := format-nonliteral, \
     LDFLAGS := $(UNPACKEXE_ZIPOBJS) \
         $(LDFLAGS_JDKEXE) $(LDFLAGS_CXX_JDK) \
         $(call SET_SHARED_LIBRARY_ORIGIN), \
--- a/make/launcher/LauncherCommon.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/launcher/LauncherCommon.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -36,9 +36,7 @@
   LAUNCHER_CFLAGS += -fvisibility=hidden
   LDFLAGS_JDKEXE += -Wl,--exclude-libs,ALL
 else ifeq ($(TOOLCHAIN_TYPE), clang)
-  ifneq ($(OPENJDK_TARGET_OS), macosx)
-    LAUNCHER_CFLAGS += -fvisibility=hidden
-  endif
+  LAUNCHER_CFLAGS += -fvisibility=hidden
 else ifeq ($(TOOLCHAIN_TYPE), solstudio)
   LAUNCHER_CFLAGS += -xldscope=hidden
 else ifeq ($(TOOLCHAIN_TYPE), xlc)
@@ -152,6 +150,7 @@
       CFLAGS_linux := -fPIC, \
       CFLAGS_solaris := -KPIC -DHAVE_GETHRTIME, \
       CFLAGS_windows := $$($1_CFLAGS_windows), \
+      DISABLED_WARNINGS_gcc := unused-function, \
       LDFLAGS := $$(LDFLAGS_JDKEXE) \
           $$(call SET_EXECUTABLE_ORIGIN) \
           $$($1_LDFLAGS), \
--- a/make/lib/Awt2dLibraries.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/lib/Awt2dLibraries.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -55,6 +55,7 @@
     OPTIMIZATION := HIGHEST, \
     CFLAGS := $(CFLAGS_JDKLIB) \
         $(BUILD_LIBMLIB_CFLAGS), \
+    DISABLED_WARNINGS_gcc := unused-function, \
     LDFLAGS := $(LDFLAGS_JDKLIB) \
         $(call SET_SHARED_LIBRARY_ORIGIN), \
     LIBS := $(JDKLIB_LIBS), \
@@ -225,8 +226,9 @@
     CFLAGS := $(CFLAGS_JDKLIB) $(LIBAWT_CFLAGS), \
     EXTRA_HEADER_DIRS := $(LIBAWT_EXTRA_HEADER_DIRS), \
     DISABLED_WARNINGS_gcc := sign-compare unused-result maybe-uninitialized \
-        format-nonliteral parentheses, \
-    DISABLED_WARNINGS_clang := logical-op-parentheses extern-initializer, \
+        format-nonliteral parentheses unused-value unused-function, \
+    DISABLED_WARNINGS_clang := logical-op-parentheses extern-initializer \
+        sign-compare format-nonliteral, \
     DISABLED_WARNINGS_solstudio := E_DECLARATION_IN_CODE, \
     DISABLED_WARNINGS_microsoft := 4244 4267 4996, \
     ASFLAGS := $(LIBAWT_ASFLAGS), \
@@ -334,7 +336,9 @@
         DISABLED_WARNINGS_gcc := type-limits pointer-to-int-cast \
             unused-result maybe-uninitialized format \
             format-security int-to-pointer-cast parentheses \
-            implicit-fallthrough, \
+            implicit-fallthrough undef unused-function, \
+        DISABLED_WARNINGS_clang := parentheses format undef \
+            logical-op-parentheses format-nonliteral int-conversion, \
         DISABLED_WARNINGS_solstudio := E_DECLARATION_IN_CODE \
             E_ASSIGNMENT_TYPE_MISMATCH E_NON_CONST_INIT, \
         LDFLAGS := $(LDFLAGS_JDKLIB) \
@@ -380,8 +384,9 @@
         common/awt/debug \
         libawt/java2d, \
     HEADERS_FROM_SRC := $(LIBLCMS_HEADERS_FROM_SRC), \
-    DISABLED_WARNINGS_gcc := format-nonliteral type-limits misleading-indentation, \
-    DISABLED_WARNINGS_clang := tautological-compare, \
+    DISABLED_WARNINGS_gcc := format-nonliteral type-limits \
+        misleading-indentation undef unused-function, \
+    DISABLED_WARNINGS_clang := tautological-compare format-nonliteral undef, \
     DISABLED_WARNINGS_solstudio := E_STATEMENT_NOT_REACHED, \
     DISABLED_WARNINGS_microsoft := 4819, \
     LDFLAGS := $(LDFLAGS_JDKLIB) \
@@ -466,6 +471,7 @@
       CFLAGS := $(CFLAGS_JDKLIB) \
           $(LIBAWT_HEADLESS_CFLAGS), \
       EXTRA_HEADER_DIRS := $(LIBAWT_HEADLESS_EXTRA_HEADER_DIRS), \
+      DISABLED_WARNINGS_gcc := unused-function, \
       DISABLED_WARNINGS_xlc := 1506-356, \
       DISABLED_WARNINGS_solstudio := E_EMPTY_TRANSLATION_UNIT, \
       LDFLAGS := $(LDFLAGS_JDKLIB) \
@@ -604,11 +610,13 @@
     EXTRA_HEADER_DIRS := $(LIBFONTMANAGER_EXTRA_HEADER_DIRS), \
     WARNINGS_AS_ERRORS_xlc := false, \
     DISABLED_WARNINGS_gcc := sign-compare int-to-pointer-cast \
-        type-limits missing-field-initializers implicit-fallthrough strict-aliasing, \
+        type-limits missing-field-initializers implicit-fallthrough \
+        strict-aliasing undef unused-function, \
     DISABLED_WARNINGS_CXX_gcc := reorder delete-non-virtual-dtor strict-overflow \
         maybe-uninitialized, \
     DISABLED_WARNINGS_clang := unused-value incompatible-pointer-types \
-        tautological-constant-out-of-range-compare int-to-pointer-cast, \
+        tautological-constant-out-of-range-compare int-to-pointer-cast \
+        sign-compare undef missing-field-initializers, \
     DISABLED_WARNINGS_C_solstudio = \
         E_INTEGER_OVERFLOW_DETECTED \
         E_ARG_INCOMPATIBLE_WITH_ARG_L \
@@ -715,6 +723,7 @@
       OPTIMIZATION := LOW, \
       CFLAGS := $(CFLAGS_JDKLIB) \
           $(JAWT_CFLAGS), \
+      DISABLED_WARNINGS_clang := sign-compare, \
       EXTRA_HEADER_DIRS := \
           include \
           common/awt, \
@@ -848,8 +857,9 @@
           $(GIFLIB_CFLAGS) $(LIBJPEG_CFLAGS) $(PNG_CFLAGS) $(LIBZ_CFLAGS), \
       EXTRA_HEADER_DIRS := $(LIBSPLASHSCREEN_HEADER_DIRS), \
       DISABLED_WARNINGS_gcc := sign-compare type-limits unused-result \
-          maybe-uninitialized shift-negative-value implicit-fallthrough, \
-      DISABLED_WARNINGS_clang := incompatible-pointer-types, \
+          maybe-uninitialized shift-negative-value implicit-fallthrough \
+          unused-function, \
+      DISABLED_WARNINGS_clang := incompatible-pointer-types sign-compare, \
       DISABLED_WARNINGS_solstudio := E_NEWLINE_NOT_LAST E_DECLARATION_IN_CODE \
           E_STATEMENT_NOT_REACHED, \
       DISABLED_WARNINGS_microsoft := 4018 4244 4267, \
@@ -908,7 +918,9 @@
       EXTRA_HEADER_DIRS := $(LIBAWT_LWAWT_EXTRA_HEADER_DIRS), \
       DISABLED_WARNINGS_clang := incomplete-implementation enum-conversion \
           deprecated-declarations objc-method-access bitwise-op-parentheses \
-          incompatible-pointer-types parentheses-equality extra-tokens, \
+          incompatible-pointer-types parentheses-equality extra-tokens \
+          sign-compare semicolon-before-method-body format-nonliteral undef \
+          pointer-arith, \
       LDFLAGS := $(LDFLAGS_JDKLIB) \
           $(call SET_SHARED_LIBRARY_ORIGIN) \
           -L$(INSTALL_LIBRARIES_HERE), \
@@ -949,7 +961,7 @@
       EXTRA_HEADER_DIRS := \
           libawt_lwawt/awt \
           libosxapp, \
-      DISABLED_WARNINGS_clang := deprecated-declarations, \
+      DISABLED_WARNINGS_clang := deprecated-declarations sign-compare, \
       LDFLAGS := $(LDFLAGS_JDKLIB) \
           $(call SET_SHARED_LIBRARY_ORIGIN) \
           -Wl$(COMMA)-rpath$(COMMA)@loader_path \
--- a/make/lib/CoreLibraries.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/lib/CoreLibraries.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -59,6 +59,7 @@
     CFLAGS_windows_debug := -DLOGGING, \
     CFLAGS_aix := -qfloat=nomaf, \
     DISABLED_WARNINGS_gcc := sign-compare misleading-indentation array-bounds, \
+    DISABLED_WARNINGS_clang := sign-compare, \
     DISABLED_WARNINGS_microsoft := 4146 4244 4018, \
     ARFLAGS := $(ARFLAGS), \
     OBJECT_DIR := $(SUPPORT_OUTPUTDIR)/native/$(MODULE)/libfdlibm, \
@@ -77,7 +78,7 @@
     NAME := verify, \
     OPTIMIZATION := $(LIBVERIFY_OPTIMIZATION), \
     CFLAGS := $(CFLAGS_JDKLIB), \
-    DISABLED_WARNINGS_gcc := implicit-fallthrough, \
+    DISABLED_WARNINGS_gcc := implicit-fallthrough unused-function, \
     DISABLED_WARNINGS_microsoft := 4244 4267, \
     LDFLAGS := $(LDFLAGS_JDKLIB) \
         $(call SET_SHARED_LIBRARY_ORIGIN), \
@@ -105,7 +106,7 @@
     jdk_util.c_CFLAGS := $(VERSION_CFLAGS), \
     EXTRA_HEADER_DIRS := libfdlibm, \
     WARNINGS_AS_ERRORS_xlc := false, \
-    DISABLED_WARNINGS_gcc := unused-result, \
+    DISABLED_WARNINGS_gcc := unused-result unused-function, \
     DISABLED_WARNINGS_solstudio := E_STATEMENT_NOT_REACHED, \
     LDFLAGS := $(LDFLAGS_JDKLIB) \
         $(call SET_SHARED_LIBRARY_ORIGIN), \
@@ -148,6 +149,7 @@
     CFLAGS := $(CFLAGS_JDKLIB) \
         $(LIBZ_CFLAGS), \
     CFLAGS_unix := $(BUILD_LIBZIP_MMAP) -UDEBUG, \
+    DISABLED_WARNINGS_gcc := unused-function, \
     LDFLAGS := $(LDFLAGS_JDKLIB) \
         $(call SET_SHARED_LIBRARY_ORIGIN), \
     LIBS_unix := -ljvm -ljava $(LIBZ_LIBS), \
@@ -213,6 +215,8 @@
     EXTRA_FILES := $(LIBJLI_EXTRA_FILES), \
     OPTIMIZATION := HIGH, \
     CFLAGS := $(CFLAGS_JDKLIB) $(LIBJLI_CFLAGS), \
+    DISABLED_WARNINGS_gcc := unused-function, \
+    DISABLED_WARNINGS_clang := sometimes-uninitialized format-nonliteral, \
     DISABLED_WARNINGS_solstudio := \
         E_ASM_DISABLES_OPTIMIZATION \
         E_STATEMENT_NOT_REACHED, \
@@ -240,7 +244,7 @@
       EXCLUDE_FILES := $(LIBJLI_EXCLUDE_FILES), \
       EXTRA_FILES := $(LIBJLI_EXTRA_FILES), \
       OPTIMIZATION := HIGH, \
-      CFLAGS := $(STATIC_LIBRARY_FLAGS) $(LIBJLI_CFLAGS_JDKLIB) $(LIBJLI_CFLAGS) \
+      CFLAGS := $(STATIC_LIBRARY_FLAGS) $(CFLAGS_JDKLIB) $(LIBJLI_CFLAGS) \
           $(addprefix -I, $(LIBJLI_SRC_DIRS)), \
       ARFLAGS := $(ARFLAGS), \
       OBJECT_DIR := $(SUPPORT_OUTPUTDIR)/native/$(MODULE)/libjli_static))
--- a/make/lib/Lib-java.base.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/lib/Lib-java.base.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -43,8 +43,9 @@
     NAME := net, \
     OPTIMIZATION := LOW, \
     CFLAGS := $(CFLAGS_JDKLIB), \
-    DISABLED_WARNINGS_gcc := format-nonliteral, \
-    DISABLED_WARNINGS_clang := parentheses-equality constant-logical-operand, \
+    DISABLED_WARNINGS_gcc := format-nonliteral unused-function, \
+    DISABLED_WARNINGS_clang := parentheses-equality constant-logical-operand \
+        format-nonliteral undef, \
     DISABLED_WARNINGS_microsoft := 4244 4047 4133 4996, \
     DISABLED_WARNINGS_solstudio := E_ARG_INCOMPATIBLE_WITH_ARG_L, \
     LDFLAGS := $(LDFLAGS_JDKLIB) \
@@ -71,6 +72,8 @@
     OPTIMIZATION := HIGH, \
     WARNINGS_AS_ERRORS_xlc := false, \
     CFLAGS := $(CFLAGS_JDKLIB), \
+    DISABLED_WARNINGS_gcc := undef, \
+    DISABLED_WARNINGS_clang := undef, \
     EXTRA_HEADER_DIRS := \
         libnio/ch \
         libnio/fs \
@@ -104,7 +107,8 @@
         NAME := osxsecurity, \
         OPTIMIZATION := LOW, \
         CFLAGS := $(CFLAGS_JDKLIB), \
-        DISABLED_WARNINGS_clang := deprecated-declarations, \
+        DISABLED_WARNINGS_clang := deprecated-declarations \
+            missing-method-return-type, \
         LDFLAGS := $(LDFLAGS_JDKLIB) \
             -L$(SUPPORT_OUTPUTDIR)/modules_libs/java.base \
             $(call SET_SHARED_LIBRARY_ORIGIN) \
@@ -130,7 +134,9 @@
   ifeq ($(STATIC_BUILD), false)
     $(eval $(call SetupJdkLibrary, BUILD_LIBJSIG, \
         NAME := jsig, \
+        OPTIMIZATION := LOW, \
         CFLAGS := $(CFLAGS_JDKLIB) $(LIBJSIG_CFLAGS), \
+        DISABLED_WARNINGS_gcc := undef, \
         LDFLAGS := $(LDFLAGS_JDKLIB) \
             $(call SET_SHARED_LIBRARY_ORIGIN), \
         LIBS_linux := $(LIBDL), \
--- a/make/lib/Lib-java.desktop.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/lib/Lib-java.desktop.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -66,6 +66,8 @@
       CFLAGS := $(CFLAGS_JDKLIB) \
           $(LIBJSOUND_CFLAGS), \
       CXXFLAGS := $(CXXFLAGS_JDKLIB) $(LIBJSOUND_CFLAGS), \
+      DISABLED_WARNINGS_gcc := undef, \
+      DISABLED_WARNINGS_clang := undef, \
       LDFLAGS := $(LDFLAGS_JDKLIB) \
           $(call SET_SHARED_LIBRARY_ORIGIN), \
       LIBS_unix := -ljava -ljvm, \
@@ -92,7 +94,7 @@
       OPTIMIZATION := LOW, \
       CFLAGS := $(CFLAGS_JDKLIB), \
       DISABLED_WARNINGS_clang := objc-method-access objc-root-class \
-          deprecated-declarations, \
+          deprecated-declarations format-nonliteral, \
       LDFLAGS := $(LDFLAGS_JDKLIB) \
           $(call SET_SHARED_LIBRARY_ORIGIN), \
       LIBS := \
--- a/make/lib/Lib-java.instrument.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/lib/Lib-java.instrument.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -42,6 +42,7 @@
     CFLAGS := $(CFLAGS_JDKLIB) $(LIBINSTRUMENT_CFLAGS), \
     CFLAGS_debug := -DJPLIS_LOGGING, \
     CFLAGS_release := -DNO_JPLIS_LOGGING, \
+    DISABLED_WARNINGS_gcc := unused-function, \
     EXTRA_HEADER_DIRS := java.base:libjli, \
     LDFLAGS := $(LDFLAGS_JDKLIB) \
         $(call SET_SHARED_LIBRARY_ORIGIN) \
--- a/make/lib/Lib-java.security.jgss.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/lib/Lib-java.security.jgss.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -31,6 +31,8 @@
     NAME := j2gss, \
     OPTIMIZATION := LOW, \
     CFLAGS := $(CFLAGS_JDKLIB), \
+    DISABLED_WARNINGS_gcc := undef, \
+    DISABLED_WARNINGS_clang := undef, \
     LDFLAGS := $(LDFLAGS_JDKLIB) \
         $(call SET_SHARED_LIBRARY_ORIGIN), \
     LIBS := $(LIBDL), \
--- a/make/lib/Lib-jdk.crypto.cryptoki.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/lib/Lib-jdk.crypto.cryptoki.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -31,6 +31,7 @@
     NAME := j2pkcs11, \
     OPTIMIZATION := LOW, \
     CFLAGS := $(CFLAGS_JDKLIB), \
+    DISABLED_WARNINGS_clang := format-nonliteral, \
     LDFLAGS := $(LDFLAGS_JDKLIB) \
         $(call SET_SHARED_LIBRARY_ORIGIN), \
     LIBS_unix := $(LIBDL), \
--- a/make/lib/Lib-jdk.crypto.ec.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/lib/Lib-jdk.crypto.ec.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -35,7 +35,8 @@
       CFLAGS := $(CFLAGS_JDKLIB) \
           -DMP_API_COMPATIBLE -DNSS_ECC_MORE_THAN_SUITE_B, \
       CXXFLAGS := $(CXXFLAGS_JDKLIB), \
-      DISABLED_WARNINGS_gcc := sign-compare implicit-fallthrough, \
+      DISABLED_WARNINGS_gcc := sign-compare implicit-fallthrough unused-value, \
+      DISABLED_WARNINGS_clang := sign-compare, \
       DISABLED_WARNINGS_microsoft := 4101 4244 4146 4018, \
       LDFLAGS := $(LDFLAGS_JDKLIB) $(LDFLAGS_CXX_JDK), \
       LDFLAGS_macosx := $(call SET_SHARED_LIBRARY_ORIGIN), \
--- a/make/lib/Lib-jdk.hotspot.agent.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/lib/Lib-jdk.hotspot.agent.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -54,7 +54,8 @@
     NAME := saproc, \
     OPTIMIZATION := HIGH, \
     DISABLED_WARNINGS_microsoft := 4267, \
-    DISABLED_WARNINGS_gcc := sign-compare, \
+    DISABLED_WARNINGS_gcc := sign-compare pointer-arith, \
+    DISABLED_WARNINGS_clang := sign-compare pointer-arith format-nonliteral, \
     DISABLED_WARNINGS_CXX_solstudio := truncwarn unknownpragma, \
     CFLAGS := $(CFLAGS_JDKLIB) $(SA_CFLAGS), \
     CXXFLAGS := $(CXXFLAGS_JDKLIB) $(SA_CFLAGS) $(SA_CXXFLAGS), \
--- a/make/lib/Lib-jdk.jdwp.agent.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/lib/Lib-jdk.jdwp.agent.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -53,6 +53,9 @@
     NAME := jdwp, \
     OPTIMIZATION := LOW, \
     CFLAGS := $(CFLAGS_JDKLIB) -DJDWP_LOGGING, \
+    DISABLED_WARNINGS_gcc := unused-function, \
+    DISABLED_WARNINGS_clang := sometimes-uninitialized format-nonliteral \
+        self-assign, \
     EXTRA_HEADER_DIRS := \
       include \
       libjdwp/export, \
--- a/make/lib/Lib-jdk.management.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/lib/Lib-jdk.management.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -48,6 +48,7 @@
     NAME := management_ext, \
     OPTIMIZATION := $(LIBMANAGEMENT_EXT_OPTIMIZATION), \
     CFLAGS := $(CFLAGS_JDKLIB) $(LIBMANAGEMENT_EXT_CFLAGS), \
+    DISABLED_WARNINGS_clang := format-nonliteral, \
     LDFLAGS := $(LDFLAGS_JDKLIB) \
         $(call SET_SHARED_LIBRARY_ORIGIN), \
     LIBS := $(JDKLIB_LIBS), \
--- a/make/lib/Lib-jdk.pack.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/lib/Lib-jdk.pack.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -37,6 +37,7 @@
     CFLAGS_release := -DPRODUCT, \
     EXTRA_HEADER_DIRS := $(call GetJavaHeaderDir, java.base), \
     DISABLED_WARNINGS_gcc := implicit-fallthrough, \
+    DISABLED_WARNINGS_clang := format-nonliteral, \
     LDFLAGS := $(LDFLAGS_JDKLIB) $(LDFLAGS_CXX_JDK) \
         $(call SET_SHARED_LIBRARY_ORIGIN), \
     LDFLAGS_windows := -map:$(SUPPORT_OUTPUTDIR)/native/$(MODULE)/unpack.map -debug, \
--- a/make/lib/Lib-jdk.sctp.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/lib/Lib-jdk.sctp.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -34,6 +34,8 @@
         NAME := sctp, \
         OPTIMIZATION := LOW, \
         CFLAGS := $(CFLAGS_JDKLIB), \
+        DISABLED_WARNINGS_gcc := undef, \
+        DISABLED_WARNINGS_clang := undef, \
         EXTRA_HEADER_DIRS := \
             $(call GetJavaHeaderDir, java.base) \
             java.base:libnet \
--- a/make/lib/LibCommon.gmk	Thu Sep 20 22:05:07 2018 +0200
+++ b/make/lib/LibCommon.gmk	Thu Sep 27 22:04:49 2018 +0200
@@ -46,11 +46,9 @@
   LDFLAGS_JDKLIB += -Wl,--exclude-libs,ALL
   EXPORT_ALL_SYMBOLS := -fvisibility=default
 else ifeq ($(TOOLCHAIN_TYPE), clang)
-  ifneq ($(OPENJDK_TARGET_OS), macosx)
-    CFLAGS_JDKLIB += -fvisibility=hidden
-    CXXFLAGS_JDKLIB += -fvisibility=hidden
-    EXPORT_ALL_SYMBOLS := -fvisibility=default
-  endif
+  CFLAGS_JDKLIB += -fvisibility=hidden
+  CXXFLAGS_JDKLIB += -fvisibility=hidden
+  EXPORT_ALL_SYMBOLS := -fvisibility=default
 else ifeq ($(TOOLCHAIN_TYPE), solstudio)
   CFLAGS_JDKLIB += -xldscope=hidden
   CXXFLAGS_JDKLIB += -xldscope=hidden
--- a/src/demo/share/jfc/J2Ddemo/java2d/DemoInstVarsAccessor.java	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/demo/share/jfc/J2Ddemo/java2d/DemoInstVarsAccessor.java	Thu Sep 27 22:04:49 2018 +0200
@@ -1,3 +1,34 @@
+/*
+ *
+ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   - Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of Oracle nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
 package java2d;
 
 import java.awt.Color;
--- a/src/demo/share/jfc/J2Ddemo/java2d/DemoInstVarsAccessorImplBase.java	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/demo/share/jfc/J2Ddemo/java2d/DemoInstVarsAccessorImplBase.java	Thu Sep 27 22:04:49 2018 +0200
@@ -1,3 +1,34 @@
+/*
+ *
+ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   - Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *
+ *   - Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in the
+ *     documentation and/or other materials provided with the distribution.
+ *
+ *   - Neither the name of Oracle nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+ * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
 package java2d;
 
 import java.awt.Color;
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -1025,37 +1025,17 @@
   return exact_log2(elem_size);
 }
 
-void LIR_Assembler::arithmetic_idiv(LIR_Op3* op, bool is_irem) {
-  Register Rdividend = op->in_opr1()->as_register();
-  Register Rdivisor  = op->in_opr2()->as_register();
-  Register Rscratch  = op->in_opr3()->as_register();
-  Register Rresult   = op->result_opr()->as_register();
-  int divisor = -1;
-
-  /*
-  TODO: For some reason, using the Rscratch that gets passed in is
-  not possible because the register allocator does not see the tmp reg
-  as used, and assignes it the same register as Rdividend. We use rscratch1
-   instead.
-
-  assert(Rdividend != Rscratch, "");
-  assert(Rdivisor  != Rscratch, "");
-  */
-
-  if (Rdivisor == noreg && is_power_of_2(divisor)) {
-    // convert division by a power of two into some shifts and logical operations
-  }
-
-  __ corrected_idivl(Rresult, Rdividend, Rdivisor, is_irem, rscratch1);
-}
 
 void LIR_Assembler::emit_op3(LIR_Op3* op) {
   switch (op->code()) {
   case lir_idiv:
-    arithmetic_idiv(op, false);
-    break;
   case lir_irem:
-    arithmetic_idiv(op, true);
+    arithmetic_idiv(op->code(),
+                    op->in_opr1(),
+                    op->in_opr2(),
+                    op->in_opr3(),
+                    op->result_opr(),
+                    op->info());
     break;
   case lir_fmad:
     __ fmaddd(op->result_opr()->as_double_reg(),
@@ -1752,16 +1732,43 @@
       }
 
     } else if (right->is_constant()) {
-      jlong c = right->as_constant_ptr()->as_jlong_bits();
+      jlong c = right->as_constant_ptr()->as_jlong();
       Register dreg = as_reg(dest);
-      assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
-      if (c == 0 && dreg == lreg_lo) {
-        COMMENT("effective nop elided");
-        return;
-      }
       switch (code) {
-        case lir_add: __ add(dreg, lreg_lo, c); break;
-        case lir_sub: __ sub(dreg, lreg_lo, c); break;
+        case lir_add:
+        case lir_sub:
+          if (c == 0 && dreg == lreg_lo) {
+            COMMENT("effective nop elided");
+            return;
+          }
+          code == lir_add ? __ add(dreg, lreg_lo, c) : __ sub(dreg, lreg_lo, c);
+          break;
+        case lir_div:
+          assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant");
+          if (c == 1) {
+            // move lreg_lo to dreg if divisor is 1
+            __ mov(dreg, lreg_lo);
+          } else {
+            unsigned int shift = exact_log2_long(c);
+            // use rscratch1 as intermediate result register
+            __ asr(rscratch1, lreg_lo, 63);
+            __ add(rscratch1, lreg_lo, rscratch1, Assembler::LSR, 64 - shift);
+            __ asr(dreg, rscratch1, shift);
+          }
+          break;
+        case lir_rem:
+          assert(c > 0 && is_power_of_2_long(c), "divisor must be power-of-2 constant");
+          if (c == 1) {
+            // move 0 to dreg if divisor is 1
+            __ mov(dreg, zr);
+          } else {
+            // use rscratch1 as intermediate result register
+            __ negs(rscratch1, lreg_lo);
+            __ andr(dreg, lreg_lo, c - 1);
+            __ andr(rscratch1, rscratch1, c - 1);
+            __ csneg(dreg, dreg, rscratch1, Assembler::MI);
+          }
+          break;
         default:
           ShouldNotReachHere();
       }
@@ -1862,7 +1869,51 @@
 
 
 
-void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) { Unimplemented(); }
+void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr illegal, LIR_Opr result, CodeEmitInfo* info) {
+
+  // opcode check
+  assert((code == lir_idiv) || (code == lir_irem), "opcode must be idiv or irem");
+  bool is_irem = (code == lir_irem);
+
+  // operand check
+  assert(left->is_single_cpu(),   "left must be register");
+  assert(right->is_single_cpu() || right->is_constant(),  "right must be register or constant");
+  assert(result->is_single_cpu(), "result must be register");
+  Register lreg = left->as_register();
+  Register dreg = result->as_register();
+
+  // power-of-2 constant check and codegen
+  if (right->is_constant()) {
+    int c = right->as_constant_ptr()->as_jint();
+    assert(c > 0 && is_power_of_2(c), "divisor must be power-of-2 constant");
+    if (is_irem) {
+      if (c == 1) {
+        // move 0 to dreg if divisor is 1
+        __ movw(dreg, zr);
+      } else {
+        // use rscratch1 as intermediate result register
+        __ negsw(rscratch1, lreg);
+        __ andw(dreg, lreg, c - 1);
+        __ andw(rscratch1, rscratch1, c - 1);
+        __ csnegw(dreg, dreg, rscratch1, Assembler::MI);
+      }
+    } else {
+      if (c == 1) {
+        // move lreg to dreg if divisor is 1
+        __ movw(dreg, lreg);
+      } else {
+        unsigned int shift = exact_log2(c);
+        // use rscratch1 as intermediate result register
+        __ asrw(rscratch1, lreg, 31);
+        __ addw(rscratch1, lreg, rscratch1, Assembler::LSR, 32 - shift);
+        __ asrw(dreg, rscratch1, shift);
+      }
+    }
+  } else {
+    Register rreg = right->as_register();
+    __ corrected_idivl(dreg, lreg, rreg, is_irem, rscratch1);
+  }
+}
 
 
 void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
@@ -2792,7 +2843,10 @@
 }
 
 
-void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
+  // tmp must be unused
+  assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
+
   if (left->is_single_cpu()) {
     assert(dest->is_single_cpu(), "expect single result reg");
     __ negw(dest->as_register(), left->as_register());
--- a/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/aarch64/c1_LIRAssembler_aarch64.hpp	Thu Sep 27 22:04:49 2018 +0200
@@ -75,8 +75,6 @@
     _deopt_handler_size = 7 * NativeInstruction::instruction_size
   };
 
-  void arithmetic_idiv(LIR_Op3* op, bool is_irem);
-
 public:
 
   void store_parameter(Register r, int offset_from_esp_in_words);
--- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -440,17 +440,26 @@
 
   if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) {
 
-    // the check for division by zero destroys the right operand
-    right.set_destroys_register();
-
-    // check for division by zero (destroys registers of right operand!)
-    CodeEmitInfo* info = state_for(x);
-
     left.load_item();
-    right.load_item();
-
-    __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0));
-    __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info));
+    bool need_zero_check = true;
+    if (right.is_constant()) {
+      jlong c = right.get_jlong_constant();
+      // no need to do div-by-zero check if the divisor is a non-zero constant
+      if (c != 0) need_zero_check = false;
+      // do not load right if the divisor is a power-of-2 constant
+      if (c > 0 && is_power_of_2_long(c)) {
+        right.dont_load_item();
+      } else {
+        right.load_item();
+      }
+    } else {
+      right.load_item();
+    }
+    if (need_zero_check) {
+      CodeEmitInfo* info = state_for(x);
+      __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0));
+      __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info));
+    }
 
     rlock_result(x);
     switch (x->op()) {
@@ -506,19 +515,32 @@
   // do not need to load right, as we can handle stack and constants
   if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) {
 
-    right_arg->load_item();
     rlock_result(x);
+    bool need_zero_check = true;
+    if (right.is_constant()) {
+      jint c = right.get_jint_constant();
+      // no need to do div-by-zero check if the divisor is a non-zero constant
+      if (c != 0) need_zero_check = false;
+      // do not load right if the divisor is a power-of-2 constant
+      if (c > 0 && is_power_of_2(c)) {
+        right_arg->dont_load_item();
+      } else {
+        right_arg->load_item();
+      }
+    } else {
+      right_arg->load_item();
+    }
+    if (need_zero_check) {
+      CodeEmitInfo* info = state_for(x);
+      __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0));
+      __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info));
+    }
 
-    CodeEmitInfo* info = state_for(x);
-    LIR_Opr tmp = new_register(T_INT);
-    __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0));
-    __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info));
-    info = state_for(x);
-
+    LIR_Opr ill = LIR_OprFact::illegalOpr;
     if (x->op() == Bytecodes::_irem) {
-      __ irem(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL);
+      __ irem(left_arg->result(), right_arg->result(), x->operand(), ill, NULL);
     } else if (x->op() == Bytecodes::_idiv) {
-      __ idiv(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL);
+      __ idiv(left_arg->result(), right_arg->result(), x->operand(), ill, NULL);
     }
 
   } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) {
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -822,6 +822,15 @@
   return stub_start_addr;
 }
 
+void MacroAssembler::c2bool(Register x) {
+  // implements x == 0 ? 0 : 1
+  // note: must only look at least-significant byte of x
+  //       since C-style booleans are stored in one byte
+  //       only! (was bug)
+  tst(x, 0xff);
+  cset(x, Assembler::NE);
+}
+
 address MacroAssembler::ic_call(address entry, jint method_index) {
   RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
   // address const_ptr = long_constant((jlong)Universe::non_oop_word());
--- a/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/aarch64/macroAssembler_aarch64.hpp	Thu Sep 27 22:04:49 2018 +0200
@@ -782,6 +782,9 @@
 
   void resolve_jobject(Register value, Register thread, Register tmp);
 
+  // C 'boolean' to Java boolean: x == 0 ? 0 : 1
+  void c2bool(Register x);
+
   // oop manipulations
   void load_klass(Register dst, Register src);
   void store_klass(Register dst, Register src);
--- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -1924,7 +1924,7 @@
 
   // Unpack native results.
   switch (ret_type) {
-  case T_BOOLEAN: __ ubfx(r0, r0, 0, 8);             break;
+  case T_BOOLEAN: __ c2bool(r0);                     break;
   case T_CHAR   : __ ubfx(r0, r0, 0, 16);            break;
   case T_BYTE   : __ sbfx(r0, r0, 0, 8);             break;
   case T_SHORT  : __ sbfx(r0, r0, 0, 16);            break;
--- a/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/aarch64/templateInterpreterGenerator_aarch64.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -557,7 +557,7 @@
         BasicType type) {
     address entry = __ pc();
   switch (type) {
-  case T_BOOLEAN: __ uxtb(r0, r0);        break;
+  case T_BOOLEAN: __ c2bool(r0);         break;
   case T_CHAR   : __ uxth(r0, r0);       break;
   case T_BYTE   : __ sxtb(r0, r0);        break;
   case T_SHORT  : __ sxth(r0, r0);        break;
--- a/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/arm/c1_LIRAssembler_arm.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -2359,8 +2359,11 @@
           ShouldNotReachHere();
       }
     } else if (opr2->is_single_cpu()) {
-      if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY || opr1->type() == T_METADATA || opr1->type() == T_ADDRESS) {
-        assert(opr2->type() == T_OBJECT || opr2->type() == T_ARRAY || opr2->type() == T_METADATA || opr2->type() == T_ADDRESS, "incompatibe type");
+      if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) {
+        assert(opr2->type() == T_OBJECT || opr2->type() == T_ARRAY, "incompatibe type");
+        __ cmpoop(opr1->as_register(), opr2->as_register());
+      } else if (opr1->type() == T_METADATA || opr1->type() == T_ADDRESS) {
+        assert(opr2->type() == T_METADATA || opr2->type() == T_ADDRESS, "incompatibe type");
         __ cmp(opr1->as_register(), opr2->as_register());
       } else {
         assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY && opr2->type() != T_METADATA && opr2->type() != T_ADDRESS, "incompatibe type");
@@ -2761,6 +2764,9 @@
   assert(length == R4, "code assumption");
 #endif // AARCH64
 
+  __ resolve(ACCESS_READ, src);
+  __ resolve(ACCESS_WRITE, dst);
+
   CodeStub* stub = op->stub();
 
   int flags = op->flags();
@@ -3126,6 +3132,7 @@
     __ b(*op->stub()->entry());
   } else if (op->code() == lir_lock) {
     assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+    __ resolve(ACCESS_READ | ACCESS_WRITE, obj);
     int null_check_offset = __ lock_object(hdr, obj, lock, tmp, *op->stub()->entry());
     if (op->info() != NULL) {
       add_debug_info_for_null_check(null_check_offset, op->info());
@@ -3258,7 +3265,9 @@
 }
 
 
-void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
+  // tmp must be unused
+  assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
 
   if (left->is_single_cpu()) {
     assert (dest->type() == T_INT, "unexpected result type");
--- a/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/arm/c1_MacroAssembler_arm.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -90,7 +90,6 @@
     tlab_allocate(obj, obj_end, tmp1, size_expression, slow_case);
   } else {
     eden_allocate(obj, obj_end, tmp1, tmp2, size_expression, slow_case);
-    incr_allocated_bytes(size_expression, tmp1);
   }
 }
 
--- a/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/arm/c1_Runtime1_arm.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -569,7 +569,6 @@
 
           __ ldr_u32(obj_size, Address(klass, Klass::layout_helper_offset()));
           __ eden_allocate(result, obj_end, tmp1, tmp2, obj_size, slow_case);        // initializes result and obj_end
-          __ incr_allocated_bytes(obj_size, tmp2);
           __ initialize_object(result, obj_end, klass, noreg /* len */, tmp1, tmp2,
                                instanceOopDesc::header_size() * HeapWordSize, -1,
                                /* is_tlab_allocated */ false);
@@ -658,7 +657,6 @@
           // eden_allocate destroys tmp2, so reload header_size after allocation
           // eden_allocate initializes result and obj_end
           __ eden_allocate(result, obj_end, tmp1, tmp2, arr_size, slow_case);
-          __ incr_allocated_bytes(arr_size, tmp2);
           __ ldrb(tmp2, Address(klass, in_bytes(Klass::layout_helper_offset()) +
                                        Klass::_lh_header_size_shift / BitsPerByte));
           __ initialize_object(result, obj_end, klass, length, tmp1, tmp2, tmp2, -1, /* is_tlab_allocated */ false);
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -24,6 +24,8 @@
 
 #include "precompiled.hpp"
 #include "gc/shared/barrierSetAssembler.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "runtime/thread.hpp"
 
 #define __ masm->
 
@@ -162,3 +164,122 @@
   }
 }
 
+void BarrierSetAssembler::obj_equals(MacroAssembler* masm,
+                                     Register obj1, Register obj2) {
+  __ cmp(obj1, obj2);
+}
+
+// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
+void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, Register obj, Register obj_end, Register tmp1, Register tmp2,
+                                 RegisterOrConstant size_expression, Label& slow_case) {
+  if (!Universe::heap()->supports_inline_contig_alloc()) {
+    __ b(slow_case);
+    return;
+  }
+
+  CollectedHeap* ch = Universe::heap();
+
+  const Register top_addr = tmp1;
+  const Register heap_end = tmp2;
+
+  if (size_expression.is_register()) {
+    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
+  } else {
+    assert_different_registers(obj, obj_end, top_addr, heap_end);
+  }
+
+  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
+  if (load_const) {
+    __ mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
+  } else {
+    __ ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
+  }
+  // Calculate new heap_top by adding the size of the object
+  Label retry;
+  __ bind(retry);
+
+#ifdef AARCH64
+  __ ldxr(obj, top_addr);
+#else
+  __ ldr(obj, Address(top_addr));
+#endif // AARCH64
+
+  __ ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
+  __ add_rc(obj_end, obj, size_expression);
+  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
+  __ cmp(obj_end, obj);
+  __ b(slow_case, lo);
+  // Update heap_top if allocation succeeded
+  __ cmp(obj_end, heap_end);
+  __ b(slow_case, hi);
+
+#ifdef AARCH64
+  __ stxr(heap_end/*scratched*/, obj_end, top_addr);
+  __ cbnz_w(heap_end, retry);
+#else
+  __ atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
+  __ b(retry, ne);
+#endif // AARCH64
+
+  incr_allocated_bytes(masm, size_expression, tmp1);
+}
+
+// Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
+void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, Register obj, Register obj_end, Register tmp1,
+                                 RegisterOrConstant size_expression, Label& slow_case) {
+  const Register tlab_end = tmp1;
+  assert_different_registers(obj, obj_end, tlab_end);
+
+  __ ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
+  __ ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
+  __ add_rc(obj_end, obj, size_expression);
+  __ cmp(obj_end, tlab_end);
+  __ b(slow_case, hi);
+  __ str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
+}
+
+void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, RegisterOrConstant size_in_bytes, Register tmp) {
+#ifdef AARCH64
+  __ ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+  __ add_rc(tmp, tmp, size_in_bytes);
+  __ str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
+#else
+  // Bump total bytes allocated by this thread
+  Label done;
+
+  // Borrow the Rthread for alloc counter
+  Register Ralloc = Rthread;
+  __ add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
+  __ ldr(tmp, Address(Ralloc));
+  __ adds(tmp, tmp, size_in_bytes);
+  __ str(tmp, Address(Ralloc), cc);
+  __ b(done, cc);
+
+  // Increment the high word and store single-copy atomically (that is an unlikely scenario on typical embedded systems as it means >4GB has been allocated)
+  // To do so ldrd/strd instructions used which require an even-odd pair of registers. Such a request could be difficult to satisfy by
+  // allocating those registers on a higher level, therefore the routine is ready to allocate a pair itself.
+  Register low, high;
+  // Select ether R0/R1 or R2/R3
+
+  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
+    low = R2;
+    high  = R3;
+  } else {
+    low = R0;
+    high  = R1;
+  }
+  __ push(RegisterSet(low, high));
+
+  __ ldrd(low, Address(Ralloc));
+  __ adds(low, low, size_in_bytes);
+  __ adc(high, high, 0);
+  __ strd(low, Address(Ralloc));
+
+  __ pop(RegisterSet(low, high));
+
+  __ bind(done);
+
+  // Unborrow the Rthread
+  __ sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
+#endif // AARCH64
+}
--- a/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/arm/gc/shared/barrierSetAssembler_arm.hpp	Thu Sep 27 22:04:49 2018 +0200
@@ -30,6 +30,12 @@
 #include "oops/access.hpp"
 
 class BarrierSetAssembler: public CHeapObj<mtGC> {
+private:
+  void incr_allocated_bytes(MacroAssembler* masm,
+    RegisterOrConstant size_in_bytes,
+    Register           tmp
+);
+
 public:
   virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop,
                                   Register addr, Register count, int callee_saved_regs) {}
@@ -41,7 +47,31 @@
   virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type,
                         Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null);
 
+  virtual void obj_equals(MacroAssembler* masm,
+                          Register obj1, Register obj2);
+
+  virtual void eden_allocate(MacroAssembler* masm,
+    Register           obj,              // result: pointer to object after successful allocation
+    Register           obj_end,          // result: pointer to end of object after successful allocation
+    Register           tmp1,             // temp register
+    Register           tmp2,             // temp register
+    RegisterOrConstant size_expression,  // size of object
+    Label&             slow_case         // continuation point if fast allocation fails
+  );
+
+  virtual void tlab_allocate(MacroAssembler* masm,
+    Register           obj,              // result: pointer to object after successful allocation
+    Register           obj_end,          // result: pointer to end of object after successful allocation
+    Register           tmp1,             // temp register
+    RegisterOrConstant size_expression,  // size of object
+    Label&             slow_case         // continuation point if fast allocation fails
+  );
+
   virtual void barrier_stubs_init() {}
+
+  virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) {
+    // Default implementation does not need to do anything.
+  }
 };
 
 #endif // CPU_ARM_GC_SHARED_BARRIERSETASSEMBLER_ARM_HPP
--- a/src/hotspot/cpu/arm/macroAssembler_arm.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -1037,6 +1037,11 @@
 
 #ifndef AARCH64
 
+void MacroAssembler::cmpoop(Register obj1, Register obj2) {
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->obj_equals(this, obj1, obj2);
+}
+
 void MacroAssembler::long_move(Register rd_lo, Register rd_hi,
                                Register rn_lo, Register rn_hi,
                                AsmCondition cond) {
@@ -1251,68 +1256,15 @@
 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
 void MacroAssembler::eden_allocate(Register obj, Register obj_end, Register tmp1, Register tmp2,
                                  RegisterOrConstant size_expression, Label& slow_case) {
-  if (!Universe::heap()->supports_inline_contig_alloc()) {
-    b(slow_case);
-    return;
-  }
-
-  CollectedHeap* ch = Universe::heap();
-
-  const Register top_addr = tmp1;
-  const Register heap_end = tmp2;
-
-  if (size_expression.is_register()) {
-    assert_different_registers(obj, obj_end, top_addr, heap_end, size_expression.as_register());
-  } else {
-    assert_different_registers(obj, obj_end, top_addr, heap_end);
-  }
-
-  bool load_const = AARCH64_ONLY(false) NOT_AARCH64(VM_Version::supports_movw() ); // TODO-AARCH64 check performance
-  if (load_const) {
-    mov_address(top_addr, (address)Universe::heap()->top_addr(), symbolic_Relocation::eden_top_reference);
-  } else {
-    ldr(top_addr, Address(Rthread, JavaThread::heap_top_addr_offset()));
-  }
-  // Calculate new heap_top by adding the size of the object
-  Label retry;
-  bind(retry);
-
-#ifdef AARCH64
-  ldxr(obj, top_addr);
-#else
-  ldr(obj, Address(top_addr));
-#endif // AARCH64
-
-  ldr(heap_end, Address(top_addr, (intptr_t)ch->end_addr() - (intptr_t)ch->top_addr()));
-  add_rc(obj_end, obj, size_expression);
-  // Check if obj_end wrapped around, i.e., obj_end < obj. If yes, jump to the slow case.
-  cmp(obj_end, obj);
-  b(slow_case, lo);
-  // Update heap_top if allocation succeeded
-  cmp(obj_end, heap_end);
-  b(slow_case, hi);
-
-#ifdef AARCH64
-  stxr(heap_end/*scratched*/, obj_end, top_addr);
-  cbnz_w(heap_end, retry);
-#else
-  atomic_cas_bool(obj, obj_end, top_addr, 0, heap_end/*scratched*/);
-  b(retry, ne);
-#endif // AARCH64
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->eden_allocate(this, obj, obj_end, tmp1, tmp2, size_expression, slow_case);
 }
 
 // Puts address of allocated object into register `obj` and end of allocated object into register `obj_end`.
 void MacroAssembler::tlab_allocate(Register obj, Register obj_end, Register tmp1,
                                  RegisterOrConstant size_expression, Label& slow_case) {
-  const Register tlab_end = tmp1;
-  assert_different_registers(obj, obj_end, tlab_end);
-
-  ldr(obj, Address(Rthread, JavaThread::tlab_top_offset()));
-  ldr(tlab_end, Address(Rthread, JavaThread::tlab_end_offset()));
-  add_rc(obj_end, obj, size_expression);
-  cmp(obj_end, tlab_end);
-  b(slow_case, hi);
-  str(obj_end, Address(Rthread, JavaThread::tlab_top_offset()));
+  BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  bs->tlab_allocate(this, obj, obj_end, tmp1, size_expression, slow_case);
 }
 
 // Fills memory regions [start..end] with zeroes. Clobbers `start` and `tmp` registers.
@@ -1358,52 +1310,6 @@
 #endif // AARCH64
 }
 
-void MacroAssembler::incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp) {
-#ifdef AARCH64
-  ldr(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
-  add_rc(tmp, tmp, size_in_bytes);
-  str(tmp, Address(Rthread, in_bytes(JavaThread::allocated_bytes_offset())));
-#else
-  // Bump total bytes allocated by this thread
-  Label done;
-
-  // Borrow the Rthread for alloc counter
-  Register Ralloc = Rthread;
-  add(Ralloc, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
-  ldr(tmp, Address(Ralloc));
-  adds(tmp, tmp, size_in_bytes);
-  str(tmp, Address(Ralloc), cc);
-  b(done, cc);
-
-  // Increment the high word and store single-copy atomically (that is an unlikely scenario on typical embedded systems as it means >4GB has been allocated)
-  // To do so ldrd/strd instructions used which require an even-odd pair of registers. Such a request could be difficult to satisfy by
-  // allocating those registers on a higher level, therefore the routine is ready to allocate a pair itself.
-  Register low, high;
-  // Select ether R0/R1 or R2/R3
-
-  if (size_in_bytes.is_register() && (size_in_bytes.as_register() == R0 || size_in_bytes.as_register() == R1)) {
-    low = R2;
-    high  = R3;
-  } else {
-    low = R0;
-    high  = R1;
-  }
-  push(RegisterSet(low, high));
-
-  ldrd(low, Address(Ralloc));
-  adds(low, low, size_in_bytes);
-  adc(high, high, 0);
-  strd(low, Address(Ralloc));
-
-  pop(RegisterSet(low, high));
-
-  bind(done);
-
-  // Unborrow the Rthread
-  sub(Rthread, Ralloc, in_bytes(JavaThread::allocated_bytes_offset()));
-#endif // AARCH64
-}
-
 void MacroAssembler::arm_stack_overflow_check(int frame_size_in_bytes, Register tmp) {
   // Version of AbstractAssembler::generate_stack_overflow_check optimized for ARM
   if (UseStackBanging) {
@@ -2269,6 +2175,11 @@
                                       Register chr1, Register chr2, Label& Ldone) {
   Label Lvector, Lloop;
 
+  // if (ary1 == ary2)
+  //     return true;
+  cmpoop(ary1, ary2);
+  b(Ldone, eq);
+
   // Note: limit contains number of bytes (2*char_elements) != 0.
   tst(limit, 0x2); // trailing character ?
   b(Lvector, eq);
@@ -2728,6 +2639,14 @@
   }
 }
 
+void MacroAssembler::resolve(DecoratorSet decorators, Register obj) {
+  // Use stronger ACCESS_WRITE|ACCESS_READ by default.
+  if ((decorators & (ACCESS_READ | ACCESS_WRITE)) == 0) {
+    decorators |= ACCESS_READ | ACCESS_WRITE;
+  }
+  BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler();
+  return bs->resolve(this, decorators, obj);
+}
 
 #ifdef AARCH64
 
--- a/src/hotspot/cpu/arm/macroAssembler_arm.hpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/arm/macroAssembler_arm.hpp	Thu Sep 27 22:04:49 2018 +0200
@@ -361,8 +361,6 @@
 
   void zero_memory(Register start, Register end, Register tmp);
 
-  void incr_allocated_bytes(RegisterOrConstant size_in_bytes, Register tmp);
-
   static bool needs_explicit_null_check(intptr_t offset);
 
   void arm_stack_overflow_check(int frame_size_in_bytes, Register tmp);
@@ -1058,6 +1056,10 @@
   void access_load_at(BasicType type, DecoratorSet decorators, Address src, Register dst, Register tmp1, Register tmp2, Register tmp3);
   void access_store_at(BasicType type, DecoratorSet decorators, Address obj, Register new_val, Register tmp1, Register tmp2, Register tmp3, bool is_null);
 
+  // Resolves obj for access. Result is placed in the same register.
+  // All other registers are preserved.
+  void resolve(DecoratorSet decorators, Register obj);
+
 #ifdef AARCH64
   void encode_heap_oop(Register dst, Register src);
   void encode_heap_oop(Register r) {
@@ -1246,6 +1248,8 @@
   }
 
 #ifndef AARCH64
+  void cmpoop(Register obj1, Register obj2);
+
   void long_move(Register rd_lo, Register rd_hi,
                  Register rn_lo, Register rn_hi,
                  AsmCondition cond = al);
--- a/src/hotspot/cpu/arm/methodHandles_arm.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/arm/methodHandles_arm.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -173,7 +173,7 @@
     // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), "");
     Label L;
     __ ldr(tmp, __ receiver_argument_address(Rparams, tmp, tmp));
-    __ cmp(tmp, recv);
+    __ cmpoop(tmp, recv);
     __ b(L, eq);
     __ stop("receiver not on stack");
     __ bind(L);
--- a/src/hotspot/cpu/arm/sharedRuntime_arm.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/arm/sharedRuntime_arm.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -1573,6 +1573,8 @@
     // Remember the handle for the unlocking code
     __ mov(sync_handle, R1);
 
+    __ resolve(IS_NOT_NULL, sync_obj);
+
     if(UseBiasedLocking) {
       __ biased_locking_enter(sync_obj, tmp, disp_hdr/*scratched*/, false, Rtemp, lock_done, slow_lock_biased);
     }
@@ -1690,6 +1692,8 @@
   if (method->is_synchronized()) {
     __ ldr(sync_obj, Address(sync_handle));
 
+    __ resolve(IS_NOT_NULL, sync_obj);
+
     if(UseBiasedLocking) {
       __ biased_locking_exit(sync_obj, Rtemp, unlock_done);
       // disp_hdr may not have been saved on entry with biased locking
--- a/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/arm/templateInterpreterGenerator_arm.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -605,6 +605,7 @@
 #endif // AARCH64
     __ load_mirror(R0, Rmethod, Rtemp);
     __ bind(done);
+    __ resolve(IS_NOT_NULL, R0);
   }
 
   // add space for monitor & lock
--- a/src/hotspot/cpu/arm/templateTable_arm.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/arm/templateTable_arm.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -2559,7 +2559,7 @@
   // assume branch is more often taken than not (loops use backward branches)
   Label not_taken;
   __ pop_ptr(R1_tmp);
-  __ cmp(R1_tmp, R0_tos);
+  __ cmpoop(R1_tmp, R0_tos);
   __ b(not_taken, convNegCond(cc));
   branch(false, false);
   __ bind(not_taken);
@@ -4502,12 +4502,7 @@
     const Register Rtlab_end = R2_tmp;
     assert_different_registers(Robj, Rsize, Rklass, Rtlab_top, Rtlab_end);
 
-    __ ldr(Robj, Address(Rthread, JavaThread::tlab_top_offset()));
-    __ ldr(Rtlab_end, Address(Rthread, in_bytes(JavaThread::tlab_end_offset())));
-    __ add(Rtlab_top, Robj, Rsize);
-    __ cmp(Rtlab_top, Rtlab_end);
-    __ b(slow_case, hi);
-    __ str(Rtlab_top, Address(Rthread, JavaThread::tlab_top_offset()));
+    __ tlab_allocate(Robj, Rtlab_top, Rtlab_end, Rsize, slow_case);
     if (ZeroTLAB) {
       // the fields have been already cleared
       __ b(initialize_header);
@@ -4523,34 +4518,7 @@
       const Register Rheap_end = Rtemp;
       assert_different_registers(Robj, Rklass, Rsize, Rheap_top_addr, Rheap_top, Rheap_end, LR);
 
-      // heap_end now (re)loaded in the loop since also used as a scratch register in the CAS
-      __ ldr_literal(Rheap_top_addr, Lheap_top_addr);
-
-      Label retry;
-      __ bind(retry);
-
-#ifdef AARCH64
-      __ ldxr(Robj, Rheap_top_addr);
-#else
-      __ ldr(Robj, Address(Rheap_top_addr));
-#endif // AARCH64
-
-      __ ldr(Rheap_end, Address(Rheap_top_addr, (intptr_t)Universe::heap()->end_addr()-(intptr_t)Universe::heap()->top_addr()));
-      __ add(Rheap_top, Robj, Rsize);
-      __ cmp(Rheap_top, Rheap_end);
-      __ b(slow_case, hi);
-
-      // Update heap top atomically.
-      // If someone beats us on the allocation, try again, otherwise continue.
-#ifdef AARCH64
-      __ stxr(Rtemp2, Rheap_top, Rheap_top_addr);
-      __ cbnz_w(Rtemp2, retry);
-#else
-      __ atomic_cas_bool(Robj, Rheap_top, Rheap_top_addr, 0, Rheap_end/*scratched*/);
-      __ b(retry, ne);
-#endif // AARCH64
-
-      __ incr_allocated_bytes(Rsize, Rtemp);
+      __ eden_allocate(Robj, Rheap_top, Rheap_top_addr, Rheap_end, Rsize, slow_case);
     }
   }
 
@@ -4917,6 +4885,8 @@
   // check for NULL object
   __ null_check(Robj, Rtemp);
 
+  __ resolve(IS_NOT_NULL, Robj);
+
   const int entry_size = (frame::interpreter_frame_monitor_size() * wordSize);
   assert (entry_size % StackAlignmentInBytes == 0, "keep stack alignment");
   Label allocate_monitor, allocated;
@@ -5047,6 +5017,8 @@
   // check for NULL object
   __ null_check(Robj, Rtemp);
 
+  __ resolve(IS_NOT_NULL, Robj);
+
   const int entry_size = (frame::interpreter_frame_monitor_size() * wordSize);
   Label found, throw_exception;
 
--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -2840,7 +2840,9 @@
 }
 
 
-void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
+  // tmp must be unused
+  assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
   assert(left->is_register(), "can only handle registers");
 
   if (left->is_single_cpu()) {
--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -2850,7 +2850,9 @@
   ShouldNotCallThis(); // There are no delay slots on ZARCH_64.
 }
 
-void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
+  // tmp must be unused
+  assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
   assert(left->is_register(), "can only handle registers");
 
   if (left->is_single_cpu()) {
--- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -3024,7 +3024,9 @@
 }
 
 
-void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
+  // tmp must be unused
+  assert(tmp->is_illegal(), "wasting a register if tmp is allocated");
   assert(left->is_register(), "can only handle registers");
 
   if (left->is_single_cpu()) {
--- a/src/hotspot/cpu/x86/assembler_x86.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/x86/assembler_x86.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -2199,7 +2199,7 @@
 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
-  InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_rex_vex_w_reverted();
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x28);
@@ -2209,7 +2209,7 @@
 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
-  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8(0x28);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -2217,7 +2217,7 @@
 
 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -2465,8 +2465,7 @@
 
 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_128bit;
-  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x6F);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -2583,7 +2582,7 @@
   assert(VM_Version::supports_avx512vlbw(), "");
   assert(is_vector_masking(), "");    // For stub code use only
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   attributes.set_embedded_opmask_register_specifier(mask);
   attributes.set_is_evex_instruction();
@@ -2608,7 +2607,7 @@
   assert(is_vector_masking(), "");
   assert(VM_Version::supports_avx512vlbw(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
+  InstructionAttr attributes(vector_len, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   attributes.set_embedded_opmask_register_specifier(mask);
   attributes.set_is_evex_instruction();
@@ -2752,7 +2751,7 @@
 void Assembler::movlpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   attributes.set_rex_vex_w_reverted();
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
@@ -3512,7 +3511,7 @@
 void Assembler::evpcmpgtb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx512vlbw(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   attributes.set_is_evex_instruction();
   int dst_enc = kdst->encoding();
@@ -3525,7 +3524,7 @@
   assert(is_vector_masking(), "");
   assert(VM_Version::supports_avx512vlbw(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   attributes.reset_is_clear_context();
   attributes.set_embedded_opmask_register_specifier(mask);
@@ -3538,7 +3537,7 @@
 
 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
   assert(VM_Version::supports_avx512vlbw(), "");
-  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(kdst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x3E);
@@ -3549,7 +3548,7 @@
 void Assembler::evpcmpuw(KRegister kdst, KRegister mask, XMMRegister nds, XMMRegister src, ComparisonPredicate vcc, int vector_len) {
   assert(is_vector_masking(), "");
   assert(VM_Version::supports_avx512vlbw(), "");
-  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.reset_is_clear_context();
   attributes.set_embedded_opmask_register_specifier(mask);
   attributes.set_is_evex_instruction();
@@ -3562,7 +3561,7 @@
 void Assembler::evpcmpuw(KRegister kdst, XMMRegister nds, Address src, ComparisonPredicate vcc, int vector_len) {
   assert(VM_Version::supports_avx512vlbw(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   attributes.set_is_evex_instruction();
   int dst_enc = kdst->encoding();
@@ -3575,7 +3574,7 @@
 void Assembler::evpcmpeqb(KRegister kdst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx512bw(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_is_evex_instruction();
   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   int dst_enc = kdst->encoding();
@@ -3588,7 +3587,7 @@
   assert(VM_Version::supports_avx512vlbw(), "");
   assert(is_vector_masking(), "");    // For stub code use only
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_reg_mask */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   attributes.reset_is_clear_context();
   attributes.set_embedded_opmask_register_specifier(mask);
@@ -3741,7 +3740,7 @@
 
 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -3750,7 +3749,7 @@
 
 void Assembler::pextrd(Address dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x16);
@@ -3760,7 +3759,7 @@
 
 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(src, xnoreg, as_XMMRegister(dst->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -3769,7 +3768,7 @@
 
 void Assembler::pextrq(Address dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x16);
@@ -3779,7 +3778,7 @@
 
 void Assembler::pextrw(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse2(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC5);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -3788,7 +3787,7 @@
 
 void Assembler::pextrw(Address dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0x15);
@@ -3798,7 +3797,7 @@
 
 void Assembler::pextrb(Address dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
   simd_prefix(src, xnoreg, dst, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x14);
@@ -3808,7 +3807,7 @@
 
 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x22);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -3817,7 +3816,7 @@
 
 void Assembler::pinsrd(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_32bit);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x22);
@@ -3827,7 +3826,7 @@
 
 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x22);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -3836,7 +3835,7 @@
 
 void Assembler::pinsrq(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ true, /* legacy_mode */ _legacy_mode_dq, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_64bit);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x22);
@@ -3846,7 +3845,7 @@
 
 void Assembler::pinsrw(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse2(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC4);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -3855,7 +3854,7 @@
 
 void Assembler::pinsrw(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse2(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_16bit);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC4);
@@ -3865,7 +3864,7 @@
 
 void Assembler::pinsrb(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T1S, /* input_size_in_bits */ EVEX_8bit);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x20);
@@ -3876,7 +3875,7 @@
 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
   simd_prefix(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x30);
@@ -3885,7 +3884,7 @@
 
 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x30);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -3895,7 +3894,7 @@
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
   assert(dst != xnoreg, "sanity");
-  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
   vex_prefix(src, 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x30);
@@ -3906,7 +3905,7 @@
   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
   vector_len == AVX_256bit? VM_Version::supports_avx2() :
   vector_len == AVX_512bit? VM_Version::supports_avx512bw() : 0, "");
-  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x30);
   emit_int8((unsigned char) (0xC0 | encode));
@@ -3918,7 +3917,7 @@
   assert(VM_Version::supports_avx512vlbw(), "");
   assert(dst != xnoreg, "sanity");
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
   attributes.set_embedded_opmask_register_specifier(mask);
   attributes.set_is_evex_instruction();
@@ -3930,7 +3929,7 @@
   assert(VM_Version::supports_avx512vlbw(), "");
   assert(src != xnoreg, "sanity");
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
   attributes.set_is_evex_instruction();
   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
@@ -3943,7 +3942,7 @@
   assert(VM_Version::supports_avx512vlbw(), "");
   assert(src != xnoreg, "sanity");
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_HVM, /* input_size_in_bits */ EVEX_NObit);
   attributes.reset_is_clear_context();
   attributes.set_embedded_opmask_register_specifier(mask);
@@ -3957,7 +3956,7 @@
   assert(VM_Version::supports_evex(), "");
   assert(src != xnoreg, "sanity");
   InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_QVM, /* input_size_in_bits */ EVEX_NObit);
   attributes.set_is_evex_instruction();
   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_F3, VEX_OPCODE_0F_38, &attributes);
@@ -3969,7 +3968,7 @@
   assert(vector_len == AVX_128bit? VM_Version::supports_avx() :
   vector_len == AVX_256bit? VM_Version::supports_avx2() :
   vector_len == AVX_512bit? VM_Version::supports_evex() : 0, " ");
-  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x33);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -4082,7 +4081,7 @@
 
 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_ssse3(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x00);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -4101,7 +4100,7 @@
 void Assembler::pshufb(XMMRegister dst, Address src) {
   assert(VM_Version::supports_ssse3(), "");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x00);
@@ -4147,7 +4146,7 @@
 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x70);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -4159,7 +4158,7 @@
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_FVM, /* input_size_in_bits */ EVEX_NObit);
   simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, VEX_OPCODE_0F, &attributes);
   emit_int8(0x70);
@@ -4180,7 +4179,7 @@
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -4190,7 +4189,7 @@
 void Assembler::pslldq(XMMRegister dst, int shift) {
   // Shift left 128 bit value in dst XMMRegister by shift number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   // XMM7 is for /7 encoding: 66 0F 73 /7 ib
   int encode = simd_prefix_and_encode(xmm7, dst, dst, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8(0x73);
@@ -4456,7 +4455,7 @@
 
 void Assembler::palignr(XMMRegister dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_ssse3(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0x0F);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -4477,6 +4476,7 @@
 void Assembler::evalignq(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
   InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x3);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -5591,7 +5591,7 @@
 void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx() && (vector_len == 0) ||
          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x01);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -5600,7 +5600,7 @@
 void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx() && (vector_len == 0) ||
          VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x02);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -5650,7 +5650,7 @@
 
 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse3(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x01);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -5658,7 +5658,7 @@
 
 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse3(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x02);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6281,6 +6281,15 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
+  emit_int8((unsigned char)0xDF);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+
 void Assembler::por(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
@@ -6369,8 +6378,7 @@
 void Assembler::vinserti128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_avx2(), "");
   assert(imm8 <= 0x01, "imm8: %u", imm8);
-  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x38);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6383,9 +6391,8 @@
   assert(VM_Version::supports_avx2(), "");
   assert(dst != xnoreg, "sanity");
   assert(imm8 <= 0x01, "imm8: %u", imm8);
-  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
-  InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x38);
@@ -6398,7 +6405,8 @@
 void Assembler::vinserti32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
   assert(imm8 <= 0x03, "imm8: %u", imm8);
-  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x38);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6413,10 +6421,10 @@
   assert(VM_Version::supports_avx(), "");
   assert(dst != xnoreg, "sanity");
   assert(imm8 <= 0x03, "imm8: %u", imm8);
-  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
-  InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
+  attributes.set_is_evex_instruction();
   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_operand(dst, src);
@@ -6430,9 +6438,10 @@
 void Assembler::vinserti64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
   assert(imm8 <= 0x01, "imm8: %u", imm8);
-  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
-  emit_int8(0x38);
+  emit_int8(0x3A);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 256 bits
   // 0x01 - insert into upper 256 bits
@@ -6445,8 +6454,7 @@
 void Assembler::vinsertf128(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_avx(), "");
   assert(imm8 <= 0x01, "imm8: %u", imm8);
-  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6459,9 +6467,8 @@
   assert(VM_Version::supports_avx(), "");
   assert(dst != xnoreg, "sanity");
   assert(imm8 <= 0x01, "imm8: %u", imm8);
-  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
-  InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
@@ -6472,16 +6479,16 @@
 }
 
 void Assembler::vinsertf32x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
-  assert(VM_Version::supports_evex(), "");
+  assert(VM_Version::supports_avx2(), "");
   assert(imm8 <= 0x03, "imm8: %u", imm8);
-  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into q0 128 bits (0..127)
   // 0x01 - insert into q1 128 bits (128..255)
-  // 0x02 - insert into q2 128 bits (256..383)
-  // 0x03 - insert into q3 128 bits (384..511)
+  // 0x02 - insert into q0 128 bits (256..383)
+  // 0x03 - insert into q1 128 bits (384..512)
   emit_int8(imm8 & 0x03);
 }
 
@@ -6489,24 +6496,24 @@
   assert(VM_Version::supports_avx(), "");
   assert(dst != xnoreg, "sanity");
   assert(imm8 <= 0x03, "imm8: %u", imm8);
-  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
-  InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x18);
   emit_operand(dst, src);
   // 0x00 - insert into q0 128 bits (0..127)
   // 0x01 - insert into q1 128 bits (128..255)
-  // 0x02 - insert into q2 128 bits (256..383)
-  // 0x03 - insert into q3 128 bits (384..511)
+  // 0x02 - insert into q0 128 bits (256..383)
+  // 0x03 - insert into q1 128 bits (384..512)
   emit_int8(imm8 & 0x03);
 }
 
 void Assembler::vinsertf64x4(XMMRegister dst, XMMRegister nds, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
   assert(imm8 <= 0x01, "imm8: %u", imm8);
-  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1A);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6520,8 +6527,9 @@
   assert(dst != xnoreg, "sanity");
   assert(imm8 <= 0x01, "imm8: %u", imm8);
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
+  attributes.set_is_evex_instruction();
   vex_prefix(src, nds->encoding(), dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1A);
   emit_operand(dst, src);
@@ -6534,10 +6542,9 @@
 // vextracti forms
 
 void Assembler::vextracti128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
-  assert(VM_Version::supports_avx(), "");
+  assert(VM_Version::supports_avx2(), "");
   assert(imm8 <= 0x01, "imm8: %u", imm8);
-  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6550,9 +6557,8 @@
   assert(VM_Version::supports_avx2(), "");
   assert(src != xnoreg, "sanity");
   assert(imm8 <= 0x01, "imm8: %u", imm8);
-  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
-  InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   attributes.reset_is_clear_context();
   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -6564,10 +6570,10 @@
 }
 
 void Assembler::vextracti32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
-  assert(VM_Version::supports_avx(), "");
+  assert(VM_Version::supports_evex(), "");
   assert(imm8 <= 0x03, "imm8: %u", imm8);
-  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6583,9 +6589,10 @@
   assert(src != xnoreg, "sanity");
   assert(imm8 <= 0x03, "imm8: %u", imm8);
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   attributes.reset_is_clear_context();
+  attributes.set_is_evex_instruction();
   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_operand(src, dst);
@@ -6599,7 +6606,8 @@
 void Assembler::vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_avx512dq(), "");
   assert(imm8 <= 0x03, "imm8: %u", imm8);
-  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x39);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6613,7 +6621,8 @@
 void Assembler::vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
   assert(imm8 <= 0x01, "imm8: %u", imm8);
-  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x3B);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6622,14 +6631,28 @@
   emit_int8(imm8 & 0x01);
 }
 
-
+void Assembler::vextracti64x4(Address dst, XMMRegister src, uint8_t imm8) {
+  assert(VM_Version::supports_evex(), "");
+  assert(src != xnoreg, "sanity");
+  assert(imm8 <= 0x01, "imm8: %u", imm8);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_64bit);
+  attributes.reset_is_clear_context();
+  attributes.set_is_evex_instruction();
+  vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
+  emit_int8(0x38);
+  emit_operand(src, dst);
+  // 0x00 - extract from lower 256 bits
+  // 0x01 - extract from upper 256 bits
+  emit_int8(imm8 & 0x01);
+}
 // vextractf forms
 
 void Assembler::vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_avx(), "");
   assert(imm8 <= 0x01, "imm8: %u", imm8);
-  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6642,9 +6665,8 @@
   assert(VM_Version::supports_avx(), "");
   assert(src != xnoreg, "sanity");
   assert(imm8 <= 0x01, "imm8: %u", imm8);
-  int vector_len = VM_Version::supports_avx512novl() ? AVX_512bit : AVX_256bit;
-  InstructionMark im(this);
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionMark im(this);
+  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   attributes.reset_is_clear_context();
   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
@@ -6656,10 +6678,10 @@
 }
 
 void Assembler::vextractf32x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
-  assert(VM_Version::supports_avx(), "");
+  assert(VM_Version::supports_evex(), "");
   assert(imm8 <= 0x03, "imm8: %u", imm8);
-  int vector_len = VM_Version::supports_evex() ? AVX_512bit : AVX_256bit;
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6675,9 +6697,10 @@
   assert(src != xnoreg, "sanity");
   assert(imm8 <= 0x03, "imm8: %u", imm8);
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4, /* input_size_in_bits */ EVEX_32bit);
   attributes.reset_is_clear_context();
+  attributes.set_is_evex_instruction();
   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_operand(src, dst);
@@ -6691,7 +6714,8 @@
 void Assembler::vextractf64x2(XMMRegister dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_avx512dq(), "");
   assert(imm8 <= 0x03, "imm8: %u", imm8);
-  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6705,7 +6729,8 @@
 void Assembler::vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8) {
   assert(VM_Version::supports_evex(), "");
   assert(imm8 <= 0x01, "imm8: %u", imm8);
-  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
+  attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(src->encoding(), 0, dst->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1B);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6719,9 +6744,10 @@
   assert(src != xnoreg, "sanity");
   assert(imm8 <= 0x01, "imm8: %u", imm8);
   InstructionMark im(this);
-  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_512bit, /* vex_w */ true, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_address_attributes(/* tuple_type */ EVEX_T4,/* input_size_in_bits */  EVEX_64bit);
   attributes.reset_is_clear_context();
+  attributes.set_is_evex_instruction();
   vex_prefix(dst, 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x1B);
   emit_operand(src, dst);
@@ -6730,38 +6756,17 @@
   emit_int8(imm8 & 0x01);
 }
 
-
-// legacy word/dword replicate
-void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
+// duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
+void Assembler::vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx2(), "");
-  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
-  emit_int8(0x79);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
-  assert(VM_Version::supports_avx2(), "");
-  InstructionAttr attributes(AVX_256bit, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
-  emit_int8(0x58);
-  emit_int8((unsigned char)(0xC0 | encode));
-}
-
-
-// xmm/mem sourced byte/word/dword/qword replicate
-
-// duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
-void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x78);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::evpbroadcastb(XMMRegister dst, Address src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastb(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_avx2(), "");
   assert(dst != xnoreg, "sanity");
   InstructionMark im(this);
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -6773,16 +6778,16 @@
 }
 
 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
-void Assembler::evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx2(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x79);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::evpbroadcastw(XMMRegister dst, Address src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastw(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_avx2(), "");
   assert(dst != xnoreg, "sanity");
   InstructionMark im(this);
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
@@ -6793,17 +6798,19 @@
   emit_operand(dst, src);
 }
 
+// xmm/mem sourced byte/word/dword/qword replicate
+
 // duplicate 4-byte integer data from src into programmed locations in dest : requires AVX512VL
-void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(UseAVX >= 2, "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x58);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::evpbroadcastd(XMMRegister dst, Address src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastd(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_avx2(), "");
   assert(dst != xnoreg, "sanity");
   InstructionMark im(this);
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
@@ -6815,8 +6822,8 @@
 }
 
 // duplicate 8-byte integer data from src into programmed locations in dest : requires AVX512VL
-void Assembler::evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx2(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_rex_vex_w_reverted();
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@@ -6824,8 +6831,8 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::evpbroadcastq(XMMRegister dst, Address src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastq(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_avx2(), "");
   assert(dst != xnoreg, "sanity");
   InstructionMark im(this);
   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
@@ -6863,16 +6870,16 @@
 // scalar single/double precision replicate
 
 // duplicate single precision data from src into programmed locations in dest : requires AVX512VL
-void Assembler::evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8(0x18);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::evpbroadcastss(XMMRegister dst, Address src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastss(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
   assert(dst != xnoreg, "sanity");
   InstructionMark im(this);
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
@@ -6884,8 +6891,8 @@
 }
 
 // duplicate double precision data from src into programmed locations in dest : requires AVX512VL
-void Assembler::evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
   attributes.set_rex_vex_w_reverted();
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@@ -6893,8 +6900,8 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::evpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
+void Assembler::vpbroadcastsd(XMMRegister dst, Address src, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
   assert(dst != xnoreg, "sanity");
   InstructionMark im(this);
   InstructionAttr attributes(vector_len, /* vex_w */ VM_Version::supports_evex(), /* legacy_mode */ false, /* no_mask_reg */ false, /* uses_vl */ true);
@@ -6911,7 +6918,7 @@
 
 // duplicate 1-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastb(XMMRegister dst, Register src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
+  assert(VM_Version::supports_avx512bw(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@@ -6921,7 +6928,7 @@
 
 // duplicate 2-byte integer data from src into programmed locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastw(XMMRegister dst, Register src, int vector_len) {
-  assert(VM_Version::supports_evex(), "");
+  assert(VM_Version::supports_avx512bw(), "");
   InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ _legacy_mode_bw, /* no_mask_reg */ true, /* uses_vl */ true);
   attributes.set_is_evex_instruction();
   int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
@@ -6967,7 +6974,7 @@
 // Carry-Less Multiplication Quadword
 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
   assert(VM_Version::supports_clmul(), "");
-  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* rex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x44);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -6977,7 +6984,7 @@
 // Carry-Less Multiplication Quadword
 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8(0x44);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -7597,33 +7604,23 @@
   set_attributes(attributes);
   attributes->set_current_assembler(this);
 
-  // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
-  if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
-    switch (attributes->get_vector_len()) {
-    case AVX_128bit:
-    case AVX_256bit:
-      attributes->set_is_legacy_mode();
-      break;
+  // For EVEX instruction (which is not marked as pure EVEX instruction) check and see if this instruction
+  // is allowed in legacy mode and has resources which will fit in it.
+  // Pure EVEX instructions will have is_evex_instruction set in their definition.
+  if (!attributes->is_legacy_mode()) {
+    if (UseAVX > 2 && !attributes->is_evex_instruction() && !_is_managed) {
+      if ((attributes->get_vector_len() != AVX_512bit) && (nds_enc < 16) && (xreg_enc < 16)) {
+          attributes->set_is_legacy_mode();
+      }
     }
   }
 
-  // For pure EVEX check and see if this instruction
-  // is allowed in legacy mode and has resources which will
-  // fit in it.  Pure EVEX instructions will use set_is_evex_instruction in their definition,
-  // else that field is set when we encode to EVEX
-  if (UseAVX > 2 && !attributes->is_legacy_mode() &&
-      !_is_managed && !attributes->is_evex_instruction()) {
-    if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
-      bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
-      if (check_register_bank) {
-        // check nds_enc and xreg_enc for upper bank usage
-        if (nds_enc < 16 && xreg_enc < 16) {
-          attributes->set_is_legacy_mode();
-        }
-      } else {
-        attributes->set_is_legacy_mode();
-      }
-    }
+  if (UseAVX > 2) {
+    assert(((!attributes->uses_vl()) ||
+            (attributes->get_vector_len() == AVX_512bit) ||
+            (!_legacy_mode_vl) ||
+            (attributes->is_legacy_mode())),"XMM register should be 0-15");
+    assert(((nds_enc < 16 && xreg_enc < 16) || (!attributes->is_legacy_mode())),"XMM register should be 0-15");
   }
 
   _is_managed = false;
@@ -7653,43 +7650,31 @@
   bool vex_x = false;
   set_attributes(attributes);
   attributes->set_current_assembler(this);
-  bool check_register_bank = NOT_IA32(true) IA32_ONLY(false);
-
-  // if vector length is turned off, revert to AVX for vectors smaller than 512-bit
-  if (UseAVX > 2 && _legacy_mode_vl && attributes->uses_vl()) {
-    switch (attributes->get_vector_len()) {
-    case AVX_128bit:
-    case AVX_256bit:
-      if (check_register_bank) {
-        if (dst_enc >= 16 || nds_enc >= 16 || src_enc >= 16) {
-          // up propagate arithmetic instructions to meet RA requirements
-          attributes->set_vector_len(AVX_512bit);
-        } else {
+
+  // For EVEX instruction (which is not marked as pure EVEX instruction) check and see if this instruction
+  // is allowed in legacy mode and has resources which will fit in it.
+  // Pure EVEX instructions will have is_evex_instruction set in their definition.
+  if (!attributes->is_legacy_mode()) {
+    if (UseAVX > 2 && !attributes->is_evex_instruction() && !_is_managed) {
+      if ((!attributes->uses_vl() || (attributes->get_vector_len() != AVX_512bit)) &&
+          (dst_enc < 16) && (nds_enc < 16) && (src_enc < 16)) {
           attributes->set_is_legacy_mode();
-        }
-      } else {
-        attributes->set_is_legacy_mode();
       }
-      break;
     }
   }
 
-  // For pure EVEX check and see if this instruction
-  // is allowed in legacy mode and has resources which will
-  // fit in it.  Pure EVEX instructions will use set_is_evex_instruction in their definition,
-  // else that field is set when we encode to EVEX
-  if (UseAVX > 2 && !attributes->is_legacy_mode() &&
-      !_is_managed && !attributes->is_evex_instruction()) {
-    if (!_legacy_mode_vl && attributes->get_vector_len() != AVX_512bit) {
-      if (check_register_bank) {
-        // check dst_enc, nds_enc and src_enc for upper bank usage
-        if (dst_enc < 16 && nds_enc < 16 && src_enc < 16) {
-          attributes->set_is_legacy_mode();
-        }
-      } else {
-        attributes->set_is_legacy_mode();
-      }
-    }
+  if (UseAVX > 2) {
+    // All the scalar fp instructions (with uses_vl as false) can have legacy_mode as false
+    // Instruction with uses_vl true are vector instructions
+    // All the vector instructions with AVX_512bit length can have legacy_mode as false
+    // All the vector instructions with < AVX_512bit length can have legacy_mode as false if AVX512vl() is supported
+    // Rest all should have legacy_mode set as true
+    assert(((!attributes->uses_vl()) ||
+            (attributes->get_vector_len() == AVX_512bit) ||
+            (!_legacy_mode_vl) ||
+            (attributes->is_legacy_mode())),"XMM register should be 0-15");
+    // Instruction with legacy_mode true should have dst, nds and src < 15
+    assert(((dst_enc < 16 && nds_enc < 16 && src_enc < 16) || (!attributes->is_legacy_mode())),"XMM register should be 0-15");
   }
 
   _is_managed = false;
@@ -7741,7 +7726,7 @@
 void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
   assert(VM_Version::supports_avx(), "");
   assert(!VM_Version::supports_evex(), "");
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_66, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC2);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -7751,7 +7736,7 @@
 void Assembler::blendvpd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
   assert(VM_Version::supports_avx(), "");
   assert(!VM_Version::supports_evex(), "");
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0x4B);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -7762,7 +7747,7 @@
 void Assembler::cmpps(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
   assert(VM_Version::supports_avx(), "");
   assert(!VM_Version::supports_evex(), "");
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = simd_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, VEX_OPCODE_0F, &attributes);
   emit_int8((unsigned char)0xC2);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -7772,7 +7757,7 @@
 void Assembler::blendvps(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
   assert(VM_Version::supports_avx(), "");
   assert(!VM_Version::supports_evex(), "");
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0x4A);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -7782,7 +7767,7 @@
 
 void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src, int imm8, int vector_len) {
   assert(VM_Version::supports_avx2(), "");
-  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ false);
+  InstructionAttr attributes(vector_len, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ false, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_3A, &attributes);
   emit_int8((unsigned char)0x02);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -7791,7 +7776,7 @@
 
 void Assembler::shlxl(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ false, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF7);
   emit_int8((unsigned char)(0xC0 | encode));
@@ -7799,7 +7784,7 @@
 
 void Assembler::shlxq(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi2(), "");
-  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ false);
+  InstructionAttr attributes(AVX_128bit, /* vex_w */ true, /* legacy_mode */ true, /* no_mask_reg */ true, /* uses_vl */ true);
   int encode = vex_prefix_and_encode(dst->encoding(), src2->encoding(), src1->encoding(), VEX_SIMD_66, VEX_OPCODE_0F_38, &attributes);
   emit_int8((unsigned char)0xF7);
   emit_int8((unsigned char)(0xC0 | encode));
--- a/src/hotspot/cpu/x86/assembler_x86.hpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/x86/assembler_x86.hpp	Thu Sep 27 22:04:49 2018 +0200
@@ -2097,6 +2097,7 @@
 
   // Andn packed integers
   void pandn(XMMRegister dst, XMMRegister src);
+  void vpandn(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
 
   // Or packed integers
   void por(XMMRegister dst, XMMRegister src);
@@ -2134,6 +2135,7 @@
   void vextracti32x4(Address dst, XMMRegister src, uint8_t imm8);
   void vextracti64x2(XMMRegister dst, XMMRegister src, uint8_t imm8);
   void vextracti64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
+  void vextracti64x4(Address dst, XMMRegister src, uint8_t imm8);
 
   // vextractf forms
   void vextractf128(XMMRegister dst, XMMRegister src, uint8_t imm8);
@@ -2144,28 +2146,24 @@
   void vextractf64x4(XMMRegister dst, XMMRegister src, uint8_t imm8);
   void vextractf64x4(Address dst, XMMRegister src, uint8_t imm8);
 
-  // legacy xmm sourced word/dword replicate
-  void vpbroadcastw(XMMRegister dst, XMMRegister src);
-  void vpbroadcastd(XMMRegister dst, XMMRegister src);
-
   // xmm/mem sourced byte/word/dword/qword replicate
-  void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
-  void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
-  void evpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
-  void evpbroadcastw(XMMRegister dst, Address src, int vector_len);
-  void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
-  void evpbroadcastd(XMMRegister dst, Address src, int vector_len);
-  void evpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
-  void evpbroadcastq(XMMRegister dst, Address src, int vector_len);
+  void vpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
+  void vpbroadcastb(XMMRegister dst, Address src, int vector_len);
+  void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
+  void vpbroadcastw(XMMRegister dst, Address src, int vector_len);
+  void vpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
+  void vpbroadcastd(XMMRegister dst, Address src, int vector_len);
+  void vpbroadcastq(XMMRegister dst, XMMRegister src, int vector_len);
+  void vpbroadcastq(XMMRegister dst, Address src, int vector_len);
 
   void evbroadcasti64x2(XMMRegister dst, XMMRegister src, int vector_len);
   void evbroadcasti64x2(XMMRegister dst, Address src, int vector_len);
 
   // scalar single/double precision replicate
-  void evpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
-  void evpbroadcastss(XMMRegister dst, Address src, int vector_len);
-  void evpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
-  void evpbroadcastsd(XMMRegister dst, Address src, int vector_len);
+  void vpbroadcastss(XMMRegister dst, XMMRegister src, int vector_len);
+  void vpbroadcastss(XMMRegister dst, Address src, int vector_len);
+  void vpbroadcastsd(XMMRegister dst, XMMRegister src, int vector_len);
+  void vpbroadcastsd(XMMRegister dst, Address src, int vector_len);
 
   // gpr sourced byte/word/dword/qword replicate
   void evpbroadcastb(XMMRegister dst, Register src, int vector_len);
--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -68,7 +68,6 @@
 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], (jlong)UCONST64(0x8000000000000000), (jlong)UCONST64(0x8000000000000000));
 
 
-
 NEEDS_CLEANUP // remove this definitions ?
 const Register IC_Klass    = rax;   // where the IC klass is cached
 const Register SYNC_header = rax;   // synchronization header
@@ -650,7 +649,7 @@
 
     case T_FLOAT: {
       if (dest->is_single_xmm()) {
-        if (c->is_zero_float()) {
+        if (LP64_ONLY(UseAVX < 2 &&) c->is_zero_float()) {
           __ xorps(dest->as_xmm_float_reg(), dest->as_xmm_float_reg());
         } else {
           __ movflt(dest->as_xmm_float_reg(),
@@ -672,7 +671,7 @@
 
     case T_DOUBLE: {
       if (dest->is_double_xmm()) {
-        if (c->is_zero_double()) {
+        if (LP64_ONLY(UseAVX < 2 &&) c->is_zero_double()) {
           __ xorpd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg());
         } else {
           __ movdbl(dest->as_xmm_double_reg(),
@@ -2395,16 +2394,24 @@
 }
 
 
-void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) {
+void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr tmp, LIR_Opr dest, LIR_Op* op) {
   if (value->is_double_xmm()) {
     switch(code) {
       case lir_abs :
         {
-          if (dest->as_xmm_double_reg() != value->as_xmm_double_reg()) {
-            __ movdbl(dest->as_xmm_double_reg(), value->as_xmm_double_reg());
+#ifdef _LP64
+          if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+            assert(tmp->is_valid(), "need temporary");
+            __ vpandn(dest->as_xmm_double_reg(), tmp->as_xmm_double_reg(), value->as_xmm_double_reg(), 2);
+          } else {
+#endif
+            if (dest->as_xmm_double_reg() != value->as_xmm_double_reg()) {
+              __ movdbl(dest->as_xmm_double_reg(), value->as_xmm_double_reg());
+            }
+            assert(!tmp->is_valid(), "do not need temporary");
+            __ andpd(dest->as_xmm_double_reg(),
+                     ExternalAddress((address)double_signmask_pool));
           }
-          __ andpd(dest->as_xmm_double_reg(),
-                    ExternalAddress((address)double_signmask_pool));
         }
         break;
 
@@ -3734,7 +3741,7 @@
 }
 
 
-void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) {
   if (left->is_single_cpu()) {
     __ negl(left->as_register());
     move_regs(left->as_register(), dest->as_register());
@@ -3759,24 +3766,36 @@
 #endif // _LP64
 
   } else if (dest->is_single_xmm()) {
-    if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) {
-      __ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg());
+#ifdef _LP64
+    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+      assert(tmp->is_valid(), "need temporary");
+      assert_different_registers(left->as_xmm_float_reg(), tmp->as_xmm_float_reg());
+      __ vpxor(dest->as_xmm_float_reg(), tmp->as_xmm_float_reg(), left->as_xmm_float_reg(), 2);
     }
-    if (UseAVX > 0) {
-      __ vnegatess(dest->as_xmm_float_reg(), dest->as_xmm_float_reg(),
-                   ExternalAddress((address)float_signflip_pool));
-    } else {
+    else
+#endif
+    {
+      assert(!tmp->is_valid(), "do not need temporary");
+      if (left->as_xmm_float_reg() != dest->as_xmm_float_reg()) {
+        __ movflt(dest->as_xmm_float_reg(), left->as_xmm_float_reg());
+      }
       __ xorps(dest->as_xmm_float_reg(),
                ExternalAddress((address)float_signflip_pool));
     }
   } else if (dest->is_double_xmm()) {
-    if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) {
-      __ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg());
+#ifdef _LP64
+    if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+      assert(tmp->is_valid(), "need temporary");
+      assert_different_registers(left->as_xmm_double_reg(), tmp->as_xmm_double_reg());
+      __ vpxor(dest->as_xmm_double_reg(), tmp->as_xmm_double_reg(), left->as_xmm_double_reg(), 2);
     }
-    if (UseAVX > 0) {
-      __ vnegatesd(dest->as_xmm_double_reg(), dest->as_xmm_double_reg(),
-                   ExternalAddress((address)double_signflip_pool));
-    } else {
+    else
+#endif
+    {
+      assert(!tmp->is_valid(), "do not need temporary");
+      if (left->as_xmm_double_reg() != dest->as_xmm_double_reg()) {
+        __ movdbl(dest->as_xmm_double_reg(), left->as_xmm_double_reg());
+      }
       __ xorpd(dest->as_xmm_double_reg(),
                ExternalAddress((address)double_signflip_pool));
     }
--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -320,7 +320,21 @@
   value.set_destroys_register();
   value.load_item();
   LIR_Opr reg = rlock(x);
-  __ negate(value.result(), reg);
+
+  LIR_Opr tmp = LIR_OprFact::illegalOpr;
+#ifdef _LP64
+  if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
+    if (x->type()->tag() == doubleTag) {
+      tmp = new_register(T_DOUBLE);
+      __ move(LIR_OprFact::doubleConst(-0.0), tmp);
+    }
+    else if (x->type()->tag() == floatTag) {
+      tmp = new_register(T_FLOAT);
+      __ move(LIR_OprFact::floatConst(-0.0), tmp);
+    }
+  }
+#endif
+  __ negate(value.result(), reg, tmp);
 
   set_result(x, round_item(reg));
 }
@@ -748,8 +762,17 @@
   LIR_Opr calc_input = value.result();
   LIR_Opr calc_result = rlock_result(x);
 
+  LIR_Opr tmp = LIR_OprFact::illegalOpr;
+#ifdef _LP64
+  if (UseAVX > 2 && (!VM_Version::supports_avx512vl()) &&
+      (x->id() == vmIntrinsics::_dabs)) {
+    tmp = new_register(T_DOUBLE);
+    __ move(LIR_OprFact::doubleConst(-0.0), tmp);
+  }
+#endif
+
   switch(x->id()) {
-    case vmIntrinsics::_dabs:   __ abs  (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
+    case vmIntrinsics::_dabs:   __ abs  (calc_input, calc_result, tmp); break;
     case vmIntrinsics::_dsqrt:  __ sqrt (calc_input, calc_result, LIR_OprFact::illegalOpr); break;
     default:                    ShouldNotReachHere();
   }
--- a/src/hotspot/cpu/x86/globals_x86.hpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/x86/globals_x86.hpp	Thu Sep 27 22:04:49 2018 +0200
@@ -119,7 +119,7 @@
   product(bool, UseStoreImmI16, true,                                       \
           "Use store immediate 16-bits value instruction on x86")           \
                                                                             \
-  product(intx, UseAVX, 2,                                                  \
+  product(intx, UseAVX, 3,                                                  \
           "Highest supported AVX instructions set on x86/x64")              \
           range(0, 99)                                                      \
                                                                             \
--- a/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -2942,16 +2942,6 @@
   }
 }
 
-void MacroAssembler::push_zmm(XMMRegister reg) {
-  lea(rsp, Address(rsp, -64)); // Use lea to not affect flags
-  evmovdqul(Address(rsp, 0), reg, Assembler::AVX_512bit);
-}
-
-void MacroAssembler::pop_zmm(XMMRegister reg) {
-  evmovdqul(reg, Address(rsp, 0), Assembler::AVX_512bit);
-  lea(rsp, Address(rsp, 64)); // Use lea to not affect flags
-}
-
 void MacroAssembler::fremr(Register tmp) {
   save_rax(tmp);
   { Label L;
@@ -3332,27 +3322,18 @@
 }
 
 void MacroAssembler::movdqu(Address dst, XMMRegister src) {
-  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
-    Assembler::vextractf32x4(dst, src, 0);
-  } else {
+    assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
     Assembler::movdqu(dst, src);
-  }
 }
 
 void MacroAssembler::movdqu(XMMRegister dst, Address src) {
-  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
-    Assembler::vinsertf32x4(dst, dst, src, 0);
-  } else {
+    assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
     Assembler::movdqu(dst, src);
-  }
 }
 
 void MacroAssembler::movdqu(XMMRegister dst, XMMRegister src) {
-  if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
-    Assembler::evmovdqul(dst, src, Assembler::AVX_512bit);
-  } else {
+    assert(((dst->encoding() < 16  && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
     Assembler::movdqu(dst, src);
-  }
 }
 
 void MacroAssembler::movdqu(XMMRegister dst, AddressLiteral src, Register scratchReg) {
@@ -3365,28 +3346,18 @@
 }
 
 void MacroAssembler::vmovdqu(Address dst, XMMRegister src) {
-  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (src->encoding() > 15)) {
-    vextractf64x4_low(dst, src);
-  } else {
+    assert(((src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
     Assembler::vmovdqu(dst, src);
-  }
 }
 
 void MacroAssembler::vmovdqu(XMMRegister dst, Address src) {
-  if (UseAVX > 2 && !VM_Version::supports_avx512vl() && (dst->encoding() > 15)) {
-    vinsertf64x4_low(dst, src);
-  } else {
+    assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
     Assembler::vmovdqu(dst, src);
-  }
 }
 
 void MacroAssembler::vmovdqu(XMMRegister dst, XMMRegister src) {
-  if (UseAVX > 2 && !VM_Version::supports_avx512vl()) {
-    Assembler::evmovdqul(dst, src, Assembler::AVX_512bit);
-  }
-  else {
+    assert(((dst->encoding() < 16  && src->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
     Assembler::vmovdqu(dst, src);
-  }
 }
 
 void MacroAssembler::vmovdqu(XMMRegister dst, AddressLiteral src) {
@@ -3670,187 +3641,43 @@
 }
 
 void MacroAssembler::pcmpeqb(XMMRegister dst, XMMRegister src) {
-  int dst_enc = dst->encoding();
-  int src_enc = src->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::pcmpeqb(dst, src);
-  } else if ((dst_enc < 16) && (src_enc < 16)) {
-    Assembler::pcmpeqb(dst, src);
-  } else if (src_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::pcmpeqb(xmm0, src);
-    movdqu(dst, xmm0);
-    pop_zmm(xmm0);
-  } else if (dst_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::pcmpeqb(dst, xmm0);
-    pop_zmm(xmm0);
-  } else {
-    push_zmm(xmm0);
-    push_zmm(xmm1);
-    movdqu(xmm0, src);
-    movdqu(xmm1, dst);
-    Assembler::pcmpeqb(xmm1, xmm0);
-    movdqu(dst, xmm1);
-    pop_zmm(xmm1);
-    pop_zmm(xmm0);
-  }
+  assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::pcmpeqb(dst, src);
 }
 
 void MacroAssembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
-  int dst_enc = dst->encoding();
-  int src_enc = src->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::pcmpeqw(dst, src);
-  } else if ((dst_enc < 16) && (src_enc < 16)) {
-    Assembler::pcmpeqw(dst, src);
-  } else if (src_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::pcmpeqw(xmm0, src);
-    movdqu(dst, xmm0);
-    pop_zmm(xmm0);
-  } else if (dst_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::pcmpeqw(dst, xmm0);
-    pop_zmm(xmm0);
-  } else {
-    push_zmm(xmm0);
-    push_zmm(xmm1);
-    movdqu(xmm0, src);
-    movdqu(xmm1, dst);
-    Assembler::pcmpeqw(xmm1, xmm0);
-    movdqu(dst, xmm1);
-    pop_zmm(xmm1);
-    pop_zmm(xmm0);
-  }
+  assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::pcmpeqw(dst, src);
 }
 
 void MacroAssembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
-  int dst_enc = dst->encoding();
-  if (dst_enc < 16) {
-    Assembler::pcmpestri(dst, src, imm8);
-  } else {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::pcmpestri(xmm0, src, imm8);
-    movdqu(dst, xmm0);
-    pop_zmm(xmm0);
-  }
+  assert((dst->encoding() < 16),"XMM register should be 0-15");
+  Assembler::pcmpestri(dst, src, imm8);
 }
 
 void MacroAssembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
-  int dst_enc = dst->encoding();
-  int src_enc = src->encoding();
-  if ((dst_enc < 16) && (src_enc < 16)) {
-    Assembler::pcmpestri(dst, src, imm8);
-  } else if (src_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::pcmpestri(xmm0, src, imm8);
-    movdqu(dst, xmm0);
-    pop_zmm(xmm0);
-  } else if (dst_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::pcmpestri(dst, xmm0, imm8);
-    pop_zmm(xmm0);
-  } else {
-    push_zmm(xmm0);
-    push_zmm(xmm1);
-    movdqu(xmm0, src);
-    movdqu(xmm1, dst);
-    Assembler::pcmpestri(xmm1, xmm0, imm8);
-    movdqu(dst, xmm1);
-    pop_zmm(xmm1);
-    pop_zmm(xmm0);
-  }
+  assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15");
+  Assembler::pcmpestri(dst, src, imm8);
 }
 
 void MacroAssembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
-  int dst_enc = dst->encoding();
-  int src_enc = src->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::pmovzxbw(dst, src);
-  } else if ((dst_enc < 16) && (src_enc < 16)) {
-    Assembler::pmovzxbw(dst, src);
-  } else if (src_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::pmovzxbw(xmm0, src);
-    movdqu(dst, xmm0);
-    pop_zmm(xmm0);
-  } else if (dst_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::pmovzxbw(dst, xmm0);
-    pop_zmm(xmm0);
-  } else {
-    push_zmm(xmm0);
-    push_zmm(xmm1);
-    movdqu(xmm0, src);
-    movdqu(xmm1, dst);
-    Assembler::pmovzxbw(xmm1, xmm0);
-    movdqu(dst, xmm1);
-    pop_zmm(xmm1);
-    pop_zmm(xmm0);
-  }
+  assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::pmovzxbw(dst, src);
 }
 
 void MacroAssembler::pmovzxbw(XMMRegister dst, Address src) {
-  int dst_enc = dst->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::pmovzxbw(dst, src);
-  } else if (dst_enc < 16) {
-    Assembler::pmovzxbw(dst, src);
-  } else {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::pmovzxbw(xmm0, src);
-    movdqu(dst, xmm0);
-    pop_zmm(xmm0);
-  }
+  assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::pmovzxbw(dst, src);
 }
 
 void MacroAssembler::pmovmskb(Register dst, XMMRegister src) {
-  int src_enc = src->encoding();
-  if (src_enc < 16) {
-    Assembler::pmovmskb(dst, src);
-  } else {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::pmovmskb(dst, xmm0);
-    pop_zmm(xmm0);
-  }
+  assert((src->encoding() < 16),"XMM register should be 0-15");
+  Assembler::pmovmskb(dst, src);
 }
 
 void MacroAssembler::ptest(XMMRegister dst, XMMRegister src) {
-  int dst_enc = dst->encoding();
-  int src_enc = src->encoding();
-  if ((dst_enc < 16) && (src_enc < 16)) {
-    Assembler::ptest(dst, src);
-  } else if (src_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::ptest(xmm0, src);
-    pop_zmm(xmm0);
-  } else if (dst_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::ptest(dst, xmm0);
-    pop_zmm(xmm0);
-  } else {
-    push_zmm(xmm0);
-    push_zmm(xmm1);
-    movdqu(xmm0, src);
-    movdqu(xmm1, dst);
-    Assembler::ptest(xmm1, xmm0);
-    pop_zmm(xmm1);
-    pop_zmm(xmm0);
-  }
+  assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15");
+  Assembler::ptest(dst, src);
 }
 
 void MacroAssembler::sqrtsd(XMMRegister dst, AddressLiteral src) {
@@ -3979,194 +3806,33 @@
 }
 
 void MacroAssembler::vabsss(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  int src_enc = src->encoding();
-  if ((dst_enc < 16) && (nds_enc < 16)) {
-    vandps(dst, nds, negate_field, vector_len);
-  } else if ((src_enc < 16) && (dst_enc < 16)) {
-    // Use src scratch register
-    evmovdqul(src, nds, Assembler::AVX_512bit);
-    vandps(dst, src, negate_field, vector_len);
-  } else if (dst_enc < 16) {
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-    vandps(dst, dst, negate_field, vector_len);
-  } else if (nds_enc < 16) {
-    vandps(nds, nds, negate_field, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else if (src_enc < 16) {
-    evmovdqul(src, nds, Assembler::AVX_512bit);
-    vandps(src, src, negate_field, vector_len);
-    evmovdqul(dst, src, Assembler::AVX_512bit);
-  } else {
-    if (src_enc != dst_enc) {
-      // Use src scratch register
-      evmovdqul(src, xmm0, Assembler::AVX_512bit);
-      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-      vandps(xmm0, xmm0, negate_field, vector_len);
-      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-      evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    } else {
-      push_zmm(xmm0);
-      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-      vandps(xmm0, xmm0, negate_field, vector_len);
-      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-      pop_zmm(xmm0);
-    }
-  }
+  assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15");
+  vandps(dst, nds, negate_field, vector_len);
 }
 
 void MacroAssembler::vabssd(XMMRegister dst, XMMRegister nds, XMMRegister src, AddressLiteral negate_field, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  int src_enc = src->encoding();
-  if ((dst_enc < 16) && (nds_enc < 16)) {
-    vandpd(dst, nds, negate_field, vector_len);
-  } else if ((src_enc < 16) && (dst_enc < 16)) {
-    // Use src scratch register
-    evmovdqul(src, nds, Assembler::AVX_512bit);
-    vandpd(dst, src, negate_field, vector_len);
-  } else if (dst_enc < 16) {
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-    vandpd(dst, dst, negate_field, vector_len);
-  } else if (nds_enc < 16) {
-    vandpd(nds, nds, negate_field, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else if (src_enc < 16) {
-    evmovdqul(src, nds, Assembler::AVX_512bit);
-    vandpd(src, src, negate_field, vector_len);
-    evmovdqul(dst, src, Assembler::AVX_512bit);
-  } else {
-    if (src_enc != dst_enc) {
-      evmovdqul(src, xmm0, Assembler::AVX_512bit);
-      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-      vandpd(xmm0, xmm0, negate_field, vector_len);
-      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-      evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    } else {
-      push_zmm(xmm0);
-      evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-      vandpd(xmm0, xmm0, negate_field, vector_len);
-      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-      pop_zmm(xmm0);
-    }
-  }
+  assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15");
+  vandpd(dst, nds, negate_field, vector_len);
 }
 
 void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  int src_enc = src->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpaddb(dst, nds, src, vector_len);
-  } else if ((dst_enc < 16) && (src_enc < 16)) {
-    Assembler::vpaddb(dst, dst, src, vector_len);
-  } else if ((dst_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch for src
-    evmovdqul(nds, src, Assembler::AVX_512bit);
-    Assembler::vpaddb(dst, dst, nds, vector_len);
-  } else if ((src_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch for dst
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpaddb(nds, nds, src, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else if (dst_enc < 16) {
-    // use nds as scatch for xmm0 to hold src
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::vpaddb(dst, dst, xmm0, vector_len);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  } else {
-    // worse case scenario, all regs are in the upper bank
-    push_zmm(xmm1);
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm1, src, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpaddb(xmm0, xmm0, xmm1, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    pop_zmm(xmm1);
-  }
+  assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpaddb(dst, nds, src, vector_len);
 }
 
 void MacroAssembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpaddb(dst, nds, src, vector_len);
-  } else if (dst_enc < 16) {
-    Assembler::vpaddb(dst, dst, src, vector_len);
-  } else if (nds_enc < 16) {
-    // implies dst_enc in upper bank with src as scratch
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpaddb(nds, nds, src, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else {
-    // worse case scenario, all regs in upper bank
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpaddb(xmm0, xmm0, src, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  }
+  assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpaddb(dst, nds, src, vector_len);
 }
 
 void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  int src_enc = src->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpaddw(dst, nds, src, vector_len);
-  } else if ((dst_enc < 16) && (src_enc < 16)) {
-    Assembler::vpaddw(dst, dst, src, vector_len);
-  } else if ((dst_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch for src
-    evmovdqul(nds, src, Assembler::AVX_512bit);
-    Assembler::vpaddw(dst, dst, nds, vector_len);
-  } else if ((src_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch for dst
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpaddw(nds, nds, src, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else if (dst_enc < 16) {
-    // use nds as scatch for xmm0 to hold src
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::vpaddw(dst, dst, xmm0, vector_len);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  } else {
-    // worse case scenario, all regs are in the upper bank
-    push_zmm(xmm1);
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm1, src, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpaddw(xmm0, xmm0, xmm1, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    pop_zmm(xmm1);
-  }
+  assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpaddw(dst, nds, src, vector_len);
 }
 
 void MacroAssembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpaddw(dst, nds, src, vector_len);
-  } else if (dst_enc < 16) {
-    Assembler::vpaddw(dst, dst, src, vector_len);
-  } else if (nds_enc < 16) {
-    // implies dst_enc in upper bank with nds as scratch
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpaddw(nds, nds, src, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else {
-    // worse case scenario, all regs in upper bank
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpaddw(xmm0, xmm0, src, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  }
+  assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpaddw(dst, nds, src, vector_len);
 }
 
 void MacroAssembler::vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
@@ -4178,627 +3844,109 @@
   }
 }
 
-void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
-  int dst_enc = dst->encoding();
-  int src_enc = src->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpbroadcastw(dst, src);
-  } else if ((dst_enc < 16) && (src_enc < 16)) {
-    Assembler::vpbroadcastw(dst, src);
-  } else if (src_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpbroadcastw(xmm0, src);
-    movdqu(dst, xmm0);
-    pop_zmm(xmm0);
-  } else if (dst_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::vpbroadcastw(dst, xmm0);
-    pop_zmm(xmm0);
-  } else {
-    push_zmm(xmm0);
-    push_zmm(xmm1);
-    movdqu(xmm0, src);
-    movdqu(xmm1, dst);
-    Assembler::vpbroadcastw(xmm1, xmm0);
-    movdqu(dst, xmm1);
-    pop_zmm(xmm1);
-    pop_zmm(xmm0);
-  }
+void MacroAssembler::vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpbroadcastw(dst, src, vector_len);
 }
 
 void MacroAssembler::vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  int src_enc = src->encoding();
-  assert(dst_enc == nds_enc, "");
-  if ((dst_enc < 16) && (src_enc < 16)) {
-    Assembler::vpcmpeqb(dst, nds, src, vector_len);
-  } else if (src_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpcmpeqb(xmm0, xmm0, src, vector_len);
-    movdqu(dst, xmm0);
-    pop_zmm(xmm0);
-  } else if (dst_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::vpcmpeqb(dst, dst, xmm0, vector_len);
-    pop_zmm(xmm0);
-  } else {
-    push_zmm(xmm0);
-    push_zmm(xmm1);
-    movdqu(xmm0, src);
-    movdqu(xmm1, dst);
-    Assembler::vpcmpeqb(xmm1, xmm1, xmm0, vector_len);
-    movdqu(dst, xmm1);
-    pop_zmm(xmm1);
-    pop_zmm(xmm0);
-  }
+  assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpcmpeqb(dst, nds, src, vector_len);
 }
 
 void MacroAssembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  int src_enc = src->encoding();
-  assert(dst_enc == nds_enc, "");
-  if ((dst_enc < 16) && (src_enc < 16)) {
-    Assembler::vpcmpeqw(dst, nds, src, vector_len);
-  } else if (src_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpcmpeqw(xmm0, xmm0, src, vector_len);
-    movdqu(dst, xmm0);
-    pop_zmm(xmm0);
-  } else if (dst_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::vpcmpeqw(dst, dst, xmm0, vector_len);
-    pop_zmm(xmm0);
-  } else {
-    push_zmm(xmm0);
-    push_zmm(xmm1);
-    movdqu(xmm0, src);
-    movdqu(xmm1, dst);
-    Assembler::vpcmpeqw(xmm1, xmm1, xmm0, vector_len);
-    movdqu(dst, xmm1);
-    pop_zmm(xmm1);
-    pop_zmm(xmm0);
-  }
+  assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpcmpeqw(dst, nds, src, vector_len);
 }
 
 void MacroAssembler::vpmovzxbw(XMMRegister dst, Address src, int vector_len) {
-  int dst_enc = dst->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpmovzxbw(dst, src, vector_len);
-  } else if (dst_enc < 16) {
-    Assembler::vpmovzxbw(dst, src, vector_len);
-  } else {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpmovzxbw(xmm0, src, vector_len);
-    movdqu(dst, xmm0);
-    pop_zmm(xmm0);
-  }
+  assert(((dst->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpmovzxbw(dst, src, vector_len);
 }
 
 void MacroAssembler::vpmovmskb(Register dst, XMMRegister src) {
-  int src_enc = src->encoding();
-  if (src_enc < 16) {
-    Assembler::vpmovmskb(dst, src);
-  } else {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::vpmovmskb(dst, xmm0);
-    pop_zmm(xmm0);
-  }
+  assert((src->encoding() < 16),"XMM register should be 0-15");
+  Assembler::vpmovmskb(dst, src);
 }
 
 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  int src_enc = src->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpmullw(dst, nds, src, vector_len);
-  } else if ((dst_enc < 16) && (src_enc < 16)) {
-    Assembler::vpmullw(dst, dst, src, vector_len);
-  } else if ((dst_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch for src
-    evmovdqul(nds, src, Assembler::AVX_512bit);
-    Assembler::vpmullw(dst, dst, nds, vector_len);
-  } else if ((src_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch for dst
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpmullw(nds, nds, src, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else if (dst_enc < 16) {
-    // use nds as scatch for xmm0 to hold src
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::vpmullw(dst, dst, xmm0, vector_len);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  } else {
-    // worse case scenario, all regs are in the upper bank
-    push_zmm(xmm1);
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm1, src, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpmullw(xmm0, xmm0, xmm1, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    pop_zmm(xmm1);
-  }
+  assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpmullw(dst, nds, src, vector_len);
 }
 
 void MacroAssembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpmullw(dst, nds, src, vector_len);
-  } else if (dst_enc < 16) {
-    Assembler::vpmullw(dst, dst, src, vector_len);
-  } else if (nds_enc < 16) {
-    // implies dst_enc in upper bank with src as scratch
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpmullw(nds, nds, src, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else {
-    // worse case scenario, all regs in upper bank
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpmullw(xmm0, xmm0, src, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  }
+  assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpmullw(dst, nds, src, vector_len);
 }
 
 void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  int src_enc = src->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpsubb(dst, nds, src, vector_len);
-  } else if ((dst_enc < 16) && (src_enc < 16)) {
-    Assembler::vpsubb(dst, dst, src, vector_len);
-  } else if ((dst_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch for src
-    evmovdqul(nds, src, Assembler::AVX_512bit);
-    Assembler::vpsubb(dst, dst, nds, vector_len);
-  } else if ((src_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch for dst
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpsubb(nds, nds, src, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else if (dst_enc < 16) {
-    // use nds as scatch for xmm0 to hold src
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::vpsubb(dst, dst, xmm0, vector_len);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  } else {
-    // worse case scenario, all regs are in the upper bank
-    push_zmm(xmm1);
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm1, src, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpsubb(xmm0, xmm0, xmm1, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    pop_zmm(xmm1);
-  }
+  assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpsubb(dst, nds, src, vector_len);
 }
 
 void MacroAssembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpsubb(dst, nds, src, vector_len);
-  } else if (dst_enc < 16) {
-    Assembler::vpsubb(dst, dst, src, vector_len);
-  } else if (nds_enc < 16) {
-    // implies dst_enc in upper bank with src as scratch
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpsubb(nds, nds, src, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else {
-    // worse case scenario, all regs in upper bank
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpsubb(xmm0, xmm0, src, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  }
+  assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpsubb(dst, nds, src, vector_len);
 }
 
 void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  int src_enc = src->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpsubw(dst, nds, src, vector_len);
-  } else if ((dst_enc < 16) && (src_enc < 16)) {
-    Assembler::vpsubw(dst, dst, src, vector_len);
-  } else if ((dst_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch for src
-    evmovdqul(nds, src, Assembler::AVX_512bit);
-    Assembler::vpsubw(dst, dst, nds, vector_len);
-  } else if ((src_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch for dst
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpsubw(nds, nds, src, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else if (dst_enc < 16) {
-    // use nds as scatch for xmm0 to hold src
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::vpsubw(dst, dst, xmm0, vector_len);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  } else {
-    // worse case scenario, all regs are in the upper bank
-    push_zmm(xmm1);
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm1, src, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpsubw(xmm0, xmm0, xmm1, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    pop_zmm(xmm1);
-  }
+  assert(((dst->encoding() < 16 && src->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpsubw(dst, nds, src, vector_len);
 }
 
 void MacroAssembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpsubw(dst, nds, src, vector_len);
-  } else if (dst_enc < 16) {
-    Assembler::vpsubw(dst, dst, src, vector_len);
-  } else if (nds_enc < 16) {
-    // implies dst_enc in upper bank with src as scratch
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpsubw(nds, nds, src, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else {
-    // worse case scenario, all regs in upper bank
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpsubw(xmm0, xmm0, src, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  }
+  assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpsubw(dst, nds, src, vector_len);
 }
 
 void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  int shift_enc = shift->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpsraw(dst, nds, shift, vector_len);
-  } else if ((dst_enc < 16) && (shift_enc < 16)) {
-    Assembler::vpsraw(dst, dst, shift, vector_len);
-  } else if ((dst_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch with shift
-    evmovdqul(nds, shift, Assembler::AVX_512bit);
-    Assembler::vpsraw(dst, dst, nds, vector_len);
-  } else if ((shift_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch with dst
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpsraw(nds, nds, shift, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else if (dst_enc < 16) {
-    // use nds to save a copy of xmm0 and hold shift
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
-    Assembler::vpsraw(dst, dst, xmm0, vector_len);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  } else if (nds_enc < 16) {
-    // use nds and dst as temps
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
-    Assembler::vpsraw(nds, nds, xmm0, vector_len);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else {
-    // worse case scenario, all regs are in the upper bank
-    push_zmm(xmm1);
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm1, shift, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpsraw(xmm0, xmm0, xmm1, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    pop_zmm(xmm1);
-  }
+  assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpsraw(dst, nds, shift, vector_len);
 }
 
 void MacroAssembler::vpsraw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpsraw(dst, nds, shift, vector_len);
-  } else if (dst_enc < 16) {
-    Assembler::vpsraw(dst, dst, shift, vector_len);
-  } else if (nds_enc < 16) {
-    // use nds as scratch
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpsraw(nds, nds, shift, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else {
-    // use nds as scratch for xmm0
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpsraw(xmm0, xmm0, shift, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  }
+  assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpsraw(dst, nds, shift, vector_len);
 }
 
 void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  int shift_enc = shift->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpsrlw(dst, nds, shift, vector_len);
-  } else if ((dst_enc < 16) && (shift_enc < 16)) {
-    Assembler::vpsrlw(dst, dst, shift, vector_len);
-  } else if ((dst_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch with shift
-    evmovdqul(nds, shift, Assembler::AVX_512bit);
-    Assembler::vpsrlw(dst, dst, nds, vector_len);
-  } else if ((shift_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch with dst
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpsrlw(nds, nds, shift, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else if (dst_enc < 16) {
-    // use nds to save a copy of xmm0 and hold shift
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
-    Assembler::vpsrlw(dst, dst, xmm0, vector_len);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  } else if (nds_enc < 16) {
-    // use nds and dst as temps
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
-    Assembler::vpsrlw(nds, nds, xmm0, vector_len);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else {
-    // worse case scenario, all regs are in the upper bank
-    push_zmm(xmm1);
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm1, shift, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpsrlw(xmm0, xmm0, xmm1, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    pop_zmm(xmm1);
-  }
+  assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpsrlw(dst, nds, shift, vector_len);
 }
 
 void MacroAssembler::vpsrlw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpsrlw(dst, nds, shift, vector_len);
-  } else if (dst_enc < 16) {
-    Assembler::vpsrlw(dst, dst, shift, vector_len);
-  } else if (nds_enc < 16) {
-    // use nds as scratch
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpsrlw(nds, nds, shift, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else {
-    // use nds as scratch for xmm0
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpsrlw(xmm0, xmm0, shift, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  }
+  assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpsrlw(dst, nds, shift, vector_len);
 }
 
 void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, XMMRegister shift, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  int shift_enc = shift->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpsllw(dst, nds, shift, vector_len);
-  } else if ((dst_enc < 16) && (shift_enc < 16)) {
-    Assembler::vpsllw(dst, dst, shift, vector_len);
-  } else if ((dst_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch with shift
-    evmovdqul(nds, shift, Assembler::AVX_512bit);
-    Assembler::vpsllw(dst, dst, nds, vector_len);
-  } else if ((shift_enc < 16) && (nds_enc < 16)) {
-    // use nds as scratch with dst
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpsllw(nds, nds, shift, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else if (dst_enc < 16) {
-    // use nds to save a copy of xmm0 and hold shift
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
-    Assembler::vpsllw(dst, dst, xmm0, vector_len);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  } else if (nds_enc < 16) {
-    // use nds and dst as temps
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, shift, Assembler::AVX_512bit);
-    Assembler::vpsllw(nds, nds, xmm0, vector_len);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else {
-    // worse case scenario, all regs are in the upper bank
-    push_zmm(xmm1);
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm1, shift, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpsllw(xmm0, xmm0, xmm1, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-    pop_zmm(xmm1);
-  }
+  assert(((dst->encoding() < 16 && shift->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpsllw(dst, nds, shift, vector_len);
 }
 
 void MacroAssembler::vpsllw(XMMRegister dst, XMMRegister nds, int shift, int vector_len) {
-  int dst_enc = dst->encoding();
-  int nds_enc = nds->encoding();
-  if (VM_Version::supports_avxonly() || VM_Version::supports_avx512bw()) {
-    Assembler::vpsllw(dst, nds, shift, vector_len);
-  } else if (dst_enc < 16) {
-    Assembler::vpsllw(dst, dst, shift, vector_len);
-  } else if (nds_enc < 16) {
-    // use nds as scratch
-    evmovdqul(nds, dst, Assembler::AVX_512bit);
-    Assembler::vpsllw(nds, nds, shift, vector_len);
-    evmovdqul(dst, nds, Assembler::AVX_512bit);
-  } else {
-    // use nds as scratch for xmm0
-    evmovdqul(nds, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vpsllw(xmm0, xmm0, shift, vector_len);
-    evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-    evmovdqul(xmm0, nds, Assembler::AVX_512bit);
-  }
+  assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::vpsllw(dst, nds, shift, vector_len);
 }
 
 void MacroAssembler::vptest(XMMRegister dst, XMMRegister src) {
-  int dst_enc = dst->encoding();
-  int src_enc = src->encoding();
-  if ((dst_enc < 16) && (src_enc < 16)) {
-    Assembler::vptest(dst, src);
-  } else if (src_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-    Assembler::vptest(xmm0, src);
-    pop_zmm(xmm0);
-  } else if (dst_enc < 16) {
-    push_zmm(xmm0);
-    evmovdqul(xmm0, src, Assembler::AVX_512bit);
-    Assembler::vptest(dst, xmm0);
-    pop_zmm(xmm0);
-  } else {
-    push_zmm(xmm0);
-    push_zmm(xmm1);
-    movdqu(xmm0, src);
-    movdqu(xmm1, dst);
-    Assembler::vptest(xmm1, xmm0);
-    pop_zmm(xmm1);
-    pop_zmm(xmm0);
-  }
-}
-
-// This instruction exists within macros, ergo we cannot control its input
-// when emitted through those patterns.
+  assert((dst->encoding() < 16 && src->encoding() < 16),"XMM register should be 0-15");
+  Assembler::vptest(dst, src);
+}
+
 void MacroAssembler::punpcklbw(XMMRegister dst, XMMRegister src) {
-  if (VM_Version::supports_avx512nobw()) {
-    int dst_enc = dst->encoding();
-    int src_enc = src->encoding();
-    if (dst_enc == src_enc) {
-      if (dst_enc < 16) {
-        Assembler::punpcklbw(dst, src);
-      } else {
-        push_zmm(xmm0);
-        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-        Assembler::punpcklbw(xmm0, xmm0);
-        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-        pop_zmm(xmm0);
-      }
-    } else {
-      if ((src_enc < 16) && (dst_enc < 16)) {
-        Assembler::punpcklbw(dst, src);
-      } else if (src_enc < 16) {
-        push_zmm(xmm0);
-        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-        Assembler::punpcklbw(xmm0, src);
-        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-        pop_zmm(xmm0);
-      } else if (dst_enc < 16) {
-        push_zmm(xmm0);
-        evmovdqul(xmm0, src, Assembler::AVX_512bit);
-        Assembler::punpcklbw(dst, xmm0);
-        pop_zmm(xmm0);
-      } else {
-        push_zmm(xmm0);
-        push_zmm(xmm1);
-        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-        evmovdqul(xmm1, src, Assembler::AVX_512bit);
-        Assembler::punpcklbw(xmm0, xmm1);
-        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-        pop_zmm(xmm1);
-        pop_zmm(xmm0);
-      }
-    }
-  } else {
-    Assembler::punpcklbw(dst, src);
-  }
+  assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::punpcklbw(dst, src);
 }
 
 void MacroAssembler::pshufd(XMMRegister dst, Address src, int mode) {
-  if (VM_Version::supports_avx512vl()) {
-    Assembler::pshufd(dst, src, mode);
-  } else {
-    int dst_enc = dst->encoding();
-    if (dst_enc < 16) {
-      Assembler::pshufd(dst, src, mode);
-    } else {
-      push_zmm(xmm0);
-      Assembler::pshufd(xmm0, src, mode);
-      evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-      pop_zmm(xmm0);
-    }
-  }
-}
-
-// This instruction exists within macros, ergo we cannot control its input
-// when emitted through those patterns.
+  assert(((dst->encoding() < 16) || VM_Version::supports_avx512vl()),"XMM register should be 0-15");
+  Assembler::pshufd(dst, src, mode);
+}
+
 void MacroAssembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
-  if (VM_Version::supports_avx512nobw()) {
-    int dst_enc = dst->encoding();
-    int src_enc = src->encoding();
-    if (dst_enc == src_enc) {
-      if (dst_enc < 16) {
-        Assembler::pshuflw(dst, src, mode);
-      } else {
-        push_zmm(xmm0);
-        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-        Assembler::pshuflw(xmm0, xmm0, mode);
-        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-        pop_zmm(xmm0);
-      }
-    } else {
-      if ((src_enc < 16) && (dst_enc < 16)) {
-        Assembler::pshuflw(dst, src, mode);
-      } else if (src_enc < 16) {
-        push_zmm(xmm0);
-        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-        Assembler::pshuflw(xmm0, src, mode);
-        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-        pop_zmm(xmm0);
-      } else if (dst_enc < 16) {
-        push_zmm(xmm0);
-        evmovdqul(xmm0, src, Assembler::AVX_512bit);
-        Assembler::pshuflw(dst, xmm0, mode);
-        pop_zmm(xmm0);
-      } else {
-        push_zmm(xmm0);
-        push_zmm(xmm1);
-        evmovdqul(xmm0, dst, Assembler::AVX_512bit);
-        evmovdqul(xmm1, src, Assembler::AVX_512bit);
-        Assembler::pshuflw(xmm0, xmm1, mode);
-        evmovdqul(dst, xmm0, Assembler::AVX_512bit);
-        pop_zmm(xmm1);
-        pop_zmm(xmm0);
-      }
-    }
-  } else {
-    Assembler::pshuflw(dst, src, mode);
-  }
+  assert(((dst->encoding() < 16 && src->encoding() < 16) || VM_Version::supports_avx512vlbw()),"XMM register should be 0-15");
+  Assembler::pshuflw(dst, src, mode);
 }
 
 void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
@@ -4874,47 +4022,13 @@
 }
 
 void MacroAssembler::vnegatess(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
-  int nds_enc = nds->encoding();
-  int dst_enc = dst->encoding();
-  bool dst_upper_bank = (dst_enc > 15);
-  bool nds_upper_bank = (nds_enc > 15);
-  if (VM_Version::supports_avx512novl() &&
-      (nds_upper_bank || dst_upper_bank)) {
-    if (dst_upper_bank) {
-      push_zmm(xmm0);
-      movflt(xmm0, nds);
-      vxorps(xmm0, xmm0, src, Assembler::AVX_128bit);
-      movflt(dst, xmm0);
-      pop_zmm(xmm0);
-    } else {
-      movflt(dst, nds);
-      vxorps(dst, dst, src, Assembler::AVX_128bit);
-    }
-  } else {
-    vxorps(dst, nds, src, Assembler::AVX_128bit);
-  }
+  assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15");
+  vxorps(dst, nds, src, Assembler::AVX_128bit);
 }
 
 void MacroAssembler::vnegatesd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
-  int nds_enc = nds->encoding();
-  int dst_enc = dst->encoding();
-  bool dst_upper_bank = (dst_enc > 15);
-  bool nds_upper_bank = (nds_enc > 15);
-  if (VM_Version::supports_avx512novl() &&
-      (nds_upper_bank || dst_upper_bank)) {
-    if (dst_upper_bank) {
-      push_zmm(xmm0);
-      movdbl(xmm0, nds);
-      vxorpd(xmm0, xmm0, src, Assembler::AVX_128bit);
-      movdbl(dst, xmm0);
-      pop_zmm(xmm0);
-    } else {
-      movdbl(dst, nds);
-      vxorpd(dst, dst, src, Assembler::AVX_128bit);
-    }
-  } else {
-    vxorpd(dst, nds, src, Assembler::AVX_128bit);
-  }
+  assert(((dst->encoding() < 16 && nds->encoding() < 16) || VM_Version::supports_avx512vldq()),"XMM register should be 0-15");
+  vxorpd(dst, nds, src, Assembler::AVX_128bit);
 }
 
 void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
@@ -7064,7 +6178,7 @@
     cmpl(cnt1, 2*stride);
     jcc(Assembler::less, SCAN_TO_8_CHAR_INIT);
     movdl(vec1, ch);
-    vpbroadcastw(vec1, vec1);
+    vpbroadcastw(vec1, vec1, Assembler::AVX_256bit);
     vpxor(vec2, vec2);
     movl(tmp, cnt1);
     andl(tmp, 0xFFFFFFF0);  //vector count (in chars)
@@ -7659,7 +6773,7 @@
 
       movl(tmp1, 0x80808080);   // create mask to test for Unicode chars in vector
       movdl(vec2, tmp1);
-      vpbroadcastd(vec2, vec2);
+      vpbroadcastd(vec2, vec2, Assembler::AVX_256bit);
 
       bind(COMPARE_WIDE_VECTORS);
       vmovdqu(vec1, Address(ary1, len, Address::times_1));
@@ -8091,7 +7205,7 @@
       if (UseAVX > 2 && UseUnalignedLoadStores) {
         // Fill 64-byte chunks
         Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
-        evpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit);
+        vpbroadcastd(xtmp, xtmp, Assembler::AVX_512bit);
 
         subl(count, 16 << shift);
         jcc(Assembler::less, L_check_fill_32_bytes);
@@ -8114,7 +7228,7 @@
       } else if (UseAVX == 2 && UseUnalignedLoadStores) {
         // Fill 64-byte chunks
         Label L_fill_64_bytes_loop, L_check_fill_32_bytes;
-        vpbroadcastd(xtmp, xtmp);
+        vpbroadcastd(xtmp, xtmp, Assembler::AVX_256bit);
 
         subl(count, 16 << shift);
         jcc(Assembler::less, L_check_fill_32_bytes);
@@ -8256,7 +7370,7 @@
       Label L_chars_32_check, L_copy_32_chars, L_copy_32_chars_exit;
       movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vector
       movdl(tmp1Reg, tmp5);
-      vpbroadcastd(tmp1Reg, tmp1Reg);
+      vpbroadcastd(tmp1Reg, tmp1Reg, Assembler::AVX_256bit);
       jmp(L_chars_32_check);
 
       bind(L_copy_32_chars);
--- a/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/x86/macroAssembler_x86.hpp	Thu Sep 27 22:04:49 2018 +0200
@@ -482,10 +482,6 @@
   // from register xmm0. Otherwise, the value is stored from the FPU stack.
   void store_double(Address dst);
 
-  // Save/restore ZMM (512bit) register on stack.
-  void push_zmm(XMMRegister reg);
-  void pop_zmm(XMMRegister reg);
-
   // pushes double TOS element of FPU stack on CPU stack; pops from FPU stack
   void push_fTOS();
 
@@ -1214,9 +1210,11 @@
   void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) { Assembler::vpand(dst, nds, src, vector_len); }
   void vpand(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len);
 
-  void vpbroadcastw(XMMRegister dst, XMMRegister src);
+  void vpbroadcastw(XMMRegister dst, XMMRegister src, int vector_len);
+  void vpbroadcastw(XMMRegister dst, Address src, int vector_len) { Assembler::vpbroadcastw(dst, src, vector_len); }
 
   void vpcmpeqb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
   void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
 
   void vpmovzxbw(XMMRegister dst, Address src, int vector_len);
--- a/src/hotspot/cpu/x86/vm_version_x86.cpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/x86/vm_version_x86.cpp	Thu Sep 27 22:04:49 2018 +0200
@@ -403,7 +403,7 @@
       __ movdl(xmm0, rcx);
       __ movl(rcx, 0xffff);
       __ kmovwl(k1, rcx);
-      __ evpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
+      __ vpbroadcastd(xmm0, xmm0, Assembler::AVX_512bit);
       __ evmovdqul(xmm7, xmm0, Assembler::AVX_512bit);
 #ifdef _LP64
       __ evmovdqul(xmm8, xmm0, Assembler::AVX_512bit);
@@ -885,7 +885,7 @@
     FLAG_SET_DEFAULT(UseSHA, false);
   }
 
-  if (supports_sha() && UseSHA) {
+  if (supports_sha() && supports_sse4_1() && UseSHA) {
     if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) {
       FLAG_SET_DEFAULT(UseSHA1Intrinsics, true);
     }
@@ -894,7 +894,7 @@
     FLAG_SET_DEFAULT(UseSHA1Intrinsics, false);
   }
 
-  if (UseSHA) {
+  if (supports_sse4_1() && UseSHA) {
     if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) {
       FLAG_SET_DEFAULT(UseSHA256Intrinsics, true);
     }
--- a/src/hotspot/cpu/x86/vm_version_x86.hpp	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/x86/vm_version_x86.hpp	Thu Sep 27 22:04:49 2018 +0200
@@ -816,7 +816,10 @@
   static bool supports_avx512cd() { return (_features & CPU_AVX512CD) != 0; }
   static bool supports_avx512bw() { return (_features & CPU_AVX512BW) != 0; }
   static bool supports_avx512vl() { return (_features & CPU_AVX512VL) != 0; }
-  static bool supports_avx512vlbw() { return (supports_avx512bw() && supports_avx512vl()); }
+  static bool supports_avx512vlbw() { return (supports_evex() && supports_avx512bw() && supports_avx512vl()); }
+  static bool supports_avx512vldq() { return (supports_evex() && supports_avx512dq() && supports_avx512vl()); }
+  static bool supports_avx512vlbwdq() { return (supports_evex() && supports_avx512vl() &&
+                                                supports_avx512bw() && supports_avx512dq()); }
   static bool supports_avx512novl() { return (supports_evex() && !supports_avx512vl()); }
   static bool supports_avx512nobw() { return (supports_evex() && !supports_avx512bw()); }
   static bool supports_avx256only() { return (supports_avx2() && !supports_evex()); }
--- a/src/hotspot/cpu/x86/x86.ad	Thu Sep 20 22:05:07 2018 +0200
+++ b/src/hotspot/cpu/x86/x86.ad	Thu Sep 27 22:04:49 2018 +0200
@@ -729,6 +729,7 @@
                     );
 
 reg_class_dynamic float_reg(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic float_reg_vl(float_reg_evex, float_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 
 // Class for pre evex double registers
 reg_class double_reg_legacy(XMM0,  XMM0b,
@@ -789,6 +790,7 @@
                      );
 
 reg_class_dynamic double_reg(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic double_reg_vl(double_reg_evex, double_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
 
 // Class for pre evex 32bit vector registers
 reg_class vectors_reg_legacy(XMM0,
@@ -849,6 +851,7 @@
                       );
 
 reg_class_dynamic vectors_reg(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectors_reg_vlbwdq(vectors_reg_evex, vectors_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 
 // Class for all 64bit vector registers
 reg_class vectord_reg_legacy(XMM0,  XMM0b,
@@ -909,6 +912,7 @@
                       );
 
 reg_class_dynamic vectord_reg(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectord_reg_vlbwdq(vectord_reg_evex, vectord_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 
 // Class for all 128bit vector registers
 reg_class vectorx_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,
@@ -969,6 +973,7 @@
                       );
 
 reg_class_dynamic vectorx_reg(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectorx_reg_vlbwdq(vectorx_reg_evex, vectorx_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 
 // Class for all 256bit vector registers
 reg_class vectory_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
@@ -1029,9 +1034,10 @@
                       );
 
 reg_class_dynamic vectory_reg(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectory_reg_vlbwdq(vectory_reg_evex, vectory_reg_legacy, %{ VM_Version::supports_avx512vlbwdq() %} );
 
 // Class for all 512bit vector registers
-reg_class vectorz_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
+reg_class vectorz_reg_evex(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
                       XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
                       XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
                       XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
@@ -1067,6 +1073,30 @@
 #endif
                       );
 
+// Class for restricted 512bit vector registers
+reg_class vectorz_reg_legacy(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,  XMM0i,  XMM0j,  XMM0k,  XMM0l,  XMM0m,  XMM0n,  XMM0o,  XMM0p,
+                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,  XMM1i,  XMM1j,  XMM1k,  XMM1l,  XMM1m,  XMM1n,  XMM1o,  XMM1p,
+                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,  XMM2i,  XMM2j,  XMM2k,  XMM2l,  XMM2m,  XMM2n,  XMM2o,  XMM2p,
+                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,  XMM3i,  XMM3j,  XMM3k,  XMM3l,  XMM3m,  XMM3n,  XMM3o,  XMM3p,
+                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,  XMM4i,  XMM4j,  XMM4k,  XMM4l,  XMM4m,  XMM4n,  XMM4o,  XMM4p,
+                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,  XMM5i,  XMM5j,  XMM5k,  XMM5l,  XMM5m,  XMM5n,  XMM5o,  XMM5p,
+                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,  XMM6i,  XMM6j,  XMM6k,  XMM6l,  XMM6m,  XMM6n,  XMM6o,  XMM6p,
+                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h,  XMM7i,  XMM7j,  XMM7k,  XMM7l,  XMM7m,  XMM7n,  XMM7o,  XMM7p
+#ifdef _LP64
+                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,  XMM8i,  XMM8j,  XMM8k,  XMM8l,  XMM8m,  XMM8n,  XMM8o,  XMM8p,
+                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,  XMM9i,  XMM9j,  XMM9k,  XMM9l,  XMM9m,  XMM9n,  XMM9o,  XMM9p,
+                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h, XMM10i, XMM10j, XMM10k, XMM10l, XMM10m, XMM10n, XMM10o, XMM10p,
+                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h, XMM11i, XMM11j, XMM11k, XMM11l, XMM11m, XMM11n, XMM11o, XMM11p,
+                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h, XMM12i, XMM12j, XMM12k, XMM12l, XMM12m, XMM12n, XMM12o, XMM12p,
+                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h, XMM13i, XMM13j, XMM13k, XMM13l, XMM13m, XMM13n, XMM13o, XMM13p,
+                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h, XMM14i, XMM14j, XMM14k, XMM14l, XMM14m, XMM14n, XMM14o, XMM14p,
+                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h, XMM15i, XMM15j, XMM15k, XMM15l, XMM15m, XMM15n, XMM15o, XMM15p
+#endif
+                      );
+
+reg_class_dynamic vectorz_reg(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() %} );
+reg_class_dynamic vectorz_reg_vl(vectorz_reg_evex, vectorz_reg_legacy, %{ VM_Version::supports_evex() && VM_Version::supports_avx512vl() %} );
+
 reg_class xmm0_reg(XMM0, XMM0b, XMM0c, XMM0d);
 reg_class ymm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h);
 reg_class zmm0_reg(XMM0, XMM0b, XMM0c, XMM0d, XMM0e, XMM0f, XMM0g, XMM0h, XMM0i, XMM0j, XMM0k, XMM0l, XMM0m, XMM0n, XMM0o, XMM0p);
@@ -1487,6 +1517,8 @@
   // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
   if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
     size = (UseAVX > 2) ? 64 : 32;
+  if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR))
+    size = (VM_Version::supports_avx512bw()) ? 64 : 32;
   // Use flag to limit vector size.
   size = MIN2(size,(int)MaxVectorSize);
   // Minimum 2 values in vector (or 4 for bytes).
@@ -1528,7 +1560,7 @@
   return MIN2(size,max_size);
 }
 
-// Vector ideal reg corresponding to specidied size in bytes
+// Vector ideal reg corresponding to specified size in bytes
 const uint Matcher::vector_ideal_reg(int size) {
   assert(MaxVectorSize >= size, "");
   switch(size) {
@@ -1648,10 +1680,28 @@
     case Op_VecS: // copy whole register
     case Op_VecD:
     case Op_VecX:
+#ifndef LP64
       __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+#else
+      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
+        __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+      } else {
+        __ vpxor(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), 2);
+        __ vinserti32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
+     }
+#endif
       break;
     case Op_VecY:
+#ifndef LP64
       __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+#else
+      if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
+        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+      } else {
+        __ vpxor(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), 2);
+        __ vinserti64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0);
+     }
+#endif
       break;
     case Op_VecZ:
       __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2);
@@ -1703,10 +1753,28 @@
         __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
         break;
       case Op_VecX:
+#ifndef LP64
         __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+#else
+        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
+          __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+        } else {
+          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
+          __ vinserti32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
+        }
+#endif
         break;
       case Op_VecY:
+#ifndef LP64
         __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+#else
+        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
+          __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+        } else {
+          __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2);
+          __ vinserti64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0);
+        }
+#endif
         break;
       case Op_VecZ:
         __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2);
@@ -1723,10 +1791,28 @@
         __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
         break;
       case Op_VecX:
+#ifndef LP64
         __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+#else
+        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
+          __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+        }
+        else {
+          __ vextracti32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
+        }
+#endif
         break;
       case Op_VecY:
+#ifndef LP64
         __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+#else
+        if ((UseAVX < 3) || VM_Version::supports_avx512vl()) {
+          __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+        }
+        else {
+          __ vextracti64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 0x0);
+        }
+#endif
         break;
       case Op_VecZ:
         __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2);
@@ -1908,7 +1994,6 @@
 // in the ADLC because operands constitute user defined types which are used in
 // instruction definitions.
 
-// This one generically applies only for evex, so only one version
 operand vecZ() %{
   constraint(ALLOC_IN_RC(vectorz_reg));
   match(VecZ);
@@ -1917,6 +2002,14 @@
   interface(REG_INTER);
 %}
 
+operand legVecZ() %{
+  constraint(ALLOC_IN_RC(vectorz_reg_vl));
+  match(VecZ);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 // Comparison Code for FP conditional move
 operand cmpOp_vcmppd() %{
   match(Bool);
@@ -2547,8 +2640,8 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct absF_reg_reg(regF dst, regF src) %{
-  predicate(VM_Version::supports_avxonly());
+instruct absF_reg_reg(vlRegF dst, vlRegF src) %{
+  predicate(UseAVX > 0);
   match(Set dst (AbsF src));
   ins_cost(150);
   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
@@ -2560,48 +2653,6 @@
   ins_pipe(pipe_slow);
 %}
 
-#ifdef _LP64
-instruct absF_reg_reg_evex(regF dst, regF src) %{
-  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
-  match(Set dst (AbsF src));
-  ins_cost(150);
-  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(float_signmask()), vector_len);
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct absF_reg_reg_evex_special(regF dst, regF src1, regF src2) %{
-  predicate(VM_Version::supports_avx512novl());
-  match(Set dst (AbsF src1));
-  effect(TEMP src2);
-  ins_cost(150);
-  format %{ "vabsss  $dst, $src1, $src2, [0x7fffffff]\t# abs float by sign masking" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vabsss($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
-              ExternalAddress(float_signmask()), vector_len);
-  %}
-  ins_pipe(pipe_slow);
-%}
-#else // _LP64
-instruct absF_reg_reg_evex(regF dst, regF src) %{
-  predicate(UseAVX > 2);
-  match(Set dst (AbsF src));
-  ins_cost(150);
-  format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vandps($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(float_signmask()), vector_len);
-  %}
-  ins_pipe(pipe_slow);
-%}
-#endif
-
 instruct absD_reg(regD dst) %{
   predicate((UseSSE>=2) && (UseAVX == 0));
   match(Set dst (AbsD dst));
@@ -2614,8 +2665,8 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct absD_reg_reg(regD dst, regD src) %{
-  predicate(VM_Version::supports_avxonly());
+instruct absD_reg_reg(vlRegD dst, vlRegD src) %{
+  predicate(UseAVX > 0);
   match(Set dst (AbsD src));
   ins_cost(150);
   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
@@ -2628,50 +2679,6 @@
   ins_pipe(pipe_slow);
 %}
 
-#ifdef _LP64
-instruct absD_reg_reg_evex(regD dst, regD src) %{
-  predicate(UseAVX > 2 && VM_Version::supports_avx512vl());
-  match(Set dst (AbsD src));
-  ins_cost(150);
-  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
-            "# abs double by sign masking" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(double_signmask()), vector_len);
-  %}
-  ins_pipe(pipe_slow);
-%}
-
-instruct absD_reg_reg_evex_special(regD dst, regD src1, regD src2) %{
-  predicate(VM_Version::supports_avx512novl());
-  match(Set dst (AbsD src1));
-  effect(TEMP src2);
-  ins_cost(150);
-  format %{ "vabssd  $dst, $src1, $src2, [0x7fffffffffffffff]\t# abs float by sign masking" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vabssd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister,
-              ExternalAddress(double_signmask()), vector_len);
-  %}
-  ins_pipe(pipe_slow);
-%}
-#else // _LP64
-instruct absD_reg_reg_evex(regD dst, regD src) %{
-  predicate(UseAVX > 2);
-  match(Set dst (AbsD src));
-  ins_cost(150);
-  format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
-            "# abs double by sign masking" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(double_signmask()), vector_len);
-  %}
-  ins_pipe(pipe_slow);
-%}
-#endif
-
 instruct negF_reg(regF dst) %{
   predicate((UseSSE>=1) && (UseAVX == 0));
   match(Set dst (NegF dst));
@@ -2683,7 +2690,7 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct negF_reg_reg(regF dst, regF src) %{
+instruct negF_reg_reg(vlRegF dst, vlRegF src) %{
   predicate(UseAVX > 0);
   match(Set dst (NegF src));
   ins_cost(150);
@@ -2707,11 +2714,11 @@
   ins_pipe(pipe_slow);
 %}
 
-instruct negD_reg_reg(regD dst, regD src) %{
+instruct negD_reg_reg(vlRegD dst, vlRegD src) %{
   predicate(UseAVX > 0);
   match(Set dst (NegD src));
   ins_cost(150);
-  format %{ "vnegatess  $dst, $src, [0x8000000000000000]\t"
+  format %{ "vnegatesd  $dst, $src, [0x8000000000000000]\t"
             "# neg double by sign flipping" %}
   ins_encode %{
     __ vnegatesd($dst$$XMMRegister, $src$$XMMRegister,
@@ -2835,6 +2842,7 @@
 
 // ====================VECTOR INSTRUCTIONS=====================================
 
+
 // Load vectors (4 bytes long)
 instruct loadV4(vecS dst, memory mem) %{
   predicate(n->as_LoadVector()->memory_size() == 4);
@@ -2847,6 +2855,26 @@
   ins_pipe( pipe_slow );
 %}
 
+// Load vectors (4 bytes long)
+instruct MoveVecS2Leg(legVecS dst, vecS src) %{
+  match(Set dst src);
+  format %{ "movss $dst,$src\t! load vector (4 bytes)" %}
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Load vectors (4 bytes long)
+instruct MoveLeg2VecS(vecS dst, legVecS src) %{
+  match(Set dst src);
+  format %{ "movss $dst,$src\t! load vector (4 bytes)" %}
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
 // Load vectors (8 bytes long)
 instruct loadV8(vecD dst, memory mem) %{
   predicate(n->as_LoadVector()->memory_size() == 8);
@@ -2859,6 +2887,26 @@
   ins_pipe( pipe_slow );
 %}
 
+// Load vectors (8 bytes long)
+instruct MoveVecD2Leg(legVecD dst, vecD src) %{
+  match(Set dst src);
+  format %{ "movsd $dst,$src\t! load vector (8 bytes)" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Load vectors (8 bytes long)
+instruct MoveLeg2VecD(vecD dst, legVecD src) %{
+  match(Set dst src);
+  format %{ "movsd $dst,$src\t! load vector (8 bytes)" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
 // Load vectors (16 bytes long)
 instruct loadV16(vecX dst, memory mem) %{
   predicate(n->as_LoadVector()->memory_size() == 16);
@@ -2871,6 +2919,36 @@
   ins_pipe( pipe_slow );
 %}
 
+// Load vectors (16 bytes long)
+instruct MoveVecX2Leg(legVecX dst, vecX src) %{
+  match(Set dst src);
+  format %{ "movdqu $dst,$src\t! load vector (16 bytes)" %}
+  ins_encode %{
+    if (UseAVX < 2 || VM_Version::supports_avx512vl()) {
+      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+    } else {
+      int vector_len = 2;
+      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+    }
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Load vectors (16 bytes long)
+instruct MoveLeg2VecX(vecX dst, legVecX src) %{
+  match(Set dst src);
+  format %{ "movdqu $dst,$src\t! load vector (16 bytes)" %}
+  ins_encode %{
+    if (UseAVX < 2 || VM_Version::supports_avx512vl()) {
+      __ movdqu($dst$$XMMRegister, $src$$XMMRegister);
+    } else {
+      int vector_len = 2;
+      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+    }
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
 // Load vectors (32 bytes long)
 instruct loadV32(vecY dst, memory mem) %{
   predicate(n->as_LoadVector()->memory_size() == 32);
@@ -2883,6 +2961,36 @@
   ins_pipe( pipe_slow );
 %}
 
+// Load vectors (32 bytes long)
+instruct MoveVecY2Leg(legVecY dst, vecY src) %{
+  match(Set dst src);
+  format %{ "vmovdqu $dst,$src\t! load vector (32 bytes)" %}
+  ins_encode %{
+    if (UseAVX < 2 || VM_Version::supports_avx512vl()) {
+      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
+    } else {
+      int vector_len = 2;
+      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+    }
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Load vectors (32 bytes long)
+instruct MoveLeg2VecY(vecY dst, legVecY src) %{
+  match(Set dst src);
+  format %{ "vmovdqu $dst,$src\t! load vector (32 bytes)" %}
+  ins_encode %{
+    if (UseAVX < 2 || VM_Version::supports_avx512vl()) {
+      __ vmovdqu($dst$$XMMRegister, $src$$XMMRegister);
+    } else {
+      int vector_len = 2;
+      __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+    }
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
 // Load vectors (64 bytes long)
 instruct loadV64_dword(vecZ dst, memory mem) %{
   predicate(n->as_LoadVector()->memory_size() == 64 && n->as_LoadVector()->element_size() <= 4);
@@ -2909,6 +3017,26 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct MoveVecZ2Leg(legVecZ dst, vecZ  src) %{
+  match(Set dst src);
+  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct MoveLeg2VecZ(vecZ dst, legVecZ  src) %{
+  match(Set dst src);
+  format %{ "vmovdquq $dst k0,$src\t! Move vector (64 bytes)" %}
+  ins_encode %{
+    int vector_len = 2;
+    __ evmovdquq($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
 // Store vectors
 instruct storeV4(memory mem, vecS src) %{
   predicate(n->as_StoreVector()->memory_size() == 4);
@@ -3068,6 +3196,44 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct Repl64B(legVecZ dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB src));
+  format %{ "movd    $dst,$src\n\t"
+            "punpcklbw $dst,$dst\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl64B_mem(legVecZ dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB (LoadB mem)));
+  format %{ "punpcklbw $dst,$mem\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B" %}
+  ins_encode %{
+    __ punpcklbw($dst$$XMMRegister, $mem$$Address);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct Repl16B_imm(vecX dst, immI con) %{
   predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateB con));
@@ -3094,6 +3260,22 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct Repl64B_imm(legVecZ dst, immI con) %{
+  predicate(n->as_Vector()->length() == 64 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateB con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate64B($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct Repl4S(vecD dst, rRegI src) %{
   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateS src));
@@ -3198,6 +3380,56 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct Repl32S(legVecZ dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32S_mem(legVecZ dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS (LoadS mem)));
+  format %{ "pshuflw $dst,$mem,0x00\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S" %}
+  ins_encode %{
+    __ pshuflw($dst$$XMMRegister, $mem$$Address, 0x00);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32S_imm(legVecZ dst, immI con) %{
+  predicate(n->as_Vector()->length() == 32 && !VM_Version::supports_avx512vlbw());
+  match(Set dst (ReplicateS con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate32S($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct Repl4I(vecX dst, rRegI src) %{
   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateI src));
@@ -3246,6 +3478,36 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct Repl16I(legVecZ dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshufd  $dst,$dst,0x00\n\t"
+            "vinserti128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16I_mem(legVecZ dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI (LoadI mem)));
+  format %{ "pshufd  $dst,$mem,0x00\n\t"
+            "vinserti128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct Repl4I_imm(vecX dst, immI con) %{
   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateI con));
@@ -3272,6 +3534,22 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct Repl16I_imm(legVecZ dst, immI con) %{
+  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateI con));
+  format %{ "movq    $dst,[$constantaddress]\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16I($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 // Long could be loaded into xmm register directly from memory.
 instruct Repl2L_mem(vecX dst, memory mem) %{
   predicate(n->as_Vector()->length() == 2 && !VM_Version::supports_avx512vlbw());
@@ -3300,8 +3578,24 @@
   %}
   ins_pipe( pipe_slow );
 %}
+
+instruct Repl8L(legVecZ dst, rRegL src) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateL src));
+  format %{ "movdq   $dst,$src\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
+  ins_encode %{
+    __ movdq($dst$$XMMRegister, $src$$Register);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
 #else // _LP64
-instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
+instruct Repl4L(vecY dst, eRegL src, vecY tmp) %{
   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateL src));
   effect(TEMP dst, USE src, TEMP tmp);
@@ -3319,6 +3613,27 @@
   %}
   ins_pipe( pipe_slow );
 %}
+
+instruct Repl8L(legVecZ dst, eRegL src, legVecZ tmp) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateL src));
+  effect(TEMP dst, USE src, TEMP tmp);
+  format %{ "movdl   $dst,$src.lo\n\t"
+            "movdl   $tmp,$src.hi\n\t"
+            "punpckldq $dst,$tmp\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
 #endif // _LP64
 
 instruct Repl4L_imm(vecY dst, immL con) %{
@@ -3335,6 +3650,22 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct Repl8L_imm(legVecZ dst, immL con) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateL con));
+  format %{ "movq    $dst,[$constantaddress]\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L($con)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $constantaddress($con));
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct Repl4L_mem(vecY dst, memory mem) %{
   predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateL (LoadL mem)));
@@ -3349,6 +3680,22 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct Repl8L_mem(legVecZ dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateL (LoadL mem)));
+  format %{ "movq    $dst,$mem\n\t"
+            "punpcklqdq $dst,$dst\n\t"
+            "vinserti128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8L" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+    __ punpcklqdq($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct Repl2F_mem(vecD dst, memory mem) %{
   predicate(n->as_Vector()->length() == 2 && UseAVX > 0 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateF (LoadF mem)));
@@ -3369,8 +3716,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct Repl8F(vecY dst, regF src) %{
-  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+instruct Repl8F(vecY dst, vlRegF src) %{
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateF src));
   format %{ "pshufd  $dst,$src,0x00\n\t"
             "vinsertf128_high $dst,$dst\t! replicate8F" %}
@@ -3393,6 +3740,34 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct Repl16F(legVecZ dst, vlRegF src) %{
+  predicate(n->as_Vector()->length() == 16 && UseAVX > 0 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateF src));
+  format %{ "pshufd  $dst,$src,0x00\n\t"
+            "vinsertf128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16F_mem(legVecZ dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 16 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateF (LoadF mem)));
+  format %{ "pshufd  $dst,$mem,0x00\n\t"
+            "vinsertf128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate16F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x00);
+    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 instruct Repl2F_zero(vecD dst, immF0 zero) %{
   predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
   match(Set dst (ReplicateF zero));
@@ -3434,8 +3809,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct Repl4D(vecY dst, regD src) %{
-  predicate(n->as_Vector()->length() == 4 && !VM_Version::supports_avx512vl());
+instruct Repl4D(vecY dst, vlRegD src) %{
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 0 && !VM_Version::supports_avx512vl());
   match(Set dst (ReplicateD src));
   format %{ "pshufd  $dst,$src,0x44\n\t"
             "vinsertf128_high $dst,$dst\t! replicate4D" %}
@@ -3458,6 +3833,34 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct Repl8D(legVecZ dst, vlRegD src) %{
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 0 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateD src));
+  format %{ "pshufd  $dst,$src,0x44\n\t"
+            "vinsertf128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
+    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8D_mem(legVecZ dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 8 && !VM_Version::supports_avx512vl());
+  match(Set dst (ReplicateD (LoadD mem)));
+  format %{ "pshufd  $dst,$mem,0x44\n\t"
+            "vinsertf128_high $dst,$dst\t"
+            "vinserti64x4 $dst,$dst,$dst,0x1\t! replicate8D" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $mem$$Address, 0x44);
+    __ vinsertf128_high($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinserti64x4($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, 0x1);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 // Replicate double (8 byte) scalar zero to be vector
 instruct Repl2D_zero(vecX dst, immD0 zero) %{
   predicate(n->as_Vector()->length() == 2 && UseAVX < 3);
@@ -3736,7 +4139,7 @@
   ins_pipe( pipe_slow );
 %}
 #else // _LP64
-instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
+instruct Repl2L(vecX dst, eRegL src, vecX tmp) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateL src));
   effect(TEMP dst, USE src, TEMP tmp);
@@ -3791,7 +4194,7 @@
 %}
 
 // Replicate float (4 byte) scalar to be vector
-instruct Repl2F(vecD dst, regF src) %{
+instruct Repl2F(vecD dst, vlRegF src) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateF src));
   format %{ "pshufd  $dst,$dst,0x00\t! replicate2F" %}
@@ -3801,7 +4204,7 @@
   ins_pipe( fpu_reg_reg );
 %}
 
-instruct Repl4F(vecX dst, regF src) %{
+instruct Repl4F(vecX dst, vlRegF src) %{
   predicate(n->as_Vector()->length() == 4);
   match(Set dst (ReplicateF src));
   format %{ "pshufd  $dst,$dst,0x00\t! replicate4F" %}
@@ -3812,7 +4215,7 @@
 %}
 
 // Replicate double (8 bytes) scalar to be vector
-instruct Repl2D(vecX dst, regD src) %{
+instruct Repl2D(vecX dst, vlRegD src) %{
   predicate(n->as_Vector()->length() == 2);
   match(Set dst (ReplicateD src));
   format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
@@ -3825,31 +4228,31 @@
 // ====================EVEX REPLICATE=============================================
 
 instruct Repl4B_mem_evex(vecS dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateB (LoadB mem)));
   format %{ "vpbroadcastb  $dst,$mem\t! replicate4B" %}
   ins_encode %{
     int vector_len = 0;
-    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl8B_mem_evex(vecD dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateB (LoadB mem)));
   format %{ "vpbroadcastb  $dst,$mem\t! replicate8B" %}
   ins_encode %{
     int vector_len = 0;
-    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl16B_evex(vecX dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateB src));
-  format %{ "vpbroadcastb $dst,$src\t! replicate16B" %}
+  format %{ "evpbroadcastb $dst,$src\t! replicate16B" %}
   ins_encode %{
    int vector_len = 0;
     __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
@@ -3858,20 +4261,20 @@
 %}
 
 instruct Repl16B_mem_evex(vecX dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateB (LoadB mem)));
   format %{ "vpbroadcastb  $dst,$mem\t! replicate16B" %}
   ins_encode %{
     int vector_len = 0;
-    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl32B_evex(vecY dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateB src));
-  format %{ "vpbroadcastb $dst,$src\t! replicate32B" %}
+  format %{ "evpbroadcastb $dst,$src\t! replicate32B" %}
   ins_encode %{
    int vector_len = 1;
     __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
@@ -3880,20 +4283,20 @@
 %}
 
 instruct Repl32B_mem_evex(vecY dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateB (LoadB mem)));
   format %{ "vpbroadcastb  $dst,$mem\t! replicate32B" %}
   ins_encode %{
     int vector_len = 1;
-    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl64B_evex(vecZ dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
+  predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
   match(Set dst (ReplicateB src));
-  format %{ "vpbroadcastb $dst,$src\t! upper replicate64B" %}
+  format %{ "evpbroadcastb $dst,$src\t! upper replicate64B" %}
   ins_encode %{
    int vector_len = 2;
     __ evpbroadcastb($dst$$XMMRegister, $src$$Register, vector_len);
@@ -3902,51 +4305,51 @@
 %}
 
 instruct Repl64B_mem_evex(vecZ dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
+  predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
   match(Set dst (ReplicateB (LoadB mem)));
   format %{ "vpbroadcastb  $dst,$mem\t! replicate64B" %}
   ins_encode %{
     int vector_len = 2;
-    __ evpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastb($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl16B_imm_evex(vecX dst, immI con) %{
-  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateB con));
   format %{ "movq    $dst,[$constantaddress]\n\t"
             "vpbroadcastb $dst,$dst\t! replicate16B" %}
   ins_encode %{
    int vector_len = 0;
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
-    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+    __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl32B_imm_evex(vecY dst, immI con) %{
-  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateB con));
   format %{ "movq    $dst,[$constantaddress]\n\t"
             "vpbroadcastb $dst,$dst\t! replicate32B" %}
   ins_encode %{
    int vector_len = 1;
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
-    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+    __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl64B_imm_evex(vecZ dst, immI con) %{
-  predicate(n->as_Vector()->length() == 64 && VM_Version::supports_avx512bw());
+  predicate(n->as_Vector()->length() == 64 && UseAVX > 2 && VM_Version::supports_avx512bw());
   match(Set dst (ReplicateB con));
   format %{ "movq    $dst,[$constantaddress]\n\t"
             "vpbroadcastb $dst,$dst\t! upper replicate64B" %}
   ins_encode %{
    int vector_len = 2;
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
-    __ evpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+    __ vpbroadcastb($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -3964,9 +4367,9 @@
 %}
 
 instruct Repl4S_evex(vecD dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateS src));
-  format %{ "vpbroadcastw $dst,$src\t! replicate4S" %}
+  format %{ "evpbroadcastw $dst,$src\t! replicate4S" %}
   ins_encode %{
    int vector_len = 0;
     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
@@ -3975,20 +4378,20 @@
 %}
 
 instruct Repl4S_mem_evex(vecD dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateS (LoadS mem)));
   format %{ "vpbroadcastw  $dst,$mem\t! replicate4S" %}
   ins_encode %{
     int vector_len = 0;
-    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl8S_evex(vecX dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateS src));
-  format %{ "vpbroadcastw $dst,$src\t! replicate8S" %}
+  format %{ "evpbroadcastw $dst,$src\t! replicate8S" %}
   ins_encode %{
    int vector_len = 0;
     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
@@ -3997,20 +4400,20 @@
 %}
 
 instruct Repl8S_mem_evex(vecX dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateS (LoadS mem)));
   format %{ "vpbroadcastw  $dst,$mem\t! replicate8S" %}
   ins_encode %{
     int vector_len = 0;
-    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl16S_evex(vecY dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateS src));
-  format %{ "vpbroadcastw $dst,$src\t! replicate16S" %}
+  format %{ "evpbroadcastw $dst,$src\t! replicate16S" %}
   ins_encode %{
    int vector_len = 1;
     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
@@ -4019,20 +4422,20 @@
 %}
 
 instruct Repl16S_mem_evex(vecY dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateS (LoadS mem)));
   format %{ "vpbroadcastw  $dst,$mem\t! replicate16S" %}
   ins_encode %{
     int vector_len = 1;
-    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl32S_evex(vecZ dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
+  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
   match(Set dst (ReplicateS src));
-  format %{ "vpbroadcastw $dst,$src\t! replicate32S" %}
+  format %{ "evpbroadcastw $dst,$src\t! replicate32S" %}
   ins_encode %{
    int vector_len = 2;
     __ evpbroadcastw($dst$$XMMRegister, $src$$Register, vector_len);
@@ -4041,51 +4444,51 @@
 %}
 
 instruct Repl32S_mem_evex(vecZ dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
+  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
   match(Set dst (ReplicateS (LoadS mem)));
   format %{ "vpbroadcastw  $dst,$mem\t! replicate32S" %}
   ins_encode %{
     int vector_len = 2;
-    __ evpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastw($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl8S_imm_evex(vecX dst, immI con) %{
-  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateS con));
   format %{ "movq    $dst,[$constantaddress]\n\t"
             "vpbroadcastw $dst,$dst\t! replicate8S" %}
   ins_encode %{
    int vector_len = 0;
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
-    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl16S_imm_evex(vecY dst, immI con) %{
-  predicate(n->as_Vector()->length() == 16 && VM_Version::supports_avx512vlbw());
+  predicate(n->as_Vector()->length() == 16 && UseAVX > 2 && VM_Version::supports_avx512vlbw());
   match(Set dst (ReplicateS con));
   format %{ "movq    $dst,[$constantaddress]\n\t"
             "vpbroadcastw $dst,$dst\t! replicate16S" %}
   ins_encode %{
    int vector_len = 1;
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
-    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl32S_imm_evex(vecZ dst, immI con) %{
-  predicate(n->as_Vector()->length() == 32 && VM_Version::supports_avx512bw());
+  predicate(n->as_Vector()->length() == 32 && UseAVX > 2 && VM_Version::supports_avx512bw());
   match(Set dst (ReplicateS con));
   format %{ "movq    $dst,[$constantaddress]\n\t"
             "vpbroadcastw $dst,$dst\t! replicate32S" %}
   ins_encode %{
    int vector_len = 2;
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
-    __ evpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+    __ vpbroadcastw($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4103,9 +4506,9 @@
 %}
 
 instruct Repl4I_evex(vecX dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateI src));
-  format %{ "vpbroadcastd  $dst,$src\t! replicate4I" %}
+  format %{ "evpbroadcastd  $dst,$src\t! replicate4I" %}
   ins_encode %{
     int vector_len = 0;
     __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
@@ -4114,20 +4517,20 @@
 %}
 
 instruct Repl4I_mem_evex(vecX dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateI (LoadI mem)));
   format %{ "vpbroadcastd  $dst,$mem\t! replicate4I" %}
   ins_encode %{
     int vector_len = 0;
-    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl8I_evex(vecY dst, rRegI src) %{
-  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateI src));
-  format %{ "vpbroadcastd  $dst,$src\t! replicate8I" %}
+  format %{ "evpbroadcastd  $dst,$src\t! replicate8I" %}
   ins_encode %{
     int vector_len = 1;
     __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
@@ -4136,12 +4539,12 @@
 %}
 
 instruct Repl8I_mem_evex(vecY dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateI (LoadI mem)));
   format %{ "vpbroadcastd  $dst,$mem\t! replicate8I" %}
   ins_encode %{
     int vector_len = 1;
-    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4149,7 +4552,7 @@
 instruct Repl16I_evex(vecZ dst, rRegI src) %{
   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
   match(Set dst (ReplicateI src));
-  format %{ "vpbroadcastd  $dst,$src\t! replicate16I" %}
+  format %{ "evpbroadcastd  $dst,$src\t! replicate16I" %}
   ins_encode %{
     int vector_len = 2;
     __ evpbroadcastd($dst$$XMMRegister, $src$$Register, vector_len);
@@ -4163,33 +4566,33 @@
   format %{ "vpbroadcastd  $dst,$mem\t! replicate16I" %}
   ins_encode %{
     int vector_len = 2;
-    __ evpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastd($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl4I_imm_evex(vecX dst, immI con) %{
-  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateI con));
   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
             "vpbroadcastd  $dst,$dst\t! replicate4I" %}
   ins_encode %{
     int vector_len = 0;
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
-    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl8I_imm_evex(vecY dst, immI con) %{
-  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateI con));
   format %{ "movq    $dst,[$constantaddress]\t! replicate8I($con)\n\t"
             "vpbroadcastd  $dst,$dst\t! replicate8I" %}
   ins_encode %{
     int vector_len = 1;
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
-    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4202,7 +4605,7 @@
   ins_encode %{
     int vector_len = 2;
     __ movq($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
-    __ evpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+    __ vpbroadcastd($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4222,9 +4625,9 @@
 // Replicate long (8 byte) scalar to be vector
 #ifdef _LP64
 instruct Repl4L_evex(vecY dst, rRegL src) %{
-  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateL src));
-  format %{ "vpbroadcastq  $dst,$src\t! replicate4L" %}
+  format %{ "evpbroadcastq  $dst,$src\t! replicate4L" %}
   ins_encode %{
     int vector_len = 1;
     __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
@@ -4235,7 +4638,7 @@
 instruct Repl8L_evex(vecZ dst, rRegL src) %{
   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
   match(Set dst (ReplicateL src));
-  format %{ "vpbroadcastq  $dst,$src\t! replicate8L" %}
+  format %{ "evpbroadcastq  $dst,$src\t! replicate8L" %}
   ins_encode %{
     int vector_len = 2;
     __ evpbroadcastq($dst$$XMMRegister, $src$$Register, vector_len);
@@ -4244,7 +4647,7 @@
 %}
 #else // _LP64
 instruct Repl4L_evex(vecY dst, eRegL src, regD tmp) %{
-  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateL src));
   effect(TEMP dst, USE src, TEMP tmp);
   format %{ "movdl   $dst,$src.lo\n\t"
@@ -4256,12 +4659,12 @@
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
-    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct Repl8L_evex(vecZ dst, eRegL src, regD tmp) %{
+    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8L_evex(legVecZ dst, eRegL src, legVecZ tmp) %{
   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
   match(Set dst (ReplicateL src));
   effect(TEMP dst, USE src, TEMP tmp);
@@ -4274,21 +4677,21 @@
     __ movdl($dst$$XMMRegister, $src$$Register);
     __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
     __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
-    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 #endif // _LP64
 
 instruct Repl4L_imm_evex(vecY dst, immL con) %{
-  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateL con));
   format %{ "movq    $dst,[$constantaddress]\n\t"
             "vpbroadcastq  $dst,$dst\t! replicate4L" %}
   ins_encode %{
     int vector_len = 1;
     __ movq($dst$$XMMRegister, $constantaddress($con));
-    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4301,29 +4704,29 @@
   ins_encode %{
     int vector_len = 2;
     __ movq($dst$$XMMRegister, $constantaddress($con));
-    __ evpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
+    __ vpbroadcastq($dst$$XMMRegister, $dst$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl2L_mem_evex(vecX dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 2 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 2 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateL (LoadL mem)));
   format %{ "vpbroadcastd  $dst,$mem\t! replicate2L" %}
   ins_encode %{
     int vector_len = 0;
-    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl4L_mem_evex(vecY dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateL (LoadL mem)));
   format %{ "vpbroadcastd  $dst,$mem\t! replicate4L" %}
   ins_encode %{
     int vector_len = 1;
-    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4334,7 +4737,7 @@
   format %{ "vpbroadcastd  $dst,$mem\t! replicate8L" %}
   ins_encode %{
     int vector_len = 2;
-    __ evpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastq($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4352,23 +4755,23 @@
 %}
 
 instruct Repl8F_evex(vecY dst, regF src) %{
-  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateF src));
-  format %{ "vbroadcastss $dst,$src\t! replicate8F" %}
+  format %{ "vpbroadcastss $dst,$src\t! replicate8F" %}
   ins_encode %{
     int vector_len = 1;
-    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+    __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl8F_mem_evex(vecY dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 8 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 8 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateF (LoadF mem)));
   format %{ "vbroadcastss  $dst,$mem\t! replicate8F" %}
   ins_encode %{
     int vector_len = 1;
-    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4376,10 +4779,10 @@
 instruct Repl16F_evex(vecZ dst, regF src) %{
   predicate(n->as_Vector()->length() == 16 && UseAVX > 2);
   match(Set dst (ReplicateF src));
-  format %{ "vbroadcastss $dst,$src\t! replicate16F" %}
+  format %{ "vpbroadcastss $dst,$src\t! replicate16F" %}
   ins_encode %{
     int vector_len = 2;
-    __ evpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+    __ vpbroadcastss($dst$$XMMRegister, $src$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4390,7 +4793,7 @@
   format %{ "vbroadcastss  $dst,$mem\t! replicate16F" %}
   ins_encode %{
     int vector_len = 2;
-    __ evpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastss($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4444,23 +4847,23 @@
 %}
 
 instruct Repl4D_evex(vecY dst, regD src) %{
-  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateD src));
-  format %{ "vbroadcastsd $dst,$src\t! replicate4D" %}
+  format %{ "vpbroadcastsd $dst,$src\t! replicate4D" %}
   ins_encode %{
     int vector_len = 1;
-    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+    __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
 
 instruct Repl4D_mem_evex(vecY dst, memory mem) %{
-  predicate(n->as_Vector()->length() == 4 && VM_Version::supports_avx512vl());
+  predicate(n->as_Vector()->length() == 4 && UseAVX > 2 && VM_Version::supports_avx512vl());
   match(Set dst (ReplicateD (LoadD mem)));
   format %{ "vbroadcastsd  $dst,$mem\t! replicate4D" %}
   ins_encode %{
     int vector_len = 1;
-    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4468,10 +4871,10 @@
 instruct Repl8D_evex(vecZ dst, regD src) %{
   predicate(n->as_Vector()->length() == 8 && UseAVX > 2);
   match(Set dst (ReplicateD src));
-  format %{ "vbroadcastsd $dst,$src\t! replicate8D" %}
+  format %{ "vpbroadcastsd $dst,$src\t! replicate8D" %}
   ins_encode %{
     int vector_len = 2;
-    __ evpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
+    __ vpbroadcastsd($dst$$XMMRegister, $src$$XMMRegister, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4482,7 +4885,7 @@
   format %{ "vbroadcastsd  $dst,$mem\t! replicate8D" %}
   ins_encode %{
     int vector_len = 2;
-    __ evpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
+    __ vpbroadcastsd($dst$$XMMRegister, $mem$$Address, vector_len);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -4525,7 +4928,7 @@
 
 // ====================REDUCTION ARITHMETIC=======================================
 
-instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rsadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
   predicate(UseSSE > 2 && UseAVX == 0);
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp2, TEMP tmp);
@@ -4544,7 +4947,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rvadd2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
   predicate(VM_Version::supports_avxonly());
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -4562,7 +4965,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rvadd2I_reduction_reg_evex(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -4582,7 +4985,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rsadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
   predicate(UseSSE > 2 && UseAVX == 0);
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -4603,7 +5006,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvadd4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
   predicate(VM_Version::supports_avxonly());
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -4623,7 +5026,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvadd4I_reduction_reg_evex(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -4647,7 +5050,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
+instruct rvadd8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
   predicate(VM_Version::supports_avxonly());
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -4671,7 +5074,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
+instruct rvadd8I_reduction_reg_evex(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -4699,7 +5102,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+instruct rvadd16I_reduction_reg_evex(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
@@ -4731,7 +5134,7 @@
 %}
 
 #ifdef _LP64
-instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvadd2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -4750,7 +5153,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
+instruct rvadd4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -4773,7 +5176,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
+instruct rvadd8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -4801,7 +5204,7 @@
 %}
 #endif
 
-instruct rsadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
+instruct rsadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
   match(Set dst (AddReductionVF dst src2));
   effect(TEMP dst, TEMP tmp);
@@ -4816,7 +5219,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
+instruct rvadd2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
   predicate(UseAVX > 0);
   match(Set dst (AddReductionVF dst src2));
   effect(TEMP dst, TEMP tmp);
@@ -4831,7 +5234,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rsadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
+instruct rsadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
   match(Set dst (AddReductionVF dst src2));
   effect(TEMP dst, TEMP tmp);
@@ -4854,7 +5257,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
+instruct rvadd4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
   predicate(UseAVX > 0);
   match(Set dst (AddReductionVF dst src2));
   effect(TEMP tmp, TEMP dst);
@@ -4877,7 +5280,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct radd8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
+instruct radd8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
   predicate(UseAVX > 0);
   match(Set dst (AddReductionVF dst src2));
   effect(TEMP tmp, TEMP dst, TEMP tmp2);
@@ -4916,7 +5319,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct radd16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
+instruct radd16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVF dst src2));
   effect(TEMP tmp, TEMP dst, TEMP tmp2);
@@ -4987,7 +5390,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rsadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
+instruct rsadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
   match(Set dst (AddReductionVD dst src2));
   effect(TEMP tmp, TEMP dst);
@@ -5002,7 +5405,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
+instruct rvadd2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
   predicate(UseAVX > 0);
   match(Set dst (AddReductionVD dst src2));
   effect(TEMP tmp, TEMP dst);
@@ -5017,14 +5420,14 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
+instruct rvadd4D_reduction_reg(regD dst, vecY src2, vecX tmp, vecX tmp2) %{
   predicate(UseAVX > 0);
   match(Set dst (AddReductionVD dst src2));
   effect(TEMP tmp, TEMP dst, TEMP tmp2);
   format %{ "vaddsd  $dst,$dst,$src2\n\t"
             "pshufd  $tmp,$src2,0xE\n\t"
             "vaddsd  $dst,$dst,$tmp\n\t"
-            "vextractf32x4  $tmp2,$src2,0x1\n\t"
+            "vextractf128  $tmp2,$src2,0x1\n\t"
             "vaddsd  $dst,$dst,$tmp2\n\t"
             "pshufd  $tmp,$tmp2,0xE\n\t"
             "vaddsd  $dst,$dst,$tmp\t! add reduction4D" %}
@@ -5032,7 +5435,7 @@
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $src2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $src2$$XMMRegister, 0xE);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
-    __ vextractf32x4($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
+    __ vextractf128($tmp2$$XMMRegister, $src2$$XMMRegister, 0x1);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp2$$XMMRegister);
     __ pshufd($tmp$$XMMRegister, $tmp2$$XMMRegister, 0xE);
     __ vaddsd($dst$$XMMRegister, $dst$$XMMRegister, $tmp$$XMMRegister);
@@ -5040,7 +5443,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvadd8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
+instruct rvadd8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
   predicate(UseAVX > 2);
   match(Set dst (AddReductionVD dst src2));
   effect(TEMP tmp, TEMP dst, TEMP tmp2);
@@ -5079,7 +5482,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rsmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
   predicate(UseSSE > 3 && UseAVX == 0);
   match(Set dst (MulReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -5098,7 +5501,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, regF tmp, regF tmp2) %{
+instruct rvmul2I_reduction_reg(rRegI dst, rRegI src1, vecD src2, vecD tmp, vecD tmp2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -5118,7 +5521,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rsmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
   predicate(UseSSE > 3 && UseAVX == 0);
   match(Set dst (MulReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -5141,7 +5544,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvmul4I_reduction_reg(rRegI dst, rRegI src1, vecX src2, vecX tmp, vecX tmp2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -5165,8 +5568,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, regF tmp, regF tmp2) %{
-  predicate(UseAVX > 0);
+instruct rvmul8I_reduction_reg(rRegI dst, rRegI src1, vecY src2, vecY tmp, vecY tmp2) %{
+  predicate(UseAVX > 1);
   match(Set dst (MulReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2);
   format %{ "vextracti128_high  $tmp,$src2\n\t"
@@ -5193,7 +5596,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, vecZ src2, regF tmp, regF tmp2, regF tmp3) %{
+instruct rvmul16I_reduction_reg(rRegI dst, rRegI src1, legVecZ src2, legVecZ tmp, legVecZ tmp2, legVecZ tmp3) %{
   predicate(UseAVX > 2);
   match(Set dst (MulReductionVI src1 src2));
   effect(TEMP tmp, TEMP tmp2, TEMP tmp3);
@@ -5225,7 +5628,7 @@
 %}
 
 #ifdef _LP64
-instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, regF tmp, regF tmp2) %{
+instruct rvmul2L_reduction_reg(rRegL dst, rRegL src1, vecX src2, vecX tmp, vecX tmp2) %{
   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
   match(Set dst (MulReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -5244,7 +5647,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, regF tmp, regF tmp2) %{
+instruct rvmul4L_reduction_reg(rRegL dst, rRegL src1, vecY src2, vecY tmp, vecY tmp2) %{
   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
   match(Set dst (MulReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -5267,7 +5670,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, vecZ src2, regF tmp, regF tmp2) %{
+instruct rvmul8L_reduction_reg(rRegL dst, rRegL src1, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
   predicate(UseAVX > 2 && VM_Version::supports_avx512dq());
   match(Set dst (MulReductionVL src1 src2));
   effect(TEMP tmp, TEMP tmp2);
@@ -5295,7 +5698,7 @@
 %}
 #endif
 
-instruct rsmul2F_reduction(regF dst, vecD src2, regF tmp) %{
+instruct rsmul2F_reduction(regF dst, vecD src2, vecD tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
   match(Set dst (MulReductionVF dst src2));
   effect(TEMP dst, TEMP tmp);
@@ -5310,7 +5713,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul2F_reduction_reg(regF dst, vecD src2, regF tmp) %{
+instruct rvmul2F_reduction_reg(regF dst, vecD src2, vecD tmp) %{
   predicate(UseAVX > 0);
   match(Set dst (MulReductionVF dst src2));
   effect(TEMP tmp, TEMP dst);
@@ -5325,7 +5728,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rsmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
+instruct rsmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
   match(Set dst (MulReductionVF dst src2));
   effect(TEMP dst, TEMP tmp);
@@ -5348,7 +5751,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul4F_reduction_reg(regF dst, vecX src2, regF tmp) %{
+instruct rvmul4F_reduction_reg(regF dst, vecX src2, vecX tmp) %{
   predicate(UseAVX > 0);
   match(Set dst (MulReductionVF dst src2));
   effect(TEMP tmp, TEMP dst);
@@ -5371,7 +5774,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul8F_reduction_reg(regF dst, vecY src2, regF tmp, regF tmp2) %{
+instruct rvmul8F_reduction_reg(regF dst, vecY src2, vecY tmp, vecY tmp2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulReductionVF dst src2));
   effect(TEMP tmp, TEMP dst, TEMP tmp2);
@@ -5410,7 +5813,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul16F_reduction_reg(regF dst, vecZ src2, regF tmp, regF tmp2) %{
+instruct rvmul16F_reduction_reg(regF dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
   predicate(UseAVX > 2);
   match(Set dst (MulReductionVF dst src2));
   effect(TEMP tmp, TEMP dst, TEMP tmp2);
@@ -5481,7 +5884,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rsmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
+instruct rsmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
   predicate(UseSSE >= 1 && UseAVX == 0);
   match(Set dst (MulReductionVD dst src2));
   effect(TEMP dst, TEMP tmp);
@@ -5496,7 +5899,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul2D_reduction_reg(regD dst, vecX src2, regD tmp) %{
+instruct rvmul2D_reduction_reg(regD dst, vecX src2, vecX tmp) %{
   predicate(UseAVX > 0);
   match(Set dst (MulReductionVD dst src2));
   effect(TEMP tmp, TEMP dst);
@@ -5511,7 +5914,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul4D_reduction_reg(regD dst, vecY src2, regD tmp, regD tmp2) %{
+instruct rvmul4D_reduction_reg(regD dst, vecY src2, vecY tmp, vecY tmp2) %{
   predicate(UseAVX > 0);
   match(Set dst (MulReductionVD dst src2));
   effect(TEMP tmp, TEMP dst, TEMP tmp2);
@@ -5534,7 +5937,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct rvmul8D_reduction_reg(regD dst, vecZ src2, regD tmp, regD tmp2) %{
+instruct rvmul8D_reduction_reg(regD dst, legVecZ src2, legVecZ tmp, legVecZ tmp2) %{
   predicate(UseAVX > 2);
   match(Set dst (MulReductionVD dst src2));
   effect(TEMP tmp, TEMP dst, TEMP tmp2);
@@ -5588,8 +5991,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
-  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
   ins_encode %{
@@ -5599,31 +6002,9 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
-  match(Set dst (AddVB src1 src2));
-  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd4B_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
-  match(Set dst (AddVB dst src2));
-  effect(TEMP src1);
-  format %{ "vpaddb  $dst,$dst,$src2\t! add packed4B" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd4B_mem_avx(vecS dst, vecS src, memory mem) %{
-  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+
+instruct vadd4B_mem(vecS dst, vecS src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
   ins_encode %{
@@ -5633,29 +6014,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd4B_mem_evex(vecS dst, vecS src, memory mem) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
-  match(Set dst (AddVB src (LoadVector mem)));
-  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd4B_mem_evex_special(vecS dst, vecS src, memory mem) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
-  match(Set dst (AddVB dst (LoadVector mem)));
-  effect(TEMP src);
-  format %{ "vpaddb  $dst,$src,$mem\t! add packed4B" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 instruct vadd8B(vecD dst, vecD src) %{
   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (AddVB dst src));
@@ -5666,8 +6024,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd8B_reg_avx(vecD dst, vecD src1, vecD src2) %{
-  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
   ins_encode %{
@@ -5677,31 +6035,9 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd8B_reg_evex(vecD dst, vecD src1, vecD src2) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
-  match(Set dst (AddVB src1 src2));
-  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd8B_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
-  match(Set dst (AddVB dst src2));
-  effect(TEMP src1);
-  format %{ "vpaddb  $dst,$dst,$src2\t! add packed8B" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd8B_mem_avx(vecD dst, vecD src, memory mem) %{
-  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+
+instruct vadd8B_mem(vecD dst, vecD src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
   ins_encode %{
@@ -5711,29 +6047,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd8B_mem_evex(vecD dst, vecD src, memory mem) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
-  match(Set dst (AddVB src (LoadVector mem)));
-  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd8B_mem_evex_special(vecD dst, vecD src, memory mem) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
-  match(Set dst (AddVB dst (LoadVector mem)));
-  effect(TEMP src);
-  format %{ "vpaddb  $dst,$src,$mem\t! add packed8B" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 instruct vadd16B(vecX dst, vecX src) %{
   predicate(UseAVX == 0 && n->as_Vector()->length() == 16);
   match(Set dst (AddVB dst src));
@@ -5744,8 +6057,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd16B_reg_avx(vecX dst, vecX src1, vecX src2) %{
-  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
+instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0  && n->as_Vector()->length() == 16);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
   ins_encode %{
@@ -5755,31 +6068,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd16B_reg_evex(vecX dst, vecX src1, vecX src2) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
-  match(Set dst (AddVB src1 src2));
-  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd16B_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
-  match(Set dst (AddVB dst src2));
-  effect(TEMP src1);
-  format %{ "vpaddb  $dst,$dst,$src2\t! add packed16B" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd16B_mem_avx(vecX dst, vecX src, memory mem) %{
-  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 16);
+instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
   ins_encode %{
@@ -5789,31 +6079,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd16B_mem_evex(vecX dst, vecX src, memory mem) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
-  match(Set dst (AddVB src (LoadVector mem)));
-  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd16B_mem_evex_special(vecX dst, vecX src, memory mem) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
-  match(Set dst (AddVB dst (LoadVector mem)));
-  effect(TEMP src);
-  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd32B_reg_avx(vecY dst, vecY src1, vecY src2) %{
-  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
+instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
   ins_encode %{
@@ -5823,31 +6090,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd32B_reg_evex(vecY dst, vecY src1, vecY src2) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
-  match(Set dst (AddVB src1 src2));
-  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
-  ins_encode %{
-    int vector_len = 1;
-    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd32B_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
-  match(Set dst (AddVB dst src2));
-  effect(TEMP src1);
-  format %{ "vpaddb  $dst,$dst,$src2\t! add packed32B" %}
-  ins_encode %{
-    int vector_len = 1;
-    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd32B_mem_avx(vecY dst, vecY src, memory mem) %{
-  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 32);
+instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
   ins_encode %{
@@ -5857,31 +6101,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd32B_mem_evex(vecY dst, vecY src, memory mem) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
-  match(Set dst (AddVB src (LoadVector mem)));
-  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
-  ins_encode %{
-    int vector_len = 1;
-    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd32B_mem_evex_special(vecY dst, vecY src, memory mem) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 32);
-  match(Set dst (AddVB dst (LoadVector mem)));
-  effect(TEMP src);
-  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
-  ins_encode %{
-    int vector_len = 1;
-    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 instruct vadd64B_reg(vecZ dst, vecZ src1, vecZ src2) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
+  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
   match(Set dst (AddVB src1 src2));
   format %{ "vpaddb  $dst,$src1,$src2\t! add packed64B" %}
   ins_encode %{
@@ -5892,7 +6113,7 @@
 %}
 
 instruct vadd64B_mem(vecZ dst, vecZ src, memory mem) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
+  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 64);
   match(Set dst (AddVB src (LoadVector mem)));
   format %{ "vpaddb  $dst,$src,$mem\t! add packed64B" %}
   ins_encode %{
@@ -5913,8 +6134,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd2S_reg_avx(vecS dst, vecS src1, vecS src2) %{
-  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0  && n->as_Vector()->length() == 2);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
   ins_encode %{
@@ -5924,31 +6145,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd2S_reg_evex(vecS dst, vecS src1, vecS src2) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
-  match(Set dst (AddVS src1 src2));
-  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd2S_reg_evex_special(vecS dst, vecS src1, vecS src2) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
-  match(Set dst (AddVS dst src2));
-  effect(TEMP src1);
-  format %{ "vpaddw  $dst,$dst,$src2\t! add packed2S" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd2S_mem_avx(vecS dst, vecS src, memory mem) %{
-  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 2);
+instruct vadd2S_mem(vecS dst, vecS src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
   ins_encode %{
@@ -5958,29 +6156,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd2S_mem_evex(vecS dst, vecS src, memory mem) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 2);
-  match(Set dst (AddVS src (LoadVector mem)));
-  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd2S_mem_evex_special(vecS dst, vecS src, memory mem) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 2);
-  match(Set dst (AddVS dst (LoadVector mem)));
-  effect(TEMP src);
-  format %{ "vpaddw  $dst,$src,$mem\t! add packed2S" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 instruct vadd4S(vecD dst, vecD src) %{
   predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (AddVS dst src));
@@ -5991,8 +6166,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd4S_reg_avx(vecD dst, vecD src1, vecD src2) %{
-  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
   ins_encode %{
@@ -6002,31 +6177,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd4S_reg_evex(vecD dst, vecD src1, vecD src2) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
-  match(Set dst (AddVS src1 src2));
-  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd4S_reg_evex_special(vecD dst, vecD src1, vecD src2) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
-  match(Set dst (AddVS dst src2));
-  effect(TEMP src1);
-  format %{ "vpaddw  $dst,$dst,$src2\t! add packed4S" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd4S_mem_avx(vecD dst, vecD src, memory mem) %{
-  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+instruct vadd4S_mem(vecD dst, vecD src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
   ins_encode %{
@@ -6036,29 +6188,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd4S_mem_evex(vecD dst, vecD src, memory mem) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
-  match(Set dst (AddVS src (LoadVector mem)));
-  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd4S_mem_evex_special(vecD dst, vecD src, memory mem) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
-  match(Set dst (AddVS dst (LoadVector mem)));
-  effect(TEMP src);
-  format %{ "vpaddw  $dst,$src,$mem\t! add packed4S" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 instruct vadd8S(vecX dst, vecX src) %{
   predicate(UseAVX == 0 && n->as_Vector()->length() == 8);
   match(Set dst (AddVS dst src));
@@ -6069,8 +6198,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd8S_reg_avx(vecX dst, vecX src1, vecX src2) %{
-  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
   ins_encode %{
@@ -6080,31 +6209,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd8S_reg_evex(vecX dst, vecX src1, vecX src2) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
-  match(Set dst (AddVS src1 src2));
-  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd8S_reg_evex_special(vecX dst, vecX src1, vecX src2) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
-  match(Set dst (AddVS dst src2));
-  effect(TEMP src1);
-  format %{ "vpaddw  $dst,$dst,$src2\t! add packed8S" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd8S_mem_avx(vecX dst, vecX src, memory mem) %{
-  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 8);
+instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
   ins_encode %{
@@ -6114,31 +6220,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd8S_mem_evex(vecX dst, vecX src, memory mem) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 8);
-  match(Set dst (AddVS src (LoadVector mem)));
-  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd8S_mem_evex_special(vecX dst, vecX src, memory mem) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 8);
-  match(Set dst (AddVS dst (LoadVector mem)));
-  effect(TEMP src);
-  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd16S_reg_avx(vecY dst, vecY src1, vecY src2) %{
-  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
   ins_encode %{
@@ -6148,31 +6231,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd16S_reg_evex(vecY dst, vecY src1, vecY src2) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
-  match(Set dst (AddVS src1 src2));
-  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
-  ins_encode %{
-    int vector_len = 1;
-    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd16S_reg_evex_special(vecY dst, vecY src1, vecY src2) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
-  match(Set dst (AddVS dst src2));
-  effect(TEMP src1);
-  format %{ "vpaddw  $dst,$dst,$src2\t! add packed16S" %}
-  ins_encode %{
-    int vector_len = 1;
-    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd16S_mem_avx(vecY dst, vecY src, memory mem) %{
-  predicate(VM_Version::supports_avx256only() && n->as_Vector()->length() == 16);
+instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
   ins_encode %{
@@ -6182,31 +6242,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vadd16S_mem_evex(vecY dst, vecY src, memory mem) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 16);
-  match(Set dst (AddVS src (LoadVector mem)));
-  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
-  ins_encode %{
-    int vector_len = 1;
-    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vadd16S_mem_evex_special(vecY dst, vecY src, memory mem) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 16);
-  match(Set dst (AddVS dst (LoadVector mem)));
-  effect(TEMP src);
-  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
-  ins_encode %{
-    int vector_len = 1;
-    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 instruct vadd32S_reg(vecZ dst, vecZ src1, vecZ src2) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (AddVS src1 src2));
   format %{ "vpaddw  $dst,$src1,$src2\t! add packed32S" %}
   ins_encode %{
@@ -6217,7 +6254,7 @@
 %}
 
 instruct vadd32S_mem(vecZ dst, vecZ src, memory mem) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
+  predicate(UseAVX > 2 && VM_Version::supports_avx512bw() && n->as_Vector()->length() == 32);
   match(Set dst (AddVS src (LoadVector mem)));
   format %{ "vpaddw  $dst,$src,$mem\t! add packed32S" %}
   ins_encode %{
@@ -6229,7 +6266,7 @@
 
 // Integers vector add
 instruct vadd2I(vecD dst, vecD src) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (AddVI dst src));
   format %{ "paddd   $dst,$src\t! add packed2I" %}
   ins_encode %{
@@ -6261,7 +6298,7 @@
 %}
 
 instruct vadd4I(vecX dst, vecX src) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (AddVI dst src));
   format %{ "paddd   $dst,$src\t! add packed4I" %}
   ins_encode %{
@@ -6338,7 +6375,7 @@
 
 // Longs vector add
 instruct vadd2L(vecX dst, vecX src) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (AddVL dst src));
   format %{ "paddq   $dst,$src\t! add packed2L" %}
   ins_encode %{
@@ -6415,7 +6452,7 @@
 
 // Floats vector add
 instruct vadd2F(vecD dst, vecD src) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (AddVF dst src));
   format %{ "addps   $dst,$src\t! add packed2F" %}
   ins_encode %{
@@ -6447,7 +6484,7 @@
 %}
 
 instruct vadd4F(vecX dst, vecX src) %{
-  predicate(n->as_Vector()->length() == 4);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 4);
   match(Set dst (AddVF dst src));
   format %{ "addps   $dst,$src\t! add packed4F" %}
   ins_encode %{
@@ -6524,7 +6561,7 @@
 
 // Doubles vector add
 instruct vadd2D(vecX dst, vecX src) %{
-  predicate(n->as_Vector()->length() == 2);
+  predicate(UseAVX == 0 && n->as_Vector()->length() == 2);
   match(Set dst (AddVD dst src));
   format %{ "addpd   $dst,$src\t! add packed2D" %}
   ins_encode %{
@@ -6612,8 +6649,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub4B_reg_avx(vecS dst, vecS src1, vecS src2) %{
-  predicate(VM_Version::supports_avxonly() && n->as_Vector()->length() == 4);
+instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
   match(Set dst (SubVB src1 src2));
   format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
   ins_encode %{
@@ -6623,31 +6660,8 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct vsub4B_reg_evex(vecS dst, vecS src1, vecS src2) %{
-  predicate(VM_Version::supports_avx512bw() && n->as_Vector()->length() == 4);
-  match(Set dst (SubVB src1 src2));
-  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
-  ins_encode %{
-    int vector_len = 0;
-    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector_len);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-instruct vsub4B_reg_exex_special(vecS dst, vecS src1, vecS src2) %{
-  predicate(VM_Version::supports_avx512nobw() && n->as_Vector()->length() == 4);
-  match(Set dst (SubVB dst src2));