changeset 8392:197e94e0dacd jdk9-b66

Merge
author lana
date Thu, 21 May 2015 16:19:49 -0700
parents 6cc6758bda54 9dc9bcc49745
children ac6a7b63d701 eb76189435bb
files agent/src/share/classes/sun/jvm/hotspot/compiler/OopMap.java agent/src/share/classes/sun/jvm/hotspot/compiler/OopMapSet.java src/share/tools/IdealGraphVisualizer/Bytecodes/src/com/sun/hotspot/igv/bytecodes/images/bytecode.gif src/share/tools/IdealGraphVisualizer/Bytecodes/src/com/sun/hotspot/igv/bytecodes/images/link.gif src/share/tools/IdealGraphVisualizer/Bytecodes/src/com/sun/hotspot/igv/bytecodes/images/method.gif src/share/tools/IdealGraphVisualizer/Coordinator/src/META-INF/services/com.sun.hotspot.igv.data.services.GroupOrganizer src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/GraphCountGroupOrganizer.java src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/StandardGroupOrganizer.java src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/actions/StructuredViewAction.java src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/images/diff.gif src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/images/folder.gif src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/images/graph.gif src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/images/import.gif src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/images/remove.gif src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/images/removeall.gif src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/images/save.gif src/share/tools/IdealGraphVisualizer/Coordinator/src/com/sun/hotspot/igv/coordinator/images/structure.gif src/share/tools/IdealGraphVisualizer/Data/src/com/sun/hotspot/igv/data/services/GroupOrganizer.java src/share/tools/IdealGraphVisualizer/Data/src/com/sun/hotspot/igv/data/services/GroupReceiver.java src/share/tools/IdealGraphVisualizer/Filter/src/META-INF/services/com.sun.hotspot.igv.filter.ScriptEngineAbstraction src/share/tools/IdealGraphVisualizer/Filter/src/com/sun/hotspot/igv/filter/JavaSE6ScriptEngine.java src/share/tools/IdealGraphVisualizer/Filter/src/com/sun/hotspot/igv/filter/NullScriptEngine.java src/share/tools/IdealGraphVisualizer/Filter/src/com/sun/hotspot/igv/filter/ScriptEngineAbstraction.java src/share/tools/IdealGraphVisualizer/FilterWindow/src/com/sun/hotspot/igv/filterwindow/images/add.gif src/share/tools/IdealGraphVisualizer/FilterWindow/src/com/sun/hotspot/igv/filterwindow/images/delete.gif src/share/tools/IdealGraphVisualizer/FilterWindow/src/com/sun/hotspot/igv/filterwindow/images/down.gif src/share/tools/IdealGraphVisualizer/FilterWindow/src/com/sun/hotspot/igv/filterwindow/images/minus.gif src/share/tools/IdealGraphVisualizer/FilterWindow/src/com/sun/hotspot/igv/filterwindow/images/plus.gif src/share/tools/IdealGraphVisualizer/FilterWindow/src/com/sun/hotspot/igv/filterwindow/images/up.gif src/share/tools/IdealGraphVisualizer/Graph/src/com/sun/hotspot/igv/graph/Source.java src/share/tools/IdealGraphVisualizer/HierarchicalLayout/src/com/sun/hotspot/igv/hierarchicallayout/OldHierarchicalLayoutManager.java src/share/tools/IdealGraphVisualizer/NetworkConnection/src/META-INF/services/com.sun.hotspot.igv.data.services.GroupReceiver src/share/tools/IdealGraphVisualizer/README src/share/tools/IdealGraphVisualizer/RhinoScriptEngineProxy/build.xml src/share/tools/IdealGraphVisualizer/RhinoScriptEngineProxy/manifest.mf src/share/tools/IdealGraphVisualizer/RhinoScriptEngineProxy/nbproject/build-impl.xml src/share/tools/IdealGraphVisualizer/RhinoScriptEngineProxy/nbproject/genfiles.properties src/share/tools/IdealGraphVisualizer/RhinoScriptEngineProxy/nbproject/project.properties src/share/tools/IdealGraphVisualizer/RhinoScriptEngineProxy/nbproject/project.xml src/share/tools/IdealGraphVisualizer/RhinoScriptEngineProxy/nbproject/suite.properties src/share/tools/IdealGraphVisualizer/RhinoScriptEngineProxy/src/META-INF/services/com.sun.hotspot.igv.filter.ScriptEngineAbstraction src/share/tools/IdealGraphVisualizer/RhinoScriptEngineProxy/src/com/sun/hotspot/igv/rhino/Bundle.properties src/share/tools/IdealGraphVisualizer/RhinoScriptEngineProxy/src/com/sun/hotspot/igv/rhino/RhinoScriptEngine.java src/share/tools/IdealGraphVisualizer/RhinoScriptEngineProxy/src/com/sun/hotspot/igv/rhino/layer.xml src/share/tools/IdealGraphVisualizer/ServerCompiler/src/META-INF/services/com.sun.hotspot.igv.data.services.GroupOrganizer src/share/tools/IdealGraphVisualizer/ServerCompiler/src/com/sun/hotspot/igv/servercompiler/JavaGroupOrganizer.java src/share/tools/IdealGraphVisualizer/ServerCompiler/src/com/sun/hotspot/igv/servercompiler/filters/combine.filter src/share/tools/IdealGraphVisualizer/ServerCompiler/src/com/sun/hotspot/igv/servercompiler/filters/extendedColor.filter src/share/tools/IdealGraphVisualizer/ServerCompiler/src/com/sun/hotspot/igv/servercompiler/filters/linestyle.filter src/share/tools/IdealGraphVisualizer/ServerCompiler/src/com/sun/hotspot/igv/servercompiler/filters/removeMemory.filter src/share/tools/IdealGraphVisualizer/ServerCompiler/src/com/sun/hotspot/igv/servercompiler/filters/removeRootInputs.filter src/share/tools/IdealGraphVisualizer/ServerCompiler/src/com/sun/hotspot/igv/servercompiler/filters/removeSafepointInputs.filter src/share/tools/IdealGraphVisualizer/ServerCompiler/src/com/sun/hotspot/igv/servercompiler/filters/removeSelfLoops.filter src/share/tools/IdealGraphVisualizer/ServerCompiler/src/com/sun/hotspot/igv/servercompiler/filters/split.filter src/share/tools/IdealGraphVisualizer/Settings/src/com/sun/hotspot/igv/settings/settings.gif src/share/tools/IdealGraphVisualizer/View/src/META-INF/services/com.sun.hotspot.igv.data.services.InputGraphProvider src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/ConnectionAnchor.java src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/ExtendedPanAction.java src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/FindPanel.java src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/PreferenceConstants.java src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/SlotLayout.java src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/actions/NodeFindAction.java src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/images/export.gif src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/images/overview.gif src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/images/search.gif src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/images/zoomin.gif src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/images/zoomout.gif src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/widgets/DiagramConnectionWidget.java src/share/tools/IdealGraphVisualizer/View/src/com/sun/hotspot/igv/view/widgets/MultiConnectionWidget.java test/testlibrary/com/oracle/java/testlibrary/Asserts.java test/testlibrary/com/oracle/java/testlibrary/BuildHelper.java test/testlibrary/com/oracle/java/testlibrary/ByteCodeLoader.java test/testlibrary/com/oracle/java/testlibrary/DynamicVMOption.java test/testlibrary/com/oracle/java/testlibrary/ExitCode.java test/testlibrary/com/oracle/java/testlibrary/InMemoryJavaCompiler.java test/testlibrary/com/oracle/java/testlibrary/InfiniteLoop.java test/testlibrary/com/oracle/java/testlibrary/InputArguments.java test/testlibrary/com/oracle/java/testlibrary/JDKToolFinder.java test/testlibrary/com/oracle/java/testlibrary/JDKToolLauncher.java test/testlibrary/com/oracle/java/testlibrary/OutputAnalyzer.java test/testlibrary/com/oracle/java/testlibrary/OutputBuffer.java test/testlibrary/com/oracle/java/testlibrary/PerfCounter.java test/testlibrary/com/oracle/java/testlibrary/PerfCounters.java test/testlibrary/com/oracle/java/testlibrary/Platform.java test/testlibrary/com/oracle/java/testlibrary/ProcessTools.java test/testlibrary/com/oracle/java/testlibrary/StreamPumper.java test/testlibrary/com/oracle/java/testlibrary/TimeLimitedRunner.java test/testlibrary/com/oracle/java/testlibrary/Utils.java test/testlibrary/com/oracle/java/testlibrary/cli/CPUSpecificCommandLineOptionTest.java test/testlibrary/com/oracle/java/testlibrary/cli/CommandLineOptionTest.java test/testlibrary/com/oracle/java/testlibrary/cli/predicate/AndPredicate.java test/testlibrary/com/oracle/java/testlibrary/cli/predicate/CPUSpecificPredicate.java test/testlibrary/com/oracle/java/testlibrary/cli/predicate/NotPredicate.java test/testlibrary/com/oracle/java/testlibrary/cli/predicate/OrPredicate.java test/testlibrary/com/oracle/java/testlibrary/dcmd/CommandExecutor.java test/testlibrary/com/oracle/java/testlibrary/dcmd/CommandExecutorException.java test/testlibrary/com/oracle/java/testlibrary/dcmd/FileJcmdExecutor.java test/testlibrary/com/oracle/java/testlibrary/dcmd/JMXExecutor.java test/testlibrary/com/oracle/java/testlibrary/dcmd/JcmdExecutor.java test/testlibrary/com/oracle/java/testlibrary/dcmd/MainClassJcmdExecutor.java test/testlibrary/com/oracle/java/testlibrary/dcmd/PidJcmdExecutor.java test/testlibrary/com/oracle/java/testlibrary/dtrace/DtraceResultsAnalyzer.java test/testlibrary/com/oracle/java/testlibrary/dtrace/DtraceRunner.java
diffstat 1070 files changed, 27249 insertions(+), 16875 deletions(-) [+]
line wrap: on
line diff
--- a/.hgignore	Thu May 21 10:07:35 2015 -0700
+++ b/.hgignore	Thu May 21 16:19:49 2015 -0700
@@ -6,5 +6,7 @@
 ^src/share/tools/IdealGraphVisualizer/[a-zA-Z0-9]*/build/
 ^src/share/tools/IdealGraphVisualizer/build/
 ^src/share/tools/IdealGraphVisualizer/dist/
+^src/share/tools/IdealGraphVisualizer/nbplatform/
+.igv.log
 ^.hgtip
 .DS_Store
--- a/agent/src/share/classes/sun/jvm/hotspot/HSDB.java	Thu May 21 10:07:35 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/HSDB.java	Thu May 21 16:19:49 2015 -0700
@@ -27,9 +27,7 @@
 import java.io.*;
 import java.awt.*;
 import java.awt.event.*;
-import java.math.*;
 import javax.swing.*;
-import javax.swing.tree.*;
 import java.util.*;
 
 import sun.jvm.hotspot.code.*;
@@ -928,7 +926,7 @@
             boolean shouldSkipOopMaps = false;
             if (curVFrame.isCompiledFrame()) {
               CodeBlob cb = VM.getVM().getCodeCache().findBlob(curFrame.getPC());
-              OopMapSet maps = cb.getOopMaps();
+              ImmutableOopMapSet maps = cb.getOopMaps();
               if ((maps == null) || (maps.getSize() == 0)) {
                 shouldSkipOopMaps = true;
               }
@@ -977,7 +975,7 @@
             } while (nextVFrame != null && nextFrame.equals(curFrame));
 
             if (shouldSkipOopMaps) {
-              anno = anno + "\nNOTE: null or empty OopMapSet found for this CodeBlob";
+              anno = anno + "\nNOTE: null or empty ImmutableOopMapSet found for this CodeBlob";
             }
 
             if (curFrame.getFP() != null) {
--- a/agent/src/share/classes/sun/jvm/hotspot/code/CodeBlob.java	Thu May 21 10:07:35 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/CodeBlob.java	Thu May 21 16:19:49 2015 -0700
@@ -171,17 +171,17 @@
   public boolean isLockedByVM()         { return false; }
 
   /** OopMap for frame; can return null if none available */
-  public OopMapSet getOopMaps() {
+  public ImmutableOopMapSet getOopMaps() {
     Address oopMapsAddr = oopMapsField.getValue(addr);
     if (oopMapsAddr == null) {
       return null;
     }
-    return new OopMapSet(oopMapsAddr);
+    return new ImmutableOopMapSet(oopMapsAddr);
   }
   // FIXME: not yet implementable
-  //  void set_oop_maps(OopMapSet* p);
+  //  void set_oop_maps(ImmutableOopMapSet* p);
 
-  public OopMap getOopMapForReturnAddress(Address returnAddress, boolean debugging) {
+  public ImmutableOopMap getOopMapForReturnAddress(Address returnAddress, boolean debugging) {
     Address pc = returnAddress;
     if (Assert.ASSERTS_ENABLED) {
       Assert.that(getOopMaps() != null, "nope");
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/share/classes/sun/jvm/hotspot/compiler/ImmutableOopMap.java	Thu May 21 16:19:49 2015 -0700
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2000, 2004, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.compiler;
+
+import java.util.*;
+
+import sun.jvm.hotspot.code.*;
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.types.*;
+
+public class ImmutableOopMap extends VMObject {
+  private static CIntegerField countField;
+  private static long classSize;
+
+  static {
+    VM.registerVMInitializedObserver(new Observer() {
+        public void update(Observable o, Object data) {
+          initialize(VM.getVM().getTypeDataBase());
+        }
+      });
+  }
+
+  private static void initialize(TypeDataBase db) {
+    Type type = db.lookupType("ImmutableOopMap");
+    countField = type.getCIntegerField("_count");
+    classSize = type.getSize();
+  }
+
+  public ImmutableOopMap(Address addr) {
+    super(addr);
+  }
+
+  //--------------------------------------------------------------------------------
+  // Internals only below this point
+  //
+
+  long getCount() {
+    return countField.getValue(addr);
+  }
+
+  public Address getData() {
+    return addr.addOffsetTo(classSize);
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/share/classes/sun/jvm/hotspot/compiler/ImmutableOopMapPair.java	Thu May 21 16:19:49 2015 -0700
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.compiler;
+
+import sun.jvm.hotspot.debugger.Address;
+import sun.jvm.hotspot.runtime.VM;
+import sun.jvm.hotspot.types.CIntegerField;
+import sun.jvm.hotspot.types.Type;
+import sun.jvm.hotspot.types.TypeDataBase;
+
+import java.util.Observable;
+import java.util.Observer;
+
+public class ImmutableOopMapPair {
+  private static CIntegerField pcField;
+  private static CIntegerField offsetField;
+  private static long classSize;
+
+  static {
+    VM.registerVMInitializedObserver(new Observer() {
+      public void update(Observable o, Object data) {
+        initialize(VM.getVM().getTypeDataBase());
+      }
+    });
+  }
+
+  private final Address address;
+
+  public ImmutableOopMapPair(Address address) {
+    this.address = address;
+  }
+
+  public static long classSize() {
+    return classSize;
+  }
+
+  public int getPC() {
+    return (int) pcField.getValue(address);
+  }
+
+  public int getOffset() {
+    return (int) offsetField.getValue(address);
+  }
+
+  private static void initialize(TypeDataBase db) {
+    Type type = db.lookupType("ImmutableOopMapPair");
+
+    pcField = type.getCIntegerField("_pc_offset");
+    offsetField = type.getCIntegerField("_oopmap_offset");
+    classSize = type.getSize();
+  }
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/share/classes/sun/jvm/hotspot/compiler/ImmutableOopMapSet.java	Thu May 21 16:19:49 2015 -0700
@@ -0,0 +1,318 @@
+/*
+ * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.compiler;
+
+import java.util.*;
+
+import sun.jvm.hotspot.code.*;
+import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.runtime.*;
+import sun.jvm.hotspot.types.*;
+import sun.jvm.hotspot.utilities.*;
+
+public class ImmutableOopMapSet extends VMObject {
+  private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.compiler.ImmutableOopMapSet.DEBUG") != null;
+
+  private static CIntegerField countField;
+  private static CIntegerField sizeField;
+  private static AddressField omDataField;
+  private static int REG_COUNT;
+  private static int SAVED_ON_ENTRY_REG_COUNT;
+  private static int C_SAVED_ON_ENTRY_REG_COUNT;
+  private static long classSize;
+
+  private static class MyVisitor implements OopMapVisitor {
+    private AddressVisitor addressVisitor;
+
+    public MyVisitor(AddressVisitor oopVisitor) {
+      setAddressVisitor(oopVisitor);
+    }
+
+    public void setAddressVisitor(AddressVisitor addressVisitor) {
+      this.addressVisitor = addressVisitor;
+    }
+
+    public void visitOopLocation(Address oopAddr) {
+      addressVisitor.visitAddress(oopAddr);
+    }
+
+    public void visitDerivedOopLocation(Address baseOopAddr, Address derivedOopAddr) {
+      if (VM.getVM().isClientCompiler()) {
+        Assert.that(false, "should not reach here");
+      } else if (VM.getVM().isServerCompiler() &&
+          VM.getVM().useDerivedPointerTable()) {
+        Assert.that(false, "FIXME: add derived pointer table");
+      }
+    }
+
+    public void visitValueLocation(Address valueAddr) {
+    }
+
+    public void visitNarrowOopLocation(Address narrowOopAddr) {
+      addressVisitor.visitCompOopAddress(narrowOopAddr);
+    }
+  }
+
+  static {
+    VM.registerVMInitializedObserver(new Observer() {
+      public void update(Observable o, Object data) {
+        initialize(VM.getVM().getTypeDataBase());
+      }
+    });
+  }
+
+  private static void initialize(TypeDataBase db) {
+    Type type = db.lookupType("ImmutableOopMapSet");
+
+    countField = type.getCIntegerField("_count");
+    sizeField = type.getCIntegerField("_size");
+    classSize = type.getSize();
+
+    if (!VM.getVM().isCore()) {
+      REG_COUNT = db.lookupIntConstant("REG_COUNT").intValue();
+      if (VM.getVM().isServerCompiler()) {
+        SAVED_ON_ENTRY_REG_COUNT = (int) db.lookupIntConstant("SAVED_ON_ENTRY_REG_COUNT").intValue();
+        C_SAVED_ON_ENTRY_REG_COUNT = (int) db.lookupIntConstant("C_SAVED_ON_ENTRY_REG_COUNT").intValue();
+      }
+    }
+  }
+
+  public ImmutableOopMapSet(Address addr) {
+    super(addr);
+  }
+
+  /**
+   * Returns the number of OopMaps in this ImmutableOopMapSet
+   */
+  public long getSize() {
+    return countField.getValue(addr);
+  }
+
+  public int getCount() { return (int) countField.getValue(addr); }
+
+  private Address dataStart() {
+    return (addr.addOffsetTo(ImmutableOopMapSet.classSize * getCount()));
+  }
+
+  public ImmutableOopMapPair pairAt(int index) {
+    Assert.that((index >= 0) && (index < getCount()), "bad index");
+    return new ImmutableOopMapPair(addr.addOffsetTo(index * ImmutableOopMapPair.classSize()));
+  }
+
+  /**
+   * returns the OopMap at a given index
+   */
+  public ImmutableOopMap getMapAt(int index) {
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that((index >= 0) && (index <= getSize()), "bad index");
+    }
+
+    ImmutableOopMapPair immutableOopMapPair = pairAt(index);
+    return getMap(immutableOopMapPair);
+  }
+
+  public ImmutableOopMap findMapAtOffset(long pcOffset, boolean debugging) {
+    int i;
+    int len = (int) getSize();
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(len > 0, "must have pointer maps");
+    }
+
+    // Scan through oopmaps. Stop when current offset is either equal or greater
+    // than the one we are looking for.
+    for (i = 0; i < len; i++) {
+      if (pairAt(i).getPC() >= pcOffset) {
+        break;
+      }
+    }
+
+    if (!debugging) {
+      if (Assert.ASSERTS_ENABLED) {
+        Assert.that(i < len, "oopmap not found for pcOffset = " + pcOffset + "; len = " + len);
+        Assert.that(pairAt(i).getPC() == pcOffset, "oopmap not found");
+      }
+    } else {
+      if (i == len) {
+        if (DEBUG) {
+          System.out.println("can't find oopmap at " + pcOffset);
+          System.out.print("Oopmap offsets are [ ");
+          for (i = 0; i < len; i++) {
+            System.out.print(pairAt(i).getPC());
+          }
+          System.out.println("]");
+        }
+        i = len - 1;
+        return getMapAt(i);
+      }
+    }
+
+    ImmutableOopMap m = getMapAt(i);
+    return m;
+  }
+
+  /**
+   * Visitation -- iterates through the frame for a compiled method.
+   * This is a very generic mechanism that requires the Address to be
+   * dereferenced by the callee. Other, more specialized, visitation
+   * mechanisms are given below.
+   */
+  public static void oopsDo(Frame fr, CodeBlob cb, RegisterMap regMap, AddressVisitor oopVisitor, boolean debugging) {
+    allDo(fr, cb, regMap, new MyVisitor(oopVisitor), debugging);
+  }
+
+  /**
+   * Note that there are 4 required AddressVisitors: one for oops,
+   * one for derived oops, one for values, and one for dead values
+   */
+  public static void allDo(Frame fr, CodeBlob cb, RegisterMap regMap, OopMapVisitor visitor, boolean debugging) {
+    if (Assert.ASSERTS_ENABLED) {
+      CodeBlob tmpCB = VM.getVM().getCodeCache().findBlob(fr.getPC());
+      Assert.that(tmpCB != null && cb.equals(tmpCB), "wrong codeblob passed in");
+    }
+
+    ImmutableOopMapSet maps = cb.getOopMaps();
+    ImmutableOopMap map = cb.getOopMapForReturnAddress(fr.getPC(), debugging);
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(map != null, "no ptr map found");
+    }
+
+    // handle derived pointers first (otherwise base pointer may be
+    // changed before derived pointer offset has been collected)
+    OopMapValue omv;
+    {
+      for (OopMapStream oms = new OopMapStream(map, OopMapValue.OopTypes.DERIVED_OOP_VALUE); !oms.isDone(); oms.next()) {
+        if (VM.getVM().isClientCompiler()) {
+          Assert.that(false, "should not reach here");
+        }
+        omv = oms.getCurrent();
+        Address loc = fr.oopMapRegToLocation(omv.getReg(), regMap);
+        if (loc != null) {
+          Address baseLoc = fr.oopMapRegToLocation(omv.getContentReg(), regMap);
+          Address derivedLoc = loc;
+          visitor.visitDerivedOopLocation(baseLoc, derivedLoc);
+        }
+      }
+    }
+
+    // We want narow oop, value and oop oop_types
+    OopMapValue.OopTypes[] values = new OopMapValue.OopTypes[]{
+        OopMapValue.OopTypes.OOP_VALUE, OopMapValue.OopTypes.VALUE_VALUE, OopMapValue.OopTypes.NARROWOOP_VALUE
+    };
+
+    {
+      for (OopMapStream oms = new OopMapStream(map, values); !oms.isDone(); oms.next()) {
+        omv = oms.getCurrent();
+        Address loc = fr.oopMapRegToLocation(omv.getReg(), regMap);
+        if (loc != null) {
+          if (omv.getType() == OopMapValue.OopTypes.OOP_VALUE) {
+            // This assert commented out because this will be useful
+            // to detect in the debugging system
+            // assert(Universe::is_heap_or_null(*loc), "found non oop pointer");
+            visitor.visitOopLocation(loc);
+          } else if (omv.getType() == OopMapValue.OopTypes.VALUE_VALUE) {
+            visitor.visitValueLocation(loc);
+          } else if (omv.getType() == OopMapValue.OopTypes.NARROWOOP_VALUE) {
+            visitor.visitNarrowOopLocation(loc);
+          }
+        }
+      }
+    }
+  }
+
+  /**
+   * Update callee-saved register info for the following frame.
+   * Should only be called in non-core builds.
+   */
+  public static void updateRegisterMap(Frame fr, CodeBlob cb, RegisterMap regMap, boolean debugging) {
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(!VM.getVM().isCore(), "non-core builds only");
+    }
+
+    if (!VM.getVM().isDebugging()) {
+      if (Assert.ASSERTS_ENABLED) {
+        ImmutableOopMapSet maps = cb.getOopMaps();
+        Assert.that((maps != null) && (maps.getSize() > 0), "found null or empty ImmutableOopMapSet for CodeBlob");
+      }
+    } else {
+      // Hack for some topmost frames that have been found with empty
+      // OopMapSets. (Actually have not seen the null case, but don't
+      // want to take any chances.) See HSDB.showThreadStackMemory().
+      ImmutableOopMapSet maps = cb.getOopMaps();
+      if ((maps == null) || (maps.getSize() == 0)) {
+        return;
+      }
+    }
+
+    // Check if caller must update oop argument
+    regMap.setIncludeArgumentOops(cb.callerMustGCArguments());
+
+    int nofCallee = 0;
+    Address[] locs = new Address[2 * REG_COUNT + 1];
+    VMReg[] regs = new VMReg[2 * REG_COUNT + 1];
+    // ("+1" because REG_COUNT might be zero)
+
+    // Scan through oopmap and find location of all callee-saved registers
+    // (we do not do update in place, since info could be overwritten)
+    ImmutableOopMap map = cb.getOopMapForReturnAddress(fr.getPC(), debugging);
+    if (Assert.ASSERTS_ENABLED) {
+      Assert.that(map != null, "no ptr map found");
+    }
+
+    OopMapValue omv = null;
+    for (OopMapStream oms = new OopMapStream(map, OopMapValue.OopTypes.CALLEE_SAVED_VALUE); !oms.isDone(); oms.next()) {
+      omv = oms.getCurrent();
+      if (Assert.ASSERTS_ENABLED) {
+        Assert.that(nofCallee < 2 * REG_COUNT, "overflow");
+      }
+      regs[nofCallee] = omv.getContentReg();
+      locs[nofCallee] = fr.oopMapRegToLocation(omv.getReg(), regMap);
+      nofCallee++;
+    }
+
+    // Check that runtime stubs save all callee-saved registers
+    // After adapter frames were deleted C2 doesn't use callee save registers at present
+    if (Assert.ASSERTS_ENABLED) {
+      if (VM.getVM().isServerCompiler()) {
+        Assert.that(!cb.isRuntimeStub() ||
+                (nofCallee >= SAVED_ON_ENTRY_REG_COUNT || nofCallee >= C_SAVED_ON_ENTRY_REG_COUNT),
+            "must save all");
+      }
+    }
+
+    // Copy found callee-saved register to reg_map
+    for (int i = 0; i < nofCallee; i++) {
+      regMap.setLocation(regs[i], locs[i]);
+    }
+  }
+
+  public ImmutableOopMapPair getPairAt(int index) {
+    return pairAt(index);
+  }
+
+  public ImmutableOopMap getMap(ImmutableOopMapPair pair) {
+    Assert.that(pair.getOffset() < (int) sizeField.getValue(), "boundary check");
+    return new ImmutableOopMap(dataStart().addOffsetTo(pair.getOffset()));
+  }
+}
--- a/agent/src/share/classes/sun/jvm/hotspot/compiler/OopMap.java	Thu May 21 10:07:35 2015 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2000, 2004, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-package sun.jvm.hotspot.compiler;
-
-import java.util.*;
-
-import sun.jvm.hotspot.code.*;
-import sun.jvm.hotspot.debugger.*;
-import sun.jvm.hotspot.runtime.*;
-import sun.jvm.hotspot.types.*;
-
-public class OopMap extends VMObject {
-  private static CIntegerField pcOffsetField;
-  private static CIntegerField omvCountField;
-  private static CIntegerField omvDataSizeField;
-  private static AddressField  omvDataField;
-  private static AddressField  compressedWriteStreamField;
-
-  // This is actually a field inside class CompressedStream
-  private static AddressField  compressedStreamBufferField;
-
-  static {
-    VM.registerVMInitializedObserver(new Observer() {
-        public void update(Observable o, Object data) {
-          initialize(VM.getVM().getTypeDataBase());
-        }
-      });
-  }
-
-  private static void initialize(TypeDataBase db) {
-    Type type = db.lookupType("OopMap");
-
-    pcOffsetField              = type.getCIntegerField("_pc_offset");
-    omvCountField              = type.getCIntegerField("_omv_count");
-    omvDataSizeField           = type.getCIntegerField("_omv_data_size");
-    omvDataField               = type.getAddressField("_omv_data");
-    compressedWriteStreamField = type.getAddressField("_write_stream");
-
-    type = db.lookupType("CompressedStream");
-    compressedStreamBufferField = type.getAddressField("_buffer");
-  }
-
-  public OopMap(Address addr) {
-    super(addr);
-  }
-
-  public long getOffset() {
-    return pcOffsetField.getValue(addr);
-  }
-
-  //--------------------------------------------------------------------------------
-  // Internals only below this point
-  //
-
-  // Accessors -- package private for now
-  Address getOMVData() {
-    return omvDataField.getValue(addr);
-  }
-
-  long getOMVDataSize() {
-    return omvDataSizeField.getValue(addr);
-  }
-
-  long getOMVCount() {
-    return omvCountField.getValue(addr);
-  }
-
-  CompressedWriteStream getWriteStream() {
-    Address wsAddr = compressedWriteStreamField.getValue(addr);
-    if (wsAddr == null) {
-      return null;
-    }
-    Address bufferAddr = compressedStreamBufferField.getValue(wsAddr);
-    if (bufferAddr == null) {
-      return null;
-    }
-    return new CompressedWriteStream(bufferAddr);
-  }
-}
--- a/agent/src/share/classes/sun/jvm/hotspot/compiler/OopMapSet.java	Thu May 21 10:07:35 2015 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,289 +0,0 @@
-/*
- * Copyright (c) 2000, 2008, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-package sun.jvm.hotspot.compiler;
-
-import java.util.*;
-
-import sun.jvm.hotspot.code.*;
-import sun.jvm.hotspot.debugger.*;
-import sun.jvm.hotspot.runtime.*;
-import sun.jvm.hotspot.types.*;
-import sun.jvm.hotspot.utilities.*;
-
-public class OopMapSet extends VMObject {
-  private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.compiler.OopMapSet.DEBUG") != null;
-
-  private static CIntegerField omCountField;
-  private static CIntegerField omSizeField;
-  private static AddressField  omDataField;
-  private static int REG_COUNT;
-  private static int SAVED_ON_ENTRY_REG_COUNT;
-  private static int C_SAVED_ON_ENTRY_REG_COUNT;
-  private static class MyVisitor implements OopMapVisitor {
-    private AddressVisitor addressVisitor;
-
-    public MyVisitor(AddressVisitor oopVisitor) {
-      setAddressVisitor(oopVisitor);
-    }
-
-    public void setAddressVisitor(AddressVisitor addressVisitor) {
-      this.addressVisitor = addressVisitor;
-    }
-
-    public void visitOopLocation(Address oopAddr) {
-      addressVisitor.visitAddress(oopAddr);
-    }
-
-    public void visitDerivedOopLocation(Address baseOopAddr, Address derivedOopAddr) {
-      if (VM.getVM().isClientCompiler()) {
-        Assert.that(false, "should not reach here");
-      } else if (VM.getVM().isServerCompiler() &&
-                 VM.getVM().useDerivedPointerTable()) {
-        Assert.that(false, "FIXME: add derived pointer table");
-      }
-    }
-
-    public void visitValueLocation(Address valueAddr) {
-    }
-
-    public void visitNarrowOopLocation(Address narrowOopAddr) {
-      addressVisitor.visitCompOopAddress(narrowOopAddr);
-    }
-  }
-
-  static {
-    VM.registerVMInitializedObserver(new Observer() {
-        public void update(Observable o, Object data) {
-          initialize(VM.getVM().getTypeDataBase());
-        }
-      });
-  }
-
-  private static void initialize(TypeDataBase db) {
-    Type type = db.lookupType("OopMapSet");
-
-    omCountField  = type.getCIntegerField("_om_count");
-    omSizeField   = type.getCIntegerField("_om_size");
-    omDataField   = type.getAddressField("_om_data");
-
-    if (!VM.getVM().isCore()) {
-      REG_COUNT = db.lookupIntConstant("REG_COUNT").intValue();
-      if (VM.getVM().isServerCompiler()) {
-        SAVED_ON_ENTRY_REG_COUNT = (int) db.lookupIntConstant("SAVED_ON_ENTRY_REG_COUNT").intValue();
-        C_SAVED_ON_ENTRY_REG_COUNT = (int) db.lookupIntConstant("C_SAVED_ON_ENTRY_REG_COUNT").intValue();
-      }
-    }
-  }
-
-  public OopMapSet(Address addr) {
-    super(addr);
-  }
-
-  /** Returns the number of OopMaps in this OopMapSet */
-  public long getSize() {
-    return omCountField.getValue(addr);
-  }
-
-  /** returns the OopMap at a given index */
-  public OopMap getMapAt(int index) {
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that((index >= 0) && (index <= getSize()),"bad index");
-    }
-    Address omDataAddr = omDataField.getValue(addr);
-    Address oopMapAddr = omDataAddr.getAddressAt(index * VM.getVM().getAddressSize());
-    if (oopMapAddr == null) {
-      return null;
-    }
-    return new OopMap(oopMapAddr);
-  }
-
-  public OopMap findMapAtOffset(long pcOffset, boolean debugging) {
-    int i;
-    int len = (int) getSize();
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(len > 0, "must have pointer maps");
-    }
-
-    // Scan through oopmaps. Stop when current offset is either equal or greater
-    // than the one we are looking for.
-    for (i = 0; i < len; i++) {
-      if (getMapAt(i).getOffset() >= pcOffset) {
-        break;
-      }
-    }
-
-    if (!debugging) {
-      if (Assert.ASSERTS_ENABLED) {
-        Assert.that(i < len, "oopmap not found for pcOffset = " + pcOffset + "; len = " + len);
-        Assert.that(getMapAt(i).getOffset() == pcOffset, "oopmap not found");
-      }
-    } else {
-      if (i == len) {
-        if (DEBUG) {
-          System.out.println("can't find oopmap at " + pcOffset);
-          System.out.print("Oopmap offsets are [ ");
-          for (i = 0; i < len; i++) {
-            System.out.print(getMapAt(i).getOffset());
-          }
-          System.out.println("]");
-        }
-        i = len - 1;
-        return getMapAt(i);
-      }
-    }
-
-    OopMap m = getMapAt(i);
-    return m;
-  }
-
-  /** Visitation -- iterates through the frame for a compiled method.
-      This is a very generic mechanism that requires the Address to be
-      dereferenced by the callee. Other, more specialized, visitation
-      mechanisms are given below. */
-  public static void oopsDo(Frame fr, CodeBlob cb, RegisterMap regMap, AddressVisitor oopVisitor, boolean debugging) {
-    allDo(fr, cb, regMap, new MyVisitor(oopVisitor), debugging);
-  }
-
-  /** Note that there are 4 required AddressVisitors: one for oops,
-      one for derived oops, one for values, and one for dead values */
-  public static void allDo(Frame fr, CodeBlob cb, RegisterMap regMap, OopMapVisitor visitor, boolean debugging) {
-    if (Assert.ASSERTS_ENABLED) {
-      CodeBlob tmpCB = VM.getVM().getCodeCache().findBlob(fr.getPC());
-      Assert.that(tmpCB != null && cb.equals(tmpCB), "wrong codeblob passed in");
-    }
-
-    OopMapSet maps = cb.getOopMaps();
-    OopMap map = cb.getOopMapForReturnAddress(fr.getPC(), debugging);
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(map != null, "no ptr map found");
-    }
-
-    // handle derived pointers first (otherwise base pointer may be
-    // changed before derived pointer offset has been collected)
-    OopMapValue omv;
-    {
-      for (OopMapStream oms = new OopMapStream(map, OopMapValue.OopTypes.DERIVED_OOP_VALUE); !oms.isDone(); oms.next()) {
-        if (VM.getVM().isClientCompiler()) {
-          Assert.that(false, "should not reach here");
-        }
-        omv = oms.getCurrent();
-        Address loc = fr.oopMapRegToLocation(omv.getReg(), regMap);
-        if (loc != null) {
-          Address baseLoc    = fr.oopMapRegToLocation(omv.getContentReg(), regMap);
-          Address derivedLoc = loc;
-          visitor.visitDerivedOopLocation(baseLoc, derivedLoc);
-        }
-      }
-    }
-
-    // We want narow oop, value and oop oop_types
-    OopMapValue.OopTypes[] values = new OopMapValue.OopTypes[] {
-      OopMapValue.OopTypes.OOP_VALUE, OopMapValue.OopTypes.VALUE_VALUE, OopMapValue.OopTypes.NARROWOOP_VALUE
-    };
-
-    {
-      for (OopMapStream oms = new OopMapStream(map, values); !oms.isDone(); oms.next()) {
-        omv = oms.getCurrent();
-        Address loc = fr.oopMapRegToLocation(omv.getReg(), regMap);
-        if (loc != null) {
-          if (omv.getType() == OopMapValue.OopTypes.OOP_VALUE) {
-            // This assert commented out because this will be useful
-            // to detect in the debugging system
-            // assert(Universe::is_heap_or_null(*loc), "found non oop pointer");
-            visitor.visitOopLocation(loc);
-          } else if (omv.getType() == OopMapValue.OopTypes.VALUE_VALUE) {
-            visitor.visitValueLocation(loc);
-          } else if (omv.getType() == OopMapValue.OopTypes.NARROWOOP_VALUE) {
-            visitor.visitNarrowOopLocation(loc);
-          }
-        }
-      }
-    }
-  }
-
-  /** Update callee-saved register info for the following frame.
-      Should only be called in non-core builds. */
-  public static void updateRegisterMap(Frame fr, CodeBlob cb, RegisterMap regMap, boolean debugging) {
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(!VM.getVM().isCore(), "non-core builds only");
-    }
-
-    if (!VM.getVM().isDebugging()) {
-      if (Assert.ASSERTS_ENABLED) {
-        OopMapSet maps = cb.getOopMaps();
-        Assert.that((maps != null) && (maps.getSize() > 0), "found null or empty OopMapSet for CodeBlob");
-      }
-    } else {
-      // Hack for some topmost frames that have been found with empty
-      // OopMapSets. (Actually have not seen the null case, but don't
-      // want to take any chances.) See HSDB.showThreadStackMemory().
-      OopMapSet maps = cb.getOopMaps();
-      if ((maps == null) || (maps.getSize() == 0)) {
-        return;
-      }
-    }
-
-    // Check if caller must update oop argument
-    regMap.setIncludeArgumentOops(cb.callerMustGCArguments());
-
-    int nofCallee = 0;
-    Address[] locs = new Address[2 * REG_COUNT + 1];
-    VMReg  [] regs = new VMReg  [2 * REG_COUNT + 1];
-    // ("+1" because REG_COUNT might be zero)
-
-    // Scan through oopmap and find location of all callee-saved registers
-    // (we do not do update in place, since info could be overwritten)
-    OopMap map  = cb.getOopMapForReturnAddress(fr.getPC(), debugging);
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(map != null, "no ptr map found");
-    }
-
-    OopMapValue omv = null;
-    for(OopMapStream oms = new OopMapStream(map, OopMapValue.OopTypes.CALLEE_SAVED_VALUE); !oms.isDone(); oms.next()) {
-      omv = oms.getCurrent();
-      if (Assert.ASSERTS_ENABLED) {
-        Assert.that(nofCallee < 2 * REG_COUNT, "overflow");
-      }
-      regs[nofCallee] = omv.getContentReg();
-      locs[nofCallee] = fr.oopMapRegToLocation(omv.getReg(), regMap);
-      nofCallee++;
-    }
-
-    // Check that runtime stubs save all callee-saved registers
-    // After adapter frames were deleted C2 doesn't use callee save registers at present
-    if (Assert.ASSERTS_ENABLED) {
-      if (VM.getVM().isServerCompiler()) {
-        Assert.that(!cb.isRuntimeStub() ||
-                    (nofCallee >= SAVED_ON_ENTRY_REG_COUNT || nofCallee >= C_SAVED_ON_ENTRY_REG_COUNT),
-                    "must save all");
-      }
-    }
-
-    // Copy found callee-saved register to reg_map
-    for (int i = 0; i < nofCallee; i++) {
-      regMap.setLocation(regs[i], locs[i]);
-    }
-  }
-}
--- a/agent/src/share/classes/sun/jvm/hotspot/compiler/OopMapStream.java	Thu May 21 10:07:35 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/compiler/OopMapStream.java	Thu May 21 16:19:49 2015 -0700
@@ -28,30 +28,26 @@
 
 public class OopMapStream {
   private CompressedReadStream stream;
-  private OopMap oopMap;
+  private ImmutableOopMap oopMap;
   private int mask;
   private int size;
   private int position;
   private OopMapValue omv;
   private boolean omvValid;
 
-  public OopMapStream(OopMap oopMap) {
+  public OopMapStream(ImmutableOopMap oopMap) {
     this(oopMap, (OopMapValue.OopTypes[]) null);
   }
 
-  public OopMapStream(OopMap oopMap, OopMapValue.OopTypes type) {
+  public OopMapStream(ImmutableOopMap oopMap, OopMapValue.OopTypes type) {
     this(oopMap, (OopMapValue.OopTypes[]) null);
     mask = type.getValue();
   }
 
-  public OopMapStream(OopMap oopMap, OopMapValue.OopTypes[] types) {
-    if (oopMap.getOMVData() == null) {
-      stream = new CompressedReadStream(oopMap.getWriteStream().getBuffer());
-    } else {
-      stream = new CompressedReadStream(oopMap.getOMVData());
-    }
+  public OopMapStream(ImmutableOopMap oopMap, OopMapValue.OopTypes[] types) {
+    stream = new CompressedReadStream(oopMap.getData());
     mask = computeMask(types);
-    size = (int) oopMap.getOMVCount();
+    size = (int) oopMap.getCount();
     position = 0;
     omv = new OopMapValue();
     omvValid = false;
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java	Thu May 21 10:07:35 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java	Thu May 21 16:19:49 2015 -0700
@@ -26,14 +26,12 @@
 
 import java.io.*;
 import java.util.*;
-import sun.jvm.hotspot.*;
+
 import sun.jvm.hotspot.code.*;
 import sun.jvm.hotspot.compiler.*;
-import sun.jvm.hotspot.c1.*;
 import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.interpreter.*;
 import sun.jvm.hotspot.oops.*;
-import sun.jvm.hotspot.runtime.sparc.SPARCFrame;
 import sun.jvm.hotspot.types.*;
 import sun.jvm.hotspot.utilities.*;
 
@@ -626,7 +624,7 @@
       Assert.that(cb != null, "sanity check");
     }
     if (cb.getOopMaps() != null) {
-      OopMapSet.oopsDo(this, cb, regMap, oopVisitor, VM.getVM().isDebugging());
+      ImmutableOopMapSet.oopsDo(this, cb, regMap, oopVisitor, VM.getVM().isDebugging());
 
       // FIXME: add in traversal of argument oops (skipping this for
       // now until we have the other stuff tested)
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/ppc64/PPC64Frame.java	Thu May 21 10:07:35 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/ppc64/PPC64Frame.java	Thu May 21 16:19:49 2015 -0700
@@ -358,7 +358,7 @@
       map.setIncludeArgumentOops(cb.callerMustGCArguments());
 
       if (cb.getOopMaps() != null) {
-        OopMapSet.updateRegisterMap(this, cb, map, true);
+        ImmutableOopMapSet.updateRegisterMap(this, cb, map, true);
       }
     }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCFrame.java	Thu May 21 10:07:35 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCFrame.java	Thu May 21 16:19:49 2015 -0700
@@ -24,8 +24,6 @@
 
 package sun.jvm.hotspot.runtime.sparc;
 
-import java.util.*;
-
 import sun.jvm.hotspot.asm.sparc.*;
 import sun.jvm.hotspot.code.*;
 import sun.jvm.hotspot.compiler.*;
@@ -34,7 +32,6 @@
 import sun.jvm.hotspot.oops.*;
 import sun.jvm.hotspot.runtime.*;
 import sun.jvm.hotspot.runtime.posix.*;
-import sun.jvm.hotspot.types.*;
 import sun.jvm.hotspot.utilities.*;
 
 /** Specialization of and implementation of abstract methods of the
@@ -592,7 +589,7 @@
             map.setIncludeArgumentOops(true);
           }
           if (cb.getOopMaps() != null) {
-            OopMapSet.updateRegisterMap(this, cb, map, VM.getVM().isDebugging());
+            ImmutableOopMapSet.updateRegisterMap(this, cb, map, VM.getVM().isDebugging());
           }
         }
       }
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Thu May 21 10:07:35 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Thu May 21 16:19:49 2015 -0700
@@ -385,7 +385,7 @@
       map.setIncludeArgumentOops(cb.callerMustGCArguments());
 
       if (cb.getOopMaps() != null) {
-        OopMapSet.updateRegisterMap(this, cb, map, true);
+        ImmutableOopMapSet.updateRegisterMap(this, cb, map, true);
       }
 
       // Since the prolog does the save and restore of EBP there is no oopmap
--- a/agent/src/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java	Thu May 21 10:07:35 2015 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java	Thu May 21 16:19:49 2015 -0700
@@ -31,11 +31,9 @@
 import sun.jvm.hotspot.compiler.*;
 import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.interpreter.*;
-import sun.jvm.hotspot.memory.*;
 import sun.jvm.hotspot.oops.*;
 import sun.jvm.hotspot.runtime.*;
 import sun.jvm.hotspot.tools.jcore.*;
-import sun.jvm.hotspot.types.*;
 import sun.jvm.hotspot.utilities.*;
 
 public class HTMLGenerator implements /* imports */ ClassConstants {
@@ -887,7 +885,7 @@
       private Formatter buf;
       private SymbolFinder symFinder = createSymbolFinder();
       private long pc;
-      private OopMapSet oms;
+      private ImmutableOopMapSet oms;
       private CodeBlob blob;
       private NMethod nmethod;
 
@@ -954,13 +952,13 @@
 
          if (oms != null) {
             long base = addressToLong(blob.codeBegin());
-            for (int i = 0, imax = (int)oms.getSize(); i < imax; i++) {
-               OopMap om = oms.getMapAt(i);
-               long omspc = base + om.getOffset();
+            for (int i = 0, imax = oms.getCount(); i < imax; i++) {
+               ImmutableOopMapPair pair = oms.getPairAt(i);
+               long omspc = base + pair.getPC();
                if (omspc > pc) {
                   if (omspc <= endPc) {
                      buf.br();
-                     buf.append(genOopMapInfo(om));
+                     buf.append(genOopMapInfo(oms.getMap(pair)));
                      // st.move_to(column);
                      // visitor.print("; ");
                         // om.print_on(st);
@@ -1167,7 +1165,7 @@
         }
     }
 
-   protected String genHTMLForOopMap(OopMap map) {
+   protected String genHTMLForOopMap(ImmutableOopMap map) {
       final int stack0 = VMRegImpl.getStack0().getValue();
       Formatter buf = new Formatter(genHTML);
 
@@ -1237,11 +1235,11 @@
 
 
    protected String genOopMapInfo(NMethod nmethod, PCDesc pcDesc) {
-      OopMapSet mapSet = nmethod.getOopMaps();
+      ImmutableOopMapSet mapSet = nmethod.getOopMaps();
       if (mapSet == null || (mapSet.getSize() <= 0))
         return "";
       int pcOffset = pcDesc.getPCOffset();
-      OopMap map = mapSet.findMapAtOffset(pcOffset, VM.getVM().isDebugging());
+      ImmutableOopMap map = mapSet.findMapAtOffset(pcOffset, VM.getVM().isDebugging());
       if (map == null) {
          throw new IllegalArgumentException("no oopmap at safepoint!");
       }
@@ -1249,7 +1247,7 @@
       return genOopMapInfo(map);
    }
 
-   protected String genOopMapInfo(OopMap map) {
+   protected String genOopMapInfo(ImmutableOopMap map) {
      Formatter buf = new Formatter(genHTML);
      buf.beginTag("pre");
      buf.append("OopMap: ");
--- a/src/cpu/aarch64/vm/aarch64.ad	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/aarch64/vm/aarch64.ad	Thu May 21 16:19:49 2015 -0700
@@ -421,7 +421,40 @@
 );
 
 // Class for all non-special integer registers
-reg_class no_special_reg32(
+reg_class no_special_reg32_no_fp(
+    R0,
+    R1,
+    R2,
+    R3,
+    R4,
+    R5,
+    R6,
+    R7,
+    R10,
+    R11,
+    R12,                        // rmethod
+    R13,
+    R14,
+    R15,
+    R16,
+    R17,
+    R18,
+    R19,
+    R20,
+    R21,
+    R22,
+    R23,
+    R24,
+    R25,
+    R26
+ /* R27, */                     // heapbase
+ /* R28, */                     // thread
+ /* R29, */                     // fp
+ /* R30, */                     // lr
+ /* R31 */                      // sp
+);
+
+reg_class no_special_reg32_with_fp(
     R0,
     R1,
     R2,
@@ -454,8 +487,43 @@
  /* R31 */                      // sp
 );
 
+reg_class_dynamic no_special_reg32(no_special_reg32_no_fp, no_special_reg32_with_fp, %{ PreserveFramePointer %});
+
 // Class for all non-special long integer registers
-reg_class no_special_reg(
+reg_class no_special_reg_no_fp(
+    R0, R0_H,
+    R1, R1_H,
+    R2, R2_H,
+    R3, R3_H,
+    R4, R4_H,
+    R5, R5_H,
+    R6, R6_H,
+    R7, R7_H,
+    R10, R10_H,
+    R11, R11_H,
+    R12, R12_H,                 // rmethod
+    R13, R13_H,
+    R14, R14_H,
+    R15, R15_H,
+    R16, R16_H,
+    R17, R17_H,
+    R18, R18_H,
+    R19, R19_H,
+    R20, R20_H,
+    R21, R21_H,
+    R22, R22_H,
+    R23, R23_H,
+    R24, R24_H,
+    R25, R25_H,
+    R26, R26_H,
+ /* R27, R27_H, */              // heapbase
+ /* R28, R28_H, */              // thread
+ /* R29, R29_H, */              // fp
+ /* R30, R30_H, */              // lr
+ /* R31, R31_H */               // sp
+);
+
+reg_class no_special_reg_with_fp(
     R0, R0_H,
     R1, R1_H,
     R2, R2_H,
@@ -488,6 +556,8 @@
  /* R31, R31_H */               // sp
 );
 
+reg_class_dynamic no_special_reg(no_special_reg_no_fp, no_special_reg_with_fp, %{ PreserveFramePointer %});
+
 // Class for 64 bit register r0
 reg_class r0_reg(
     R0, R0_H
@@ -1637,12 +1707,7 @@
 int MachCallStaticJavaNode::ret_addr_offset()
 {
   // call should be a simple bl
-  // unless this is a method handle invoke in which case it is
-  // mov(rfp, sp), bl, mov(sp, rfp)
   int off = 4;
-  if (_method_handle_invoke) {
-    off += 4;
-  }
   return off;
 }
 
@@ -1753,14 +1818,13 @@
   if (C->need_stack_bang(framesize))
     st->print("# stack bang size=%d\n\t", framesize);
 
-  if (framesize == 0) {
-    // Is this even possible?
-    st->print("stp  lr, rfp, [sp, #%d]!", -(2 * wordSize));
-  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
+  if (framesize < ((1 << 9) + 2 * wordSize)) {
     st->print("sub  sp, sp, #%d\n\t", framesize);
     st->print("stp  rfp, lr, [sp, #%d]", framesize - 2 * wordSize);
+    if (PreserveFramePointer) st->print("\n\tadd  rfp, sp, #%d", framesize - 2 * wordSize);
   } else {
     st->print("stp  lr, rfp, [sp, #%d]!\n\t", -(2 * wordSize));
+    if (PreserveFramePointer) st->print("mov  rfp, sp\n\t");
     st->print("mov  rscratch1, #%d\n\t", framesize - 2 * wordSize);
     st->print("sub  sp, sp, rscratch1");
   }
@@ -3517,34 +3581,6 @@
     }
   %}
 
-  enc_class aarch64_enc_java_handle_call(method meth) %{
-    MacroAssembler _masm(&cbuf);
-    relocInfo::relocType reloc;
-
-    // RFP is preserved across all calls, even compiled calls.
-    // Use it to preserve SP.
-    __ mov(rfp, sp);
-
-    const int start_offset = __ offset();
-    address addr = (address)$meth$$method;
-    if (!_method) {
-      // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
-      __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
-    } else if (_optimized_virtual) {
-      __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
-    } else {
-      __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
-    }
-
-    if (_method) {
-      // Emit stub for static call
-      CompiledStaticCall::emit_to_interp_stub(cbuf);
-    }
-
-    // now restore sp
-    __ mov(sp, rfp);
-  %}
-
   enc_class aarch64_enc_java_dynamic_call(method meth) %{
     MacroAssembler _masm(&cbuf);
     __ ic_call((address)$meth$$method);
@@ -12561,8 +12597,6 @@
 
   effect(USE meth);
 
-  predicate(!((CallStaticJavaNode*)n)->is_method_handle_invoke());
-
   ins_cost(CALL_COST);
 
   format %{ "call,static $meth \t// ==> " %}
@@ -12575,26 +12609,6 @@
 
 // TO HERE
 
-// Call Java Static Instruction (method handle version)
-
-instruct CallStaticJavaDirectHandle(method meth, iRegP_FP reg_mh_save)
-%{
-  match(CallStaticJava);
-
-  effect(USE meth);
-
-  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
-
-  ins_cost(CALL_COST);
-
-  format %{ "call,static $meth \t// (methodhandle) ==> " %}
-
-  ins_encode( aarch64_enc_java_handle_call(meth),
-              aarch64_enc_call_epilog );
-
-  ins_pipe(pipe_class_call);
-%}
-
 // Call Java Dynamic Instruction
 instruct CallDynamicJavaDirect(method meth)
 %{
--- a/src/cpu/aarch64/vm/c1_FrameMap_aarch64.cpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/aarch64/vm/c1_FrameMap_aarch64.cpp	Thu May 21 16:19:49 2015 -0700
@@ -346,8 +346,7 @@
 
 // JSR 292
 LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
-  // assert(rfp == rbp_mh_SP_save, "must be same register");
-  return rfp_opr;
+  return LIR_OprFact::illegalOpr;  // Not needed on aarch64
 }
 
 
--- a/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/aarch64/vm/c1_Runtime1_aarch64.cpp	Thu May 21 16:19:49 2015 -0700
@@ -443,18 +443,8 @@
     restore_live_registers(sasm, id != handle_exception_nofpu_id);
     break;
   case handle_exception_from_callee_id:
-    // Pop the return address since we are possibly changing SP (restoring from BP).
+    // Pop the return address.
     __ leave();
-
-    // Restore SP from FP if the exception PC is a method handle call site.
-    {
-      Label nope;
-      __ ldrw(rscratch1, Address(rthread, JavaThread::is_method_handle_return_offset()));
-      __ cbzw(rscratch1, nope);
-      __ mov(sp, rfp);
-      __ bind(nope);
-    }
-
     __ ret(lr);  // jump to exception handler
     break;
   default:  ShouldNotReachHere();
@@ -514,14 +504,6 @@
 
   __ verify_not_null_oop(exception_oop);
 
-  {
-    Label foo;
-    __ ldrw(rscratch1, Address(rthread, JavaThread::is_method_handle_return_offset()));
-    __ cbzw(rscratch1, foo);
-    __ mov(sp, rfp);
-    __ bind(foo);
-  }
-
   // continue at exception handler (return address removed)
   // note: do *not* remove arguments when unwinding the
   //       activation since the caller assumes having
--- a/src/cpu/aarch64/vm/frame_aarch64.cpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/aarch64/vm/frame_aarch64.cpp	Thu May 21 16:19:49 2015 -0700
@@ -223,7 +223,8 @@
     if (sender_blob->is_nmethod()) {
         nmethod* nm = sender_blob->as_nmethod_or_null();
         if (nm != NULL) {
-            if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) {
+            if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) ||
+                nm->method()->is_method_handle_intrinsic()) {
                 return false;
             }
         }
@@ -389,10 +390,9 @@
 // frame::verify_deopt_original_pc
 //
 // Verifies the calculated original PC of a deoptimization PC for the
-// given unextended SP.  The unextended SP might also be the saved SP
-// for MethodHandle call sites.
+// given unextended SP.
 #ifdef ASSERT
-void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) {
+void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp) {
   frame fr;
 
   // This is ugly but it's better than to change {get,set}_original_pc
@@ -402,33 +402,23 @@
 
   address original_pc = nm->get_original_pc(&fr);
   assert(nm->insts_contains(original_pc), "original PC must be in nmethod");
-  assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be");
 }
 #endif
 
 //------------------------------------------------------------------------------
 // frame::adjust_unextended_sp
 void frame::adjust_unextended_sp() {
-  // If we are returning to a compiled MethodHandle call site, the
-  // saved_fp will in fact be a saved value of the unextended SP.  The
-  // simplest way to tell whether we are returning to such a call site
-  // is as follows:
+  // On aarch64, sites calling method handle intrinsics and lambda forms are treated
+  // as any other call site. Therefore, no special action is needed when we are
+  // returning to any of these call sites.
 
   nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null();
   if (sender_nm != NULL) {
-    // If the sender PC is a deoptimization point, get the original
-    // PC.  For MethodHandle call site the unextended_sp is stored in
-    // saved_fp.
-    if (sender_nm->is_deopt_mh_entry(_pc)) {
-      DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp));
-      _unextended_sp = _fp;
-    }
-    else if (sender_nm->is_deopt_entry(_pc)) {
+    // If the sender PC is a deoptimization point, get the original PC.
+    if (sender_nm->is_deopt_entry(_pc) ||
+        sender_nm->is_deopt_mh_entry(_pc)) {
       DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp));
     }
-    else if (sender_nm->is_method_handle_return(_pc)) {
-      _unextended_sp = _fp;
-    }
   }
 }
 
--- a/src/cpu/aarch64/vm/frame_aarch64.hpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/aarch64/vm/frame_aarch64.hpp	Thu May 21 16:19:49 2015 -0700
@@ -167,10 +167,7 @@
 
 #ifdef ASSERT
   // Used in frame::sender_for_{interpreter,compiled}_frame
-  static void verify_deopt_original_pc(   nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false);
-  static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) {
-    verify_deopt_original_pc(nm, unextended_sp, true);
-  }
+  static void verify_deopt_original_pc(   nmethod* nm, intptr_t* unextended_sp);
 #endif
 
  public:
--- a/src/cpu/aarch64/vm/frame_aarch64.inline.hpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/aarch64/vm/frame_aarch64.inline.hpp	Thu May 21 16:19:49 2015 -0700
@@ -47,12 +47,6 @@
 inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) {
   intptr_t a = intptr_t(sp);
   intptr_t b = intptr_t(fp);
-#ifndef PRODUCT
-  if (fp)
-    if (sp > fp || (fp - sp > 0x100000))
-      for(;;)
-        asm("nop");
-#endif
   _sp = sp;
   _unextended_sp = sp;
   _fp = fp;
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Thu May 21 16:19:49 2015 -0700
@@ -3788,14 +3788,14 @@
 }
 
 void MacroAssembler::build_frame(int framesize) {
-  if (framesize == 0) {
-    // Is this even possible?
-    stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
-  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
+  assert(framesize > 0, "framesize must be > 0");
+  if (framesize < ((1 << 9) + 2 * wordSize)) {
     sub(sp, sp, framesize);
     stp(rfp, lr, Address(sp, framesize - 2 * wordSize));
+    if (PreserveFramePointer) add(rfp, sp, framesize - 2 * wordSize);
   } else {
     stp(rfp, lr, Address(pre(sp, -2 * wordSize)));
+    if (PreserveFramePointer) mov(rfp, sp);
     if (framesize < ((1 << 12) + 2 * wordSize))
       sub(sp, sp, framesize - 2 * wordSize);
     else {
@@ -3806,9 +3806,8 @@
 }
 
 void MacroAssembler::remove_frame(int framesize) {
-  if (framesize == 0) {
-    ldp(rfp, lr, Address(post(sp, 2 * wordSize)));
-  } else if (framesize < ((1 << 9) + 2 * wordSize)) {
+  assert(framesize > 0, "framesize must be > 0");
+  if (framesize < ((1 << 9) + 2 * wordSize)) {
     ldp(rfp, lr, Address(sp, framesize - 2 * wordSize));
     add(sp, sp, framesize);
   } else {
--- a/src/cpu/aarch64/vm/register_definitions_aarch64.cpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/aarch64/vm/register_definitions_aarch64.cpp	Thu May 21 16:19:49 2015 -0700
@@ -149,7 +149,3 @@
 REGISTER_DEFINITION(Register, rheapbase);
 
 REGISTER_DEFINITION(Register, r31_sp);
-
-// TODO : x86 uses rbp to save SP in method handle code
-// we may need to do the same with fp
-// REGISTER_DEFINITION(Register, rbp_mh_SP_save)
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Thu May 21 16:19:49 2015 -0700
@@ -2995,21 +2995,6 @@
 
   // r0: exception handler
 
-  // Restore SP from BP if the exception PC is a MethodHandle call site.
-  __ ldrw(rscratch1, Address(rthread, JavaThread::is_method_handle_return_offset()));
-  // n.b. Intel uses special register rbp_mh_SP_save here but we will
-  // just hard wire rfp
-  __ cmpw(rscratch1, zr);
-  // the obvious way to conditionally copy rfp to sp if NE
-  // Label skip;
-  // __ br(Assembler::EQ, skip);
-  // __ mov(sp, rfp);
-  // __ bind(skip);
-  // same but branchless
-  __ mov(rscratch1, sp);
-  __ csel(rscratch1, rfp, rscratch1, Assembler::NE);
-  __ mov(sp, rscratch1);
-
   // We have a handler in r0 (could be deopt blob).
   __ mov(r8, r0);
 
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Thu May 21 16:19:49 2015 -0700
@@ -1891,7 +1891,7 @@
     address start = __ pc();
       __ enter();
 
-      __ mov(rscratch1, len_reg);
+      __ mov(rscratch2, len_reg);
       __ ldrw(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT)));
 
       __ ld1(v0, __ T16B, rvec);
--- a/src/cpu/ppc/vm/vm_version_ppc.cpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/ppc/vm/vm_version_ppc.cpp	Thu May 21 16:19:49 2015 -0700
@@ -629,7 +629,7 @@
   // Print the detection code.
   if (PrintAssembly) {
     ttyLocker ttyl;
-    tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", code);
+    tty->print_cr("Decoding dscr configuration stub at " INTPTR_FORMAT " before execution:", p2i(code));
     Disassembler::decode((u_char*)code, (u_char*)code_end, tty);
   }
 
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Thu May 21 16:19:49 2015 -0700
@@ -54,6 +54,36 @@
 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
 // Implementation of AddressLiteral
 
+// A 2-D table for managing compressed displacement(disp8) on EVEX enabled platforms.
+unsigned char tuple_table[Assembler::EVEX_ETUP + 1][Assembler::AVX_512bit + 1] = {
+  // -----------------Table 4.5 -------------------- //
+  16, 32, 64,  // EVEX_FV(0)
+  4,  4,  4,   // EVEX_FV(1) - with Evex.b
+  16, 32, 64,  // EVEX_FV(2) - with Evex.w
+  8,  8,  8,   // EVEX_FV(3) - with Evex.w and Evex.b
+  8,  16, 32,  // EVEX_HV(0)
+  4,  4,  4,   // EVEX_HV(1) - with Evex.b
+  // -----------------Table 4.6 -------------------- //
+  16, 32, 64,  // EVEX_FVM(0)
+  1,  1,  1,   // EVEX_T1S(0)
+  2,  2,  2,   // EVEX_T1S(1)
+  4,  4,  4,   // EVEX_T1S(2)
+  8,  8,  8,   // EVEX_T1S(3)
+  4,  4,  4,   // EVEX_T1F(0)
+  8,  8,  8,   // EVEX_T1F(1)
+  8,  8,  8,   // EVEX_T2(0)
+  0,  16, 16,  // EVEX_T2(1)
+  0,  16, 16,  // EVEX_T4(0)
+  0,  0,  32,  // EVEX_T4(1)
+  0,  0,  32,  // EVEX_T8(0)
+  8,  16, 32,  // EVEX_HVM(0)
+  4,  8,  16,  // EVEX_QVM(0)
+  2,  4,  8,   // EVEX_OVM(0)
+  16, 16, 16,  // EVEX_M128(0)
+  8,  32, 64,  // EVEX_DUP(0)
+  0,  0,  0    // EVEX_NTUP
+};
+
 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
   _is_lval = false;
   _target = target;
@@ -183,8 +213,9 @@
 // make this go away someday
 void Assembler::emit_data(jint data, relocInfo::relocType rtype, int format) {
   if (rtype == relocInfo::none)
-        emit_int32(data);
-  else  emit_data(data, Relocation::spec_simple(rtype), format);
+    emit_int32(data);
+  else
+    emit_data(data, Relocation::spec_simple(rtype), format);
 }
 
 void Assembler::emit_data(jint data, RelocationHolder const& rspec, int format) {
@@ -273,6 +304,177 @@
 }
 
 
+bool Assembler::query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
+                                           int cur_tuple_type, int in_size_in_bits, int cur_encoding) {
+  int mod_idx = 0;
+  // We will test if the displacement fits the compressed format and if so
+  // apply the compression to the displacment iff the result is8bit.
+  if (VM_Version::supports_evex() && is_evex_inst) {
+    switch (cur_tuple_type) {
+    case EVEX_FV:
+      if ((cur_encoding & VEX_W) == VEX_W) {
+        mod_idx += 2 + ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      } else {
+        mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      }
+      break;
+
+    case EVEX_HV:
+      mod_idx = ((cur_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      break;
+
+    case EVEX_FVM:
+      break;
+
+    case EVEX_T1S:
+      switch (in_size_in_bits) {
+      case EVEX_8bit:
+        break;
+
+      case EVEX_16bit:
+        mod_idx = 1;
+        break;
+
+      case EVEX_32bit:
+        mod_idx = 2;
+        break;
+
+      case EVEX_64bit:
+        mod_idx = 3;
+        break;
+      }
+      break;
+
+    case EVEX_T1F:
+    case EVEX_T2:
+    case EVEX_T4:
+      mod_idx = (in_size_in_bits == EVEX_64bit) ? 1 : 0;
+      break;
+
+    case EVEX_T8:
+      break;
+
+    case EVEX_HVM:
+      break;
+
+    case EVEX_QVM:
+      break;
+
+    case EVEX_OVM:
+      break;
+
+    case EVEX_M128:
+      break;
+
+    case EVEX_DUP:
+      break;
+
+    default:
+      assert(0, "no valid evex tuple_table entry");
+      break;
+    }
+
+    if (vector_len >= AVX_128bit && vector_len <= AVX_512bit) {
+      int disp_factor = tuple_table[cur_tuple_type + mod_idx][vector_len];
+      if ((disp % disp_factor) == 0) {
+        int new_disp = disp / disp_factor;
+        if ((-0x80 <= new_disp && new_disp < 0x80)) {
+          disp = new_disp;
+        }
+      } else {
+        return false;
+      }
+    }
+  }
+  return (-0x80 <= disp && disp < 0x80);
+}
+
+
+bool Assembler::emit_compressed_disp_byte(int &disp) {
+  int mod_idx = 0;
+  // We will test if the displacement fits the compressed format and if so
+  // apply the compression to the displacment iff the result is8bit.
+  if (VM_Version::supports_evex() && is_evex_instruction) {
+    switch (tuple_type) {
+    case EVEX_FV:
+      if ((evex_encoding & VEX_W) == VEX_W) {
+        mod_idx += 2 + ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      } else {
+        mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      }
+      break;
+
+    case EVEX_HV:
+      mod_idx = ((evex_encoding & EVEX_Rb) == EVEX_Rb) ? 1 : 0;
+      break;
+
+    case EVEX_FVM:
+      break;
+
+    case EVEX_T1S:
+      switch (input_size_in_bits) {
+      case EVEX_8bit:
+        break;
+
+      case EVEX_16bit:
+        mod_idx = 1;
+        break;
+
+      case EVEX_32bit:
+        mod_idx = 2;
+        break;
+
+      case EVEX_64bit:
+        mod_idx = 3;
+        break;
+      }
+      break;
+
+    case EVEX_T1F:
+    case EVEX_T2:
+    case EVEX_T4:
+      mod_idx = (input_size_in_bits == EVEX_64bit) ? 1 : 0;
+      break;
+
+    case EVEX_T8:
+      break;
+
+    case EVEX_HVM:
+      break;
+
+    case EVEX_QVM:
+      break;
+
+    case EVEX_OVM:
+      break;
+
+    case EVEX_M128:
+      break;
+
+    case EVEX_DUP:
+      break;
+
+    default:
+      assert(0, "no valid evex tuple_table entry");
+      break;
+    }
+
+    if (avx_vector_len >= AVX_128bit && avx_vector_len <= AVX_512bit) {
+      int disp_factor = tuple_table[tuple_type + mod_idx][avx_vector_len];
+      if ((disp % disp_factor) == 0) {
+        int new_disp = disp / disp_factor;
+        if (is8bit(new_disp)) {
+          disp = new_disp;
+        }
+      } else {
+        return false;
+      }
+    }
+  }
+  return is8bit(disp);
+}
+
+
 void Assembler::emit_operand(Register reg, Register base, Register index,
                              Address::ScaleFactor scale, int disp,
                              RelocationHolder const& rspec,
@@ -296,7 +498,7 @@
         assert(index != rsp, "illegal addressing mode");
         emit_int8(0x04 | regenc);
         emit_int8(scale << 6 | indexenc | baseenc);
-      } else if (is8bit(disp) && rtype == relocInfo::none) {
+      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
         // [base + index*scale + imm8]
         // [01 reg 100][ss index base] imm8
         assert(index != rsp, "illegal addressing mode");
@@ -318,7 +520,7 @@
         // [00 reg 100][00 100 100]
         emit_int8(0x04 | regenc);
         emit_int8(0x24);
-      } else if (is8bit(disp) && rtype == relocInfo::none) {
+      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
         // [rsp + imm8]
         // [01 reg 100][00 100 100] disp8
         emit_int8(0x44 | regenc);
@@ -339,7 +541,7 @@
         // [base]
         // [00 reg base]
         emit_int8(0x00 | regenc | baseenc);
-      } else if (is8bit(disp) && rtype == relocInfo::none) {
+      } else if (emit_compressed_disp_byte(disp) && rtype == relocInfo::none) {
         // [base + disp8]
         // [01 reg base] disp8
         emit_int8(0x40 | regenc | baseenc);
@@ -389,11 +591,20 @@
       emit_data(disp, rspec, disp32_operand);
     }
   }
+  is_evex_instruction = false;
 }
 
 void Assembler::emit_operand(XMMRegister reg, Register base, Register index,
                              Address::ScaleFactor scale, int disp,
                              RelocationHolder const& rspec) {
+  if (UseAVX > 2) {
+    int xreg_enc = reg->encoding();
+    if (xreg_enc > 15) {
+      XMMRegister new_reg = as_XMMRegister(xreg_enc & 0xf);
+      emit_operand((Register)new_reg, base, index, scale, disp, rspec);
+      return;
+    }
+  }
   emit_operand((Register)reg, base, index, scale, disp, rspec);
 }
 
@@ -686,6 +897,29 @@
     debug_only(has_disp32 = true); // has both kinds of operands!
     break;
 
+  case 0x62: // EVEX_4bytes
+    assert((UseAVX > 0), "shouldn't have EVEX prefix");
+    assert(ip == inst+1, "no prefixes allowed");
+    // no EVEX collisions, all instructions that have 0x62 opcodes
+    // have EVEX versions and are subopcodes of 0x66
+    ip++; // skip P0 and exmaine W in P1
+    is_64bit = ((VEX_W & *ip) == VEX_W);
+    ip++; // move to P2
+    ip++; // skip P2, move to opcode
+    // To find the end of instruction (which == end_pc_operand).
+    switch (0xFF & *ip) {
+    case 0x61: // pcmpestri r, r/a, #8
+    case 0x70: // pshufd r, r/a, #8
+    case 0x73: // psrldq r, #8
+      tail_size = 1;  // the imm8
+      break;
+    default:
+      break;
+    }
+    ip++; // skip opcode
+    debug_only(has_disp32 = true); // has both kinds of operands!
+    break;
+
   case 0xD1: // sal a, 1; sar a, 1; shl a, 1; shr a, 1
   case 0xD3: // sal a, %cl; sar a, %cl; shl a, %cl; shr a, %cl
   case 0xD9: // fld_s a; fst_s a; fstp_s a; fldcw a
@@ -985,12 +1219,22 @@
 
 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::addsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::addss(XMMRegister dst, XMMRegister src) {
@@ -1000,20 +1244,26 @@
 
 void Assembler::addss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
   emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::aesdec(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, false,
+              VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8((unsigned char)0xDE);
   emit_operand(dst, src);
 }
 
 void Assembler::aesdec(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+                                      VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8((unsigned char)0xDE);
   emit_int8(0xC0 | encode);
 }
@@ -1021,14 +1271,16 @@
 void Assembler::aesdeclast(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, false,
+              VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8((unsigned char)0xDF);
   emit_operand(dst, src);
 }
 
 void Assembler::aesdeclast(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+                                      VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8((unsigned char)0xDF);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1036,14 +1288,16 @@
 void Assembler::aesenc(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, false,
+              VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8((unsigned char)0xDC);
   emit_operand(dst, src);
 }
 
 void Assembler::aesenc(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+                                      VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8((unsigned char)0xDC);
   emit_int8(0xC0 | encode);
 }
@@ -1051,14 +1305,16 @@
 void Assembler::aesenclast(XMMRegister dst, Address src) {
   assert(VM_Version::supports_aes(), "");
   InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, false,
+              VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8((unsigned char)0xDD);
   emit_operand(dst, src);
 }
 
 void Assembler::aesenclast(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_aes(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+                                      VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8((unsigned char)0xDD);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1091,7 +1347,7 @@
 
 void Assembler::andnl(Register dst, Register src1, Register src2) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode(dst, src1, src2);
+  int encode = vex_prefix_0F38_and_encode(dst, src1, src2, false);
   emit_int8((unsigned char)0xF2);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1099,7 +1355,7 @@
 void Assembler::andnl(Register dst, Register src1, Address src2) {
   InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38(dst, src1, src2);
+  vex_prefix_0F38(dst, src1, src2, false);
   emit_int8((unsigned char)0xF2);
   emit_operand(dst, src2);
 }
@@ -1126,7 +1382,7 @@
 
 void Assembler::blsil(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode(rbx, dst, src);
+  int encode = vex_prefix_0F38_and_encode(rbx, dst, src, false);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1134,14 +1390,14 @@
 void Assembler::blsil(Register dst, Address src) {
   InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38(rbx, dst, src);
+  vex_prefix_0F38(rbx, dst, src, false);
   emit_int8((unsigned char)0xF3);
   emit_operand(rbx, src);
 }
 
 void Assembler::blsmskl(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode(rdx, dst, src);
+  int encode = vex_prefix_0F38_and_encode(rdx, dst, src, false);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1149,14 +1405,14 @@
 void Assembler::blsmskl(Register dst, Address src) {
   InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38(rdx, dst, src);
+  vex_prefix_0F38(rdx, dst, src, false);
   emit_int8((unsigned char)0xF3);
   emit_operand(rdx, src);
 }
 
 void Assembler::blsrl(Register dst, Register src) {
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_0F38_and_encode(rcx, dst, src);
+  int encode = vex_prefix_0F38_and_encode(rcx, dst, src, false);
   emit_int8((unsigned char)0xF3);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1164,7 +1420,7 @@
 void Assembler::blsrl(Register dst, Address src) {
   InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
-  vex_prefix_0F38(rcx, dst, src);
+  vex_prefix_0F38(rcx, dst, src, false);
   emit_int8((unsigned char)0xF3);
   emit_operand(rcx, src);
 }
@@ -1312,22 +1568,36 @@
   // NOTE: dbx seems to decode this as comiss even though the
   // 0x66 is there. Strangly ucomisd comes out correct
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, true);
+  } else {
+    emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
+  }
 }
 
 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_nonds_q(0x2F, dst, src, VEX_SIMD_66, true);
+  } else {
+    emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
+  }
 }
 
 void Assembler::comiss(XMMRegister dst, Address src) {
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, true);
 }
 
 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE, true);
 }
 
 void Assembler::cpuid() {
@@ -1347,36 +1617,61 @@
 
 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1F;
+    input_size_in_bits = EVEX_64bit;
+    emit_simd_arith_q(0x5A, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
+  int encode = 0;
+  if (VM_Version::supports_evex()) {
+    encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, true);
+  } else {
+    encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2, false);
+  }
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+    emit_simd_arith_q(0x2A, dst, src, VEX_SIMD_F2, true);
+  } else {
+    emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, true);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
+  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3, true);
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
@@ -1385,6 +1680,10 @@
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
 }
@@ -1392,14 +1691,14 @@
 
 void Assembler::cvttsd2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, true);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvttss2sil(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, true);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1414,15 +1713,29 @@
 
 void Assembler::divsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::divss(XMMRegister dst, Address src) {
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
   emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
 }
@@ -1675,7 +1988,11 @@
 
 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_nonds_q(0x28, dst, src, VEX_SIMD_66, true);
+  } else {
+    emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
+  }
 }
 
 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
@@ -1685,7 +2002,8 @@
 
 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
+  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE, true, VEX_OPCODE_0F,
+                                      false, AVX_128bit);
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1698,6 +2016,51 @@
   emit_operand(dst, src);
 }
 
+void Assembler::kmovq(KRegister dst, KRegister src) {
+  NOT_LP64(assert(VM_Version::supports_evex(), ""));
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE,
+                                      true, VEX_OPCODE_0F, true);
+  emit_int8((unsigned char)0x90);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::kmovq(KRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_evex(), ""));
+  int dst_enc = dst->encoding();
+  int nds_enc = 0;
+  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_NONE,
+             VEX_OPCODE_0F, true, AVX_128bit, true, true);
+  emit_int8((unsigned char)0x90);
+  emit_operand((Register)dst, src);
+}
+
+void Assembler::kmovq(Address dst, KRegister src) {
+  NOT_LP64(assert(VM_Version::supports_evex(), ""));
+  int src_enc = src->encoding();
+  int nds_enc = 0;
+  vex_prefix(dst, nds_enc, src_enc, VEX_SIMD_NONE,
+             VEX_OPCODE_0F, true, AVX_128bit, true, true);
+  emit_int8((unsigned char)0x90);
+  emit_operand((Register)src, dst);
+}
+
+void Assembler::kmovql(KRegister dst, Register src) {
+  NOT_LP64(assert(VM_Version::supports_evex(), ""));
+  bool supports_bw = VM_Version::supports_avx512bw();
+  VexSimdPrefix pre = supports_bw ? VEX_SIMD_F2 : VEX_SIMD_NONE;
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, true,
+                                      VEX_OPCODE_0F, supports_bw);
+  emit_int8((unsigned char)0x92);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::kmovdl(KRegister dst, Register src) {
+  NOT_LP64(assert(VM_Version::supports_evex(), ""));
+  VexSimdPrefix pre = VM_Version::supports_avx512bw() ? VEX_SIMD_F2 : VEX_SIMD_NONE;
+  int encode = kreg_prefix_and_encode(dst, knoreg, src, pre, true, VEX_OPCODE_0F, false);
+  emit_int8((unsigned char)0x92);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
 
 void Assembler::movb(Address dst, int imm8) {
   InstructionMark im(this);
@@ -1718,7 +2081,7 @@
 
 void Assembler::movdl(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
+  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, true);
   emit_int8(0x6E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -1726,23 +2089,31 @@
 void Assembler::movdl(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // swap src/dst to get correct prefix
-  int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66);
+  int encode = simd_prefix_and_encode(src, dst, VEX_SIMD_66, true);
   emit_int8(0x7E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::movdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66, true, VEX_OPCODE_0F);
   emit_int8(0x6E);
   emit_operand(dst, src);
 }
 
 void Assembler::movdl(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66, true);
   emit_int8(0x7E);
   emit_operand(src, dst);
 }
@@ -1754,11 +2125,17 @@
 
 void Assembler::movdqa(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+  }
   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::movdqu(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+  }
   emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
 }
 
@@ -1769,8 +2146,11 @@
 
 void Assembler::movdqu(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+  }
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_F3, false);
   emit_int8(0x7F);
   emit_operand(src, dst);
 }
@@ -1778,28 +2158,77 @@
 // Move Unaligned 256bit Vector
 void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
   assert(UseAVX > 0, "");
-  bool vector256 = true;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+  }
+  int vector_len = AVX_256bit;
+  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector_len);
   emit_int8(0x6F);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::vmovdqu(XMMRegister dst, Address src) {
   assert(UseAVX > 0, "");
-  InstructionMark im(this);
-  bool vector256 = true;
-  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+  }
+  InstructionMark im(this);
+  int vector_len = AVX_256bit;
+  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false);
   emit_int8(0x6F);
   emit_operand(dst, src);
 }
 
 void Assembler::vmovdqu(Address dst, XMMRegister src) {
   assert(UseAVX > 0, "");
-  InstructionMark im(this);
-  bool vector256 = true;
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+  }
+  InstructionMark im(this);
+  int vector_len = AVX_256bit;
   // swap src<->dst for encoding
   assert(src != xnoreg, "sanity");
-  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
+  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false);
+  emit_int8(0x7F);
+  emit_operand(src, dst);
+}
+
+// Move Unaligned EVEX enabled Vector (programmable : 8,16,32,64)
+void Assembler::evmovdqu(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "");
+  int src_enc = src->encoding();
+  int dst_enc = dst->encoding();
+  int encode = vex_prefix_and_encode(dst_enc, 0, src_enc, VEX_SIMD_F3, VEX_OPCODE_0F,
+                                     true, vector_len, false, false);
+  emit_int8(0x6F);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::evmovdqu(XMMRegister dst, Address src, int vector_len) {
+  assert(UseAVX > 0, "");
+  InstructionMark im(this);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+    vex_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false);
+  } else {
+    vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector_len, false);
+  }
+  emit_int8(0x6F);
+  emit_operand(dst, src);
+}
+
+void Assembler::evmovdqu(Address dst, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "");
+  InstructionMark im(this);
+  assert(src != xnoreg, "sanity");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+    // swap src<->dst for encoding
+    vex_prefix_q(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false);
+  } else {
+    // swap src<->dst for encoding
+    vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector_len, false);
+  }
   emit_int8(0x7F);
   emit_operand(src, dst);
 }
@@ -1845,7 +2274,11 @@
 // The selection is done in MacroAssembler::movdbl() and movflt().
 void Assembler::movlpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_simd_arith(0x12, dst, src, VEX_SIMD_66, true);
 }
 
 void Assembler::movq( MMXRegister dst, Address src ) {
@@ -1871,7 +2304,13 @@
 void Assembler::movq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    simd_prefix_q(dst, xnoreg, src, VEX_SIMD_F3, true);
+  } else {
+    simd_prefix(dst, src, VEX_SIMD_F3, true, VEX_OPCODE_0F);
+  }
   emit_int8(0x7E);
   emit_operand(dst, src);
 }
@@ -1879,7 +2318,14 @@
 void Assembler::movq(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    simd_prefix(src, xnoreg, dst, VEX_SIMD_66, true,
+                VEX_OPCODE_0F, true, AVX_128bit);
+  } else {
+    simd_prefix(dst, src, VEX_SIMD_66, true);
+  }
   emit_int8((unsigned char)0xD6);
   emit_operand(src, dst);
 }
@@ -1902,36 +2348,60 @@
 
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x10, dst, src, VEX_SIMD_F2, true);
+  } else {
+    emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::movsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_simd_arith_nonds_q(0x10, dst, src, VEX_SIMD_F2, true);
+  } else {
+    emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::movsd(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    simd_prefix_q(src, xnoreg, dst, VEX_SIMD_F2);
+  } else {
+    simd_prefix(src, xnoreg, dst, VEX_SIMD_F2, false);
+  }
   emit_int8(0x11);
   emit_operand(src, dst);
 }
 
 void Assembler::movss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
+  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3, true);
 }
 
 void Assembler::movss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3, true);
 }
 
 void Assembler::movss(Address dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_F3, false);
   emit_int8(0x11);
   emit_operand(src, dst);
 }
@@ -2023,16 +2493,30 @@
 
 void Assembler::mulsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::mulss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
   emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
 }
 
@@ -2332,22 +2816,30 @@
 void Assembler::packuswb(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_simd_arith(0x67, dst, src, VEX_SIMD_66,
+                  false, (VM_Version::supports_avx512dq() == false));
 }
 
 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
-}
-
-void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256) {
+  emit_simd_arith(0x67, dst, src, VEX_SIMD_66,
+                  false, (VM_Version::supports_avx512dq() == false));
+}
+
+void Assembler::vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "some form of AVX must be enabled");
+  emit_vex_arith(0x67, dst, nds, src, VEX_SIMD_66, vector_len,
+                 false, (VM_Version::supports_avx512dq() == false));
+}
+
+void Assembler::vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len) {
   assert(VM_Version::supports_avx2(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector256);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false,
+                                      VEX_OPCODE_0F_3A, true, vector_len);
   emit_int8(0x00);
   emit_int8(0xC0 | encode);
   emit_int8(imm8);
@@ -2361,7 +2853,8 @@
 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
   InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_66, false, VEX_OPCODE_0F_3A,
+              false, AVX_128bit, true);
   emit_int8(0x61);
   emit_operand(dst, src);
   emit_int8(imm8);
@@ -2369,7 +2862,8 @@
 
 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false,
+                                      VEX_OPCODE_0F_3A, false, AVX_128bit, true);
   emit_int8(0x61);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -2377,7 +2871,8 @@
 
 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
+                                      false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -2385,7 +2880,8 @@
 
 void Assembler::pextrq(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
+                                      false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
   emit_int8(0x16);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -2393,7 +2889,8 @@
 
 void Assembler::pinsrd(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, false);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
+                                      false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
   emit_int8(0x22);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -2401,7 +2898,8 @@
 
 void Assembler::pinsrq(XMMRegister dst, Register src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, VEX_OPCODE_0F_3A, true);
+  int encode = simd_prefix_and_encode(dst, dst, as_XMMRegister(src->encoding()), VEX_SIMD_66, true, VEX_OPCODE_0F_3A,
+                                      false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
   emit_int8(0x22);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
@@ -2409,15 +2907,18 @@
 
 void Assembler::pmovzxbw(XMMRegister dst, Address src) {
   assert(VM_Version::supports_sse4_1(), "");
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_HVM;
+  }
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
   emit_int8(0x30);
   emit_operand(dst, src);
 }
 
 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
   emit_int8(0x30);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -2520,15 +3021,20 @@
 
 void Assembler::pshufb(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_ssse3(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38,
+                                      false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
   emit_int8(0x00);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::pshufb(XMMRegister dst, Address src) {
   assert(VM_Version::supports_ssse3(), "");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+  }
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38,
+              false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
   emit_int8(0x00);
   emit_operand(dst, src);
 }
@@ -2545,8 +3051,12 @@
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66, false);
   emit_int8(0x70);
   emit_operand(dst, src);
   emit_int8(mode & 0xFF);
@@ -2555,7 +3065,8 @@
 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
+  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2, false,
+                        (VM_Version::supports_avx512bw() == false));
   emit_int8(mode & 0xFF);
 }
 
@@ -2563,8 +3074,12 @@
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+  }
+  InstructionMark im(this);
+  simd_prefix(dst, xnoreg, src, VEX_SIMD_F2, false, VEX_OPCODE_0F,
+              false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
   emit_int8(0x70);
   emit_operand(dst, src);
   emit_int8(mode & 0xFF);
@@ -2573,7 +3088,8 @@
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66);
+  int encode = simd_prefix_and_encode(xmm3, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F,
+                                      false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift);
@@ -2583,14 +3099,15 @@
   assert(VM_Version::supports_sse4_1(), "");
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
   InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  simd_prefix(dst, src, VEX_SIMD_66, false, VEX_OPCODE_0F_38);
   emit_int8(0x17);
   emit_operand(dst, src);
 }
 
 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
+                                      false, VEX_OPCODE_0F_38);
   emit_int8(0x17);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -2598,19 +3115,20 @@
 void Assembler::vptest(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
-  bool vector256 = true;
+  int vector_len = AVX_256bit;
   assert(dst != xnoreg, "sanity");
   int dst_enc = dst->encoding();
   // swap src<->dst for encoding
-  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector_len);
   emit_int8(0x17);
   emit_operand(dst, src);
 }
 
 void Assembler::vptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  bool vector256 = true;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+  int vector_len = AVX_256bit;
+  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
+                                     vector_len, VEX_OPCODE_0F_38);
   emit_int8(0x17);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -2618,6 +3136,9 @@
 void Assembler::punpcklbw(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+  }
   emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
 }
 
@@ -2629,6 +3150,10 @@
 void Assembler::punpckldq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
   emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
 }
 
@@ -2838,12 +3363,22 @@
 
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::sqrtsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_simd_arith_q(0x51, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
@@ -2857,6 +3392,10 @@
 
 void Assembler::sqrtss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
   emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
 }
 
@@ -2907,12 +3446,20 @@
 
 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
+  } else {
+    emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
+  }
 }
 
 void Assembler::subsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+  }
+  emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::subss(XMMRegister dst, XMMRegister src) {
@@ -2922,6 +3469,10 @@
 
 void Assembler::subss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
 }
 
@@ -2978,22 +3529,36 @@
 
 void Assembler::ucomisd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, true);
+  } else {
+    emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
+  }
 }
 
 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_nonds_q(0x2E, dst, src, VEX_SIMD_66, true);
+  } else {
+    emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
+  }
 }
 
 void Assembler::ucomiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, true);
 }
 
 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE, true);
 }
 
 void Assembler::xabort(int8_t imm8) {
@@ -3075,82 +3640,138 @@
 
 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  } else {
+    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  }
 }
 
 void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+  if (VM_Version::supports_evex()) {
+    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  } else {
+    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  }
 }
 
 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
 }
 
 void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
 }
 
 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  } else {
+    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  }
 }
 
 void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+  if (VM_Version::supports_evex()) {
+    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  } else {
+    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  }
 }
 
 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
 }
 
 void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
 }
 
 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  } else {
+    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  }
 }
 
 void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+  if (VM_Version::supports_evex()) {
+    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  } else {
+    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  }
 }
 
 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
 }
 
 void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
 }
 
 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  } else {
+    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  }
 }
 
 void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+  if (VM_Version::supports_evex()) {
+    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  } else {
+    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, AVX_128bit);
+  }
 }
 
 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
 }
 
 void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, AVX_128bit);
 }
 
 //====================VECTOR ARITHMETIC=====================================
@@ -3159,7 +3780,11 @@
 
 void Assembler::addpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x58, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
+  }
 }
 
 void Assembler::addps(XMMRegister dst, XMMRegister src) {
@@ -3167,29 +3792,47 @@
   emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
 }
 
-void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  if (VM_Version::supports_evex()) {
+    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
+  }
+}
+
+void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
-}
-
-void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
+}
+
+void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_64bit;
+    emit_vex_arith_q(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector_len);
+  }
+}
+
+void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector_len);
 }
 
 void Assembler::subpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x5C, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
+  }
 }
 
 void Assembler::subps(XMMRegister dst, XMMRegister src) {
@@ -3197,29 +3840,47 @@
   emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
 }
 
-void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  if (VM_Version::supports_evex()) {
+    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
+  }
+}
+
+void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
-}
-
-void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
+}
+
+void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_64bit;
+    emit_vex_arith_q(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector_len);
+  }
+}
+
+void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector_len);
 }
 
 void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x59, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
+  }
 }
 
 void Assembler::mulps(XMMRegister dst, XMMRegister src) {
@@ -3227,29 +3888,47 @@
   emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
 }
 
-void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  if (VM_Version::supports_evex()) {
+    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
+  }
+}
+
+void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
-}
-
-void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
+}
+
+void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_64bit;
+    emit_vex_arith_q(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector_len);
+  }
+}
+
+void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector_len);
 }
 
 void Assembler::divpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0x5E, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
+  }
 }
 
 void Assembler::divps(XMMRegister dst, XMMRegister src) {
@@ -3257,118 +3936,199 @@
   emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
 }
 
-void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  if (VM_Version::supports_evex()) {
+    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
+  }
+}
+
+void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
-}
-
-void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
+}
+
+void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_64bit;
+    emit_vex_arith_q(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector_len);
+  }
+}
+
+void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector_len);
 }
 
 void Assembler::andpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+    emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0x54, dst, src, VEX_SIMD_66, false, true);
+  }
 }
 
 void Assembler::andps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE, false,
+                  (VM_Version::supports_avx512dq() == false));
 }
 
 void Assembler::andps(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE,
+                  false, (VM_Version::supports_avx512dq() == false));
 }
 
 void Assembler::andpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
-}
-
-void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_64bit;
+    emit_simd_arith_q(0x54, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0x54, dst, src, VEX_SIMD_66, false, true);
+  }
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+    emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, true);
+  }
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
-}
-
-void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  bool legacy_mode = (VM_Version::supports_avx512dq() == false);
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len, legacy_mode);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_64bit;
+    emit_vex_arith_q(0x54, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector_len, true);
+  }
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector_len,
+                 (VM_Version::supports_avx512dq() == false));
 }
 
 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+    emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0x57, dst, src, VEX_SIMD_66, false, true);
+  }
 }
 
 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE,
+                  false, (VM_Version::supports_avx512dq() == false));
 }
 
 void Assembler::xorpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_64bit;
+    emit_simd_arith_q(0x57, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0x57, dst, src, VEX_SIMD_66, false, true);
+  }
 }
 
 void Assembler::xorps(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
-}
-
-void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE, false,
+                  (VM_Version::supports_avx512dq() == false));
+}
+
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+    emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, true);
+  }
+}
+
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
-}
-
-void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len,
+                 (VM_Version::supports_avx512dq() == false));
+}
+
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  if (VM_Version::supports_evex() && VM_Version::supports_avx512dq()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_64bit;
+    emit_vex_arith_q(0x57, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector_len, true);
+  }
+}
+
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
   assert(VM_Version::supports_avx(), "");
-  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
-}
-
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector_len,
+                 (VM_Version::supports_avx512dq() == false));
+}
 
 // Integer vector arithmetic
-void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+void Assembler::vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx() && (vector_len == 0) ||
+         VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len,
+                                     VEX_OPCODE_0F_38, true, false);
   emit_int8(0x01);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+void Assembler::vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_avx() && (vector_len == 0) ||
+         VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len,
+                                     VEX_OPCODE_0F_38, true, false);
   emit_int8(0x02);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -3390,61 +4150,89 @@
 
 void Assembler::paddq(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0xD4, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
+  }
 }
 
 void Assembler::phaddw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+                                      VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8(0x01);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::phaddd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse3(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+                                      VEX_OPCODE_0F_38, false, AVX_128bit, true);
   emit_int8(0x02);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
+void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len,
+                 (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len,
+                 (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
+  }
+}
+
+void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+  }
+  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+  }
+  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_64bit;
+    emit_vex_arith_q(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector_len);
+  }
 }
 
 void Assembler::psubb(XMMRegister dst, XMMRegister src) {
@@ -3464,84 +4252,149 @@
 
 void Assembler::psubq(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
-}
-
-void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0xFB, dst, src, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
+  }
+}
+
+void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len,
+                 (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len,
+                 (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
+  }
+}
+
+void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+  }
+  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector_len,
+                 (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+  }
+  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector_len,
+                 (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_64bit;
+    emit_vex_arith_q(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector_len);
+  }
 }
 
 void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
+  emit_simd_arith(0xD5, dst, src, VEX_SIMD_66,
+                  (VM_Version::supports_avx512bw() == false));
 }
 
 void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66,
+                                      false, VEX_OPCODE_0F_38);
   emit_int8(0x40);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len,
+                 (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66,
+                                     vector_len, VEX_OPCODE_0F_38);
   emit_int8(0x40);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
-void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  InstructionMark im(this);
+void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 2, "requires some form of AVX");
+  int src_enc = src->encoding();
   int dst_enc = dst->encoding();
   int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66,
+                                     VEX_OPCODE_0F_38, true, vector_len, false, false);
+  emit_int8(0x40);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FVM;
+  }
+  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  InstructionMark im(this);
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66,
+             VEX_OPCODE_0F_38, false, vector_len);
+  emit_int8(0x40);
+  emit_operand(dst, src);
+}
+
+void Assembler::vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_64bit;
+  }
+  InstructionMark im(this);
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, true, vector_len);
   emit_int8(0x40);
   emit_operand(dst, src);
 }
@@ -3550,7 +4403,8 @@
 void Assembler::psllw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
-  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F,
+                                      false, AVX_128bit, (VM_Version::supports_avx512bw() == false));
   emit_int8(0x71);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -3559,7 +4413,7 @@
 void Assembler::pslld(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
-  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false);
   emit_int8(0x72);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -3568,7 +4422,7 @@
 void Assembler::psllq(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
-  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, true);
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -3576,7 +4430,8 @@
 
 void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
+  emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66, false,
+                  (VM_Version::supports_avx512bw() == false));
 }
 
 void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
@@ -3586,50 +4441,65 @@
 
 void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
-}
-
-void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0xF3, dst, shift, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
+  }
+}
+
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
   // XMM6 is for /6 encoding: 66 0F 71 /6 ib
-  emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
+  emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector_len,
+                 (VM_Version::supports_avx512bw() == false));
   emit_int8(shift & 0xFF);
 }
 
-void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
   // XMM6 is for /6 encoding: 66 0F 72 /6 ib
-  emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
+  emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector_len);
   emit_int8(shift & 0xFF);
 }
 
-void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
   // XMM6 is for /6 encoding: 66 0F 73 /6 ib
-  emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
+  if (VM_Version::supports_evex()) {
+    emit_vex_arith_q(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector_len);
+  }
   emit_int8(shift & 0xFF);
 }
 
-void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector_len,
+                 (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    emit_vex_arith_q(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector_len);
+  }
 }
 
 // Shift packed integers logically right by specified number of bits.
 void Assembler::psrlw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM2 is for /2 encoding: 66 0F 71 /2 ib
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F,
+                                      (VM_Version::supports_avx512bw() == false));
   emit_int8(0x71);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -3638,7 +4508,7 @@
 void Assembler::psrld(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM2 is for /2 encoding: 66 0F 72 /2 ib
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false);
   emit_int8(0x72);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -3649,7 +4519,12 @@
   // shifts 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  int encode = 0;
+  if (VM_Version::supports_evex() && VM_Version::supports_avx512bw()) {
+    encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, true, VEX_OPCODE_0F, false);
+  } else {
+    encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F, true);
+  }
   emit_int8(0x73);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -3657,7 +4532,8 @@
 
 void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
+  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66, false,
+                  (VM_Version::supports_avx512bw() == false));
 }
 
 void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
@@ -3667,50 +4543,65 @@
 
 void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
-}
-
-void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+  if (VM_Version::supports_evex()) {
+    emit_simd_arith_q(0xD3, dst, shift, VEX_SIMD_66);
+  } else {
+    emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
+  }
+}
+
+void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
-  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
+  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector_len,
+                 (VM_Version::supports_avx512bw() == false));
   emit_int8(shift & 0xFF);
 }
 
-void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
-  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
+  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector_len);
   emit_int8(shift & 0xFF);
 }
 
-void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
   // XMM2 is for /2 encoding: 66 0F 73 /2 ib
-  emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
+  if (VM_Version::supports_evex()) {
+    emit_vex_arith_q(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector_len);
+  }
   emit_int8(shift & 0xFF);
 }
 
-void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
+void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector_len,
+                 (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    emit_vex_arith_q(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
+  } else {
+    emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector_len);
+  }
 }
 
 // Shift packed integers arithmetically right by specified number of bits.
 void Assembler::psraw(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
-  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, false, VEX_OPCODE_0F,
+                                      (VM_Version::supports_avx512bw() == false));
   emit_int8(0x71);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -3719,7 +4610,7 @@
 void Assembler::psrad(XMMRegister dst, int shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // XMM4 is for /4 encoding: 66 0F 72 /4 ib
-  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66, false);
   emit_int8(0x72);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(shift & 0xFF);
@@ -3727,7 +4618,8 @@
 
 void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
+  emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66,
+                  (VM_Version::supports_avx512bw() == false));
 }
 
 void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
@@ -3735,28 +4627,30 @@
   emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
 }
 
-void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
-  emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
+  emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector_len,
+                 (VM_Version::supports_avx512bw() == false));
   emit_int8(shift & 0xFF);
 }
 
-void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
+void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
   // XMM4 is for /4 encoding: 66 0F 71 /4 ib
-  emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
+  emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector_len);
   emit_int8(shift & 0xFF);
 }
 
-void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
+void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector_len,
+                 (VM_Version::supports_avx512bw() == false));
+}
+
+void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector_len);
 }
 
 
@@ -3766,14 +4660,18 @@
   emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
 }
 
-void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
+void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector_len);
 }
 
 void Assembler::por(XMMRegister dst, XMMRegister src) {
@@ -3781,14 +4679,18 @@
   emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
 }
 
-void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
+void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector_len);
 }
 
 void Assembler::pxor(XMMRegister dst, XMMRegister src) {
@@ -3796,21 +4698,25 @@
   emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
 }
 
-void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
-}
-
-void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
-  assert(VM_Version::supports_avx() && !vector256 || VM_Version::supports_avx2(), "256 bit integer vectors requires AVX2");
-  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
+void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
+}
+
+void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len) {
+  assert(UseAVX > 0, "requires some form of AVX");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_FV;
+    input_size_in_bits = EVEX_32bit;
+  }
+  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector_len);
 }
 
 
 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  bool vector256 = true;
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+  int vector_len = AVX_256bit;
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
   emit_int8(0x18);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -3818,14 +4724,51 @@
   emit_int8(0x01);
 }
 
-void Assembler::vinsertf128h(XMMRegister dst, Address src) {
+void Assembler::vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_evex(), "");
+  int vector_len = AVX_512bit;
+  int src_enc = src->encoding();
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66,
+                                     VEX_OPCODE_0F_3A, true, vector_len, false, false);
+  emit_int8(0x1A);
+  emit_int8((unsigned char)(0xC0 | encode));
+  // 0x00 - insert into lower 256 bits
+  // 0x01 - insert into upper 256 bits
+  emit_int8(0x01);
+}
+
+void Assembler::vinsertf64x4h(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  bool vector256 = true;
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T4;
+    input_size_in_bits = EVEX_64bit;
+  }
+  InstructionMark im(this);
+  int vector_len = AVX_512bit;
   assert(dst != xnoreg, "sanity");
   int dst_enc = dst->encoding();
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, true, vector_len);
+  emit_int8(0x1A);
+  emit_operand(dst, src);
+  // 0x01 - insert into upper 128 bits
+  emit_int8(0x01);
+}
+
+void Assembler::vinsertf128h(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T4;
+    input_size_in_bits = EVEX_32bit;
+  }
+  InstructionMark im(this);
+  int vector_len = AVX_256bit;
+  assert(dst != xnoreg, "sanity");
+  int dst_enc = dst->encoding();
+  // swap src<->dst for encoding
+  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
   emit_int8(0x18);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
@@ -3834,8 +4777,8 @@
 
 void Assembler::vextractf128h(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  bool vector256 = true;
-  int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+  int vector_len = AVX_256bit;
+  int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
   emit_int8(0x19);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -3845,11 +4788,15 @@
 
 void Assembler::vextractf128h(Address dst, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  bool vector256 = true;
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T4;
+    input_size_in_bits = EVEX_32bit;
+  }
+  InstructionMark im(this);
+  int vector_len = AVX_256bit;
   assert(src != xnoreg, "sanity");
   int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
   emit_int8(0x19);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
@@ -3858,8 +4805,8 @@
 
 void Assembler::vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  bool vector256 = true;
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+  int vector_len = AVX_256bit;
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
   emit_int8(0x38);
   emit_int8((unsigned char)(0xC0 | encode));
   // 0x00 - insert into lower 128 bits
@@ -3867,38 +4814,169 @@
   emit_int8(0x01);
 }
 
+void Assembler::vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_evex(), "");
+  int vector_len = AVX_512bit;
+  int src_enc = src->encoding();
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
+                                     VM_Version::supports_avx512dq(), vector_len, false, false);
+  emit_int8(0x38);
+  emit_int8((unsigned char)(0xC0 | encode));
+  // 0x00 - insert into lower 256 bits
+  // 0x01 - insert into upper 256 bits
+  emit_int8(0x01);
+}
+
 void Assembler::vinserti128h(XMMRegister dst, Address src) {
   assert(VM_Version::supports_avx2(), "");
-  InstructionMark im(this);
-  bool vector256 = true;
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T4;
+    input_size_in_bits = EVEX_32bit;
+  }
+  InstructionMark im(this);
+  int vector_len = AVX_256bit;
   assert(dst != xnoreg, "sanity");
   int dst_enc = dst->encoding();
   // swap src<->dst for encoding
-  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+  vex_prefix(src, dst_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
   emit_int8(0x38);
   emit_operand(dst, src);
   // 0x01 - insert into upper 128 bits
   emit_int8(0x01);
 }
 
+void Assembler::vextracti128h(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  int vector_len = AVX_256bit;
+  int encode = vex_prefix_and_encode(src, xnoreg, dst, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A);
+  emit_int8(0x39);
+  emit_int8((unsigned char)(0xC0 | encode));
+  // 0x00 - insert into lower 128 bits
+  // 0x01 - insert into upper 128 bits
+  emit_int8(0x01);
+}
+
 void Assembler::vextracti128h(Address dst, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  InstructionMark im(this);
-  bool vector256 = true;
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T4;
+    input_size_in_bits = EVEX_32bit;
+  }
+  InstructionMark im(this);
+  int vector_len = AVX_256bit;
   assert(src != xnoreg, "sanity");
   int src_enc = src->encoding();
-  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector256);
+  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A, false, vector_len);
   emit_int8(0x39);
   emit_operand(src, dst);
   // 0x01 - extract from upper 128 bits
   emit_int8(0x01);
 }
 
+void Assembler::vextracti64x4h(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_evex(), "");
+  int vector_len = AVX_512bit;
+  int src_enc = src->encoding();
+  int dst_enc = dst->encoding();
+  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
+                                     true, vector_len, false, false);
+  emit_int8(0x3B);
+  emit_int8((unsigned char)(0xC0 | encode));
+  // 0x01 - extract from upper 256 bits
+  emit_int8(0x01);
+}
+
+void Assembler::vextracti64x2h(XMMRegister dst, XMMRegister src, int value) {
+  assert(VM_Version::supports_evex(), "");
+  int vector_len = AVX_512bit;
+  int src_enc = src->encoding();
+  int dst_enc = dst->encoding();
+  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
+                                     VM_Version::supports_avx512dq(), vector_len, false, false);
+  emit_int8(0x39);
+  emit_int8((unsigned char)(0xC0 | encode));
+  // 0x01 - extract from bits 255:128
+  // 0x02 - extract from bits 383:256
+  // 0x03 - extract from bits 511:384
+  emit_int8(value & 0x3);
+}
+
+void Assembler::vextractf64x4h(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_evex(), "");
+  int vector_len = AVX_512bit;
+  int src_enc = src->encoding();
+  int dst_enc = dst->encoding();
+  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
+                                     VM_Version::supports_avx512dq(), vector_len, false, false);
+  emit_int8(0x1B);
+  emit_int8((unsigned char)(0xC0 | encode));
+  // 0x01 - extract from upper 256 bits
+  emit_int8(0x01);
+}
+
+void Assembler::vextractf64x4h(Address dst, XMMRegister src) {
+  assert(VM_Version::supports_avx2(), "");
+  tuple_type = EVEX_T4;
+  input_size_in_bits = EVEX_64bit;
+  InstructionMark im(this);
+  int vector_len = AVX_512bit;
+  assert(src != xnoreg, "sanity");
+  int src_enc = src->encoding();
+  vex_prefix(dst, 0, src_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
+             VM_Version::supports_avx512dq(), vector_len);
+  emit_int8(0x1B);
+  emit_operand(src, dst);
+  // 0x01 - extract from upper 128 bits
+  emit_int8(0x01);
+}
+
+void Assembler::vextractf32x4h(XMMRegister dst, XMMRegister src, int value) {
+  assert(VM_Version::supports_evex(), "");
+  int vector_len = AVX_512bit;
+  int src_enc = src->encoding();
+  int dst_enc = dst->encoding();
+  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66,
+                                     VEX_OPCODE_0F_3A, false, vector_len, false, false);
+  emit_int8(0x19);
+  emit_int8((unsigned char)(0xC0 | encode));
+  // 0x01 - extract from bits 255:128
+  // 0x02 - extract from bits 383:256
+  // 0x03 - extract from bits 511:384
+  emit_int8(value & 0x3);
+}
+
+void Assembler::vextractf64x2h(XMMRegister dst, XMMRegister src, int value) {
+  assert(VM_Version::supports_evex(), "");
+  int vector_len = AVX_512bit;
+  int src_enc = src->encoding();
+  int dst_enc = dst->encoding();
+  int encode = vex_prefix_and_encode(src_enc, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_3A,
+                                     VM_Version::supports_avx512dq(), vector_len, false, false);
+  emit_int8(0x19);
+  emit_int8((unsigned char)(0xC0 | encode));
+  // 0x01 - extract from bits 255:128
+  // 0x02 - extract from bits 383:256
+  // 0x03 - extract from bits 511:384
+  emit_int8(value & 0x3);
+}
+
 // duplicate 4-bytes integer data from src into 8 locations in dest
 void Assembler::vpbroadcastd(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_avx2(), "");
-  bool vector256 = true;
-  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+  int vector_len = AVX_256bit;
+  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
+                                     vector_len, VEX_OPCODE_0F_38, false);
+  emit_int8(0x58);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+// duplicate 4-bytes integer data from src into 8 locations in dest
+void Assembler::evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len) {
+  assert(VM_Version::supports_evex(), "");
+  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
+                                     vector_len, VEX_OPCODE_0F_38, false);
   emit_int8(0x58);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -3906,7 +4984,8 @@
 // Carry-Less Multiplication Quadword
 void Assembler::pclmulqdq(XMMRegister dst, XMMRegister src, int mask) {
   assert(VM_Version::supports_clmul(), "");
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, false,
+                                      VEX_OPCODE_0F_3A, false, AVX_128bit, true);
   emit_int8(0x44);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8((unsigned char)mask);
@@ -3915,8 +4994,9 @@
 // Carry-Less Multiplication Quadword
 void Assembler::vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask) {
   assert(VM_Version::supports_avx() && VM_Version::supports_clmul(), "");
-  bool vector256 = false;
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+  int vector_len = AVX_128bit;
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66,
+                                     vector_len, VEX_OPCODE_0F_3A, true);
   emit_int8(0x44);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8((unsigned char)mask);
@@ -3924,8 +5004,11 @@
 
 void Assembler::vzeroupper() {
   assert(VM_Version::supports_avx(), "");
-  (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
-  emit_int8(0x77);
+  if (UseAVX < 3)
+  {
+    (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
+    emit_int8(0x77);
+  }
 }
 
 
@@ -4442,7 +5525,7 @@
 }
 
 
-void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, bool vector256) {
+void Assembler::vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, int nds_enc, VexSimdPrefix pre, VexOpcode opc, int vector_len) {
   if (vex_b || vex_x || vex_w || (opc == VEX_OPCODE_0F_38) || (opc == VEX_OPCODE_0F_3A)) {
     prefix(VEX_3bytes);
 
@@ -4452,7 +5535,7 @@
     emit_int8(byte1);
 
     int byte2 = ((~nds_enc) & 0xf) << 3;
-    byte2 |= (vex_w ? VEX_W : 0) | (vector256 ? 4 : 0) | pre;
+    byte2 |= (vex_w ? VEX_W : 0) | ((vector_len > 0) ? 4 : 0) | pre;
     emit_int8(byte2);
   } else {
     prefix(VEX_2bytes);
@@ -4460,89 +5543,237 @@
     int byte1 = vex_r ? VEX_R : 0;
     byte1 = (~byte1) & 0x80;
     byte1 |= ((~nds_enc) & 0xf) << 3;
-    byte1 |= (vector256 ? 4 : 0) | pre;
+    byte1 |= ((vector_len > 0 ) ? 4 : 0) | pre;
     emit_int8(byte1);
   }
 }
 
-void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256){
+// This is a 4 byte encoding
+void Assembler::evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
+                            int nds_enc, VexSimdPrefix pre, VexOpcode opc,
+                            bool is_extended_context, bool is_merge_context,
+                            int vector_len, bool no_mask_reg ){
+  // EVEX 0x62 prefix
+  prefix(EVEX_4bytes);
+  evex_encoding = (vex_w ? VEX_W : 0) | (evex_r ? EVEX_Rb : 0);
+
+  // P0: byte 2, initialized to RXBR`00mm
+  // instead of not'd
+  int byte2 = (vex_r ? VEX_R : 0) | (vex_x ? VEX_X : 0) | (vex_b ? VEX_B : 0) | (evex_r ? EVEX_Rb : 0);
+  byte2 = (~byte2) & 0xF0;
+  // confine opc opcode extensions in mm bits to lower two bits
+  // of form {0F, 0F_38, 0F_3A}
+  byte2 |= opc;
+  emit_int8(byte2);
+
+  // P1: byte 3 as Wvvvv1pp
+  int byte3 = ((~nds_enc) & 0xf) << 3;
+  // p[10] is always 1
+  byte3 |= EVEX_F;
+  byte3 |= (vex_w & 1) << 7;
+  // confine pre opcode extensions in pp bits to lower two bits
+  // of form {66, F3, F2}
+  byte3 |= pre;
+  emit_int8(byte3);
+
+  // P2: byte 4 as zL'Lbv'aaa
+  int byte4 = (no_mask_reg) ? 0 : 1; // kregs are implemented in the low 3 bits as aaa (hard code k1, it will be initialized for now)
+  // EVEX.v` for extending EVEX.vvvv or VIDX
+  byte4 |= (evex_v ? 0: EVEX_V);
+  // third EXEC.b for broadcast actions
+  byte4 |= (is_extended_context ? EVEX_Rb : 0);
+  // fourth EVEX.L'L for vector length : 0 is 128, 1 is 256, 2 is 512, currently we do not support 1024
+  byte4 |= ((vector_len) & 0x3) << 5;
+  // last is EVEX.z for zero/merge actions
+  byte4 |= (is_merge_context ? EVEX_Z : 0);
+  emit_int8(byte4);
+}
+
+void Assembler::vex_prefix(Address adr, int nds_enc, int xreg_enc, VexSimdPrefix pre,
+                           VexOpcode opc, bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg) {
   bool vex_r = (xreg_enc >= 8);
   bool vex_b = adr.base_needs_rex();
   bool vex_x = adr.index_needs_rex();
-  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
-}
-
-int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc, bool vex_w, bool vector256) {
+  avx_vector_len = vector_len;
+
+  // if vector length is turned off, revert to AVX for vectors smaller than AVX_512bit
+  if (VM_Version::supports_avx512vl() == false) {
+    switch (vector_len) {
+    case AVX_128bit:
+    case AVX_256bit:
+      legacy_mode = true;
+      break;
+    }
+  }
+
+  if ((UseAVX > 2) && (legacy_mode == false))
+  {
+    bool evex_r = (xreg_enc >= 16);
+    bool evex_v = (nds_enc >= 16);
+    is_evex_instruction = true;
+    evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
+  } else {
+    vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
+  }
+}
+
+int Assembler::vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc, VexSimdPrefix pre, VexOpcode opc,
+                                     bool vex_w, int vector_len, bool legacy_mode, bool no_mask_reg ) {
   bool vex_r = (dst_enc >= 8);
   bool vex_b = (src_enc >= 8);
   bool vex_x = false;
-  vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector256);
+  avx_vector_len = vector_len;
+
+  // if vector length is turned off, revert to AVX for vectors smaller than AVX_512bit
+  if (VM_Version::supports_avx512vl() == false) {
+    switch (vector_len) {
+    case AVX_128bit:
+    case AVX_256bit:
+      legacy_mode = true;
+      break;
+    }
+  }
+
+  if ((UseAVX > 2) && (legacy_mode == false))
+  {
+    bool evex_r = (dst_enc >= 16);
+    bool evex_v = (nds_enc >= 16);
+    // can use vex_x as bank extender on rm encoding
+    vex_x = (src_enc >= 16);
+    evex_prefix(vex_r, vex_b, vex_x, vex_w, evex_r, evex_v, nds_enc, pre, opc, false, false, vector_len, no_mask_reg);
+  } else {
+    vex_prefix(vex_r, vex_b, vex_x, vex_w, nds_enc, pre, opc, vector_len);
+  }
+
+  // return modrm byte components for operands
   return (((dst_enc & 7) << 3) | (src_enc & 7));
 }
 
 
-void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
+void Assembler::simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr, VexSimdPrefix pre,
+                            bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
   if (UseAVX > 0) {
     int xreg_enc = xreg->encoding();
     int  nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector256);
+    vex_prefix(adr, nds_enc, xreg_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
   } else {
     assert((nds == xreg) || (nds == xnoreg), "wrong sse encoding");
     rex_prefix(adr, xreg, pre, opc, rex_w);
   }
 }
 
-int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, VexOpcode opc, bool rex_w, bool vector256) {
+int Assembler::simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre,
+                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len, bool legacy_mode) {
   int dst_enc = dst->encoding();
   int src_enc = src->encoding();
   if (UseAVX > 0) {
     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector256);
+    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, legacy_mode, no_mask_reg);
   } else {
     assert((nds == dst) || (nds == src) || (nds == xnoreg), "wrong sse encoding");
     return rex_prefix_and_encode(dst_enc, src_enc, pre, opc, rex_w);
   }
 }
 
-void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, pre);
+int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src, VexSimdPrefix pre,
+                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
+}
+
+int Assembler::kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src, VexSimdPrefix pre,
+                                      bool no_mask_reg, VexOpcode opc, bool rex_w, int vector_len) {
+  int dst_enc = dst->encoding();
+  int src_enc = src->encoding();
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, rex_w, vector_len, true, no_mask_reg);
+}
+
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
   emit_int8(opcode);
   emit_operand(dst, src);
 }
 
-void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
-  int encode = simd_prefix_and_encode(dst, dst, src, pre);
+void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg) {
+  InstructionMark im(this);
+  simd_prefix_q(dst, dst, src, pre, no_mask_reg);
+  emit_int8(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
+  int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, false, AVX_128bit, legacy_mode);
   emit_int8(opcode);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
+  int encode = simd_prefix_and_encode(dst, dst, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 // Versions with no second source register (non-destructive source).
-void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
-  InstructionMark im(this);
-  simd_prefix(dst, xnoreg, src, pre);
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
+  InstructionMark im(this);
+  simd_prefix(dst, xnoreg, src, pre, opNoRegMask);
   emit_int8(opcode);
   emit_operand(dst, src);
 }
 
-void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
-  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
+void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool opNoRegMask) {
+  InstructionMark im(this);
+  simd_prefix_q(dst, xnoreg, src, pre, opNoRegMask);
+  emit_int8(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg, bool legacy_mode) {
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, legacy_mode, AVX_128bit);
   emit_int8(opcode);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg, VEX_OPCODE_0F, true, AVX_128bit);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 // 3-operands AVX instructions
-void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
-                               Address src, VexSimdPrefix pre, bool vector256) {
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, pre, vector256);
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src,
+                               VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, pre, vector_len, no_mask_reg, legacy_mode);
   emit_int8(opcode);
   emit_operand(dst, src);
 }
 
-void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
-                               XMMRegister src, VexSimdPrefix pre, bool vector256) {
-  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
+void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
+                                 Address src, VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
+  InstructionMark im(this);
+  vex_prefix_q(dst, nds, src, pre, vector_len, no_mask_reg);
+  emit_int8(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
+                               VexSimdPrefix pre, int vector_len, bool no_mask_reg, bool legacy_mode) {
+  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector_len, VEX_OPCODE_0F, false, no_mask_reg);
+  emit_int8(opcode);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src,
+                                 VexSimdPrefix pre, int vector_len, bool no_mask_reg) {
+  int src_enc = src->encoding();
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  int encode = vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
   emit_int8(opcode);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -5040,6 +6271,10 @@
 }
 
 void Assembler::andnq(Register dst, Register src1, Address src2) {
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_64bit;
+  }
   InstructionMark im(this);
   assert(VM_Version::supports_bmi1(), "bit manipulation instructions not supported");
   vex_prefix_0F38_q(dst, src1, src2);
@@ -5181,44 +6416,52 @@
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2);
+  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F2, true);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2sdq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix_q(dst, dst, src, VEX_SIMD_F2);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
+  InstructionMark im(this);
+  simd_prefix_q(dst, dst, src, VEX_SIMD_F2, true);
   emit_int8(0x2A);
   emit_operand(dst, src);
 }
 
 void Assembler::cvtsi2ssq(XMMRegister dst, Register src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3);
+  int encode = simd_prefix_and_encode_q(dst, dst, src, VEX_SIMD_F3, true);
   emit_int8(0x2A);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvtsi2ssq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix_q(dst, dst, src, VEX_SIMD_F3);
+  if (VM_Version::supports_evex()) {
+    tuple_type = EVEX_T1S;
+    input_size_in_bits = EVEX_32bit;
+  }
+  InstructionMark im(this);
+  simd_prefix_q(dst, dst, src, VEX_SIMD_F3, true);
   emit_int8(0x2A);
   emit_operand(dst, src);
 }
 
 void Assembler::cvttsd2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2);
+  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F2, VEX_OPCODE_0F, true);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
 void Assembler::cvttss2siq(Register dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3);
+  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_F3, VEX_OPCODE_0F, true);
   emit_int8(0x2C);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -5387,7 +6630,7 @@
 void Assembler::movdq(XMMRegister dst, Register src) {
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66);
+  int encode = simd_prefix_and_encode_q(dst, src, VEX_SIMD_66, true);
   emit_int8(0x6E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -5396,7 +6639,7 @@
   // table D-1 says MMX/SSE2
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   // swap src/dst to get correct prefix
-  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66);
+  int encode = simd_prefix_and_encode_q(src, dst, VEX_SIMD_66, true);
   emit_int8(0x7E);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -5529,7 +6772,8 @@
 
 void Assembler::mulxq(Register dst1, Register dst2, Register src) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_38, true, false);
+  int encode = vex_prefix_and_encode(dst1->encoding(), dst2->encoding(), src->encoding(),
+                                     VEX_SIMD_F2, VEX_OPCODE_0F_38, true, AVX_128bit, true, false);
   emit_int8((unsigned char)0xF6);
   emit_int8((unsigned char)(0xC0 | encode));
 }
@@ -5678,7 +6922,8 @@
 
 void Assembler::rorxq(Register dst, Register src, int imm8) {
   assert(VM_Version::supports_bmi2(), "bit manipulation instructions not supported");
-  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2, VEX_OPCODE_0F_3A, true, false);
+  int encode = vex_prefix_and_encode(dst->encoding(), 0, src->encoding(), VEX_SIMD_F2,
+                                     VEX_OPCODE_0F_3A, true, AVX_128bit, true, false);
   emit_int8((unsigned char)0xF0);
   emit_int8((unsigned char)(0xC0 | encode));
   emit_int8(imm8);
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Thu May 21 16:19:49 2015 -0700
@@ -438,7 +438,7 @@
 
 };
 
-const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512 / wordSize);
+const int FPUStateSizeInWords = NOT_LP64(27) LP64_ONLY( 512*2 / wordSize);
 
 // The Intel x86/Amd64 Assembler: Pure assembler doing NO optimizations on the instruction
 // level (e.g. mov rax, 0 is not translated into xor rax, rax!); i.e., what you write
@@ -503,7 +503,8 @@
     REX_WRXB   = 0x4F,
 
     VEX_3bytes = 0xC4,
-    VEX_2bytes = 0xC5
+    VEX_2bytes = 0xC5,
+    EVEX_4bytes = 0x62
   };
 
   enum VexPrefix {
@@ -513,6 +514,14 @@
     VEX_W = 0x80
   };
 
+  enum ExexPrefix {
+    EVEX_F  = 0x04,
+    EVEX_V  = 0x08,
+    EVEX_Rb = 0x10,
+    EVEX_X  = 0x40,
+    EVEX_Z  = 0x80
+  };
+
   enum VexSimdPrefix {
     VEX_SIMD_NONE = 0x0,
     VEX_SIMD_66   = 0x1,
@@ -527,6 +536,37 @@
     VEX_OPCODE_0F_3A = 0x3
   };
 
+  enum AvxVectorLen {
+    AVX_128bit = 0x0,
+    AVX_256bit = 0x1,
+    AVX_512bit = 0x2,
+    AVX_NoVec  = 0x4
+  };
+
+  enum EvexTupleType {
+    EVEX_FV   = 0,
+    EVEX_HV   = 4,
+    EVEX_FVM  = 6,
+    EVEX_T1S  = 7,
+    EVEX_T1F  = 11,
+    EVEX_T2   = 13,
+    EVEX_T4   = 15,
+    EVEX_T8   = 17,
+    EVEX_HVM  = 18,
+    EVEX_QVM  = 19,
+    EVEX_OVM  = 20,
+    EVEX_M128 = 21,
+    EVEX_DUP  = 22,
+    EVEX_ETUP = 23
+  };
+
+  enum EvexInputSizeInBits {
+    EVEX_8bit  = 0,
+    EVEX_16bit = 1,
+    EVEX_32bit = 2,
+    EVEX_64bit = 3
+  };
+
   enum WhichOperand {
     // input to locate_operand, and format code for relocations
     imm_operand  = 0,            // embedded 32-bit|64-bit immediate operand
@@ -554,6 +594,11 @@
 
 private:
 
+  int evex_encoding;
+  int input_size_in_bits;
+  int avx_vector_len;
+  int tuple_type;
+  bool is_evex_instruction;
 
   // 64bit prefixes
   int prefix_and_encode(int reg_enc, bool byteinst = false);
@@ -580,108 +625,143 @@
 
   void vex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w,
                   int nds_enc, VexSimdPrefix pre, VexOpcode opc,
-                  bool vector256);
+                  int vector_len);
+
+  void evex_prefix(bool vex_r, bool vex_b, bool vex_x, bool vex_w, bool evex_r, bool evex_v,
+                   int nds_enc, VexSimdPrefix pre, VexOpcode opc,
+                   bool is_extended_context, bool is_merge_context,
+                   int vector_len, bool no_mask_reg );
 
   void vex_prefix(Address adr, int nds_enc, int xreg_enc,
                   VexSimdPrefix pre, VexOpcode opc,
-                  bool vex_w, bool vector256);
+                  bool vex_w, int vector_len,
+                  bool legacy_mode = false, bool no_mask_reg = false);
 
   void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
-                  VexSimdPrefix pre, bool vector256 = false) {
+                  VexSimdPrefix pre, int vector_len = AVX_128bit,
+                  bool no_mask_reg = false, bool legacy_mode = false) {
     int dst_enc = dst->encoding();
     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
+    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector_len, legacy_mode, no_mask_reg);
   }
 
-  void vex_prefix_0F38(Register dst, Register nds, Address src) {
-    bool vex_w = false;
-    bool vector256 = false;
-    vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
+  void vex_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
+                    VexSimdPrefix pre, int vector_len = AVX_128bit,
+                    bool no_mask_reg = false) {
+    int dst_enc = dst->encoding();
+    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, true, vector_len, false, no_mask_reg);
   }
 
-  void vex_prefix_0F38_q(Register dst, Register nds, Address src) {
+  void vex_prefix_0F38(Register dst, Register nds, Address src, bool no_mask_reg = false) {
+    bool vex_w = false;
+    int vector_len = AVX_128bit;
+    vex_prefix(src, nds->encoding(), dst->encoding(),
+               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
+               vector_len, no_mask_reg);
+  }
+
+  void vex_prefix_0F38_q(Register dst, Register nds, Address src, bool no_mask_reg = false) {
     bool vex_w = true;
-    bool vector256 = false;
+    int vector_len = AVX_128bit;
     vex_prefix(src, nds->encoding(), dst->encoding(),
-               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
+               VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w,
+               vector_len, no_mask_reg);
   }
   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
                              VexSimdPrefix pre, VexOpcode opc,
-                             bool vex_w, bool vector256);
+                             bool vex_w, int vector_len,
+                             bool legacy_mode, bool no_mask_reg);
 
-  int  vex_prefix_0F38_and_encode(Register dst, Register nds, Register src) {
+  int  vex_prefix_0F38_and_encode(Register dst, Register nds, Register src, bool no_mask_reg = false) {
     bool vex_w = false;
-    bool vector256 = false;
+    int vector_len = AVX_128bit;
     return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
+                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
+                                 false, no_mask_reg);
   }
-  int  vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src) {
+  int  vex_prefix_0F38_and_encode_q(Register dst, Register nds, Register src, bool no_mask_reg = false) {
     bool vex_w = true;
-    bool vector256 = false;
+    int vector_len = AVX_128bit;
     return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector256);
+                                 VEX_SIMD_NONE, VEX_OPCODE_0F_38, vex_w, vector_len,
+                                 false, no_mask_reg);
   }
   int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
-                             VexSimdPrefix pre, bool vector256 = false,
-                             VexOpcode opc = VEX_OPCODE_0F) {
+                             VexSimdPrefix pre, int vector_len = AVX_128bit,
+                             VexOpcode opc = VEX_OPCODE_0F, bool legacy_mode = false,
+                             bool no_mask_reg = false) {
     int src_enc = src->encoding();
     int dst_enc = dst->encoding();
     int nds_enc = nds->is_valid() ? nds->encoding() : 0;
-    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256);
+    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector_len, legacy_mode, no_mask_reg);
   }
 
   void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
-                   VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
-                   bool rex_w = false, bool vector256 = false);
+                   VexSimdPrefix pre, bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F,
+                   bool rex_w = false, int vector_len = AVX_128bit, bool legacy_mode = false);
 
-  void simd_prefix(XMMRegister dst, Address src,
-                   VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
-    simd_prefix(dst, xnoreg, src, pre, opc);
+  void simd_prefix(XMMRegister dst, Address src, VexSimdPrefix pre,
+                   bool no_mask_reg, VexOpcode opc = VEX_OPCODE_0F) {
+    simd_prefix(dst, xnoreg, src, pre, no_mask_reg, opc);
   }
 
-  void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
-    simd_prefix(src, dst, pre);
+  void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg) {
+    simd_prefix(src, dst, pre, no_mask_reg);
   }
   void simd_prefix_q(XMMRegister dst, XMMRegister nds, Address src,
-                     VexSimdPrefix pre) {
+                     VexSimdPrefix pre, bool no_mask_reg = false) {
     bool rex_w = true;
-    simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
+    simd_prefix(dst, nds, src, pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
   }
 
   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
-                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
-                             bool rex_w = false, bool vector256 = false);
+                             VexSimdPrefix pre, bool no_mask_reg,
+                             VexOpcode opc = VEX_OPCODE_0F,
+                             bool rex_w = false, int vector_len = AVX_128bit,
+                             bool legacy_mode = false);
+
+  int kreg_prefix_and_encode(KRegister dst, KRegister nds, KRegister src,
+                             VexSimdPrefix pre, bool no_mask_reg,
+                             VexOpcode opc = VEX_OPCODE_0F,
+                             bool rex_w = false, int vector_len = AVX_128bit);
+
+  int kreg_prefix_and_encode(KRegister dst, KRegister nds, Register src,
+                             VexSimdPrefix pre, bool no_mask_reg,
+                             VexOpcode opc = VEX_OPCODE_0F,
+                             bool rex_w = false, int vector_len = AVX_128bit);
 
   // Move/convert 32-bit integer value.
   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
-                             VexSimdPrefix pre) {
+                             VexSimdPrefix pre, bool no_mask_reg) {
     // It is OK to cast from Register to XMMRegister to pass argument here
     // since only encoding is used in simd_prefix_and_encode() and number of
     // Gen and Xmm registers are the same.
-    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre);
+    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F);
   }
-  int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre) {
-    return simd_prefix_and_encode(dst, xnoreg, src, pre);
+  int simd_prefix_and_encode(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
+    return simd_prefix_and_encode(dst, xnoreg, src, pre, no_mask_reg);
   }
   int simd_prefix_and_encode(Register dst, XMMRegister src,
-                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
-    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc);
+                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
+                             bool no_mask_reg = false) {
+    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc);
   }
 
   // Move/convert 64-bit integer value.
   int simd_prefix_and_encode_q(XMMRegister dst, XMMRegister nds, Register src,
-                               VexSimdPrefix pre) {
+                               VexSimdPrefix pre, bool no_mask_reg = false) {
     bool rex_w = true;
-    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, VEX_OPCODE_0F, rex_w);
+    return simd_prefix_and_encode(dst, nds, as_XMMRegister(src->encoding()), pre, no_mask_reg, VEX_OPCODE_0F, rex_w);
   }
-  int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre) {
-    return simd_prefix_and_encode_q(dst, xnoreg, src, pre);
+  int simd_prefix_and_encode_q(XMMRegister dst, Register src, VexSimdPrefix pre, bool no_mask_reg) {
+    return simd_prefix_and_encode_q(dst, xnoreg, src, pre, no_mask_reg);
   }
   int simd_prefix_and_encode_q(Register dst, XMMRegister src,
-                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
+                               VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
+                               bool no_mask_reg = false) {
     bool rex_w = true;
-    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, opc, rex_w);
+    return simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, pre, no_mask_reg, opc, rex_w);
   }
 
   // Helper functions for groups of instructions
@@ -692,14 +772,28 @@
   void emit_arith_imm32(int op1, int op2, Register dst, int32_t imm32);
   void emit_arith(int op1, int op2, Register dst, Register src);
 
-  void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
-  void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
-  void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
-  void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
+  void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
+  void emit_simd_arith_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
+  void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
+  void emit_simd_arith_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
+  void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
+  void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre, bool no_mask_reg = false);
+  void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false, bool legacy_mode = false);
+  void emit_simd_arith_nonds_q(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre, bool no_mask_reg = false);
   void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
-                      Address src, VexSimdPrefix pre, bool vector256);
+                      Address src, VexSimdPrefix pre, int vector_len,
+                      bool no_mask_reg = false, bool legacy_mode = false);
+  void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
+                        Address src, VexSimdPrefix pre, int vector_len,
+                        bool no_mask_reg = false);
   void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
-                      XMMRegister src, VexSimdPrefix pre, bool vector256);
+                      XMMRegister src, VexSimdPrefix pre, int vector_len,
+                      bool no_mask_reg = false, bool legacy_mode = false);
+  void emit_vex_arith_q(int opcode, XMMRegister dst, XMMRegister nds,
+                        XMMRegister src, VexSimdPrefix pre, int vector_len,
+                        bool no_mask_reg = false);
+
+  bool emit_compressed_disp_byte(int &disp);
 
   void emit_operand(Register reg,
                     Register base, Register index, Address::ScaleFactor scale,
@@ -825,7 +919,9 @@
   public:
 
   // Creation
-  Assembler(CodeBuffer* code) : AbstractAssembler(code) {}
+  Assembler(CodeBuffer* code) : AbstractAssembler(code) {
+    init_attributes();
+  }
 
   // Decoding
   static address locate_operand(address inst, WhichOperand which);
@@ -833,11 +929,21 @@
 
   // Utilities
   static bool is_polling_page_far() NOT_LP64({ return false;});
+  static bool query_compressed_disp_byte(int disp, bool is_evex_inst, int vector_len,
+                                         int cur_tuple_type, int in_size_in_bits, int cur_encoding);
 
   // Generic instructions
   // Does 32bit or 64bit as needed for the platform. In some sense these
   // belong in macro assembler but there is no need for both varieties to exist
 
+  void init_attributes(void) {
+    evex_encoding = 0;
+    input_size_in_bits = 0;
+    avx_vector_len = AVX_NoVec;
+    tuple_type = EVEX_ETUP;
+    is_evex_instruction = false;
+  }
+
   void lea(Register dst, Address src);
 
   void mov(Register dst, Register src);
@@ -1338,6 +1444,12 @@
   void movb(Address dst, int imm8);
   void movb(Register dst, Address src);
 
+  void kmovq(KRegister dst, KRegister src);
+  void kmovql(KRegister dst, Register src);
+  void kmovdl(KRegister dst, Register src);
+  void kmovq(Address dst, KRegister src);
+  void kmovq(KRegister dst, Address src);
+
   void movdl(XMMRegister dst, Register src);
   void movdl(Register dst, XMMRegister src);
   void movdl(XMMRegister dst, Address src);
@@ -1361,6 +1473,11 @@
   void vmovdqu(XMMRegister dst, Address src);
   void vmovdqu(XMMRegister dst, XMMRegister src);
 
+   // Move Unaligned 512bit Vector
+  void evmovdqu(Address dst, XMMRegister src, int vector_len);
+  void evmovdqu(XMMRegister dst, Address src, int vector_len);
+  void evmovdqu(XMMRegister dst, XMMRegister src, int vector_len);
+
   // Move lower 64bit to high 64bit in 128bit register
   void movlhps(XMMRegister dst, XMMRegister src);
 
@@ -1486,10 +1603,10 @@
   // Pack with unsigned saturation
   void packuswb(XMMRegister dst, XMMRegister src);
   void packuswb(XMMRegister dst, Address src);
-  void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpackuswb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
 
   // Pemutation of 64bit words
-  void vpermq(XMMRegister dst, XMMRegister src, int imm8, bool vector256);
+  void vpermq(XMMRegister dst, XMMRegister src, int imm8, int vector_len);
 
   void pause();
 
@@ -1734,54 +1851,54 @@
   // Add Packed Floating-Point Values
   void addpd(XMMRegister dst, XMMRegister src);
   void addps(XMMRegister dst, XMMRegister src);
-  void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
-  void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vaddpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+  void vaddps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
   // Subtract Packed Floating-Point Values
   void subpd(XMMRegister dst, XMMRegister src);
   void subps(XMMRegister dst, XMMRegister src);
-  void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
-  void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vsubpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+  void vsubps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
   // Multiply Packed Floating-Point Values
   void mulpd(XMMRegister dst, XMMRegister src);
   void mulps(XMMRegister dst, XMMRegister src);
-  void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
-  void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vmulpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+  void vmulps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
   // Divide Packed Floating-Point Values
   void divpd(XMMRegister dst, XMMRegister src);
   void divps(XMMRegister dst, XMMRegister src);
-  void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
-  void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vdivpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+  void vdivps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
   // Bitwise Logical AND of Packed Floating-Point Values
   void andpd(XMMRegister dst, XMMRegister src);
   void andps(XMMRegister dst, XMMRegister src);
-  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
-  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+  void vandps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
   // Bitwise Logical XOR of Packed Floating-Point Values
   void xorpd(XMMRegister dst, XMMRegister src);
   void xorps(XMMRegister dst, XMMRegister src);
-  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
-  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
   // Add horizontal packed integers
-  void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vphaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vphaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
   void phaddw(XMMRegister dst, XMMRegister src);
   void phaddd(XMMRegister dst, XMMRegister src);
 
@@ -1790,36 +1907,38 @@
   void paddw(XMMRegister dst, XMMRegister src);
   void paddd(XMMRegister dst, XMMRegister src);
   void paddq(XMMRegister dst, XMMRegister src);
-  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
-  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
-  void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
-  void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+  void vpaddd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+  void vpaddq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
   // Sub packed integers
   void psubb(XMMRegister dst, XMMRegister src);
   void psubw(XMMRegister dst, XMMRegister src);
   void psubd(XMMRegister dst, XMMRegister src);
   void psubq(XMMRegister dst, XMMRegister src);
-  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
-  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
-  void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
-  void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+  void vpsubd(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+  void vpsubq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
   // Multiply packed integers (only shorts and ints)
   void pmullw(XMMRegister dst, XMMRegister src);
   void pmulld(XMMRegister dst, XMMRegister src);
-  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
-  void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpmullq(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+  void vpmulld(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
+  void vpmullq(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
   // Shift left packed integers
   void psllw(XMMRegister dst, int shift);
@@ -1828,12 +1947,12 @@
   void psllw(XMMRegister dst, XMMRegister shift);
   void pslld(XMMRegister dst, XMMRegister shift);
   void psllq(XMMRegister dst, XMMRegister shift);
-  void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
-  void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
-  void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
-  void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
-  void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
-  void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsllw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
+  void vpslld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
+  void vpsllq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
+  void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+  void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+  void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
 
   // Logical shift right packed integers
   void psrlw(XMMRegister dst, int shift);
@@ -1842,42 +1961,43 @@
   void psrlw(XMMRegister dst, XMMRegister shift);
   void psrld(XMMRegister dst, XMMRegister shift);
   void psrlq(XMMRegister dst, XMMRegister shift);
-  void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
-  void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
-  void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
-  void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
-  void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
-  void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsrlw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
+  void vpsrld(XMMRegister dst, XMMRegister src, int shift, int vector_len);
+  void vpsrlq(XMMRegister dst, XMMRegister src, int shift, int vector_len);
+  void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+  void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+  void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
 
   // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
   void psraw(XMMRegister dst, int shift);
   void psrad(XMMRegister dst, int shift);
   void psraw(XMMRegister dst, XMMRegister shift);
   void psrad(XMMRegister dst, XMMRegister shift);
-  void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
-  void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256);
-  void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
-  void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsraw(XMMRegister dst, XMMRegister src, int shift, int vector_len);
+  void vpsrad(XMMRegister dst, XMMRegister src, int shift, int vector_len);
+  void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
+  void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, int vector_len);
 
   // And packed integers
   void pand(XMMRegister dst, XMMRegister src);
-  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpand(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
   // Or packed integers
   void por(XMMRegister dst, XMMRegister src);
-  void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
   // Xor packed integers
   void pxor(XMMRegister dst, XMMRegister src);
-  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
-  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+  void vpxor(XMMRegister dst, XMMRegister nds, Address src, int vector_len);
 
   // Copy low 128bit into high 128bit of YMM registers.
   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vextractf128h(XMMRegister dst, XMMRegister src);
+  void vextracti128h(XMMRegister dst, XMMRegister src);
 
   // Load/store high 128bit of YMM registers which does not destroy other half.
   void vinsertf128h(XMMRegister dst, Address src);
@@ -1885,9 +2005,25 @@
   void vextractf128h(Address dst, XMMRegister src);
   void vextracti128h(Address dst, XMMRegister src);
 
+  // Copy low 256bit into high 256bit of ZMM registers.
+  void vinserti64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vinsertf64x4h(XMMRegister dst, XMMRegister nds, XMMRegister src);
+  void vextracti64x4h(XMMRegister dst, XMMRegister src);
+  void vextractf64x4h(XMMRegister dst, XMMRegister src);
+  void vextractf64x4h(Address dst, XMMRegister src);
+  void vinsertf64x4h(XMMRegister dst, Address src);
+
+  // Copy targeted 128bit segments of the ZMM registers
+  void vextracti64x2h(XMMRegister dst, XMMRegister src, int value);
+  void vextractf64x2h(XMMRegister dst, XMMRegister src, int value);
+  void vextractf32x4h(XMMRegister dst, XMMRegister src, int value);
+
   // duplicate 4-bytes integer data from src into 8 locations in dest
   void vpbroadcastd(XMMRegister dst, XMMRegister src);
 
+  // duplicate 4-bytes integer data from src into vector_len locations in dest
+  void evpbroadcastd(XMMRegister dst, XMMRegister src, int vector_len);
+
   // Carry-Less Multiplication Quadword
   void pclmulqdq(XMMRegister dst, XMMRegister src, int mask);
   void vpclmulqdq(XMMRegister dst, XMMRegister nds, XMMRegister src, int mask);
--- a/src/cpu/x86/vm/c1_FrameMap_x86.cpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/x86/vm/c1_FrameMap_x86.cpp	Thu May 21 16:19:49 2015 -0700
@@ -233,13 +233,30 @@
   _xmm_regs[13]  = xmm13;
   _xmm_regs[14]  = xmm14;
   _xmm_regs[15]  = xmm15;
+  _xmm_regs[16]  = xmm16;
+  _xmm_regs[17]  = xmm17;
+  _xmm_regs[18]  = xmm18;
+  _xmm_regs[19]  = xmm19;
+  _xmm_regs[20]  = xmm20;
+  _xmm_regs[21]  = xmm21;
+  _xmm_regs[22]  = xmm22;
+  _xmm_regs[23]  = xmm23;
+  _xmm_regs[24]  = xmm24;
+  _xmm_regs[25]  = xmm25;
+  _xmm_regs[26]  = xmm26;
+  _xmm_regs[27]  = xmm27;
+  _xmm_regs[28]  = xmm28;
+  _xmm_regs[29]  = xmm29;
+  _xmm_regs[30]  = xmm30;
+  _xmm_regs[31]  = xmm31;
 #endif // _LP64
 
   for (int i = 0; i < 8; i++) {
     _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i);
   }
 
-  for (int i = 0; i < nof_caller_save_xmm_regs ; i++) {
+  int num_caller_save_xmm_regs = get_num_caller_save_xmms();
+  for (int i = 0; i < num_caller_save_xmm_regs; i++) {
     _caller_save_xmm_regs[i] = LIR_OprFact::single_xmm(i);
   }
 
--- a/src/cpu/x86/vm/c1_FrameMap_x86.hpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/x86/vm/c1_FrameMap_x86.hpp	Thu May 21 16:19:49 2015 -0700
@@ -152,6 +152,16 @@
     return range;
   }
 
+  static int get_num_caller_save_xmms(void) {
+    int num_caller_save_xmm_regs = nof_caller_save_xmm_regs;
+#ifdef _LP64
+    if (UseAVX < 3) {
+      num_caller_save_xmm_regs = num_caller_save_xmm_regs / 2;
+    }
+#endif
+    return num_caller_save_xmm_regs;
+  }
+
   static int nof_caller_save_cpu_regs() { return adjust_reg_range(pd_nof_caller_save_cpu_regs_frame_map); }
   static int last_cpu_reg()             { return adjust_reg_range(pd_last_cpu_reg);  }
   static int last_byte_reg()            { return adjust_reg_range(pd_last_byte_reg); }
--- a/src/cpu/x86/vm/c1_LinearScan_x86.hpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/x86/vm/c1_LinearScan_x86.hpp	Thu May 21 16:19:49 2015 -0700
@@ -85,8 +85,9 @@
           tty->print_cr("killing XMMs for trig");
         }
 #endif
+        int num_caller_save_xmm_regs = FrameMap::get_num_caller_save_xmms();
         int op_id = op->id();
-        for (int xmm = 0; xmm < FrameMap::nof_caller_save_xmm_regs; xmm++) {
+        for (int xmm = 0; xmm < num_caller_save_xmm_regs; xmm++) {
           LIR_Opr opr = FrameMap::caller_save_xmm_reg_at(xmm);
           add_temp(reg_num(opr), op_id, noUse, T_ILLEGAL);
         }
@@ -100,6 +101,10 @@
 // Implementation of LinearScanWalker
 
 inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) {
+  int last_xmm_reg = pd_last_xmm_reg;
+  if (UseAVX < 3) {
+    last_xmm_reg = pd_first_xmm_reg + (pd_nof_xmm_regs_frame_map / 2) - 1;
+  }
   if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::byte_reg)) {
     assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only");
     _first_reg = pd_first_byte_reg;
@@ -107,7 +112,7 @@
     return true;
   } else if ((UseSSE >= 1 && cur->type() == T_FLOAT) || (UseSSE >= 2 && cur->type() == T_DOUBLE)) {
     _first_reg = pd_first_xmm_reg;
-    _last_reg = pd_last_xmm_reg;
+    _last_reg = last_xmm_reg;
     return true;
   }
 
--- a/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/x86/vm/c1_Runtime1_x86.cpp	Thu May 21 16:19:49 2015 -0700
@@ -323,7 +323,7 @@
   LP64_ONLY(num_rt_args = 0);
   LP64_ONLY(assert((reg_save_frame_size * VMRegImpl::stack_slot_size) % 16 == 0, "must be 16 byte aligned");)
   int frame_size_in_slots = reg_save_frame_size + num_rt_args; // args + thread
-  sasm->set_frame_size(frame_size_in_slots / VMRegImpl::slots_per_word );
+  sasm->set_frame_size(frame_size_in_slots / VMRegImpl::slots_per_word);
 
   // record saved value locations in an OopMap
   // locations are offsets from sp after runtime call; num_rt_args is number of arguments in call, including thread
@@ -362,6 +362,13 @@
   map->set_callee_saved(VMRegImpl::stack2reg(r15H_off + num_rt_args), r15->as_VMReg()->next());
 #endif // _LP64
 
+  int xmm_bypass_limit = FrameMap::nof_xmm_regs;
+#ifdef _LP64
+  if (UseAVX < 3) {
+    xmm_bypass_limit = xmm_bypass_limit / 2;
+  }
+#endif
+
   if (save_fpu_registers) {
     if (UseSSE < 2) {
       int fpu_off = float_regs_as_doubles_off;
@@ -380,11 +387,13 @@
     if (UseSSE >= 2) {
       int xmm_off = xmm_regs_as_doubles_off;
       for (int n = 0; n < FrameMap::nof_xmm_regs; n++) {
-        VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
-        map->set_callee_saved(VMRegImpl::stack2reg(xmm_off +     num_rt_args), xmm_name_0);
-        // %%% This is really a waste but we'll keep things as they were for now
-        if (true) {
-          map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + 1 + num_rt_args), xmm_name_0->next());
+        if (n < xmm_bypass_limit) {
+          VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
+          map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
+          // %%% This is really a waste but we'll keep things as they were for now
+          if (true) {
+            map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + 1 + num_rt_args), xmm_name_0->next());
+          }
         }
         xmm_off += 2;
       }
@@ -393,8 +402,10 @@
     } else if (UseSSE == 1) {
       int xmm_off = xmm_regs_as_doubles_off;
       for (int n = 0; n < FrameMap::nof_xmm_regs; n++) {
-        VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
-        map->set_callee_saved(VMRegImpl::stack2reg(xmm_off +     num_rt_args), xmm_name_0);
+        if (n < xmm_bypass_limit) {
+          VMReg xmm_name_0 = as_XMMRegister(n)->as_VMReg();
+          map->set_callee_saved(VMRegImpl::stack2reg(xmm_off + num_rt_args), xmm_name_0);
+        }
         xmm_off += 2;
       }
       assert(xmm_off == float_regs_as_doubles_off, "incorrect number of xmm registers");
@@ -474,6 +485,24 @@
       __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 104), xmm13);
       __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 112), xmm14);
       __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 120), xmm15);
+      if (UseAVX > 2) {
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 128), xmm16);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 136), xmm17);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 144), xmm18);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 152), xmm19);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 160), xmm20);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 168), xmm21);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 176), xmm22);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 184), xmm23);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 192), xmm24);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 200), xmm25);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 208), xmm26);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 216), xmm27);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 224), xmm28);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 232), xmm29);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 240), xmm30);
+        __ movdbl(Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 248), xmm31);
+      }
 #endif // _LP64
     } else if (UseSSE == 1) {
       // save XMM registers as float because double not supported without SSE2
@@ -516,6 +545,24 @@
       __ movdbl(xmm13, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 104));
       __ movdbl(xmm14, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 112));
       __ movdbl(xmm15, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 120));
+      if (UseAVX > 2) {
+        __ movdbl(xmm16, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 128));
+        __ movdbl(xmm17, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 136));
+        __ movdbl(xmm18, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 144));
+        __ movdbl(xmm19, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 152));
+        __ movdbl(xmm20, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 160));
+        __ movdbl(xmm21, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 168));
+        __ movdbl(xmm22, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 176));
+        __ movdbl(xmm23, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 184));
+        __ movdbl(xmm24, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 192));
+        __ movdbl(xmm25, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 200));
+        __ movdbl(xmm26, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 208));
+        __ movdbl(xmm27, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 216));
+        __ movdbl(xmm28, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 224));
+        __ movdbl(xmm29, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 232));
+        __ movdbl(xmm30, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 240));
+        __ movdbl(xmm31, Address(rsp, xmm_regs_as_doubles_off * VMRegImpl::stack_slot_size + 248));
+      }
 #endif // _LP64
     } else if (UseSSE == 1) {
       // restore XMM registers
@@ -1631,36 +1678,22 @@
 
         NOT_LP64(__ get_thread(thread);)
 
-        Address in_progress(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                             PtrQueue::byte_offset_of_active()));
-
         Address queue_index(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                              PtrQueue::byte_offset_of_index()));
         Address buffer(thread, in_bytes(JavaThread::satb_mark_queue_offset() +
                                         PtrQueue::byte_offset_of_buf()));
 
-
         Label done;
         Label runtime;
 
         // Can we store original value in the thread's buffer?
 
-#ifdef _LP64
-        __ movslq(tmp, queue_index);
-        __ cmpq(tmp, 0);
-#else
-        __ cmpl(queue_index, 0);
-#endif
-        __ jcc(Assembler::equal, runtime);
-#ifdef _LP64
-        __ subq(tmp, wordSize);
-        __ movl(queue_index, tmp);
-        __ addq(tmp, buffer);
-#else
-        __ subl(queue_index, wordSize);
-        __ movl(tmp, buffer);
-        __ addl(tmp, queue_index);
-#endif
+        __ movptr(tmp, queue_index);
+        __ testptr(tmp, tmp);
+        __ jcc(Assembler::zero, runtime);
+        __ subptr(tmp, wordSize);
+        __ movptr(queue_index, tmp);
+        __ addptr(tmp, buffer);
 
         // prev_val (rax)
         f.load_argument(0, pre_val);
@@ -1713,6 +1746,7 @@
         assert(sizeof(*ct->byte_map_base) == sizeof(jbyte), "adjust this code");
 
         Label done;
+        Label enqueued;
         Label runtime;
 
         // At this point we know new_value is non-NULL and the new_value crosses regions.
@@ -1752,28 +1786,19 @@
 
         __ movb(Address(card_addr, 0), (int)CardTableModRefBS::dirty_card_val());
 
-        __ cmpl(queue_index, 0);
-        __ jcc(Assembler::equal, runtime);
-        __ subl(queue_index, wordSize);
+        const Register tmp = rdx;
+        __ push(rdx);
 
-        const Register buffer_addr = rbx;
-        __ push(rbx);
-
-        __ movptr(buffer_addr, buffer);
-
-#ifdef _LP64
-        __ movslq(rscratch1, queue_index);
-        __ addptr(buffer_addr, rscratch1);
-#else
-        __ addptr(buffer_addr, queue_index);
-#endif
-        __ movptr(Address(buffer_addr, 0), card_addr);
-
-        __ pop(rbx);
-        __ jmp(done);
+        __ movptr(tmp, queue_index);
+        __ testptr(tmp, tmp);
+        __ jcc(Assembler::zero, runtime);
+        __ subptr(tmp, wordSize);
+        __ movptr(queue_index, tmp);
+        __ addptr(tmp, buffer);
+        __ movptr(Address(tmp, 0), card_addr);
+        __ jmp(enqueued);
 
         __ bind(runtime);
-        __ push(rdx);
 #ifdef _LP64
         __ push(r8);
         __ push(r9);
@@ -1795,12 +1820,12 @@
         __ pop(r9);
         __ pop(r8);
 #endif
+        __ bind(enqueued);
         __ pop(rdx);
+
         __ bind(done);
-
         __ pop(rcx);
         __ pop(rax);
-
       }
       break;
 #endif // INCLUDE_ALL_GCS
--- a/src/cpu/x86/vm/c2_init_x86.cpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/x86/vm/c2_init_x86.cpp	Thu May 21 16:19:49 2015 -0700
@@ -25,6 +25,7 @@
 #include "precompiled.hpp"
 #include "opto/compile.hpp"
 #include "opto/node.hpp"
+#include "opto/optoreg.hpp"
 
 // processor dependent initialization for i486
 
@@ -37,4 +38,24 @@
     ConditionalMoveLimit = 0;
   }
 #endif // AMD64
+
+  if (UseAVX < 3) {
+    int delta = XMMRegisterImpl::max_slots_per_register * XMMRegisterImpl::number_of_registers;
+    int bottom = ConcreteRegisterImpl::max_fpr;
+    int top = bottom + delta;
+    int middle = bottom + (delta / 2);
+    int xmm_slots = XMMRegisterImpl::max_slots_per_register;
+    int lower = xmm_slots / 2;
+    // mark bad every register that we cannot get to if AVX less than 3, we have all slots in the array
+    // Note: vm2opto is allocated to ConcreteRegisterImpl::number_of_registers
+    for (int i = bottom; i < middle; i += xmm_slots) {
+      for (OptoReg::Name j = OptoReg::Name(i + lower); j<OptoReg::Name(i + xmm_slots); j = OptoReg::add(j, 1)) {
+        OptoReg::invalidate(j);
+      }
+    }
+    // mark the upper zmm bank bad and all the mask registers bad in this case
+    for (OptoReg::Name i = OptoReg::Name(middle); i<OptoReg::Name(_last_Mach_Reg - 1); i = OptoReg::add(i, 1)) {
+      OptoReg::invalidate(i);
+    }
+  }
 }
--- a/src/cpu/x86/vm/frame_x86.hpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/x86/vm/frame_x86.hpp	Thu May 21 16:19:49 2015 -0700
@@ -125,7 +125,7 @@
     // Entry frames
 #ifdef AMD64
 #ifdef _WIN64
-    entry_frame_after_call_words                     =  28,
+    entry_frame_after_call_words                     =  60,
     entry_frame_call_wrapper_offset                  =  2,
 
     arg_reg_save_area_bytes                          = 32 // Register argument save area
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu May 21 10:07:35 2015 -0700
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Thu May 21 16:19:49 2015 -0700
@@ -3996,21 +3996,21 @@
   }
 }
 
-void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, int vector_len) {
   if (reachable(src)) {
-    vandpd(dst, nds, as_Address(src), vector256);
+    vandpd(dst, nds, as_Address(src), vector_len);
   } else {
     lea(rscratch1, src);
-    vandpd(dst, nds, Address(rscratch1, 0), vector256);
-  }
-}
-
-void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
+    vandpd(dst, nds, Address(rscratch1, 0), vector_len);
+  }
+}
+