changeset 3459:40e5a3f2907e hs24-b15

Merge
author amurillo
date Fri, 29 Jun 2012 17:04:39 -0700
parents a8b9798c1d45 61a94c2da7c4
children cf37a594c38d
files
diffstat 134 files changed, 53963 insertions(+), 3994 deletions(-) [+]
line wrap: on
line diff
--- a/agent/src/share/classes/sun/jvm/hotspot/bugspot/BugSpot.java	Thu Jun 28 09:32:35 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/bugspot/BugSpot.java	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -657,7 +657,7 @@
     while (fr != null) {
       trace.add(new StackTraceEntry(fr, getCDebugger()));
       try {
-        fr = fr.sender();
+        fr = fr.sender(t);
       } catch (AddressException e) {
         e.printStackTrace();
         showMessageDialog("Error while walking stack; stack trace will be truncated\n(see console for details)",
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/bsd/amd64/BsdAMD64CFrame.java	Thu Jun 28 09:32:35 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/bsd/amd64/BsdAMD64CFrame.java	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.bsd.amd64;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.amd64.*;
 import sun.jvm.hotspot.debugger.bsd.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.cdbg.basic.*;
@@ -51,8 +52,11 @@
       return rbp;
    }
 
-   public CFrame sender() {
-      if (rbp == null) {
+   public CFrame sender(ThreadProxy thread) {
+      AMD64ThreadContext context = (AMD64ThreadContext) thread.getContext();
+      Address rsp = context.getRegisterAsAddress(AMD64ThreadContext.RSP);
+
+      if ( (rbp == null) || rbp.lessThan(rsp) ) {
         return null;
       }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/bsd/x86/BsdX86CFrame.java	Thu Jun 28 09:32:35 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/bsd/x86/BsdX86CFrame.java	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 import sun.jvm.hotspot.debugger.bsd.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.cdbg.basic.*;
+import sun.jvm.hotspot.debugger.x86.*;
 
 final public class BsdX86CFrame extends BasicCFrame {
    // package/class internals only
@@ -52,8 +53,11 @@
       return ebp;
    }
 
-   public CFrame sender() {
-      if (ebp == null) {
+   public CFrame sender(ThreadProxy thread) {
+      X86ThreadContext context = (X86ThreadContext) thread.getContext();
+      Address esp = context.getRegisterAsAddress(X86ThreadContext.ESP);
+
+      if ( (ebp == null) || ebp.lessThan(esp) ) {
         return null;
       }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/cdbg/CFrame.java	Thu Jun 28 09:32:35 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/cdbg/CFrame.java	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,7 +34,7 @@
 
 public interface CFrame {
   /** Returns null when no more frames on stack */
-  public CFrame sender();
+  public CFrame sender(ThreadProxy th);
 
   /** Get the program counter of this frame */
   public Address pc();
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/cdbg/basic/amd64/AMD64CFrame.java	Thu Jun 28 09:32:35 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/cdbg/basic/amd64/AMD64CFrame.java	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.cdbg.basic.amd64;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.amd64.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.cdbg.basic.*;
 
@@ -43,8 +44,11 @@
     this.pc  = pc;
   }
 
-  public CFrame sender() {
-    if (rbp == null) {
+  public CFrame sender(ThreadProxy thread) {
+    AMD64ThreadContext context = (AMD64ThreadContext) thread.getContext();
+    Address rsp = context.getRegisterAsAddress(AMD64ThreadContext.RSP);
+
+    if ( (rbp == null) || rbp.lessThan(rsp) ) {
       return null;
     }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/cdbg/basic/x86/X86CFrame.java	Thu Jun 28 09:32:35 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/cdbg/basic/x86/X86CFrame.java	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.cdbg.basic.x86;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.x86.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.cdbg.basic.*;
 
@@ -43,8 +44,11 @@
     this.pc  = pc;
   }
 
-  public CFrame sender() {
-    if (ebp == null) {
+  public CFrame sender(ThreadProxy thread) {
+    X86ThreadContext context = (X86ThreadContext) thread.getContext();
+    Address esp = context.getRegisterAsAddress(X86ThreadContext.ESP);
+
+    if ( (ebp == null) || ebp.lessThan(esp) ) {
       return null;
     }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/amd64/LinuxAMD64CFrame.java	Thu Jun 28 09:32:35 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/amd64/LinuxAMD64CFrame.java	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.linux.amd64;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.amd64.*;
 import sun.jvm.hotspot.debugger.linux.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.cdbg.basic.*;
@@ -51,8 +52,11 @@
       return rbp;
    }
 
-   public CFrame sender() {
-      if (rbp == null) {
+   public CFrame sender(ThreadProxy thread) {
+      AMD64ThreadContext context = (AMD64ThreadContext) thread.getContext();
+      Address rsp = context.getRegisterAsAddress(AMD64ThreadContext.RSP);
+
+      if ( (rbp == null) || rbp.lessThan(rsp) ) {
         return null;
       }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/sparc/LinuxSPARCCFrame.java	Thu Jun 28 09:32:35 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/sparc/LinuxSPARCCFrame.java	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -57,7 +57,7 @@
       return sp;
    }
 
-   public CFrame sender() {
+   public CFrame sender(ThreadProxy thread) {
       if (sp == null) {
         return null;
       }
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/x86/LinuxX86CFrame.java	Thu Jun 28 09:32:35 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/x86/LinuxX86CFrame.java	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,7 @@
 import sun.jvm.hotspot.debugger.linux.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.cdbg.basic.*;
+import sun.jvm.hotspot.debugger.x86.*;
 
 final public class LinuxX86CFrame extends BasicCFrame {
    // package/class internals only
@@ -52,8 +53,11 @@
       return ebp;
    }
 
-   public CFrame sender() {
-      if (ebp == null) {
+   public CFrame sender(ThreadProxy thread) {
+      X86ThreadContext context = (X86ThreadContext) thread.getContext();
+      Address esp = context.getRegisterAsAddress(X86ThreadContext.ESP);
+
+      if ( (ebp == null) || ebp.lessThan(esp) ) {
         return null;
       }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcCFrame.java	Thu Jun 28 09:32:35 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcCFrame.java	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -37,7 +37,7 @@
       return fp;
    }
 
-   public CFrame sender() {
+   public CFrame sender(ThreadProxy t) {
       return sender;
    }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/OopUtilities.java	Thu Jun 28 09:32:35 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/OopUtilities.java	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -141,18 +141,19 @@
   public static String stringOopToString(Oop stringOop) {
     if (offsetField == null) {
       InstanceKlass k = (InstanceKlass) stringOop.getKlass();
-      offsetField = (IntField) k.findField("offset", "I");
-      countField  = (IntField) k.findField("count",  "I");
+      offsetField = (IntField) k.findField("offset", "I");   // optional
+      countField  = (IntField) k.findField("count",  "I");   // optional
       valueField  = (OopField) k.findField("value",  "[C");
       if (Assert.ASSERTS_ENABLED) {
-        Assert.that(offsetField != null &&
-                    countField != null &&
-                    valueField != null, "must find all java.lang.String fields");
+         Assert.that(valueField != null, "Field \'value\' of java.lang.String not found");
       }
     }
-    return charArrayToString((TypeArray) valueField.getValue(stringOop),
-                             offsetField.getValue(stringOop),
-                             countField.getValue(stringOop));
+    if (offsetField != null && countField != null) {
+      return charArrayToString((TypeArray) valueField.getValue(stringOop),
+                               offsetField.getValue(stringOop),
+                               countField.getValue(stringOop));
+    }
+    return  charArrayToString((TypeArray) valueField.getValue(stringOop));
   }
 
   public static String stringOopToEscapedString(Oop stringOop) {
--- a/agent/src/share/classes/sun/jvm/hotspot/tools/PStack.java	Thu Jun 28 09:32:35 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/tools/PStack.java	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -158,7 +158,7 @@
                         printUnknown(out);
                      }
                   }
-                  f = f.sender();
+                  f = f.sender(th);
                }
             } catch (Exception exp) {
                exp.printStackTrace();
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/ObjectReader.java	Thu Jun 28 09:32:35 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/ObjectReader.java	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -85,6 +85,21 @@
       this(new ProcImageClassLoader());
    }
 
+   static void debugPrintln(String msg) {
+      if (DEBUG) {
+         System.err.println("DEBUG>" + msg);
+      }
+   }
+
+   static void debugPrintStackTrace(Exception exp) {
+      if (DEBUG) {
+         StackTraceElement[] els = exp.getStackTrace();
+         for (int i = 0; i < els.length; i++) {
+            System.err.println("DEBUG>" + els[i].toString());
+         }
+      }
+   }
+
    public Object readObject(Oop oop) throws ClassNotFoundException {
       if (oop instanceof Instance) {
          return readInstance((Instance) oop);
@@ -120,13 +135,96 @@
    }
 
    protected Symbol javaLangString;
+   protected Symbol javaUtilHashtableEntry;
+   protected Symbol javaUtilHashtable;
+   protected Symbol javaUtilProperties;
+
+   protected Symbol getVMSymbol(String name) {
+      return VM.getVM().getSymbolTable().probe(name);
+   }
+
    protected Symbol javaLangString() {
       if (javaLangString == null) {
-         javaLangString = VM.getVM().getSymbolTable().probe("java/lang/String");
+         javaLangString = getVMSymbol("java/lang/String");
       }
       return javaLangString;
    }
 
+   protected Symbol javaUtilHashtableEntry() {
+      if (javaUtilHashtableEntry == null) {
+         javaUtilHashtableEntry = getVMSymbol("java/util/Hashtable$Entry");
+      }
+      return javaUtilHashtableEntry;
+   }
+
+   protected Symbol javaUtilHashtable() {
+      if (javaUtilHashtable == null) {
+         javaUtilHashtable = getVMSymbol("java/util/Hashtable");
+      }
+      return javaUtilHashtable;
+   }
+
+   protected Symbol javaUtilProperties() {
+      if (javaUtilProperties == null) {
+         javaUtilProperties = getVMSymbol("java/util/Properties");
+      }
+      return javaUtilProperties;
+   }
+
+   private void setHashtableEntry(java.util.Hashtable p, Oop oop) {
+      InstanceKlass ik = (InstanceKlass)oop.getKlass();
+      OopField keyField = (OopField)ik.findField("key", "Ljava/lang/Object;");
+      OopField valueField = (OopField)ik.findField("value", "Ljava/lang/Object;");
+      OopField nextField = (OopField)ik.findField("next", "Ljava/util/Hashtable$Entry;");
+      if (DEBUG) {
+         if (Assert.ASSERTS_ENABLED) {
+            Assert.that(ik.getName().equals(javaUtilHashtableEntry()), "Not a Hashtable$Entry?");
+            Assert.that(keyField != null && valueField != null && nextField != null, "Invalid fields!");
+         }
+      }
+
+      Object key = null;
+      Object value = null;
+      Oop next = null;
+      try {
+         key = readObject(keyField.getValue(oop));
+         value = readObject(valueField.getValue(oop));
+         next =  (Oop)nextField.getValue(oop);
+         // For Properties, should use setProperty(k, v). Since it only runs in SA
+         // using put(k, v) should be OK.
+         p.put(key, value);
+         if (next != null) {
+            setHashtableEntry(p, next);
+         }
+      } catch (ClassNotFoundException ce) {
+         if( DEBUG) {
+            debugPrintln("Class not found " + ce);
+            debugPrintStackTrace(ce);
+         }
+      }
+   }
+
+   protected Object getHashtable(Instance oop, boolean isProperties) {
+      InstanceKlass k = (InstanceKlass)oop.getKlass();
+      OopField tableField = (OopField)k.findField("table", "[Ljava/util/Hashtable$Entry;");
+      if (tableField == null) {
+         debugPrintln("Could not find field of [Ljava/util/Hashtable$Entry;");
+         return null;
+      }
+      java.util.Hashtable table = (isProperties) ? new java.util.Properties()
+                                                 : new java.util.Hashtable();
+      ObjArray kvs = (ObjArray)tableField.getValue(oop);
+      long size = kvs.getLength();
+      debugPrintln("Hashtable$Entry Size = " + size);
+      for (long i=0; i<size; i++) {
+         Oop entry = kvs.getObjAt(i);
+         if (entry != null && entry.isInstance()) {
+            setHashtableEntry(table, entry);
+         }
+      }
+      return table;
+   }
+
    public Object readInstance(Instance oop) throws ClassNotFoundException {
       Object result = getFromObjTable(oop);
       if (result == null) {
@@ -134,11 +232,21 @@
          // Handle java.lang.String instances differently. As part of JSR-133, fields of immutable
          // classes have been made final. The algorithm below will not be able to read Strings from
          // debuggee (can't use reflection to set final fields). But, need to read Strings is very
-         // important. FIXME: need a framework to handle many other special cases.
+         // important.
+         // Same for Hashtable, key and hash are final, could not be set in the algorithm too.
+         // FIXME: need a framework to handle many other special cases.
          if (kls.getName().equals(javaLangString())) {
             return OopUtilities.stringOopToString(oop);
          }
 
+         if (kls.getName().equals(javaUtilHashtable())) {
+            return getHashtable(oop, false);
+         }
+
+         if (kls.getName().equals(javaUtilProperties())) {
+            return getHashtable(oop, true);
+         }
+
          Class clz = readClass(kls);
          try {
             result = clz.newInstance();
@@ -164,8 +272,8 @@
                   break;
                } catch (Exception exp) {
                   if (DEBUG) {
-                     System.err.println("Can't create object using " + c);
-                     exp.printStackTrace();
+                     debugPrintln("Can't create object using " + c);
+                     debugPrintStackTrace(exp);
                   }
                }
             }
@@ -329,8 +437,8 @@
                                      arrayObj[ifd.getIndex()] = readObject(field.getValue(getObj()));
                                   } catch (Exception e) {
                                      if (DEBUG) {
-                                        System.err.println("Array element set failed for " + ifd);
-                                        e.printStackTrace();
+                                        debugPrintln("Array element set failed for " + ifd);
+                                        debugPrintStackTrace(e);
                                      }
                                   }
                                }
@@ -348,8 +456,8 @@
 
       private void printFieldSetError(java.lang.reflect.Field f, Exception ex) {
          if (DEBUG) {
-            if (f != null) System.err.println("Field set failed for " + f);
-            ex.printStackTrace();
+            if (f != null) debugPrintln("Field set failed for " + f);
+            debugPrintStackTrace(ex);
          }
       }
 
@@ -601,7 +709,7 @@
             return Class.forName(className, true, cl);
          } catch (Exception e) {
             if (DEBUG) {
-               System.err.println("Can't load class " + className);
+               debugPrintln("Can't load class " + className);
             }
             throw new RuntimeException(e);
          }
--- a/make/bsd/makefiles/universal.gmk	Thu Jun 28 09:32:35 2012 -0700
+++ b/make/bsd/makefiles/universal.gmk	Fri Jun 29 17:04:39 2012 -0700
@@ -110,4 +110,5 @@
 
 .PHONY:	universal_product universal_fastdebug universal_debug \
 	all_product_universal all_fastdebug_universal all_debug_universal \
-	universalize export_universal copy_universal
+	universalize export_universal copy_universal \
+	$(UNIVERSAL_LIPO_LIST) $(UNIVERSAL_COPY_LIST)
--- a/make/hotspot_version	Thu Jun 28 09:32:35 2012 -0700
+++ b/make/hotspot_version	Fri Jun 29 17:04:39 2012 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=24
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=14
+HS_BUILD_NUMBER=15
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/make/jprt.properties	Thu Jun 28 09:32:35 2012 -0700
+++ b/make/jprt.properties	Fri Jun 29 17:04:39 2012 -0700
@@ -102,6 +102,11 @@
 jprt.my.linux.armvfp.jdk7u6=${jprt.my.linux.armvfp.jdk7}
 jprt.my.linux.armvfp=${jprt.my.linux.armvfp.${jprt.tools.default.release}}
 
+jprt.my.linux.armv6.jdk8=linux_armv6_2.6
+jprt.my.linux.armv6.jdk7=linux_armv6_2.6
+jprt.my.linux.armv6.jdk7u6=${jprt.my.linux.armv6.jdk7}
+jprt.my.linux.armv6=${jprt.my.linux.armv6.${jprt.tools.default.release}}
+
 jprt.my.linux.armsflt.jdk8=linux_armsflt_2.6
 jprt.my.linux.armsflt.jdk7=linux_armsflt_2.6
 jprt.my.linux.armsflt.jdk7u6=${jprt.my.linux.armsflt.jdk7}
@@ -134,7 +139,7 @@
     ${jprt.my.macosx.x64}-{product|fastdebug|debug}, \
     ${jprt.my.windows.i586}-{product|fastdebug|debug}, \
     ${jprt.my.windows.x64}-{product|fastdebug|debug}, \
-    ${jprt.my.linux.armvfp}-{product|fastdebug}
+    ${jprt.my.linux.armv6}-{product|fastdebug}
 
 jprt.build.targets.open= \
     ${jprt.my.solaris.i586}-{productOpen}, \
--- a/make/pic.make	Thu Jun 28 09:32:35 2012 -0700
+++ b/make/pic.make	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2006, 2007, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -30,6 +30,13 @@
 
 ifneq ($(OSNAME), windows)
   ifndef LP64
+    PARTIAL_NONPIC=1
+  endif
+  PIC_ARCH = ppc
+  ifneq ("$(filter $(PIC_ARCH),$(BUILDARCH))","")
+    PARTIAL_NONPIC=0
+  endif
+  ifeq ($(PARTIAL_NONPIC),1)
     NONPIC_DIRS  = memory oops gc_implementation gc_interface 
     NONPIC_DIRS  := $(foreach dir,$(NONPIC_DIRS), $(GAMMADIR)/src/share/vm/$(dir))
     # Look for source files under NONPIC_DIRS
--- a/make/solaris/makefiles/add_gnu_debuglink.make	Thu Jun 28 09:32:35 2012 -0700
+++ b/make/solaris/makefiles/add_gnu_debuglink.make	Fri Jun 29 17:04:39 2012 -0700
@@ -24,8 +24,11 @@
 
 # Rules to build add_gnu_debuglink, used by vm.make on Solaris
 
-GENERATED                 = ../generated
-ADD_GNU_DEBUGLINK         = $(GENERATED)/add_gnu_debuglink
+# Allow $(ADD_GNU_DEBUGLINK) to be called from any directory.
+# We don't set or use the GENERATED macro to avoid affecting
+# other HotSpot Makefiles.
+TOPDIR                    = $(shell echo `pwd`)
+ADD_GNU_DEBUGLINK         = $(TOPDIR)/../generated/add_gnu_debuglink
 
 ADD_GNU_DEBUGLINK_DIR     = $(GAMMADIR)/src/os/solaris/add_gnu_debuglink
 ADD_GNU_DEBUGLINK_SRC     = $(ADD_GNU_DEBUGLINK_DIR)/add_gnu_debuglink.c
--- a/make/solaris/makefiles/defs.make	Thu Jun 28 09:32:35 2012 -0700
+++ b/make/solaris/makefiles/defs.make	Fri Jun 29 17:04:39 2012 -0700
@@ -203,10 +203,18 @@
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.diz
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_db.diz
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_dtrace.diz
+      ifeq ($(ARCH_DATA_MODEL),32)
+        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_db.diz
+        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_dtrace.diz
+      endif
     else
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm.debuginfo
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_db.debuginfo
       EXPORT_LIST += $(EXPORT_SERVER_DIR)/libjvm_dtrace.debuginfo
+      ifeq ($(ARCH_DATA_MODEL),32)
+        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_db.debuginfo
+        EXPORT_LIST += $(EXPORT_SERVER_DIR)/64/libjvm_dtrace.debuginfo
+      endif
     endif
   endif
 endif
--- a/make/solaris/makefiles/dtrace.make	Thu Jun 28 09:32:35 2012 -0700
+++ b/make/solaris/makefiles/dtrace.make	Fri Jun 29 17:04:39 2012 -0700
@@ -94,23 +94,24 @@
 # Making 64/libjvm_db.so: 64-bit version of libjvm_db.so which handles 32-bit libjvm.so
 ifneq ("${ISA}","${BUILDARCH}")
 
-XLIBJVM_DB = 64/$(LIBJVM_DB)
-XLIBJVM_DB_G = 64/$(LIBJVM_DB_G)
-XLIBJVM_DTRACE = 64/$(LIBJVM_DTRACE)
-XLIBJVM_DTRACE_G = 64/$(LIBJVM_DTRACE_G)
+XLIBJVM_DIR = 64
+XLIBJVM_DB = $(XLIBJVM_DIR)/$(LIBJVM_DB)
+XLIBJVM_DB_G = $(XLIBJVM_DIR)/$(LIBJVM_DB_G)
+XLIBJVM_DTRACE = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE)
+XLIBJVM_DTRACE_G = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_G)
 
-XLIBJVM_DB_DEBUGINFO       = 64/$(LIBJVM_DB_DEBUGINFO)
-XLIBJVM_DB_DIZ             = 64/$(LIBJVM_DB_DIZ)
-XLIBJVM_DB_G_DEBUGINFO     = 64/$(LIBJVM_DB_G_DEBUGINFO)
-XLIBJVM_DB_G_DIZ           = 64/$(LIBJVM_DB_G_DIZ)
-XLIBJVM_DTRACE_DEBUGINFO   = 64/$(LIBJVM_DTRACE_DEBUGINFO)
-XLIBJVM_DTRACE_DIZ         = 64/$(LIBJVM_DTRACE_DIZ)
-XLIBJVM_DTRACE_G_DEBUGINFO = 64/$(LIBJVM_DTRACE_G_DEBUGINFO)
-XLIBJVM_DTRACE_G_DIZ       = 64/$(LIBJVM_DTRACE_G_DIZ)
+XLIBJVM_DB_DEBUGINFO       = $(XLIBJVM_DIR)/$(LIBJVM_DB_DEBUGINFO)
+XLIBJVM_DB_DIZ             = $(XLIBJVM_DIR)/$(LIBJVM_DB_DIZ)
+XLIBJVM_DB_G_DEBUGINFO     = $(XLIBJVM_DIR)/$(LIBJVM_DB_G_DEBUGINFO)
+XLIBJVM_DB_G_DIZ           = $(XLIBJVM_DIR)/$(LIBJVM_DB_G_DIZ)
+XLIBJVM_DTRACE_DEBUGINFO   = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_DEBUGINFO)
+XLIBJVM_DTRACE_DIZ         = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_DIZ)
+XLIBJVM_DTRACE_G_DEBUGINFO = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_G_DEBUGINFO)
+XLIBJVM_DTRACE_G_DIZ       = $(XLIBJVM_DIR)/$(LIBJVM_DTRACE_G_DIZ)
 
 $(XLIBJVM_DB): $(ADD_GNU_DEBUGLINK) $(FIX_EMPTY_SEC_HDR_FLAGS) $(DTRACE_SRCDIR)/$(JVM_DB).c $(JVMOFFS).h $(LIBJVM_DB_MAPFILE)
 	@echo Making $@
-	$(QUIETLY) mkdir -p 64/ ; \
+	$(QUIETLY) mkdir -p $(XLIBJVM_DIR) ; \
 	$(CC) $(SYMFLAG) $(ARCHFLAG/$(ISA)) -D$(TYPE) -I. -I$(GENERATED) \
 		$(SHARED_FLAG) $(LFLAGS_JVM_DB) -o $@ $(DTRACE_SRCDIR)/$(JVM_DB).c -lc
 	[ -f $(XLIBJVM_DB_G) ] || { ln -s $(LIBJVM_DB) $(XLIBJVM_DB_G); }
@@ -124,8 +125,10 @@
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(XLIBJVM_DB_DEBUGINFO)
 # $(OBJCOPY) --add-gnu-debuglink=... corrupts SUNW_* sections.
 # Use $(ADD_GNU_DEBUGLINK) until a fixed $(OBJCOPY) is available.
-#	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(XLIBJVM_DB_DEBUGINFO) $@
-	$(QUIETLY) $(ADD_GNU_DEBUGLINK) $(XLIBJVM_DB_DEBUGINFO) $@
+#         $(OBJCOPY) --add-gnu-debuglink=$(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB) ;
+# Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not
+# in the link name:
+	( cd $(XLIBJVM_DIR) && $(ADD_GNU_DEBUGLINK) $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB) )
   ifeq ($(STRIP_POLICY),all_strip)
 	$(QUIETLY) $(STRIP) $@
   else
@@ -134,17 +137,19 @@
     # implied else here is no stripping at all
     endif
   endif
-	[ -f $(XLIBJVM_DB_G_DEBUGINFO) ] || { ln -s $(XLIBJVM_DB_DEBUGINFO) $(XLIBJVM_DB_G_DEBUGINFO); }
+	[ -f $(XLIBJVM_DB_G_DEBUGINFO) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-	$(ZIPEXE) -q -y $(XLIBJVM_DB_DIZ) $(XLIBJVM_DB_DEBUGINFO) $(XLIBJVM_DB_G_DEBUGINFO)
+# Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not
+# in the archived name:
+	( cd $(XLIBJVM_DIR) && $(ZIPEXE) -q -y $(LIBJVM_DB_DIZ) $(LIBJVM_DB_DEBUGINFO) $(LIBJVM_DB_G_DEBUGINFO) )
 	$(RM) $(XLIBJVM_DB_DEBUGINFO) $(XLIBJVM_DB_G_DEBUGINFO)
-	[ -f $(XLIBJVM_DB_G_DIZ) ] || { ln -s $(XLIBJVM_DB_DIZ) $(XLIBJVM_DB_G_DIZ); }
+	[ -f $(XLIBJVM_DB_G_DIZ) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DB_DIZ) $(LIBJVM_DB_G_DIZ); }
   endif
 endif
 
 $(XLIBJVM_DTRACE): $(ADD_GNU_DEBUGLINK) $(FIX_EMPTY_SEC_HDR_FLAGS) $(DTRACE_SRCDIR)/$(JVM_DTRACE).c $(DTRACE_SRCDIR)/$(JVM_DTRACE).h $(LIBJVM_DTRACE_MAPFILE)
 	@echo Making $@
-	$(QUIETLY) mkdir -p 64/ ; \
+	$(QUIETLY) mkdir -p $(XLIBJVM_DIR) ; \
 	$(CC) $(SYMFLAG) $(ARCHFLAG/$(ISA)) -D$(TYPE) -I. \
 		$(SHARED_FLAG) $(LFLAGS_JVM_DTRACE) -o $@ $(DTRACE_SRCDIR)/$(JVM_DTRACE).c -lc -lthread -ldoor
 	[ -f $(XLIBJVM_DTRACE_G) ] || { ln -s $(LIBJVM_DTRACE) $(XLIBJVM_DTRACE_G); }
@@ -153,8 +158,10 @@
 	$(QUIETLY) $(FIX_EMPTY_SEC_HDR_FLAGS) $@
 	$(QUIETLY) $(OBJCOPY) --only-keep-debug $@ $(XLIBJVM_DTRACE_DEBUGINFO)
 # $(OBJCOPY) --add-gnu-debuglink=... corrupts SUNW_* sections.
-#	$(QUIETLY) $(OBJCOPY) --add-gnu-debuglink=$(XLIBJVM_DTRACE_DEBUGINFO) $@
-	$(QUIETLY) $(ADD_GNU_DEBUGLINK) $(XLIBJVM_DTRACE_DEBUGINFO) $@
+#         $(OBJCOPY) --add-gnu-debuglink=$(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE) ;
+# Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not
+# in the link name:
+	( cd $(XLIBJVM_DIR) && $(ADD_GNU_DEBUGLINK) $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE) )
   ifeq ($(STRIP_POLICY),all_strip)
 	$(QUIETLY) $(STRIP) $@
   else
@@ -163,11 +170,13 @@
     # implied else here is no stripping at all
     endif
   endif
-	[ -f $(XLIBJVM_DTRACE_G_DEBUGINFO) ] || { ln -s $(XLIBJVM_DTRACE_DEBUGINFO) $(XLIBJVM_DTRACE_G_DEBUGINFO); }
+	[ -f $(XLIBJVM_DTRACE_G_DEBUGINFO) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE_G_DEBUGINFO); }
   ifeq ($(ZIP_DEBUGINFO_FILES),1)
-	$(ZIPEXE) -q -y $(XLIBJVM_DTRACE_DIZ) $(XLIBJVM_DTRACE_DEBUGINFO) $(XLIBJVM_DTRACE_G_DEBUGINFO)
+# Do this part in the $(XLIBJVM_DIR) subdir so $(XLIBJVM_DIR) is not
+# in the archived name:
+	( cd $(XLIBJVM_DIR) && $(ZIPEXE) -q -y $(LIBJVM_DTRACE_DIZ) $(LIBJVM_DTRACE_DEBUGINFO) $(LIBJVM_DTRACE_G_DEBUGINFO) )
 	$(RM) $(XLIBJVM_DTRACE_DEBUGINFO) $(XLIBJVM_DTRACE_G_DEBUGINFO)
-	[ -f $(XLIBJVM_DTRACE_G_DIZ) ] || { ln -s $(XLIBJVM_DTRACE_DIZ) $(XLIBJVM_DTRACE_G_DIZ); }
+	[ -f $(XLIBJVM_DTRACE_G_DIZ) ] || { cd $(XLIBJVM_DIR) && ln -s $(LIBJVM_DTRACE_DIZ) $(LIBJVM_DTRACE_G_DIZ); }
   endif
 endif
 
--- a/make/solaris/makefiles/fix_empty_sec_hdr_flags.make	Thu Jun 28 09:32:35 2012 -0700
+++ b/make/solaris/makefiles/fix_empty_sec_hdr_flags.make	Fri Jun 29 17:04:39 2012 -0700
@@ -24,8 +24,11 @@
 
 # Rules to build fix_empty_sec_hdr_flags, used by vm.make on Solaris
 
-GENERATED                       = ../generated
-FIX_EMPTY_SEC_HDR_FLAGS         = $(GENERATED)/fix_empty_sec_hdr_flags
+# Allow $(FIX_EMPTY_SEC_HDR_FLAGS) to be called from any directory.
+# We don't set or use the GENERATED macro to avoid affecting
+# other HotSpot Makefiles.
+TOPDIR                          = $(shell echo `pwd`)
+FIX_EMPTY_SEC_HDR_FLAGS         = $(TOPDIR)/../generated/fix_empty_sec_hdr_flags
 
 FIX_EMPTY_SEC_HDR_FLAGS_DIR     = $(GAMMADIR)/src/os/solaris/fix_empty_sec_hdr_flags
 FIX_EMPTY_SEC_HDR_FLAGS_SRC     = $(FIX_EMPTY_SEC_HDR_FLAGS_DIR)/fix_empty_sec_hdr_flags.c
--- a/src/cpu/sparc/vm/sparc.ad	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/cpu/sparc/vm/sparc.ad	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -678,18 +678,26 @@
 
 static inline jdouble replicate_immI(int con, int count, int width) {
   // Load a constant replicated "count" times with width "width"
+  assert(count*width == 8 && width <= 4, "sanity");
   int bit_width = width * 8;
-  jlong elt_val = con;
-  elt_val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
-  jlong val = elt_val;
+  jlong val = con;
+  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
   for (int i = 0; i < count - 1; i++) {
-    val <<= bit_width;
-    val |= elt_val;
+    val |= (val << bit_width);
   }
   jdouble dval = *((jdouble*) &val);  // coerce to double type
   return dval;
 }
 
+static inline jdouble replicate_immF(float con) {
+  // Replicate float con 2 times and pack into vector.
+  int val = *((int*)&con);
+  jlong lval = val;
+  lval = (lval << 32) | (lval & 0xFFFFFFFFl);
+  jdouble dval = *((jdouble*) &lval);  // coerce to double type
+  return dval;
+}
+
 // Standard Sparc opcode form2 field breakdown
 static inline void emit2_19(CodeBuffer &cbuf, int f30, int f29, int f25, int f22, int f20, int f19, int f0 ) {
   f0 &= (1<<19)-1;     // Mask displacement to 19 bits
@@ -791,6 +799,7 @@
     case Assembler::stdf_op3: st_op = Op_StoreD; break;
 
     case Assembler::ldsb_op3: ld_op = Op_LoadB; break;
+    case Assembler::ldub_op3: ld_op = Op_LoadUB; break;
     case Assembler::lduh_op3: ld_op = Op_LoadUS; break;
     case Assembler::ldsh_op3: ld_op = Op_LoadS; break;
     case Assembler::ldx_op3:  // may become LoadP or stay LoadI
@@ -799,7 +808,6 @@
     case Assembler::ldd_op3:  ld_op = Op_LoadL; break;
     case Assembler::ldf_op3:  ld_op = Op_LoadF; break;
     case Assembler::lddf_op3: ld_op = Op_LoadD; break;
-    case Assembler::ldub_op3: ld_op = Op_LoadB; break;
     case Assembler::prefetch_op3: ld_op = Op_LoadI; break;
 
     default: ShouldNotReachHere();
@@ -840,10 +848,7 @@
           !(n->ideal_Opcode()==Op_PrefetchRead  && ld_op==Op_LoadI) &&
           !(n->ideal_Opcode()==Op_PrefetchWrite && ld_op==Op_LoadI) &&
           !(n->ideal_Opcode()==Op_PrefetchAllocation && ld_op==Op_LoadI) &&
-          !(n->ideal_Opcode()==Op_Load2I    && ld_op==Op_LoadD) &&
-          !(n->ideal_Opcode()==Op_Load4C    && ld_op==Op_LoadD) &&
-          !(n->ideal_Opcode()==Op_Load4S    && ld_op==Op_LoadD) &&
-          !(n->ideal_Opcode()==Op_Load8B    && ld_op==Op_LoadD) &&
+          !(n->ideal_Opcode()==Op_LoadVector && ld_op==Op_LoadD) &&
           !(n->rule() == loadUB_rule)) {
         verify_oops_warning(n, n->ideal_Opcode(), ld_op);
       }
@@ -855,9 +860,7 @@
           !(n->ideal_Opcode()==Op_StoreI && st_op==Op_StoreF) &&
           !(n->ideal_Opcode()==Op_StoreF && st_op==Op_StoreI) &&
           !(n->ideal_Opcode()==Op_StoreL && st_op==Op_StoreI) &&
-          !(n->ideal_Opcode()==Op_Store2I && st_op==Op_StoreD) &&
-          !(n->ideal_Opcode()==Op_Store4C && st_op==Op_StoreD) &&
-          !(n->ideal_Opcode()==Op_Store8B && st_op==Op_StoreD) &&
+          !(n->ideal_Opcode()==Op_StoreVector && st_op==Op_StoreD) &&
           !(n->ideal_Opcode()==Op_StoreD && st_op==Op_StoreI && n->rule() == storeD0_rule)) {
         verify_oops_warning(n, n->ideal_Opcode(), st_op);
       }
@@ -1849,16 +1852,45 @@
 address last_rethrow = NULL;  // debugging aid for Rethrow encoding
 #endif
 
+// Map Types to machine register types
+const int Matcher::base2reg[Type::lastype] = {
+  Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
+  Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
+  0, Op_RegD, 0, 0, /* Vectors */
+  Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
+  0, 0/*abio*/,
+  Op_RegP /* Return address */, 0, /* the memories */
+  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
+  0  /*bottom*/
+};
+
 // Vector width in bytes
-const uint Matcher::vector_width_in_bytes(void) {
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+  assert(MaxVectorSize == 8, "");
   return 8;
 }
 
 // Vector ideal reg
-const uint Matcher::vector_ideal_reg(void) {
+const int Matcher::vector_ideal_reg(int size) {
+  assert(MaxVectorSize == 8, "");
   return Op_RegD;
 }
 
+// Limits on vector size (number of elements) loaded into vector.
+const int Matcher::max_vector_size(const BasicType bt) {
+  assert(is_java_primitive(bt), "only primitive type vectors");
+  return vector_width_in_bytes(bt)/type2aelembytes(bt);
+}
+
+const int Matcher::min_vector_size(const BasicType bt) {
+  return max_vector_size(bt); // Same as max.
+}
+
+// SPARC doesn't support misaligned vectors store/load.
+const bool Matcher::misaligned_vectors_ok() {
+  return false;
+}
+
 // USII supports fxtof through the whole range of number, USIII doesn't
 const bool Matcher::convL2FSupported(void) {
   return VM_Version::has_fast_fxtof();
@@ -3125,50 +3157,6 @@
     __ membar( Assembler::Membar_mask_bits(Assembler::StoreLoad) );
   %}
 
-  enc_class enc_repl8b( iRegI src, iRegL dst ) %{
-    MacroAssembler _masm(&cbuf);
-    Register src_reg = reg_to_register_object($src$$reg);
-    Register dst_reg = reg_to_register_object($dst$$reg);
-    __ sllx(src_reg, 56, dst_reg);
-    __ srlx(dst_reg,  8, O7);
-    __ or3 (dst_reg, O7, dst_reg);
-    __ srlx(dst_reg, 16, O7);
-    __ or3 (dst_reg, O7, dst_reg);
-    __ srlx(dst_reg, 32, O7);
-    __ or3 (dst_reg, O7, dst_reg);
-  %}
-
-  enc_class enc_repl4b( iRegI src, iRegL dst ) %{
-    MacroAssembler _masm(&cbuf);
-    Register src_reg = reg_to_register_object($src$$reg);
-    Register dst_reg = reg_to_register_object($dst$$reg);
-    __ sll(src_reg, 24, dst_reg);
-    __ srl(dst_reg,  8, O7);
-    __ or3(dst_reg, O7, dst_reg);
-    __ srl(dst_reg, 16, O7);
-    __ or3(dst_reg, O7, dst_reg);
-  %}
-
-  enc_class enc_repl4s( iRegI src, iRegL dst ) %{
-    MacroAssembler _masm(&cbuf);
-    Register src_reg = reg_to_register_object($src$$reg);
-    Register dst_reg = reg_to_register_object($dst$$reg);
-    __ sllx(src_reg, 48, dst_reg);
-    __ srlx(dst_reg, 16, O7);
-    __ or3 (dst_reg, O7, dst_reg);
-    __ srlx(dst_reg, 32, O7);
-    __ or3 (dst_reg, O7, dst_reg);
-  %}
-
-  enc_class enc_repl2i( iRegI src, iRegL dst ) %{
-    MacroAssembler _masm(&cbuf);
-    Register src_reg = reg_to_register_object($src$$reg);
-    Register dst_reg = reg_to_register_object($dst$$reg);
-    __ sllx(src_reg, 32, dst_reg);
-    __ srlx(dst_reg, 32, O7);
-    __ or3 (dst_reg, O7, dst_reg);
-  %}
-
 %}
 
 //----------FRAME--------------------------------------------------------------
@@ -5932,50 +5920,6 @@
   ins_pipe(iload_mem);
 %}
 
-// Load Aligned Packed Byte into a Double Register
-instruct loadA8B(regD dst, memory mem) %{
-  match(Set dst (Load8B mem));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "LDDF   $mem,$dst\t! packed8B" %}
-  opcode(Assembler::lddf_op3);
-  ins_encode(simple_form3_mem_reg( mem, dst ) );
-  ins_pipe(floadD_mem);
-%}
-
-// Load Aligned Packed Char into a Double Register
-instruct loadA4C(regD dst, memory mem) %{
-  match(Set dst (Load4C mem));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "LDDF   $mem,$dst\t! packed4C" %}
-  opcode(Assembler::lddf_op3);
-  ins_encode(simple_form3_mem_reg( mem, dst ) );
-  ins_pipe(floadD_mem);
-%}
-
-// Load Aligned Packed Short into a Double Register
-instruct loadA4S(regD dst, memory mem) %{
-  match(Set dst (Load4S mem));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "LDDF   $mem,$dst\t! packed4S" %}
-  opcode(Assembler::lddf_op3);
-  ins_encode(simple_form3_mem_reg( mem, dst ) );
-  ins_pipe(floadD_mem);
-%}
-
-// Load Aligned Packed Int into a Double Register
-instruct loadA2I(regD dst, memory mem) %{
-  match(Set dst (Load2I mem));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "LDDF   $mem,$dst\t! packed2I" %}
-  opcode(Assembler::lddf_op3);
-  ins_encode(simple_form3_mem_reg( mem, dst ) );
-  ins_pipe(floadD_mem);
-%}
-
 // Load Range
 instruct loadRange(iRegI dst, memory mem) %{
   match(Set dst (LoadRange mem));
@@ -6599,17 +6543,6 @@
   ins_pipe(fstoreF_mem_zero);
 %}
 
-// Store Aligned Packed Bytes in Double register to memory
-instruct storeA8B(memory mem, regD src) %{
-  match(Set mem (Store8B mem src));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "STDF   $src,$mem\t! packed8B" %}
-  opcode(Assembler::stdf_op3);
-  ins_encode(simple_form3_mem_reg( mem, src ) );
-  ins_pipe(fstoreD_mem_reg);
-%}
-
 // Convert oop pointer into compressed form
 instruct encodeHeapOop(iRegN dst, iRegP src) %{
   predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
@@ -6654,62 +6587,6 @@
 %}
 
 
-// Store Zero into Aligned Packed Bytes
-instruct storeA8B0(memory mem, immI0 zero) %{
-  match(Set mem (Store8B mem zero));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "STX    $zero,$mem\t! packed8B" %}
-  opcode(Assembler::stx_op3);
-  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
-  ins_pipe(fstoreD_mem_zero);
-%}
-
-// Store Aligned Packed Chars/Shorts in Double register to memory
-instruct storeA4C(memory mem, regD src) %{
-  match(Set mem (Store4C mem src));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "STDF   $src,$mem\t! packed4C" %}
-  opcode(Assembler::stdf_op3);
-  ins_encode(simple_form3_mem_reg( mem, src ) );
-  ins_pipe(fstoreD_mem_reg);
-%}
-
-// Store Zero into Aligned Packed Chars/Shorts
-instruct storeA4C0(memory mem, immI0 zero) %{
-  match(Set mem (Store4C mem (Replicate4C zero)));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "STX    $zero,$mem\t! packed4C" %}
-  opcode(Assembler::stx_op3);
-  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
-  ins_pipe(fstoreD_mem_zero);
-%}
-
-// Store Aligned Packed Ints in Double register to memory
-instruct storeA2I(memory mem, regD src) %{
-  match(Set mem (Store2I mem src));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "STDF   $src,$mem\t! packed2I" %}
-  opcode(Assembler::stdf_op3);
-  ins_encode(simple_form3_mem_reg( mem, src ) );
-  ins_pipe(fstoreD_mem_reg);
-%}
-
-// Store Zero into Aligned Packed Ints
-instruct storeA2I0(memory mem, immI0 zero) %{
-  match(Set mem (Store2I mem zero));
-  ins_cost(MEMORY_REF_COST);
-  size(4);
-  format %{ "STX    $zero,$mem\t! packed2I" %}
-  opcode(Assembler::stx_op3);
-  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
-  ins_pipe(fstoreD_mem_zero);
-%}
-
-
 //----------MemBar Instructions-----------------------------------------------
 // Memory barrier flavors
 
@@ -8880,150 +8757,6 @@
   ins_pipe(ialu_reg_imm);
 %}
 
-// Replicate scalar to packed byte values in Double register
-instruct Repl8B_reg_helper(iRegL dst, iRegI src) %{
-  effect(DEF dst, USE src);
-  format %{ "SLLX  $src,56,$dst\n\t"
-            "SRLX  $dst, 8,O7\n\t"
-            "OR    $dst,O7,$dst\n\t"
-            "SRLX  $dst,16,O7\n\t"
-            "OR    $dst,O7,$dst\n\t"
-            "SRLX  $dst,32,O7\n\t"
-            "OR    $dst,O7,$dst\t! replicate8B" %}
-  ins_encode( enc_repl8b(src, dst));
-  ins_pipe(ialu_reg);
-%}
-
-// Replicate scalar to packed byte values in Double register
-instruct Repl8B_reg(stackSlotD dst, iRegI src) %{
-  match(Set dst (Replicate8B src));
-  expand %{
-    iRegL tmp;
-    Repl8B_reg_helper(tmp, src);
-    regL_to_stkD(dst, tmp);
-  %}
-%}
-
-// Replicate scalar constant to packed byte values in Double register
-instruct Repl8B_immI(regD dst, immI13 con, o7RegI tmp) %{
-  match(Set dst (Replicate8B con));
-  effect(KILL tmp);
-  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl8B($con)" %}
-  ins_encode %{
-    // XXX This is a quick fix for 6833573.
-    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 8, 1)), $dst$$FloatRegister);
-    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 8, 1)), $tmp$$Register);
-    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
-  %}
-  ins_pipe(loadConFD);
-%}
-
-// Replicate scalar to packed char values into stack slot
-instruct Repl4C_reg_helper(iRegL dst, iRegI src) %{
-  effect(DEF dst, USE src);
-  format %{ "SLLX  $src,48,$dst\n\t"
-            "SRLX  $dst,16,O7\n\t"
-            "OR    $dst,O7,$dst\n\t"
-            "SRLX  $dst,32,O7\n\t"
-            "OR    $dst,O7,$dst\t! replicate4C" %}
-  ins_encode( enc_repl4s(src, dst) );
-  ins_pipe(ialu_reg);
-%}
-
-// Replicate scalar to packed char values into stack slot
-instruct Repl4C_reg(stackSlotD dst, iRegI src) %{
-  match(Set dst (Replicate4C src));
-  expand %{
-    iRegL tmp;
-    Repl4C_reg_helper(tmp, src);
-    regL_to_stkD(dst, tmp);
-  %}
-%}
-
-// Replicate scalar constant to packed char values in Double register
-instruct Repl4C_immI(regD dst, immI con, o7RegI tmp) %{
-  match(Set dst (Replicate4C con));
-  effect(KILL tmp);
-  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4C($con)" %}
-  ins_encode %{
-    // XXX This is a quick fix for 6833573.
-    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister);
-    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register);
-    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
-  %}
-  ins_pipe(loadConFD);
-%}
-
-// Replicate scalar to packed short values into stack slot
-instruct Repl4S_reg_helper(iRegL dst, iRegI src) %{
-  effect(DEF dst, USE src);
-  format %{ "SLLX  $src,48,$dst\n\t"
-            "SRLX  $dst,16,O7\n\t"
-            "OR    $dst,O7,$dst\n\t"
-            "SRLX  $dst,32,O7\n\t"
-            "OR    $dst,O7,$dst\t! replicate4S" %}
-  ins_encode( enc_repl4s(src, dst) );
-  ins_pipe(ialu_reg);
-%}
-
-// Replicate scalar to packed short values into stack slot
-instruct Repl4S_reg(stackSlotD dst, iRegI src) %{
-  match(Set dst (Replicate4S src));
-  expand %{
-    iRegL tmp;
-    Repl4S_reg_helper(tmp, src);
-    regL_to_stkD(dst, tmp);
-  %}
-%}
-
-// Replicate scalar constant to packed short values in Double register
-instruct Repl4S_immI(regD dst, immI con, o7RegI tmp) %{
-  match(Set dst (Replicate4S con));
-  effect(KILL tmp);
-  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4S($con)" %}
-  ins_encode %{
-    // XXX This is a quick fix for 6833573.
-    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister);
-    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register);
-    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
-  %}
-  ins_pipe(loadConFD);
-%}
-
-// Replicate scalar to packed int values in Double register
-instruct Repl2I_reg_helper(iRegL dst, iRegI src) %{
-  effect(DEF dst, USE src);
-  format %{ "SLLX  $src,32,$dst\n\t"
-            "SRLX  $dst,32,O7\n\t"
-            "OR    $dst,O7,$dst\t! replicate2I" %}
-  ins_encode( enc_repl2i(src, dst));
-  ins_pipe(ialu_reg);
-%}
-
-// Replicate scalar to packed int values in Double register
-instruct Repl2I_reg(stackSlotD dst, iRegI src) %{
-  match(Set dst (Replicate2I src));
-  expand %{
-    iRegL tmp;
-    Repl2I_reg_helper(tmp, src);
-    regL_to_stkD(dst, tmp);
-  %}
-%}
-
-// Replicate scalar zero constant to packed int values in Double register
-instruct Repl2I_immI(regD dst, immI con, o7RegI tmp) %{
-  match(Set dst (Replicate2I con));
-  effect(KILL tmp);
-  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2I($con)" %}
-  ins_encode %{
-    // XXX This is a quick fix for 6833573.
-    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 2, 4)), $dst$$FloatRegister);
-    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 2, 4)), $tmp$$Register);
-    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
-  %}
-  ins_pipe(loadConFD);
-%}
-
 //----------Control Flow Instructions------------------------------------------
 // Compare Instructions
 // Compare Integers
@@ -10742,6 +10475,308 @@
   ins_pipe(istore_mem_reg);
 %}
 
+// ====================VECTOR INSTRUCTIONS=====================================
+
+// Load Aligned Packed values into a Double Register
+instruct loadV8(regD dst, memory mem) %{
+  predicate(n->as_LoadVector()->memory_size() == 8);
+  match(Set dst (LoadVector mem));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "LDDF   $mem,$dst\t! load vector (8 bytes)" %}
+  ins_encode %{
+    __ ldf(FloatRegisterImpl::D, $mem$$Address, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(floadD_mem);
+%}
+
+// Store Vector in Double register to memory
+instruct storeV8(memory mem, regD src) %{
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  match(Set mem (StoreVector mem src));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STDF   $src,$mem\t! store vector (8 bytes)" %}
+  ins_encode %{
+    __ stf(FloatRegisterImpl::D, as_DoubleFloatRegister($src$$reg), $mem$$Address);
+  %}
+  ins_pipe(fstoreD_mem_reg);
+%}
+
+// Store Zero into vector in memory
+instruct storeV8B_zero(memory mem, immI0 zero) %{
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  match(Set mem (StoreVector mem (ReplicateB zero)));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STX    $zero,$mem\t! store zero vector (8 bytes)" %}
+  ins_encode %{
+    __ stx(G0, $mem$$Address);
+  %}
+  ins_pipe(fstoreD_mem_zero);
+%}
+
+instruct storeV4S_zero(memory mem, immI0 zero) %{
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  match(Set mem (StoreVector mem (ReplicateS zero)));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STX    $zero,$mem\t! store zero vector (4 shorts)" %}
+  ins_encode %{
+    __ stx(G0, $mem$$Address);
+  %}
+  ins_pipe(fstoreD_mem_zero);
+%}
+
+instruct storeV2I_zero(memory mem, immI0 zero) %{
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  match(Set mem (StoreVector mem (ReplicateI zero)));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STX    $zero,$mem\t! store zero vector (2 ints)" %}
+  ins_encode %{
+    __ stx(G0, $mem$$Address);
+  %}
+  ins_pipe(fstoreD_mem_zero);
+%}
+
+instruct storeV2F_zero(memory mem, immF0 zero) %{
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  match(Set mem (StoreVector mem (ReplicateF zero)));
+  ins_cost(MEMORY_REF_COST);
+  size(4);
+  format %{ "STX    $zero,$mem\t! store zero vector (2 floats)" %}
+  ins_encode %{
+    __ stx(G0, $mem$$Address);
+  %}
+  ins_pipe(fstoreD_mem_zero);
+%}
+
+// Replicate scalar to packed byte values into Double register
+instruct Repl8B_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+  predicate(n->as_Vector()->length() == 8 && UseVIS >= 3);
+  match(Set dst (ReplicateB src));
+  effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+  format %{ "SLLX  $src,56,$tmp\n\t"
+            "SRLX  $tmp, 8,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\n\t"
+            "SRLX  $tmp,16,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\n\t"
+            "SRLX  $tmp,32,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\t! replicate8B\n\t"
+            "MOVXTOD $tmp,$dst\t! MoveL2D" %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rtmp = $tmp$$Register;
+    Register Rtmp2 = $tmp2$$Register;
+    __ sllx(Rsrc,    56, Rtmp);
+    __ srlx(Rtmp,     8, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ srlx(Rtmp,    16, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ srlx(Rtmp,    32, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar to packed byte values into Double stack
+instruct Repl8B_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+  predicate(n->as_Vector()->length() == 8 && UseVIS < 3);
+  match(Set dst (ReplicateB src));
+  effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+  format %{ "SLLX  $src,56,$tmp\n\t"
+            "SRLX  $tmp, 8,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\n\t"
+            "SRLX  $tmp,16,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\n\t"
+            "SRLX  $tmp,32,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\t! replicate8B\n\t"
+            "STX   $tmp,$dst\t! regL to stkD" %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rtmp = $tmp$$Register;
+    Register Rtmp2 = $tmp2$$Register;
+    __ sllx(Rsrc,    56, Rtmp);
+    __ srlx(Rtmp,     8, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ srlx(Rtmp,    16, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ srlx(Rtmp,    32, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ set ($dst$$disp + STACK_BIAS, Rtmp2);
+    __ stx (Rtmp, Rtmp2, $dst$$base$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar constant to packed byte values in Double register
+instruct Repl8B_immI(regD dst, immI13 con, o7RegI tmp) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateB con));
+  effect(KILL tmp);
+  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl8B($con)" %}
+  ins_encode %{
+    // XXX This is a quick fix for 6833573.
+    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 8, 1)), $dst$$FloatRegister);
+    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 8, 1)), $tmp$$Register);
+    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(loadConFD);
+%}
+
+// Replicate scalar to packed char/short values into Double register
+instruct Repl4S_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+  predicate(n->as_Vector()->length() == 4 && UseVIS >= 3);
+  match(Set dst (ReplicateS src));
+  effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+  format %{ "SLLX  $src,48,$tmp\n\t"
+            "SRLX  $tmp,16,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\n\t"
+            "SRLX  $tmp,32,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\t! replicate4S\n\t"
+            "MOVXTOD $tmp,$dst\t! MoveL2D" %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rtmp = $tmp$$Register;
+    Register Rtmp2 = $tmp2$$Register;
+    __ sllx(Rsrc,    48, Rtmp);
+    __ srlx(Rtmp,    16, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ srlx(Rtmp,    32, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar to packed char/short values into Double stack
+instruct Repl4S_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+  predicate(n->as_Vector()->length() == 4 && UseVIS < 3);
+  match(Set dst (ReplicateS src));
+  effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+  format %{ "SLLX  $src,48,$tmp\n\t"
+            "SRLX  $tmp,16,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\n\t"
+            "SRLX  $tmp,32,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\t! replicate4S\n\t"
+            "STX   $tmp,$dst\t! regL to stkD" %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rtmp = $tmp$$Register;
+    Register Rtmp2 = $tmp2$$Register;
+    __ sllx(Rsrc,    48, Rtmp);
+    __ srlx(Rtmp,    16, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ srlx(Rtmp,    32, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ set ($dst$$disp + STACK_BIAS, Rtmp2);
+    __ stx (Rtmp, Rtmp2, $dst$$base$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar constant to packed char/short values in Double register
+instruct Repl4S_immI(regD dst, immI con, o7RegI tmp) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateS con));
+  effect(KILL tmp);
+  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4S($con)" %}
+  ins_encode %{
+    // XXX This is a quick fix for 6833573.
+    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister);
+    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register);
+    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(loadConFD);
+%}
+
+// Replicate scalar to packed int values into Double register
+instruct Repl2I_reg(regD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+  predicate(n->as_Vector()->length() == 2 && UseVIS >= 3);
+  match(Set dst (ReplicateI src));
+  effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+  format %{ "SLLX  $src,32,$tmp\n\t"
+            "SRLX  $tmp,32,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\t! replicate2I\n\t"
+            "MOVXTOD $tmp,$dst\t! MoveL2D" %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rtmp = $tmp$$Register;
+    Register Rtmp2 = $tmp2$$Register;
+    __ sllx(Rsrc,    32, Rtmp);
+    __ srlx(Rtmp,    32, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ movxtod(Rtmp, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar to packed int values into Double stack
+instruct Repl2I_stk(stackSlotD dst, iRegI src, iRegL tmp, o7RegL tmp2) %{
+  predicate(n->as_Vector()->length() == 2 && UseVIS < 3);
+  match(Set dst (ReplicateI src));
+  effect(DEF dst, USE src, TEMP tmp, KILL tmp2);
+  format %{ "SLLX  $src,32,$tmp\n\t"
+            "SRLX  $tmp,32,$tmp2\n\t"
+            "OR    $tmp,$tmp2,$tmp\t! replicate2I\n\t"
+            "STX   $tmp,$dst\t! regL to stkD" %}
+  ins_encode %{
+    Register Rsrc = $src$$Register;
+    Register Rtmp = $tmp$$Register;
+    Register Rtmp2 = $tmp2$$Register;
+    __ sllx(Rsrc,    32, Rtmp);
+    __ srlx(Rtmp,    32, Rtmp2);
+    __ or3 (Rtmp, Rtmp2, Rtmp);
+    __ set ($dst$$disp + STACK_BIAS, Rtmp2);
+    __ stx (Rtmp, Rtmp2, $dst$$base$$Register);
+  %}
+  ins_pipe(ialu_reg);
+%}
+
+// Replicate scalar zero constant to packed int values in Double register
+instruct Repl2I_immI(regD dst, immI con, o7RegI tmp) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI con));
+  effect(KILL tmp);
+  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2I($con)" %}
+  ins_encode %{
+    // XXX This is a quick fix for 6833573.
+    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 2, 4)), $dst$$FloatRegister);
+    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 2, 4)), $tmp$$Register);
+    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(loadConFD);
+%}
+
+// Replicate scalar to packed float values into Double stack
+instruct Repl2F_stk(stackSlotD dst, regF src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateF src));
+  ins_cost(MEMORY_REF_COST*2);
+  format %{ "STF    $src,$dst.hi\t! packed2F\n\t"
+            "STF    $src,$dst.lo" %}
+  opcode(Assembler::stf_op3);
+  ins_encode(simple_form3_mem_reg(dst, src), form3_mem_plus_4_reg(dst, src));
+  ins_pipe(fstoreF_stk_reg);
+%}
+
+// Replicate scalar zero constant to packed float values in Double register
+instruct Repl2F_immF(regD dst, immF con, o7RegI tmp) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateF con));
+  effect(KILL tmp);
+  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2F($con)" %}
+  ins_encode %{
+    // XXX This is a quick fix for 6833573.
+    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immF($con$$constant)), $dst$$FloatRegister);
+    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immF($con$$constant)), $tmp$$Register);
+    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
+  %}
+  ins_pipe(loadConFD);
+%}
+
 //----------PEEPHOLE RULES-----------------------------------------------------
 // These must follow all instruction definitions as they use the names
 // defined in the instructions definitions.
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -217,6 +217,8 @@
   // Currently not supported anywhere.
   FLAG_SET_DEFAULT(UseFPUForSpilling, false);
 
+  MaxVectorSize = 8;
+
   assert((InteriorEntryAlignment % relocInfo::addr_unit()) == 0, "alignment is not a multiple of NOP size");
 #endif
 
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1637,6 +1637,13 @@
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  int encode = simd_prefix_and_encode(dst, src, src, VEX_SIMD_NONE);
+  emit_byte(0x16);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::movb(Register dst, Address src) {
   NOT_LP64(assert(dst->has_byte_register(), "must have byte register"));
   InstructionMark im(this);
@@ -1686,6 +1693,14 @@
   emit_operand(dst, src);
 }
 
+void Assembler::movdl(Address dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  InstructionMark im(this);
+  simd_prefix(dst, src, VEX_SIMD_66);
+  emit_byte(0x7E);
+  emit_operand(src, dst);
+}
+
 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
@@ -1716,6 +1731,35 @@
   emit_operand(src, dst);
 }
 
+// Move Unaligned 256bit Vector
+void Assembler::vmovdqu(XMMRegister dst, XMMRegister src) {
+  assert(UseAVX, "");
+  bool vector256 = true;
+  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F3, vector256);
+  emit_byte(0x6F);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vmovdqu(XMMRegister dst, Address src) {
+  assert(UseAVX, "");
+  InstructionMark im(this);
+  bool vector256 = true;
+  vex_prefix(dst, xnoreg, src, VEX_SIMD_F3, vector256);
+  emit_byte(0x6F);
+  emit_operand(dst, src);
+}
+
+void Assembler::vmovdqu(Address dst, XMMRegister src) {
+  assert(UseAVX, "");
+  InstructionMark im(this);
+  bool vector256 = true;
+  // swap src<->dst for encoding
+  assert(src != xnoreg, "sanity");
+  vex_prefix(src, xnoreg, dst, VEX_SIMD_F3, vector256);
+  emit_byte(0x7F);
+  emit_operand(src, dst);
+}
+
 // Uses zero extension on 64bit
 
 void Assembler::movl(Register dst, int32_t imm32) {
@@ -3112,6 +3156,13 @@
   emit_operand(dst, src);
 }
 
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256);
+  emit_byte(0x57);
+  emit_byte(0xC0 | encode);
+}
+
 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) {
   assert(VM_Version::supports_avx(), "");
   InstructionMark im(this);
@@ -3120,6 +3171,30 @@
   emit_operand(dst, src);
 }
 
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, vector256);
+  emit_byte(0x57);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  bool vector256 = true;
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_3A);
+  emit_byte(0x18);
+  emit_byte(0xC0 | encode);
+  // 0x00 - insert into lower 128 bits
+  // 0x01 - insert into upper 128 bits
+  emit_byte(0x01);
+}
+
+void Assembler::vzeroupper() {
+  assert(VM_Version::supports_avx(), "");
+  (void)vex_prefix_and_encode(xmm0, xmm0, xmm0, VEX_SIMD_NONE);
+  emit_byte(0x77);
+}
+
 
 #ifndef _LP64
 // 32bit only pieces of the assembler
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -591,8 +591,9 @@
 
   void vex_prefix(XMMRegister dst, XMMRegister nds, Address src,
                   VexSimdPrefix pre, bool vector256 = false) {
-     vex_prefix(src, nds->encoding(), dst->encoding(),
-                pre, VEX_OPCODE_0F, false, vector256);
+    int dst_enc = dst->encoding();
+    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+    vex_prefix(src, nds_enc, dst_enc, pre, VEX_OPCODE_0F, false, vector256);
   }
 
   int  vex_prefix_and_encode(int dst_enc, int nds_enc, int src_enc,
@@ -600,9 +601,12 @@
                              bool vex_w, bool vector256);
 
   int  vex_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
-                             VexSimdPrefix pre, bool vector256 = false) {
-     return vex_prefix_and_encode(dst->encoding(), nds->encoding(), src->encoding(),
-                                  pre, VEX_OPCODE_0F, false, vector256);
+                             VexSimdPrefix pre, bool vector256 = false,
+                             VexOpcode opc = VEX_OPCODE_0F) {
+    int src_enc = src->encoding();
+    int dst_enc = dst->encoding();
+    int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+    return vex_prefix_and_encode(dst_enc, nds_enc, src_enc, pre, opc, false, vector256);
   }
 
   void simd_prefix(XMMRegister xreg, XMMRegister nds, Address adr,
@@ -1261,6 +1265,7 @@
   void movdl(XMMRegister dst, Register src);
   void movdl(Register dst, XMMRegister src);
   void movdl(XMMRegister dst, Address src);
+  void movdl(Address dst, XMMRegister src);
 
   // Move Double Quadword
   void movdq(XMMRegister dst, Register src);
@@ -1274,6 +1279,14 @@
   void movdqu(XMMRegister dst, Address src);
   void movdqu(XMMRegister dst, XMMRegister src);
 
+  // Move Unaligned 256bit Vector
+  void vmovdqu(Address dst, XMMRegister src);
+  void vmovdqu(XMMRegister dst, Address src);
+  void vmovdqu(XMMRegister dst, XMMRegister src);
+
+  // Move lower 64bit to high 64bit in 128bit register
+  void movlhps(XMMRegister dst, XMMRegister src);
+
   void movl(Register dst, int32_t imm32);
   void movl(Address dst, int32_t imm32);
   void movl(Register dst, Register src);
@@ -1615,6 +1628,17 @@
   void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
   void vxorps(XMMRegister dst, XMMRegister nds, Address src);
 
+  // AVX Vector instrucitons.
+  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
+
+  // AVX instruction which is used to clear upper 128 bits of YMM registers and
+  // to avoid transaction penalty between AVX and SSE states. There is no
+  // penalty if legacy SSE instructions are encoded using VEX prefix because
+  // they always clear upper 128 bits. It should be used before calling
+  // runtime code and native libraries.
+  void vzeroupper();
 
  protected:
   // Next instructions require address alignment 16 bytes SSE mode.
@@ -2529,9 +2553,13 @@
   void vsubss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vsubss(dst, nds, src); }
   void vsubss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
 
+  // AVX Vector instructions
+
+  void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
   void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); }
   void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
 
+  void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
   void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
   void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
 
--- a/src/cpu/x86/vm/register_x86.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/cpu/x86/vm/register_x86.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,7 +35,7 @@
 const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr +
                                                                  2 * FloatRegisterImpl::number_of_registers;
 const int ConcreteRegisterImpl::max_xmm = ConcreteRegisterImpl::max_fpr +
-                                                                 2 * XMMRegisterImpl::number_of_registers;
+                                                                 8 * XMMRegisterImpl::number_of_registers;
 const char* RegisterImpl::name() const {
   const char* names[number_of_registers] = {
 #ifndef AMD64
--- a/src/cpu/x86/vm/register_x86.hpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/cpu/x86/vm/register_x86.hpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -158,7 +158,7 @@
   XMMRegister successor() const                          { return as_XMMRegister(encoding() + 1); }
 
   // accessors
-  int   encoding() const                          { assert(is_valid(), "invalid register"); return (intptr_t)this; }
+  int   encoding() const                          { assert(is_valid(), err_msg("invalid register (%d)", (int)(intptr_t)this )); return (intptr_t)this; }
   bool  is_valid() const                          { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; }
   const char* name() const;
 };
@@ -216,7 +216,7 @@
                                RegisterImpl::number_of_registers +  // "H" half of a 64bit register
 #endif // AMD64
                            2 * FloatRegisterImpl::number_of_registers +
-                           2 * XMMRegisterImpl::number_of_registers +
+                           8 * XMMRegisterImpl::number_of_registers +
                            1 // eflags
   };
 
--- a/src/cpu/x86/vm/vm_version_x86.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/cpu/x86/vm/vm_version_x86.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -467,6 +467,32 @@
   if (!supports_avx ()) // Drop to 0 if no AVX  support
     UseAVX = 0;
 
+#ifdef COMPILER2
+  if (UseFPUForSpilling) {
+    if (UseSSE < 2) {
+      // Only supported with SSE2+
+      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
+    }
+  }
+  if (MaxVectorSize > 0) {
+    if (!is_power_of_2(MaxVectorSize)) {
+      warning("MaxVectorSize must be a power of 2");
+      FLAG_SET_DEFAULT(MaxVectorSize, 32);
+    }
+    if (MaxVectorSize > 32) {
+      FLAG_SET_DEFAULT(MaxVectorSize, 32);
+    }
+    if (MaxVectorSize > 16 && UseAVX == 0) {
+      // Only supported with AVX+
+      FLAG_SET_DEFAULT(MaxVectorSize, 16);
+    }
+    if (UseSSE < 2) {
+      // Only supported with SSE2+
+      FLAG_SET_DEFAULT(MaxVectorSize, 0);
+    }
+  }
+#endif
+
   // On new cpus instructions which update whole XMM register should be used
   // to prevent partial register stall due to dependencies on high half.
   //
@@ -544,6 +570,12 @@
       }
     }
 
+#ifdef COMPILER2
+    if (MaxVectorSize > 16) {
+      // Limit vectors size to 16 bytes on current AMD cpus.
+      FLAG_SET_DEFAULT(MaxVectorSize, 16);
+    }
+#endif // COMPILER2
   }
 
   if( is_intel() ) { // Intel cpus specific settings
@@ -606,15 +638,6 @@
     FLAG_SET_DEFAULT(UsePopCountInstruction, false);
   }
 
-#ifdef COMPILER2
-  if (UseFPUForSpilling) {
-    if (UseSSE < 2) {
-      // Only supported with SSE2+
-      FLAG_SET_DEFAULT(UseFPUForSpilling, false);
-    }
-  }
-#endif
-
   assert(0 <= ReadPrefetchInstr && ReadPrefetchInstr <= 3, "invalid value");
   assert(0 <= AllocatePrefetchInstr && AllocatePrefetchInstr <= 3, "invalid value");
 
--- a/src/cpu/x86/vm/vmreg_x86.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/cpu/x86/vm/vmreg_x86.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -48,8 +48,9 @@
 
   XMMRegister xreg = ::as_XMMRegister(0);
   for ( ; i < ConcreteRegisterImpl::max_xmm ; ) {
-    regName[i++] = xreg->name();
-    regName[i++] = xreg->name();
+    for (int j = 0 ; j < 8 ; j++) {
+      regName[i++] = xreg->name();
+    }
     xreg = xreg->successor();
   }
   for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) {
--- a/src/cpu/x86/vm/vmreg_x86.inline.hpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/cpu/x86/vm/vmreg_x86.inline.hpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -39,7 +39,7 @@
 }
 
 inline VMReg XMMRegisterImpl::as_VMReg() {
-  return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_fpr);
+  return VMRegImpl::as_VMReg((encoding() << 3) + ConcreteRegisterImpl::max_fpr);
 }
 
 
@@ -75,7 +75,7 @@
 inline XMMRegister VMRegImpl::as_XMMRegister() {
   assert( is_XMMRegister() && is_even(value()), "must be" );
   // Yuk
-  return ::as_XMMRegister((value() - ConcreteRegisterImpl::max_fpr) >> 1);
+  return ::as_XMMRegister((value() - ConcreteRegisterImpl::max_fpr) >> 3);
 }
 
 inline   bool VMRegImpl::is_concrete() {
--- a/src/cpu/x86/vm/x86.ad	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/cpu/x86/vm/x86.ad	Fri Jun 29 17:04:39 2012 -0700
@@ -24,6 +24,456 @@
 
 // X86 Common Architecture Description File
 
+//----------REGISTER DEFINITION BLOCK------------------------------------------
+// This information is used by the matcher and the register allocator to
+// describe individual registers and classes of registers within the target
+// archtecture.
+
+register %{
+//----------Architecture Description Register Definitions----------------------
+// General Registers
+// "reg_def"  name ( register save type, C convention save type,
+//                   ideal register type, encoding );
+// Register Save Types:
+//
+// NS  = No-Save:       The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method, &
+//                      that they do not need to be saved at call sites.
+//
+// SOC = Save-On-Call:  The register allocator assumes that these registers
+//                      can be used without saving upon entry to the method,
+//                      but that they must be saved at call sites.
+//
+// SOE = Save-On-Entry: The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, but they do not need to be saved at call
+//                      sites.
+//
+// AS  = Always-Save:   The register allocator assumes that these registers
+//                      must be saved before using them upon entry to the
+//                      method, & that they must be saved at call sites.
+//
+// Ideal Register Type is used to determine how to save & restore a
+// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
+// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
+//
+// The encoding number is the actual bit-pattern placed into the opcodes.
+
+// XMM registers.  256-bit registers or 8 words each, labeled (a)-h.
+// Word a in each register holds a Float, words ab hold a Double.
+// The whole registers are used in SSE4.2 version intrinsics,
+// array copy stubs and superword operations (see UseSSE42Intrinsics,
+// UseXMMForArrayCopy and UseSuperword flags).
+// XMM8-XMM15 must be encoded with REX (VEX for UseAVX).
+// Linux ABI:   No register preserved across function calls
+//              XMM0-XMM7 might hold parameters
+// Windows ABI: XMM6-XMM15 preserved across function calls
+//              XMM0-XMM3 might hold parameters
+
+reg_def XMM0 ( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
+reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
+reg_def XMM0c( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next());
+reg_def XMM0d( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next());
+reg_def XMM0e( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next());
+reg_def XMM0f( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM0g( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM0h( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM1 ( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
+reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
+reg_def XMM1c( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next());
+reg_def XMM1d( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next());
+reg_def XMM1e( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next());
+reg_def XMM1f( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM1g( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM1h( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM2 ( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
+reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
+reg_def XMM2c( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next());
+reg_def XMM2d( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next());
+reg_def XMM2e( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next());
+reg_def XMM2f( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM2g( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM2h( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM3 ( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
+reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
+reg_def XMM3c( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next());
+reg_def XMM3d( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next());
+reg_def XMM3e( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next());
+reg_def XMM3f( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM3g( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM3h( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM4 ( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
+reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
+reg_def XMM4c( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next());
+reg_def XMM4d( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next());
+reg_def XMM4e( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next());
+reg_def XMM4f( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM4g( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM4h( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM5 ( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
+reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
+reg_def XMM5c( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next());
+reg_def XMM5d( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next());
+reg_def XMM5e( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next());
+reg_def XMM5f( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM5g( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM5h( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+#ifdef _WIN64
+
+reg_def XMM6 ( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg());
+reg_def XMM6b( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next());
+reg_def XMM6c( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
+reg_def XMM6d( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
+reg_def XMM6e( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
+reg_def XMM6f( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM6g( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM6h( SOC, SOE, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM7 ( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg());
+reg_def XMM7b( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next());
+reg_def XMM7c( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
+reg_def XMM7d( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
+reg_def XMM7e( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
+reg_def XMM7f( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM7g( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM7h( SOC, SOE, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM8 ( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg());
+reg_def XMM8b( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next());
+reg_def XMM8c( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
+reg_def XMM8d( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
+reg_def XMM8e( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
+reg_def XMM8f( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM8g( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM8h( SOC, SOE, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM9 ( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg());
+reg_def XMM9b( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next());
+reg_def XMM9c( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
+reg_def XMM9d( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
+reg_def XMM9e( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
+reg_def XMM9f( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM9g( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM9h( SOC, SOE, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM10 ( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
+reg_def XMM10b( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
+reg_def XMM10c( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
+reg_def XMM10d( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
+reg_def XMM10e( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
+reg_def XMM10f( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM10g( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM10h( SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM11 ( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
+reg_def XMM11b( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
+reg_def XMM11c( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
+reg_def XMM11d( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
+reg_def XMM11e( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
+reg_def XMM11f( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM11g( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM11h( SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM12 ( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
+reg_def XMM12b( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
+reg_def XMM12c( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
+reg_def XMM12d( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
+reg_def XMM12e( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
+reg_def XMM12f( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM12g( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM12h( SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM13 ( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
+reg_def XMM13b( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
+reg_def XMM13c( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
+reg_def XMM13d( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
+reg_def XMM13e( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
+reg_def XMM13f( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM13g( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM13h( SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM14 ( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
+reg_def XMM14b( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
+reg_def XMM14c( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
+reg_def XMM14d( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
+reg_def XMM14e( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
+reg_def XMM14f( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM14g( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM14h( SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM15 ( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
+reg_def XMM15b( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
+reg_def XMM15c( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
+reg_def XMM15d( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
+reg_def XMM15e( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
+reg_def XMM15f( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM15g( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM15h( SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+#else // _WIN64
+
+reg_def XMM6 ( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
+reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
+reg_def XMM6c( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next());
+reg_def XMM6d( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next());
+reg_def XMM6e( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next());
+reg_def XMM6f( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM6g( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM6h( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM7 ( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
+reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
+reg_def XMM7c( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next());
+reg_def XMM7d( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next());
+reg_def XMM7e( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next());
+reg_def XMM7f( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM7g( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM7h( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+#ifdef _LP64
+
+reg_def XMM8 ( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg());
+reg_def XMM8b( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next());
+reg_def XMM8c( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next());
+reg_def XMM8d( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next());
+reg_def XMM8e( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next());
+reg_def XMM8f( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM8g( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM8h( SOC, SOC, Op_RegF, 8, xmm8->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM9 ( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg());
+reg_def XMM9b( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next());
+reg_def XMM9c( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next());
+reg_def XMM9d( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next());
+reg_def XMM9e( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next());
+reg_def XMM9f( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM9g( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM9h( SOC, SOC, Op_RegF, 9, xmm9->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM10 ( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
+reg_def XMM10b( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
+reg_def XMM10c( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next());
+reg_def XMM10d( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next());
+reg_def XMM10e( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next());
+reg_def XMM10f( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM10g( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM10h( SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM11 ( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
+reg_def XMM11b( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
+reg_def XMM11c( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next());
+reg_def XMM11d( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next());
+reg_def XMM11e( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next());
+reg_def XMM11f( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM11g( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM11h( SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM12 ( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
+reg_def XMM12b( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
+reg_def XMM12c( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next());
+reg_def XMM12d( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next());
+reg_def XMM12e( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next());
+reg_def XMM12f( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM12g( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM12h( SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM13 ( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
+reg_def XMM13b( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
+reg_def XMM13c( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next());
+reg_def XMM13d( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next());
+reg_def XMM13e( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next());
+reg_def XMM13f( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM13g( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM13h( SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM14 ( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
+reg_def XMM14b( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
+reg_def XMM14c( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next());
+reg_def XMM14d( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next());
+reg_def XMM14e( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next());
+reg_def XMM14f( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM14g( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM14h( SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+reg_def XMM15 ( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
+reg_def XMM15b( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
+reg_def XMM15c( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next());
+reg_def XMM15d( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next());
+reg_def XMM15e( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next());
+reg_def XMM15f( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next());
+reg_def XMM15g( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next());
+reg_def XMM15h( SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next()->next()->next()->next()->next()->next()->next());
+
+#endif // _LP64
+
+#endif // _WIN64
+
+#ifdef _LP64
+reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
+#else
+reg_def RFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
+#endif // _LP64
+
+alloc_class chunk1(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
+                   XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
+                   XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
+                   XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
+                   XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
+                   XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
+                   XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
+                   XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
+#ifdef _LP64
+                  ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
+                   XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
+                   XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
+                   XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
+                   XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
+                   XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
+                   XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
+                   XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
+#endif
+                   );
+
+// flags allocation class should be last.
+alloc_class chunk2(RFLAGS);
+
+// Singleton class for condition codes
+reg_class int_flags(RFLAGS);
+
+// Class for all float registers
+reg_class float_reg(XMM0,
+                    XMM1,
+                    XMM2,
+                    XMM3,
+                    XMM4,
+                    XMM5,
+                    XMM6,
+                    XMM7
+#ifdef _LP64
+                   ,XMM8,
+                    XMM9,
+                    XMM10,
+                    XMM11,
+                    XMM12,
+                    XMM13,
+                    XMM14,
+                    XMM15
+#endif
+                    );
+
+// Class for all double registers
+reg_class double_reg(XMM0,  XMM0b,
+                     XMM1,  XMM1b,
+                     XMM2,  XMM2b,
+                     XMM3,  XMM3b,
+                     XMM4,  XMM4b,
+                     XMM5,  XMM5b,
+                     XMM6,  XMM6b,
+                     XMM7,  XMM7b
+#ifdef _LP64
+                    ,XMM8,  XMM8b,
+                     XMM9,  XMM9b,
+                     XMM10, XMM10b,
+                     XMM11, XMM11b,
+                     XMM12, XMM12b,
+                     XMM13, XMM13b,
+                     XMM14, XMM14b,
+                     XMM15, XMM15b
+#endif
+                     );
+
+// Class for all 32bit vector registers
+reg_class vectors_reg(XMM0,
+                      XMM1,
+                      XMM2,
+                      XMM3,
+                      XMM4,
+                      XMM5,
+                      XMM6,
+                      XMM7
+#ifdef _LP64
+                     ,XMM8,
+                      XMM9,
+                      XMM10,
+                      XMM11,
+                      XMM12,
+                      XMM13,
+                      XMM14,
+                      XMM15
+#endif
+                      );
+
+// Class for all 64bit vector registers
+reg_class vectord_reg(XMM0,  XMM0b,
+                      XMM1,  XMM1b,
+                      XMM2,  XMM2b,
+                      XMM3,  XMM3b,
+                      XMM4,  XMM4b,
+                      XMM5,  XMM5b,
+                      XMM6,  XMM6b,
+                      XMM7,  XMM7b
+#ifdef _LP64
+                     ,XMM8,  XMM8b,
+                      XMM9,  XMM9b,
+                      XMM10, XMM10b,
+                      XMM11, XMM11b,
+                      XMM12, XMM12b,
+                      XMM13, XMM13b,
+                      XMM14, XMM14b,
+                      XMM15, XMM15b
+#endif
+                      );
+
+// Class for all 128bit vector registers
+reg_class vectorx_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,
+                      XMM1,  XMM1b,  XMM1c,  XMM1d,
+                      XMM2,  XMM2b,  XMM2c,  XMM2d,
+                      XMM3,  XMM3b,  XMM3c,  XMM3d,
+                      XMM4,  XMM4b,  XMM4c,  XMM4d,
+                      XMM5,  XMM5b,  XMM5c,  XMM5d,
+                      XMM6,  XMM6b,  XMM6c,  XMM6d,
+                      XMM7,  XMM7b,  XMM7c,  XMM7d
+#ifdef _LP64
+                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,
+                      XMM9,  XMM9b,  XMM9c,  XMM9d,
+                      XMM10, XMM10b, XMM10c, XMM10d,
+                      XMM11, XMM11b, XMM11c, XMM11d,
+                      XMM12, XMM12b, XMM12c, XMM12d,
+                      XMM13, XMM13b, XMM13c, XMM13d,
+                      XMM14, XMM14b, XMM14c, XMM14d,
+                      XMM15, XMM15b, XMM15c, XMM15d
+#endif
+                      );
+
+// Class for all 256bit vector registers
+reg_class vectory_reg(XMM0,  XMM0b,  XMM0c,  XMM0d,  XMM0e,  XMM0f,  XMM0g,  XMM0h,
+                      XMM1,  XMM1b,  XMM1c,  XMM1d,  XMM1e,  XMM1f,  XMM1g,  XMM1h,
+                      XMM2,  XMM2b,  XMM2c,  XMM2d,  XMM2e,  XMM2f,  XMM2g,  XMM2h,
+                      XMM3,  XMM3b,  XMM3c,  XMM3d,  XMM3e,  XMM3f,  XMM3g,  XMM3h,
+                      XMM4,  XMM4b,  XMM4c,  XMM4d,  XMM4e,  XMM4f,  XMM4g,  XMM4h,
+                      XMM5,  XMM5b,  XMM5c,  XMM5d,  XMM5e,  XMM5f,  XMM5g,  XMM5h,
+                      XMM6,  XMM6b,  XMM6c,  XMM6d,  XMM6e,  XMM6f,  XMM6g,  XMM6h,
+                      XMM7,  XMM7b,  XMM7c,  XMM7d,  XMM7e,  XMM7f,  XMM7g,  XMM7h
+#ifdef _LP64
+                     ,XMM8,  XMM8b,  XMM8c,  XMM8d,  XMM8e,  XMM8f,  XMM8g,  XMM8h,
+                      XMM9,  XMM9b,  XMM9c,  XMM9d,  XMM9e,  XMM9f,  XMM9g,  XMM9h,
+                      XMM10, XMM10b, XMM10c, XMM10d, XMM10e, XMM10f, XMM10g, XMM10h,
+                      XMM11, XMM11b, XMM11c, XMM11d, XMM11e, XMM11f, XMM11g, XMM11h,
+                      XMM12, XMM12b, XMM12c, XMM12d, XMM12e, XMM12f, XMM12g, XMM12h,
+                      XMM13, XMM13b, XMM13c, XMM13d, XMM13e, XMM13f, XMM13g, XMM13h,
+                      XMM14, XMM14b, XMM14c, XMM14d, XMM14e, XMM14f, XMM14g, XMM14h,
+                      XMM15, XMM15b, XMM15c, XMM15d, XMM15e, XMM15f, XMM15g, XMM15h
+#endif
+                      );
+
+%}
+
 source %{
   // Float masks come from different places depending on platform.
 #ifdef _LP64
@@ -38,6 +488,252 @@
   static address double_signflip() { return (address)double_signflip_pool; }
 #endif
 
+// Map Types to machine register types
+const int Matcher::base2reg[Type::lastype] = {
+  Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
+  Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
+  Op_VecS, Op_VecD, Op_VecX, Op_VecY, /* Vectors */
+  Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
+  0, 0/*abio*/,
+  Op_RegP /* Return address */, 0, /* the memories */
+  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
+  0  /*bottom*/
+};
+
+// Max vector size in bytes. 0 if not supported.
+const int Matcher::vector_width_in_bytes(BasicType bt) {
+  assert(is_java_primitive(bt), "only primitive type vectors");
+  if (UseSSE < 2) return 0;
+  // SSE2 supports 128bit vectors for all types.
+  // AVX2 supports 256bit vectors for all types.
+  int size = (UseAVX > 1) ? 32 : 16;
+  // AVX1 supports 256bit vectors only for FLOAT and DOUBLE.
+  if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE))
+    size = 32;
+  // Use flag to limit vector size.
+  size = MIN2(size,(int)MaxVectorSize);
+  // Minimum 2 values in vector (or 4 for bytes).
+  switch (bt) {
+  case T_DOUBLE:
+  case T_LONG:
+    if (size < 16) return 0;
+  case T_FLOAT:
+  case T_INT:
+    if (size < 8) return 0;
+  case T_BOOLEAN:
+  case T_BYTE:
+  case T_CHAR:
+  case T_SHORT:
+    if (size < 4) return 0;
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  return size;
+}
+
+// Limits on vector size (number of elements) loaded into vector.
+const int Matcher::max_vector_size(const BasicType bt) {
+  return vector_width_in_bytes(bt)/type2aelembytes(bt);
+}
+const int Matcher::min_vector_size(const BasicType bt) {
+  int max_size = max_vector_size(bt);
+  // Min size which can be loaded into vector is 4 bytes.
+  int size = (type2aelembytes(bt) == 1) ? 4 : 2;
+  return MIN2(size,max_size);
+}
+
+// Vector ideal reg corresponding to specidied size in bytes
+const int Matcher::vector_ideal_reg(int size) {
+  assert(MaxVectorSize >= size, "");
+  switch(size) {
+    case  4: return Op_VecS;
+    case  8: return Op_VecD;
+    case 16: return Op_VecX;
+    case 32: return Op_VecY;
+  }
+  ShouldNotReachHere();
+  return 0;
+}
+
+// x86 supports misaligned vectors store/load.
+const bool Matcher::misaligned_vectors_ok() {
+  return !AlignVector; // can be changed by flag
+}
+
+// Helper methods for MachSpillCopyNode::implementation().
+static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+                          int src_hi, int dst_hi, uint ireg, outputStream* st) {
+  // In 64-bit VM size calculation is very complex. Emitting instructions
+  // into scratch buffer is used to get size in 64-bit VM.
+  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
+  assert(ireg == Op_VecS || // 32bit vector
+         (src_lo & 1) == 0 && (src_lo + 1) == src_hi &&
+         (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi,
+         "no non-adjacent vector moves" );
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    int offset = __ offset();
+    switch (ireg) {
+    case Op_VecS: // copy whole register
+    case Op_VecD:
+    case Op_VecX:
+      __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+      break;
+    case Op_VecY:
+      __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]));
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+    int size = __ offset() - offset;
+#ifdef ASSERT
+    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
+    assert(!do_size || size == 4, "incorrect size calculattion");
+#endif
+    return size;
+#ifndef PRODUCT
+  } else if (!do_size) {
+    switch (ireg) {
+    case Op_VecS:
+    case Op_VecD:
+    case Op_VecX:
+      st->print("movdqu  %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
+      break;
+    case Op_VecY:
+      st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+#endif
+  }
+  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
+  return 4;
+}
+
+static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+                            int stack_offset, int reg, uint ireg, outputStream* st) {
+  // In 64-bit VM size calculation is very complex. Emitting instructions
+  // into scratch buffer is used to get size in 64-bit VM.
+  LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); )
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    int offset = __ offset();
+    if (is_load) {
+      switch (ireg) {
+      case Op_VecS:
+        __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+        break;
+      case Op_VecD:
+        __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+        break;
+      case Op_VecX:
+        __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+        break;
+      case Op_VecY:
+        __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset));
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else { // store
+      switch (ireg) {
+      case Op_VecS:
+        __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+        break;
+      case Op_VecD:
+        __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+        break;
+      case Op_VecX:
+        __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+        break;
+      case Op_VecY:
+        __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]));
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+    }
+    int size = __ offset() - offset;
+#ifdef ASSERT
+    int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
+    // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
+    assert(!do_size || size == (5+offset_size), "incorrect size calculattion");
+#endif
+    return size;
+#ifndef PRODUCT
+  } else if (!do_size) {
+    if (is_load) {
+      switch (ireg) {
+      case Op_VecS:
+        st->print("movd    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+        break;
+      case Op_VecD:
+        st->print("movq    %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+        break;
+       case Op_VecX:
+        st->print("movdqu  %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+        break;
+      case Op_VecY:
+        st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset);
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else { // store
+      switch (ireg) {
+      case Op_VecS:
+        st->print("movd    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+        break;
+      case Op_VecD:
+        st->print("movq    [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+        break;
+       case Op_VecX:
+        st->print("movdqu  [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+        break;
+      case Op_VecY:
+        st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]);
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+    }
+#endif
+  }
+  int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : 4);
+  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
+  return 5+offset_size;
+}
+
+static inline jfloat replicate4_imm(int con, int width) {
+  // Load a constant of "width" (in bytes) and replicate it to fill 32bit.
+  assert(width == 1 || width == 2, "only byte or short types here");
+  int bit_width = width * 8;
+  jint val = con;
+  val &= (1 << bit_width) - 1;  // mask off sign bits
+  while(bit_width < 32) {
+    val |= (val << bit_width);
+    bit_width <<= 1;
+  }
+  jfloat fval = *((jfloat*) &val);  // coerce to float type
+  return fval;
+}
+
+static inline jdouble replicate8_imm(int con, int width) {
+  // Load a constant of "width" (in bytes) and replicate it to fill 64bit.
+  assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here");
+  int bit_width = width * 8;
+  jlong val = con;
+  val &= (((jlong) 1) << bit_width) - 1;  // mask off sign bits
+  while(bit_width < 64) {
+    val |= (val << bit_width);
+    bit_width <<= 1;
+  }
+  jdouble dval = *((jdouble*) &val);  // coerce to double type
+  return dval;
+}
+
 #ifndef PRODUCT
   void MachNopNode::format(PhaseRegAlloc*, outputStream* st) const {
     st->print("nop \t# %d bytes pad for loops and calls", _count);
@@ -103,6 +799,46 @@
 
 %}
 
+
+//----------OPERANDS-----------------------------------------------------------
+// Operand definitions must precede instruction definitions for correct parsing
+// in the ADLC because operands constitute user defined types which are used in
+// instruction definitions.
+
+// Vectors
+operand vecS() %{
+  constraint(ALLOC_IN_RC(vectors_reg));
+  match(VecS);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vecD() %{
+  constraint(ALLOC_IN_RC(vectord_reg));
+  match(VecD);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vecX() %{
+  constraint(ALLOC_IN_RC(vectorx_reg));
+  match(VecX);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+operand vecY() %{
+  constraint(ALLOC_IN_RC(vectory_reg));
+  match(VecY);
+
+  format %{ %}
+  interface(REG_INTER);
+%}
+
+
 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
 
 // ============================================================================
@@ -852,3 +1588,797 @@
   ins_pipe(pipe_slow);
 %}
 
+
+// ====================VECTOR INSTRUCTIONS=====================================
+
+// Load vectors (4 bytes long)
+instruct loadV4(vecS dst, memory mem) %{
+  predicate(n->as_LoadVector()->memory_size() == 4);
+  match(Set dst (LoadVector mem));
+  ins_cost(125);
+  format %{ "movd    $dst,$mem\t! load vector (4 bytes)" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $mem$$Address);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Load vectors (8 bytes long)
+instruct loadV8(vecD dst, memory mem) %{
+  predicate(n->as_LoadVector()->memory_size() == 8);
+  match(Set dst (LoadVector mem));
+  ins_cost(125);
+  format %{ "movq    $dst,$mem\t! load vector (8 bytes)" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Load vectors (16 bytes long)
+instruct loadV16(vecX dst, memory mem) %{
+  predicate(n->as_LoadVector()->memory_size() == 16);
+  match(Set dst (LoadVector mem));
+  ins_cost(125);
+  format %{ "movdqu  $dst,$mem\t! load vector (16 bytes)" %}
+  ins_encode %{
+    __ movdqu($dst$$XMMRegister, $mem$$Address);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Load vectors (32 bytes long)
+instruct loadV32(vecY dst, memory mem) %{
+  predicate(n->as_LoadVector()->memory_size() == 32);
+  match(Set dst (LoadVector mem));
+  ins_cost(125);
+  format %{ "vmovdqu $dst,$mem\t! load vector (32 bytes)" %}
+  ins_encode %{
+    __ vmovdqu($dst$$XMMRegister, $mem$$Address);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Store vectors
+instruct storeV4(memory mem, vecS src) %{
+  predicate(n->as_StoreVector()->memory_size() == 4);
+  match(Set mem (StoreVector mem src));
+  ins_cost(145);
+  format %{ "movd    $mem,$src\t! store vector (4 bytes)" %}
+  ins_encode %{
+    __ movdl($mem$$Address, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct storeV8(memory mem, vecD src) %{
+  predicate(n->as_StoreVector()->memory_size() == 8);
+  match(Set mem (StoreVector mem src));
+  ins_cost(145);
+  format %{ "movq    $mem,$src\t! store vector (8 bytes)" %}
+  ins_encode %{
+    __ movq($mem$$Address, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct storeV16(memory mem, vecX src) %{
+  predicate(n->as_StoreVector()->memory_size() == 16);
+  match(Set mem (StoreVector mem src));
+  ins_cost(145);
+  format %{ "movdqu  $mem,$src\t! store vector (16 bytes)" %}
+  ins_encode %{
+    __ movdqu($mem$$Address, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct storeV32(memory mem, vecY src) %{
+  predicate(n->as_StoreVector()->memory_size() == 32);
+  match(Set mem (StoreVector mem src));
+  ins_cost(145);
+  format %{ "vmovdqu $mem,$src\t! store vector (32 bytes)" %}
+  ins_encode %{
+    __ vmovdqu($mem$$Address, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate byte scalar to be vector
+instruct Repl4B(vecS dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateB src));
+  format %{ "movd    $dst,$src\n\t"
+            "punpcklbw $dst,$dst\n\t"
+            "pshuflw $dst,$dst,0x00\t! replicate4B" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8B(vecD dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateB src));
+  format %{ "movd    $dst,$src\n\t"
+            "punpcklbw $dst,$dst\n\t"
+            "pshuflw $dst,$dst,0x00\t! replicate8B" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16B(vecX dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (ReplicateB src));
+  format %{ "movd    $dst,$src\n\t"
+            "punpcklbw $dst,$dst\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "movlhps $dst,$dst\t! replicate16B" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32B(vecY dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 32);
+  match(Set dst (ReplicateB src));
+  format %{ "movd    $dst,$src\n\t"
+            "punpcklbw $dst,$dst\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "movlhps $dst,$dst\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate32B" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate byte scalar immediate to be vector by loading from const table.
+instruct Repl4B_imm(vecS dst, immI con) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateB con));
+  format %{ "movss   $dst,[$constantaddress]\t! replicate4B($con)" %}
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 1)));
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8B_imm(vecD dst, immI con) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateB con));
+  format %{ "movsd   $dst,[$constantaddress]\t! replicate8B($con)" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16B_imm(vecX dst, immI con) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (ReplicateB con));
+  format %{ "movsd   $dst,[$constantaddress]\t! replicate16B($con)\n\t"
+            "movlhps $dst,$dst" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl32B_imm(vecY dst, immI con) %{
+  predicate(n->as_Vector()->length() == 32);
+  match(Set dst (ReplicateB con));
+  format %{ "movsd   $dst,[$constantaddress]\t! lreplicate32B($con)\n\t"
+            "movlhps $dst,$dst\n\t"
+            "vinsertf128h $dst,$dst,$dst" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 1)));
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate byte scalar zero to be vector
+instruct Repl4B_zero(vecS dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateB zero));
+  format %{ "pxor    $dst,$dst\t! replicate4B zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8B_zero(vecD dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateB zero));
+  format %{ "pxor    $dst,$dst\t! replicate8B zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl16B_zero(vecX dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (ReplicateB zero));
+  format %{ "pxor    $dst,$dst\t! replicate16B zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl32B_zero(vecY dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 32);
+  match(Set dst (ReplicateB zero));
+  format %{ "vxorpd  $dst,$dst,$dst\t! replicate32B zero" %}
+  ins_encode %{
+    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
+    bool vector256 = true;
+    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate char/short (2 byte) scalar to be vector
+instruct Repl2S(vecS dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\t! replicate2S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4S(vecD dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\t! replicate4S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8S(vecX dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "movlhps $dst,$dst\t! replicate8S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16S(vecY dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (ReplicateS src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshuflw $dst,$dst,0x00\n\t"
+            "movlhps $dst,$dst\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate16S" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate char/short (2 byte) scalar immediate to be vector by loading from const table.
+instruct Repl2S_imm(vecS dst, immI con) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateS con));
+  format %{ "movss   $dst,[$constantaddress]\t! replicate2S($con)" %}
+  ins_encode %{
+    __ movflt($dst$$XMMRegister, $constantaddress(replicate4_imm($con$$constant, 2)));
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4S_imm(vecD dst, immI con) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateS con));
+  format %{ "movsd   $dst,[$constantaddress]\t! replicate4S($con)" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8S_imm(vecX dst, immI con) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateS con));
+  format %{ "movsd   $dst,[$constantaddress]\t! replicate8S($con)\n\t"
+            "movlhps $dst,$dst" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl16S_imm(vecY dst, immI con) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (ReplicateS con));
+  format %{ "movsd   $dst,[$constantaddress]\t! replicate16S($con)\n\t"
+            "movlhps $dst,$dst\n\t"
+            "vinsertf128h $dst,$dst,$dst" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 2)));
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate char/short (2 byte) scalar zero to be vector
+instruct Repl2S_zero(vecS dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateS zero));
+  format %{ "pxor    $dst,$dst\t! replicate2S zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4S_zero(vecD dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateS zero));
+  format %{ "pxor    $dst,$dst\t! replicate4S zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8S_zero(vecX dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateS zero));
+  format %{ "pxor    $dst,$dst\t! replicate8S zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl16S_zero(vecY dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (ReplicateS zero));
+  format %{ "vxorpd  $dst,$dst,$dst\t! replicate16S zero" %}
+  ins_encode %{
+    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
+    bool vector256 = true;
+    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate integer (4 byte) scalar to be vector
+instruct Repl2I(vecD dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshufd  $dst,$dst,0x00\t! replicate2I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4I(vecX dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateI src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I(vecY dst, rRegI src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateI src));
+  format %{ "movd    $dst,$src\n\t"
+            "pshufd  $dst,$dst,0x00\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate integer (4 byte) scalar immediate to be vector by loading from const table.
+instruct Repl2I_imm(vecD dst, immI con) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI con));
+  format %{ "movsd   $dst,[$constantaddress]\t! replicate2I($con)" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4I_imm(vecX dst, immI con) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateI con));
+  format %{ "movsd   $dst,[$constantaddress]\t! replicate4I($con)\n\t"
+            "movlhps $dst,$dst" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I_imm(vecY dst, immI con) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateI con));
+  format %{ "movsd   $dst,[$constantaddress]\t! replicate8I($con)\n\t"
+            "movlhps $dst,$dst\n\t"
+            "vinsertf128h $dst,$dst,$dst" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $constantaddress(replicate8_imm($con$$constant, 4)));
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integer could be loaded into xmm register directly from memory.
+instruct Repl2I_mem(vecD dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI (LoadVector mem)));
+  format %{ "movd    $dst,$mem\n\t"
+            "pshufd  $dst,$dst,0x00\t! replicate2I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $mem$$Address);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4I_mem(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateI (LoadVector mem)));
+  format %{ "movd    $dst,$mem\n\t"
+            "pshufd  $dst,$dst,0x00\t! replicate4I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $mem$$Address);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8I_mem(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateI (LoadVector mem)));
+  format %{ "movd    $dst,$mem\n\t"
+            "pshufd  $dst,$dst,0x00\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate8I" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $mem$$Address);
+    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate integer (4 byte) scalar zero to be vector
+instruct Repl2I_zero(vecD dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateI zero));
+  format %{ "pxor    $dst,$dst\t! replicate2I" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4I_zero(vecX dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateI zero));
+  format %{ "pxor    $dst,$dst\t! replicate4I zero)" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8I_zero(vecY dst, immI0 zero) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateI zero));
+  format %{ "vxorpd  $dst,$dst,$dst\t! replicate8I zero" %}
+  ins_encode %{
+    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
+    bool vector256 = true;
+    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate long (8 byte) scalar to be vector
+#ifdef _LP64
+instruct Repl2L(vecX dst, rRegL src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateL src));
+  format %{ "movdq   $dst,$src\n\t"
+            "movlhps $dst,$dst\t! replicate2L" %}
+  ins_encode %{
+    __ movdq($dst$$XMMRegister, $src$$Register);
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4L(vecY dst, rRegL src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateL src));
+  format %{ "movdq   $dst,$src\n\t"
+            "movlhps $dst,$dst\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
+  ins_encode %{
+    __ movdq($dst$$XMMRegister, $src$$Register);
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+#else // _LP64
+instruct Repl2L(vecX dst, eRegL src, regD tmp) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateL src));
+  effect(TEMP dst, USE src, TEMP tmp);
+  format %{ "movdl   $dst,$src.lo\n\t"
+            "movdl   $tmp,$src.hi\n\t"
+            "punpckldq $dst,$tmp\n\t"
+            "movlhps $dst,$dst\t! replicate2L"%}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4L(vecY dst, eRegL src, regD tmp) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateL src));
+  effect(TEMP dst, USE src, TEMP tmp);
+  format %{ "movdl   $dst,$src.lo\n\t"
+            "movdl   $tmp,$src.hi\n\t"
+            "punpckldq $dst,$tmp\n\t"
+            "movlhps $dst,$dst\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
+  ins_encode %{
+    __ movdl($dst$$XMMRegister, $src$$Register);
+    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
+    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+#endif // _LP64
+
+// Replicate long (8 byte) scalar immediate to be vector by loading from const table.
+instruct Repl2L_imm(vecX dst, immL con) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateL con));
+  format %{ "movsd   $dst,[$constantaddress]\t! replicate2L($con)\n\t"
+            "movlhps $dst,$dst" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $constantaddress($con));
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4L_imm(vecY dst, immL con) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateL con));
+  format %{ "movsd   $dst,[$constantaddress]\t! replicate4L($con)\n\t"
+            "movlhps $dst,$dst\n\t"
+            "vinsertf128h $dst,$dst,$dst" %}
+  ins_encode %{
+    __ movdbl($dst$$XMMRegister, $constantaddress($con));
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Long could be loaded into xmm register directly from memory.
+instruct Repl2L_mem(vecX dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateL (LoadVector mem)));
+  format %{ "movq    $dst,$mem\n\t"
+            "movlhps $dst,$dst\t! replicate2L" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4L_mem(vecY dst, memory mem) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateL (LoadVector mem)));
+  format %{ "movq    $dst,$mem\n\t"
+            "movlhps $dst,$dst\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate4L" %}
+  ins_encode %{
+    __ movq($dst$$XMMRegister, $mem$$Address);
+    __ movlhps($dst$$XMMRegister, $dst$$XMMRegister);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate long (8 byte) scalar zero to be vector
+instruct Repl2L_zero(vecX dst, immL0 zero) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateL zero));
+  format %{ "pxor    $dst,$dst\t! replicate2L zero" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4L_zero(vecY dst, immL0 zero) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateL zero));
+  format %{ "vxorpd  $dst,$dst,$dst\t! replicate4L zero" %}
+  ins_encode %{
+    // Use vxorpd since AVX does not have vpxor for 256-bit (AVX2 will have it).
+    bool vector256 = true;
+    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate float (4 byte) scalar to be vector
+instruct Repl2F(vecD dst, regF src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateF src));
+  format %{ "pshufd  $dst,$dst,0x00\t! replicate2F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4F(vecX dst, regF src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateF src));
+  format %{ "pshufd  $dst,$dst,0x00\t! replicate4F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl8F(vecY dst, regF src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateF src));
+  format %{ "pshufd  $dst,$src,0x00\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate8F" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate float (4 byte) scalar zero to be vector
+instruct Repl2F_zero(vecD dst, immF0 zero) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateF zero));
+  format %{ "xorps   $dst,$dst\t! replicate2F zero" %}
+  ins_encode %{
+    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4F_zero(vecX dst, immF0 zero) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateF zero));
+  format %{ "xorps   $dst,$dst\t! replicate4F zero" %}
+  ins_encode %{
+    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl8F_zero(vecY dst, immF0 zero) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (ReplicateF zero));
+  format %{ "vxorps  $dst,$dst,$dst\t! replicate8F zero" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vxorps($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+// Replicate double (8 bytes) scalar to be vector
+instruct Repl2D(vecX dst, regD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateD src));
+  format %{ "pshufd  $dst,$src,0x44\t! replicate2D" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct Repl4D(vecY dst, regD src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateD src));
+  format %{ "pshufd  $dst,$src,0x44\n\t"
+            "vinsertf128h $dst,$dst,$dst\t! replicate4D" %}
+  ins_encode %{
+    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x44);
+    __ vinsertf128h($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Replicate double (8 byte) scalar zero to be vector
+instruct Repl2D_zero(vecX dst, immD0 zero) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (ReplicateD zero));
+  format %{ "xorpd   $dst,$dst\t! replicate2D zero" %}
+  ins_encode %{
+    __ xorpd($dst$$XMMRegister, $dst$$XMMRegister);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
+instruct Repl4D_zero(vecY dst, immD0 zero) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (ReplicateD zero));
+  format %{ "vxorpd  $dst,$dst,$dst,vect256\t! replicate4D zero" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vxorpd($dst$$XMMRegister, $dst$$XMMRegister, $dst$$XMMRegister, vector256);
+  %}
+  ins_pipe( fpu_reg_reg );
+%}
+
--- a/src/cpu/x86/vm/x86_32.ad	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Fri Jun 29 17:04:39 2012 -0700
@@ -74,9 +74,6 @@
 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
 
-// Special Registers
-reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
-
 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
 // allocator, and only shows up in the encodings.
 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
@@ -105,27 +102,6 @@
 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
 
-// XMM registers.  128-bit registers or 4 words each, labeled a-d.
-// Word a in each register holds a Float, words ab hold a Double.
-// We currently do not use the SIMD capabilities, so registers cd
-// are unused at the moment.
-reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
-reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
-reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
-reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
-reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
-reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
-reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
-reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
-reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
-reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
-reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
-reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
-reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
-reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
-reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
-reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
-
 // Specify priority of register selection within phases of register
 // allocation.  Highest priority is first.  A useful heuristic is to
 // give registers a low priority when they are required by machine
@@ -138,15 +114,6 @@
                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                     FPR6L, FPR6H, FPR7L, FPR7H );
 
-alloc_class chunk1( XMM0a, XMM0b,
-                    XMM1a, XMM1b,
-                    XMM2a, XMM2b,
-                    XMM3a, XMM3b,
-                    XMM4a, XMM4b,
-                    XMM5a, XMM5b,
-                    XMM6a, XMM6b,
-                    XMM7a, XMM7b, EFLAGS);
-
 
 //----------Architecture Description Register Classes--------------------------
 // Several register classes are automatically defined based upon information in
@@ -159,12 +126,12 @@
 // Class for all registers
 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
 // Class for general registers
-reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
+reg_class int_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
 // Class for general registers which may be used for implicit null checks on win95
 // Also safe for use by tailjump. We don't want to allocate in rbp,
-reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
+reg_class int_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
 // Class of "X" registers
-reg_class x_reg(EBX, ECX, EDX, EAX);
+reg_class int_x_reg(EBX, ECX, EDX, EAX);
 // Class of registers that can appear in an address with no offset.
 // EBP and ESP require an extra instruction byte for zero offset.
 // Used in fast-unlock
@@ -193,8 +160,6 @@
 reg_class sp_reg(ESP);
 // Singleton class for instruction pointer
 // reg_class ip_reg(EIP);
-// Singleton class for condition codes
-reg_class int_flags(EFLAGS);
 // Class of integer register pairs
 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
 // Class of integer register pairs that aligns with calling convention
@@ -206,29 +171,18 @@
 // Floating point registers.  Notice FPR0 is not a choice.
 // FPR0 is not ever allocated; we use clever encodings to fake
 // a 2-address instructions out of Intels FP stack.
-reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
-
-// make a register class for SSE registers
-reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
-
-// make a double register class for SSE2 registers
-reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
-                  XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
-
-reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
-                   FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
-                   FPR7L,FPR7H );
-
-reg_class flt_reg0( FPR1L );
-reg_class dbl_reg0( FPR1L,FPR1H );
-reg_class dbl_reg1( FPR2L,FPR2H );
-reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
-                       FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
-
-// XMM6 and XMM7 could be used as temporary registers for long, float and
-// double values for SSE2.
-reg_class xdb_reg6( XMM6a,XMM6b );
-reg_class xdb_reg7( XMM7a,XMM7b );
+reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
+
+reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
+                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
+                      FPR7L,FPR7H );
+
+reg_class fp_flt_reg0( FPR1L );
+reg_class fp_dbl_reg0( FPR1L,FPR1H );
+reg_class fp_dbl_reg1( FPR2L,FPR2H );
+reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
+                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
+
 %}
 
 
@@ -412,7 +366,7 @@
   }
 }
 
-   // eRegI ereg, memory mem) %{    // emit_reg_mem
+   // rRegI ereg, memory mem) %{    // emit_reg_mem
 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
   // There is no index & no scale, use form without SIB byte
   if ((index == 0x4) &&
@@ -787,7 +741,7 @@
 #endif
   }
   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
-  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes.
+  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
   return size+5+offset_size;
 }
 
@@ -821,7 +775,7 @@
     }
 #endif
   }
-  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes.
+  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
   // Only MOVAPS SSE prefix uses 1 byte.
   int sz = 4;
   if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
@@ -903,6 +857,108 @@
   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
 }
 
+// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
+static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+                          int src_hi, int dst_hi, uint ireg, outputStream* st);
+
+static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+                            int stack_offset, int reg, uint ireg, outputStream* st);
+
+static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
+                                     int dst_offset, uint ireg, outputStream* st) {
+  int calc_size = 0;
+  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
+  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
+  switch (ireg) {
+  case Op_VecS:
+    calc_size = 3+src_offset_size + 3+dst_offset_size;
+    break;
+  case Op_VecD:
+    calc_size = 3+src_offset_size + 3+dst_offset_size;
+    src_offset += 4;
+    dst_offset += 4;
+    src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
+    dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
+    calc_size += 3+src_offset_size + 3+dst_offset_size;
+    break;
+  case Op_VecX:
+    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
+    break;
+  case Op_VecY:
+    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    int offset = __ offset();
+    switch (ireg) {
+    case Op_VecS:
+      __ pushl(Address(rsp, src_offset));
+      __ popl (Address(rsp, dst_offset));
+      break;
+    case Op_VecD:
+      __ pushl(Address(rsp, src_offset));
+      __ popl (Address(rsp, dst_offset));
+      __ pushl(Address(rsp, src_offset+4));
+      __ popl (Address(rsp, dst_offset+4));
+      break;
+    case Op_VecX:
+      __ movdqu(Address(rsp, -16), xmm0);
+      __ movdqu(xmm0, Address(rsp, src_offset));
+      __ movdqu(Address(rsp, dst_offset), xmm0);
+      __ movdqu(xmm0, Address(rsp, -16));
+      break;
+    case Op_VecY:
+      __ vmovdqu(Address(rsp, -32), xmm0);
+      __ vmovdqu(xmm0, Address(rsp, src_offset));
+      __ vmovdqu(Address(rsp, dst_offset), xmm0);
+      __ vmovdqu(xmm0, Address(rsp, -32));
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+    int size = __ offset() - offset;
+    assert(size == calc_size, "incorrect size calculattion");
+    return size;
+#ifndef PRODUCT
+  } else if (!do_size) {
+    switch (ireg) {
+    case Op_VecS:
+      st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
+                "popl    [rsp + #%d]",
+                src_offset, dst_offset);
+      break;
+    case Op_VecD:
+      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
+                "popq    [rsp + #%d]\n\t"
+                "pushl   [rsp + #%d]\n\t"
+                "popq    [rsp + #%d]",
+                src_offset, dst_offset, src_offset+4, dst_offset+4);
+      break;
+     case Op_VecX:
+      st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
+                "movdqu  xmm0, [rsp + #%d]\n\t"
+                "movdqu  [rsp + #%d], xmm0\n\t"
+                "movdqu  xmm0, [rsp - #16]",
+                src_offset, dst_offset);
+      break;
+    case Op_VecY:
+      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
+                "vmovdqu xmm0, [rsp + #%d]\n\t"
+                "vmovdqu [rsp + #%d], xmm0\n\t"
+                "vmovdqu xmm0, [rsp - #32]",
+                src_offset, dst_offset);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+#endif
+  }
+  return calc_size;
+}
+
 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
   // Get registers to move
   OptoReg::Name src_second = ra_->get_reg_second(in(1));
@@ -923,6 +979,29 @@
   if( src_first == dst_first && src_second == dst_second )
     return size;            // Self copy, no move
 
+  if (bottom_type()->isa_vect() != NULL) {
+    uint ireg = ideal_reg();
+    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
+    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
+    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity");
+    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
+      // mem -> mem
+      int src_offset = ra_->reg2offset(src_first);
+      int dst_offset = ra_->reg2offset(dst_first);
+      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
+    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
+      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
+    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
+      int stack_offset = ra_->reg2offset(dst_first);
+      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
+    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
+      int stack_offset = ra_->reg2offset(src_first);
+      return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
+    } else {
+      ShouldNotReachHere();
+    }
+  }
+
   // --------------------------------------
   // Check for mem-mem move.  push/pop to move.
   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
@@ -1313,16 +1392,6 @@
   return true;
 }
 
-// Vector width in bytes
-const uint Matcher::vector_width_in_bytes(void) {
-  return UseSSE >= 2 ? 8 : 0;
-}
-
-// Vector ideal reg
-const uint Matcher::vector_ideal_reg(void) {
-  return Op_RegD;
-}
-
 // Is this branch offset short enough that a short branch can be used?
 //
 // NOTE: If the platform does not provide any short branch variants, then
@@ -1452,7 +1521,7 @@
 // arguments in those registers not be available to the callee.
 bool Matcher::can_be_java_arg( int reg ) {
   if(  reg == ECX_num   || reg == EDX_num   ) return true;
-  if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
+  if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
   return false;
 }
@@ -1565,16 +1634,16 @@
     emit_opcode(cbuf,0x66);
   %}
 
-  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
+  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
   %}
 
-  enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{    // OpcRegReg(Many)
+  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
     emit_opcode(cbuf,$opcode$$constant);
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
   %}
 
-  enc_class mov_r32_imm0( eRegI dst ) %{
+  enc_class mov_r32_imm0( rRegI dst ) %{
     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
   %}
@@ -1621,7 +1690,7 @@
   %}
 
   // Dense encoding for older common ops
-  enc_class Opc_plus(immI opcode, eRegI reg) %{
+  enc_class Opc_plus(immI opcode, rRegI reg) %{
     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
   %}
 
@@ -1637,7 +1706,7 @@
     }
   %}
 
-  enc_class OpcSErm (eRegI dst, immI imm) %{    // OpcSEr/m
+  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
     // Emit primary opcode and set sign-extend bit
     // Check for 8-bit immediate, and set sign extend bit in opcode
     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
@@ -1682,7 +1751,7 @@
     else                               emit_d32(cbuf,con);
   %}
 
-  enc_class OpcSReg (eRegI dst) %{    // BSWAP
+  enc_class OpcSReg (rRegI dst) %{    // BSWAP
     emit_cc(cbuf, $secondary, $dst$$reg );
   %}
 
@@ -1700,7 +1769,7 @@
     emit_rm(cbuf, 0x3, destlo, desthi);
   %}
 
-  enc_class RegOpc (eRegI div) %{    // IDIV, IMOD, JMP indirect, ...
+  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
   %}
 
@@ -1891,20 +1960,20 @@
 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
 //   %}
 
-  enc_class RegOpcImm (eRegI dst, immI8 shift) %{    // SHL, SAR, SHR
+  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
     $$$emit8$primary;
     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
     $$$emit8$shift$$constant;
   %}
 
-  enc_class LdImmI (eRegI dst, immI src) %{    // Load Immediate
+  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
     // Load immediate does not have a zero or sign extended version
     // for 8-bit immediates
     emit_opcode(cbuf, 0xB8 + $dst$$reg);
     $$$emit32$src$$constant;
   %}
 
-  enc_class LdImmP (eRegI dst, immI src) %{    // Load Immediate
+  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
     // Load immediate does not have a zero or sign extended version
     // for 8-bit immediates
     emit_opcode(cbuf, $primary + $dst$$reg);
@@ -1943,15 +2012,15 @@
 
 
   // Encode a reg-reg copy.  If it is useless, then empty encoding.
-  enc_class enc_Copy( eRegI dst, eRegI src ) %{
+  enc_class enc_Copy( rRegI dst, rRegI src ) %{
     encode_Copy( cbuf, $dst$$reg, $src$$reg );
   %}
 
-  enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
+  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
     encode_Copy( cbuf, $dst$$reg, $src$$reg );
   %}
 
-  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
+  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
   %}
 
@@ -1973,7 +2042,7 @@
     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
   %}
 
-  enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
+  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
   %}
 
@@ -2068,7 +2137,7 @@
     cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
   %}
 
-  enc_class RegMem (eRegI ereg, memory mem) %{    // emit_reg_mem
+  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
     int reg_encoding = $ereg$$reg;
     int base  = $mem$$base;
     int index = $mem$$index;
@@ -2132,7 +2201,7 @@
 
   // Clone of RegMem but accepts an extra parameter to access each
   // half of a double in memory; it never needs relocation info.
-  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
+  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
     emit_opcode(cbuf,$opcode$$constant);
     int reg_encoding = $rm_reg$$reg;
     int base     = $mem$$base;
@@ -2168,7 +2237,7 @@
     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
   %}
 
-  enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{    // emit_reg_lea
+  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
     int reg_encoding = $dst$$reg;
     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
     int index        = 0x04;            // 0x04 indicates no index
@@ -2178,7 +2247,7 @@
     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
   %}
 
-  enc_class min_enc (eRegI dst, eRegI src) %{    // MIN
+  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
     // Compare dst,src
     emit_opcode(cbuf,0x3B);
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
@@ -2190,7 +2259,7 @@
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
   %}
 
-  enc_class max_enc (eRegI dst, eRegI src) %{    // MAX
+  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
     // Compare dst,src
     emit_opcode(cbuf,0x3B);
     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
@@ -2221,7 +2290,7 @@
     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
   %}
 
-  enc_class neg_reg(eRegI dst) %{
+  enc_class neg_reg(rRegI dst) %{
     // NEG $dst
     emit_opcode(cbuf,0xF7);
     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
@@ -2251,7 +2320,7 @@
     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
   %}
 
-  enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{    // cadd_cmpLT
+  enc_class enc_cmpLTP_mem(rRegI p, rRegI q, memory mem, eCXRegI tmp) %{    // cadd_cmpLT
     int tmpReg = $tmp$$reg;
 
     // SUB $p,$q
@@ -2390,12 +2459,12 @@
   %}
 
   // Special case for moving an integer register to a stack slot.
-  enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
+  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
   %}
 
   // Special case for moving a register to a stack slot.
-  enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
+  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
     // Opcode already emitted
     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
@@ -2640,7 +2709,7 @@
 // equal_result    = 0;
 // nan_result      = -1;
 
-  enc_class CmpF_Result(eRegI dst) %{
+  enc_class CmpF_Result(rRegI dst) %{
     // fnstsw_ax();
     emit_opcode( cbuf, 0xDF);
     emit_opcode( cbuf, 0xE0);
@@ -2685,7 +2754,7 @@
 // done:
   %}
 
-  enc_class convert_int_long( regL dst, eRegI src ) %{
+  enc_class convert_int_long( regL dst, rRegI src ) %{
     // mov $dst.lo,$src
     int dst_encoding = $dst$$reg;
     int src_encoding = $src$$reg;
@@ -2754,7 +2823,7 @@
     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
   %}
 
-  enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
+  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
     // Basic idea: lo(result) = lo(x_lo * y_lo)
     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
     // MOV    $tmp,$src.lo
@@ -2780,7 +2849,7 @@
     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
   %}
 
-  enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
+  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
     // Basic idea: lo(result) = lo(src * y_lo)
     //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
     // IMUL   $tmp,EDX,$src
@@ -2836,7 +2905,7 @@
     emit_d8(cbuf, 4*4);
   %}
 
-  enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
+  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
     // MOV   $tmp,$src.lo
     emit_opcode(cbuf, 0x8B);
     emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
@@ -2857,7 +2926,7 @@
     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
   %}
 
-  enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
+  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
     // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
     emit_opcode( cbuf, 0x3B );
     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
@@ -2869,7 +2938,7 @@
     emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
   %}
 
-  enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
+  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
     // XOR    $tmp,$tmp
     emit_opcode(cbuf,0x33);  // XOR
     emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
@@ -3762,9 +3831,9 @@
     // in SSE2+ mode we want to keep the FPU stack clean so pretend
     // that C functions return float and double results in XMM0.
     if( ideal_reg == Op_RegD && UseSSE>=2 )
-      return OptoRegPair(XMM0b_num,XMM0a_num);
+      return OptoRegPair(XMM0b_num,XMM0_num);
     if( ideal_reg == Op_RegF && UseSSE>=2 )
-      return OptoRegPair(OptoReg::Bad,XMM0a_num);
+      return OptoRegPair(OptoReg::Bad,XMM0_num);
 
     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
   %}
@@ -3775,9 +3844,9 @@
     static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
     static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
     if( ideal_reg == Op_RegD && UseSSE>=2 )
-      return OptoRegPair(XMM0b_num,XMM0a_num);
+      return OptoRegPair(XMM0b_num,XMM0_num);
     if( ideal_reg == Op_RegF && UseSSE>=1 )
-      return OptoRegPair(OptoReg::Bad,XMM0a_num);
+      return OptoRegPair(OptoReg::Bad,XMM0_num);
     return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
   %}
 
@@ -4147,8 +4216,8 @@
 
 // Register Operands
 // Integer Register
-operand eRegI() %{
-  constraint(ALLOC_IN_RC(e_reg));
+operand rRegI() %{
+  constraint(ALLOC_IN_RC(int_reg));
   match(RegI);
   match(xRegI);
   match(eAXRegI);
@@ -4163,8 +4232,8 @@
 %}
 
 // Subset of Integer Register
-operand xRegI(eRegI reg) %{
-  constraint(ALLOC_IN_RC(x_reg));
+operand xRegI(rRegI reg) %{
+  constraint(ALLOC_IN_RC(int_x_reg));
   match(reg);
   match(eAXRegI);
   match(eBXRegI);
@@ -4179,7 +4248,7 @@
 operand eAXRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(eax_reg));
   match(reg);
-  match(eRegI);
+  match(rRegI);
 
   format %{ "EAX" %}
   interface(REG_INTER);
@@ -4189,7 +4258,7 @@
 operand eBXRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(ebx_reg));
   match(reg);
-  match(eRegI);
+  match(rRegI);
 
   format %{ "EBX" %}
   interface(REG_INTER);
@@ -4198,7 +4267,7 @@
 operand eCXRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(ecx_reg));
   match(reg);
-  match(eRegI);
+  match(rRegI);
 
   format %{ "ECX" %}
   interface(REG_INTER);
@@ -4207,7 +4276,7 @@
 operand eDXRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(edx_reg));
   match(reg);
-  match(eRegI);
+  match(rRegI);
 
   format %{ "EDX" %}
   interface(REG_INTER);
@@ -4216,7 +4285,7 @@
 operand eDIRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(edi_reg));
   match(reg);
-  match(eRegI);
+  match(rRegI);
 
   format %{ "EDI" %}
   interface(REG_INTER);
@@ -4263,7 +4332,7 @@
 operand eSIRegI(xRegI reg) %{
    constraint(ALLOC_IN_RC(esi_reg));
    match(reg);
-   match(eRegI);
+   match(rRegI);
 
    format %{ "ESI" %}
    interface(REG_INTER);
@@ -4284,7 +4353,7 @@
 %}
 
 operand eRegP() %{
-  constraint(ALLOC_IN_RC(e_reg));
+  constraint(ALLOC_IN_RC(int_reg));
   match(RegP);
   match(eAXRegP);
   match(eBXRegP);
@@ -4297,7 +4366,7 @@
 
 // On windows95, EBP is not safe to use for implicit null tests.
 operand eRegP_no_EBP() %{
-  constraint(ALLOC_IN_RC(e_reg_no_rbp));
+  constraint(ALLOC_IN_RC(int_reg_no_rbp));
   match(RegP);
   match(eAXRegP);
   match(eBXRegP);
@@ -4477,7 +4546,7 @@
 // Float register operands
 operand regDPR() %{
   predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(dbl_reg));
+  constraint(ALLOC_IN_RC(fp_dbl_reg));
   match(RegD);
   match(regDPR1);
   match(regDPR2);
@@ -4487,7 +4556,7 @@
 
 operand regDPR1(regDPR reg) %{
   predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(dbl_reg0));
+  constraint(ALLOC_IN_RC(fp_dbl_reg0));
   match(reg);
   format %{ "FPR1" %}
   interface(REG_INTER);
@@ -4495,7 +4564,7 @@
 
 operand regDPR2(regDPR reg) %{
   predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(dbl_reg1));
+  constraint(ALLOC_IN_RC(fp_dbl_reg1));
   match(reg);
   format %{ "FPR2" %}
   interface(REG_INTER);
@@ -4503,45 +4572,16 @@
 
 operand regnotDPR1(regDPR reg) %{
   predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(dbl_notreg0));
+  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
   match(reg);
   format %{ %}
   interface(REG_INTER);
 %}
 
-// XMM Double register operands
-operand regD() %{
-  predicate( UseSSE>=2 );
-  constraint(ALLOC_IN_RC(xdb_reg));
-  match(RegD);
-  match(regD6);
-  match(regD7);
-  format %{ %}
-  interface(REG_INTER);
-%}
-
-// XMM6 double register operands
-operand regD6(regD reg) %{
-  predicate( UseSSE>=2 );
-  constraint(ALLOC_IN_RC(xdb_reg6));
-  match(reg);
-  format %{ "XMM6" %}
-  interface(REG_INTER);
-%}
-
-// XMM7 double register operands
-operand regD7(regD reg) %{
-  predicate( UseSSE>=2 );
-  constraint(ALLOC_IN_RC(xdb_reg7));
-  match(reg);
-  format %{ "XMM7" %}
-  interface(REG_INTER);
-%}
-
 // Float register operands
 operand regFPR() %{
   predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(flt_reg));
+  constraint(ALLOC_IN_RC(fp_flt_reg));
   match(RegF);
   match(regFPR1);
   format %{ %}
@@ -4551,21 +4591,30 @@
 // Float register operands
 operand regFPR1(regFPR reg) %{
   predicate( UseSSE < 2 );
-  constraint(ALLOC_IN_RC(flt_reg0));
+  constraint(ALLOC_IN_RC(fp_flt_reg0));
   match(reg);
   format %{ "FPR1" %}
   interface(REG_INTER);
 %}
 
-// XMM register operands
+// XMM Float register operands
 operand regF() %{
   predicate( UseSSE>=1 );
-  constraint(ALLOC_IN_RC(xmm_reg));
+  constraint(ALLOC_IN_RC(float_reg));
   match(RegF);
   format %{ %}
   interface(REG_INTER);
 %}
 
+// XMM Double register operands
+operand regD() %{
+  predicate( UseSSE>=2 );
+  constraint(ALLOC_IN_RC(double_reg));
+  match(RegD);
+  format %{ %}
+  interface(REG_INTER);
+%}
+
 
 //----------Memory Operands----------------------------------------------------
 // Direct Memory Operand
@@ -4583,7 +4632,7 @@
 
 // Indirect Memory Operand
 operand indirect(eRegP reg) %{
-  constraint(ALLOC_IN_RC(e_reg));
+  constraint(ALLOC_IN_RC(int_reg));
   match(reg);
 
   format %{ "[$reg]" %}
@@ -4622,7 +4671,7 @@
 %}
 
 // Indirect Memory Plus Long Offset Operand
-operand indOffset32X(eRegI reg, immP off) %{
+operand indOffset32X(rRegI reg, immP off) %{
   match(AddP off reg);
 
   format %{ "[$reg + $off]" %}
@@ -4635,7 +4684,7 @@
 %}
 
 // Indirect Memory Plus Index Register Plus Offset Operand
-operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
+operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
   match(AddP (AddP reg ireg) off);
 
   op_cost(10);
@@ -4649,7 +4698,7 @@
 %}
 
 // Indirect Memory Plus Index Register Plus Offset Operand
-operand indIndex(eRegP reg, eRegI ireg) %{
+operand indIndex(eRegP reg, rRegI ireg) %{
   match(AddP reg ireg);
 
   op_cost(10);
@@ -4667,7 +4716,7 @@
 // // -------------------------------------------------------------------------
 // // Scaled Memory Operands
 // // Indirect Memory Times Scale Plus Offset Operand
-// operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
+// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
 //   match(AddP off (LShiftI ireg scale));
 //
 //   op_cost(10);
@@ -4681,7 +4730,7 @@
 // %}
 
 // Indirect Memory Times Scale Plus Index Register
-operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
+operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
   match(AddP reg (LShiftI ireg scale));
 
   op_cost(10);
@@ -4695,7 +4744,7 @@
 %}
 
 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
-operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
+operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
   match(AddP (AddP reg (LShiftI ireg scale)) off);
 
   op_cost(10);
@@ -4823,7 +4872,7 @@
 // Indirect Memory Operand
 operand indirect_win95_safe(eRegP_no_EBP reg)
 %{
-  constraint(ALLOC_IN_RC(e_reg));
+  constraint(ALLOC_IN_RC(int_reg));
   match(reg);
 
   op_cost(100);
@@ -4867,7 +4916,7 @@
 %}
 
 // Indirect Memory Plus Index Register Plus Offset Operand
-operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
+operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
 %{
   match(AddP (AddP reg ireg) off);
 
@@ -4882,7 +4931,7 @@
 %}
 
 // Indirect Memory Times Scale Plus Index Register
-operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
+operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
 %{
   match(AddP reg (LShiftI ireg scale));
 
@@ -4897,7 +4946,7 @@
 %}
 
 // Indirect Memory Times Scale Plus Index Register Plus Offset Operand
-operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
+operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
 %{
   match(AddP (AddP reg (LShiftI ireg scale)) off);
 
@@ -5086,7 +5135,7 @@
 //   Or: _mem if it requires the big decoder and a memory unit.
 
 // Integer ALU reg operation
-pipe_class ialu_reg(eRegI dst) %{
+pipe_class ialu_reg(rRegI dst) %{
     single_instruction;
     dst    : S4(write);
     dst    : S3(read);
@@ -5104,7 +5153,7 @@
 %}
 
 // Integer ALU reg operation using big decoder
-pipe_class ialu_reg_fat(eRegI dst) %{
+pipe_class ialu_reg_fat(rRegI dst) %{
     single_instruction;
     dst    : S4(write);
     dst    : S3(read);
@@ -5122,7 +5171,7 @@
 %}
 
 // Integer ALU reg-reg operation
-pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
+pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
     single_instruction;
     dst    : S4(write);
     src    : S3(read);
@@ -5140,7 +5189,7 @@
 %}
 
 // Integer ALU reg-reg operation
-pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
+pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
     single_instruction;
     dst    : S4(write);
     src    : S3(read);
@@ -5158,7 +5207,7 @@
 %}
 
 // Integer ALU reg-mem operation
-pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
+pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
     single_instruction;
     dst    : S5(write);
     mem    : S3(read);
@@ -5187,7 +5236,7 @@
 %}
 
 // Integer Store to Memory
-pipe_class ialu_mem_reg(memory mem, eRegI src) %{
+pipe_class ialu_mem_reg(memory mem, rRegI src) %{
     single_instruction;
     mem    : S3(read);
     src    : S5(read);
@@ -5216,7 +5265,7 @@
 %}
 
 // Integer ALU0 reg-reg operation
-pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
+pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
     single_instruction;
     dst    : S4(write);
     src    : S3(read);
@@ -5225,7 +5274,7 @@
 %}
 
 // Integer ALU0 reg-mem operation
-pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
+pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
     single_instruction;
     dst    : S5(write);
     mem    : S3(read);
@@ -5235,7 +5284,7 @@
 %}
 
 // Integer ALU reg-reg operation
-pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
+pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
     single_instruction;
     cr     : S4(write);
     src1   : S3(read);
@@ -5245,7 +5294,7 @@
 %}
 
 // Integer ALU reg-imm operation
-pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
+pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
     single_instruction;
     cr     : S4(write);
     src1   : S3(read);
@@ -5254,7 +5303,7 @@
 %}
 
 // Integer ALU reg-mem operation
-pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
+pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
     single_instruction;
     cr     : S4(write);
     src1   : S3(read);
@@ -5265,7 +5314,7 @@
 %}
 
 // Conditional move reg-reg
-pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
+pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
     instruction_count(4);
     y      : S4(read);
     q      : S3(read);
@@ -5274,7 +5323,7 @@
 %}
 
 // Conditional move reg-reg
-pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
+pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
     single_instruction;
     dst    : S4(write);
     src    : S3(read);
@@ -5283,7 +5332,7 @@
 %}
 
 // Conditional move reg-mem
-pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
+pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
     single_instruction;
     dst    : S4(write);
     src    : S3(read);
@@ -5534,7 +5583,7 @@
 //               in the encode section of the architecture description.
 
 //----------BSWAP-Instruction--------------------------------------------------
-instruct bytes_reverse_int(eRegI dst) %{
+instruct bytes_reverse_int(rRegI dst) %{
   match(Set dst (ReverseBytesI dst));
 
   format %{ "BSWAP  $dst" %}
@@ -5555,7 +5604,7 @@
   ins_pipe( ialu_reg_reg);
 %}
 
-instruct bytes_reverse_unsigned_short(eRegI dst, eFlagsReg cr) %{
+instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
   match(Set dst (ReverseBytesUS dst));
   effect(KILL cr);
 
@@ -5568,7 +5617,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct bytes_reverse_short(eRegI dst, eFlagsReg cr) %{
+instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
   match(Set dst (ReverseBytesS dst));
   effect(KILL cr);
 
@@ -5584,7 +5633,7 @@
 
 //---------- Zeros Count Instructions ------------------------------------------
 
-instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
   predicate(UseCountLeadingZerosInstruction);
   match(Set dst (CountLeadingZerosI src));
   effect(KILL cr);
@@ -5596,7 +5645,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
   predicate(!UseCountLeadingZerosInstruction);
   match(Set dst (CountLeadingZerosI src));
   effect(KILL cr);
@@ -5621,7 +5670,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
+instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
   predicate(UseCountLeadingZerosInstruction);
   match(Set dst (CountLeadingZerosL src));
   effect(TEMP dst, KILL cr);
@@ -5644,7 +5693,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
+instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
   predicate(!UseCountLeadingZerosInstruction);
   match(Set dst (CountLeadingZerosL src));
   effect(TEMP dst, KILL cr);
@@ -5680,7 +5729,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (CountTrailingZerosI src));
   effect(KILL cr);
 
@@ -5699,7 +5748,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
+instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
   match(Set dst (CountTrailingZerosL src));
   effect(TEMP dst, KILL cr);
 
@@ -5731,7 +5780,7 @@
 
 //---------- Population Count Instructions -------------------------------------
 
-instruct popCountI(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountI src));
   effect(KILL cr);
@@ -5743,7 +5792,7 @@
   ins_pipe(ialu_reg);
 %}
 
-instruct popCountI_mem(eRegI dst, memory mem, eFlagsReg cr) %{
+instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountI (LoadI mem)));
   effect(KILL cr);
@@ -5756,7 +5805,7 @@
 %}
 
 // Note: Long.bitCount(long) returns an int.
-instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountL src));
   effect(KILL cr, TEMP tmp, TEMP dst);
@@ -5773,7 +5822,7 @@
 %}
 
 // Note: Long.bitCount(long) returns an int.
-instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
+instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
   predicate(UsePopCountInstruction);
   match(Set dst (PopCountL (LoadL mem)));
   effect(KILL cr, TEMP tmp, TEMP dst);
@@ -5877,7 +5926,7 @@
 %}
 
 // Load Short (16bit signed)
-instruct loadS(eRegI dst, memory mem) %{
+instruct loadS(rRegI dst, memory mem) %{
   match(Set dst (LoadS mem));
 
   ins_cost(125);
@@ -5891,7 +5940,7 @@
 %}
 
 // Load Short (16 bit signed) to Byte (8 bit signed)
-instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
+instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
   match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
 
   ins_cost(125);
@@ -5922,7 +5971,7 @@
 %}
 
 // Load Unsigned Short/Char (16bit unsigned)
-instruct loadUS(eRegI dst, memory mem) %{
+instruct loadUS(rRegI dst, memory mem) %{
   match(Set dst (LoadUS mem));
 
   ins_cost(125);
@@ -5936,7 +5985,7 @@
 %}
 
 // Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
-instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
+instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
   match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
 
   ins_cost(125);
@@ -5997,7 +6046,7 @@
 %}
 
 // Load Integer
-instruct loadI(eRegI dst, memory mem) %{
+instruct loadI(rRegI dst, memory mem) %{
   match(Set dst (LoadI mem));
 
   ins_cost(125);
@@ -6011,7 +6060,7 @@
 %}
 
 // Load Integer (32 bit signed) to Byte (8 bit signed)
-instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
+instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
   match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
 
   ins_cost(125);
@@ -6023,7 +6072,7 @@
 %}
 
 // Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
-instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
+instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
   match(Set dst (AndI (LoadI mem) mask));
 
   ins_cost(125);
@@ -6035,7 +6084,7 @@
 %}
 
 // Load Integer (32 bit signed) to Short (16 bit signed)
-instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
+instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
   match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
 
   ins_cost(125);
@@ -6047,7 +6096,7 @@
 %}
 
 // Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
-instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
+instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
   match(Set dst (AndI (LoadI mem) mask));
 
   ins_cost(125);
@@ -6208,7 +6257,7 @@
 %}
 
 // Load Range
-instruct loadRange(eRegI dst, memory mem) %{
+instruct loadRange(rRegI dst, memory mem) %{
   match(Set dst (LoadRange mem));
 
   ins_cost(125);
@@ -6305,66 +6354,6 @@
   ins_pipe( fpu_reg_mem );
 %}
 
-// Load Aligned Packed Byte to XMM register
-instruct loadA8B(regD dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (Load8B mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed8B" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Short to XMM register
-instruct loadA4S(regD dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (Load4S mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed4S" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Char to XMM register
-instruct loadA4C(regD dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (Load4C mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed4C" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Integer to XMM register
-instruct load2IU(regD dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (Load2I mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed2I" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Single to XMM
-instruct loadA2F(regD dst, memory mem) %{
-  predicate(UseSSE>=1);
-  match(Set dst (Load2F mem));
-  ins_cost(145);
-  format %{ "MOVQ  $dst,$mem\t! packed2F" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Load Effective Address
 instruct leaP8(eRegP dst, indOffset8 mem) %{
   match(Set dst mem);
@@ -6417,7 +6406,7 @@
 %}
 
 // Load Constant
-instruct loadConI(eRegI dst, immI src) %{
+instruct loadConI(rRegI dst, immI src) %{
   match(Set dst src);
 
   format %{ "MOV    $dst,$src" %}
@@ -6426,7 +6415,7 @@
 %}
 
 // Load Constant zero
-instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
+instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
   match(Set dst src);
   effect(KILL cr);
 
@@ -6594,7 +6583,7 @@
 %}
 
 // Load Stack Slot
-instruct loadSSI(eRegI dst, stackSlotI src) %{
+instruct loadSSI(rRegI dst, stackSlotI src) %{
   match(Set dst src);
   ins_cost(125);
 
@@ -6821,7 +6810,7 @@
 %}
 
 // Store Char/Short
-instruct storeC(memory mem, eRegI src) %{
+instruct storeC(memory mem, rRegI src) %{
   match(Set mem (StoreC mem src));
 
   ins_cost(125);
@@ -6832,7 +6821,7 @@
 %}
 
 // Store Integer
-instruct storeI(memory mem, eRegI src) %{
+instruct storeI(memory mem, rRegI src) %{
   match(Set mem (StoreI mem src));
 
   ins_cost(125);
@@ -6976,42 +6965,6 @@
   ins_pipe( ialu_mem_imm );
 %}
 
-// Store Aligned Packed Byte XMM register to memory
-instruct storeA8B(memory mem, regD src) %{
-  predicate(UseSSE>=1);
-  match(Set mem (Store8B mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed8B" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Store Aligned Packed Char/Short XMM register to memory
-instruct storeA4C(memory mem, regD src) %{
-  predicate(UseSSE>=1);
-  match(Set mem (Store4C mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed4C" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Store Aligned Packed Integer XMM register to memory
-instruct storeA2I(memory mem, regD src) %{
-  predicate(UseSSE>=1);
-  match(Set mem (Store2I mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed2I" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Store CMS card-mark Immediate
 instruct storeImmCM(memory mem, immI8 src) %{
   match(Set mem (StoreCM mem src));
@@ -7073,18 +7026,6 @@
   ins_pipe( pipe_slow );
 %}
 
-// Store Aligned Packed Single Float XMM register to memory
-instruct storeA2F(memory mem, regD src) %{
-  predicate(UseSSE>=1);
-  match(Set mem (Store2F mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed2F" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Store Float
 instruct storeFPR( memory mem, regFPR1 src) %{
   predicate(UseSSE==0);
@@ -7146,7 +7087,7 @@
 %}
 
 // Store Integer to stack slot
-instruct storeSSI(stackSlotI dst, eRegI src) %{
+instruct storeSSI(stackSlotI dst, rRegI src) %{
   match(Set dst src);
 
   ins_cost(100);
@@ -7271,7 +7212,7 @@
   ins_pipe(empty);
 %}
 
-instruct castP2X(eRegI dst, eRegP src ) %{
+instruct castP2X(rRegI dst, eRegP src ) %{
   match(Set dst (CastP2X src));
   ins_cost(50);
   format %{ "MOV    $dst, $src\t# CastP2X" %}
@@ -7281,7 +7222,7 @@
 
 //----------Conditional Move---------------------------------------------------
 // Conditional move
-instruct jmovI_reg(cmpOp cop, eFlagsReg cr, eRegI dst, eRegI src) %{
+instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
   predicate(!VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -7298,7 +7239,7 @@
   ins_pipe( pipe_cmov_reg );
 %}
 
-instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src) %{
+instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
   predicate(!VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -7315,7 +7256,7 @@
   ins_pipe( pipe_cmov_reg );
 %}
 
-instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
+instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
   predicate(VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -7325,7 +7266,7 @@
   ins_pipe( pipe_cmov_reg );
 %}
 
-instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
+instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
   predicate(VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -7335,7 +7276,7 @@
   ins_pipe( pipe_cmov_reg );
 %}
 
-instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
+instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
   predicate(VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
   ins_cost(200);
@@ -7345,7 +7286,7 @@
 %}
 
 // Conditional move
-instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
+instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
   predicate(VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
   ins_cost(250);
@@ -7356,7 +7297,7 @@
 %}
 
 // Conditional move
-instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
+instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
   predicate(VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
   ins_cost(250);
@@ -7366,7 +7307,7 @@
   ins_pipe( pipe_cmov_mem );
 %}
 
-instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
+instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
   predicate(VM_Version::supports_cmov() );
   match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
   ins_cost(250);
@@ -7620,7 +7561,7 @@
 //----------Arithmetic Instructions--------------------------------------------
 //----------Addition Instructions----------------------------------------------
 // Integer Addition Instructions
-instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (AddI dst src));
   effect(KILL cr);
 
@@ -7631,7 +7572,7 @@
   ins_pipe( ialu_reg_reg );
 %}
 
-instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
   match(Set dst (AddI dst src));
   effect(KILL cr);
 
@@ -7641,7 +7582,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
+instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
   predicate(UseIncDec);
   match(Set dst (AddI dst src));
   effect(KILL cr);
@@ -7653,7 +7594,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
+instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
   match(Set dst (AddI src0 src1));
   ins_cost(110);
 
@@ -7673,7 +7614,7 @@
   ins_pipe( ialu_reg_reg );
 %}
 
-instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
+instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
   predicate(UseIncDec);
   match(Set dst (AddI dst src));
   effect(KILL cr);
@@ -7685,7 +7626,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
+instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (AddP dst src));
   effect(KILL cr);
 
@@ -7707,7 +7648,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   match(Set dst (AddI dst (LoadI src)));
   effect(KILL cr);
 
@@ -7718,7 +7659,7 @@
   ins_pipe( ialu_reg_mem );
 %}
 
-instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (StoreI dst (AddI (LoadI dst) src)));
   effect(KILL cr);
 
@@ -7780,7 +7721,7 @@
   ins_pipe( empty );
 %}
 
-instruct castII( eRegI dst ) %{
+instruct castII( rRegI dst ) %{
   match(Set dst (CastII dst));
   format %{ "#castII of $dst" %}
   ins_encode( /*empty encoding*/ );
@@ -7814,7 +7755,7 @@
 
 // Conditional-store of an int value.
 // ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
-instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
+instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
   match(Set cr (StoreIConditional mem (Binary oldval newval)));
   effect(KILL oldval);
   format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
@@ -7847,7 +7788,7 @@
 
 // No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
 
-instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
+instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
   match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
   format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
@@ -7860,7 +7801,7 @@
   ins_pipe( pipe_cmpxchg );
 %}
 
-instruct compareAndSwapP( eRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
+instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
   match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
@@ -7872,7 +7813,7 @@
   ins_pipe( pipe_cmpxchg );
 %}
 
-instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
+instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
   match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
   effect(KILL cr, KILL oldval);
   format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
@@ -7886,7 +7827,7 @@
 
 //----------Subtraction Instructions-------------------------------------------
 // Integer Subtraction Instructions
-instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (SubI dst src));
   effect(KILL cr);
 
@@ -7897,7 +7838,7 @@
   ins_pipe( ialu_reg_reg );
 %}
 
-instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
   match(Set dst (SubI dst src));
   effect(KILL cr);
 
@@ -7908,7 +7849,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   match(Set dst (SubI dst (LoadI src)));
   effect(KILL cr);
 
@@ -7919,7 +7860,7 @@
   ins_pipe( ialu_reg_mem );
 %}
 
-instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (StoreI dst (SubI (LoadI dst) src)));
   effect(KILL cr);
 
@@ -7931,7 +7872,7 @@
 %}
 
 // Subtract from a pointer
-instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
+instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
   match(Set dst (AddP dst (SubI zero src)));
   effect(KILL cr);
 
@@ -7942,7 +7883,7 @@
   ins_pipe( ialu_reg_reg );
 %}
 
-instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
+instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
   match(Set dst (SubI zero dst));
   effect(KILL cr);
 
@@ -7957,7 +7898,7 @@
 //----------Multiplication/Division Instructions-------------------------------
 // Integer Multiplication Instructions
 // Multiply Register
-instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (MulI dst src));
   effect(KILL cr);
 
@@ -7970,7 +7911,7 @@
 %}
 
 // Multiply 32-bit Immediate
-instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
+instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
   match(Set dst (MulI src imm));
   effect(KILL cr);
 
@@ -8026,7 +7967,7 @@
 %}
 
 // Multiply Memory 32-bit Immediate
-instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
+instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
   match(Set dst (MulI (LoadI src) imm));
   effect(KILL cr);
 
@@ -8038,7 +7979,7 @@
 %}
 
 // Multiply Memory
-instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
+instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
   match(Set dst (MulI dst (LoadI src)));
   effect(KILL cr);
 
@@ -8075,7 +8016,7 @@
 %}
 
 // Multiply Register Long
-instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
   match(Set dst (MulL dst src));
   effect(KILL cr, TEMP tmp);
   ins_cost(4*100+3*400);
@@ -8093,7 +8034,7 @@
 %}
 
 // Multiply Register Long where the left operand's high 32 bits are zero
-instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
   predicate(is_operand_hi32_zero(n->in(1)));
   match(Set dst (MulL dst src));
   effect(KILL cr, TEMP tmp);
@@ -8114,7 +8055,7 @@
 %}
 
 // Multiply Register Long where the right operand's high 32 bits are zero
-instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
+instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
   predicate(is_operand_hi32_zero(n->in(2)));
   match(Set dst (MulL dst src));
   effect(KILL cr, TEMP tmp);
@@ -8150,7 +8091,7 @@
 %}
 
 // Multiply Register Long by small constant
-instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
+instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
   match(Set dst (MulL dst src));
   effect(KILL cr, TEMP tmp);
   ins_cost(2*100+2*400);
@@ -8248,7 +8189,7 @@
 %}
 
 // Divide Register Long (no special case since divisor != -1)
-instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
+instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
   match(Set dst (DivL dst imm));
   effect( TEMP tmp, TEMP tmp2, KILL cr );
   ins_cost(1000);
@@ -8319,7 +8260,7 @@
 %}
 
 // Remainder Register Long (remainder fit into 32 bits)
-instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
+instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
   match(Set dst (ModL dst imm));
   effect( TEMP tmp, TEMP tmp2, KILL cr );
   ins_cost(1000);
@@ -8387,7 +8328,7 @@
 
 // Integer Shift Instructions
 // Shift Left by one
-instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
   match(Set dst (LShiftI dst shift));
   effect(KILL cr);
 
@@ -8399,7 +8340,7 @@
 %}
 
 // Shift Left by 8-bit immediate
-instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
   match(Set dst (LShiftI dst shift));
   effect(KILL cr);
 
@@ -8411,7 +8352,7 @@
 %}
 
 // Shift Left by variable
-instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
+instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
   match(Set dst (LShiftI dst shift));
   effect(KILL cr);
 
@@ -8423,7 +8364,7 @@
 %}
 
 // Arithmetic shift right by one
-instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
   match(Set dst (RShiftI dst shift));
   effect(KILL cr);
 
@@ -8445,7 +8386,7 @@
 %}
 
 // Arithmetic Shift Right by 8-bit immediate
-instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
   match(Set dst (RShiftI dst shift));
   effect(KILL cr);
 
@@ -8468,7 +8409,7 @@
 %}
 
 // Arithmetic Shift Right by variable
-instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
+instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
   match(Set dst (RShiftI dst shift));
   effect(KILL cr);
 
@@ -8480,7 +8421,7 @@
 %}
 
 // Logical shift right by one
-instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
   match(Set dst (URShiftI dst shift));
   effect(KILL cr);
 
@@ -8492,7 +8433,7 @@
 %}
 
 // Logical Shift Right by 8-bit immediate
-instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
   match(Set dst (URShiftI dst shift));
   effect(KILL cr);
 
@@ -8506,7 +8447,7 @@
 
 // Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
 // This idiom is used by the compiler for the i2b bytecode.
-instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
+instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
   match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
 
   size(3);
@@ -8519,7 +8460,7 @@
 
 // Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
 // This idiom is used by the compiler the i2s bytecode.
-instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
+instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
   match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
 
   size(3);
@@ -8532,7 +8473,7 @@
 
 
 // Logical Shift Right by variable
-instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
+instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
   match(Set dst (URShiftI dst shift));
   effect(KILL cr);
 
@@ -8548,7 +8489,7 @@
 //----------Integer Logical Instructions---------------------------------------
 // And Instructions
 // And Register with Register
-instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (AndI dst src));
   effect(KILL cr);
 
@@ -8560,7 +8501,7 @@
 %}
 
 // And Register with Immediate
-instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
   match(Set dst (AndI dst src));
   effect(KILL cr);
 
@@ -8572,7 +8513,7 @@
 %}
 
 // And Register with Memory
-instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   match(Set dst (AndI dst (LoadI src)));
   effect(KILL cr);
 
@@ -8584,7 +8525,7 @@
 %}
 
 // And Memory with Register
-instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (StoreI dst (AndI (LoadI dst) src)));
   effect(KILL cr);
 
@@ -8610,7 +8551,7 @@
 
 // Or Instructions
 // Or Register with Register
-instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (OrI dst src));
   effect(KILL cr);
 
@@ -8621,7 +8562,7 @@
   ins_pipe( ialu_reg_reg );
 %}
 
-instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
+instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
   match(Set dst (OrI dst (CastP2X src)));
   effect(KILL cr);
 
@@ -8634,7 +8575,7 @@
 
 
 // Or Register with Immediate
-instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
   match(Set dst (OrI dst src));
   effect(KILL cr);
 
@@ -8646,7 +8587,7 @@
 %}
 
 // Or Register with Memory
-instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   match(Set dst (OrI dst (LoadI src)));
   effect(KILL cr);
 
@@ -8658,7 +8599,7 @@
 %}
 
 // Or Memory with Register
-instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (StoreI dst (OrI (LoadI dst) src)));
   effect(KILL cr);
 
@@ -8684,7 +8625,7 @@
 
 // ROL/ROR
 // ROL expand
-instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
   effect(USE_DEF dst, USE shift, KILL cr);
 
   format %{ "ROL    $dst, $shift" %}
@@ -8693,7 +8634,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
   effect(USE_DEF dst, USE shift, KILL cr);
 
   format %{ "ROL    $dst, $shift" %}
@@ -8713,7 +8654,7 @@
 // end of ROL expand
 
 // ROL 32bit by one once
-instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
+instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 
   expand %{
@@ -8722,7 +8663,7 @@
 %}
 
 // ROL 32bit var by imm8 once
-instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
+instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
   match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
 
@@ -8750,7 +8691,7 @@
 %}
 
 // ROR expand
-instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
+instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
   effect(USE_DEF dst, USE shift, KILL cr);
 
   format %{ "ROR    $dst, $shift" %}
@@ -8759,7 +8700,7 @@
   ins_pipe( ialu_reg );
 %}
 
-instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
+instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
   effect (USE_DEF dst, USE shift, KILL cr);
 
   format %{ "ROR    $dst, $shift" %}
@@ -8779,7 +8720,7 @@
 // end of ROR expand
 
 // ROR right once
-instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
+instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 
   expand %{
@@ -8788,7 +8729,7 @@
 %}
 
 // ROR 32bit by immI8 once
-instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
+instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
   predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
   match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
 
@@ -8817,7 +8758,7 @@
 
 // Xor Instructions
 // Xor Register with Register
-instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
+instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (XorI dst src));
   effect(KILL cr);
 
@@ -8829,7 +8770,7 @@
 %}
 
 // Xor Register with Immediate -1
-instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
+instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
   match(Set dst (XorI dst imm));  
 
   size(2);
@@ -8841,7 +8782,7 @@
 %}
 
 // Xor Register with Immediate
-instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
+instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
   match(Set dst (XorI dst src));
   effect(KILL cr);
 
@@ -8853,7 +8794,7 @@
 %}
 
 // Xor Register with Memory
-instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
+instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
   match(Set dst (XorI dst (LoadI src)));
   effect(KILL cr);
 
@@ -8865,7 +8806,7 @@
 %}
 
 // Xor Memory with Register
-instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
+instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (StoreI dst (XorI (LoadI dst) src)));
   effect(KILL cr);
 
@@ -8890,14 +8831,14 @@
 
 //----------Convert Int to Boolean---------------------------------------------
 
-instruct movI_nocopy(eRegI dst, eRegI src) %{
+instruct movI_nocopy(rRegI dst, rRegI src) %{
   effect( DEF dst, USE src );
   format %{ "MOV    $dst,$src" %}
   ins_encode( enc_Copy( dst, src) );
   ins_pipe( ialu_reg_reg );
 %}
 
-instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
+instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
   effect( USE_DEF dst, USE src, KILL cr );
 
   size(4);
@@ -8908,7 +8849,7 @@
   ins_pipe( ialu_reg_reg_long );
 %}
 
-instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
+instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
   match(Set dst (Conv2B src));
 
   expand %{
@@ -8917,14 +8858,14 @@
   %}
 %}
 
-instruct movP_nocopy(eRegI dst, eRegP src) %{
+instruct movP_nocopy(rRegI dst, eRegP src) %{
   effect( DEF dst, USE src );
   format %{ "MOV    $dst,$src" %}
   ins_encode( enc_Copy( dst, src) );
   ins_pipe( ialu_reg_reg );
 %}
 
-instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
+instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
   effect( USE_DEF dst, USE src, KILL cr );
   format %{ "NEG    $dst\n\t"
             "ADC    $dst,$src" %}
@@ -8933,7 +8874,7 @@
   ins_pipe( ialu_reg_reg_long );
 %}
 
-instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
+instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
   match(Set dst (Conv2B src));
 
   expand %{
@@ -8958,7 +8899,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
+instruct cmpLTMask0( rRegI dst, immI0 zero, eFlagsReg cr ) %{
   match(Set dst (CmpLTMask dst zero));
   effect( DEF dst, KILL cr );
   ins_cost(100);
@@ -9430,7 +9371,7 @@
 %}
 
 // Compare vs zero into -1,0,1
-instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE<=1);
   match(Set dst (CmpD3 src1 zero));
   effect(KILL cr, KILL rax);
@@ -9444,7 +9385,7 @@
 %}
 
 // Compare into -1,0,1
-instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE<=1);
   match(Set dst (CmpD3 src1 src2));
   effect(KILL cr, KILL rax);
@@ -10222,7 +10163,7 @@
 %}
 
 // Compare vs zero into -1,0,1
-instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE == 0);
   match(Set dst (CmpF3 src1 zero));
   effect(KILL cr, KILL rax);
@@ -10236,7 +10177,7 @@
 %}
 
 // Compare into -1,0,1
-instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
+instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
   predicate(UseSSE == 0);
   match(Set dst (CmpF3 src1 src2));
   effect(KILL cr, KILL rax);
@@ -11156,7 +11097,7 @@
   ins_pipe( fpu_reg_mem );
 %}
 
-instruct convI2D_reg(regD dst, eRegI src) %{
+instruct convI2D_reg(regD dst, rRegI src) %{
   predicate( UseSSE>=2 && !UseXmmI2D );
   match(Set dst (ConvI2D src));
   format %{ "CVTSI2SD $dst,$src" %}
@@ -11176,7 +11117,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct convXI2D_reg(regD dst, eRegI src)
+instruct convXI2D_reg(regD dst, rRegI src)
 %{
   predicate( UseSSE>=2 && UseXmmI2D );
   match(Set dst (ConvI2D src));
@@ -11264,7 +11205,7 @@
 %}
 
 // Convert an int to a float in xmm; no rounding step needed.
-instruct convI2F_reg(regF dst, eRegI src) %{
+instruct convI2F_reg(regF dst, rRegI src) %{
   predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
   match(Set dst (ConvI2F src));
   format %{ "CVTSI2SS $dst, $src" %}
@@ -11274,7 +11215,7 @@
   ins_pipe( pipe_slow );
 %}
 
- instruct convXI2F_reg(regF dst, eRegI src)
+ instruct convXI2F_reg(regF dst, rRegI src)
 %{
   predicate( UseSSE>=2 && UseXmmI2F );
   match(Set dst (ConvI2F src));
@@ -11288,7 +11229,7 @@
   ins_pipe(pipe_slow); // XXX
 %}
 
-instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
+instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
   match(Set dst (ConvI2L src));
   effect(KILL cr);
   ins_cost(375);
@@ -11300,7 +11241,7 @@
 %}
 
 // Zero-extend convert int to long
-instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
+instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
   match(Set dst (AndL (ConvI2L src) mask) );
   effect( KILL flags );
   ins_cost(250);
@@ -11380,7 +11321,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct convL2I_reg( eRegI dst, eRegL src ) %{
+instruct convL2I_reg( rRegI dst, eRegL src ) %{
   match(Set dst (ConvL2I src));
   effect( DEF dst, USE src );
   format %{ "MOV    $dst,$src.lo" %}
@@ -11389,7 +11330,7 @@
 %}
 
 
-instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
+instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
   match(Set dst (MoveF2I src));
   effect( DEF dst, USE src );
   ins_cost(100);
@@ -11424,7 +11365,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{
+instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
   predicate(UseSSE>=2);
   match(Set dst (MoveF2I src));
   effect( DEF dst, USE src );
@@ -11436,7 +11377,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
+instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
   match(Set dst (MoveI2F src));
   effect( DEF dst, USE src );
 
@@ -11476,7 +11417,7 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{
+instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
   predicate(UseSSE>=2);
   match(Set dst (MoveI2F src));
   effect( DEF dst, USE src );
@@ -11610,186 +11551,6 @@
   ins_pipe( pipe_slow );
 %}
 
-// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_reg(regD dst, regD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate8B src));
-  format %{ "MOVDQA  $dst,$src\n\t"
-            "PUNPCKLBW $dst,$dst\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode %{
-    if ($dst$$reg != $src$$reg) {
-      __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
-    }
-    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_eRegI(regD dst, eRegI src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate8B src));
-  format %{ "MOVD    $dst,$src\n\t"
-            "PUNPCKLBW $dst,$dst\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Replicate scalar zero to packed byte (1 byte) values in xmm
-instruct Repl8B_immI0(regD dst, immI0 zero) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate8B zero));
-  format %{ "PXOR  $dst,$dst\t! replicate8B" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed shore (2 byte) values in xmm
-instruct Repl4S_reg(regD dst, regD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate4S src));
-  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
-  ins_encode %{
-    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed shore (2 byte) values in xmm
-instruct Repl4S_eRegI(regD dst, eRegI src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate4S src));
-  format %{ "MOVD    $dst,$src\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed short (2 byte) values in xmm
-instruct Repl4S_immI0(regD dst, immI0 zero) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate4S zero));
-  format %{ "PXOR  $dst,$dst\t! replicate4S" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_reg(regD dst, regD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate4C src));
-  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
-  ins_encode %{
-    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_eRegI(regD dst, eRegI src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate4C src));
-  format %{ "MOVD    $dst,$src\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed char (2 byte) values in xmm
-instruct Repl4C_immI0(regD dst, immI0 zero) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate4C zero));
-  format %{ "PXOR  $dst,$dst\t! replicate4C" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_reg(regD dst, regD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate2I src));
-  format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_eRegI(regD dst, eRegI src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate2I src));
-  format %{ "MOVD   $dst,$src\n\t"
-            "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed integer (2 byte) values in xmm
-instruct Repl2I_immI0(regD dst, immI0 zero) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate2I zero));
-  format %{ "PXOR  $dst,$dst\t! replicate2I" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_reg(regD dst, regD src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate2F src));
-  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_regF(regD dst, regF src) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate2F src));
-  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_immF0(regD dst, immF0 zero) %{
-  predicate(UseSSE>=2);
-  match(Set dst (Replicate2F zero));
-  format %{ "PXOR  $dst,$dst\t! replicate2F" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
 
 // =======================================================================
 // fast clearing of an array
@@ -11898,7 +11659,7 @@
 
 //----------Control Flow Instructions------------------------------------------
 // Signed compare Instructions
-instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
+instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
   match(Set cr (CmpI op1 op2));
   effect( DEF cr, USE op1, USE op2 );
   format %{ "CMP    $op1,$op2" %}
@@ -11907,7 +11668,7 @@
   ins_pipe( ialu_cr_reg_reg );
 %}
 
-instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
+instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
   match(Set cr (CmpI op1 op2));
   effect( DEF cr, USE op1 );
   format %{ "CMP    $op1,$op2" %}
@@ -11918,7 +11679,7 @@
 %}
 
 // Cisc-spilled version of cmpI_eReg
-instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
+instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
   match(Set cr (CmpI op1 (LoadI op2)));
 
   format %{ "CMP    $op1,$op2" %}
@@ -11928,7 +11689,7 @@
   ins_pipe( ialu_cr_reg_mem );
 %}
 
-instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
+instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
   match(Set cr (CmpI src zero));
   effect( DEF cr, USE src );
 
@@ -11938,7 +11699,7 @@
   ins_pipe( ialu_cr_reg_imm );
 %}
 
-instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
+instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
   match(Set cr (CmpI (AndI src con) zero));
 
   format %{ "TEST   $src,$con" %}
@@ -11947,7 +11708,7 @@
   ins_pipe( ialu_cr_reg_imm );
 %}
 
-instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
+instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
   match(Set cr (CmpI (AndI src mem) zero));
 
   format %{ "TEST   $src,$mem" %}
@@ -11958,7 +11719,7 @@
 
 // Unsigned compare Instructions; really, same as signed except they
 // produce an eFlagsRegU instead of eFlagsReg.
-instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
+instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
   match(Set cr (CmpU op1 op2));
 
   format %{ "CMPu   $op1,$op2" %}
@@ -11967,7 +11728,7 @@
   ins_pipe( ialu_cr_reg_reg );
 %}
 
-instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
+instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
   match(Set cr (CmpU op1 op2));
 
   format %{ "CMPu   $op1,$op2" %}
@@ -11977,7 +11738,7 @@
 %}
 
 // // Cisc-spilled version of cmpU_eReg
-instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
+instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
   match(Set cr (CmpU op1 (LoadI op2)));
 
   format %{ "CMPu   $op1,$op2" %}
@@ -11988,7 +11749,7 @@
 %}
 
 // // Cisc-spilled version of cmpU_eReg
-//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
+//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
 //  match(Set cr (CmpU (LoadI op1) op2));
 //
 //  format %{ "CMPu   $op1,$op2" %}
@@ -11997,7 +11758,7 @@
 //  ins_encode( OpcP, RegMem( op1, op2) );
 //%}
 
-instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
+instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
   match(Set cr (CmpU src zero));
 
   format %{ "TESTu  $src,$src" %}
@@ -12093,7 +11854,7 @@
 //   *** Min and Max using the conditional move are slower than the
 //   *** branch version on a Pentium III.
 // // Conditional move for min
-//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
+//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
 //  effect( USE_DEF op2, USE op1, USE cr );
 //  format %{ "CMOVlt $op2,$op1\t! min" %}
 //  opcode(0x4C,0x0F);
@@ -12102,7 +11863,7 @@
 //%}
 //
 //// Min Register with Register (P6 version)
-//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
+//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
 //  predicate(VM_Version::supports_cmov() );
 //  match(Set op2 (MinI op1 op2));
 //  ins_cost(200);
@@ -12114,7 +11875,7 @@
 //%}
 
 // Min Register with Register (generic version)
-instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
+instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
   match(Set dst (MinI dst src));
   effect(KILL flags);
   ins_cost(300);
@@ -12129,7 +11890,7 @@
 //   *** Min and Max using the conditional move are slower than the
 //   *** branch version on a Pentium III.
 // // Conditional move for max
-//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
+//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
 //  effect( USE_DEF op2, USE op1, USE cr );
 //  format %{ "CMOVgt $op2,$op1\t! max" %}
 //  opcode(0x4F,0x0F);
@@ -12138,7 +11899,7 @@
 //%}
 //
 // // Max Register with Register (P6 version)
-//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
+//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
 //  predicate(VM_Version::supports_cmov() );
 //  match(Set op2 (MaxI op1 op2));
 //  ins_cost(200);
@@ -12150,7 +11911,7 @@
 //%}
 
 // Max Register with Register (generic version)
-instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
+instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
   match(Set dst (MaxI dst src));
   effect(KILL flags);
   ins_cost(300);
@@ -12211,7 +11972,7 @@
 // ============================================================================
 // Branch Instructions
 // Jump Table
-instruct jumpXtnd(eRegI switch_val) %{
+instruct jumpXtnd(rRegI switch_val) %{
   match(Jump switch_val);
   ins_cost(350);
   format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
@@ -12629,7 +12390,7 @@
 // Manifest a CmpL result in the normal flags.  Only good for LT or GE
 // compares.  Can be used for LE or GT compares by reversing arguments.
 // NOT GOOD FOR EQ/NE tests.
-instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
+instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
   match( Set flags (CmpL src1 src2 ));
   effect( TEMP tmp );
   ins_cost(300);
@@ -12675,7 +12436,7 @@
 %}
 
 // Compare 2 longs and CMOVE ints.
-instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
+instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
@@ -12685,7 +12446,7 @@
   ins_pipe( pipe_cmov_reg );
 %}
 
-instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
+instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
   ins_cost(250);
@@ -12746,7 +12507,7 @@
 
 //======
 // Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
-instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
+instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
   match( Set flags (CmpL src zero ));
   effect(TEMP tmp);
   ins_cost(200);
@@ -12803,7 +12564,7 @@
 %}
 
 // Compare 2 longs and CMOVE ints.
-instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
+instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
@@ -12813,7 +12574,7 @@
   ins_pipe( pipe_cmov_reg );
 %}
 
-instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
+instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
   ins_cost(250);
@@ -12875,7 +12636,7 @@
 //======
 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
 // Same as cmpL_reg_flags_LEGT except must negate src
-instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
+instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
   match( Set flags (CmpL src zero ));
   effect( TEMP tmp );
   ins_cost(300);
@@ -12889,7 +12650,7 @@
 // Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
 // Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
 // requires a commuted test to get the same result.
-instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
+instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
   match( Set flags (CmpL src1 src2 ));
   effect( TEMP tmp );
   ins_cost(300);
@@ -12936,7 +12697,7 @@
 %}
 
 // Compare 2 longs and CMOVE ints.
-instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
+instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
   match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
   ins_cost(200);
@@ -12946,7 +12707,7 @@
   ins_pipe( pipe_cmov_reg );
 %}
 
-instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
+instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
   predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
   match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
   ins_cost(250);
@@ -13275,11 +13036,11 @@
 // ---------EXAMPLE----------------------------------------------------------
 //
 // // pertinent parts of existing instructions in architecture description
-// instruct movI(eRegI dst, eRegI src) %{
+// instruct movI(rRegI dst, rRegI src) %{
 //   match(Set dst (CopyI src));
 // %}
 //
-// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
+// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
 //   match(Set dst (AddI dst src));
 //   effect(KILL cr);
 // %}
@@ -13324,11 +13085,11 @@
 // %}
 
 // // Change load of spilled value to only a spill
-// instruct storeI(memory mem, eRegI src) %{
+// instruct storeI(memory mem, rRegI src) %{
 //   match(Set mem (StoreI mem src));
 // %}
 //
-// instruct loadI(eRegI dst, memory mem) %{
+// instruct loadI(rRegI dst, memory mem) %{
 //   match(Set dst (LoadI mem));
 // %}
 //
--- a/src/cpu/x86/vm/x86_64.ad	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Fri Jun 29 17:04:39 2012 -0700
@@ -131,102 +131,6 @@
 
 // Floating Point Registers
 
-// XMM registers.  128-bit registers or 4 words each, labeled (a)-d.
-// Word a in each register holds a Float, words ab hold a Double.  We
-// currently do not use the SIMD capabilities, so registers cd are
-// unused at the moment.
-// XMM8-XMM15 must be encoded with REX.
-// Linux ABI:   No register preserved across function calls
-//              XMM0-XMM7 might hold parameters
-// Windows ABI: XMM6-XMM15 preserved across function calls
-//              XMM0-XMM3 might hold parameters
-
-reg_def XMM0   (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg());
-reg_def XMM0_H (SOC, SOC, Op_RegF,  0, xmm0->as_VMReg()->next());
-
-reg_def XMM1   (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg());
-reg_def XMM1_H (SOC, SOC, Op_RegF,  1, xmm1->as_VMReg()->next());
-
-reg_def XMM2   (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg());
-reg_def XMM2_H (SOC, SOC, Op_RegF,  2, xmm2->as_VMReg()->next());
-
-reg_def XMM3   (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg());
-reg_def XMM3_H (SOC, SOC, Op_RegF,  3, xmm3->as_VMReg()->next());
-
-reg_def XMM4   (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg());
-reg_def XMM4_H (SOC, SOC, Op_RegF,  4, xmm4->as_VMReg()->next());
-
-reg_def XMM5   (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg());
-reg_def XMM5_H (SOC, SOC, Op_RegF,  5, xmm5->as_VMReg()->next());
-
-#ifdef _WIN64
-
-reg_def XMM6   (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg());
-reg_def XMM6_H (SOC, SOE, Op_RegF,  6, xmm6->as_VMReg()->next());
-
-reg_def XMM7   (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg());
-reg_def XMM7_H (SOC, SOE, Op_RegF,  7, xmm7->as_VMReg()->next());
-
-reg_def XMM8   (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg());
-reg_def XMM8_H (SOC, SOE, Op_RegF,  8, xmm8->as_VMReg()->next());
-
-reg_def XMM9   (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg());
-reg_def XMM9_H (SOC, SOE, Op_RegF,  9, xmm9->as_VMReg()->next());
-
-reg_def XMM10  (SOC, SOE, Op_RegF, 10, xmm10->as_VMReg());
-reg_def XMM10_H(SOC, SOE, Op_RegF, 10, xmm10->as_VMReg()->next());
-
-reg_def XMM11  (SOC, SOE, Op_RegF, 11, xmm11->as_VMReg());
-reg_def XMM11_H(SOC, SOE, Op_RegF, 11, xmm11->as_VMReg()->next());
-
-reg_def XMM12  (SOC, SOE, Op_RegF, 12, xmm12->as_VMReg());
-reg_def XMM12_H(SOC, SOE, Op_RegF, 12, xmm12->as_VMReg()->next());
-
-reg_def XMM13  (SOC, SOE, Op_RegF, 13, xmm13->as_VMReg());
-reg_def XMM13_H(SOC, SOE, Op_RegF, 13, xmm13->as_VMReg()->next());
-
-reg_def XMM14  (SOC, SOE, Op_RegF, 14, xmm14->as_VMReg());
-reg_def XMM14_H(SOC, SOE, Op_RegF, 14, xmm14->as_VMReg()->next());
-
-reg_def XMM15  (SOC, SOE, Op_RegF, 15, xmm15->as_VMReg());
-reg_def XMM15_H(SOC, SOE, Op_RegF, 15, xmm15->as_VMReg()->next());
-
-#else
-
-reg_def XMM6   (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg());
-reg_def XMM6_H (SOC, SOC, Op_RegF,  6, xmm6->as_VMReg()->next());
-
-reg_def XMM7   (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg());
-reg_def XMM7_H (SOC, SOC, Op_RegF,  7, xmm7->as_VMReg()->next());
-
-reg_def XMM8   (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg());
-reg_def XMM8_H (SOC, SOC, Op_RegF,  8, xmm8->as_VMReg()->next());
-
-reg_def XMM9   (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg());
-reg_def XMM9_H (SOC, SOC, Op_RegF,  9, xmm9->as_VMReg()->next());
-
-reg_def XMM10  (SOC, SOC, Op_RegF, 10, xmm10->as_VMReg());
-reg_def XMM10_H(SOC, SOC, Op_RegF, 10, xmm10->as_VMReg()->next());
-
-reg_def XMM11  (SOC, SOC, Op_RegF, 11, xmm11->as_VMReg());
-reg_def XMM11_H(SOC, SOC, Op_RegF, 11, xmm11->as_VMReg()->next());
-
-reg_def XMM12  (SOC, SOC, Op_RegF, 12, xmm12->as_VMReg());
-reg_def XMM12_H(SOC, SOC, Op_RegF, 12, xmm12->as_VMReg()->next());
-
-reg_def XMM13  (SOC, SOC, Op_RegF, 13, xmm13->as_VMReg());
-reg_def XMM13_H(SOC, SOC, Op_RegF, 13, xmm13->as_VMReg()->next());
-
-reg_def XMM14  (SOC, SOC, Op_RegF, 14, xmm14->as_VMReg());
-reg_def XMM14_H(SOC, SOC, Op_RegF, 14, xmm14->as_VMReg()->next());
-
-reg_def XMM15  (SOC, SOC, Op_RegF, 15, xmm15->as_VMReg());
-reg_def XMM15_H(SOC, SOC, Op_RegF, 15, xmm15->as_VMReg()->next());
-
-#endif // _WIN64
-
-reg_def RFLAGS(SOC, SOC, 0, 16, VMRegImpl::Bad());
-
 // Specify priority of register selection within phases of register
 // allocation.  Highest priority is first.  A useful heuristic is to
 // give registers a low priority when they are required by machine
@@ -252,26 +156,6 @@
                    R15,         R15_H,
                    RSP,         RSP_H);
 
-// XXX probably use 8-15 first on Linux
-alloc_class chunk1(XMM0,  XMM0_H,
-                   XMM1,  XMM1_H,
-                   XMM2,  XMM2_H,
-                   XMM3,  XMM3_H,
-                   XMM4,  XMM4_H,
-                   XMM5,  XMM5_H,
-                   XMM6,  XMM6_H,
-                   XMM7,  XMM7_H,
-                   XMM8,  XMM8_H,
-                   XMM9,  XMM9_H,
-                   XMM10, XMM10_H,
-                   XMM11, XMM11_H,
-                   XMM12, XMM12_H,
-                   XMM13, XMM13_H,
-                   XMM14, XMM14_H,
-                   XMM15, XMM15_H);
-
-alloc_class chunk2(RFLAGS);
-
 
 //----------Architecture Description Register Classes--------------------------
 // Several register classes are automatically defined based upon information in
@@ -501,46 +385,7 @@
 // Singleton class for instruction pointer
 // reg_class ip_reg(RIP);
 
-// Singleton class for condition codes
-reg_class int_flags(RFLAGS);
-
-// Class for all float registers
-reg_class float_reg(XMM0,
-                    XMM1,
-                    XMM2,
-                    XMM3,
-                    XMM4,
-                    XMM5,
-                    XMM6,
-                    XMM7,
-                    XMM8,
-                    XMM9,
-                    XMM10,
-                    XMM11,
-                    XMM12,
-                    XMM13,
-                    XMM14,
-                    XMM15);
-
-// Class for all double registers
-reg_class double_reg(XMM0,  XMM0_H,
-                     XMM1,  XMM1_H,
-                     XMM2,  XMM2_H,
-                     XMM3,  XMM3_H,
-                     XMM4,  XMM4_H,
-                     XMM5,  XMM5_H,
-                     XMM6,  XMM6_H,
-                     XMM7,  XMM7_H,
-                     XMM8,  XMM8_H,
-                     XMM9,  XMM9_H,
-                     XMM10, XMM10_H,
-                     XMM11, XMM11_H,
-                     XMM12, XMM12_H,
-                     XMM13, XMM13_H,
-                     XMM14, XMM14_H,
-                     XMM15, XMM15_H);
-%}
-
+%}
 
 //----------SOURCE BLOCK-------------------------------------------------------
 // This is a block of C++ code which provides values, functions, and
@@ -1027,12 +872,84 @@
   return rc_float;
 }
 
+// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
+static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
+                          int src_hi, int dst_hi, uint ireg, outputStream* st);
+
+static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
+                            int stack_offset, int reg, uint ireg, outputStream* st);
+
+static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset,
+                                      int dst_offset, uint ireg, outputStream* st) {
+  if (cbuf) {
+    MacroAssembler _masm(cbuf);
+    switch (ireg) {
+    case Op_VecS:
+      __ movq(Address(rsp, -8), rax);
+      __ movl(rax, Address(rsp, src_offset));
+      __ movl(Address(rsp, dst_offset), rax);
+      __ movq(rax, Address(rsp, -8));
+      break;
+    case Op_VecD:
+      __ pushq(Address(rsp, src_offset));
+      __ popq (Address(rsp, dst_offset));
+      break;
+    case Op_VecX:
+      __ pushq(Address(rsp, src_offset));
+      __ popq (Address(rsp, dst_offset));
+      __ pushq(Address(rsp, src_offset+8));
+      __ popq (Address(rsp, dst_offset+8));
+      break;
+    case Op_VecY:
+      __ vmovdqu(Address(rsp, -32), xmm0);
+      __ vmovdqu(xmm0, Address(rsp, src_offset));
+      __ vmovdqu(Address(rsp, dst_offset), xmm0);
+      __ vmovdqu(xmm0, Address(rsp, -32));
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+#ifndef PRODUCT
+  } else {
+    switch (ireg) {
+    case Op_VecS:
+      st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
+                "movl    rax, [rsp + #%d]\n\t"
+                "movl    [rsp + #%d], rax\n\t"
+                "movq    rax, [rsp - #8]",
+                src_offset, dst_offset);
+      break;
+    case Op_VecD:
+      st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
+                "popq    [rsp + #%d]",
+                src_offset, dst_offset);
+      break;
+     case Op_VecX:
+      st->print("pushq   [rsp + #%d]\t# 128-bit mem-mem spill\n\t"
+                "popq    [rsp + #%d]\n\t"
+                "pushq   [rsp + #%d]\n\t"
+                "popq    [rsp + #%d]",
+                src_offset, dst_offset, src_offset+8, dst_offset+8);
+      break;
+    case Op_VecY:
+      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
+                "vmovdqu xmm0, [rsp + #%d]\n\t"
+                "vmovdqu [rsp + #%d], xmm0\n\t"
+                "vmovdqu xmm0, [rsp - #32]",
+                src_offset, dst_offset);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+#endif
+  }
+}
+
 uint MachSpillCopyNode::implementation(CodeBuffer* cbuf,
                                        PhaseRegAlloc* ra_,
                                        bool do_size,
-                                       outputStream* st) const
-{
-
+                                       outputStream* st) const {
+  assert(cbuf != NULL || st  != NULL, "sanity");
   // Get registers to move
   OptoReg::Name src_second = ra_->get_reg_second(in(1));
   OptoReg::Name src_first = ra_->get_reg_first(in(1));
@@ -1050,7 +967,30 @@
   if (src_first == dst_first && src_second == dst_second) {
     // Self copy, no move
     return 0;
-  } else if (src_first_rc == rc_stack) {
+  }
+  if (bottom_type()->isa_vect() != NULL) {
+    uint ireg = ideal_reg();
+    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
+    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY), "sanity");
+    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
+      // mem -> mem
+      int src_offset = ra_->reg2offset(src_first);
+      int dst_offset = ra_->reg2offset(dst_first);
+      vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st);
+    } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) {
+      vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st);
+    } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) {
+      int stack_offset = ra_->reg2offset(dst_first);
+      vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st);
+    } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) {
+      int stack_offset = ra_->reg2offset(src_first);
+      vec_spill_helper(cbuf, false, true,  stack_offset, dst_first, ireg, st);
+    } else {
+      ShouldNotReachHere();
+    }
+    return 0;
+  }
+  if (src_first_rc == rc_stack) {
     // mem ->
     if (dst_first_rc == rc_stack) {
       // mem -> mem
@@ -1061,23 +1001,16 @@
         int src_offset = ra_->reg2offset(src_first);
         int dst_offset = ra_->reg2offset(dst_first);
         if (cbuf) {
-          emit_opcode(*cbuf, 0xFF);
-          encode_RegMem(*cbuf, RSI_enc, RSP_enc, 0x4, 0, src_offset, false);
-
-          emit_opcode(*cbuf, 0x8F);
-          encode_RegMem(*cbuf, RAX_enc, RSP_enc, 0x4, 0, dst_offset, false);
-
+          MacroAssembler _masm(cbuf);
+          __ pushq(Address(rsp, src_offset));
+          __ popq (Address(rsp, dst_offset));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("pushq   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
-                     "popq    [rsp + #%d]",
-                     src_offset,
-                     dst_offset);
+                    "popq    [rsp + #%d]",
+                     src_offset, dst_offset);
 #endif
         }
-        return
-          3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) +
-          3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4));
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1086,46 +1019,22 @@
         int src_offset = ra_->reg2offset(src_first);
         int dst_offset = ra_->reg2offset(dst_first);
         if (cbuf) {
-          emit_opcode(*cbuf, Assembler::REX_W);
-          emit_opcode(*cbuf, 0x89);
-          emit_opcode(*cbuf, 0x44);
-          emit_opcode(*cbuf, 0x24);
-          emit_opcode(*cbuf, 0xF8);
-
-          emit_opcode(*cbuf, 0x8B);
-          encode_RegMem(*cbuf,
-                        RAX_enc,
-                        RSP_enc, 0x4, 0, src_offset,
-                        false);
-
-          emit_opcode(*cbuf, 0x89);
-          encode_RegMem(*cbuf,
-                        RAX_enc,
-                        RSP_enc, 0x4, 0, dst_offset,
-                        false);
-
-          emit_opcode(*cbuf, Assembler::REX_W);
-          emit_opcode(*cbuf, 0x8B);
-          emit_opcode(*cbuf, 0x44);
-          emit_opcode(*cbuf, 0x24);
-          emit_opcode(*cbuf, 0xF8);
-
+          MacroAssembler _masm(cbuf);
+          __ movq(Address(rsp, -8), rax);
+          __ movl(rax, Address(rsp, src_offset));
+          __ movl(Address(rsp, dst_offset), rax);
+          __ movq(rax, Address(rsp, -8));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movq    [rsp - #8], rax\t# 32-bit mem-mem spill\n\t"
-                     "movl    rax, [rsp + #%d]\n\t"
-                     "movl    [rsp + #%d], rax\n\t"
-                     "movq    rax, [rsp - #8]",
-                     src_offset,
-                     dst_offset);
+                    "movl    rax, [rsp + #%d]\n\t"
+                    "movl    [rsp + #%d], rax\n\t"
+                    "movq    rax, [rsp - #8]",
+                     src_offset, dst_offset);
 #endif
         }
-        return
-          5 + // movq
-          3 + ((src_offset == 0) ? 0 : (src_offset < 0x80 ? 1 : 4)) + // movl
-          3 + ((dst_offset == 0) ? 0 : (dst_offset < 0x80 ? 1 : 4)) + // movl
-          5; // movq
       }
+      return 0;
     } else if (dst_first_rc == rc_int) {
       // mem -> gpr
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
@@ -1133,52 +1042,32 @@
         // 64-bit
         int offset = ra_->reg2offset(src_first);
         if (cbuf) {
-          if (Matcher::_regEncode[dst_first] < 8) {
-            emit_opcode(*cbuf, Assembler::REX_W);
-          } else {
-            emit_opcode(*cbuf, Assembler::REX_WR);
-          }
-          emit_opcode(*cbuf, 0x8B);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[dst_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movq(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movq    %s, [rsp + #%d]\t# spill",
                      Matcher::regName[dst_first],
                      offset);
 #endif
         }
-        return
-          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         int offset = ra_->reg2offset(src_first);
         if (cbuf) {
-          if (Matcher::_regEncode[dst_first] >= 8) {
-            emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x8B);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[dst_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movl(as_Register(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movl    %s, [rsp + #%d]\t# spill",
                      Matcher::regName[dst_first],
                      offset);
 #endif
         }
-        return
-          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[dst_first] < 8)
-           ? 3
-           : 4); // REX
       }
+      return 0;
     } else if (dst_first_rc == rc_float) {
       // mem-> xmm
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
@@ -1189,18 +1078,13 @@
           MacroAssembler _masm(cbuf);
           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("%s  %s, [rsp + #%d]\t# spill",
                      UseXmmLoadAndClearUpper ? "movsd " : "movlpd",
                      Matcher::regName[dst_first],
                      offset);
 #endif
         }
-        return
-          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[dst_first] >= 8)
-           ? 6
-           : (5 + ((UseAVX>0)?1:0))); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1210,18 +1094,14 @@
           MacroAssembler _masm(cbuf);
           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), Address(rsp, offset));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movss   %s, [rsp + #%d]\t# spill",
                      Matcher::regName[dst_first],
                      offset);
 #endif
         }
-        return
-          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[dst_first] >= 8)
-           ? 6
-           : (5 + ((UseAVX>0)?1:0))); // REX
       }
+      return 0;
     }
   } else if (src_first_rc == rc_int) {
     // gpr ->
@@ -1232,113 +1112,65 @@
         // 64-bit
         int offset = ra_->reg2offset(dst_first);
         if (cbuf) {
-          if (Matcher::_regEncode[src_first] < 8) {
-            emit_opcode(*cbuf, Assembler::REX_W);
-          } else {
-            emit_opcode(*cbuf, Assembler::REX_WR);
-          }
-          emit_opcode(*cbuf, 0x89);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[src_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movq(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movq    [rsp + #%d], %s\t# spill",
                      offset,
                      Matcher::regName[src_first]);
 #endif
         }
-        return ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) + 4; // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         int offset = ra_->reg2offset(dst_first);
         if (cbuf) {
-          if (Matcher::_regEncode[src_first] >= 8) {
-            emit_opcode(*cbuf, Assembler::REX_R);
-          }
-          emit_opcode(*cbuf, 0x89);
-          encode_RegMem(*cbuf,
-                        Matcher::_regEncode[src_first],
-                        RSP_enc, 0x4, 0, offset,
-                        false);
+          MacroAssembler _masm(cbuf);
+          __ movl(Address(rsp, offset), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movl    [rsp + #%d], %s\t# spill",
                      offset,
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[src_first] < 8)
-           ? 3
-           : 4); // REX
       }
+      return 0;
     } else if (dst_first_rc == rc_int) {
       // gpr -> gpr
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
           (dst_first & 1) == 0 && dst_first + 1 == dst_second) {
         // 64-bit
         if (cbuf) {
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_W);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WB);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_WR);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_WRB);
-            }
-          }
-          emit_opcode(*cbuf, 0x8B);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movq(as_Register(Matcher::_regEncode[dst_first]),
+                  as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movq    %s, %s\t# spill",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return 3; // REX
+        return 0;
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
         assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform");
         if (cbuf) {
-          if (Matcher::_regEncode[dst_first] < 8) {
-            if (Matcher::_regEncode[src_first] >= 8) {
-              emit_opcode(*cbuf, Assembler::REX_B);
-            }
-          } else {
-            if (Matcher::_regEncode[src_first] < 8) {
-              emit_opcode(*cbuf, Assembler::REX_R);
-            } else {
-              emit_opcode(*cbuf, Assembler::REX_RB);
-            }
-          }
-          emit_opcode(*cbuf, 0x8B);
-          emit_rm(*cbuf, 0x3,
-                  Matcher::_regEncode[dst_first] & 7,
-                  Matcher::_regEncode[src_first] & 7);
+          MacroAssembler _masm(cbuf);
+          __ movl(as_Register(Matcher::_regEncode[dst_first]),
+                  as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movl    %s, %s\t# spill",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          (Matcher::_regEncode[src_first] < 8 && Matcher::_regEncode[dst_first] < 8)
-          ? 2
-          : 3; // REX
+        return 0;
       }
     } else if (dst_first_rc == rc_float) {
       // gpr -> xmm
@@ -1349,13 +1181,12 @@
           MacroAssembler _masm(cbuf);
           __ movdq( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movdq   %s, %s\t# spill",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return 5; // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1364,17 +1195,14 @@
           MacroAssembler _masm(cbuf);
           __ movdl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movdl   %s, %s\t# spill",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
-          ? 5
-          : (4 + ((UseAVX>0)?1:0)); // REX
       }
+      return 0;
     }
   } else if (src_first_rc == rc_float) {
     // xmm ->
@@ -1388,17 +1216,12 @@
           MacroAssembler _masm(cbuf);
           __ movdbl( Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movsd   [rsp + #%d], %s\t# spill",
                      offset,
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[src_first] >= 8)
-           ? 6
-           : (5 + ((UseAVX>0)?1:0))); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1408,18 +1231,14 @@
           MacroAssembler _masm(cbuf);
           __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movss   [rsp + #%d], %s\t# spill",
                      offset,
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          ((offset == 0) ? 0 : (offset < 0x80 ? 1 : 4)) +
-          ((Matcher::_regEncode[src_first] >=8)
-           ? 6
-           : (5 + ((UseAVX>0)?1:0))); // REX
       }
+      return 0;
     } else if (dst_first_rc == rc_int) {
       // xmm -> gpr
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
@@ -1429,13 +1248,12 @@
           MacroAssembler _masm(cbuf);
           __ movdq( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movdq   %s, %s\t# spill",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return 5; // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1444,17 +1262,14 @@
           MacroAssembler _masm(cbuf);
           __ movdl( as_Register(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("movdl   %s, %s\t# spill",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
-          ? 5
-          : (4 + ((UseAVX>0)?1:0)); // REX
       }
+      return 0;
     } else if (dst_first_rc == rc_float) {
       // xmm -> xmm
       if ((src_first & 1) == 0 && src_first + 1 == src_second &&
@@ -1464,17 +1279,13 @@
           MacroAssembler _masm(cbuf);
           __ movdbl( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("%s  %s, %s\t# spill",
                      UseXmmRegToRegMoveAll ? "movapd" : "movsd ",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return
-          (Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
-          ? 5
-          : (4 + ((UseAVX>0)?1:0)); // REX
       } else {
         // 32-bit
         assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform");
@@ -1483,42 +1294,35 @@
           MacroAssembler _masm(cbuf);
           __ movflt( as_XMMRegister(Matcher::_regEncode[dst_first]), as_XMMRegister(Matcher::_regEncode[src_first]));
 #ifndef PRODUCT
-        } else if (!do_size) {
+        } else {
           st->print("%s  %s, %s\t# spill",
                      UseXmmRegToRegMoveAll ? "movaps" : "movss ",
                      Matcher::regName[dst_first],
                      Matcher::regName[src_first]);
 #endif
         }
-        return ((UseAVX>0) ? 5:
-          ((Matcher::_regEncode[src_first] >= 8 || Matcher::_regEncode[dst_first] >= 8)
-           ? (UseXmmRegToRegMoveAll ? 4 : 5)
-           : (UseXmmRegToRegMoveAll ? 3 : 4))); // REX
       }
+      return 0;
     }
   }
 
   assert(0," foo ");
   Unimplemented();
-
   return 0;
 }
 
 #ifndef PRODUCT
-void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const
-{
+void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
   implementation(NULL, ra_, false, st);
 }
 #endif
 
-void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const
-{
+void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
   implementation(&cbuf, ra_, false, NULL);
 }
 
-uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const
-{
-  return implementation(NULL, ra_, true, NULL);
+uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
+  return MachNode::size(ra_);
 }
 
 //=============================================================================
@@ -1735,16 +1539,6 @@
   return true;
 }
 
-// Vector width in bytes
-const uint Matcher::vector_width_in_bytes(void) {
-  return 8;
-}
-
-// Vector ideal reg
-const uint Matcher::vector_ideal_reg(void) {
-  return Op_RegD;
-}
-
 // Is this branch offset short enough that a short branch can be used?
 //
 // NOTE: If the platform does not provide any short branch variants, then
@@ -1831,21 +1625,21 @@
 bool Matcher::can_be_java_arg(int reg)
 {
   return
-    reg ==  RDI_num || reg ==  RDI_H_num ||
-    reg ==  RSI_num || reg ==  RSI_H_num ||
-    reg ==  RDX_num || reg ==  RDX_H_num ||
-    reg ==  RCX_num || reg ==  RCX_H_num ||
-    reg ==   R8_num || reg ==   R8_H_num ||
-    reg ==   R9_num || reg ==   R9_H_num ||
-    reg ==  R12_num || reg ==  R12_H_num ||
-    reg == XMM0_num || reg == XMM0_H_num ||
-    reg == XMM1_num || reg == XMM1_H_num ||
-    reg == XMM2_num || reg == XMM2_H_num ||
-    reg == XMM3_num || reg == XMM3_H_num ||
-    reg == XMM4_num || reg == XMM4_H_num ||
-    reg == XMM5_num || reg == XMM5_H_num ||
-    reg == XMM6_num || reg == XMM6_H_num ||
-    reg == XMM7_num || reg == XMM7_H_num;
+    reg ==  RDI_num || reg == RDI_H_num ||
+    reg ==  RSI_num || reg == RSI_H_num ||
+    reg ==  RDX_num || reg == RDX_H_num ||
+    reg ==  RCX_num || reg == RCX_H_num ||
+    reg ==   R8_num || reg ==  R8_H_num ||
+    reg ==   R9_num || reg ==  R9_H_num ||
+    reg ==  R12_num || reg == R12_H_num ||
+    reg == XMM0_num || reg == XMM0b_num ||
+    reg == XMM1_num || reg == XMM1b_num ||
+    reg == XMM2_num || reg == XMM2b_num ||
+    reg == XMM3_num || reg == XMM3b_num ||
+    reg == XMM4_num || reg == XMM4b_num ||
+    reg == XMM5_num || reg == XMM5b_num ||
+    reg == XMM6_num || reg == XMM6b_num ||
+    reg == XMM7_num || reg == XMM7b_num;
 }
 
 bool Matcher::is_spillable_arg(int reg)
@@ -3220,10 +3014,11 @@
       OptoReg::Bad, // Op_RegI
       RAX_H_num,    // Op_RegP
       OptoReg::Bad, // Op_RegF
-      XMM0_H_num,   // Op_RegD
+      XMM0b_num,    // Op_RegD
       RAX_H_num     // Op_RegL
     };
-    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 1, "missing type");
+    // Excluded flags and vector registers.
+    assert(ARRAY_SIZE(hi) == _last_machine_leaf - 5, "missing type");
     return OptoRegPair(hi[ideal_reg], lo[ideal_reg]);
   %}
 %}
@@ -3985,7 +3780,6 @@
   interface(REG_INTER);
 %}
 
-
 //----------Memory Operands----------------------------------------------------
 // Direct Memory Operand
 // operand direct(immP addr)
@@ -5416,61 +5210,6 @@
   ins_pipe(pipe_slow); // XXX
 %}
 
-// Load Aligned Packed Byte to XMM register
-instruct loadA8B(regD dst, memory mem) %{
-  match(Set dst (Load8B mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed8B" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Short to XMM register
-instruct loadA4S(regD dst, memory mem) %{
-  match(Set dst (Load4S mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed4S" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Char to XMM register
-instruct loadA4C(regD dst, memory mem) %{
-  match(Set dst (Load4C mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed4C" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Integer to XMM register
-instruct load2IU(regD dst, memory mem) %{
-  match(Set dst (Load2I mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed2I" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Load Aligned Packed Single to XMM
-instruct loadA2F(regD dst, memory mem) %{
-  match(Set dst (Load2F mem));
-  ins_cost(125);
-  format %{ "MOVQ  $dst,$mem\t! packed2F" %}
-  ins_encode %{
-    __ movq($dst$$XMMRegister, $mem$$Address);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Load Effective Address
 instruct leaP8(rRegP dst, indOffset8 mem)
 %{
@@ -6200,39 +5939,6 @@
   ins_pipe(ialu_mem_imm);
 %}
 
-// Store Aligned Packed Byte XMM register to memory
-instruct storeA8B(memory mem, regD src) %{
-  match(Set mem (Store8B mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed8B" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Store Aligned Packed Char/Short XMM register to memory
-instruct storeA4C(memory mem, regD src) %{
-  match(Set mem (Store4C mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed4C" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Store Aligned Packed Integer XMM register to memory
-instruct storeA2I(memory mem, regD src) %{
-  match(Set mem (Store2I mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed2I" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Store CMS card-mark Immediate
 instruct storeImmCM0_reg(memory mem, immI0 zero)
 %{
@@ -6258,17 +5964,6 @@
   ins_pipe(ialu_mem_imm);
 %}
 
-// Store Aligned Packed Single Float XMM register to memory
-instruct storeA2F(memory mem, regD src) %{
-  match(Set mem (Store2F mem src));
-  ins_cost(145);
-  format %{ "MOVQ  $mem,$src\t! packed2F" %}
-  ins_encode %{
-    __ movq($mem$$Address, $src$$XMMRegister);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
 // Store Float
 instruct storeF(memory mem, regF src)
 %{
@@ -10377,172 +10072,6 @@
   ins_pipe( pipe_slow );
 %}
 
-// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_reg(regD dst, regD src) %{
-  match(Set dst (Replicate8B src));
-  format %{ "MOVDQA  $dst,$src\n\t"
-            "PUNPCKLBW $dst,$dst\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode %{
-    if ($dst$$reg != $src$$reg) {
-      __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
-    }
-    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Replicate scalar to packed byte (1 byte) values in xmm
-instruct Repl8B_rRegI(regD dst, rRegI src) %{
-  match(Set dst (Replicate8B src));
-  format %{ "MOVD    $dst,$src\n\t"
-            "PUNPCKLBW $dst,$dst\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( pipe_slow );
-%}
-
-// Replicate scalar zero to packed byte (1 byte) values in xmm
-instruct Repl8B_immI0(regD dst, immI0 zero) %{
-  match(Set dst (Replicate8B zero));
-  format %{ "PXOR  $dst,$dst\t! replicate8B" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed shore (2 byte) values in xmm
-instruct Repl4S_reg(regD dst, regD src) %{
-  match(Set dst (Replicate4S src));
-  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
-  ins_encode %{
-    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed shore (2 byte) values in xmm
-instruct Repl4S_rRegI(regD dst, rRegI src) %{
-  match(Set dst (Replicate4S src));
-  format %{ "MOVD    $dst,$src\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed short (2 byte) values in xmm
-instruct Repl4S_immI0(regD dst, immI0 zero) %{
-  match(Set dst (Replicate4S zero));
-  format %{ "PXOR  $dst,$dst\t! replicate4S" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_reg(regD dst, regD src) %{
-  match(Set dst (Replicate4C src));
-  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
-  ins_encode %{
-    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed char (2 byte) values in xmm
-instruct Repl4C_rRegI(regD dst, rRegI src) %{
-  match(Set dst (Replicate4C src));
-  format %{ "MOVD    $dst,$src\n\t"
-            "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed char (2 byte) values in xmm
-instruct Repl4C_immI0(regD dst, immI0 zero) %{
-  match(Set dst (Replicate4C zero));
-  format %{ "PXOR  $dst,$dst\t! replicate4C" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_reg(regD dst, regD src) %{
-  match(Set dst (Replicate2I src));
-  format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed integer (4 byte) values in xmm
-instruct Repl2I_rRegI(regD dst, rRegI src) %{
-  match(Set dst (Replicate2I src));
-  format %{ "MOVD   $dst,$src\n\t"
-            "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
-  ins_encode %{
-    __ movdl($dst$$XMMRegister, $src$$Register);
-    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar zero to packed integer (2 byte) values in xmm
-instruct Repl2I_immI0(regD dst, immI0 zero) %{
-  match(Set dst (Replicate2I zero));
-  format %{ "PXOR  $dst,$dst\t! replicate2I" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_reg(regD dst, regD src) %{
-  match(Set dst (Replicate2F src));
-  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_regF(regD dst, regF src) %{
-  match(Set dst (Replicate2F src));
-  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
-  ins_encode %{
-    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
-// Replicate scalar to packed single precision floating point values in xmm
-instruct Repl2F_immF0(regD dst, immF0 zero) %{
-  match(Set dst (Replicate2F zero));
-  format %{ "PXOR  $dst,$dst\t! replicate2F" %}
-  ins_encode %{
-    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
-  %}
-  ins_pipe( fpu_reg_reg );
-%}
-
 
 // =======================================================================
 // fast clearing of an array
--- a/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/os_cpu/bsd_x86/vm/os_bsd_x86.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -516,7 +516,12 @@
       }
     }
 
-    if (thread->thread_state() == _thread_in_Java) {
+    // We test if stub is already set (by the stack overflow code
+    // above) so it is not overwritten by the code that follows. This
+    // check is not required on other platforms, because on other
+    // platforms we check for SIGSEGV only or SIGBUS only, where here
+    // we have to check for both SIGSEGV and SIGBUS.
+    if (thread->thread_state() == _thread_in_Java && stub == NULL) {
       // Java thread running in Java code => find exception handler if any
       // a fault inside compiled code, the interpreter, or a stub
 
--- a/src/share/vm/adlc/adlparse.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/adlc/adlparse.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -115,6 +115,12 @@
       parse_err(SYNERR, "expected one of - instruct, operand, ins_attrib, op_attrib, source, register, pipeline, encode\n     Found %s",ident);
     }
   }
+  // Add reg_class spill_regs after parsing.
+  RegisterForm *regBlock = _AD.get_registers();
+  if (regBlock == NULL) {
+    parse_err(SEMERR, "Did not declare 'register' definitions");
+  }
+  regBlock->addSpillRegClass();
 
   // Done with parsing, check consistency.
 
@@ -768,11 +774,12 @@
 
 //------------------------------reg_parse--------------------------------------
 void ADLParser::reg_parse(void) {
-
-  // Create the RegisterForm for the architecture description.
-  RegisterForm *regBlock = new RegisterForm();    // Build new Source object
-  regBlock->_linenum = linenum();
-  _AD.addForm(regBlock);
+  RegisterForm *regBlock = _AD.get_registers(); // Information about registers encoding
+  if (regBlock == NULL) {
+    // Create the RegisterForm for the architecture description.
+    regBlock = new RegisterForm();    // Build new Source object
+    _AD.addForm(regBlock);
+  }
 
   skipws();                       // Skip leading whitespace
   if (_curchar == '%' && *(_ptr+1) == '{') {
@@ -796,15 +803,11 @@
     parse_err(SYNERR, "Missing %c{ ... %c} block after register keyword.\n",'%','%');
     return;
   }
-
-  // Add reg_class spill_regs
-  regBlock->addSpillRegClass();
 }
 
 //------------------------------encode_parse-----------------------------------
 void ADLParser::encode_parse(void) {
   EncodeForm *encBlock;         // Information about instruction/operand encoding
-  char       *desc = NULL;      // String representation of encode rule
 
   _AD.getForm(&encBlock);
   if ( encBlock == NULL) {
--- a/src/share/vm/adlc/archDesc.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/adlc/archDesc.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 //
-// Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+// Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 //
 // This code is free software; you can redistribute it and/or modify it
@@ -911,12 +911,24 @@
   // Find last character in idealOp, it specifies the type
   char  last_char = 0;
   const char *ptr = idealOp;
-  for( ; *ptr != '\0'; ++ptr) {
+  for (; *ptr != '\0'; ++ptr) {
     last_char = *ptr;
   }
 
+  // Match Vector types.
+  if (strncmp(idealOp, "Vec",3)==0) {
+    switch(last_char) {
+    case 'S':  return "TypeVect::VECTS";
+    case 'D':  return "TypeVect::VECTD";
+    case 'X':  return "TypeVect::VECTX";
+    case 'Y':  return "TypeVect::VECTY";
+    default:
+      internal_err("Vector type %s with unrecognized type\n",idealOp);
+    }
+  }
+
   // !!!!!
-  switch( last_char ) {
+  switch(last_char) {
   case 'I':    return "TypeInt::INT";
   case 'P':    return "TypePtr::BOTTOM";
   case 'N':    return "TypeNarrowOop::BOTTOM";
--- a/src/share/vm/adlc/forms.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/adlc/forms.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -265,47 +265,22 @@
   if( strcmp(opType,"LoadN")==0 )  return Form::idealN;
   if( strcmp(opType,"LoadRange")==0 )  return Form::idealI;
   if( strcmp(opType,"LoadS")==0 )  return Form::idealS;
-  if( strcmp(opType,"Load16B")==0 )  return Form::idealB;
-  if( strcmp(opType,"Load8B")==0 )  return Form::idealB;
-  if( strcmp(opType,"Load4B")==0 )  return Form::idealB;
-  if( strcmp(opType,"Load8C")==0 )  return Form::idealC;
-  if( strcmp(opType,"Load4C")==0 )  return Form::idealC;
-  if( strcmp(opType,"Load2C")==0 )  return Form::idealC;
-  if( strcmp(opType,"Load8S")==0 )  return Form::idealS;
-  if( strcmp(opType,"Load4S")==0 )  return Form::idealS;
-  if( strcmp(opType,"Load2S")==0 )  return Form::idealS;
-  if( strcmp(opType,"Load2D")==0 )  return Form::idealD;
-  if( strcmp(opType,"Load4F")==0 )  return Form::idealF;
-  if( strcmp(opType,"Load2F")==0 )  return Form::idealF;
-  if( strcmp(opType,"Load4I")==0 )  return Form::idealI;
-  if( strcmp(opType,"Load2I")==0 )  return Form::idealI;
-  if( strcmp(opType,"Load2L")==0 )  return Form::idealL;
+  if( strcmp(opType,"LoadVector")==0 )  return Form::idealV;
   assert( strcmp(opType,"Load") != 0, "Must type Loads" );
   return Form::none;
 }
 
 Form::DataType Form::is_store_to_memory(const char *opType) const {
   if( strcmp(opType,"StoreB")==0)  return Form::idealB;
-  if( strcmp(opType,"StoreCM")==0)  return Form::idealB;
+  if( strcmp(opType,"StoreCM")==0) return Form::idealB;
   if( strcmp(opType,"StoreC")==0)  return Form::idealC;
   if( strcmp(opType,"StoreD")==0)  return Form::idealD;
   if( strcmp(opType,"StoreF")==0)  return Form::idealF;
   if( strcmp(opType,"StoreI")==0)  return Form::idealI;
   if( strcmp(opType,"StoreL")==0)  return Form::idealL;
   if( strcmp(opType,"StoreP")==0)  return Form::idealP;
-  if( strcmp(opType,"StoreN")==0) return Form::idealN;
-  if( strcmp(opType,"Store16B")==0)  return Form::idealB;
-  if( strcmp(opType,"Store8B")==0)  return Form::idealB;
-  if( strcmp(opType,"Store4B")==0)  return Form::idealB;
-  if( strcmp(opType,"Store8C")==0)  return Form::idealC;
-  if( strcmp(opType,"Store4C")==0)  return Form::idealC;
-  if( strcmp(opType,"Store2C")==0)  return Form::idealC;
-  if( strcmp(opType,"Store2D")==0)  return Form::idealD;
-  if( strcmp(opType,"Store4F")==0)  return Form::idealF;
-  if( strcmp(opType,"Store2F")==0)  return Form::idealF;
-  if( strcmp(opType,"Store4I")==0)  return Form::idealI;
-  if( strcmp(opType,"Store2I")==0)  return Form::idealI;
-  if( strcmp(opType,"Store2L")==0)  return Form::idealL;
+  if( strcmp(opType,"StoreN")==0)  return Form::idealN;
+  if( strcmp(opType,"StoreVector")==0 )  return Form::idealV;
   assert( strcmp(opType,"Store") != 0, "Must type Stores" );
   return Form::none;
 }
--- a/src/share/vm/adlc/forms.hpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/adlc/forms.hpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -172,7 +172,8 @@
     idealB      =  6,  // Byte    type
     idealC      =  7,  // Char    type
     idealS      =  8,  // String  type
-    idealN      =  9   // Narrow oop types
+    idealN      =  9,  // Narrow oop types
+    idealV      = 10   // Vector  type
   };
   // Convert ideal name to a DataType, return DataType::none if not a 'ConX'
   Form::DataType  ideal_to_const_type(const char *ideal_type_name) const;
--- a/src/share/vm/adlc/formsopt.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/adlc/formsopt.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -66,7 +66,7 @@
 // for spill-slots/regs.
 void RegisterForm::addSpillRegClass() {
   // Stack slots start at the next available even register number.
-  _reg_ctr = (_reg_ctr+1) & ~1;
+  _reg_ctr = (_reg_ctr+7) & ~7;
   const char *rc_name   = "stack_slots";
   RegClass   *reg_class = new RegClass(rc_name);
   reg_class->_stack_or_reg = true;
@@ -150,9 +150,14 @@
 int RegisterForm::RegMask_Size() {
   // Need at least this many words
   int words_for_regs = (_reg_ctr + 31)>>5;
-  // Add a few for incoming & outgoing arguments to calls.
+  // The array of Register Mask bits should be large enough to cover
+  // all the machine registers and all parameters that need to be passed
+  // on the stack (stack registers) up to some interesting limit.  Methods
+  // that need more parameters will NOT be compiled.  On Intel, the limit
+  // is something like 90+ parameters.
+  // Add a few (3 words == 96 bits) for incoming & outgoing arguments to calls.
   // Round up to the next doubleword size.
-  return (words_for_regs + 2 + 1) & ~1;
+  return (words_for_regs + 3 + 1) & ~1;
 }
 
 void RegisterForm::dump() {                  // Debug printer
--- a/src/share/vm/adlc/formssel.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/adlc/formssel.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -432,6 +432,14 @@
   return  _matrule->is_ideal_store();
 }
 
+// Return 'true' if this instruction matches an ideal vector node
+bool InstructForm::is_vector() const {
+  if( _matrule == NULL ) return false;
+
+  return _matrule->is_vector();
+}
+
+
 // Return the input register that must match the output register
 // If this is not required, return 0
 uint InstructForm::two_address(FormDict &globals) {
@@ -751,6 +759,9 @@
 
   if (needs_base_oop_edge(globals)) return true;
 
+  if (is_vector()) return true;
+  if (is_mach_constant()) return true;
+
   return  false;
 }
 
@@ -3381,11 +3392,8 @@
     "StoreI","StoreL","StoreP","StoreN","StoreD","StoreF" ,
     "StoreB","StoreC","Store" ,"StoreFP",
     "LoadI", "LoadUI2L", "LoadL", "LoadP" ,"LoadN", "LoadD" ,"LoadF"  ,
-    "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load"   ,
-    "Store4I","Store2I","Store2L","Store2D","Store4F","Store2F","Store16B",
-    "Store8B","Store4B","Store8C","Store4C","Store2C",
-    "Load4I" ,"Load2I" ,"Load2L" ,"Load2D" ,"Load4F" ,"Load2F" ,"Load16B" ,
-    "Load8B" ,"Load4B" ,"Load8C" ,"Load4C" ,"Load2C" ,"Load8S", "Load4S","Load2S",
+    "LoadB" , "LoadUB", "LoadUS" ,"LoadS" ,"Load" ,
+    "StoreVector", "LoadVector",
     "LoadRange", "LoadKlass", "LoadNKlass", "LoadL_unaligned", "LoadD_unaligned",
     "LoadPLocked",
     "StorePConditional", "StoreIConditional", "StoreLConditional",
@@ -3822,6 +3830,10 @@
          strcmp(opType,"RegL")==0 ||
          strcmp(opType,"RegF")==0 ||
          strcmp(opType,"RegD")==0 ||
+         strcmp(opType,"VecS")==0 ||
+         strcmp(opType,"VecD")==0 ||
+         strcmp(opType,"VecX")==0 ||
+         strcmp(opType,"VecY")==0 ||
          strcmp(opType,"Reg" )==0) ) {
       return 1;
     }
@@ -3938,19 +3950,12 @@
         strcmp(opType,"ReverseBytesL")==0 ||
         strcmp(opType,"ReverseBytesUS")==0 ||
         strcmp(opType,"ReverseBytesS")==0 ||
-        strcmp(opType,"Replicate16B")==0 ||
-        strcmp(opType,"Replicate8B")==0 ||
-        strcmp(opType,"Replicate4B")==0 ||
-        strcmp(opType,"Replicate8C")==0 ||
-        strcmp(opType,"Replicate4C")==0 ||
-        strcmp(opType,"Replicate8S")==0 ||
-        strcmp(opType,"Replicate4S")==0 ||
-        strcmp(opType,"Replicate4I")==0 ||
-        strcmp(opType,"Replicate2I")==0 ||
-        strcmp(opType,"Replicate2L")==0 ||
-        strcmp(opType,"Replicate4F")==0 ||
-        strcmp(opType,"Replicate2F")==0 ||
-        strcmp(opType,"Replicate2D")==0 ||
+        strcmp(opType,"ReplicateB")==0 ||
+        strcmp(opType,"ReplicateS")==0 ||
+        strcmp(opType,"ReplicateI")==0 ||
+        strcmp(opType,"ReplicateL")==0 ||
+        strcmp(opType,"ReplicateF")==0 ||
+        strcmp(opType,"ReplicateD")==0 ||
         0 /* 0 to line up columns nicely */ )
       return 1;
   }
@@ -4034,6 +4039,23 @@
   return ideal_load;
 }
 
+bool MatchRule::is_vector() const {
+  if( _rChild ) {
+    const char  *opType = _rChild->_opType;
+    if( strcmp(opType,"ReplicateB")==0 ||
+        strcmp(opType,"ReplicateS")==0 ||
+        strcmp(opType,"ReplicateI")==0 ||
+        strcmp(opType,"ReplicateL")==0 ||
+        strcmp(opType,"ReplicateF")==0 ||
+        strcmp(opType,"ReplicateD")==0 ||
+        strcmp(opType,"LoadVector")==0 ||
+        strcmp(opType,"StoreVector")==0 ||
+        0 /* 0 to line up columns nicely */ )
+      return true;
+  }
+  return false;
+}
+
 
 bool MatchRule::skip_antidep_check() const {
   // Some loads operate on what is effectively immutable memory so we
--- a/src/share/vm/adlc/formssel.hpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/adlc/formssel.hpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -160,6 +160,7 @@
   virtual bool        is_ideal_safepoint() const; // node matches 'SafePoint'
   virtual bool        is_ideal_nop() const;     // node matches 'Nop'
   virtual bool        is_ideal_control() const; // control node
+  virtual bool        is_vector() const;        // vector instruction
 
   virtual Form::CallType is_ideal_call() const; // matches ideal 'Call'
   virtual Form::DataType is_ideal_load() const; // node matches ideal 'LoadXNode'
@@ -1011,6 +1012,7 @@
   bool       is_ideal_goto() const;    // node matches ideal 'Goto'
   bool       is_ideal_loopEnd() const; // node matches ideal 'LoopEnd'
   bool       is_ideal_bool() const;    // node matches ideal 'Bool'
+  bool       is_vector() const;        // vector instruction
   Form::DataType is_ideal_load() const;// node matches ideal 'LoadXNode'
   // Should antidep checks be disabled for this rule
   // See definition of MatchRule::skip_antidep_check
--- a/src/share/vm/adlc/main.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/adlc/main.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -250,6 +250,7 @@
   AD.addInclude(AD._HPP_file, "opto/node.hpp");
   AD.addInclude(AD._HPP_file, "opto/regalloc.hpp");
   AD.addInclude(AD._HPP_file, "opto/subnode.hpp");
+  AD.addInclude(AD._HPP_file, "opto/vectornode.hpp");
   AD.addInclude(AD._CPP_CLONE_file, "precompiled.hpp");
   AD.addInclude(AD._CPP_CLONE_file, "adfiles", get_basename(AD._HPP_file._name));
   AD.addInclude(AD._CPP_EXPAND_file, "precompiled.hpp");
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/classfile/altHashing.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -0,0 +1,304 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "classfile/altHashing.hpp"
+#include "classfile/symbolTable.hpp"
+#include "classfile/systemDictionary.hpp"
+#include "oops/markOop.hpp"
+#include "runtime/thread.hpp"
+
+// Get the hash code of the classes mirror if it exists, otherwise just
+// return a random number, which is one of the possible hash code used for
+// objects.  We don't want to call the synchronizer hash code to install
+// this value because it may safepoint.
+intptr_t object_hash(klassOop k) {
+  intptr_t hc = k->java_mirror()->mark()->hash();
+  return hc != markOopDesc::no_hash ? hc : os::random();
+}
+
+// Seed value used for each alternative hash calculated.
+jint AltHashing::compute_seed() {
+  jlong nanos = os::javaTimeNanos();
+  jlong now = os::javaTimeMillis();
+  jint SEED_MATERIAL[8] = {
+            (jint) object_hash(SystemDictionary::String_klass()),
+            (jint) object_hash(SystemDictionary::System_klass()),
+            (jint) os::random(),  // current thread isn't a java thread
+            (jint) (((julong)nanos) >> 32),
+            (jint) nanos,
+            (jint) (((julong)now) >> 32),
+            (jint) now,
+            (jint) (os::javaTimeNanos() >> 2)
+  };
+
+  return murmur3_32(SEED_MATERIAL, 8);
+}
+
+
+// Murmur3 hashing for Symbol
+jint AltHashing::murmur3_32(jint seed, const jbyte* data, int len) {
+  jint h1 = seed;
+  int count = len;
+  int offset = 0;
+
+  // body
+  while (count >= 4) {
+    jint k1 = (data[offset] & 0x0FF)
+        | (data[offset + 1] & 0x0FF) << 8
+        | (data[offset + 2] & 0x0FF) << 16
+        | data[offset + 3] << 24;
+
+    count -= 4;
+    offset += 4;
+
+    k1 *= 0xcc9e2d51;
+    k1 = Integer_rotateLeft(k1, 15);
+    k1 *= 0x1b873593;
+
+    h1 ^= k1;
+    h1 = Integer_rotateLeft(h1, 13);
+    h1 = h1 * 5 + 0xe6546b64;
+  }
+
+  // tail
+
+  if (count > 0) {
+    jint k1 = 0;
+
+    switch (count) {
+      case 3:
+        k1 ^= (data[offset + 2] & 0xff) << 16;
+      // fall through
+      case 2:
+        k1 ^= (data[offset + 1] & 0xff) << 8;
+      // fall through
+      case 1:
+        k1 ^= (data[offset] & 0xff);
+      // fall through
+      default:
+        k1 *= 0xcc9e2d51;
+        k1 = Integer_rotateLeft(k1, 15);
+        k1 *= 0x1b873593;
+        h1 ^= k1;
+    }
+  }
+
+  // finalization
+  h1 ^= len;
+
+  // finalization mix force all bits of a hash block to avalanche
+  h1 ^= ((unsigned int)h1) >> 16;
+  h1 *= 0x85ebca6b;
+  h1 ^= ((unsigned int)h1) >> 13;
+  h1 *= 0xc2b2ae35;
+  h1 ^= ((unsigned int)h1) >> 16;
+
+  return h1;
+}
+
+// Murmur3 hashing for Strings
+jint AltHashing::murmur3_32(jint seed, const jchar* data, int len) {
+  jint h1 = seed;
+
+  int off = 0;
+  int count = len;
+
+  // body
+  while (count >= 2) {
+    jchar d1 = data[off++] & 0xFFFF;
+    jchar d2 = data[off++];
+    jint k1 = (d1 | d2 << 16);
+
+    count -= 2;
+
+    k1 *= 0xcc9e2d51;
+    k1 = Integer_rotateLeft(k1, 15);
+    k1 *= 0x1b873593;
+
+    h1 ^= k1;
+    h1 = Integer_rotateLeft(h1, 13);
+    h1 = h1 * 5 + 0xe6546b64;
+  }
+
+  // tail
+
+  if (count > 0) {
+    int k1 = data[off];
+
+    k1 *= 0xcc9e2d51;
+    k1 = Integer_rotateLeft(k1, 15);
+    k1 *= 0x1b873593;
+    h1 ^= k1;
+  }
+
+  // finalization
+  h1 ^= len * 2; // (Character.SIZE / Byte.SIZE);
+
+  // finalization mix force all bits of a hash block to avalanche
+  h1 ^= ((unsigned int)h1) >> 16;
+  h1 *= 0x85ebca6b;
+  h1 ^= ((unsigned int)h1) >> 13;
+  h1 *= 0xc2b2ae35;
+  h1 ^= ((unsigned int)h1) >> 16;
+
+  return h1;
+}
+
+// Hash used for the seed.
+jint AltHashing::murmur3_32(jint seed, const int* data, int len) {
+  jint h1 = seed;
+
+  int off = 0;
+  int end = len;
+
+  // body
+  while (off < end) {
+    jint k1 = data[off++];
+
+    k1 *= 0xcc9e2d51;
+    k1 = Integer_rotateLeft(k1, 15);
+    k1 *= 0x1b873593;
+
+    h1 ^= k1;
+    h1 = Integer_rotateLeft(h1, 13);
+    h1 = h1 * 5 + 0xe6546b64;
+  }
+
+  // tail (always empty, as body is always 32-bit chunks)
+
+  // finalization
+
+  h1 ^= len * 4; // (Integer.SIZE / Byte.SIZE);
+
+  // finalization mix force all bits of a hash block to avalanche
+  h1 ^= ((juint)h1) >> 16;
+  h1 *= 0x85ebca6b;
+  h1 ^= ((juint)h1) >> 13;
+  h1 *= 0xc2b2ae35;
+  h1 ^= ((juint)h1) >> 16;
+
+  return h1;
+}
+
+jint AltHashing::murmur3_32(const int* data, int len) {
+  return murmur3_32(0, data, len);
+}
+
+#ifndef PRODUCT
+// Overloaded versions for internal test.
+jint AltHashing::murmur3_32(const jbyte* data, int len) {
+  return murmur3_32(0, data, len);
+}
+
+jint AltHashing::murmur3_32(const jchar* data, int len) {
+  return murmur3_32(0, data, len);
+}
+
+// Internal test for alternate hashing.  Translated from JDK version
+// test/sun/misc/Hashing.java
+static const jbyte ONE_BYTE[] = { (jbyte) 0x80};
+static const jbyte TWO_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81};
+static const jchar ONE_CHAR[] = { (jchar) 0x8180};
+static const jbyte THREE_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82};
+static const jbyte FOUR_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82, (jbyte) 0x83};
+static const jchar TWO_CHAR[] = { (jchar) 0x8180, (jchar) 0x8382};
+static const jint ONE_INT[] = { 0x83828180};
+static const jbyte SIX_BYTE[] = { (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82, (jbyte) 0x83, (jbyte) 0x84, (jbyte) 0x85};
+static const jchar THREE_CHAR[] = { (jchar) 0x8180, (jchar) 0x8382, (jchar) 0x8584};
+static const jbyte EIGHT_BYTE[] = {
+  (jbyte) 0x80, (jbyte) 0x81, (jbyte) 0x82,
+  (jbyte) 0x83, (jbyte) 0x84, (jbyte) 0x85,
+  (jbyte) 0x86, (jbyte) 0x87};
+static const jchar FOUR_CHAR[] = {
+  (jchar) 0x8180, (jchar) 0x8382,
+  (jchar) 0x8584, (jchar) 0x8786};
+
+static const jint TWO_INT[] = { 0x83828180, 0x87868584};
+
+static const juint MURMUR3_32_X86_CHECK_VALUE = 0xB0F57EE3;
+
+void AltHashing::testMurmur3_32_ByteArray() {
+  // printf("testMurmur3_32_ByteArray\n");
+
+  jbyte* vector = new jbyte[256];
+  jbyte* hashes = new jbyte[4 * 256];
+
+  for (int i = 0; i < 256; i++) {
+    vector[i] = (jbyte) i;
+  }
+
+  // Hash subranges {}, {0}, {0,1}, {0,1,2}, ..., {0,...,255}
+  for (int i = 0; i < 256; i++) {
+    jint hash = murmur3_32(256 - i, vector, i);
+    hashes[i * 4] = (jbyte) hash;
+    hashes[i * 4 + 1] = (jbyte) (((juint)hash) >> 8);
+    hashes[i * 4 + 2] = (jbyte) (((juint)hash) >> 16);
+    hashes[i * 4 + 3] = (jbyte) (((juint)hash) >> 24);
+  }
+
+  // hash to get const result.
+  juint final_hash = murmur3_32(hashes, 4*256);
+
+  assert (MURMUR3_32_X86_CHECK_VALUE == final_hash,
+    err_msg(
+        "Calculated hash result not as expected. Expected %08X got %08X\n",
+        MURMUR3_32_X86_CHECK_VALUE,
+        final_hash));
+}
+
+void AltHashing::testEquivalentHashes() {
+  jint jbytes, jchars, ints;
+
+  // printf("testEquivalentHashes\n");
+
+  jbytes = murmur3_32(TWO_BYTE, 2);
+  jchars = murmur3_32(ONE_CHAR, 1);
+  assert (jbytes == jchars,
+    err_msg("Hashes did not match. b:%08x != c:%08x\n", jbytes, jchars));
+
+  jbytes = murmur3_32(FOUR_BYTE, 4);
+  jchars = murmur3_32(TWO_CHAR, 2);
+  ints = murmur3_32(ONE_INT, 1);
+  assert ((jbytes == jchars) && (jbytes == ints),
+    err_msg("Hashes did not match. b:%08x != c:%08x != i:%08x\n", jbytes, jchars, ints));
+
+  jbytes = murmur3_32(SIX_BYTE, 6);
+  jchars = murmur3_32(THREE_CHAR, 3);
+  assert (jbytes == jchars,
+    err_msg("Hashes did not match. b:%08x != c:%08x\n", jbytes, jchars));
+
+  jbytes = murmur3_32(EIGHT_BYTE, 8);
+  jchars = murmur3_32(FOUR_CHAR, 4);
+  ints = murmur3_32(TWO_INT, 2);
+  assert ((jbytes == jchars) && (jbytes == ints),
+    err_msg("Hashes did not match. b:%08x != c:%08x != i:%08x\n", jbytes, jchars, ints));
+}
+
+// Returns true if the alternate hashcode is correct
+void AltHashing::test_alt_hash() {
+  testMurmur3_32_ByteArray();
+  testEquivalentHashes();
+}
+#endif // PRODUCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/classfile/altHashing.hpp	Fri Jun 29 17:04:39 2012 -0700
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef SHARE_VM_CLASSFILE_ALTHASHING_HPP
+#define SHARE_VM_CLASSFILE_ALTHASHING_HPP
+
+#include "prims/jni.h"
+#include "classfile/symbolTable.hpp"
+
+/**
+ * Hashing utilities.
+ *
+ * Implementation of Murmur3 hashing.
+ * This code was translated from src/share/classes/sun/misc/Hashing.java
+ * code in the JDK.
+ */
+
+class AltHashing : AllStatic {
+
+  // utility function copied from java/lang/Integer
+  static jint Integer_rotateLeft(jint i, int distance) {
+    return (i << distance) | (((juint)i) >> (32-distance));
+  }
+  static jint murmur3_32(const int* data, int len);
+  static jint murmur3_32(jint seed, const int* data, int len);
+
+#ifndef PRODUCT
+  // Hashing functions used for internal testing
+  static jint murmur3_32(const jbyte* data, int len);
+  static jint murmur3_32(const jchar* data, int len);
+  static void testMurmur3_32_ByteArray();
+  static void testEquivalentHashes();
+#endif // PRODUCT
+
+ public:
+  static jint compute_seed();
+  static jint murmur3_32(jint seed, const jbyte* data, int len);
+  static jint murmur3_32(jint seed, const jchar* data, int len);
+  NOT_PRODUCT(static void test_alt_hash();)
+};
+#endif // SHARE_VM_CLASSFILE_ALTHASHING_HPP
--- a/src/share/vm/classfile/javaClasses.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/classfile/javaClasses.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "classfile/altHashing.hpp"
 #include "classfile/javaClasses.hpp"
 #include "classfile/symbolTable.hpp"
 #include "classfile/vmSymbols.hpp"
@@ -347,13 +348,26 @@
   return result;
 }
 
-unsigned int java_lang_String::hash_string(oop java_string) {
+unsigned int java_lang_String::to_hash(oop java_string) {
+  int          length = java_lang_String::length(java_string);
+  // Zero length string will hash to zero with String.toHash() function.
+  if (length == 0) return 0;
+
   typeArrayOop value  = java_lang_String::value(java_string);
   int          offset = java_lang_String::offset(java_string);
+  return java_lang_String::to_hash(value->char_at_addr(offset), length);
+}
+
+unsigned int java_lang_String::hash_string(oop java_string) {
   int          length = java_lang_String::length(java_string);
-
-  if (length == 0) return 0;
-  return hash_string(value->char_at_addr(offset), length);
+  // Zero length string doesn't hash necessarily hash to zero.
+  if (length == 0) {
+    return StringTable::hash_string(NULL, 0);
+  }
+
+  typeArrayOop value  = java_lang_String::value(java_string);
+  int          offset = java_lang_String::offset(java_string);
+  return StringTable::hash_string(value->char_at_addr(offset), length);
 }
 
 Symbol* java_lang_String::as_symbol(Handle java_string, TRAPS) {
--- a/src/share/vm/classfile/javaClasses.hpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/classfile/javaClasses.hpp	Fri Jun 29 17:04:39 2012 -0700
@@ -158,20 +158,16 @@
   static jchar* as_unicode_string(oop java_string, int& length);
 
   // Compute the hash value for a java.lang.String object which would
-  // contain the characters passed in. This hash value is used for at
-  // least two purposes.
+  // contain the characters passed in.
   //
-  // (a) As the hash value used by the StringTable for bucket selection
-  //     and comparison (stored in the HashtableEntry structures).  This
-  //     is used in the String.intern() method.
+  // As the hash value used by the String object itself, in
+  // String.hashCode().  This value is normally calculated in Java code
+  // in the String.hashCode method(), but is precomputed for String
+  // objects in the shared archive file.
+  // hash P(31) from Kernighan & Ritchie
   //
-  // (b) As the hash value used by the String object itself, in
-  //     String.hashCode().  This value is normally calculate in Java code
-  //     in the String.hashCode method(), but is precomputed for String
-  //     objects in the shared archive file.
-  //
-  //     For this reason, THIS ALGORITHM MUST MATCH String.hashCode().
-  static unsigned int hash_string(jchar* s, int len) {
+  // For this reason, THIS ALGORITHM MUST MATCH String.toHash().
+  template <typename T> static unsigned int to_hash(T* s, int len) {
     unsigned int h = 0;
     while (len-- > 0) {
       h = 31*h + (unsigned int) *s;
@@ -179,6 +175,10 @@
     }
     return h;
   }
+  static unsigned int to_hash(oop java_string);
+
+  // This is the string hash code used by the StringTable, which may be
+  // the same as String.toHash or an alternate hash code.
   static unsigned int hash_string(oop java_string);
 
   static bool equals(oop java_string, jchar* chars, int len);
--- a/src/share/vm/classfile/symbolTable.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/classfile/symbolTable.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -23,6 +23,7 @@
  */
 
 #include "precompiled.hpp"
+#include "classfile/altHashing.hpp"
 #include "classfile/javaClasses.hpp"
 #include "classfile/symbolTable.hpp"
 #include "classfile/systemDictionary.hpp"
@@ -34,19 +35,19 @@
 #include "oops/oop.inline2.hpp"
 #include "runtime/mutexLocker.hpp"
 #include "utilities/hashtable.inline.hpp"
+#include "utilities/numberSeq.hpp"
 
 // --------------------------------------------------------------------------
 
 SymbolTable* SymbolTable::_the_table = NULL;
 // Static arena for symbols that are not deallocated
 Arena* SymbolTable::_arena = NULL;
+bool SymbolTable::_needs_rehashing = false;
+jint SymbolTable::_seed = 0;
 
 Symbol* SymbolTable::allocate_symbol(const u1* name, int len, bool c_heap, TRAPS) {
-  // Don't allow symbols to be created which cannot fit in a Symbol*.
-  if (len > Symbol::max_length()) {
-    THROW_MSG_0(vmSymbols::java_lang_InternalError(),
-                "name is too long to represent");
-  }
+  assert (len <= Symbol::max_length(), "should be checked by caller");
+
   Symbol* sym;
   // Allocate symbols in the C heap when dumping shared spaces in case there
   // are temporary symbols we can remove.
@@ -91,9 +92,14 @@
   int total = 0;
   size_t memory_total = 0;
   for (int i = 0; i < the_table()->table_size(); ++i) {
-    for (HashtableEntry<Symbol*>** p = the_table()->bucket_addr(i); *p != NULL; ) {
-      HashtableEntry<Symbol*>* entry = *p;
-      if (entry->is_shared()) {
+    HashtableEntry<Symbol*>** p = the_table()->bucket_addr(i);
+    HashtableEntry<Symbol*>* entry = the_table()->bucket(i);
+    while (entry != NULL) {
+      // Shared entries are normally at the end of the bucket and if we run into
+      // a shared entry, then there is nothing more to remove. However, if we
+      // have rehashed the table, then the shared entries are no longer at the
+      // end of the bucket.
+      if (entry->is_shared() && !use_alternate_hashcode()) {
         break;
       }
       Symbol* s = entry->literal();
@@ -102,6 +108,7 @@
       assert(s != NULL, "just checking");
       // If reference count is zero, remove.
       if (s->refcount() == 0) {
+        assert(!entry->is_shared(), "shared entries should be kept live");
         delete s;
         removed++;
         *p = entry->next();
@@ -109,6 +116,8 @@
       } else {
         p = entry->next_addr();
       }
+      // get next entry
+      entry = (HashtableEntry<Symbol*>*)HashtableEntry<Symbol*>::make_ptr(*p);
     }
   }
   symbols_removed += removed;
@@ -121,12 +130,42 @@
   }
 }
 
+unsigned int SymbolTable::new_hash(Symbol* sym) {
+  ResourceMark rm;
+  // Use alternate hashing algorithm on this symbol.
+  return AltHashing::murmur3_32(seed(), (const jbyte*)sym->as_C_string(), sym->utf8_length());
+}
+
+// Create a new table and using alternate hash code, populate the new table
+// with the existing strings.   Set flag to use the alternate hash code afterwards.
+void SymbolTable::rehash_table() {
+  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
+  // This should never happen with -Xshare:dump but it might in testing mode.
+  if (DumpSharedSpaces) return;
+  // Create a new symbol table
+  SymbolTable* new_table = new SymbolTable();
+
+  // Initialize the global seed for hashing.
+  _seed = AltHashing::compute_seed();
+  assert(seed() != 0, "shouldn't be zero");
+
+  the_table()->move_to(new_table);
+
+  // Delete the table and buckets (entries are reused in new table).
+  delete _the_table;
+  // Don't check if we need rehashing until the table gets unbalanced again.
+  // Then rehash with a new global seed.
+  _needs_rehashing = false;
+  _the_table = new_table;
+}
 
 // Lookup a symbol in a bucket.
 
 Symbol* SymbolTable::lookup(int index, const char* name,
                               int len, unsigned int hash) {
+  int count = 0;
   for (HashtableEntry<Symbol*>* e = bucket(index); e != NULL; e = e->next()) {
+    count++;  // count all entries in this bucket, not just ones with same hash
     if (e->hash() == hash) {
       Symbol* sym = e->literal();
       if (sym->equals(name, len)) {
@@ -136,9 +175,20 @@
       }
     }
   }
+  // If the bucket size is too deep check if this hash code is insufficient.
+  if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
+    _needs_rehashing = check_rehash_table(count);
+  }
   return NULL;
 }
 
+// Pick hashing algorithm.
+unsigned int SymbolTable::hash_symbol(const char* s, int len) {
+  return use_alternate_hashcode() ?
+           AltHashing::murmur3_32(seed(), (const jbyte*)s, len) :
+           java_lang_String::to_hash(s, len);
+}
+
 
 // We take care not to be blocking while holding the
 // SymbolTable_lock. Otherwise, the system might deadlock, since the
@@ -156,6 +206,9 @@
   // Found
   if (s != NULL) return s;
 
+  // Grab SymbolTable_lock first.
+  MutexLocker ml(SymbolTable_lock, THREAD);
+
   // Otherwise, add to symbol to table
   return the_table()->basic_add(index, (u1*)name, len, hashValue, true, CHECK_NULL);
 }
@@ -193,6 +246,9 @@
   // We can't include the code in No_Safepoint_Verifier because of the
   // ResourceMark.
 
+  // Grab SymbolTable_lock first.
+  MutexLocker ml(SymbolTable_lock, THREAD);
+
   return the_table()->basic_add(index, (u1*)buffer, len, hashValue, true, CHECK_NULL);
 }
 
@@ -261,6 +317,9 @@
                       int names_count,
                       const char** names, int* lengths, int* cp_indices,
                       unsigned int* hashValues, TRAPS) {
+  // Grab SymbolTable_lock first.
+  MutexLocker ml(SymbolTable_lock, THREAD);
+
   SymbolTable* table = the_table();
   bool added = table->basic_add(class_loader, cp, names_count, names, lengths,
                                 cp_indices, hashValues, CHECK);
@@ -281,18 +340,39 @@
   if (result != NULL) {
     return result;
   }
+  // Grab SymbolTable_lock first.
+  MutexLocker ml(SymbolTable_lock, THREAD);
+
   SymbolTable* table = the_table();
   int index = table->hash_to_index(hash);
   return table->basic_add(index, (u1*)name, (int)strlen(name), hash, false, THREAD);
 }
 
-Symbol* SymbolTable::basic_add(int index, u1 *name, int len,
-                               unsigned int hashValue, bool c_heap, TRAPS) {
+Symbol* SymbolTable::basic_add(int index_arg, u1 *name, int len,
+                               unsigned int hashValue_arg, bool c_heap, TRAPS) {
   assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
          "proposed name of symbol must be stable");
 
-  // Grab SymbolTable_lock first.
-  MutexLocker ml(SymbolTable_lock, THREAD);
+  // Don't allow symbols to be created which cannot fit in a Symbol*.
+  if (len > Symbol::max_length()) {
+    THROW_MSG_0(vmSymbols::java_lang_InternalError(),
+                "name is too long to represent");
+  }
+
+  // Cannot hit a safepoint in this function because the "this" pointer can move.
+  No_Safepoint_Verifier nsv;
+
+  // Check if the symbol table has been rehashed, if so, need to recalculate
+  // the hash value and index.
+  unsigned int hashValue;
+  int index;
+  if (use_alternate_hashcode()) {
+    hashValue = hash_symbol((const char*)name, len);
+    index = hash_to_index(hashValue);
+  } else {
+    hashValue = hashValue_arg;
+    index = index_arg;
+  }
 
   // Since look-up was done lock-free, we need to check if another
   // thread beat us in the race to insert the symbol.
@@ -328,14 +408,22 @@
     }
   }
 
-  // Hold SymbolTable_lock through the symbol creation
-  MutexLocker ml(SymbolTable_lock, THREAD);
+  // Cannot hit a safepoint in this function because the "this" pointer can move.
+  No_Safepoint_Verifier nsv;
 
   for (int i=0; i<names_count; i++) {
+    // Check if the symbol table has been rehashed, if so, need to recalculate
+    // the hash value.
+    unsigned int hashValue;
+    if (use_alternate_hashcode()) {
+      hashValue = hash_symbol(names[i], lengths[i]);
+    } else {
+      hashValue = hashValues[i];
+    }
     // Since look-up was done lock-free, we need to check if another
     // thread beat us in the race to insert the symbol.
-    int index = hash_to_index(hashValues[i]);
-    Symbol* test = lookup(index, names[i], lengths[i], hashValues[i]);
+    int index = hash_to_index(hashValue);
+    Symbol* test = lookup(index, names[i], lengths[i], hashValue);
     if (test != NULL) {
       // A race occurred and another thread introduced the symbol, this one
       // will be dropped and collected. Use test instead.
@@ -347,7 +435,7 @@
       bool c_heap = class_loader() != NULL;
       Symbol* sym = allocate_symbol((const u1*)names[i], lengths[i], c_heap, CHECK_(false));
       assert(sym->equals(names[i], lengths[i]), "symbol must be properly initialized");  // why wouldn't it be???
-      HashtableEntry<Symbol*>* entry = new_entry(hashValues[i], sym);
+      HashtableEntry<Symbol*>* entry = new_entry(hashValue, sym);
       add_entry(index, entry);
       cp->symbol_at_put(cp_indices[i], sym);
     }
@@ -370,6 +458,24 @@
   }
 }
 
+void SymbolTable::dump(outputStream* st) {
+  NumberSeq summary;
+  for (int i = 0; i < the_table()->table_size(); ++i) {
+    int count = 0;
+    for (HashtableEntry<Symbol*>* e = the_table()->bucket(i);
+       e != NULL; e = e->next()) {
+      count++;
+    }
+    summary.add((double)count);
+  }
+  st->print_cr("SymbolTable statistics:");
+  st->print_cr("Number of buckets       : %7d", summary.num());
+  st->print_cr("Average bucket size     : %7.0f", summary.avg());
+  st->print_cr("Variance of bucket size : %7.0f", summary.variance());
+  st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
+  st->print_cr("Maximum bucket size     : %7.0f", summary.maximum());
+}
+
 
 //---------------------------------------------------------------------------
 // Non-product code
@@ -468,7 +574,6 @@
     }
   }
 }
-
 #endif // PRODUCT
 
 // --------------------------------------------------------------------------
@@ -514,38 +619,53 @@
 // --------------------------------------------------------------------------
 StringTable* StringTable::_the_table = NULL;
 
+bool StringTable::_needs_rehashing = false;
+jint StringTable::_seed = 0;
+
+// Pick hashing algorithm
+unsigned int StringTable::hash_string(const jchar* s, int len) {
+  return use_alternate_hashcode() ? AltHashing::murmur3_32(seed(), s, len) :
+                                    java_lang_String::to_hash(s, len);
+}
+
 oop StringTable::lookup(int index, jchar* name,
                         int len, unsigned int hash) {
+  int count = 0;
   for (HashtableEntry<oop>* l = bucket(index); l != NULL; l = l->next()) {
+    count++;
     if (l->hash() == hash) {
       if (java_lang_String::equals(l->literal(), name, len)) {
         return l->literal();
       }
     }
   }
+  // If the bucket size is too deep check if this hash code is insufficient.
+  if (count >= BasicHashtable::rehash_count && !needs_rehashing()) {
+    _needs_rehashing = check_rehash_table(count);
+  }
   return NULL;
 }
 
 
-oop StringTable::basic_add(int index, Handle string_or_null, jchar* name,
-                           int len, unsigned int hashValue, TRAPS) {
-  debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
-  assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
-         "proposed name of symbol must be stable");
-
-  Handle string;
-  // try to reuse the string if possible
-  if (!string_or_null.is_null() && (!JavaObjectsInPerm || string_or_null()->is_perm())) {
-    string = string_or_null;
-  } else {
-    string = java_lang_String::create_tenured_from_unicode(name, len, CHECK_NULL);
-  }
-
-  // Allocation must be done before grapping the SymbolTable_lock lock
-  MutexLocker ml(StringTable_lock, THREAD);
+oop StringTable::basic_add(int index_arg, Handle string, jchar* name,
+                           int len, unsigned int hashValue_arg, TRAPS) {
 
   assert(java_lang_String::equals(string(), name, len),
          "string must be properly initialized");
+  // Cannot hit a safepoint in this function because the "this" pointer can move.
+  No_Safepoint_Verifier nsv;
+
+  // Check if the symbol table has been rehashed, if so, need to recalculate
+  // the hash value and index before second lookup.
+  unsigned int hashValue;
+  int index;
+  if (use_alternate_hashcode()) {
+    hashValue = hash_string(name, len);
+    index = hash_to_index(hashValue);
+  } else {
+    hashValue = hashValue_arg;
+    index = index_arg;
+  }
 
   // Since look-up was done lock-free, we need to check if another
   // thread beat us in the race to insert the symbol.
@@ -566,7 +686,7 @@
   ResourceMark rm;
   int length;
   jchar* chars = symbol->as_unicode(length);
-  unsigned int hashValue = java_lang_String::hash_string(chars, length);
+  unsigned int hashValue = hash_string(chars, length);
   int index = the_table()->hash_to_index(hashValue);
   return the_table()->lookup(index, chars, length, hashValue);
 }
@@ -574,15 +694,31 @@
 
 oop StringTable::intern(Handle string_or_null, jchar* name,
                         int len, TRAPS) {
-  unsigned int hashValue = java_lang_String::hash_string(name, len);
+  unsigned int hashValue = hash_string(name, len);
   int index = the_table()->hash_to_index(hashValue);
-  oop string = the_table()->lookup(index, name, len, hashValue);
+  oop found_string = the_table()->lookup(index, name, len, hashValue);
 
   // Found
-  if (string != NULL) return string;
+  if (found_string != NULL) return found_string;
+
+  debug_only(StableMemoryChecker smc(name, len * sizeof(name[0])));
+  assert(!Universe::heap()->is_in_reserved(name) || GC_locker::is_active(),
+         "proposed name of symbol must be stable");
+
+  Handle string;
+  // try to reuse the string if possible
+  if (!string_or_null.is_null() && (!JavaObjectsInPerm || string_or_null()->is_perm())) {
+    string = string_or_null;
+  } else {
+    string = java_lang_String::create_tenured_from_unicode(name, len, CHECK_NULL);
+  }
+
+  // Grab the StringTable_lock before getting the_table() because it could
+  // change at safepoint.
+  MutexLocker ml(StringTable_lock, THREAD);
 
   // Otherwise, add to symbol to table
-  return the_table()->basic_add(index, string_or_null, name, len,
+  return the_table()->basic_add(index, string, name, len,
                                 hashValue, CHECK_NULL);
 }
 
@@ -625,18 +761,24 @@
   // entries at a safepoint.
   assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
   for (int i = 0; i < the_table()->table_size(); ++i) {
-    for (HashtableEntry<oop>** p = the_table()->bucket_addr(i); *p != NULL; ) {
-      HashtableEntry<oop>* entry = *p;
-      if (entry->is_shared()) {
+    HashtableEntry<oop>** p = the_table()->bucket_addr(i);
+    HashtableEntry<oop>* entry = the_table()->bucket(i);
+    while (entry != NULL) {
+      // Shared entries are normally at the end of the bucket and if we run into
+      // a shared entry, then there is nothing more to remove. However, if we
+      // have rehashed the table, then the shared entries are no longer at the
+      // end of the bucket.
+      if (entry->is_shared() && !use_alternate_hashcode()) {
         break;
       }
       assert(entry->literal() != NULL, "just checking");
-      if (is_alive->do_object_b(entry->literal())) {
+      if (entry->is_shared() || is_alive->do_object_b(entry->literal())) {
         p = entry->next_addr();
       } else {
         *p = entry->next();
         the_table()->free_entry(entry);
       }
+      entry = (HashtableEntry<oop>*)HashtableEntry<oop>::make_ptr(*p);
     }
   }
 }
@@ -675,3 +817,53 @@
     }
   }
 }
+
+void StringTable::dump(outputStream* st) {
+  NumberSeq summary;
+  for (int i = 0; i < the_table()->table_size(); ++i) {
+    HashtableEntry<oop>* p = the_table()->bucket(i);
+    int count = 0;
+    for ( ; p != NULL; p = p->next()) {
+      count++;
+    }
+    summary.add((double)count);
+  }
+  st->print_cr("StringTable statistics:");
+  st->print_cr("Number of buckets       : %7d", summary.num());
+  st->print_cr("Average bucket size     : %7.0f", summary.avg());
+  st->print_cr("Variance of bucket size : %7.0f", summary.variance());
+  st->print_cr("Std. dev. of bucket size: %7.0f", summary.sd());
+  st->print_cr("Maximum bucket size     : %7.0f", summary.maximum());
+}
+
+
+unsigned int StringTable::new_hash(oop string) {
+  ResourceMark rm;
+  int length;
+  jchar* chars = java_lang_String::as_unicode_string(string, length);
+  // Use alternate hashing algorithm on the string
+  return AltHashing::murmur3_32(seed(), chars, length);
+}
+
+// Create a new table and using alternate hash code, populate the new table
+// with the existing strings.   Set flag to use the alternate hash code afterwards.
+void StringTable::rehash_table() {
+  assert(SafepointSynchronize::is_at_safepoint(), "must be at safepoint");
+  // This should never happen with -Xshare:dump but it might in testing mode.
+  if (DumpSharedSpaces) return;
+  StringTable* new_table = new StringTable();
+
+  // Initialize new global seed for hashing.
+  _seed = AltHashing::compute_seed();
+  assert(seed() != 0, "shouldn't be zero");
+
+  // Rehash the table
+  the_table()->move_to(new_table);
+
+  // Delete the table and buckets (entries are reused in new table).
+  delete _the_table;
+  // Don't check if we need rehashing until the table gets unbalanced again.
+  // Then rehash with a new global seed.
+  _needs_rehashing = false;
+  _the_table = new_table;
+}
--- a/src/share/vm/classfile/symbolTable.hpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/classfile/symbolTable.hpp	Fri Jun 29 17:04:39 2012 -0700
@@ -40,6 +40,7 @@
 //  - symbolTableEntrys are allocated in blocks to reduce the space overhead.
 
 class BoolObjectClosure;
+class outputStream;
 
 
 // Class to hold a newly created or referenced Symbol* temporarily in scope.
@@ -78,6 +79,10 @@
   // The symbol table
   static SymbolTable* _the_table;
 
+  // Set if one bucket is out of balance due to hash algorithm deficiency
+  static bool _needs_rehashing;
+  static jint _seed;
+
   // For statistics
   static int symbols_removed;
   static int symbols_counted;
@@ -119,6 +124,11 @@
   static Arena* arena() { return _arena; }  // called for statistics
 
   static void initialize_symbols(int arena_alloc_size = 0);
+
+  static bool use_alternate_hashcode()  { return _seed != 0; }
+  static jint seed()                    { return _seed; }
+
+  unsigned int new_hash(Symbol* sym);
 public:
   enum {
     symbol_alloc_batch_size = 8,
@@ -146,6 +156,8 @@
     initialize_symbols();
   }
 
+  static unsigned int hash_symbol(const char* s, int len);
+
   static Symbol* lookup(const char* name, int len, TRAPS);
   // lookup only, won't add. Also calculate hash.
   static Symbol* lookup_only(const char* name, int len, unsigned int& hash);
@@ -208,6 +220,7 @@
 
   // Debugging
   static void verify();
+  static void dump(outputStream* st);
 
   // Sharing
   static void copy_buckets(char** top, char*end) {
@@ -219,8 +232,13 @@
   static void reverse(void* boundary = NULL) {
     the_table()->Hashtable<Symbol*>::reverse(boundary);
   }
+
+  // Rehash the symbol table if it gets out of balance
+  static void rehash_table();
+  static bool needs_rehashing()         { return _needs_rehashing; }
 };
 
+
 class StringTable : public Hashtable<oop> {
   friend class VMStructs;
 
@@ -228,6 +246,10 @@
   // The string table
   static StringTable* _the_table;
 
+  // Set if one bucket is out of balance due to hash algorithm deficiency
+  static bool _needs_rehashing;
+  static jint _seed;
+
   static oop intern(Handle string_or_null, jchar* chars, int length, TRAPS);
   oop basic_add(int index, Handle string_or_null, jchar* name, int len,
                 unsigned int hashValue, TRAPS);
@@ -241,6 +263,10 @@
     : Hashtable<oop>((int)StringTableSize, sizeof (HashtableEntry<oop>), t,
                      number_of_entries) {}
 
+  static bool use_alternate_hashcode()  { return _seed != 0; }
+  static jint seed()                    { return _seed; }
+
+  unsigned int new_hash(oop s);
 public:
   // The string table
   static StringTable* the_table() { return _the_table; }
@@ -265,6 +291,14 @@
   // Invoke "f->do_oop" on the locations of all oops in the table.
   static void oops_do(OopClosure* f);
 
+  // Hashing algorithm, used as the hash value used by the
+  //     StringTable for bucket selection and comparison (stored in the
+  //     HashtableEntry structures).  This is used in the String.intern() method.
+  static unsigned int hash_string(const jchar* s, int len);
+
+  // Internal test.
+  static void test_alt_hash() PRODUCT_RETURN;
+
   // Probing
   static oop lookup(Symbol* symbol);
 
@@ -275,6 +309,7 @@
 
   // Debugging
   static void verify();
+  static void dump(outputStream* st);
 
   // Sharing
   static void copy_buckets(char** top, char*end) {
@@ -286,6 +321,9 @@
   static void reverse() {
     the_table()->Hashtable<oop>::reverse();
   }
+
+  // Rehash the symbol table if it gets out of balance
+  static void rehash_table();
+  static bool needs_rehashing() { return _needs_rehashing; }
 };
-
 #endif // SHARE_VM_CLASSFILE_SYMBOLTABLE_HPP
--- a/src/share/vm/classfile/vmSymbols.hpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/classfile/vmSymbols.hpp	Fri Jun 29 17:04:39 2012 -0700
@@ -111,6 +111,10 @@
   template(getBootClassPathEntryForClass_name,        "getBootClassPathEntryForClass")            \
   template(sun_misc_PostVMInitHook,                   "sun/misc/PostVMInitHook")                  \
                                                                                                   \
+  /* Java runtime version access */                                                               \
+  template(sun_misc_Version,                          "sun/misc/Version")                         \
+  template(java_runtime_name_name,                    "java_runtime_name")                        \
+                                                                                                  \
   /* class file format tags */                                                                    \
   template(tag_source_file,                           "SourceFile")                               \
   template(tag_inner_classes,                         "InnerClasses")                             \
--- a/src/share/vm/code/vmreg.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/code/vmreg.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,7 +27,7 @@
 #include "code/vmreg.hpp"
 
 // First VMReg value that could refer to a stack slot
-VMReg VMRegImpl::stack0 = (VMReg)(intptr_t)((ConcreteRegisterImpl::number_of_registers + 1) & ~1);
+VMReg VMRegImpl::stack0 = (VMReg)(intptr_t)((ConcreteRegisterImpl::number_of_registers + 7) & ~7);
 
 // VMRegs are 4 bytes wide on all platforms
 const int VMRegImpl::stack_slot_size = 4;
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -4750,9 +4750,6 @@
       _g1h->g1_policy()->record_thread_age_table(pss.age_table());
       _g1h->update_surviving_young_words(pss.surviving_young_words()+1);
 
-      // Clean up any par-expanded rem sets.
-      HeapRegionRemSet::par_cleanup();
-
       if (ParallelGCVerbose) {
         MutexLocker x(stats_lock());
         pss.print_termination_stats(worker_id);
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Jun 29 17:04:39 2012 -0700
@@ -53,6 +53,9 @@
   develop(bool, G1TraceMarkStackOverflow, false,                            \
           "If true, extra debugging code for CM restart for ovflw.")        \
                                                                             \
+  develop(bool, G1TraceHeapRegionRememberedSet, false,                      \
+          "Enables heap region remembered set debug logs")                  \
+                                                                            \
   diagnostic(bool, G1SummarizeConcMark, false,                              \
           "Summarize concurrent mark info")                                 \
                                                                             \
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -30,13 +30,10 @@
 #include "gc_implementation/g1/heapRegionSeq.inline.hpp"
 #include "memory/allocation.hpp"
 #include "memory/space.inline.hpp"
+#include "oops/oop.inline.hpp"
 #include "utilities/bitMap.inline.hpp"
 #include "utilities/globalDefinitions.hpp"
 
-#define HRRS_VERBOSE 0
-
-#define PRT_COUNT_OCCUPIED 1
-
 // OtherRegionsTable
 
 class PerRegionTable: public CHeapObj {
@@ -45,14 +42,10 @@
 
   HeapRegion*     _hr;
   BitMap          _bm;
-#if PRT_COUNT_OCCUPIED
   jint            _occupied;
-#endif
-  PerRegionTable* _next_free;
 
-  PerRegionTable* next_free() { return _next_free; }
-  void set_next_free(PerRegionTable* prt) { _next_free = prt; }
-
+  // next pointer for free/allocated lis
+  PerRegionTable* _next;
 
   static PerRegionTable* _free_list;
 
@@ -69,63 +62,25 @@
   // We need access in order to union things into the base table.
   BitMap* bm() { return &_bm; }
 
-#if PRT_COUNT_OCCUPIED
   void recount_occupied() {
     _occupied = (jint) bm()->count_one_bits();
   }
-#endif
 
   PerRegionTable(HeapRegion* hr) :
     _hr(hr),
-#if PRT_COUNT_OCCUPIED
     _occupied(0),
-#endif
     _bm(HeapRegion::CardsPerRegion, false /* in-resource-area */)
   {}
 
-  static void free(PerRegionTable* prt) {
-    while (true) {
-      PerRegionTable* fl = _free_list;
-      prt->set_next_free(fl);
-      PerRegionTable* res =
-        (PerRegionTable*)
-        Atomic::cmpxchg_ptr(prt, &_free_list, fl);
-      if (res == fl) return;
-    }
-    ShouldNotReachHere();
-  }
-
-  static PerRegionTable* alloc(HeapRegion* hr) {
-    PerRegionTable* fl = _free_list;
-    while (fl != NULL) {
-      PerRegionTable* nxt = fl->next_free();
-      PerRegionTable* res =
-        (PerRegionTable*)
-        Atomic::cmpxchg_ptr(nxt, &_free_list, fl);
-      if (res == fl) {
-        fl->init(hr);
-        return fl;
-      } else {
-        fl = _free_list;
-      }
-    }
-    assert(fl == NULL, "Loop condition.");
-    return new PerRegionTable(hr);
-  }
-
   void add_card_work(CardIdx_t from_card, bool par) {
     if (!_bm.at(from_card)) {
       if (par) {
         if (_bm.par_at_put(from_card, 1)) {
-#if PRT_COUNT_OCCUPIED
           Atomic::inc(&_occupied);
-#endif
         }
       } else {
         _bm.at_put(from_card, 1);
-#if PRT_COUNT_OCCUPIED
         _occupied++;
-#endif
       }
     }
   }
@@ -134,10 +89,13 @@
     // Must make this robust in case "from" is not in "_hr", because of
     // concurrency.
 
-#if HRRS_VERBOSE
-    gclog_or_tty->print_cr("    PRT::Add_reference_work(" PTR_FORMAT "->" PTR_FORMAT").",
-                           from, *from);
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print_cr("    PRT::Add_reference_work(" PTR_FORMAT "->" PTR_FORMAT").",
+                             from,
+                             UseCompressedOops
+                             ? oopDesc::load_decode_heap_oop((narrowOop*)from)
+                             : oopDesc::load_decode_heap_oop((oop*)from));
+    }
 
     HeapRegion* loc_hr = hr();
     // If the test below fails, then this table was reused concurrently
@@ -162,23 +120,16 @@
 
   HeapRegion* hr() const { return _hr; }
 
-#if PRT_COUNT_OCCUPIED
   jint occupied() const {
     // Overkill, but if we ever need it...
     // guarantee(_occupied == _bm.count_one_bits(), "Check");
     return _occupied;
   }
-#else
-  jint occupied() const {
-    return _bm.count_one_bits();
-  }
-#endif
 
   void init(HeapRegion* hr) {
     _hr = hr;
-#if PRT_COUNT_OCCUPIED
+    _next = NULL;
     _occupied = 0;
-#endif
     _bm.clear();
   }
 
@@ -194,9 +145,7 @@
     HeapWord* hr_bot = hr()->bottom();
     size_t hr_first_card_index = ctbs->index_for(hr_bot);
     bm()->set_intersection_at_offset(*card_bm, hr_first_card_index);
-#if PRT_COUNT_OCCUPIED
     recount_occupied();
-#endif
   }
 
   void add_card(CardIdx_t from_card_index) {
@@ -218,16 +167,6 @@
     return sizeof(this) + _bm.size_in_words() * HeapWordSize;
   }
 
-  static size_t fl_mem_size() {
-    PerRegionTable* cur = _free_list;
-    size_t res = 0;
-    while (cur != NULL) {
-      res += sizeof(PerRegionTable);
-      cur = cur->next_free();
-    }
-    return res;
-  }
-
   // Requires "from" to be in "hr()".
   bool contains_reference(OopOrNarrowOopStar from) const {
     assert(hr()->is_in_reserved(from), "Precondition.");
@@ -235,122 +174,29 @@
                                     CardTableModRefBS::card_size);
     return _bm.at(card_ind);
   }
-};
 
-PerRegionTable* PerRegionTable::_free_list = NULL;
+  PerRegionTable* next() const { return _next; }
+  void set_next(PerRegionTable* nxt) { _next = nxt; }
+  PerRegionTable** next_addr() { return &_next; }
 
-
-#define COUNT_PAR_EXPANDS 0
-
-#if COUNT_PAR_EXPANDS
-static jint n_par_expands = 0;
-static jint n_par_contracts = 0;
-static jint par_expand_list_len = 0;
-static jint max_par_expand_list_len = 0;
-
-static void print_par_expand() {
-  Atomic::inc(&n_par_expands);
-  Atomic::inc(&par_expand_list_len);
-  if (par_expand_list_len > max_par_expand_list_len) {
-    max_par_expand_list_len = par_expand_list_len;
-  }
-  if ((n_par_expands % 10) == 0) {
-    gclog_or_tty->print_cr("\n\n%d par expands: %d contracts, "
-                  "len = %d, max_len = %d\n.",
-                  n_par_expands, n_par_contracts, par_expand_list_len,
-                  max_par_expand_list_len);
-  }
-}
-#endif
-
-class PosParPRT: public PerRegionTable {
-  PerRegionTable** _par_tables;
-
-  enum SomePrivateConstants {
-    ReserveParTableExpansion = 1
-  };
-
-  void par_contract() {
-    assert(_par_tables != NULL, "Precondition.");
-    int n = HeapRegionRemSet::num_par_rem_sets()-1;
-    for (int i = 0; i < n; i++) {
-      _par_tables[i]->union_bitmap_into(bm());
-      PerRegionTable::free(_par_tables[i]);
-      _par_tables[i] = NULL;
-    }
-#if PRT_COUNT_OCCUPIED
-    // We must recount the "occupied."
-    recount_occupied();
-#endif
-    FREE_C_HEAP_ARRAY(PerRegionTable*, _par_tables);
-    _par_tables = NULL;
-#if COUNT_PAR_EXPANDS
-    Atomic::inc(&n_par_contracts);
-    Atomic::dec(&par_expand_list_len);
-#endif
-  }
-
-  static PerRegionTable** _par_table_fl;
-
-  PosParPRT* _next;
-
-  static PosParPRT* _free_list;
-
-  PerRegionTable** par_tables() const {
-    assert(uintptr_t(NULL) == 0, "Assumption.");
-    if (uintptr_t(_par_tables) <= ReserveParTableExpansion)
-      return NULL;
-    else
-      return _par_tables;
-  }
-
-  PosParPRT* _next_par_expanded;
-  PosParPRT* next_par_expanded() { return _next_par_expanded; }
-  void set_next_par_expanded(PosParPRT* ppprt) { _next_par_expanded = ppprt; }
-  static PosParPRT* _par_expanded_list;
-
-public:
-
-  PosParPRT(HeapRegion* hr) : PerRegionTable(hr), _par_tables(NULL) {}
-
-  jint occupied() const {
-    jint res = PerRegionTable::occupied();
-    if (par_tables() != NULL) {
-      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
-        res += par_tables()[i]->occupied();
-      }
-    }
-    return res;
-  }
-
-  void init(HeapRegion* hr) {
-    PerRegionTable::init(hr);
-    _next = NULL;
-    if (par_tables() != NULL) {
-      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
-        par_tables()[i]->init(hr);
-      }
-    }
-  }
-
-  static void free(PosParPRT* prt) {
+  static void free(PerRegionTable* prt) {
     while (true) {
-      PosParPRT* fl = _free_list;
+      PerRegionTable* fl = _free_list;
       prt->set_next(fl);
-      PosParPRT* res =
-        (PosParPRT*)
+      PerRegionTable* res =
+        (PerRegionTable*)
         Atomic::cmpxchg_ptr(prt, &_free_list, fl);
       if (res == fl) return;
     }
     ShouldNotReachHere();
   }
 
-  static PosParPRT* alloc(HeapRegion* hr) {
-    PosParPRT* fl = _free_list;
+  static PerRegionTable* alloc(HeapRegion* hr) {
+    PerRegionTable* fl = _free_list;
     while (fl != NULL) {
-      PosParPRT* nxt = fl->next();
-      PosParPRT* res =
-        (PosParPRT*)
+      PerRegionTable* nxt = fl->next();
+      PerRegionTable* res =
+        (PerRegionTable*)
         Atomic::cmpxchg_ptr(nxt, &_free_list, fl);
       if (res == fl) {
         fl->init(hr);
@@ -360,148 +206,26 @@
       }
     }
     assert(fl == NULL, "Loop condition.");
-    return new PosParPRT(hr);
-  }
-
-  PosParPRT* next() const { return _next; }
-  void set_next(PosParPRT* nxt) { _next = nxt; }
-  PosParPRT** next_addr() { return &_next; }
-
-  bool should_expand(int tid) {
-    // Given that we now defer RSet updates for after a GC we don't
-    // really need to expand the tables any more. This code should be
-    // cleaned up in the future (see CR 6921087).
-    return false;
-  }
-
-  void par_expand() {
-    int n = HeapRegionRemSet::num_par_rem_sets()-1;
-    if (n <= 0) return;
-    if (_par_tables == NULL) {
-      PerRegionTable* res =
-        (PerRegionTable*)
-        Atomic::cmpxchg_ptr((PerRegionTable*)ReserveParTableExpansion,
-                            &_par_tables, NULL);
-      if (res != NULL) return;
-      // Otherwise, we reserved the right to do the expansion.
-
-      PerRegionTable** ptables = NEW_C_HEAP_ARRAY(PerRegionTable*, n);
-      for (int i = 0; i < n; i++) {
-        PerRegionTable* ptable = PerRegionTable::alloc(hr());
-        ptables[i] = ptable;
-      }
-      // Here we do not need an atomic.
-      _par_tables = ptables;
-#if COUNT_PAR_EXPANDS
-      print_par_expand();
-#endif
-      // We must put this table on the expanded list.
-      PosParPRT* exp_head = _par_expanded_list;
-      while (true) {
-        set_next_par_expanded(exp_head);
-        PosParPRT* res =
-          (PosParPRT*)
-          Atomic::cmpxchg_ptr(this, &_par_expanded_list, exp_head);
-        if (res == exp_head) return;
-        // Otherwise.
-        exp_head = res;
-      }
-      ShouldNotReachHere();
-    }
-  }
-
-  void add_reference(OopOrNarrowOopStar from, int tid) {
-    // Expand if necessary.
-    PerRegionTable** pt = par_tables();
-    if (pt != NULL) {
-      // We always have to assume that mods to table 0 are in parallel,
-      // because of the claiming scheme in parallel expansion.  A thread
-      // with tid != 0 that finds the table to be NULL, but doesn't succeed
-      // in claiming the right of expanding it, will end up in the else
-      // clause of the above if test.  That thread could be delayed, and a
-      // thread 0 add reference could see the table expanded, and come
-      // here.  Both threads would be adding in parallel.  But we get to
-      // not use atomics for tids > 0.
-      if (tid == 0) {
-        PerRegionTable::add_reference(from);
-      } else {
-        pt[tid-1]->seq_add_reference(from);
-      }
-    } else {
-      // Not expanded -- add to the base table.
-      PerRegionTable::add_reference(from);
-    }
-  }
-
-  void scrub(CardTableModRefBS* ctbs, BitMap* card_bm) {
-    assert(_par_tables == NULL, "Precondition");
-    PerRegionTable::scrub(ctbs, card_bm);
-  }
-
-  size_t mem_size() const {
-    size_t res =
-      PerRegionTable::mem_size() + sizeof(this) - sizeof(PerRegionTable);
-    if (_par_tables != NULL) {
-      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
-        res += _par_tables[i]->mem_size();
-      }
-    }
-    return res;
+    return new PerRegionTable(hr);
   }
 
   static size_t fl_mem_size() {
-    PosParPRT* cur = _free_list;
+    PerRegionTable* cur = _free_list;
     size_t res = 0;
     while (cur != NULL) {
-      res += sizeof(PosParPRT);
+      res += sizeof(PerRegionTable);
       cur = cur->next();
     }
     return res;
   }
-
-  bool contains_reference(OopOrNarrowOopStar from) const {
-    if (PerRegionTable::contains_reference(from)) return true;
-    if (_par_tables != NULL) {
-      for (int i = 0; i < HeapRegionRemSet::num_par_rem_sets()-1; i++) {
-        if (_par_tables[i]->contains_reference(from)) return true;
-      }
-    }
-    return false;
-  }
-
-  static void par_contract_all();
 };
 
-void PosParPRT::par_contract_all() {
-  PosParPRT* hd = _par_expanded_list;
-  while (hd != NULL) {
-    PosParPRT* nxt = hd->next_par_expanded();
-    PosParPRT* res =
-      (PosParPRT*)
-      Atomic::cmpxchg_ptr(nxt, &_par_expanded_list, hd);
-    if (res == hd) {
-      // We claimed the right to contract this table.
-      hd->set_next_par_expanded(NULL);
-      hd->par_contract();
-      hd = _par_expanded_list;
-    } else {
-      hd = res;
-    }
-  }
-}
-
-PosParPRT* PosParPRT::_free_list = NULL;
-PosParPRT* PosParPRT::_par_expanded_list = NULL;
-
-jint OtherRegionsTable::_cache_probes = 0;
-jint OtherRegionsTable::_cache_hits = 0;
+PerRegionTable* PerRegionTable::_free_list = NULL;
 
 size_t OtherRegionsTable::_max_fine_entries = 0;
 size_t OtherRegionsTable::_mod_max_fine_entries_mask = 0;
-#if SAMPLE_FOR_EVICTION
 size_t OtherRegionsTable::_fine_eviction_stride = 0;
 size_t OtherRegionsTable::_fine_eviction_sample_size = 0;
-#endif
 
 OtherRegionsTable::OtherRegionsTable(HeapRegion* hr) :
   _g1h(G1CollectedHeap::heap()),
@@ -511,34 +235,36 @@
               false /* in-resource-area */),
   _fine_grain_regions(NULL),
   _n_fine_entries(0), _n_coarse_entries(0),
-#if SAMPLE_FOR_EVICTION
   _fine_eviction_start(0),
-#endif
   _sparse_table(hr)
 {
-  typedef PosParPRT* PosParPRTPtr;
+  typedef PerRegionTable* PerRegionTablePtr;
+
   if (_max_fine_entries == 0) {
     assert(_mod_max_fine_entries_mask == 0, "Both or none.");
     size_t max_entries_log = (size_t)log2_long((jlong)G1RSetRegionEntries);
     _max_fine_entries = (size_t)(1 << max_entries_log);
     _mod_max_fine_entries_mask = _max_fine_entries - 1;
-#if SAMPLE_FOR_EVICTION
+
     assert(_fine_eviction_sample_size == 0
            && _fine_eviction_stride == 0, "All init at same time.");
     _fine_eviction_sample_size = MAX2((size_t)4, max_entries_log);
     _fine_eviction_stride = _max_fine_entries / _fine_eviction_sample_size;
-#endif
   }
-  _fine_grain_regions = new PosParPRTPtr[_max_fine_entries];
-  if (_fine_grain_regions == NULL)
+
+  _fine_grain_regions = new PerRegionTablePtr[_max_fine_entries];
+
+  if (_fine_grain_regions == NULL) {
     vm_exit_out_of_memory(sizeof(void*)*_max_fine_entries,
                           "Failed to allocate _fine_grain_entries.");
+  }
+
   for (size_t i = 0; i < _max_fine_entries; i++) {
     _fine_grain_regions[i] = NULL;
   }
 }
 
-int** OtherRegionsTable::_from_card_cache = NULL;
+int**  OtherRegionsTable::_from_card_cache = NULL;
 size_t OtherRegionsTable::_from_card_cache_max_regions = 0;
 size_t OtherRegionsTable::_from_card_cache_mem_size = 0;
 
@@ -579,38 +305,26 @@
 void OtherRegionsTable::add_reference(OopOrNarrowOopStar from, int tid) {
   size_t cur_hrs_ind = (size_t) hr()->hrs_index();
 
-#if HRRS_VERBOSE
-  gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").",
-                                                  from,
-                                                  UseCompressedOops
-                                                  ? oopDesc::load_decode_heap_oop((narrowOop*)from)
-                                                  : oopDesc::load_decode_heap_oop((oop*)from));
-#endif
+  if (G1TraceHeapRegionRememberedSet) {
+    gclog_or_tty->print_cr("ORT::add_reference_work(" PTR_FORMAT "->" PTR_FORMAT ").",
+                                                    from,
+                                                    UseCompressedOops
+                                                    ? oopDesc::load_decode_heap_oop((narrowOop*)from)
+                                                    : oopDesc::load_decode_heap_oop((oop*)from));
+  }
 
   int from_card = (int)(uintptr_t(from) >> CardTableModRefBS::card_shift);
 
-#if HRRS_VERBOSE
-  gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = %d)",
-                hr()->bottom(), from_card,
-                _from_card_cache[tid][cur_hrs_ind]);
-#endif
+  if (G1TraceHeapRegionRememberedSet) {
+    gclog_or_tty->print_cr("Table for [" PTR_FORMAT "...): card %d (cache = %d)",
+                  hr()->bottom(), from_card,
+                  _from_card_cache[tid][cur_hrs_ind]);
+  }
 
-#define COUNT_CACHE 0
-#if COUNT_CACHE
-  jint p = Atomic::add(1, &_cache_probes);
-  if ((p % 10000) == 0) {
-    jint hits = _cache_hits;
-    gclog_or_tty->print_cr("%d/%d = %5.2f%% RS cache hits.",
-                  _cache_hits, p, 100.0* (float)hits/(float)p);
-  }
-#endif
   if (from_card == _from_card_cache[tid][cur_hrs_ind]) {
-#if HRRS_VERBOSE
-    gclog_or_tty->print_cr("  from-card cache hit.");
-#endif
-#if COUNT_CACHE
-    Atomic::inc(&_cache_hits);
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print_cr("  from-card cache hit.");
+    }
     assert(contains_reference(from), "We just added it!");
     return;
   } else {
@@ -623,16 +337,16 @@
 
   // If the region is already coarsened, return.
   if (_coarse_map.at(from_hrs_ind)) {
-#if HRRS_VERBOSE
-    gclog_or_tty->print_cr("  coarse map hit.");
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print_cr("  coarse map hit.");
+    }
     assert(contains_reference(from), "We just added it!");
     return;
   }
 
   // Otherwise find a per-region table to add it to.
   size_t ind = from_hrs_ind & _mod_max_fine_entries_mask;
-  PosParPRT* prt = find_region_table(ind, from_hr);
+  PerRegionTable* prt = find_region_table(ind, from_hr);
   if (prt == NULL) {
     MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
     // Confirm that it's really not there...
@@ -649,35 +363,35 @@
           _sparse_table.add_card(from_hrs_ind, card_index)) {
         if (G1RecordHRRSOops) {
           HeapRegionRemSet::record(hr(), from);
-#if HRRS_VERBOSE
-          gclog_or_tty->print("   Added card " PTR_FORMAT " to region "
-                              "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
-                              align_size_down(uintptr_t(from),
-                                              CardTableModRefBS::card_size),
-                              hr()->bottom(), from);
-#endif
+          if (G1TraceHeapRegionRememberedSet) {
+            gclog_or_tty->print("   Added card " PTR_FORMAT " to region "
+                                "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
+                                align_size_down(uintptr_t(from),
+                                                CardTableModRefBS::card_size),
+                                hr()->bottom(), from);
+          }
         }
-#if HRRS_VERBOSE
-        gclog_or_tty->print_cr("   added card to sparse table.");
-#endif
+        if (G1TraceHeapRegionRememberedSet) {
+          gclog_or_tty->print_cr("   added card to sparse table.");
+        }
         assert(contains_reference_locked(from), "We just added it!");
         return;
       } else {
-#if HRRS_VERBOSE
-        gclog_or_tty->print_cr("   [tid %d] sparse table entry "
-                      "overflow(f: %d, t: %d)",
-                      tid, from_hrs_ind, cur_hrs_ind);
-#endif
+        if (G1TraceHeapRegionRememberedSet) {
+          gclog_or_tty->print_cr("   [tid %d] sparse table entry "
+                        "overflow(f: %d, t: %d)",
+                        tid, from_hrs_ind, cur_hrs_ind);
+        }
       }
 
       if (_n_fine_entries == _max_fine_entries) {
         prt = delete_region_table();
       } else {
-        prt = PosParPRT::alloc(from_hr);
+        prt = PerRegionTable::alloc(from_hr);
       }
       prt->init(from_hr);
 
-      PosParPRT* first_prt = _fine_grain_regions[ind];
+      PerRegionTable* first_prt = _fine_grain_regions[ind];
       prt->set_next(first_prt);  // XXX Maybe move to init?
       _fine_grain_regions[ind] = prt;
       _n_fine_entries++;
@@ -704,38 +418,25 @@
   // OtherRegionsTable for why this is OK.
   assert(prt != NULL, "Inv");
 
-  if (prt->should_expand(tid)) {
-    MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
-    HeapRegion* prt_hr = prt->hr();
-    if (prt_hr == from_hr) {
-      // Make sure the table still corresponds to the same region
-      prt->par_expand();
-      prt->add_reference(from, tid);
-    }
-    // else: The table has been concurrently coarsened, evicted, and
-    // the table data structure re-used for another table. So, we
-    // don't need to add the reference any more given that the table
-    // has been coarsened and the whole region will be scanned anyway.
-  } else {
-    prt->add_reference(from, tid);
-  }
+  prt->add_reference(from);
+
   if (G1RecordHRRSOops) {
     HeapRegionRemSet::record(hr(), from);
-#if HRRS_VERBOSE
-    gclog_or_tty->print("Added card " PTR_FORMAT " to region "
-                        "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
-                        align_size_down(uintptr_t(from),
-                                        CardTableModRefBS::card_size),
-                        hr()->bottom(), from);
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print("Added card " PTR_FORMAT " to region "
+                          "[" PTR_FORMAT "...) for ref " PTR_FORMAT ".\n",
+                          align_size_down(uintptr_t(from),
+                                          CardTableModRefBS::card_size),
+                          hr()->bottom(), from);
+    }
   }
   assert(contains_reference(from), "We just added it!");
 }
 
-PosParPRT*
+PerRegionTable*
 OtherRegionsTable::find_region_table(size_t ind, HeapRegion* hr) const {
   assert(0 <= ind && ind < _max_fine_entries, "Preconditions.");
-  PosParPRT* prt = _fine_grain_regions[ind];
+  PerRegionTable* prt = _fine_grain_regions[ind];
   while (prt != NULL && prt->hr() != hr) {
     prt = prt->next();
   }
@@ -743,32 +444,16 @@
   return prt;
 }
 
-
-#define DRT_CENSUS 0
-
-#if DRT_CENSUS
-static const int HistoSize = 6;
-static int global_histo[HistoSize] = { 0, 0, 0, 0, 0, 0 };
-static int coarsenings = 0;
-static int occ_sum = 0;
-#endif
-
 jint OtherRegionsTable::_n_coarsenings = 0;
 
-PosParPRT* OtherRegionsTable::delete_region_table() {
-#if DRT_CENSUS
-  int histo[HistoSize] = { 0, 0, 0, 0, 0, 0 };
-  const int histo_limits[] = { 1, 4, 16, 64, 256, 2048 };
-#endif
-
+PerRegionTable* OtherRegionsTable::delete_region_table() {
   assert(_m.owned_by_self(), "Precondition");
   assert(_n_fine_entries == _max_fine_entries, "Precondition");
-  PosParPRT* max = NULL;
+  PerRegionTable* max = NULL;
   jint max_occ = 0;
-  PosParPRT** max_prev;
+  PerRegionTable** max_prev;
   size_t max_ind;
 
-#if SAMPLE_FOR_EVICTION
   size_t i = _fine_eviction_start;
   for (size_t k = 0; k < _fine_eviction_sample_size; k++) {
     size_t ii = i;
@@ -778,8 +463,8 @@
       if (ii == _max_fine_entries) ii = 0;
       guarantee(ii != i, "We must find one.");
     }
-    PosParPRT** prev = &_fine_grain_regions[ii];
-    PosParPRT* cur = *prev;
+    PerRegionTable** prev = &_fine_grain_regions[ii];
+    PerRegionTable* cur = *prev;
     while (cur != NULL) {
       jint cur_occ = cur->occupied();
       if (max == NULL || cur_occ > max_occ) {
@@ -794,64 +479,27 @@
     i = i + _fine_eviction_stride;
     if (i >= _n_fine_entries) i = i - _n_fine_entries;
   }
+
   _fine_eviction_start++;
-  if (_fine_eviction_start >= _n_fine_entries)
+
+  if (_fine_eviction_start >= _n_fine_entries) {
     _fine_eviction_start -= _n_fine_entries;
-#else
-  for (int i = 0; i < _max_fine_entries; i++) {
-    PosParPRT** prev = &_fine_grain_regions[i];
-    PosParPRT* cur = *prev;
-    while (cur != NULL) {
-      jint cur_occ = cur->occupied();
-#if DRT_CENSUS
-      for (int k = 0; k < HistoSize; k++) {
-        if (cur_occ <= histo_limits[k]) {
-          histo[k]++; global_histo[k]++; break;
-        }
-      }
-#endif
-      if (max == NULL || cur_occ > max_occ) {
-        max = cur;
-        max_prev = prev;
-        max_ind = i;
-        max_occ = cur_occ;
-      }
-      prev = cur->next_addr();
-      cur = cur->next();
-    }
   }
-#endif
-  // XXX
+
   guarantee(max != NULL, "Since _n_fine_entries > 0");
-#if DRT_CENSUS
-  gclog_or_tty->print_cr("In a coarsening: histo of occs:");
-  for (int k = 0; k < HistoSize; k++) {
-    gclog_or_tty->print_cr("  <= %4d: %5d.", histo_limits[k], histo[k]);
-  }
-  coarsenings++;
-  occ_sum += max_occ;
-  if ((coarsenings % 100) == 0) {
-    gclog_or_tty->print_cr("\ncoarsenings = %d; global summary:", coarsenings);
-    for (int k = 0; k < HistoSize; k++) {
-      gclog_or_tty->print_cr("  <= %4d: %5d.", histo_limits[k], global_histo[k]);
-    }
-    gclog_or_tty->print_cr("Avg occ of deleted region = %6.2f.",
-                  (float)occ_sum/(float)coarsenings);
-  }
-#endif
 
   // Set the corresponding coarse bit.
   size_t max_hrs_index = (size_t) max->hr()->hrs_index();
   if (!_coarse_map.at(max_hrs_index)) {
     _coarse_map.at_put(max_hrs_index, true);
     _n_coarse_entries++;
-#if 0
-    gclog_or_tty->print("Coarsened entry in region [" PTR_FORMAT "...] "
-               "for region [" PTR_FORMAT "...] (%d coarse entries).\n",
-               hr()->bottom(),
-               max->hr()->bottom(),
-               _n_coarse_entries);
-#endif
+    if (G1TraceHeapRegionRememberedSet) {
+      gclog_or_tty->print("Coarsened entry in region [" PTR_FORMAT "...] "
+                 "for region [" PTR_FORMAT "...] (%d coarse entries).\n",
+                 hr()->bottom(),
+                 max->hr()->bottom(),
+                 _n_coarse_entries);
+    }
   }
 
   // Unsplice.
@@ -883,10 +531,10 @@
 
   // Now do the fine-grained maps.
   for (size_t i = 0; i < _max_fine_entries; i++) {
-    PosParPRT* cur = _fine_grain_regions[i];
-    PosParPRT** prev = &_fine_grain_regions[i];
+    PerRegionTable* cur = _fine_grain_regions[i];
+    PerRegionTable** prev = &_fine_grain_regions[i];
     while (cur != NULL) {
-      PosParPRT* nxt = cur->next();
+      PerRegionTable* nxt = cur->next();
       // If the entire region is dead, eliminate.
       if (G1RSScrubVerbose) {
         gclog_or_tty->print_cr("     For other region %u:",
@@ -899,7 +547,7 @@
         if (G1RSScrubVerbose) {
           gclog_or_tty->print_cr("          deleted via region map.");
         }
-        PosParPRT::free(cur);
+        PerRegionTable::free(cur);
       } else {
         // Do fine-grain elimination.
         if (G1RSScrubVerbose) {
@@ -914,7 +562,7 @@
           *prev = nxt;
           cur->set_next(NULL);
           _n_fine_entries--;
-          PosParPRT::free(cur);
+          PerRegionTable::free(cur);
         } else {
           prev = cur->next_addr();
         }
@@ -940,7 +588,7 @@
 size_t OtherRegionsTable::occ_fine() const {
   size_t sum = 0;
   for (size_t i = 0; i < _max_fine_entries; i++) {
-    PosParPRT* cur = _fine_grain_regions[i];
+    PerRegionTable* cur = _fine_grain_regions[i];
     while (cur != NULL) {
       sum += cur->occupied();
       cur = cur->next();
@@ -962,13 +610,13 @@
   MutexLockerEx x((Mutex*)&_m, Mutex::_no_safepoint_check_flag);
   size_t sum = 0;
   for (size_t i = 0; i < _max_fine_entries; i++) {
-    PosParPRT* cur = _fine_grain_regions[i];
+    PerRegionTable* cur = _fine_grain_regions[i];
     while (cur != NULL) {
       sum += cur->mem_size();
       cur = cur->next();
     }
   }
-  sum += (sizeof(PosParPRT*) * _max_fine_entries);
+  sum += (sizeof(PerRegionTable*) * _max_fine_entries);
   sum += (_coarse_map.size_in_words() * HeapWordSize);
   sum += (_sparse_table.mem_size());
   sum += sizeof(*this) - sizeof(_sparse_table); // Avoid double counting above.
@@ -980,7 +628,7 @@
 }
 
 size_t OtherRegionsTable::fl_mem_size() {
-  return PerRegionTable::fl_mem_size() + PosParPRT::fl_mem_size();
+  return PerRegionTable::fl_mem_size();
 }
 
 void OtherRegionsTable::clear_fcc() {
@@ -992,10 +640,10 @@
 void OtherRegionsTable::clear() {
   MutexLockerEx x(&_m, Mutex::_no_safepoint_check_flag);
   for (size_t i = 0; i < _max_fine_entries; i++) {
-    PosParPRT* cur = _fine_grain_regions[i];
+    PerRegionTable* cur = _fine_grain_regions[i];
     while (cur != NULL) {
-      PosParPRT* nxt = cur->next();
-      PosParPRT::free(cur);
+      PerRegionTable* nxt = cur->next();
+      PerRegionTable::free(cur);
       cur = nxt;
     }
     _fine_grain_regions[i] = NULL;
@@ -1035,8 +683,8 @@
 bool OtherRegionsTable::del_single_region_table(size_t ind,
                                                 HeapRegion* hr) {
   assert(0 <= ind && ind < _max_fine_entries, "Preconditions.");
-  PosParPRT** prev_addr = &_fine_grain_regions[ind];
-  PosParPRT* prt = *prev_addr;
+  PerRegionTable** prev_addr = &_fine_grain_regions[ind];
+  PerRegionTable* prt = *prev_addr;
   while (prt != NULL && prt->hr() != hr) {
     prev_addr = prt->next_addr();
     prt = prt->next();
@@ -1044,7 +692,7 @@
   if (prt != NULL) {
     assert(prt->hr() == hr, "Loop postcondition.");
     *prev_addr = prt->next();
-    PosParPRT::free(prt);
+    PerRegionTable::free(prt);
     _n_fine_entries--;
     return true;
   } else {
@@ -1065,7 +713,7 @@
   // Is this region in the coarse map?
   if (_coarse_map.at(hr_ind)) return true;
 
-  PosParPRT* prt = find_region_table(hr_ind & _mod_max_fine_entries_mask,
+  PerRegionTable* prt = find_region_table(hr_ind & _mod_max_fine_entries_mask,
                                      hr);
   if (prt != NULL) {
     return prt->contains_reference(from);
@@ -1145,7 +793,7 @@
       G1CollectedHeap::heap()->bot_shared()->address_for_index(card_index);
     gclog_or_tty->print_cr("  Card " PTR_FORMAT, card_start);
   }
-  // XXX
+
   if (iter.n_yielded() != occupied()) {
     gclog_or_tty->print_cr("Yielded disagrees with occupied:");
     gclog_or_tty->print_cr("  %6d yielded (%6d coarse, %6d fine).",
@@ -1163,10 +811,6 @@
   SparsePRT::cleanup_all();
 }
 
-void HeapRegionRemSet::par_cleanup() {
-  PosParPRT::par_contract_all();
-}
-
 void HeapRegionRemSet::clear() {
   _other_regions.clear();
   assert(occupied() == 0, "Should be clear.");
--- a/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegionRemSet.hpp	Fri Jun 29 17:04:39 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -35,7 +35,7 @@
 class G1BlockOffsetSharedArray;
 class HeapRegion;
 class HeapRegionRemSetIterator;
-class PosParPRT;
+class PerRegionTable;
 class SparsePRT;
 
 // Essentially a wrapper around SparsePRTCleanupTask. See
@@ -79,15 +79,14 @@
   size_t      _n_coarse_entries;
   static jint _n_coarsenings;
 
-  PosParPRT** _fine_grain_regions;
-  size_t      _n_fine_entries;
+  PerRegionTable** _fine_grain_regions;
+  size_t           _n_fine_entries;
 
-#define SAMPLE_FOR_EVICTION 1
-#if SAMPLE_FOR_EVICTION
+  // Used to sample a subset of the fine grain PRTs to determine which
+  // PRT to evict and coarsen.
   size_t        _fine_eviction_start;
   static size_t _fine_eviction_stride;
   static size_t _fine_eviction_sample_size;
-#endif
 
   SparsePRT   _sparse_table;
 
@@ -98,21 +97,18 @@
   // Requires "prt" to be the first element of the bucket list appropriate
   // for "hr".  If this list contains an entry for "hr", return it,
   // otherwise return "NULL".
-  PosParPRT* find_region_table(size_t ind, HeapRegion* hr) const;
+  PerRegionTable* find_region_table(size_t ind, HeapRegion* hr) const;
 
-  // Find, delete, and return a candidate PosParPRT, if any exists,
+  // Find, delete, and return a candidate PerRegionTable, if any exists,
   // adding the deleted region to the coarse bitmap.  Requires the caller
   // to hold _m, and the fine-grain table to be full.
-  PosParPRT* delete_region_table();
+  PerRegionTable* delete_region_table();
 
   // If a PRT for "hr" is in the bucket list indicated by "ind" (which must
   // be the correct index for "hr"), delete it and return true; else return
   // false.
   bool del_single_region_table(size_t ind, HeapRegion* hr);
 
-  static jint _cache_probes;
-  static jint _cache_hits;
-
   // Indexed by thread X heap region, to minimize thread contention.
   static int** _from_card_cache;
   static size_t _from_card_cache_max_regions;
@@ -127,10 +123,6 @@
   // sense.
   void add_reference(OopOrNarrowOopStar from, int tid);
 
-  void add_reference(OopOrNarrowOopStar from) {
-    return add_reference(from, 0);
-  }
-
   // Removes any entries shown by the given bitmaps to contain only dead
   // objects.
   void scrub(CardTableModRefBS* ctbs, BitMap* region_bm, BitMap* card_bm);
@@ -233,14 +225,12 @@
 
   static jint n_coarsenings() { return OtherRegionsTable::n_coarsenings(); }
 
-  /* Used in the sequential case.  Returns "true" iff this addition causes
-     the size limit to be reached. */
+  // Used in the sequential case.
   void add_reference(OopOrNarrowOopStar from) {
-    _other_regions.add_reference(from);
+    _other_regions.add_reference(from, 0);
   }
 
-  /* Used in the parallel case.  Returns "true" iff this addition causes
-     the size limit to be reached. */
+  // Used in the parallel case.
   void add_reference(OopOrNarrowOopStar from, int tid) {
     _other_regions.add_reference(from, tid);
   }
@@ -253,15 +243,6 @@
   // entries for this region in other remsets.
   void clear();
 
-  // Forget any entries due to pointers from "from_hr".
-  void clear_incoming_entry(HeapRegion* from_hr) {
-    _other_regions.clear_incoming_entry(from_hr);
-  }
-
-#if 0
-  virtual void cleanup() = 0;
-#endif
-
   // Attempt to claim the region.  Returns true iff this call caused an
   // atomic transition from Unclaimed to Claimed.
   bool claim_iter();
@@ -290,12 +271,6 @@
   // Initialize the given iterator to iterate over this rem set.
   void init_iterator(HeapRegionRemSetIterator* iter) const;
 
-#if 0
-  // Apply the "do_card" method to the start address of every card in the
-  // rem set.  Returns false if some application of the closure aborted.
-  virtual bool card_iterate(CardClosure* iter) = 0;
-#endif
-
   // The actual # of bytes this hr_remset takes up.
   size_t mem_size() {
     return _other_regions.mem_size()
@@ -322,10 +297,7 @@
   void print() const;
 
   // Called during a stop-world phase to perform any deferred cleanups.
-  // The second version may be called by parallel threads after then finish
-  // collection work.
   static void cleanup();
-  static void par_cleanup();
 
   // Declare the heap size (in # of regions) to the HeapRegionRemSet(s).
   // (Uses it to initialize from_card_cache).
@@ -367,7 +339,7 @@
 
   // Local caching of HRRS fields.
   const BitMap*             _coarse_map;
-  PosParPRT**               _fine_grain_regions;
+  PerRegionTable**          _fine_grain_regions;
 
   G1BlockOffsetSharedArray* _bosa;
   G1CollectedHeap*          _g1h;
@@ -404,8 +376,9 @@
 
   // Index of bucket-list we're working on.
   int _fine_array_index;
+
   // Per Region Table we're doing within current bucket list.
-  PosParPRT* _fine_cur_prt;
+  PerRegionTable* _fine_cur_prt;
 
   /* SparsePRT::*/ SparsePRTIter _sparse_iter;
 
@@ -435,12 +408,4 @@
   }
 };
 
-#if 0
-class CardClosure: public Closure {
-public:
-  virtual void do_card(HeapWord* card_start) = 0;
-};
-
-#endif
-
 #endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONREMSET_HPP
--- a/src/share/vm/memory/dump.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/memory/dump.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -62,8 +62,8 @@
 // written later, increasing the likelihood that the shared page contain
 // the hash can be shared.
 //
-// NOTE THAT the algorithm in StringTable::hash_string() MUST MATCH the
-// algorithm in java.lang.String.hashCode().
+// NOTE THAT we have to call java_lang_String::to_hash() to match the
+// algorithm in java.lang.String.toHash().
 
 class StringHashCodeClosure: public OopClosure {
 private:
@@ -80,7 +80,7 @@
       oop obj = *p;
       if (obj->klass() == SystemDictionary::String_klass() &&
           java_lang_String::has_hash_field()) {
-        int hash = java_lang_String::hash_string(obj);
+        int hash = java_lang_String::to_hash(obj);
         obj->int_field_put(hash_offset, hash);
       }
     }
--- a/src/share/vm/memory/universe.hpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/memory/universe.hpp	Fri Jun 29 17:04:39 2012 -0700
@@ -273,7 +273,7 @@
   }
 
   static klassOop typeArrayKlassObj(BasicType t) {
-    assert((uint)t < T_VOID+1, "range check");
+    assert((uint)t < T_VOID+1, err_msg("range check for type: %s", type2name(t)));
     assert(_typeArrayKlassObjs[t] != NULL, "domain check");
     return _typeArrayKlassObjs[t];
   }
--- a/src/share/vm/opto/c2_globals.hpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/opto/c2_globals.hpp	Fri Jun 29 17:04:39 2012 -0700
@@ -81,6 +81,13 @@
   product(intx, MaxLoopPad, (OptoLoopAlignment-1),                          \
           "Align a loop if padding size in bytes is less or equal to this value") \
                                                                             \
+  product(intx, MaxVectorSize, 32,                                          \
+          "Max vector size in bytes, "                                      \
+          "actual size could be less depending on elements type")           \
+                                                                            \
+  product(bool, AlignVector, false,                                         \
+          "Perform vector store/load alignment in loop")                    \
+                                                                            \
   product(intx, NumberOfLoopInstrToAlign, 4,                                \
           "Number of first instructions in a loop to align")                \
                                                                             \
@@ -292,9 +299,12 @@
   develop(bool, SuperWordRTDepCheck, false,                                 \
           "Enable runtime dependency checks.")                              \
                                                                             \
-  product(bool, TraceSuperWord, false,                                      \
+  notproduct(bool, TraceSuperWord, false,                                   \
           "Trace superword transforms")                                     \
                                                                             \
+  notproduct(bool, TraceNewVectors, false,                                  \
+          "Trace creation of Vector nodes")                                 \
+                                                                            \
   product_pd(bool, OptoBundling,                                            \
           "Generate nops to fill i-cache lines")                            \
                                                                             \
--- a/src/share/vm/opto/callGenerator.cpp	Thu Jun 28 09:32:35 2012 -0700
+++ b/src/share/vm/opto/callGenerator.cpp	Fri Jun 29 17:04:39 2012 -0700
@@ -172,9 +172,11 @@
 
 JVMState* DynamicCallGenerator::generate(JVMState* jvms) {
   GraphKit kit(jvms);
+  Compile* C = kit.C;
+  PhaseGVN& gvn = kit.gvn();
 
-  if (kit.C->log() != NULL) {
-    kit.C->log()->elem("dynamic_call bci='%d'", jvms->bci());
+  if (C->log() != NULL) {
+    C->log()->elem("dynamic_call bci='%d'", jvms->bci());
   }
 
   // Get the constant pool cache from the caller class.
@@ -190,18 +192,21 @@
   size_t call_site_offset = cpcache->get_f1_offset(index);
 
   // Load the CallSite object from the constant pool cache.
-  const TypeOopPtr* cpcache_ptr = TypeOopPtr::make_from_constant(cpcache);
-  Node* cpcache_adr = kit.makecon(cpcache_ptr);
-  Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, cpcache_adr, call_site_offset);
-  Node* call_site = kit.make_load(kit.control(), call_site_adr, TypeInstPtr::BOTTOM, T_OBJECT, Compile::AliasIdxRaw);
+  const TypeOopPtr* cpcache_type   = TypeOopPtr::make_from_constant(cpcache);  // returns TypeAryPtr of type T_OBJECT
+  const TypeOopPtr* call_site_type = TypeOopPtr::make_from_klass(C->env()->CallSite_klass());
+  Node* cpcache_adr   = kit.makecon(cpcache_type);
+  Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, call_site_offset);
+  // The oops in the constant pool cache are not compressed; load then as raw pointers.
+  Node* call_site     = kit.make_load(kit.control(), call_site_adr, call_site_type, T_ADDRESS, Compile::AliasIdxRaw);
 
   // Load the target MethodHandle from the CallSite object.
-  Node* target_mh_adr = kit.basic_plus_adr(call_site, call_site, java_lang_invoke_CallSite::target_offset_in_bytes());
-  Node* target_mh = kit.make_load(kit.control(), target_mh_adr, TypeInstPtr::BOTTOM, T_OBJECT);
+  const TypeOopPtr* target_type = TypeOopPtr::make_from_klass(C->env()->MethodHandle_klass());
+  Node* target_mh_adr = kit.basic_plus_adr(call_site, java_lang_invoke_CallSite::target_offset_in_bytes());
+  Node* target_mh     = kit.make_load(kit.control(), target_mh_adr, target_type, T_OBJECT);
 
   address resolve_stub = SharedRuntime::get_resolve_opt_virtual_call_stub();
 
-  CallStaticJavaNode *call = new (kit.C, tf()->domain()->cnt()) CallStaticJavaNode(tf(), resolve_stub, method(), kit.bci());
+  CallStaticJavaNode* call = new (C, tf()->domain()->cnt()) CallStaticJavaNode(tf(), resolve_stub, method(), kit.bci());
   // invokedynamic is treated as an optimized invokevirtual.
   call->set_optimized_virtual(true);
   // Take extra care (in the presence of argument motion) not to trash the SP:
@@ -785,9 +790,10 @@
 
 JVMState* PredictedDynamicCallGenerator::generate(JVMState* jvms) {
   GraphKit kit(jvms);
+  Compile* C = kit.C;
   PhaseGVN& gvn = kit.gvn();
 
-  CompileLog* log = kit.C->log();
+  CompileLog* log = C->log();
   if (log != NULL) {
     log->elem("predicted_dynamic_call bci='%d'", jvms->bci());
   }
@@ -803,8 +809,8 @@
     Node* receiver = kit.argument(0);
 
     // Check if the MethodHandle is the expected one
-    Node* cmp = gvn.transform(new(kit.C, 3) CmpPNode(receiver, predicted_mh));
-    bol = gvn.transform(new(kit.C, 2) BoolNode(cmp, BoolTest::eq) );
+    Node* cmp = gvn.transform(new (C, 3) CmpPNode(receiver, predicted_mh));
+    bol = gvn.transform(new (C, 2) BoolNode(cmp, BoolTest::eq) );
   } else {
     // Get the constant pool cache from the caller class.
     ciMethod* caller_method = jvms->method();
@@ -818,22 +824,25 @@
     size_t call_site_offset = cpcache->get_f1_offset(index);
 
     // Load the CallSite object from the constant pool cache.
-    const TypeOopPtr* cpcache_ptr = TypeOopPtr::make_from_constant(cpcache);
-    Node* cpcache_adr   = kit.makecon(cpcache_ptr);
-    Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, cpcache_adr, call_site_offset);
-    Node* call_site     = kit.make_load(kit.control(), call_site_adr, TypeInstPtr::BOTTOM, T_OBJECT, Compile::AliasIdxRaw);
+    const TypeOopPtr* cpcache_type   = TypeOopPtr::make_from_constant(cpcache);  // returns TypeAryPtr of type T_OBJECT
+    const TypeOopPtr* call_site_type = TypeOopPtr::make_from_klass(C->env()->CallSite_klass());
+    Node* cpcache_adr   = kit.makecon(cpcache_type);
+    Node* call_site_adr = kit.basic_plus_adr(cpcache_adr, call_site_offset);
+    // The oops in the constant pool cache are not compressed; load then as raw pointers.
+    Node* call_site     = kit.make_load(kit.control(), call_site_adr, call_site_type, T_ADDRESS, Compile::AliasIdxRaw);
 
     // Load the target MethodHandle from the CallSite object.
+    const TypeOopPtr* target_type = TypeOopPtr::make_from_klass(C->env()->MethodHandle_klass());
     Node* target_adr = kit.basic_plus_adr(call_site, call_site, java_lang_invoke_CallSite::target_offset_in_bytes());
-    Node* target_mh  = kit.make_load(kit.control(), target_adr, TypeInstPtr::BOTTOM, T_OBJECT);
+    Node* target_mh  = kit.make_load(kit.control(), target_adr, target_type, T_OBJECT);
 
     // Check if the MethodHandle is still the same.
-    Node* cmp = gvn.transform(new(kit.C, 3) CmpPNode(target_mh, predicted_mh));
-    bol = gvn.transform(new(kit.C, 2) BoolNode(cmp, BoolTest::eq) );
+    Node* cmp = gvn.transform(new (C, 3) CmpPNode(target_mh, predicted_mh));