changeset 3781:e4d10261499c

Merge
author asaha
date Fri, 07 Sep 2012 18:18:55 -0700
parents defeb6dad7d5 6124ff421829
children 521c82b9cbf8
files agent/src/share/classes/sun/jvm/hotspot/code/RicochetBlob.java agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCRicochetFrame.java agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86RicochetFrame.java src/share/vm/gc_implementation/parNew/parGCAllocBuffer.cpp src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp src/share/vm/prims/methodHandleWalk.cpp src/share/vm/prims/methodHandleWalk.hpp src/share/vm/runtime/arguments.cpp
diffstat 322 files changed, 20622 insertions(+), 17972 deletions(-)
--- a/.hgtags	Fri Aug 10 10:41:13 2012 -0700
+++ b/.hgtags	Fri Sep 07 18:18:55 2012 -0700
@@ -268,3 +268,10 @@
 58f237a9e83af6ded0d2e2c81d252cd47c0f4c45 jdk8-b50
 3b3ad16429701b2eb6712851c2f7c5a726eb2cbe hs24-b19
 663fc23da8d51c4c0552cbcb17ffc85f5869d4fd jdk8-b51
+4c8f2a12e757e7a808aa85827573e09f75d7459f hs24-b20
+6d0436885201db3f581523344a734793bb989549 jdk8-b52
+54240c1b8e87758f28da2c6a569a926fd9e0910a jdk8-b53
+9e3ae661284dc04185b029d85440fe7811f1ed07 hs24-b21
+e8fb566b94667f88462164defa654203f0ab6820 jdk8-b54
+09ea7e0752b306b8ae74713aeb4eb6263e1c6836 hs24-b22
+af0c8a0808516317333dcf9af15567cdd52761ce jdk8-b55
--- a/agent/make/saenv.sh	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/make/saenv.sh	Fri Sep 07 18:18:55 2012 -0700
@@ -26,7 +26,7 @@
 # This file sets common environment variables for all SA scripts
 
 OS=`uname`
-STARTDIR=`dirname $0`
+STARTDIR=`(cd \`dirname $0 \`; pwd)`
 ARCH=`uname -m`
 
 if [ "x$SA_JAVA" = "x" ]; then
--- a/agent/make/start-debug-server-proc.sh	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/make/start-debug-server-proc.sh	Fri Sep 07 18:18:55 2012 -0700
@@ -25,10 +25,11 @@
 
 . `dirname $0`/saenv.sh
 
-if [ -f $STARTDIR/sa.jar ] ; then
-  CP=$STARTDIR/sa.jar
+if [ -f $STARTDIR/../lib/sa-jdi.jar ] ; then
+  CP=$STARTDIR/../lib/sa-jdi.jar
 else
   CP=$STARTDIR/../build/classes
 fi
 
-$SA_JAVA -classpath $CP ${OPTIONS} -Djava.rmi.server.codebase=file:/$CP -Djava.security.policy=$STARTDIR\/grantAll.policy sun.jvm.hotspot.DebugServer $*
+$STARTDIR/java -classpath $CP ${OPTIONS} -Djava.rmi.server.codebase=file://$CP -Djava.security.policy=${STARTDIR}/grantAll.policy sun.jvm.hotspot.DebugServer $*
+
--- a/agent/src/os/linux/LinuxDebuggerLocal.c	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/os/linux/LinuxDebuggerLocal.c	Fri Sep 07 18:18:55 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -55,11 +55,11 @@
 #define THROW_NEW_DEBUGGER_EXCEPTION_(str, value) { throw_new_debugger_exception(env, str); return value; }
 #define THROW_NEW_DEBUGGER_EXCEPTION(str) { throw_new_debugger_exception(env, str); return;}
 
-static void throw_new_debugger_exception(JNIEnv* env, const char* errMsg) {
+void throw_new_debugger_exception(JNIEnv* env, const char* errMsg) {
   (*env)->ThrowNew(env, (*env)->FindClass(env, "sun/jvm/hotspot/debugger/DebuggerException"), errMsg);
 }
 
-static struct ps_prochandle* get_proc_handle(JNIEnv* env, jobject this_obj) {
+struct ps_prochandle* get_proc_handle(JNIEnv* env, jobject this_obj) {
   jlong ptr = (*env)->GetLongField(env, this_obj, p_ps_prochandle_ID);
   return (struct ps_prochandle*)(intptr_t)ptr;
 }
@@ -280,6 +280,7 @@
   return (err == PS_OK)? array : 0;
 }
 
+#if defined(i386) || defined(ia64) || defined(amd64) || defined(sparc) || defined(sparcv9)
 JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0
   (JNIEnv *env, jobject this_obj, jint lwp_id) {
 
@@ -410,3 +411,4 @@
   (*env)->ReleaseLongArrayElements(env, array, regs, JNI_COMMIT);
   return array;
 }
+#endif
--- a/agent/src/os/linux/libproc.h	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/os/linux/libproc.h	Fri Sep 07 18:18:55 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2007, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,10 +25,15 @@
 #ifndef _LIBPROC_H_
 #define _LIBPROC_H_
 
+#include <jni.h>
 #include <unistd.h>
 #include <stdint.h>
 #include "proc_service.h"
 
+#if defined(arm) || defined(ppc)
+#include "libproc_md.h"
+#endif
+
 #if defined(sparc) || defined(sparcv9)
 /*
   If _LP64 is defined ptrace.h should be taken from /usr/include/asm-sparc64
@@ -139,4 +144,8 @@
 // address->nearest symbol lookup. return NULL for no symbol
 const char* symbol_for_pc(struct ps_prochandle* ph, uintptr_t addr, uintptr_t* poffset);
 
+struct ps_prochandle* get_proc_handle(JNIEnv* env, jobject this_obj);
+
+void throw_new_debugger_exception(JNIEnv* env, const char* errMsg);
+
 #endif //__LIBPROC_H_
--- a/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/HotSpotAgent.java	Fri Sep 07 18:18:55 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -549,7 +549,13 @@
                     machDesc = new MachineDescriptionSPARC32Bit();
             }
         } else {
-            throw new DebuggerException("Linux only supported on x86/ia64/amd64/sparc/sparc64");
+          try {
+            machDesc = (MachineDescription)
+              Class.forName("sun.jvm.hotspot.debugger.MachineDescription" +
+                            cpu.toUpperCase()).newInstance();
+          } catch (Exception e) {
+            throw new DebuggerException("Linux not supported on machine type " + cpu);
+          }
         }
 
         LinuxDebuggerLocal dbg =
--- a/agent/src/share/classes/sun/jvm/hotspot/bugspot/BugSpotAgent.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/bugspot/BugSpotAgent.java	Fri Sep 07 18:18:55 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -737,9 +737,16 @@
                machDesc = new MachineDescriptionSPARC32Bit();
             }
         } else {
-            throw new DebuggerException("Linux only supported on x86/ia64/amd64/sparc/sparc64");
+          try {
+            machDesc = (MachineDescription)
+              Class.forName("sun.jvm.hotspot.debugger.MachineDescription" +
+              cpu.toUpperCase()).newInstance();
+          } catch (Exception e) {
+            throw new DebuggerException("unsupported machine type");
+          }
         }
 
+
         // Note we do not use a cache for the local debugger in server
         // mode; it will be taken care of on the client side (once remote
         // debugging is implemented).
--- a/agent/src/share/classes/sun/jvm/hotspot/code/CodeBlob.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/CodeBlob.java	Fri Sep 07 18:18:55 2012 -0700
@@ -93,7 +93,6 @@
   public boolean isUncommonTrapStub()   { return false; }
   public boolean isExceptionStub()      { return false; }
   public boolean isSafepointStub()      { return false; }
-  public boolean isRicochetBlob()       { return false; }
   public boolean isAdapterBlob()        { return false; }
 
   // Fine grain nmethod support: isNmethod() == isJavaMethod() || isNativeMethod() || isOSRMethod()
--- a/agent/src/share/classes/sun/jvm/hotspot/code/CodeCache.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/code/CodeCache.java	Fri Sep 07 18:18:55 2012 -0700
@@ -57,7 +57,6 @@
     virtualConstructor.addMapping("BufferBlob", BufferBlob.class);
     virtualConstructor.addMapping("nmethod", NMethod.class);
     virtualConstructor.addMapping("RuntimeStub", RuntimeStub.class);
-    virtualConstructor.addMapping("RicochetBlob", RicochetBlob.class);
     virtualConstructor.addMapping("AdapterBlob", AdapterBlob.class);
     virtualConstructor.addMapping("MethodHandlesAdapterBlob", MethodHandlesAdapterBlob.class);
     virtualConstructor.addMapping("SafepointBlob", SafepointBlob.class);
@@ -127,10 +126,6 @@
       Assert.that(result.blobContains(start) || result.blobContains(start.addOffsetTo(8)),
                                                                     "found wrong CodeBlob");
     }
-    if (result.isRicochetBlob()) {
-      // This should probably be done for other SingletonBlobs
-      return VM.getVM().ricochetBlob();
-    }
     return result;
   }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/code/RicochetBlob.java	Fri Aug 10 10:41:13 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,70 +0,0 @@
-/*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-package sun.jvm.hotspot.code;
-
-import java.util.*;
-import sun.jvm.hotspot.debugger.*;
-import sun.jvm.hotspot.runtime.*;
-import sun.jvm.hotspot.types.*;
-
-/** RicochetBlob (currently only used by Compiler 2) */
-
-public class RicochetBlob extends SingletonBlob {
-  static {
-    VM.registerVMInitializedObserver(new Observer() {
-        public void update(Observable o, Object data) {
-          initialize(VM.getVM().getTypeDataBase());
-        }
-      });
-  }
-
-  private static void initialize(TypeDataBase db) {
-    Type type = db.lookupType("RicochetBlob");
-
-    bounceOffsetField                = type.getCIntegerField("_bounce_offset");
-    exceptionOffsetField             = type.getCIntegerField("_exception_offset");
-  }
-
-  private static CIntegerField bounceOffsetField;
-  private static CIntegerField exceptionOffsetField;
-
-  public RicochetBlob(Address addr) {
-    super(addr);
-  }
-
-  public boolean isRicochetBlob() {
-    return true;
-  }
-
-  public Address bounceAddr() {
-    return codeBegin().addOffsetTo(bounceOffsetField.getValue(addr));
-  }
-
-  public boolean returnsToBounceAddr(Address pc) {
-    Address bouncePc = bounceAddr();
-    return (pc.equals(bouncePc) || pc.addOffsetTo(Frame.pcReturnOffset()).equals(bouncePc));
-  }
-
-}
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/ThreadContext.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/ThreadContext.java	Fri Sep 07 18:18:55 2012 -0700
@@ -24,6 +24,8 @@
 
 package sun.jvm.hotspot.debugger;
 
+import sun.jvm.hotspot.debugger.cdbg.*;
+
 /** This is a placeholder interface for a thread's context, containing
     only integer registers (no floating-point ones). What it contains
     is platform-dependent. Not all registers are guaranteed to be
@@ -54,4 +56,6 @@
   /** Set the value of the specified register (0..getNumRegisters() -
       1) as an Address */
   public void setRegisterAsAddress(int index, Address value);
+
+  public CFrame getTopFrame(Debugger dbg);
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/amd64/AMD64ThreadContext.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/amd64/AMD64ThreadContext.java	Fri Sep 07 18:18:55 2012 -0700
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.amd64;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.cdbg.*;
 
 /** Specifies the thread context on amd64 platforms; only a sub-portion
  * of the context is guaranteed to be present on all operating
@@ -98,6 +99,10 @@
         return data[index];
     }
 
+    public CFrame getTopFrame(Debugger dbg) {
+        return null;
+    }
+
     /** This can't be implemented in this class since we would have to
      * tie the implementation to, for example, the debugging system */
     public abstract void setRegisterAsAddress(int index, Address value);
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/ia64/IA64ThreadContext.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/ia64/IA64ThreadContext.java	Fri Sep 07 18:18:55 2012 -0700
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.ia64;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.cdbg.*;
 
 /** Specifies the thread context on ia64 platform; only a sub-portion
     of the context is guaranteed to be present on all operating
@@ -172,6 +173,10 @@
     return data[index];
   }
 
+  public CFrame getTopFrame(Debugger dbg) {
+    return null;
+  }
+
   /** This can't be implemented in this class since we would have to
       tie the implementation to, for example, the debugging system */
   public abstract void setRegisterAsAddress(int index, Address value);
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java	Fri Sep 07 18:18:55 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2006, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -107,7 +107,9 @@
        if (pc == null) return null;
        return new LinuxSPARCCFrame(dbg, sp, pc, LinuxDebuggerLocal.getAddressSize());
     } else {
-       throw new DebuggerException(cpu + " is not yet supported");
+       // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu
+       ThreadContext context = (ThreadContext) thread.getContext();
+       return context.getTopFrame(dbg);
     }
   }
 
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/linux/LinuxThreadContextFactory.java	Fri Sep 07 18:18:55 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2006, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -24,6 +24,7 @@
 
 package sun.jvm.hotspot.debugger.linux;
 
+import java.lang.reflect.*;
 import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.debugger.linux.amd64.*;
 import sun.jvm.hotspot.debugger.linux.ia64.*;
@@ -41,8 +42,16 @@
          return new LinuxIA64ThreadContext(dbg);
       } else if (cpu.equals("sparc")) {
          return new LinuxSPARCThreadContext(dbg);
-      } else {
-         throw new RuntimeException("cpu " + cpu + " is not yet supported");
+      } else  {
+        try {
+          Class tcc = Class.forName("sun.jvm.hotspot.debugger.linux." +
+             cpu.toLowerCase() + ".Linux" + cpu.toUpperCase() +
+             "ThreadContext");
+          Constructor[] ctcc = tcc.getConstructors();
+          return (ThreadContext)ctcc[0].newInstance(dbg);
+        } catch (Exception e) {
+          throw new RuntimeException("cpu " + cpu + " is not yet supported");
+        }
       }
    }
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java	Fri Sep 07 18:18:55 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2008, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -27,6 +27,7 @@
 import java.io.*;
 import java.net.*;
 import java.util.*;
+import java.lang.reflect.*;
 import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
 import sun.jvm.hotspot.debugger.proc.amd64.*;
@@ -86,7 +87,16 @@
             pcRegIndex = AMD64ThreadContext.RIP;
             fpRegIndex = AMD64ThreadContext.RBP;
         } else {
+          try {
+            Class tfc = Class.forName("sun.jvm.hotspot.debugger.proc." +
+               cpu.toLowerCase() + ".Proc" + cpu.toUpperCase() +
+               "ThreadFactory");
+            Constructor[] ctfc = tfc.getConstructors();
+            threadFactory = (ProcThreadFactory)ctfc[0].newInstance(this);
+          } catch (Exception e) {
             throw new RuntimeException("Thread access for CPU architecture " + PlatformInfo.getCPU() + " not yet supported");
+            // Note: pcRegIndex and fpRegIndex do not appear to be referenced
+          }
         }
         if (useCache) {
             // Cache portion of the remote process's address space.
@@ -375,7 +385,11 @@
         int pagesize = getPageSize0();
         if (pagesize == -1) {
             // return the hard coded default value.
-            pagesize = (PlatformInfo.getCPU().equals("x86"))? 4096 : 8192;
+            if (PlatformInfo.getCPU().equals("sparc") ||
+                PlatformInfo.getCPU().equals("amd64") )
+               pagesize = 8192;
+            else
+               pagesize = 4096;
         }
         return pagesize;
     }
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java	Fri Sep 07 18:18:55 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2009, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
 
 import java.rmi.*;
 import java.util.*;
+import java.lang.reflect.*;
 
 import sun.jvm.hotspot.debugger.*;
 import sun.jvm.hotspot.debugger.cdbg.*;
@@ -70,7 +71,18 @@
         cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize);
         unalignedAccessesOkay = true;
       } else {
-        throw new DebuggerException("Thread access for CPU architecture " + cpu + " not yet supported");
+        try {
+          Class tf = Class.forName("sun.jvm.hotspot.debugger.remote." +
+            cpu.toLowerCase() + ".Remote" + cpu.toUpperCase() +
+            "ThreadFactory");
+          Constructor[] ctf = tf.getConstructors();
+          threadFactory = (RemoteThreadFactory)ctf[0].newInstance(this);
+        } catch (Exception e) {
+          throw new DebuggerException("Thread access for CPU architecture " + cpu + " not yet supported");
+        }
+        cachePageSize = 4096;
+        cacheNumPages = parseCacheNumPagesProperty(cacheSize / cachePageSize);
+        unalignedAccessesOkay = false;
       }
 
       // Cache portion of the remote process's address space.
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/sparc/SPARCThreadContext.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/sparc/SPARCThreadContext.java	Fri Sep 07 18:18:55 2012 -0700
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.sparc;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.cdbg.*;
 
 /** Currently provides just the minimal information necessary to get
     stack traces working. FIXME: currently hardwired for v9 -- will
@@ -124,6 +125,10 @@
     return data[index];
   }
 
+  public CFrame getTopFrame(Debugger dbg) {
+    return null;
+  }
+
   /** This can't be implemented in this class since we would have to
       tie the implementation to, for example, the debugging system */
   public abstract void setRegisterAsAddress(int index, Address value);
--- a/agent/src/share/classes/sun/jvm/hotspot/debugger/x86/X86ThreadContext.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/debugger/x86/X86ThreadContext.java	Fri Sep 07 18:18:55 2012 -0700
@@ -25,6 +25,7 @@
 package sun.jvm.hotspot.debugger.x86;
 
 import sun.jvm.hotspot.debugger.*;
+import sun.jvm.hotspot.debugger.cdbg.*;
 
 /** Specifies the thread context on x86 platforms; only a sub-portion
     of the context is guaranteed to be present on all operating
@@ -109,6 +110,10 @@
     return data[index];
   }
 
+  public CFrame getTopFrame(Debugger dbg) {
+    return null;
+  }
+
   /** This can't be implemented in this class since we would have to
       tie the implementation to, for example, the debugging system */
   public abstract void setRegisterAsAddress(int index, Address value);
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/Frame.java	Fri Sep 07 18:18:55 2012 -0700
@@ -147,12 +147,6 @@
     }
   }
 
-  public boolean isRicochetFrame() {
-    CodeBlob cb = VM.getVM().getCodeCache().findBlob(getPC());
-    RicochetBlob rcb = VM.getVM().ricochetBlob();
-    return (cb == rcb && rcb != null && rcb.returnsToBounceAddr(getPC()));
-  }
-
   public boolean isCompiledFrame() {
     if (Assert.ASSERTS_ENABLED) {
       Assert.that(!VM.getVM().isCore(), "noncore builds only");
@@ -216,8 +210,7 @@
   public Frame realSender(RegisterMap map) {
     if (!VM.getVM().isCore()) {
       Frame result = sender(map);
-      while (result.isRuntimeFrame() ||
-             result.isRicochetFrame()) {
+      while (result.isRuntimeFrame()) {
         result = result.sender(map);
       }
       return result;
@@ -631,9 +624,6 @@
     if (Assert.ASSERTS_ENABLED) {
       Assert.that(cb != null, "sanity check");
     }
-    if (cb == VM.getVM().ricochetBlob()) {
-      oopsRicochetDo(oopVisitor, regMap);
-    }
     if (cb.getOopMaps() != null) {
       OopMapSet.oopsDo(this, cb, regMap, oopVisitor, VM.getVM().isDebugging());
 
@@ -650,10 +640,6 @@
     //    }
   }
 
-  private void oopsRicochetDo      (AddressVisitor oopVisitor, RegisterMap regMap) {
-    // XXX Empty for now
-  }
-
   // FIXME: implement the above routines, plus add
   // oops_interpreted_arguments_do and oops_compiled_arguments_do
 }
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/Threads.java	Fri Sep 07 18:18:55 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -91,6 +91,16 @@
                 access = new LinuxAMD64JavaThreadPDAccess();
             } else if (cpu.equals("sparc")) {
                 access = new LinuxSPARCJavaThreadPDAccess();
+            } else {
+              try {
+                access = (JavaThreadPDAccess)
+                  Class.forName("sun.jvm.hotspot.runtime.linux_" +
+                     cpu.toLowerCase() + ".Linux" + cpu.toUpperCase() +
+                     "JavaThreadPDAccess").newInstance();
+              } catch (Exception e) {
+                throw new RuntimeException("OS/CPU combination " + os + "/" + cpu +
+                                           " not yet supported");
+              }
             }
         } else if (os.equals("bsd")) {
             if (cpu.equals("x86")) {
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/VM.java	Fri Sep 07 18:18:55 2012 -0700
@@ -87,13 +87,13 @@
   private StubRoutines stubRoutines;
   private Bytes        bytes;
 
-  private RicochetBlob ricochetBlob;
-
   /** Flags indicating whether we are attached to a core, C1, or C2 build */
   private boolean      usingClientCompiler;
   private boolean      usingServerCompiler;
   /** Flag indicating whether UseTLAB is turned on */
   private boolean      useTLAB;
+  /** Flag indicating whether invokedynamic support is on */
+  private boolean      enableInvokeDynamic;
   /** alignment constants */
   private boolean      isLP64;
   private int          bytesPerLong;
@@ -319,6 +319,7 @@
     }
 
     useTLAB = (db.lookupIntConstant("UseTLAB").intValue() != 0);
+    enableInvokeDynamic = (db.lookupIntConstant("EnableInvokeDynamic").intValue() != 0);
 
     if (debugger != null) {
       isLP64 = debugger.getMachineDescription().isLP64();
@@ -554,6 +555,10 @@
     return useTLAB;
   }
 
+  public boolean getEnableInvokeDynamic() {
+    return enableInvokeDynamic;
+  }
+
   public TypeDataBase getTypeDataBase() {
     return db;
   }
@@ -628,18 +633,6 @@
     return stubRoutines;
   }
 
-  public RicochetBlob ricochetBlob() {
-    if (ricochetBlob == null) {
-      Type ricochetType  = db.lookupType("SharedRuntime");
-      AddressField ricochetBlobAddress = ricochetType.getAddressField("_ricochet_blob");
-      Address addr = ricochetBlobAddress.getValue();
-      if (addr != null) {
-        ricochetBlob = new RicochetBlob(addr);
-      }
-    }
-    return ricochetBlob;
-  }
-
   public VMRegImpl getVMRegImplInfo() {
     if (vmregImpl == null) {
       vmregImpl = new VMRegImpl();
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCFrame.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCFrame.java	Fri Sep 07 18:18:55 2012 -0700
@@ -571,8 +571,6 @@
     //        registers callee-saved, then we will have to copy over
     //        the RegisterMap update logic from the Intel code.
 
-    if (isRicochetFrame()) return senderForRicochetFrame(map);
-
     // The constructor of the sender must know whether this frame is interpreted so it can set the
     // sender's _interpreter_sp_adjustment field.
     if (VM.getVM().getInterpreter().contains(pc)) {
@@ -945,20 +943,6 @@
   }
 
 
-  private Frame senderForRicochetFrame(SPARCRegisterMap map) {
-    if (DEBUG) {
-      System.out.println("senderForRicochetFrame");
-    }
-    //RicochetFrame* f = RicochetFrame::from_frame(fr);
-    // Cf. is_interpreted_frame path of frame::sender
-    Address youngerSP = getSP();
-    Address sp        = getSenderSP();
-    map.makeIntegerRegsUnsaved();
-    map.shiftWindow(sp, youngerSP);
-    boolean thisFrameAdjustedStack = true;  // I5_savedSP is live in this RF
-    return new SPARCFrame(biasSP(sp), biasSP(youngerSP), thisFrameAdjustedStack);
-  }
-
   private Frame senderForEntryFrame(RegisterMap regMap) {
     SPARCRegisterMap map = (SPARCRegisterMap) regMap;
 
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/sparc/SPARCRicochetFrame.java	Fri Aug 10 10:41:13 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-package sun.jvm.hotspot.runtime.sparc;
-
-import java.util.*;
-import sun.jvm.hotspot.asm.sparc.SPARCRegister;
-import sun.jvm.hotspot.asm.sparc.SPARCRegisters;
-import sun.jvm.hotspot.debugger.*;
-import sun.jvm.hotspot.runtime.*;
-import sun.jvm.hotspot.types.*;
-
-public class SPARCRicochetFrame {
-  static {
-    VM.registerVMInitializedObserver(new Observer() {
-        public void update(Observable o, Object data) {
-          initialize(VM.getVM().getTypeDataBase());
-        }
-      });
-  }
-
-  private SPARCFrame frame;
-
-  private static void initialize(TypeDataBase db) {
-    // Type type = db.lookupType("MethodHandles::RicochetFrame");
-
-  }
-
-  static SPARCRicochetFrame fromFrame(SPARCFrame f) {
-    return new SPARCRicochetFrame(f);
-  }
-
-  private SPARCRicochetFrame(SPARCFrame f) {
-    frame = f;
-  }
-
-  private Address registerValue(SPARCRegister reg) {
-    return frame.getSP().addOffsetTo(reg.spOffsetInSavedWindow()).getAddressAt(0);
-  }
-
-  public Address savedArgsBase() {
-    return registerValue(SPARCRegisters.L4);
-  }
-  public Address exactSenderSP() {
-    return registerValue(SPARCRegisters.I5);
-  }
-  public Address senderLink() {
-    return frame.getSenderSP();
-  }
-  public Address senderPC() {
-    return frame.getSenderPC();
-  }
-  public Address extendedSenderSP() {
-    return savedArgsBase();
-  }
-}
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86Frame.java	Fri Sep 07 18:18:55 2012 -0700
@@ -269,7 +269,6 @@
 
     if (isEntryFrame())       return senderForEntryFrame(map);
     if (isInterpretedFrame()) return senderForInterpreterFrame(map);
-    if (isRicochetFrame())    return senderForRicochetFrame(map);
 
     if(cb == null) {
       cb = VM.getVM().getCodeCache().findBlob(getPC());
@@ -288,16 +287,6 @@
     return new X86Frame(getSenderSP(), getLink(), getSenderPC());
   }
 
-  private Frame senderForRicochetFrame(X86RegisterMap map) {
-    if (DEBUG) {
-      System.out.println("senderForRicochetFrame");
-    }
-    X86RicochetFrame f = X86RicochetFrame.fromFrame(this);
-    if (map.getUpdateMap())
-      updateMapWithSavedLink(map, f.senderLinkAddress());
-    return new X86Frame(f.extendedSenderSP(), f.exactSenderSP(), f.senderLink(), f.senderPC());
-  }
-
   private Frame senderForEntryFrame(X86RegisterMap map) {
     if (DEBUG) {
       System.out.println("senderForEntryFrame");
--- a/agent/src/share/classes/sun/jvm/hotspot/runtime/x86/X86RicochetFrame.java	Fri Aug 10 10:41:13 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,81 +0,0 @@
-/*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-package sun.jvm.hotspot.runtime.x86;
-
-import java.util.*;
-import sun.jvm.hotspot.debugger.*;
-import sun.jvm.hotspot.runtime.*;
-import sun.jvm.hotspot.types.*;
-
-public class X86RicochetFrame extends VMObject {
-  static {
-    VM.registerVMInitializedObserver(new Observer() {
-        public void update(Observable o, Object data) {
-          initialize(VM.getVM().getTypeDataBase());
-        }
-      });
-  }
-
-  private static void initialize(TypeDataBase db) {
-    Type type = db.lookupType("MethodHandles::RicochetFrame");
-
-    senderLinkField    = type.getAddressField("_sender_link");
-    savedArgsBaseField = type.getAddressField("_saved_args_base");
-    exactSenderSPField = type.getAddressField("_exact_sender_sp");
-    senderPCField      = type.getAddressField("_sender_pc");
-  }
-
-  private static AddressField senderLinkField;
-  private static AddressField savedArgsBaseField;
-  private static AddressField exactSenderSPField;
-  private static AddressField senderPCField;
-
-  static X86RicochetFrame fromFrame(X86Frame f) {
-    return new X86RicochetFrame(f.getFP().addOffsetTo(- senderLinkField.getOffset()));
-  }
-
-  private X86RicochetFrame(Address addr) {
-    super(addr);
-  }
-
-  public Address senderLink() {
-    return senderLinkField.getValue(addr);
-  }
-  public Address senderLinkAddress() {
-    return addr.addOffsetTo(senderLinkField.getOffset());
-  }
-  public Address savedArgsBase() {
-    return savedArgsBaseField.getValue(addr);
-  }
-  public Address extendedSenderSP() {
-    return savedArgsBase();
-  }
-  public Address exactSenderSP() {
-    return exactSenderSPField.getValue(addr);
-  }
-  public Address senderPC() {
-    return senderPCField.getValue(addr);
-  }
-}
--- a/agent/src/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/ui/classbrowser/HTMLGenerator.java	Fri Sep 07 18:18:55 2012 -0700
@@ -204,7 +204,13 @@
       } else if (cpu.equals("ia64")) {
          cpuHelper = new IA64Helper();
       } else {
+        try {
+          cpuHelper = (CPUHelper)Class.forName("sun.jvm.hotspot.asm." +
+             cpu.toLowerCase() + "." + cpu.toUpperCase() +
+             "Helper").newInstance();
+        } catch (Exception e) {
           throw new RuntimeException("cpu '" + cpu + "' is not yet supported!");
+        }
       }
    }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/AltPlatformInfo.java	Fri Sep 07 18:18:55 2012 -0700
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+package sun.jvm.hotspot.utilities;
+
+public interface AltPlatformInfo {
+  // Additional cpu types can be tested via this interface
+
+  public boolean knownCPU(String cpu);
+}
\ No newline at end of file
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java	Fri Aug 10 10:41:13 2012 -0700
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java	Fri Sep 07 18:18:55 2012 -0700
@@ -64,6 +64,13 @@
     } else if (cpu.equals("ia64") || cpu.equals("amd64") || cpu.equals("x86_64")) {
       return cpu;
     } else {
+      try {
+        Class pic = Class.forName("sun.jvm.hotspot.utilities.PlatformInfoClosed");
+        AltPlatformInfo api = (AltPlatformInfo)pic.newInstance();
+        if (api.knownCPU(cpu)) {
+          return cpu;
+        }
+      } catch (Exception e) {}
       throw new UnsupportedPlatformException("CPU type " + cpu + " not yet supported");
     }
   }
--- a/make/defs.make	Fri Aug 10 10:41:13 2012 -0700
+++ b/make/defs.make	Fri Sep 07 18:18:55 2012 -0700
@@ -22,6 +22,14 @@
 #  
 #
 
+ifeq ($(HS_ALT_MAKE),)
+  ifneq ($(OPENJDK),true)
+    HS_ALT_MAKE=$(GAMMADIR)/make/closed
+  else
+    HS_ALT_MAKE=NO_SUCH_PATH
+  endif
+endif
+
 # The common definitions for hotspot builds.
 
 # Optionally include SPEC file generated by configure.
@@ -327,3 +335,4 @@
 ifndef JAVASE_EMBEDDED
 EXPORT_LIST += $(EXPORT_INCLUDE_DIR)/jfr.h
 endif
+
--- a/make/hotspot_version	Fri Aug 10 10:41:13 2012 -0700
+++ b/make/hotspot_version	Fri Sep 07 18:18:55 2012 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=24
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=19
+HS_BUILD_NUMBER=22
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/make/jprt.properties	Fri Aug 10 10:41:13 2012 -0700
+++ b/make/jprt.properties	Fri Sep 07 18:18:55 2012 -0700
@@ -38,7 +38,7 @@
 
 # This tells jprt what default release we want to build
 
-jprt.hotspot.default.release=jdk7
+jprt.hotspot.default.release=jdk8
 
 jprt.tools.default.release=${jprt.submit.option.release?${jprt.submit.option.release}:${jprt.hotspot.default.release}}
 
@@ -54,77 +54,77 @@
 # Define the Solaris platforms we want for the various releases
 jprt.my.solaris.sparc.jdk8=solaris_sparc_5.10
 jprt.my.solaris.sparc.jdk7=solaris_sparc_5.10
-jprt.my.solaris.sparc.jdk7u6=${jprt.my.solaris.sparc.jdk7}
+jprt.my.solaris.sparc.jdk7u8=${jprt.my.solaris.sparc.jdk7}
 jprt.my.solaris.sparc=${jprt.my.solaris.sparc.${jprt.tools.default.release}}
 
 jprt.my.solaris.sparcv9.jdk8=solaris_sparcv9_5.10
 jprt.my.solaris.sparcv9.jdk7=solaris_sparcv9_5.10
-jprt.my.solaris.sparcv9.jdk7u6=${jprt.my.solaris.sparcv9.jdk7}
+jprt.my.solaris.sparcv9.jdk7u8=${jprt.my.solaris.sparcv9.jdk7}
 jprt.my.solaris.sparcv9=${jprt.my.solaris.sparcv9.${jprt.tools.default.release}}
 
 jprt.my.solaris.i586.jdk8=solaris_i586_5.10
 jprt.my.solaris.i586.jdk7=solaris_i586_5.10
-jprt.my.solaris.i586.jdk7u6=${jprt.my.solaris.i586.jdk7}
+jprt.my.solaris.i586.jdk7u8=${jprt.my.solaris.i586.jdk7}
 jprt.my.solaris.i586=${jprt.my.solaris.i586.${jprt.tools.default.release}}
 
 jprt.my.solaris.x64.jdk8=solaris_x64_5.10
 jprt.my.solaris.x64.jdk7=solaris_x64_5.10
-jprt.my.solaris.x64.jdk7u6=${jprt.my.solaris.x64.jdk7}
+jprt.my.solaris.x64.jdk7u8=${jprt.my.solaris.x64.jdk7}
 jprt.my.solaris.x64=${jprt.my.solaris.x64.${jprt.tools.default.release}}
 
 jprt.my.linux.i586.jdk8=linux_i586_2.6
 jprt.my.linux.i586.jdk7=linux_i586_2.6
-jprt.my.linux.i586.jdk7u6=${jprt.my.linux.i586.jdk7}
+jprt.my.linux.i586.jdk7u8=${jprt.my.linux.i586.jdk7}
 jprt.my.linux.i586=${jprt.my.linux.i586.${jprt.tools.default.release}}
 
 jprt.my.linux.x64.jdk8=linux_x64_2.6
 jprt.my.linux.x64.jdk7=linux_x64_2.6
-jprt.my.linux.x64.jdk7u6=${jprt.my.linux.x64.jdk7}
+jprt.my.linux.x64.jdk7u8=${jprt.my.linux.x64.jdk7}
 jprt.my.linux.x64=${jprt.my.linux.x64.${jprt.tools.default.release}}
 
 jprt.my.linux.ppc.jdk8=linux_ppc_2.6
 jprt.my.linux.ppc.jdk7=linux_ppc_2.6
-jprt.my.linux.ppc.jdk7u6=${jprt.my.linux.ppc.jdk7}
+jprt.my.linux.ppc.jdk7u8=${jprt.my.linux.ppc.jdk7}
 jprt.my.linux.ppc=${jprt.my.linux.ppc.${jprt.tools.default.release}}
 
 jprt.my.linux.ppcv2.jdk8=linux_ppcv2_2.6
 jprt.my.linux.ppcv2.jdk7=linux_ppcv2_2.6
-jprt.my.linux.ppcv2.jdk7u6=${jprt.my.linux.ppcv2.jdk7}
+jprt.my.linux.ppcv2.jdk7u8=${jprt.my.linux.ppcv2.jdk7}
 jprt.my.linux.ppcv2=${jprt.my.linux.ppcv2.${jprt.tools.default.release}}
 
 jprt.my.linux.ppcsflt.jdk8=linux_ppcsflt_2.6
 jprt.my.linux.ppcsflt.jdk7=linux_ppcsflt_2.6
-jprt.my.linux.ppcsflt.jdk7u6=${jprt.my.linux.ppcsflt.jdk7}
+jprt.my.linux.ppcsflt.jdk7u8=${jprt.my.linux.ppcsflt.jdk7}
 jprt.my.linux.ppcsflt=${jprt.my.linux.ppcsflt.${jprt.tools.default.release}}
 
 jprt.my.linux.armvfp.jdk8=linux_armvfp_2.6
 jprt.my.linux.armvfp.jdk7=linux_armvfp_2.6
-jprt.my.linux.armvfp.jdk7u6=${jprt.my.linux.armvfp.jdk7}
+jprt.my.linux.armvfp.jdk7u8=${jprt.my.linux.armvfp.jdk7}
 jprt.my.linux.armvfp=${jprt.my.linux.armvfp.${jprt.tools.default.release}}
 
 jprt.my.linux.armv6.jdk8=linux_armv6_2.6
 jprt.my.linux.armv6.jdk7=linux_armv6_2.6
-jprt.my.linux.armv6.jdk7u6=${jprt.my.linux.armv6.jdk7}
+jprt.my.linux.armv6.jdk7u8=${jprt.my.linux.armv6.jdk7}
 jprt.my.linux.armv6=${jprt.my.linux.armv6.${jprt.tools.default.release}}
 
 jprt.my.linux.armsflt.jdk8=linux_armsflt_2.6
 jprt.my.linux.armsflt.jdk7=linux_armsflt_2.6
-jprt.my.linux.armsflt.jdk7u6=${jprt.my.linux.armsflt.jdk7}
+jprt.my.linux.armsflt.jdk7u8=${jprt.my.linux.armsflt.jdk7}
 jprt.my.linux.armsflt=${jprt.my.linux.armsflt.${jprt.tools.default.release}}
 
 jprt.my.macosx.x64.jdk8=macosx_x64_10.7
 jprt.my.macosx.x64.jdk7=macosx_x64_10.7
-jprt.my.macosx.x64.jdk7u6=${jprt.my.macosx.x64.jdk7}
+jprt.my.macosx.x64.jdk7u8=${jprt.my.macosx.x64.jdk7}
 jprt.my.macosx.x64=${jprt.my.macosx.x64.${jprt.tools.default.release}}
 
 jprt.my.windows.i586.jdk8=windows_i586_5.1
 jprt.my.windows.i586.jdk7=windows_i586_5.1
-jprt.my.windows.i586.jdk7u6=${jprt.my.windows.i586.jdk7}
+jprt.my.windows.i586.jdk7u8=${jprt.my.windows.i586.jdk7}
 jprt.my.windows.i586=${jprt.my.windows.i586.${jprt.tools.default.release}}
 
 jprt.my.windows.x64.jdk8=windows_x64_5.2
 jprt.my.windows.x64.jdk7=windows_x64_5.2
-jprt.my.windows.x64.jdk7u6=${jprt.my.windows.x64.jdk7}
+jprt.my.windows.x64.jdk7u8=${jprt.my.windows.x64.jdk7}
 jprt.my.windows.x64=${jprt.my.windows.x64.${jprt.tools.default.release}}
 
 # Standard list of jprt build targets for this source tree
@@ -159,7 +159,7 @@
 
 jprt.build.targets.jdk8=${jprt.build.targets.all}
 jprt.build.targets.jdk7=${jprt.build.targets.all}
-jprt.build.targets.jdk7u6=${jprt.build.targets.all}
+jprt.build.targets.jdk7u8=${jprt.build.targets.all}
 jprt.build.targets=${jprt.build.targets.${jprt.tools.default.release}}
 
 # Subset lists of test targets for this source tree
@@ -452,7 +452,7 @@
 
 jprt.test.targets.jdk8=${jprt.test.targets.standard}
 jprt.test.targets.jdk7=${jprt.test.targets.standard}
-jprt.test.targets.jdk7u6=${jprt.test.targets.jdk7}
+jprt.test.targets.jdk7u8=${jprt.test.targets.jdk7}
 jprt.test.targets=${jprt.test.targets.${jprt.tools.default.release}}
 
 # The default test/Makefile targets that should be run
@@ -512,7 +512,7 @@
 
 jprt.make.rule.test.targets.jdk8=${jprt.make.rule.test.targets.standard}
 jprt.make.rule.test.targets.jdk7=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.jdk7u6=${jprt.make.rule.test.targets.jdk7}
+jprt.make.rule.test.targets.jdk7u8=${jprt.make.rule.test.targets.jdk7}
 jprt.make.rule.test.targets=${jprt.make.rule.test.targets.${jprt.tools.default.release}}
 
 # 7155453: Work-around to prevent popups on OSX from blocking test completion
--- a/make/linux/makefiles/adlc.make	Fri Aug 10 10:41:13 2012 -0700
+++ b/make/linux/makefiles/adlc.make	Fri Sep 07 18:18:55 2012 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -133,8 +133,10 @@
 # Note that product files are updated via "mv", which is atomic.
 TEMPDIR := $(OUTDIR)/mktmp$(shell echo $$$$)
 
-# Debuggable by default
-CFLAGS += -g
+ifneq ($(DEBUG_BINARIES), true)
+  # Debuggable by default (unless already done by DEBUG_BINARIES)
+  CFLAGS += -g
+endif
 
 # Pass -D flags into ADLC.
 ADLCFLAGS += $(SYSDEFS)
--- a/make/linux/makefiles/defs.make	Fri Aug 10 10:41:13 2012 -0700
+++ b/make/linux/makefiles/defs.make	Fri Sep 07 18:18:55 2012 -0700
@@ -295,6 +295,8 @@
 ADD_SA_BINARIES/arm   = 
 ADD_SA_BINARIES/zero  = 
 
+-include $(HS_ALT_MAKE)/linux/makefiles/defs.make
+
 EXPORT_LIST += $(ADD_SA_BINARIES/$(HS_ARCH))
 
 
--- a/make/linux/makefiles/gcc.make	Fri Aug 10 10:41:13 2012 -0700
+++ b/make/linux/makefiles/gcc.make	Fri Sep 07 18:18:55 2012 -0700
@@ -215,47 +215,46 @@
 #------------------------------------------------------------------------
 # Debug flags
 
-# Use the stabs format for debugging information (this is the default
-# on gcc-2.91). It's good enough, has all the information about line
-# numbers and local variables, and libjvm_g.so is only about 16M.
-# Change this back to "-g" if you want the most expressive format.
-# (warning: that could easily inflate libjvm_g.so to 150M!)
-# Note: The Itanium gcc compiler crashes when using -gstabs.
-DEBUG_CFLAGS/ia64  = -g
-DEBUG_CFLAGS/amd64 = -g
-DEBUG_CFLAGS/arm   = -g
-DEBUG_CFLAGS/ppc   = -g
-DEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
-ifeq ($(DEBUG_CFLAGS/$(BUILDARCH)),)
-DEBUG_CFLAGS += -gstabs
-endif
-
-ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
-  FASTDEBUG_CFLAGS/ia64  = -g
-  FASTDEBUG_CFLAGS/amd64 = -g
-  FASTDEBUG_CFLAGS/arm   = -g
-  FASTDEBUG_CFLAGS/ppc   = -g
-  FASTDEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
-  ifeq ($(FASTDEBUG_CFLAGS/$(BUILDARCH)),)
-    FASTDEBUG_CFLAGS += -gstabs
+# DEBUG_BINARIES uses full -g debug information for all configs
+ifeq ($(DEBUG_BINARIES), true)
+  CFLAGS += -g
+else
+  # Use the stabs format for debugging information (this is the default
+  # on gcc-2.91). It's good enough, has all the information about line
+  # numbers and local variables, and libjvm_g.so is only about 16M.
+  # Change this back to "-g" if you want the most expressive format.
+  # (warning: that could easily inflate libjvm_g.so to 150M!)
+  # Note: The Itanium gcc compiler crashes when using -gstabs.
+  DEBUG_CFLAGS/ia64  = -g
+  DEBUG_CFLAGS/amd64 = -g
+  DEBUG_CFLAGS/arm   = -g
+  DEBUG_CFLAGS/ppc   = -g
+  DEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
+  ifeq ($(DEBUG_CFLAGS/$(BUILDARCH)),)
+    DEBUG_CFLAGS += -gstabs
   endif
-
-  OPT_CFLAGS/ia64  = -g
-  OPT_CFLAGS/amd64 = -g
-  OPT_CFLAGS/arm   = -g
-  OPT_CFLAGS/ppc   = -g
-  OPT_CFLAGS += $(OPT_CFLAGS/$(BUILDARCH))
-  ifeq ($(OPT_CFLAGS/$(BUILDARCH)),)
-    OPT_CFLAGS += -gstabs
+  
+  ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+    FASTDEBUG_CFLAGS/ia64  = -g
+    FASTDEBUG_CFLAGS/amd64 = -g
+    FASTDEBUG_CFLAGS/arm   = -g
+    FASTDEBUG_CFLAGS/ppc   = -g
+    FASTDEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
+    ifeq ($(FASTDEBUG_CFLAGS/$(BUILDARCH)),)
+      FASTDEBUG_CFLAGS += -gstabs
+    endif
+  
+    OPT_CFLAGS/ia64  = -g
+    OPT_CFLAGS/amd64 = -g
+    OPT_CFLAGS/arm   = -g
+    OPT_CFLAGS/ppc   = -g
+    OPT_CFLAGS += $(OPT_CFLAGS/$(BUILDARCH))
+    ifeq ($(OPT_CFLAGS/$(BUILDARCH)),)
+      OPT_CFLAGS += -gstabs
+    endif
   endif
 endif
 
-# DEBUG_BINARIES overrides everything, use full -g debug information
-ifeq ($(DEBUG_BINARIES), true)
-  DEBUG_CFLAGS = -g
-  CFLAGS += $(DEBUG_CFLAGS)
-endif
-
 # If we are building HEADLESS, pass on to VM
 # so it can set the java.awt.headless property
 ifdef HEADLESS
--- a/make/linux/makefiles/sa.make	Fri Aug 10 10:41:13 2012 -0700
+++ b/make/linux/makefiles/sa.make	Fri Sep 07 18:18:55 2012 -0700
@@ -30,10 +30,16 @@
 
 include $(GAMMADIR)/make/linux/makefiles/rules.make
 
+include $(GAMMADIR)/make/defs.make
+include $(GAMMADIR)/make/altsrc.make
+
 AGENT_DIR = $(GAMMADIR)/agent
 
 include $(GAMMADIR)/make/sa.files
 
+-include $(HS_ALT_MAKE)/linux/makefiles/sa.make
+
+
 TOPDIR    = $(shell echo `pwd`)
 GENERATED = $(TOPDIR)/../generated
 
@@ -52,17 +58,15 @@
 SA_PROPERTIES = $(SA_CLASSDIR)/sa.properties
 
 # if $(AGENT_DIR) does not exist, we don't build SA
-# also, we don't build SA on Itanium, PowerPC, ARM or zero.
+# also, we don't build SA on Itanium or zero.
 
 all: 
 	if [ -d $(AGENT_DIR) -a "$(SRCARCH)" != "ia64" \
-             -a "$(SRCARCH)" != "arm" \
-             -a "$(SRCARCH)" != "ppc" \
              -a "$(SRCARCH)" != "zero" ] ; then \
 	   $(MAKE) -f sa.make $(GENERATED)/sa-jdi.jar; \
 	fi
 
-$(GENERATED)/sa-jdi.jar: $(AGENT_FILES)
+$(GENERATED)/sa-jdi.jar:: $(AGENT_FILES)
 	$(QUIETLY) echo "Making $@"
 	$(QUIETLY) if [ "$(BOOT_JAVA_HOME)" = "" ]; then \
 	  echo "ALT_BOOTDIR, BOOTDIR or JAVA_HOME needs to be defined to build SA"; \
@@ -111,3 +115,5 @@
 	rm -rf $(SA_CLASSDIR)
 	rm -rf $(GENERATED)/sa-jdi.jar
 	rm -rf $(AGENT_FILES_LIST)
+
+-include $(HS_ALT_MAKE)/linux/makefiles/sa-rules.make
--- a/make/linux/makefiles/saproc.make	Fri Aug 10 10:41:13 2012 -0700
+++ b/make/linux/makefiles/saproc.make	Fri Sep 07 18:18:55 2012 -0700
@@ -21,6 +21,8 @@
 # questions.
 #  
 #
+include $(GAMMADIR)/make/defs.make
+include $(GAMMADIR)/make/altsrc.make
 
 # Rules to build serviceability agent library, used by vm.make
 
@@ -48,6 +50,8 @@
              $(SASRCDIR)/ps_core.c                    \
              $(SASRCDIR)/LinuxDebuggerLocal.c
 
+-include $(HS_ALT_MAKE)/linux/makefiles/saproc.make
+
 SAMAPFILE = $(SASRCDIR)/mapfile
 
 DEST_SAPROC           = $(JDK_LIBDIR)/$(LIBSAPROC)
@@ -60,15 +64,19 @@
 endif
 
 # if $(AGENT_DIR) does not exist, we don't build SA
-# also, we don't build SA on Itanium, PPC, ARM or zero.
+# also, we don't build SA on Itanium or zero.
 
 ifneq ($(wildcard $(AGENT_DIR)),)
-ifneq ($(filter-out ia64 arm ppc zero,$(SRCARCH)),)
+ifneq ($(filter-out ia64 zero,$(SRCARCH)),)
   BUILDLIBSAPROC = $(LIBSAPROC)
 endif
 endif
 
-
+ifneq ($(ALT_SASRCDIR),)
+ALT_SAINCDIR=-I$(ALT_SASRCDIR)
+else
+ALT_SAINCDIR=
+endif
 SA_LFLAGS = $(MAPFLAG:FILENAME=$(SAMAPFILE)) $(LDFLAGS_HASH_STYLE)
 
 $(LIBSAPROC): $(SASRCFILES) $(SAMAPFILE)
@@ -84,6 +92,7 @@
 	           -I$(GENERATED)                                       \
 	           -I$(BOOT_JAVA_HOME)/include                          \
 	           -I$(BOOT_JAVA_HOME)/include/$(Platform_os_family)    \
+			   $(ALT_SAINCDIR) 										\
 	           $(SASRCFILES)                                        \
 	           $(SA_LFLAGS)                                         \
 	           $(SA_DEBUG_CFLAGS)                                   \
--- a/make/pic.make	Fri Aug 10 10:41:13 2012 -0700
+++ b/make/pic.make	Fri Sep 07 18:18:55 2012 -0700
@@ -32,7 +32,7 @@
   ifndef LP64
     PARTIAL_NONPIC=1
   endif
-  PIC_ARCH = ppc
+  PIC_ARCH = ppc arm
   ifneq ("$(filter $(PIC_ARCH),$(BUILDARCH))","")
     PARTIAL_NONPIC=0
   endif
--- a/make/windows/makefiles/defs.make	Fri Aug 10 10:41:13 2012 -0700
+++ b/make/windows/makefiles/defs.make	Fri Sep 07 18:18:55 2012 -0700
@@ -188,14 +188,22 @@
   MAKE_ARGS += JDK_BUILD_NUMBER=$(COOKED_BUILD_NUMBER)
 endif
 
-NMAKE= MAKEFLAGS= MFLAGS= nmake /NOLOGO
+NMAKE= MAKEFLAGS= MFLAGS= nmake -NOLOGO
+ifndef SYSTEM_UNAME
+  SYSTEM_UNAME := $(shell uname)
+  export SYSTEM_UNAME
+endif
 
 # Check for CYGWIN
-ifneq (,$(findstring CYGWIN,$(shell uname)))
+ifneq (,$(findstring CYGWIN,$(SYSTEM_UNAME)))
   USING_CYGWIN=true
 else
   USING_CYGWIN=false
 endif
+# Check for MinGW
+ifneq (,$(findstring MINGW,$(SYSTEM_UNAME)))
+  USING_MINGW=true
+endif
 # FIXUP: The subdirectory for a debug build is NOT the same on all platforms
 VM_DEBUG=debug
 
@@ -208,11 +216,16 @@
   ABS_BOOTDIR     := $(subst /,\\,$(shell /bin/cygpath -m -a "$(BOOTDIR)"))
   ABS_GAMMADIR    := $(subst /,\\,$(shell /bin/cygpath -m -a "$(GAMMADIR)"))
   ABS_OS_MAKEFILE := $(shell /bin/cygpath -m -a "$(HS_MAKE_DIR)/$(OSNAME)")/build.make
-else
-  ABS_OUTPUTDIR   := $(subst /,\\,$(shell $(CD) $(OUTPUTDIR);$(PWD)))
-  ABS_BOOTDIR     := $(subst /,\\,$(shell $(CD) $(BOOTDIR);$(PWD)))
-  ABS_GAMMADIR    := $(subst /,\\,$(shell $(CD) $(GAMMADIR);$(PWD)))
-  ABS_OS_MAKEFILE := $(subst /,\\,$(shell $(CD) $(HS_MAKE_DIR)/$(OSNAME);$(PWD))/build.make)
+else ifeq ($(USING_MINGW), true)
+    ABS_OUTPUTDIR   := $(shell $(CD) $(OUTPUTDIR);$(PWD))
+    ABS_BOOTDIR     := $(shell $(CD) $(BOOTDIR);$(PWD))
+    ABS_GAMMADIR    := $(shell $(CD) $(GAMMADIR);$(PWD))
+    ABS_OS_MAKEFILE := $(shell $(CD) $(HS_MAKE_DIR)/$(OSNAME);$(PWD))/build.make
+  else
+    ABS_OUTPUTDIR   := $(subst /,\\,$(shell $(CD) $(OUTPUTDIR);$(PWD)))
+    ABS_BOOTDIR     := $(subst /,\\,$(shell $(CD) $(BOOTDIR);$(PWD)))
+    ABS_GAMMADIR    := $(subst /,\\,$(shell $(CD) $(GAMMADIR);$(PWD)))
+    ABS_OS_MAKEFILE := $(subst /,\\,$(shell $(CD) $(HS_MAKE_DIR)/$(OSNAME);$(PWD))/build.make)
 endif
 
 # Disable building SA on windows until we are sure
--- a/make/windows/makefiles/rules.make	Fri Aug 10 10:41:13 2012 -0700
+++ b/make/windows/makefiles/rules.make	Fri Sep 07 18:18:55 2012 -0700
@@ -23,14 +23,15 @@
 #
 
 # These are the commands used externally to compile and run.
-
+# The \ are used here for traditional Windows apps and " quoted to get
+# past the Unix-like shell:
 !ifdef BootStrapDir
-RUN_JAVA=$(BootStrapDir)\bin\java
-RUN_JAVAP=$(BootStrapDir)\bin\javap
-RUN_JAVAH=$(BootStrapDir)\bin\javah
-RUN_JAR=$(BootStrapDir)\bin\jar
-COMPILE_JAVAC=$(BootStrapDir)\bin\javac $(BOOTSTRAP_JAVAC_FLAGS)
-COMPILE_RMIC=$(BootStrapDir)\bin\rmic
+RUN_JAVA="$(BootStrapDir)\bin\java"
+RUN_JAVAP="$(BootStrapDir)\bin\javap"
+RUN_JAVAH="$(BootStrapDir)\bin\javah"
+RUN_JAR="$(BootStrapDir)\bin\jar"
+COMPILE_JAVAC="$(BootStrapDir)\bin\javac" $(BOOTSTRAP_JAVAC_FLAGS)
+COMPILE_RMIC="$(BootStrapDir)\bin\rmic"
 BOOT_JAVA_HOME=$(BootStrapDir)
 !else
 RUN_JAVA=java
--- a/make/windows/makefiles/sa.make	Fri Aug 10 10:41:13 2012 -0700
+++ b/make/windows/makefiles/sa.make	Fri Sep 07 18:18:55 2012 -0700
@@ -36,37 +36,37 @@
 !include $(WorkSpace)/make/windows/makefiles/rules.make
 !include $(WorkSpace)/make/sa.files
 
-GENERATED = ..\generated
+GENERATED = ../generated
 
 # tools.jar is needed by the JDI - SA binding
-SA_CLASSPATH = $(BOOT_JAVA_HOME)\lib\tools.jar
+SA_CLASSPATH = $(BOOT_JAVA_HOME)/lib/tools.jar
 
-SA_CLASSDIR = $(GENERATED)\saclasses
+SA_CLASSDIR = $(GENERATED)/saclasses
 
 SA_BUILD_VERSION_PROP = sun.jvm.hotspot.runtime.VM.saBuildVersion=$(SA_BUILD_VERSION)
 
-SA_PROPERTIES = $(SA_CLASSDIR)\sa.properties
+SA_PROPERTIES = $(SA_CLASSDIR)/sa.properties
 
-default::  $(GENERATED)\sa-jdi.jar
+default::  $(GENERATED)/sa-jdi.jar
 
 # Remove the space between $(SA_BUILD_VERSION_PROP) and > below as it adds a white space
 # at the end of SA version string and causes a version mismatch with the target VM version.
 
-$(GENERATED)\sa-jdi.jar: $(AGENT_FILES:/=\)
-	@if not exist $(SA_CLASSDIR) mkdir $(SA_CLASSDIR)
-	@echo ...Building sa-jdi.jar
+$(GENERATED)/sa-jdi.jar: $(AGENT_FILES)
+	$(QUIETLY) mkdir -p $(SA_CLASSDIR)
+	@echo ...Building sa-jdi.jar into $(SA_CLASSDIR)
 	@echo ...$(COMPILE_JAVAC) -classpath $(SA_CLASSPATH) -d $(SA_CLASSDIR) ....
-	@$(COMPILE_JAVAC) -classpath $(SA_CLASSPATH) -sourcepath $(AGENT_SRC_DIR) -d $(SA_CLASSDIR) $(AGENT_FILES:/=\)
+	@$(COMPILE_JAVAC) -classpath $(SA_CLASSPATH) -sourcepath $(AGENT_SRC_DIR) -d $(SA_CLASSDIR) $(AGENT_FILES)
 	$(COMPILE_RMIC) -classpath $(SA_CLASSDIR) -d $(SA_CLASSDIR) sun.jvm.hotspot.debugger.remote.RemoteDebuggerServer
 	$(QUIETLY) echo $(SA_BUILD_VERSION_PROP)> $(SA_PROPERTIES)
 	$(QUIETLY) rm -f $(SA_CLASSDIR)/sun/jvm/hotspot/utilities/soql/sa.js
 	$(QUIETLY) cp $(AGENT_SRC_DIR)/sun/jvm/hotspot/utilities/soql/sa.js $(SA_CLASSDIR)/sun/jvm/hotspot/utilities/soql
 	$(QUIETLY) rm -rf $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources
-	$(QUIETLY) mkdir $(SA_CLASSDIR)\sun\jvm\hotspot\ui\resources
+	$(QUIETLY) mkdir $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources
 	$(QUIETLY) cp $(AGENT_SRC_DIR)/sun/jvm/hotspot/ui/resources/*.png $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources
 	$(QUIETLY) cp -r $(AGENT_SRC_DIR)/images/* $(SA_CLASSDIR)
 	$(RUN_JAR) cf $@ -C $(SA_CLASSDIR) .
-	$(RUN_JAR) uf $@ -C $(AGENT_SRC_DIR:/=\) META-INF\services\com.sun.jdi.connect.Connector
+	$(RUN_JAR) uf $@ -C $(AGENT_SRC_DIR) META-INF/services/com.sun.jdi.connect.Connector
 	$(RUN_JAVAH) -classpath $(SA_CLASSDIR) -jni sun.jvm.hotspot.debugger.windbg.WindbgDebuggerLocal
 	$(RUN_JAVAH) -classpath $(SA_CLASSDIR) -jni sun.jvm.hotspot.debugger.x86.X86ThreadContext 
 	$(RUN_JAVAH) -classpath $(SA_CLASSDIR) -jni sun.jvm.hotspot.debugger.ia64.IA64ThreadContext 
@@ -85,27 +85,27 @@
 # will be useful to have the assertion checks in place
 
 !if "$(BUILDARCH)" == "ia64"
-SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+SA_CFLAGS = -nologo $(MS_RUNTIME_OPTION) -W3 $(GX_OPTION) -Od -D "WIN32" -D "WIN64" -D "_WINDOWS" -D "_DEBUG" -D "_CONSOLE" -D "_MBCS" -YX -FD -c
 !elseif "$(BUILDARCH)" == "amd64"
-SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+SA_CFLAGS = -nologo $(MS_RUNTIME_OPTION) -W3 $(GX_OPTION) -Od -D "WIN32" -D "WIN64" -D "_WINDOWS" -D "_DEBUG" -D "_CONSOLE" -D "_MBCS" -YX -FD -c
 !if "$(COMPILER_NAME)" == "VS2005"
 # On amd64, VS2005 compiler requires bufferoverflowU.lib on the link command line, 
 # otherwise we get missing __security_check_cookie externals at link time. 
 SA_LD_FLAGS = bufferoverflowU.lib
 !endif
 !else
-SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 /Gm $(GX_OPTION) /Od /D "WIN32" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
+SA_CFLAGS = -nologo $(MS_RUNTIME_OPTION) -W3 -Gm $(GX_OPTION) -Od -D "WIN32" -D "_WINDOWS" -D "_DEBUG" -D "_CONSOLE" -D "_MBCS" -YX -FD -GZ -c
 !if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1"
-SA_CFLAGS = $(SA_CFLAGS) /ZI
+SA_CFLAGS = $(SA_CFLAGS) -ZI
 !endif
 !endif
 !if "$(MT)" != ""
-SA_LD_FLAGS = /manifest $(SA_LD_FLAGS)
+SA_LD_FLAGS = -manifest $(SA_LD_FLAGS)
 !endif
 SASRCFILE = $(AGENT_DIR)/src/os/win32/windbg/sawindbg.cpp
-SA_LFLAGS = $(SA_LD_FLAGS) /nologo /subsystem:console /machine:$(MACHINE)
+SA_LFLAGS = $(SA_LD_FLAGS) -nologo -subsystem:console -machine:$(MACHINE)
 !if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1"
-SA_LFLAGS = $(SA_LFLAGS) /map /debug
+SA_LFLAGS = $(SA_LFLAGS) -map -debug
 !endif
 
 # Note that we do not keep sawindbg.obj around as it would then
@@ -117,15 +117,15 @@
 $(SAWINDBG): $(SASRCFILE)
 	set INCLUDE=$(SA_INCLUDE)$(INCLUDE)
 	$(CXX) @<<
-	  /I"$(BootStrapDir)/include" /I"$(BootStrapDir)/include/win32" 
-	  /I"$(GENERATED)" $(SA_CFLAGS)
+	  -I"$(BootStrapDir)/include" -I"$(BootStrapDir)/include/win32" 
+	  -I"$(GENERATED)" $(SA_CFLAGS)
 	  $(SASRCFILE)
-	  /out:$*.obj
+	  -out:$*.obj
 <<
 	set LIB=$(SA_LIB)$(LIB)
-	$(LD) /out:$@ /DLL $*.obj dbgeng.lib $(SA_LFLAGS)
+	$(LD) -out:$@ -DLL $*.obj dbgeng.lib $(SA_LFLAGS)
 !if "$(MT)" != ""
-	$(MT) /manifest $(@F).manifest /outputresource:$(@F);#2
+	$(MT) -manifest $(@F).manifest -outputresource:$(@F);#2
 !endif
 !if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1"
 !if "$(ZIP_DEBUGINFO_FILES)" == "1"
@@ -136,6 +136,6 @@
 	-@rm -f $*.obj
 
 cleanall :
-	rm -rf $(GENERATED:\=/)/saclasses
-	rm -rf $(GENERATED:\=/)/sa-jdi.jar
+	rm -rf $(GENERATED)/saclasses
+	rm -rf $(GENERATED)/sa-jdi.jar
 !endif
--- a/make/windows/makefiles/shared.make	Fri Aug 10 10:41:13 2012 -0700
+++ b/make/windows/makefiles/shared.make	Fri Sep 07 18:18:55 2012 -0700
@@ -36,11 +36,12 @@
 
 
 !ifdef SUBDIRS
+# \ is used below because $(MAKE) is nmake here, which expects Windows paths
 $(SUBDIRS): FORCE
 	@if not exist $@ mkdir $@
-	@if not exist $@\local.make echo # Empty > $@\local.make
-	@echo nmake $(ACTION) in $(DIR)\$@
-	cd $@ && $(MAKE) /NOLOGO /f $(WorkSpace)\make\windows\makefiles\$@.make $(ACTION) DIR=$(DIR)\$@ BUILD_FLAVOR=$(BUILD_FLAVOR)
+	@if not exist $@/local.make echo # Empty > $@/local.make
+	@echo nmake $(ACTION) in $(DIR)/$@
+	cd $@ && $(MAKE) -NOLOGO -f $(WorkSpace)\make\windows\makefiles\$@.make $(ACTION) DIR=$(DIR)\$@ BUILD_FLAVOR=$(BUILD_FLAVOR)
 !endif
 
 # Creates the needed directory
--- a/make/windows/projectfiles/common/Makefile	Fri Aug 10 10:41:13 2012 -0700
+++ b/make/windows/projectfiles/common/Makefile	Fri Sep 07 18:18:55 2012 -0700
@@ -108,7 +108,7 @@
       -define              HOTSPOT_VM_DISTRO=\\\"$(HOTSPOT_VM_DISTRO)\\\"
 
 $(HOTSPOTBUILDSPACE)/$(ProjectFile): $(HOTSPOTBUILDSPACE)/classes/ProjectCreator.class
-	@$(RUN_JAVA) -Djava.class.path=$(HOTSPOTBUILDSPACE)/classes ProjectCreator WinGammaPlatform$(VcVersion) $(ProjectCreatorIDEOptions)
+	@$(RUN_JAVA) -Djava.class.path="$(HOTSPOTBUILDSPACE)/classes" ProjectCreator WinGammaPlatform$(VcVersion) $(ProjectCreatorIDEOptions)
 
 clean:
 	@rm -rf $(HOTSPOTBUILDSPACE)/classes
--- a/src/cpu/sparc/vm/assembler_sparc.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -44,8 +44,10 @@
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) stop(error)
 #else
 #define BLOCK_COMMENT(str) block_comment(str)
+#define STOP(error) block_comment(error); stop(error)
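+// Note (illustrative): in this branch STOP expands to two statements, so a
+// call site inside an unbraced if/else must add braces, as done below for
+// the ShowMessageBoxOnError check.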
 #endif
 
 // Convert the raw encoding form into the form expected by the
@@ -992,7 +994,7 @@
   save_frame(0);                // to avoid clobbering O0
   ld_ptr(pc_addr, L0);
   br_null_short(L0, Assembler::pt, PcOk);
-  stop("last_Java_pc not zeroed before leaving Java");
+  STOP("last_Java_pc not zeroed before leaving Java");
   bind(PcOk);
 
   // Verify that flags was zeroed on return to Java
@@ -1001,7 +1003,7 @@
   tst(L0);
   br(Assembler::zero, false, Assembler::pt, FlagsOk);
   delayed() -> restore();
-  stop("flags not zeroed before leaving Java");
+  STOP("flags not zeroed before leaving Java");
   bind(FlagsOk);
 #endif /* ASSERT */
   //
@@ -1021,7 +1023,7 @@
   andcc(last_java_sp, 0x01, G0);
   br(Assembler::notZero, false, Assembler::pt, StackOk);
   delayed()->nop();
-  stop("Stack Not Biased in set_last_Java_frame");
+  STOP("Stack Not Biased in set_last_Java_frame");
   bind(StackOk);
 #endif // ASSERT
   assert( last_java_sp != G4_scratch, "bad register usage in set_last_Java_frame");
@@ -1650,23 +1652,28 @@
 
 
 void RegistersForDebugging::print(outputStream* s) {
+  FlagSetting fs(Debugging, true);
   int j;
-  for ( j = 0;  j < 8;  ++j )
-    if ( j != 6 ) s->print_cr("i%d = 0x%.16lx", j, i[j]);
-    else          s->print_cr( "fp = 0x%.16lx",    i[j]);
+  for (j = 0; j < 8; ++j) {
+    if (j != 6) { s->print("i%d = ", j); os::print_location(s, i[j]); }
+    else        { s->print( "fp = "   ); os::print_location(s, i[j]); }
+  }
   s->cr();
 
-  for ( j = 0;  j < 8;  ++j )
-    s->print_cr("l%d = 0x%.16lx", j, l[j]);
+  for (j = 0;  j < 8;  ++j) {
+    s->print("l%d = ", j); os::print_location(s, l[j]);
+  }
   s->cr();
 
-  for ( j = 0;  j < 8;  ++j )
-    if ( j != 6 ) s->print_cr("o%d = 0x%.16lx", j, o[j]);
-    else          s->print_cr( "sp = 0x%.16lx",    o[j]);
+  for (j = 0; j < 8; ++j) {
+    if (j != 6) { s->print("o%d = ", j); os::print_location(s, o[j]); }
+    else        { s->print( "sp = "   ); os::print_location(s, o[j]); }
+  }
   s->cr();
 
-  for ( j = 0;  j < 8;  ++j )
-    s->print_cr("g%d = 0x%.16lx", j, g[j]);
+  for (j = 0; j < 8; ++j) {
+    s->print("g%d = ", j); os::print_location(s, g[j]);
+  }
   s->cr();
 
   // print out floats with compression
@@ -2020,8 +2027,8 @@
   char* b = new char[1024];
   sprintf(b, "untested: %s", what);
 
-  if ( ShowMessageBoxOnError )   stop(b);
-  else                           warn(b);
+  if (ShowMessageBoxOnError) { STOP(b); }
+  else                       { warn(b); }
 }
 
 
@@ -2998,26 +3005,60 @@
 }
 
 
+// virtual method calling
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           RegisterOrConstant vtable_index,
+                                           Register method_result) {
+  assert_different_registers(recv_klass, method_result, vtable_index.register_or_noreg());
+  Register sethi_temp = method_result;
+  const int base = (instanceKlass::vtable_start_offset() * wordSize +
+                    // method pointer offset within the vtable entry:
+                    vtableEntry::method_offset_in_bytes());
+  RegisterOrConstant vtable_offset = vtable_index;
+  // Each of the following three lines potentially generates an instruction.
+  // But the total number of address formation instructions will always be
+  // at most two, and will often be zero.  In any case, it will be optimal.
+  // If vtable_index is a register, we will have (sll_ptr N,x; inc_ptr B,x; ld_ptr k,x).
+  // If vtable_index is a constant, we will have at most (set B+X<<N,t; ld_ptr k,t).
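+  // (Illustrative arithmetic, not checked by this code: for a constant
+  // vtable_index i, the slot loaded below sits at
+  //   recv_klass + base + i * vtableEntry::size() * wordSize.)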
+  vtable_offset = regcon_sll_ptr(vtable_index, exact_log2(vtableEntry::size() * wordSize), vtable_offset);
+  vtable_offset = regcon_inc_ptr(vtable_offset, base, vtable_offset, sethi_temp);
+  Address vtable_entry_addr(recv_klass, ensure_simm13_or_reg(vtable_offset, sethi_temp));
+  ld_ptr(vtable_entry_addr, method_result);
+}
+
+
 void MacroAssembler::check_klass_subtype(Register sub_klass,
                                          Register super_klass,
                                          Register temp_reg,
                                          Register temp2_reg,
                                          Label& L_success) {
-  Label L_failure, L_pop_to_failure;
-  check_klass_subtype_fast_path(sub_klass, super_klass,
-                                temp_reg, temp2_reg,
-                                &L_success, &L_failure, NULL);
   Register sub_2 = sub_klass;
   Register sup_2 = super_klass;
   if (!sub_2->is_global())  sub_2 = L0;
   if (!sup_2->is_global())  sup_2 = L1;
-
-  save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
+  bool did_save = false;
+  if (temp_reg == noreg || temp2_reg == noreg) {
+    temp_reg = L2;
+    temp2_reg = L3;
+    save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
+    sub_klass = sub_2;
+    super_klass = sup_2;
+    did_save = true;
+  }
+  Label L_failure, L_pop_to_failure, L_pop_to_success;
+  check_klass_subtype_fast_path(sub_klass, super_klass,
+                                temp_reg, temp2_reg,
+                                (did_save ? &L_pop_to_success : &L_success),
+                                (did_save ? &L_pop_to_failure : &L_failure), NULL);
+
+  if (!did_save)
+    save_frame_and_mov(0, sub_klass, sub_2, super_klass, sup_2);
   check_klass_subtype_slow_path(sub_2, sup_2,
                                 L2, L3, L4, L5,
                                 NULL, &L_pop_to_failure);
 
   // on success:
+  bind(L_pop_to_success);
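+  // (Illustrative note: when a frame was saved above, the fast path exits
+  // through L_pop_to_success so the register window is restored before
+  // branching to the caller-visible L_success.)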
   restore();
   ba_short(L_success);
 
@@ -3234,54 +3275,6 @@
 }
 
 
-void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
-                                              Register temp_reg,
-                                              Label& wrong_method_type) {
-  assert_different_registers(mtype_reg, mh_reg, temp_reg);
-  // compare method type against that of the receiver
-  RegisterOrConstant mhtype_offset = delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg);
-  load_heap_oop(mh_reg, mhtype_offset, temp_reg);
-  cmp_and_brx_short(temp_reg, mtype_reg, Assembler::notEqual, Assembler::pn, wrong_method_type);
-}
-
-
-// A method handle has a "vmslots" field which gives the size of its
-// argument list in JVM stack slots.  This field is either located directly
-// in every method handle, or else is indirectly accessed through the
-// method handle's MethodType.  This macro hides the distinction.
-void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
-                                                Register temp_reg) {
-  assert_different_registers(vmslots_reg, mh_reg, temp_reg);
-  // load mh.type.form.vmslots
-  Register temp2_reg = vmslots_reg;
-  load_heap_oop(Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)),      temp2_reg);
-  load_heap_oop(Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)),        temp2_reg);
-  ld(           Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)), vmslots_reg);
-}
-
-
-void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg, bool emit_delayed_nop) {
-  assert(mh_reg == G3_method_handle, "caller must put MH object in G3");
-  assert_different_registers(mh_reg, temp_reg);
-
-  // pick out the interpreted side of the handler
-  // NOTE: vmentry is not an oop!
-  ld_ptr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg), temp_reg);
-
-  // off we go...
-  ld_ptr(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes(), temp_reg);
-  jmp(temp_reg, 0);
-
-  // for the various stubs which take control at this point,
-  // see MethodHandles::generate_method_handle_stub
-
-  // Some callers can fill the delay slot.
-  if (emit_delayed_nop) {
-    delayed()->nop();
-  }
-}
-
-
 RegisterOrConstant MacroAssembler::argument_offset(RegisterOrConstant arg_slot,
                                                    Register temp_reg,
                                                    int extra_slot_offset) {
@@ -3914,7 +3907,7 @@
     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_start_offset()), t2);
     or3(t1, t2, t3);
     cmp_and_br_short(t1, t2, Assembler::greaterEqual, Assembler::pn, next);
-    stop("assert(top >= start)");
+    STOP("assert(top >= start)");
     should_not_reach_here();
 
     bind(next);
@@ -3922,13 +3915,13 @@
     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_end_offset()), t2);
     or3(t3, t2, t3);
     cmp_and_br_short(t1, t2, Assembler::lessEqual, Assembler::pn, next2);
-    stop("assert(top <= end)");
+    STOP("assert(top <= end)");
     should_not_reach_here();
 
     bind(next2);
     and3(t3, MinObjAlignmentInBytesMask, t3);
     cmp_and_br_short(t3, 0, Assembler::lessEqual, Assembler::pn, ok);
-    stop("assert(aligned)");
+    STOP("assert(aligned)");
     should_not_reach_here();
 
     bind(ok);
@@ -3976,7 +3969,7 @@
       btst(MinObjAlignmentInBytesMask, obj);
       br(Assembler::zero, false, Assembler::pt, L);
       delayed()->nop();
-      stop("eden top is not properly aligned");
+      STOP("eden top is not properly aligned");
       bind(L);
     }
 #endif // ASSERT
@@ -4013,7 +4006,7 @@
       btst(MinObjAlignmentInBytesMask, top_addr);
       br(Assembler::zero, false, Assembler::pt, L);
       delayed()->nop();
-      stop("eden top is not properly aligned");
+      STOP("eden top is not properly aligned");
       bind(L);
     }
 #endif // ASSERT
@@ -4066,7 +4059,7 @@
     btst(MinObjAlignmentInBytesMask, free);
     br(Assembler::zero, false, Assembler::pt, L);
     delayed()->nop();
-    stop("updated TLAB free is not properly aligned");
+    STOP("updated TLAB free is not properly aligned");
     bind(L);
   }
 #endif // ASSERT
@@ -4164,7 +4157,7 @@
     ld_ptr(G2_thread, in_bytes(JavaThread::tlab_size_offset()), t2);
     sll_ptr(t2, LogHeapWordSize, t2);
     cmp_and_br_short(t1, t2, Assembler::equal, Assembler::pt, ok);
-    stop("assert(t1 == tlab_size)");
+    STOP("assert(t1 == tlab_size)");
     should_not_reach_here();
 
     bind(ok);
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Sep 07 18:18:55 2012 -0700
@@ -2538,6 +2538,11 @@
                                Register temp_reg, Register temp2_reg,
                                Label& no_such_interface);
 
+  // virtual method calling
+  void lookup_virtual_method(Register recv_klass,
+                             RegisterOrConstant vtable_index,
+                             Register method_result);
+
   // Test sub_klass against super_klass, with fast and slow paths.
 
   // The fast path produces a tri-state answer: yes / no / maybe-slow.
@@ -2577,12 +2582,6 @@
                            Label& L_success);
 
   // method handles (JSR 292)
-  void check_method_handle_type(Register mtype_reg, Register mh_reg,
-                                Register temp_reg,
-                                Label& wrong_method_type);
-  void load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
-                                  Register temp_reg);
-  void jump_to_method_handle_entry(Register mh_reg, Register temp_reg, bool emit_delayed_nop = true);
   // offset relative to Gargs of argument at tos[arg_slot].
   // (arg_slot == 0 means the last argument, not the first).
   RegisterOrConstant argument_offset(RegisterOrConstant arg_slot,
@@ -2590,7 +2589,7 @@
                                      int extra_slot_offset = 0);
   // Address of Gargs and argument_offset.
   Address            argument_address(RegisterOrConstant arg_slot,
-                                      Register temp_reg,
+                                      Register temp_reg = noreg,
                                       int extra_slot_offset = 0);
 
   // Stack overflow checking
--- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -435,85 +435,6 @@
 
 }
 
-void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
-  // At this point we know that offset == referent_offset.
-  //
-  // So we might have to emit:
-  //   if (src == null) goto continuation.
-  //
-  // and we definitely have to emit:
-  //   if (klass(src).reference_type == REF_NONE) goto continuation
-  //   if (!marking_active) goto continuation
-  //   if (pre_val == null) goto continuation
-  //   call pre_barrier(pre_val)
-  //   goto continuation
-  //
-  __ bind(_entry);
-
-  assert(src()->is_register(), "sanity");
-  Register src_reg = src()->as_register();
-
-  if (gen_src_check()) {
-    // The original src operand was not a constant.
-    // Generate src == null?
-    if (__ is_in_wdisp16_range(_continuation)) {
-      __ br_null(src_reg, /*annul*/false, Assembler::pt, _continuation);
-    } else {
-      __ cmp(src_reg, G0);
-      __ brx(Assembler::equal, false, Assembler::pt, _continuation);
-    }
-    __ delayed()->nop();
-  }
-
-  // Generate src->_klass->_reference_type() == REF_NONE)?
-  assert(tmp()->is_register(), "sanity");
-  Register tmp_reg = tmp()->as_register();
-
-  __ load_klass(src_reg, tmp_reg);
-
-  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
-  __ ldub(ref_type_adr, tmp_reg);
-
-  // _reference_type field is of type ReferenceType (enum)
-  assert(REF_NONE == 0, "check this code");
-  __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt);
-  __ delayed()->nop();
-
-  // Is marking active?
-  assert(thread()->is_register(), "precondition");
-  Register thread_reg = thread()->as_pointer_register();
-
-  Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       PtrQueue::byte_offset_of_active()));
-
-  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
-    __ ld(in_progress, tmp_reg);
-  } else {
-    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
-    __ ldsb(in_progress, tmp_reg);
-  }
-
-  __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt);
-  __ delayed()->nop();
-
-  // val == null?
-  assert(val()->is_register(), "Precondition.");
-  Register val_reg = val()->as_register();
-
-  if (__ is_in_wdisp16_range(_continuation)) {
-    __ br_null(val_reg, /*annul*/false, Assembler::pt, _continuation);
-  } else {
-    __ cmp(val_reg, G0);
-    __ brx(Assembler::equal, false, Assembler::pt, _continuation);
-  }
-  __ delayed()->nop();
-
-  __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id));
-  __ delayed()->mov(val_reg, G4);
-  __ br(Assembler::always, false, Assembler::pt, _continuation);
-  __ delayed()->nop();
-}
-
 jbyte* G1PostBarrierStub::_byte_map_base = NULL;
 
 jbyte* G1PostBarrierStub::byte_map_base_slow() {
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -2956,6 +2956,7 @@
 void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
   ciMethod* method = op->profiled_method();
   int bci          = op->profiled_bci();
+  ciMethod* callee = op->profiled_callee();
 
   // Update counter for all call types
   ciMethodData* md = method->method_data_or_null();
@@ -2984,9 +2985,11 @@
 
   Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias);
   Bytecodes::Code bc = method->java_code_at_bci(bci);
+  const bool callee_is_static = callee->is_loaded() && callee->is_static();
   // Perform additional virtual call profiling for invokevirtual and
   // invokeinterface bytecodes
   if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
+      !callee_is_static &&  // required for optimized MH invokes
       C1ProfileVirtualCalls) {
     assert(op->recv()->is_single_cpu(), "recv must be allocated");
     Register recv = op->recv()->as_register();
--- a/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/cppInterpreter_sparc.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -515,9 +515,9 @@
     // Need to differentiate between igetfield, agetfield, bgetfield etc.
     // because they are different sizes.
     // Get the type from the constant pool cache
-    __ srl(G1_scratch, ConstantPoolCacheEntry::tosBits, G1_scratch);
-    // Make sure we don't need to mask G1_scratch for tosBits after the above shift
-    ConstantPoolCacheEntry::verify_tosBits();
+    __ srl(G1_scratch, ConstantPoolCacheEntry::tos_state_shift, G1_scratch);
+    // Make sure we don't need to mask G1_scratch after the above shift
+    ConstantPoolCacheEntry::verify_tos_state_shift();
     __ cmp(G1_scratch, atos );
     __ br(Assembler::equal, true, Assembler::pt, xreturn_path);
     __ delayed()->ld_ptr(Otos_i, G3_scratch, Otos_i);
--- a/src/cpu/sparc/vm/frame_sparc.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/frame_sparc.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -514,7 +514,6 @@
   // interpreted but its pc is in the code cache (for c1 -> osr_frame_return_id stub), so it must be
   // explicitly recognized.
 
-  if (is_ricochet_frame())    return sender_for_ricochet_frame(map);
 
   bool frame_is_interpreted = is_interpreted_frame();
   if (frame_is_interpreted) {
@@ -821,9 +820,7 @@
     values.describe(frame_no, sp() + w, err_msg("register save area word %d", w), 1);
   }
 
-  if (is_ricochet_frame()) {
-    MethodHandles::RicochetFrame::describe(this, values, frame_no);
-  } else if (is_interpreted_frame()) {
+  if (is_interpreted_frame()) {
     DESCRIBE_FP_OFFSET(interpreter_frame_d_scratch_fp);
     DESCRIBE_FP_OFFSET(interpreter_frame_l_scratch_fp);
     DESCRIBE_FP_OFFSET(interpreter_frame_padding);
--- a/src/cpu/sparc/vm/globals_sparc.hpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/globals_sparc.hpp	Fri Sep 07 18:18:55 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -75,4 +75,43 @@
 
 // GC Ergo Flags
 define_pd_global(intx, CMSYoungGenPerWorker, 16*M);  // default max size of CMS young gen, per GC worker thread
+
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
+                                                                            \
+  product(intx, UseVIS, 99,                                                 \
+          "Highest supported VIS instructions set on Sparc")                \
+                                                                            \
+  product(bool, UseCBCond, false,                                           \
+          "Use compare and branch instruction on SPARC")                    \
+                                                                            \
+  product(bool, UseBlockZeroing, false,                                     \
+          "Use special cpu instructions for block zeroing")                 \
+                                                                            \
+  product(intx, BlockZeroingLowLimit, 2048,                                 \
+          "Minimum size in bytes when block zeroing will be used")          \
+                                                                            \
+  product(bool, UseBlockCopy, false,                                        \
+          "Use special cpu instructions for block copy")                    \
+                                                                            \
+  product(intx, BlockCopyLowLimit, 2048,                                    \
+          "Minimum size in bytes when block copy will be used")             \
+                                                                            \
+  develop(bool, UseV8InstrsOnly, false,                                     \
+          "Use SPARC-V8 Compliant instruction subset")                      \
+                                                                            \
+  product(bool, UseNiagaraInstrs, false,                                    \
+          "Use Niagara-efficient instruction subset")                       \
+                                                                            \
+  develop(bool, UseCASForSwap, false,                                       \
+          "Do not use swap instructions, but only CAS (in a loop) on SPARC")\
+                                                                            \
+  product(uintx,  ArraycopySrcPrefetchDistance, 0,                          \
+          "Distance to prefetch source array in arracopy")                  \
+                                                                            \
+  product(uintx,  ArraycopyDstPrefetchDistance, 0,                          \
+          "Distance to prefetch destination array in arracopy")             \
+                                                                            \
+  develop(intx, V8AtomicOperationUnderLockSpinCount,    50,                 \
+          "Number of times to spin wait on a v8 atomic operation lock")     \
+
 #endif // CPU_SPARC_VM_GLOBALS_SPARC_HPP
--- a/src/cpu/sparc/vm/interp_masm_sparc.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/interp_masm_sparc.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -505,7 +505,7 @@
 void InterpreterMacroAssembler::load_receiver(Register param_count,
                                               Register recv) {
   sll(param_count, Interpreter::logStackElementSize, param_count);
-  ld_ptr(Lesp, param_count, recv);                      // gets receiver Oop
+  ld_ptr(Lesp, param_count, recv);  // gets receiver oop
 }
 
 void InterpreterMacroAssembler::empty_expression_stack() {
@@ -767,8 +767,12 @@
   get_cache_and_index_at_bcp(cache, temp, bcp_offset, index_size);
   ld_ptr(cache, constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset(), bytecode);
   const int shift_count = (1 + byte_no) * BitsPerByte;
-  srl( bytecode, shift_count, bytecode);
-  and3(bytecode,        0xFF, bytecode);
+  assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) ||
+         (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift),
+         "correct shift count");
+  srl(bytecode, shift_count, bytecode);
+  assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask");
+  and3(bytecode, ConstantPoolCacheEntry::bytecode_1_mask, bytecode);
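+  // (Illustration, assuming the usual f1_byte == 1 and f2_byte == 2: the
+  // shift is then 16 or 24 bits, selecting the matching bytecode byte of
+  // the packed indices word before the mask above.)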
 }
 
 
--- a/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/interpreterGenerator_sparc.hpp	Fri Sep 07 18:18:55 2012 -0700
@@ -32,7 +32,6 @@
   address generate_normal_entry(bool synchronized);
   address generate_native_entry(bool synchronized);
   address generate_abstract_entry(void);
-  address generate_method_handle_entry(void);
   address generate_math_entry(AbstractInterpreter::MethodKind kind);
   address generate_empty_entry(void);
   address generate_accessor_entry(void);
--- a/src/cpu/sparc/vm/interpreter_sparc.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/interpreter_sparc.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -255,17 +255,6 @@
 }
 
 
-// Method handle invoker
-// Dispatch a method of the form java.lang.invoke.MethodHandles::invoke(...)
-address InterpreterGenerator::generate_method_handle_entry(void) {
-  if (!EnableInvokeDynamic) {
-    return generate_abstract_entry();
-  }
-
-  return MethodHandles::generate_method_handle_interpreter_entry(_masm);
-}
-
-
 //----------------------------------------------------------------------------------------------------
 // Entry points & stack frame layout
 //
@@ -395,7 +384,7 @@
     case Interpreter::empty                  : entry_point = ((InterpreterGenerator*)this)->generate_empty_entry();        break;
     case Interpreter::accessor               : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry();     break;
     case Interpreter::abstract               : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry();     break;
-    case Interpreter::method_handle          : entry_point = ((InterpreterGenerator*)this)->generate_method_handle_entry(); break;
+
     case Interpreter::java_lang_math_sin     :                                                                             break;
     case Interpreter::java_lang_math_cos     :                                                                             break;
     case Interpreter::java_lang_math_tan     :                                                                             break;
@@ -407,7 +396,9 @@
     case Interpreter::java_lang_math_exp     :                                                                             break;
     case Interpreter::java_lang_ref_reference_get
                                              : entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
-    default                                  : ShouldNotReachHere();                                                       break;
+    default:
+      fatal(err_msg("unexpected method kind: %d", kind));
+      break;
   }
 
   if (entry_point) return entry_point;
--- a/src/cpu/sparc/vm/methodHandles_sparc.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/methodHandles_sparc.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -31,452 +31,37 @@
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) stop(error)
 #else
 #define BLOCK_COMMENT(str) __ block_comment(str)
+#define STOP(error) block_comment(error); __ stop(error)
 #endif
 
 #define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
 
-address MethodHandleEntry::start_compiled_entry(MacroAssembler* _masm,
-                                                address interpreted_entry) {
-  // Just before the actual machine code entry point, allocate space
-  // for a MethodHandleEntry::Data record, so that we can manage everything
-  // from one base pointer.
-  __ align(wordSize);
-  address target = __ pc() + sizeof(Data);
-  while (__ pc() < target) {
-    __ nop();
-    __ align(wordSize);
-  }
-
-  MethodHandleEntry* me = (MethodHandleEntry*) __ pc();
-  me->set_end_address(__ pc());         // set a temporary end_address
-  me->set_from_interpreted_entry(interpreted_entry);
-  me->set_type_checking_entry(NULL);
-
-  return (address) me;
+// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant.
+static RegisterOrConstant constant(int value) {
+  return RegisterOrConstant(value);
 }
 
-MethodHandleEntry* MethodHandleEntry::finish_compiled_entry(MacroAssembler* _masm,
-                                                address start_addr) {
-  MethodHandleEntry* me = (MethodHandleEntry*) start_addr;
-  assert(me->end_address() == start_addr, "valid ME");
-
-  // Fill in the real end_address:
-  __ align(wordSize);
-  me->set_end_address(__ pc());
-
-  return me;
-}
-
-// stack walking support
-
-frame MethodHandles::ricochet_frame_sender(const frame& fr, RegisterMap *map) {
-  //RicochetFrame* f = RicochetFrame::from_frame(fr);
-  // Cf. is_interpreted_frame path of frame::sender
-  intptr_t* younger_sp = fr.sp();
-  intptr_t* sp         = fr.sender_sp();
-  map->make_integer_regs_unsaved();
-  map->shift_window(sp, younger_sp);
-  bool this_frame_adjusted_stack = true;  // I5_savedSP is live in this RF
-  return frame(sp, younger_sp, this_frame_adjusted_stack);
-}
-
-void MethodHandles::ricochet_frame_oops_do(const frame& fr, OopClosure* blk, const RegisterMap* reg_map) {
-  ResourceMark rm;
-  RicochetFrame* f = RicochetFrame::from_frame(fr);
-
-  // pick up the argument type descriptor:
-  Thread* thread = Thread::current();
-  Handle cookie(thread, f->compute_saved_args_layout(true, true));
-
-  // process fixed part
-  blk->do_oop((oop*)f->saved_target_addr());
-  blk->do_oop((oop*)f->saved_args_layout_addr());
-
-  // process variable arguments:
-  if (cookie.is_null())  return;  // no arguments to describe
-
-  // the cookie is actually the invokeExact method for my target
-  // his argument signature is what I'm interested in
-  assert(cookie->is_method(), "");
-  methodHandle invoker(thread, methodOop(cookie()));
-  assert(invoker->name() == vmSymbols::invokeExact_name(), "must be this kind of method");
-  assert(!invoker->is_static(), "must have MH argument");
-  int slot_count = invoker->size_of_parameters();
-  assert(slot_count >= 1, "must include 'this'");
-  intptr_t* base = f->saved_args_base();
-  intptr_t* retval = NULL;
-  if (f->has_return_value_slot())
-    retval = f->return_value_slot_addr();
-  int slot_num = slot_count - 1;
-  intptr_t* loc = &base[slot_num];
-  //blk->do_oop((oop*) loc);   // original target, which is irrelevant
-  int arg_num = 0;
-  for (SignatureStream ss(invoker->signature()); !ss.is_done(); ss.next()) {
-    if (ss.at_return_type())  continue;
-    BasicType ptype = ss.type();
-    if (ptype == T_ARRAY)  ptype = T_OBJECT; // fold all refs to T_OBJECT
-    assert(ptype >= T_BOOLEAN && ptype <= T_OBJECT, "not array or void");
-    slot_num -= type2size[ptype];
-    loc = &base[slot_num];
-    bool is_oop = (ptype == T_OBJECT && loc != retval);
-    if (is_oop)  blk->do_oop((oop*)loc);
-    arg_num += 1;
-  }
-  assert(slot_num == 0, "must have processed all the arguments");
-}
-
-// Ricochet Frames
-const Register MethodHandles::RicochetFrame::L1_continuation      = L1;
-const Register MethodHandles::RicochetFrame::L2_saved_target      = L2;
-const Register MethodHandles::RicochetFrame::L3_saved_args_layout = L3;
-const Register MethodHandles::RicochetFrame::L4_saved_args_base   = L4; // cf. Gargs = G4
-const Register MethodHandles::RicochetFrame::L5_conversion        = L5;
-#ifdef ASSERT
-const Register MethodHandles::RicochetFrame::L0_magic_number_1    = L0;
-#endif //ASSERT
-
-oop MethodHandles::RicochetFrame::compute_saved_args_layout(bool read_cache, bool write_cache) {
-  if (read_cache) {
-    oop cookie = saved_args_layout();
-    if (cookie != NULL)  return cookie;
-  }
-  oop target = saved_target();
-  oop mtype  = java_lang_invoke_MethodHandle::type(target);
-  oop mtform = java_lang_invoke_MethodType::form(mtype);
-  oop cookie = java_lang_invoke_MethodTypeForm::vmlayout(mtform);
-  if (write_cache)  {
-    (*saved_args_layout_addr()) = cookie;
-  }
-  return cookie;
-}
-
-void MethodHandles::RicochetFrame::generate_ricochet_blob(MacroAssembler* _masm,
-                                                          // output params:
-                                                          int* bounce_offset,
-                                                          int* exception_offset,
-                                                          int* frame_size_in_words) {
-  (*frame_size_in_words) = RicochetFrame::frame_size_in_bytes() / wordSize;
-
-  address start = __ pc();
-
-#ifdef ASSERT
-  __ illtrap(0); __ illtrap(0); __ illtrap(0);
-  // here's a hint of something special:
-  __ set(MAGIC_NUMBER_1, G0);
-  __ set(MAGIC_NUMBER_2, G0);
-#endif //ASSERT
-  __ illtrap(0);  // not reached
-
-  // Return values are in registers.
-  // L1_continuation contains a cleanup continuation we must return
-  // to.
-
-  (*bounce_offset) = __ pc() - start;
-  BLOCK_COMMENT("ricochet_blob.bounce");
-
-  if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
-  trace_method_handle(_masm, "return/ricochet_blob.bounce");
-
-  __ JMP(L1_continuation, 0);
-  __ delayed()->nop();
-  __ illtrap(0);
-
-  DEBUG_ONLY(__ set(MAGIC_NUMBER_2, G0));
-
-  (*exception_offset) = __ pc() - start;
-  BLOCK_COMMENT("ricochet_blob.exception");
-
-  // compare this to Interpreter::rethrow_exception_entry, which is parallel code
-  // for example, see TemplateInterpreterGenerator::generate_throw_exception
-  // Live registers in:
-  //   Oexception  (O0): exception
-  //   Oissuing_pc (O1): return address/pc that threw exception (ignored, always equal to bounce addr)
-  __ verify_oop(Oexception);
-
-  // Take down the frame.
-
-  // Cf. InterpreterMacroAssembler::remove_activation.
-  leave_ricochet_frame(_masm, /*recv_reg=*/ noreg, I5_savedSP, I7);
-
-  // We are done with this activation frame; find out where to go next.
-  // The continuation point will be an exception handler, which expects
-  // the following registers set up:
-  //
-  // Oexception: exception
-  // Oissuing_pc: the local call that threw exception
-  // Other On: garbage
-  // In/Ln:  the contents of the caller's register window
-  //
-  // We do the required restore at the last possible moment, because we
-  // need to preserve some state across a runtime call.
-  // (Remember that the caller activation is unknown--it might not be
-  // interpreted, so things like Lscratch are useless in the caller.)
-  __ mov(Oexception,  Oexception ->after_save());  // get exception in I0 so it will be on O0 after restore
-  __ add(I7, frame::pc_return_offset, Oissuing_pc->after_save());  // likewise set I1 to a value local to the caller
-  __ call_VM_leaf(L7_thread_cache,
-                  CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
-                  G2_thread, Oissuing_pc->after_save());
-
-  // The caller's SP was adjusted upon method entry to accomodate
-  // the callee's non-argument locals. Undo that adjustment.
-  __ JMP(O0, 0);                         // return exception handler in caller
-  __ delayed()->restore(I5_savedSP, G0, SP);
-
-  // (same old exception object is already in Oexception; see above)
-  // Note that an "issuing PC" is actually the next PC after the call
-}
-
-void MethodHandles::RicochetFrame::enter_ricochet_frame(MacroAssembler* _masm,
-                                                        Register recv_reg,
-                                                        Register argv_reg,
-                                                        address return_handler) {
-  // does not include the __ save()
-  assert(argv_reg == Gargs, "");
-  Address G3_mh_vmtarget(   recv_reg, java_lang_invoke_MethodHandle::vmtarget_offset_in_bytes());
-  Address G3_amh_conversion(recv_reg, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes());
-
-  // Create the RicochetFrame.
-  // Unlike on x86 we can store all required information in local
-  // registers.
-  BLOCK_COMMENT("push RicochetFrame {");
-  __ set(ExternalAddress(return_handler),          L1_continuation);
-  __ load_heap_oop(G3_mh_vmtarget,                 L2_saved_target);
-  __ mov(G0,                                       L3_saved_args_layout);
-  __ mov(Gargs,                                    L4_saved_args_base);
-  __ lduw(G3_amh_conversion,                       L5_conversion);  // 32-bit field
-  // I5, I6, I7 are already set up
-  DEBUG_ONLY(__ set((int32_t) MAGIC_NUMBER_1,      L0_magic_number_1));
-  BLOCK_COMMENT("} RicochetFrame");
-}
-
-void MethodHandles::RicochetFrame::leave_ricochet_frame(MacroAssembler* _masm,
-                                                        Register recv_reg,
-                                                        Register new_sp_reg,
-                                                        Register sender_pc_reg) {
-  assert(new_sp_reg == I5_savedSP, "exact_sender_sp already in place");
-  assert(sender_pc_reg == I7, "in a fixed place");
-  // does not include the __ ret() & __ restore()
-  assert_different_registers(recv_reg, new_sp_reg, sender_pc_reg);
-  // Take down the frame.
-  // Cf. InterpreterMacroAssembler::remove_activation.
-  BLOCK_COMMENT("end_ricochet_frame {");
-  if (recv_reg->is_valid())
-    __ mov(L2_saved_target, recv_reg);
-  BLOCK_COMMENT("} end_ricochet_frame");
-}
-
-// Emit code to verify that FP is pointing at a valid ricochet frame.
-#ifndef PRODUCT
-enum {
-  ARG_LIMIT = 255, SLOP = 45,
-  // use this parameter for checking for garbage stack movements:
-  UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP)
-  // the slop defends against false alarms due to fencepost errors
-};
-#endif
-
-#ifdef ASSERT
-void MethodHandles::RicochetFrame::verify_clean(MacroAssembler* _masm) {
-  // The stack should look like this:
-  //    ... keep1 | dest=42 | keep2 | magic | handler | magic | recursive args | [RF]
-  // Check various invariants.
-
-  Register O7_temp = O7, O5_temp = O5;
-
-  Label L_ok_1, L_ok_2, L_ok_3, L_ok_4;
-  BLOCK_COMMENT("verify_clean {");
-  // Magic numbers must check out:
-  __ set((int32_t) MAGIC_NUMBER_1, O7_temp);
-  __ cmp_and_br_short(O7_temp, L0_magic_number_1, Assembler::equal, Assembler::pt, L_ok_1);
-  __ stop("damaged ricochet frame: MAGIC_NUMBER_1 not found");
-
-  __ BIND(L_ok_1);
-
-  // Arguments pointer must look reasonable:
-#ifdef _LP64
-  Register FP_temp = O5_temp;
-  __ add(FP, STACK_BIAS, FP_temp);
-#else
-  Register FP_temp = FP;
-#endif
-  __ cmp_and_brx_short(L4_saved_args_base, FP_temp, Assembler::greaterEqualUnsigned, Assembler::pt, L_ok_2);
-  __ stop("damaged ricochet frame: L4 < FP");
-
-  __ BIND(L_ok_2);
-  // Disable until we decide on it's fate
-  // __ sub(L4_saved_args_base, UNREASONABLE_STACK_MOVE * Interpreter::stackElementSize, O7_temp);
-  // __ cmp(O7_temp, FP_temp);
-  // __ br(Assembler::lessEqualUnsigned, false, Assembler::pt, L_ok_3);
-  // __ delayed()->nop();
-  // __ stop("damaged ricochet frame: (L4 - UNREASONABLE_STACK_MOVE) > FP");
-
-  __ BIND(L_ok_3);
-  extract_conversion_dest_type(_masm, L5_conversion, O7_temp);
-  __ cmp_and_br_short(O7_temp, T_VOID, Assembler::equal, Assembler::pt, L_ok_4);
-  extract_conversion_vminfo(_masm, L5_conversion, O5_temp);
-  __ ld_ptr(L4_saved_args_base, __ argument_offset(O5_temp, O5_temp), O7_temp);
-  assert(Assembler::is_simm13(RETURN_VALUE_PLACEHOLDER), "must be simm13");
-  __ cmp_and_brx_short(O7_temp, (int32_t) RETURN_VALUE_PLACEHOLDER, Assembler::equal, Assembler::pt, L_ok_4);
-  __ stop("damaged ricochet frame: RETURN_VALUE_PLACEHOLDER not found");
-  __ BIND(L_ok_4);
-  BLOCK_COMMENT("} verify_clean");
-}
-#endif //ASSERT
-
 void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg) {
   if (VerifyMethodHandles)
     verify_klass(_masm, klass_reg, SystemDictionaryHandles::Class_klass(), temp_reg, temp2_reg,
-                 "AMH argument is a Class");
+                 "MH argument is a Class");
   __ load_heap_oop(Address(klass_reg, java_lang_Class::klass_offset_in_bytes()), klass_reg);
 }
 
-void MethodHandles::load_conversion_vminfo(MacroAssembler* _masm, Address conversion_field_addr, Register reg) {
-  assert(CONV_VMINFO_SHIFT == 0, "preshifted");
-  assert(CONV_VMINFO_MASK == right_n_bits(BitsPerByte), "else change type of following load");
-  __ ldub(conversion_field_addr.plus_disp(BytesPerInt - 1), reg);
+#ifdef ASSERT
+static int check_nonzero(const char* xname, int x) {
+  assert(x != 0, err_msg("%s should be nonzero", xname));
+  return x;
 }
-
-void MethodHandles::extract_conversion_vminfo(MacroAssembler* _masm, Register conversion_field_reg, Register reg) {
-  assert(CONV_VMINFO_SHIFT == 0, "preshifted");
-  __ and3(conversion_field_reg, CONV_VMINFO_MASK, reg);
-}
-
-void MethodHandles::extract_conversion_dest_type(MacroAssembler* _masm, Register conversion_field_reg, Register reg) {
-  __ srl(conversion_field_reg, CONV_DEST_TYPE_SHIFT, reg);
-  __ and3(reg, 0x0F, reg);
-}
-
-void MethodHandles::load_stack_move(MacroAssembler* _masm,
-                                    Address G3_amh_conversion,
-                                    Register stack_move_reg) {
-  BLOCK_COMMENT("load_stack_move {");
-  __ ldsw(G3_amh_conversion, stack_move_reg);
-  __ sra(stack_move_reg, CONV_STACK_MOVE_SHIFT, stack_move_reg);
-#ifdef ASSERT
-  if (VerifyMethodHandles) {
-    Label L_ok, L_bad;
-    int32_t stack_move_limit = 0x0800;  // extra-large
-    __ cmp_and_br_short(stack_move_reg, stack_move_limit, Assembler::greaterEqual, Assembler::pn, L_bad);
-    __ cmp(stack_move_reg, -stack_move_limit);
-    __ br(Assembler::greater, false, Assembler::pt, L_ok);
-    __ delayed()->nop();
-    __ BIND(L_bad);
-    __ stop("load_stack_move of garbage value");
-    __ BIND(L_ok);
-  }
-#endif
-  BLOCK_COMMENT("} load_stack_move");
-}
+#define NONZERO(x) check_nonzero(#x, x)
+#else //ASSERT
+#define NONZERO(x) (x)
+#endif //ASSERT
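+// (Illustrative note: NONZERO guards injected field offsets such as
+// MemberName.flags; a zero offset would mean the defining class has not
+// been loaded and initialized yet.)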
 
 #ifdef ASSERT
-void MethodHandles::RicochetFrame::verify() const {
-  assert(magic_number_1() == MAGIC_NUMBER_1, "");
-  if (!Universe::heap()->is_gc_active()) {
-    if (saved_args_layout() != NULL) {
-      assert(saved_args_layout()->is_method(), "must be valid oop");
-    }
-    if (saved_target() != NULL) {
-      assert(java_lang_invoke_MethodHandle::is_instance(saved_target()), "checking frame value");
-    }
-  }
-  int conv_op = adapter_conversion_op(conversion());
-  assert(conv_op == java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS ||
-         conv_op == java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS ||
-         conv_op == java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF,
-         "must be a sane conversion");
-  if (has_return_value_slot()) {
-    assert(*return_value_slot_addr() == RETURN_VALUE_PLACEHOLDER, "");
-  }
-}
-
-void MethodHandles::verify_argslot(MacroAssembler* _masm, Register argslot_reg, Register temp_reg, const char* error_message) {
-  // Verify that argslot lies within (Gargs, FP].
-  Label L_ok, L_bad;
-  BLOCK_COMMENT("verify_argslot {");
-  __ cmp_and_brx_short(Gargs, argslot_reg, Assembler::greaterUnsigned, Assembler::pn, L_bad);
-  __ add(FP, STACK_BIAS, temp_reg);  // STACK_BIAS is zero on !_LP64
-  __ cmp_and_brx_short(argslot_reg, temp_reg, Assembler::lessEqualUnsigned, Assembler::pt, L_ok);
-  __ BIND(L_bad);
-  __ stop(error_message);
-  __ BIND(L_ok);
-  BLOCK_COMMENT("} verify_argslot");
-}
-
-void MethodHandles::verify_argslots(MacroAssembler* _masm,
-                                    RegisterOrConstant arg_slots,
-                                    Register arg_slot_base_reg,
-                                    Register temp_reg,
-                                    Register temp2_reg,
-                                    bool negate_argslots,
-                                    const char* error_message) {
-  // Verify that [argslot..argslot+size) lies within (Gargs, FP).
-  Label L_ok, L_bad;
-  BLOCK_COMMENT("verify_argslots {");
-  if (negate_argslots) {
-    if (arg_slots.is_constant()) {
-      arg_slots = -1 * arg_slots.as_constant();
-    } else {
-      __ neg(arg_slots.as_register(), temp_reg);
-      arg_slots = temp_reg;
-    }
-  }
-  __ add(arg_slot_base_reg, __ argument_offset(arg_slots, temp_reg), temp_reg);
-  __ add(FP, STACK_BIAS, temp2_reg);  // STACK_BIAS is zero on !_LP64
-  __ cmp_and_brx_short(temp_reg, temp2_reg, Assembler::greaterUnsigned, Assembler::pn, L_bad);
-  // Gargs points to the first word so adjust by BytesPerWord
-  __ add(arg_slot_base_reg, BytesPerWord, temp_reg);
-  __ cmp_and_brx_short(Gargs, temp_reg, Assembler::lessEqualUnsigned, Assembler::pt, L_ok);
-  __ BIND(L_bad);
-  __ stop(error_message);
-  __ BIND(L_ok);
-  BLOCK_COMMENT("} verify_argslots");
-}
-
-// Make sure that arg_slots has the same sign as the given direction.
-// If (and only if) arg_slots is a assembly-time constant, also allow it to be zero.
-void MethodHandles::verify_stack_move(MacroAssembler* _masm,
-                                      RegisterOrConstant arg_slots, int direction) {
-  enum { UNREASONABLE_STACK_MOVE = 256 * 4 };  // limit of 255 arguments
-  bool allow_zero = arg_slots.is_constant();
-  if (direction == 0) { direction = +1; allow_zero = true; }
-  assert(stack_move_unit() == -1, "else add extra checks here");
-  if (arg_slots.is_register()) {
-    Label L_ok, L_bad;
-    BLOCK_COMMENT("verify_stack_move {");
-    // __ btst(-stack_move_unit() - 1, arg_slots.as_register());  // no need
-    // __ br(Assembler::notZero, false, Assembler::pn, L_bad);
-    // __ delayed()->nop();
-    __ cmp(arg_slots.as_register(), (int32_t) NULL_WORD);
-    if (direction > 0) {
-      __ br(allow_zero ? Assembler::less : Assembler::lessEqual, false, Assembler::pn, L_bad);
-      __ delayed()->nop();
-      __ cmp(arg_slots.as_register(), (int32_t) UNREASONABLE_STACK_MOVE);
-      __ br(Assembler::less, false, Assembler::pn, L_ok);
-      __ delayed()->nop();
-    } else {
-      __ br(allow_zero ? Assembler::greater : Assembler::greaterEqual, false, Assembler::pn, L_bad);
-      __ delayed()->nop();
-      __ cmp(arg_slots.as_register(), (int32_t) -UNREASONABLE_STACK_MOVE);
-      __ br(Assembler::greater, false, Assembler::pn, L_ok);
-      __ delayed()->nop();
-    }
-    __ BIND(L_bad);
-    if (direction > 0)
-      __ stop("assert arg_slots > 0");
-    else
-      __ stop("assert arg_slots < 0");
-    __ BIND(L_ok);
-    BLOCK_COMMENT("} verify_stack_move");
-  } else {
-    intptr_t size = arg_slots.as_constant();
-    if (direction < 0)  size = -size;
-    assert(size >= 0, "correct direction of constant move");
-    assert(size < UNREASONABLE_STACK_MOVE, "reasonable size of constant move");
-  }
-}
-
 void MethodHandles::verify_klass(MacroAssembler* _masm,
                                  Register obj_reg, KlassHandle klass,
                                  Register temp_reg, Register temp2_reg,
@@ -485,6 +70,14 @@
   assert(klass_addr >= SystemDictionaryHandles::Object_klass().raw_value() &&
          klass_addr <= SystemDictionaryHandles::Long_klass().raw_value(),
          "must be one of the SystemDictionaryHandles");
+  bool did_save = false;
+  if (temp_reg == noreg || temp2_reg == noreg) {
+    temp_reg = L1;
+    temp2_reg = L2;
+    __ save_frame_and_mov(0, obj_reg, L0);
+    obj_reg = L0;
+    did_save = true;
+  }
   Label L_ok, L_bad;
   BLOCK_COMMENT("verify_klass {");
   __ verify_oop(obj_reg);
@@ -499,537 +92,415 @@
   __ ld_ptr(Address(temp2_reg, 0), temp2_reg);
   __ cmp_and_brx_short(temp_reg, temp2_reg, Assembler::equal, Assembler::pt, L_ok);
   __ BIND(L_bad);
-  __ stop(error_message);
+  if (did_save)  __ restore();
+  __ STOP(error_message);
   __ BIND(L_ok);
+  if (did_save)  __ restore();
   BLOCK_COMMENT("} verify_klass");
 }
+
+void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {
+  Label L;
+  BLOCK_COMMENT("verify_ref_kind {");
+  __ lduw(Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes())), temp);
+  __ srl( temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT, temp);
+  __ and3(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK,  temp);
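+  // temp now holds (flags >> MN_REFERENCE_KIND_SHIFT) & MN_REFERENCE_KIND_MASK,
+  // i.e. the JVM_REF_* reference kind encoded in the MemberName flags word.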
+  __ cmp_and_br_short(temp, ref_kind, Assembler::equal, Assembler::pt, L);
+  { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal);
+    jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind);
+    if (ref_kind == JVM_REF_invokeVirtual ||
+        ref_kind == JVM_REF_invokeSpecial)
+      // could do this for all ref_kinds, but would explode assembly code size
+      trace_method_handle(_masm, buf);
+    __ STOP(buf);
+  }
+  BLOCK_COMMENT("} verify_ref_kind");
+  __ bind(L);
+}
+
 #endif // ASSERT
 
-
-void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register target, Register temp) {
+void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register target, Register temp,
+                                            bool for_compiler_entry) {
   assert(method == G5_method, "interpreter calling convention");
   __ verify_oop(method);
-  __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_interpreted_offset()), target);
-  if (JvmtiExport::can_post_interpreter_events()) {
+
+  if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) {
+    Label run_compiled_code;
     // JVMTI events, such as single-stepping, are implemented partly by avoiding running
     // compiled code in threads for which the event is enabled.  Check here for
     // interp_only_mode if these events CAN be enabled.
     __ verify_thread();
-    Label skip_compiled_code;
-
     const Address interp_only(G2_thread, JavaThread::interp_only_mode_offset());
     __ ld(interp_only, temp);
-    __ tst(temp);
-    __ br(Assembler::notZero, true, Assembler::pn, skip_compiled_code);
-    __ delayed()->ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), target);
-    __ bind(skip_compiled_code);
+    __ cmp_and_br_short(temp, 0, Assembler::zero, Assembler::pt, run_compiled_code);
+    __ ld_ptr(G5_method, in_bytes(methodOopDesc::interpreter_entry_offset()), target);
+    __ jmp(target, 0);
+    __ delayed()->nop();
+    __ BIND(run_compiled_code);
+    // Note: we could fill some delay slots here, but
+    // it doesn't matter, since this is interpreter code.
   }
+
+  const ByteSize entry_offset = for_compiler_entry ? methodOopDesc::from_compiled_offset() :
+                                                     methodOopDesc::from_interpreted_offset();
+  __ ld_ptr(G5_method, in_bytes(entry_offset), target);
   __ jmp(target, 0);
   __ delayed()->nop();
 }
 
+void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm,
+                                        Register recv, Register method_temp,
+                                        Register temp2, Register temp3,
+                                        bool for_compiler_entry) {
+  BLOCK_COMMENT("jump_to_lambda_form {");
+  // This is the initial entry point of a lazy method handle.
+  // After type checking, it picks up the invoker from the LambdaForm.
+  assert_different_registers(recv, method_temp, temp2, temp3);
+  assert(method_temp == G5_method, "required register for loading method");
+
+  //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); });
+
+  // Load the invoker, as MH -> MH.form -> LF.vmentry
+  __ verify_oop(recv);
+  __ load_heap_oop(Address(recv,        NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())),       method_temp);
+  __ verify_oop(method_temp);
+  __ load_heap_oop(Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), method_temp);
+  __ verify_oop(method_temp);
+  // the following assumes that a methodOop is normally compressed in the vmtarget field:
+  __ load_heap_oop(Address(method_temp, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes())),     method_temp);
+  __ verify_oop(method_temp);
+
+  if (VerifyMethodHandles && !for_compiler_entry) {
+    // make sure recv is already on stack
+    __ load_sized_value(Address(method_temp, methodOopDesc::size_of_parameters_offset()),
+                        temp2,
+                        sizeof(u2), /*is_signed*/ false);
+    // assert(sizeof(u2) == sizeof(methodOopDesc::_size_of_parameters), "");
+    Label L;
+    __ ld_ptr(__ argument_address(temp2, temp2, -1), temp2);
+    __ cmp_and_br_short(temp2, recv, Assembler::equal, Assembler::pt, L);
+    __ STOP("receiver not on stack");
+    __ BIND(L);
+  }
+
+  jump_from_method_handle(_masm, method_temp, temp2, temp3, for_compiler_entry);
+  BLOCK_COMMENT("} jump_to_lambda_form");
+}
+
 
 // Code generation
-address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm) {
-  // I5_savedSP/O5_savedSP: sender SP (must preserve)
+address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm,
+                                                                vmIntrinsics::ID iid) {
+  const bool not_for_compiler_entry = false;  // this is the interpreter entry
+  assert(is_signature_polymorphic(iid), "expected invoke iid");
+  if (iid == vmIntrinsics::_invokeGeneric ||
+      iid == vmIntrinsics::_compiledLambdaForm) {
+    // Perhaps surprisingly, the symbolic references visible to Java are not directly used.
+    // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod.
+    // They all allow an appendix argument.
+    __ should_not_reach_here();           // empty stubs make SG sick
+    return NULL;
+  }
+
+  // I5_savedSP/O5_savedSP: sender SP (must preserve; see prepare_to_jump_from_interpreted)
+  // G5_method:  methodOop
   // G4 (Gargs): incoming argument list (must preserve)
-  // G5_method:  invoke methodOop
-  // G3_method_handle: receiver method handle (must load from sp[MethodTypeForm.vmslots])
-  // O0, O1, O2, O3, O4: garbage temps, blown away
-  Register O0_mtype   = O0;
-  Register O1_scratch = O1;
-  Register O2_scratch = O2;
-  Register O3_scratch = O3;
-  Register O4_argslot = O4;
-  Register O4_argbase = O4;
+  // O0: used as temp to hold mh or receiver
+  // O1, O4: garbage temps, blown away
+  Register O1_scratch    = O1;
+  Register O4_param_size = O4;   // size of parameters
 
-  // emit WrongMethodType path first, to enable back-branch from main path
-  Label wrong_method_type;
-  __ bind(wrong_method_type);
-  Label invoke_generic_slow_path;
-  assert(methodOopDesc::intrinsic_id_size_in_bytes() == sizeof(u1), "");;
-  __ ldub(Address(G5_method, methodOopDesc::intrinsic_id_offset_in_bytes()), O1_scratch);
-  __ cmp(O1_scratch, (int) vmIntrinsics::_invokeExact);
-  __ brx(Assembler::notEqual, false, Assembler::pt, invoke_generic_slow_path);
-  __ delayed()->nop();
-  __ mov(O0_mtype, G5_method_type);  // required by throw_WrongMethodType
-  __ mov(G3_method_handle, G3_method_handle);  // already in this register
-  // O0 will be filled in with JavaThread in stub
-  __ jump_to(AddressLiteral(StubRoutines::throw_WrongMethodTypeException_entry()), O3_scratch);
-  __ delayed()->nop();
+  address code_start = __ pc();
 
   // here's where control starts out:
   __ align(CodeEntryAlignment);
   address entry_point = __ pc();
 
-  // fetch the MethodType from the method handle
-  // FIXME: Interpreter should transmit pre-popped stack pointer, to locate base of arg list.
-  // This would simplify several touchy bits of code.
-  // See 6984712: JSR 292 method handle calls need a clean argument base pointer
-  {
-    Register tem = G5_method;
-    for (jint* pchase = methodOopDesc::method_type_offsets_chain(); (*pchase) != -1; pchase++) {
-      __ ld_ptr(Address(tem, *pchase), O0_mtype);
-      tem = O0_mtype;          // in case there is another indirection
+  if (VerifyMethodHandles) {
+    Label L;
+    BLOCK_COMMENT("verify_intrinsic_id {");
+    __ ldub(Address(G5_method, methodOopDesc::intrinsic_id_offset_in_bytes()), O1_scratch);
+    __ cmp_and_br_short(O1_scratch, (int) iid, Assembler::equal, Assembler::pt, L);
+    if (iid == vmIntrinsics::_linkToVirtual ||
+        iid == vmIntrinsics::_linkToSpecial) {
+      // could do this for all kinds, but would explode assembly code size
+      trace_method_handle(_masm, "bad methodOop::intrinsic_id");
     }
+    __ STOP("bad methodOop::intrinsic_id");
+    __ bind(L);
+    BLOCK_COMMENT("} verify_intrinsic_id");
   }
 
-  // given the MethodType, find out where the MH argument is buried
-  __ load_heap_oop(Address(O0_mtype,   __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes,        O1_scratch)), O4_argslot);
-  __ ldsw(         Address(O4_argslot, __ delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, O1_scratch)), O4_argslot);
-  __ add(__ argument_address(O4_argslot, O4_argslot, 1), O4_argbase);
-  // Note: argument_address uses its input as a scratch register!
-  Address mh_receiver_slot_addr(O4_argbase, -Interpreter::stackElementSize);
-  __ ld_ptr(mh_receiver_slot_addr, G3_method_handle);
+  // First task:  Find out how big the argument list is.
+  Address O4_first_arg_addr;
+  int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid);
+  assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic");
+  if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) {
+    __ load_sized_value(Address(G5_method, methodOopDesc::size_of_parameters_offset()),
+                        O4_param_size,
+                        sizeof(u2), /*is_signed*/ false);
+    // assert(sizeof(u2) == sizeof(methodOopDesc::_size_of_parameters), "");
+    O4_first_arg_addr = __ argument_address(O4_param_size, O4_param_size, -1);
+  } else {
+    DEBUG_ONLY(O4_param_size = noreg);
+  }
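+  // A sketch of the indexing: argument_address(n, n, -1) addresses argument
+  // slot n-1, counting from the trailing argument at slot 0; with n equal to
+  // size_of_parameters this is the first declared argument -- the method
+  // handle itself, or the receiver for the linkTo intrinsics.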
 
-  trace_method_handle(_masm, "invokeExact");
+  Register O0_mh = noreg;
+  if (!is_signature_polymorphic_static(iid)) {
+    __ ld_ptr(O4_first_arg_addr, O0_mh = O0);
+    DEBUG_ONLY(O4_param_size = noreg);
+  }
 
-  __ check_method_handle_type(O0_mtype, G3_method_handle, O1_scratch, wrong_method_type);
+  // O4_first_arg_addr is live!
 
-  // Nobody uses the MH receiver slot after this.  Make sure.
-  DEBUG_ONLY(__ set((int32_t) 0x999999, O1_scratch); __ st_ptr(O1_scratch, mh_receiver_slot_addr));
+  if (TraceMethodHandles) {
+    const char* name = vmIntrinsics::name_at(iid);
+    if (*name == '_')  name += 1;
+    const size_t len = strlen(name) + 50;
+    char* qname = NEW_C_HEAP_ARRAY(char, len, mtInternal);
+    const char* suffix = "";
+    if (vmIntrinsics::method_for(iid) == NULL ||
+        !vmIntrinsics::method_for(iid)->access_flags().is_public()) {
+      if (is_signature_polymorphic_static(iid))
+        suffix = "/static";
+      else
+        suffix = "/private";
+    }
+    jio_snprintf(qname, len, "MethodHandle::interpreter_entry::%s%s", name, suffix);
+    if (O0_mh != noreg)
+      __ mov(O0_mh, G3_method_handle);  // make stub happy
+    trace_method_handle(_masm, qname);
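+    // (The qname buffer is baked into the stub by trace_method_handle and is
+    // presumably left unfreed on purpose: tracing-only code, and the string
+    // must outlive the generated stub.)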
+  }
 
-  __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
+  if (iid == vmIntrinsics::_invokeBasic) {
+    generate_method_handle_dispatch(_masm, iid, O0_mh, noreg, not_for_compiler_entry);
 
-  // for invokeGeneric (only), apply argument and result conversions on the fly
-  __ bind(invoke_generic_slow_path);
-#ifdef ASSERT
-  if (VerifyMethodHandles) {
-    Label L;
-    __ ldub(Address(G5_method, methodOopDesc::intrinsic_id_offset_in_bytes()), O1_scratch);
-    __ cmp(O1_scratch, (int) vmIntrinsics::_invokeGeneric);
-    __ brx(Assembler::equal, false, Assembler::pt, L);
-    __ delayed()->nop();
-    __ stop("bad methodOop::intrinsic_id");
-    __ bind(L);
+  } else {
+    // Adjust argument list by popping the trailing MemberName argument.
+    Register O0_recv = noreg;
+    if (MethodHandles::ref_kind_has_receiver(ref_kind)) {
+      // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack.
+      __ ld_ptr(O4_first_arg_addr, O0_recv = O0);
+      DEBUG_ONLY(O4_param_size = noreg);
+    }
+    Register G5_member = G5_method;  // MemberName ptr; incoming method ptr is dead now
+    __ ld_ptr(__ argument_address(constant(0)), G5_member);
+    __ add(Gargs, Interpreter::stackElementSize, Gargs);
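+    // Bumping Gargs by one stackElementSize pops the trailing MemberName
+    // (argument slot 0), so the callee sees only the user-visible arguments.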
+    generate_method_handle_dispatch(_masm, iid, O0_recv, G5_member, not_for_compiler_entry);
   }
-#endif //ASSERT
 
-  // make room on the stack for another pointer:
-  insert_arg_slots(_masm, 2 * stack_move_unit(), O4_argbase, O1_scratch, O2_scratch, O3_scratch);
-  // load up an adapter from the calling type (Java weaves this)
-  Register O2_form    = O2_scratch;
-  Register O3_adapter = O3_scratch;
-  __ load_heap_oop(Address(O0_mtype, __ delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes,               O1_scratch)), O2_form);
-  __ load_heap_oop(Address(O2_form,  __ delayed_value(java_lang_invoke_MethodTypeForm::genericInvoker_offset_in_bytes, O1_scratch)), O3_adapter);
-  __ verify_oop(O3_adapter);
-  __ st_ptr(O3_adapter, Address(O4_argbase, 1 * Interpreter::stackElementSize));
-  // As a trusted first argument, pass the type being called, so the adapter knows
-  // the actual types of the arguments and return values.
-  // (Generic invokers are shared among form-families of method-type.)
-  __ st_ptr(O0_mtype,   Address(O4_argbase, 0 * Interpreter::stackElementSize));
-  // FIXME: assert that O3_adapter is of the right method-type.
-  __ mov(O3_adapter, G3_method_handle);
-  trace_method_handle(_masm, "invokeGeneric");
-  __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
+  if (PrintMethodHandleStubs) {
+    address code_end = __ pc();
+    tty->print_cr("--------");
+    tty->print_cr("method handle interpreter entry for %s", vmIntrinsics::name_at(iid));
+    Disassembler::decode(code_start, code_end);
+    tty->cr();
+  }
 
   return entry_point;
 }
 
-// Workaround for C++ overloading nastiness on '0' for RegisterOrConstant.
-static RegisterOrConstant constant(int value) {
-  return RegisterOrConstant(value);
-}
+void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm,
+                                                    vmIntrinsics::ID iid,
+                                                    Register receiver_reg,
+                                                    Register member_reg,
+                                                    bool for_compiler_entry) {
+  assert(is_signature_polymorphic(iid), "expected invoke iid");
+  // temps used in this code are not used in *either* compiled or interpreted calling sequences
+  Register temp1 = (for_compiler_entry ? G1_scratch : O1);
+  Register temp2 = (for_compiler_entry ? G4_scratch : O4);
+  Register temp3 = G3_scratch;
+  Register temp4 = (for_compiler_entry ? noreg      : O2);
+  if (for_compiler_entry) {
+    assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : O0), "only valid assignment");
+    assert_different_registers(temp1,      O0, O1, O2, O3, O4, O5);
+    assert_different_registers(temp2,      O0, O1, O2, O3, O4, O5);
+    assert_different_registers(temp3,      O0, O1, O2, O3, O4, O5);
+    assert_different_registers(temp4,      O0, O1, O2, O3, O4, O5);
+  }
+  if (receiver_reg != noreg)  assert_different_registers(temp1, temp2, temp3, temp4, receiver_reg);
+  if (member_reg   != noreg)  assert_different_registers(temp1, temp2, temp3, temp4, member_reg);
+  if (!for_compiler_entry)    assert_different_registers(temp1, temp2, temp3, temp4, O5_savedSP);  // don't trash lastSP
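+  // Rough register plan (a summary of the asserts above): for compiler entries
+  // O0..O5 carry live incoming arguments, so temps come from the G registers;
+  // for the interpreter entry O1/O2/O4 are free, but O5_savedSP must survive.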
 
-static void load_vmargslot(MacroAssembler* _masm, Address vmargslot_addr, Register result) {
-  __ ldsw(vmargslot_addr, result);
-}
+  if (iid == vmIntrinsics::_invokeBasic) {
+    // indirect through MH.form.vmentry.vmtarget
+    jump_to_lambda_form(_masm, receiver_reg, G5_method, temp2, temp3, for_compiler_entry);
 
-static RegisterOrConstant adjust_SP_and_Gargs_down_by_slots(MacroAssembler* _masm,
-                                                            RegisterOrConstant arg_slots,
-                                                            Register temp_reg, Register temp2_reg) {
-  // Keep the stack pointer 2*wordSize aligned.
-  const int TwoWordAlignmentMask = right_n_bits(LogBytesPerWord + 1);
-  if (arg_slots.is_constant()) {
-    const int        offset = arg_slots.as_constant() << LogBytesPerWord;
-    const int masked_offset = round_to(offset, 2 * BytesPerWord);
-    const int masked_offset2 = (offset + 1*BytesPerWord) & ~TwoWordAlignmentMask;
-    assert(masked_offset == masked_offset2, "must agree");
-    __ sub(Gargs,        offset, Gargs);
-    __ sub(SP,    masked_offset, SP   );
-    return offset;
   } else {
-#ifdef ASSERT
+    // The method is a member invoker used by direct method handles.
+    if (VerifyMethodHandles) {
+      // make sure the trailing argument really is a MemberName (caller responsibility)
+      verify_klass(_masm, member_reg, SystemDictionaryHandles::MemberName_klass(),
+                   temp1, temp2,
+                   "MemberName required for invokeVirtual etc.");
+    }
+
+    Address member_clazz(    member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes()));
+    Address member_vmindex(  member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes()));
+    Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::vmtarget_offset_in_bytes()));
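+    // MemberName fields used below (a sketch of the Java-side object):
+    //   clazz    -- Class mirror of the defining/receiver class
+    //   vmindex  -- vtable or itable index, for virtual/interface dispatch
+    //   vmtarget -- the resolved methodOop, for special/static dispatch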
+
+    Register temp1_recv_klass = temp1;
+    if (iid != vmIntrinsics::_linkToStatic) {
+      __ verify_oop(receiver_reg);
+      if (iid == vmIntrinsics::_linkToSpecial) {
+        // Don't actually load the klass; just null-check the receiver.
+        __ null_check(receiver_reg);
+      } else {
+        // load receiver klass itself
+        __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes());
+        __ load_klass(receiver_reg, temp1_recv_klass);
+        __ verify_oop(temp1_recv_klass);
+      }
+      BLOCK_COMMENT("check_receiver {");
+      // The receiver for the MemberName must be in receiver_reg.
+      // Check the receiver against the MemberName.clazz
+      if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) {
+        // Did not load it above...
+        __ load_klass(receiver_reg, temp1_recv_klass);
+        __ verify_oop(temp1_recv_klass);
+      }
+      if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) {
+        Label L_ok;
+        Register temp2_defc = temp2;
+        __ load_heap_oop(member_clazz, temp2_defc);
+        load_klass_from_Class(_masm, temp2_defc, temp3, temp4);
+        __ verify_oop(temp2_defc);
+        __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, temp4, L_ok);
+        // If we get here, the type check failed!
+        __ STOP("receiver class disagrees with MemberName.clazz");
+        __ bind(L_ok);
+      }
+      BLOCK_COMMENT("} check_receiver");
+    }
+    if (iid == vmIntrinsics::_linkToSpecial ||
+        iid == vmIntrinsics::_linkToStatic) {
+      DEBUG_ONLY(temp1_recv_klass = noreg);  // these guys didn't load the recv_klass
+    }
+
+    // Live registers at this point:
+    //  member_reg - MemberName that was the trailing argument
+    //  temp1_recv_klass - klass of stacked receiver, if needed
+    //  O5_savedSP - interpreter linkage (if interpreted)
+    //  O0..O7,G1,G4 - compiler arguments (if compiled)
+
+    bool method_is_live = false;
+    switch (iid) {
+    case vmIntrinsics::_linkToSpecial:
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3);
+      }
+      __ load_heap_oop(member_vmtarget, G5_method);
+      method_is_live = true;
+      break;
+
+    case vmIntrinsics::_linkToStatic:
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3);
+      }
+      __ load_heap_oop(member_vmtarget, G5_method);
+      method_is_live = true;
+      break;
+
+    case vmIntrinsics::_linkToVirtual:
     {
-      Label L_ok;
-      __ cmp_and_br_short(arg_slots.as_register(), 0, Assembler::greaterEqual, Assembler::pt, L_ok);
-      __ stop("negative arg_slots");
-      __ bind(L_ok);
+      // same as TemplateTable::invokevirtual,
+      // minus the CP setup and profiling:
+
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3);
+      }
+
+      // pick out the vtable index from the MemberName, and then we can discard it:
+      Register temp2_index = temp2;
+      __ ld_ptr(member_vmindex, temp2_index);
+
+      if (VerifyMethodHandles) {
+        Label L_index_ok;
+        __ cmp_and_br_short(temp2_index, (int) 0, Assembler::greaterEqual, Assembler::pn, L_index_ok);
+        __ STOP("no virtual index");
+        __ BIND(L_index_ok);
+      }
+
+      // Note:  The verifier invariants allow us to ignore MemberName.clazz and vmtarget
+      // at this point.  And VerifyMethodHandles has already checked clazz, if needed.
+
+      // get target methodOop & entry point
+      __ lookup_virtual_method(temp1_recv_klass, temp2_index, G5_method);
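+      // lookup_virtual_method is roughly (a sketch of the vtable indexing):
+      //   G5_method = recv_klass->vtable()[vtable_index].method();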
+      method_is_live = true;
+      break;
     }
-#endif
-    __ sll_ptr(arg_slots.as_register(), LogBytesPerWord, temp_reg);
-    __ add( temp_reg,  1*BytesPerWord,       temp2_reg);
-    __ andn(temp2_reg, TwoWordAlignmentMask, temp2_reg);
-    __ sub(Gargs, temp_reg,  Gargs);
-    __ sub(SP,    temp2_reg, SP   );
-    return temp_reg;
+
+    case vmIntrinsics::_linkToInterface:
+    {
+      // same as TemplateTable::invokeinterface
+      // (minus the CP setup and profiling, with different argument motion)
+      if (VerifyMethodHandles) {
+        verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3);
+      }
+
+      Register temp3_intf = temp3;
+      __ load_heap_oop(member_clazz, temp3_intf);
+      load_klass_from_Class(_masm, temp3_intf, temp2, temp4);
+      __ verify_oop(temp3_intf);
+
+      Register G5_index = G5_method;
+      __ ld_ptr(member_vmindex, G5_index);
+      if (VerifyMethodHandles) {
+        Label L;
+        __ cmp_and_br_short(G5_index, 0, Assembler::greaterEqual, Assembler::pt, L);
+        __ STOP("invalid vtable index for MH.invokeInterface");
+        __ bind(L);
+      }
+
+      // given intf, index, and recv klass, dispatch to the implementation method
+      Label L_no_such_interface;
+      Register no_sethi_temp = noreg;
+      __ lookup_interface_method(temp1_recv_klass, temp3_intf,
+                                 // note: next two args must be the same:
+                                 G5_index, G5_method,
+                                 temp2, no_sethi_temp,
+                                 L_no_such_interface);
+
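+      // (lookup_interface_method scans the receiver klass's itable for
+      // temp3_intf; on success it leaves the implementation methodOop in
+      // G5_index/G5_method -- which must alias, per the note above -- and on
+      // failure it branches to L_no_such_interface.)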
+      __ verify_oop(G5_method);
+      jump_from_method_handle(_masm, G5_method, temp2, temp3, for_compiler_entry);
+
+      __ bind(L_no_such_interface);
+      AddressLiteral icce(StubRoutines::throw_IncompatibleClassChangeError_entry());
+      __ jump_to(icce, temp3);
+      __ delayed()->nop();
+      break;
+    }
+
+    default:
+      fatal(err_msg("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)));
+      break;
+    }
+
+    if (method_is_live) {
+      // live at this point:  G5_method, O5_savedSP (if interpreted)
+
+      // After figuring out which concrete method to call, jump into it.
+      // Note that this works in the interpreter with no data motion.
+      // But the compiled version will require that the receiver register
+      // (receiver_reg; rcx_recv in the x86 version of this code) be shifted out.
+      __ verify_oop(G5_method);
+      jump_from_method_handle(_masm, G5_method, temp1, temp3, for_compiler_entry);
+    }
   }
 }
 
-static RegisterOrConstant adjust_SP_and_Gargs_up_by_slots(MacroAssembler* _masm,
-                                                          RegisterOrConstant arg_slots,
-                                                          Register temp_reg, Register temp2_reg) {
-  // Keep the stack pointer 2*wordSize aligned.
-  const int TwoWordAlignmentMask = right_n_bits(LogBytesPerWord + 1);
-  if (arg_slots.is_constant()) {
-    const int        offset = arg_slots.as_constant() << LogBytesPerWord;
-    const int masked_offset = offset & ~TwoWordAlignmentMask;
-    __ add(Gargs,        offset, Gargs);
-    __ add(SP,    masked_offset, SP   );
-    return offset;
-  } else {
-    __ sll_ptr(arg_slots.as_register(), LogBytesPerWord, temp_reg);
-    __ andn(temp_reg, TwoWordAlignmentMask, temp2_reg);
-    __ add(Gargs, temp_reg,  Gargs);
-    __ add(SP,    temp2_reg, SP   );
-    return temp_reg;
-  }
-}
-
-// Helper to insert argument slots into the stack.
-// arg_slots must be a multiple of stack_move_unit() and < 0
-// argslot_reg is decremented to point to the new (shifted) location of the argslot
-// But, temp_reg ends up holding the original value of argslot_reg.
-void MethodHandles::insert_arg_slots(MacroAssembler* _masm,
-                                     RegisterOrConstant arg_slots,
-                                     Register argslot_reg,
-                                     Register temp_reg, Register temp2_reg, Register temp3_reg) {
-  // allow constant zero
-  if (arg_slots.is_constant() && arg_slots.as_constant() == 0)
-    return;
-
-  assert_different_registers(argslot_reg, temp_reg, temp2_reg, temp3_reg,
-                             (!arg_slots.is_register() ? Gargs : arg_slots.as_register()));
-
-  BLOCK_COMMENT("insert_arg_slots {");
-  if (VerifyMethodHandles)
-    verify_argslot(_masm, argslot_reg, temp_reg, "insertion point must fall within current frame");
-  if (VerifyMethodHandles)
-    verify_stack_move(_masm, arg_slots, -1);
-
-  // Make space on the stack for the inserted argument(s).
-  // Then pull down everything shallower than argslot_reg.
-  // The stacked return address gets pulled down with everything else.
-  // That is, copy [sp, argslot) downward by -size words.  In pseudo-code:
-  //   sp -= size;
-  //   for (temp = sp + size; temp < argslot; temp++)
-  //     temp[-size] = temp[0]
-  //   argslot -= size;
-
-  // offset is temp3_reg in case of arg_slots being a register.
-  RegisterOrConstant offset = adjust_SP_and_Gargs_up_by_slots(_masm, arg_slots, temp3_reg, temp_reg);
-  __ sub(Gargs, offset, temp_reg);  // source pointer for copy
-
-  {
-    Label loop;
-    __ BIND(loop);
-    // pull one word down each time through the loop
-    __ ld_ptr(           Address(temp_reg, 0     ), temp2_reg);
-    __ st_ptr(temp2_reg, Address(temp_reg, offset)           );
-    __ add(temp_reg, wordSize, temp_reg);
-    __ cmp_and_brx_short(temp_reg, argslot_reg, Assembler::lessUnsigned, Assembler::pt, loop);
-  }
-
-  // Now move the argslot down, to point to the opened-up space.
-  __ add(argslot_reg, offset, argslot_reg);
-  BLOCK_COMMENT("} insert_arg_slots");
-}
-
-
-// Helper to remove argument slots from the stack.
-// arg_slots must be a multiple of stack_move_unit() and > 0
-void MethodHandles::remove_arg_slots(MacroAssembler* _masm,
-                                     RegisterOrConstant arg_slots,
-                                     Register argslot_reg,
-                                     Register temp_reg, Register temp2_reg, Register temp3_reg) {
-  // allow constant zero
-  if (arg_slots.is_constant() && arg_slots.as_constant() == 0)
-    return;
-  assert_different_registers(argslot_reg, temp_reg, temp2_reg, temp3_reg,
-                             (!arg_slots.is_register() ? Gargs : arg_slots.as_register()));
-
-  BLOCK_COMMENT("remove_arg_slots {");
-  if (VerifyMethodHandles)
-    verify_argslots(_masm, arg_slots, argslot_reg, temp_reg, temp2_reg, false,
-                    "deleted argument(s) must fall within current frame");
-  if (VerifyMethodHandles)
-    verify_stack_move(_masm, arg_slots, +1);
-
-  // Pull up everything shallower than argslot.
-  // Then remove the excess space on the stack.
-  // The stacked return address gets pulled up with everything else.
-  // That is, copy [sp, argslot) upward by size words.  In pseudo-code:
-  //   for (temp = argslot-1; temp >= sp; --temp)
-  //     temp[size] = temp[0]
-  //   argslot += size;
-  //   sp += size;
-
-  RegisterOrConstant offset = __ regcon_sll_ptr(arg_slots, LogBytesPerWord, temp3_reg);
-  __ sub(argslot_reg, wordSize, temp_reg);  // source pointer for copy
-
-  {
-    Label L_loop;
-    __ BIND(L_loop);
-    // pull one word up each time through the loop
-    __ ld_ptr(           Address(temp_reg, 0     ), temp2_reg);
-    __ st_ptr(temp2_reg, Address(temp_reg, offset)           );
-    __ sub(temp_reg, wordSize, temp_reg);
-    __ cmp_and_brx_short(temp_reg, Gargs, Assembler::greaterEqualUnsigned, Assembler::pt, L_loop);
-  }
-
-  // And adjust the argslot address to point at the deletion point.
-  __ add(argslot_reg, offset, argslot_reg);
-
-  // We don't need the offset at this point anymore, just adjust SP and Gargs.
-  (void) adjust_SP_and_Gargs_up_by_slots(_masm, arg_slots, temp3_reg, temp_reg);
-
-  BLOCK_COMMENT("} remove_arg_slots");
-}
-
-// Helper to copy argument slots to the top of the stack.
-// The sequence starts with argslot_reg and is counted by slot_count
-// slot_count must be a multiple of stack_move_unit() and >= 0
-// This function blows the temps but does not change argslot_reg.
-void MethodHandles::push_arg_slots(MacroAssembler* _masm,
-                                   Register argslot_reg,
-                                   RegisterOrConstant slot_count,
-                                   Register temp_reg, Register temp2_reg) {
-  // allow constant zero
-  if (slot_count.is_constant() && slot_count.as_constant() == 0)
-    return;
-  assert_different_registers(argslot_reg, temp_reg, temp2_reg,
-                             (!slot_count.is_register() ? Gargs : slot_count.as_register()),
-                             SP);
-  assert(Interpreter::stackElementSize == wordSize, "else change this code");
-
-  BLOCK_COMMENT("push_arg_slots {");
-  if (VerifyMethodHandles)
-    verify_stack_move(_masm, slot_count, 0);
-
-  RegisterOrConstant offset = adjust_SP_and_Gargs_down_by_slots(_masm, slot_count, temp2_reg, temp_reg);
-
-  if (slot_count.is_constant()) {
-    for (int i = slot_count.as_constant() - 1; i >= 0; i--) {
-      __ ld_ptr(          Address(argslot_reg, i * wordSize), temp_reg);
-      __ st_ptr(temp_reg, Address(Gargs,       i * wordSize));
-    }
-  } else {
-    Label L_plural, L_loop, L_break;
-    // Emit code to dynamically check for the common cases, zero and one slot.
-    __ cmp(slot_count.as_register(), (int32_t) 1);
-    __ br(Assembler::greater, false, Assembler::pn, L_plural);
-    __ delayed()->nop();
-    __ br(Assembler::less, false, Assembler::pn, L_break);
-    __ delayed()->nop();
-    __ ld_ptr(          Address(argslot_reg, 0), temp_reg);
-    __ st_ptr(temp_reg, Address(Gargs,       0));
-    __ ba_short(L_break);
-    __ BIND(L_plural);
-
-    // Loop for 2 or more:
-    //   top = &argslot[slot_count]
-    //   while (top > argslot)  *(--Gargs) = *(--top)
-    Register top_reg = temp_reg;
-    __ add(argslot_reg, offset, top_reg);
-    __ add(Gargs,       offset, Gargs  );  // move back up again so we can go down
-    __ BIND(L_loop);
-    __ sub(top_reg, wordSize, top_reg);
-    __ sub(Gargs,   wordSize, Gargs  );
-    __ ld_ptr(           Address(top_reg, 0), temp2_reg);
-    __ st_ptr(temp2_reg, Address(Gargs,   0));
-    __ cmp_and_brx_short(top_reg, argslot_reg, Assembler::greaterUnsigned, Assembler::pt, L_loop);
-    __ BIND(L_break);
-  }
-  BLOCK_COMMENT("} push_arg_slots");
-}
-
-// in-place movement; no change to Gargs
-// blows temp_reg, temp2_reg
-void MethodHandles::move_arg_slots_up(MacroAssembler* _masm,
-                                      Register bottom_reg,  // invariant
-                                      Address  top_addr,    // can use temp_reg
-                                      RegisterOrConstant positive_distance_in_slots,  // destroyed if register
-                                      Register temp_reg, Register temp2_reg) {
-  assert_different_registers(bottom_reg,
-                             temp_reg, temp2_reg,
-                             positive_distance_in_slots.register_or_noreg());
-  BLOCK_COMMENT("move_arg_slots_up {");
-  Label L_loop, L_break;
-  Register top_reg = temp_reg;
-  if (!top_addr.is_same_address(Address(top_reg, 0))) {
-    __ add(top_addr, top_reg);
-  }
-  // Detect empty (or broken) loop:
-#ifdef ASSERT
-  if (VerifyMethodHandles) {
-    // Verify that &bottom < &top (non-empty interval)
-    Label L_ok, L_bad;
-    if (positive_distance_in_slots.is_register()) {
-      __ cmp(positive_distance_in_slots.as_register(), (int32_t) 0);
-      __ br(Assembler::lessEqual, false, Assembler::pn, L_bad);
-      __ delayed()->nop();
-    }
-    __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
-    __ BIND(L_bad);
-    __ stop("valid bounds (copy up)");
-    __ BIND(L_ok);
-  }
-#endif
-  __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::greaterEqualUnsigned, Assembler::pn, L_break);
-  // work top down to bottom, copying contiguous data upwards
-  // In pseudo-code:
-  //   while (--top >= bottom) *(top + distance) = *(top + 0);
-  RegisterOrConstant offset = __ argument_offset(positive_distance_in_slots, positive_distance_in_slots.register_or_noreg());
-  __ BIND(L_loop);
-  __ sub(top_reg, wordSize, top_reg);
-  __ ld_ptr(           Address(top_reg, 0     ), temp2_reg);
-  __ st_ptr(temp2_reg, Address(top_reg, offset)           );
-  __ cmp_and_brx_short(top_reg, bottom_reg, Assembler::greaterUnsigned, Assembler::pt, L_loop);
-  assert(Interpreter::stackElementSize == wordSize, "else change loop");
-  __ BIND(L_break);
-  BLOCK_COMMENT("} move_arg_slots_up");
-}
-
-// in-place movement; no change to rsp
-// blows temp_reg, temp2_reg
-void MethodHandles::move_arg_slots_down(MacroAssembler* _masm,
-                                        Address  bottom_addr,  // can use temp_reg
-                                        Register top_reg,      // invariant
-                                        RegisterOrConstant negative_distance_in_slots,  // destroyed if register
-                                        Register temp_reg, Register temp2_reg) {
-  assert_different_registers(top_reg,
-                             negative_distance_in_slots.register_or_noreg(),
-                             temp_reg, temp2_reg);
-  BLOCK_COMMENT("move_arg_slots_down {");
-  Label L_loop, L_break;
-  Register bottom_reg = temp_reg;
-  if (!bottom_addr.is_same_address(Address(bottom_reg, 0))) {
-    __ add(bottom_addr, bottom_reg);
-  }
-  // Detect empty (or broken) loop:
-#ifdef ASSERT
-  assert(!negative_distance_in_slots.is_constant() || negative_distance_in_slots.as_constant() < 0, "");
-  if (VerifyMethodHandles) {
-    // Verify that &bottom < &top (non-empty interval)
-    Label L_ok, L_bad;
-    if (negative_distance_in_slots.is_register()) {
-      __ cmp(negative_distance_in_slots.as_register(), (int32_t) 0);
-      __ br(Assembler::greaterEqual, false, Assembler::pn, L_bad);
-      __ delayed()->nop();
-    }
-    __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
-    __ BIND(L_bad);
-    __ stop("valid bounds (copy down)");
-    __ BIND(L_ok);
-  }
-#endif
-  __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::greaterEqualUnsigned, Assembler::pn, L_break);
-  // work bottom up to top, copying contiguous data downwards
-  // In pseudo-code:
-  //   while (bottom < top) *(bottom - distance) = *(bottom + 0), bottom++;
-  RegisterOrConstant offset = __ argument_offset(negative_distance_in_slots, negative_distance_in_slots.register_or_noreg());
-  __ BIND(L_loop);
-  __ ld_ptr(           Address(bottom_reg, 0     ), temp2_reg);
-  __ st_ptr(temp2_reg, Address(bottom_reg, offset)           );
-  __ add(bottom_reg, wordSize, bottom_reg);
-  __ cmp_and_brx_short(bottom_reg, top_reg, Assembler::lessUnsigned, Assembler::pt, L_loop);
-  assert(Interpreter::stackElementSize == wordSize, "else change loop");
-  __ BIND(L_break);
-  BLOCK_COMMENT("} move_arg_slots_down");
-}
-
-// Copy from a field or array element to a stacked argument slot.
-// is_element (ignored) says whether caller is loading an array element instead of an instance field.
-void MethodHandles::move_typed_arg(MacroAssembler* _masm,
-                                   BasicType type, bool is_element,
-                                   Address value_src, Address slot_dest,
-                                   Register temp_reg) {
-  assert(!slot_dest.uses(temp_reg), "must be different register");
-  BLOCK_COMMENT(!is_element ? "move_typed_arg {" : "move_typed_arg { (array element)");
-  if (type == T_OBJECT || type == T_ARRAY) {
-    __ load_heap_oop(value_src, temp_reg);
-    __ verify_oop(temp_reg);
-    __ st_ptr(temp_reg, slot_dest);
-  } else if (type != T_VOID) {
-    int  arg_size      = type2aelembytes(type);
-    bool arg_is_signed = is_signed_subword_type(type);
-    int  slot_size     = is_subword_type(type) ? type2aelembytes(T_INT) : arg_size;  // store int sub-words as int
-    __ load_sized_value( value_src, temp_reg, arg_size, arg_is_signed);
-    __ store_sized_value(temp_reg, slot_dest, slot_size              );
-  }
-  BLOCK_COMMENT("} move_typed_arg");
-}
-
-// Cf. TemplateInterpreterGenerator::generate_return_entry_for and
-// InterpreterMacroAssembler::save_return_value
-void MethodHandles::move_return_value(MacroAssembler* _masm, BasicType type,
-                                      Address return_slot) {
-  BLOCK_COMMENT("move_return_value {");
-  // Look at the type and pull the value out of the corresponding register.
-  if (type == T_VOID) {
-    // nothing to do
-  } else if (type == T_OBJECT) {
-    __ verify_oop(O0);
-    __ st_ptr(O0, return_slot);
-  } else if (type == T_INT || is_subword_type(type)) {
-    int type_size = type2aelembytes(T_INT);
-    __ store_sized_value(O0, return_slot, type_size);
-  } else if (type == T_LONG) {
-    // store the value by parts
-    // Note: We assume longs are contiguous (if misaligned) on the interpreter stack.
-#if !defined(_LP64) && defined(COMPILER2)
-    __ stx(G1, return_slot);
-#else
-  #ifdef _LP64
-    __ stx(O0, return_slot);
-  #else
-    if (return_slot.has_disp()) {
-      // The displacement is a constant
-      __ st(O0, return_slot);
-      __ st(O1, return_slot.plus_disp(Interpreter::stackElementSize));
-    } else {
-      __ std(O0, return_slot);
-    }
-  #endif
-#endif
-  } else if (type == T_FLOAT) {
-    __ stf(FloatRegisterImpl::S, Ftos_f, return_slot);
-  } else if (type == T_DOUBLE) {
-    __ stf(FloatRegisterImpl::D, Ftos_f, return_slot);
-  } else {
-    ShouldNotReachHere();
-  }
-  BLOCK_COMMENT("} move_return_value");
-}
-
 #ifndef PRODUCT
-void MethodHandles::RicochetFrame::describe(const frame* fr, FrameValues& values, int frame_no)  {
-    RicochetFrame* rf = new RicochetFrame(*fr);
-
-    // ricochet slots (kept in registers for sparc)
-    values.describe(frame_no, rf->register_addr(I5_savedSP), err_msg("exact_sender_sp reg for #%d", frame_no));
-    values.describe(frame_no, rf->register_addr(L5_conversion), err_msg("conversion reg for #%d", frame_no));
-    values.describe(frame_no, rf->register_addr(L4_saved_args_base), err_msg("saved_args_base reg for #%d", frame_no));
-    values.describe(frame_no, rf->register_addr(L3_saved_args_layout), err_msg("saved_args_layout reg for #%d", frame_no));
-    values.describe(frame_no, rf->register_addr(L2_saved_target), err_msg("saved_target reg for #%d", frame_no));
-    values.describe(frame_no, rf->register_addr(L1_continuation), err_msg("continuation reg for #%d", frame_no));
-
-    // relevant ricochet targets (in caller frame)
-    values.describe(-1, rf->saved_args_base(),  err_msg("*saved_args_base for #%d", frame_no));
-    values.describe(-1, (intptr_t *)(STACK_BIAS+(uintptr_t)rf->exact_sender_sp()),  err_msg("*exact_sender_sp+STACK_BIAS for #%d", frame_no));
-}
-#endif // ASSERT
-
-#ifndef PRODUCT
-extern "C" void print_method_handle(oop mh);
 void trace_method_handle_stub(const char* adaptername,
                               oopDesc* mh,
                               intptr_t* saved_sp,
                               intptr_t* args,
                               intptr_t* tracing_fp) {
-  bool has_mh = (strstr(adaptername, "return/") == NULL);  // return adapters don't have mh
-
-  tty->print_cr("MH %s mh="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT " args=" INTPTR_FORMAT, adaptername, (intptr_t) mh, saved_sp, args);
+  bool has_mh = (strstr(adaptername, "/static") == NULL &&
+                 strstr(adaptername, "linkTo") == NULL);    // static linkers don't have MH
+  const char* mh_reg_name = has_mh ? "G3_mh" : "G3";
+  tty->print_cr("MH %s %s="INTPTR_FORMAT " saved_sp=" INTPTR_FORMAT " args=" INTPTR_FORMAT,
+                adaptername, mh_reg_name,
+                (intptr_t) mh, saved_sp, args);
 
   if (Verbose) {
     // dumping last frame with frame::describe
@@ -1090,6 +561,7 @@
 
     // mark saved_sp, if seems valid (may not be valid for some adapters)
     intptr_t *unbiased_sp = (intptr_t *)(STACK_BIAS+(uintptr_t)saved_sp);
+    const int ARG_LIMIT = 255, SLOP = 45, UNREASONABLE_STACK_MOVE = (ARG_LIMIT + SLOP);
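+    // (ARG_LIMIT reflects the JVM's cap of 255 argument slots per method;
+    // SLOP pads for adapter frame overhead -- a heuristic bound, not exact.)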
     if ((unbiased_sp >= dump_sp - UNREASONABLE_STACK_MOVE) && (unbiased_sp < dump_fp)) {
       values.describe(-1, unbiased_sp, "*saved_sp+STACK_BIAS");
     }
@@ -1097,10 +569,13 @@
     // Note: the unextended_sp may not be correct
     tty->print_cr("  stack layout:");
     values.print(p);
-  }
-
-  if (has_mh) {
-    print_method_handle(mh);
+    if (has_mh && mh->is_oop()) {
+      mh->print();
+      if (java_lang_invoke_MethodHandle::is_instance(mh)) {
+        if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0)
+          java_lang_invoke_MethodHandle::form(mh)->print();
+      }
+    }
   }
 }
 
@@ -1143,1260 +618,3 @@
   BLOCK_COMMENT("} trace_method_handle");
 }
 #endif // PRODUCT
-
-// which conversion op types are implemented here?
-int MethodHandles::adapter_conversion_ops_supported_mask() {
-  return ((1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_ONLY)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_RETYPE_RAW)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_CHECK_CAST)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_PRIM)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_REF_TO_PRIM)
-          // OP_PRIM_TO_REF is below...
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_SWAP_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_ROT_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DUP_ARGS)
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_DROP_ARGS)
-          // OP_COLLECT_ARGS is below...
-         |(1<<java_lang_invoke_AdapterMethodHandle::OP_SPREAD_ARGS)
-         |(
-           java_lang_invoke_MethodTypeForm::vmlayout_offset_in_bytes() <= 0 ? 0 :
-           ((1<<java_lang_invoke_AdapterMethodHandle::OP_PRIM_TO_REF)
-           |(1<<java_lang_invoke_AdapterMethodHandle::OP_COLLECT_ARGS)
-           |(1<<java_lang_invoke_AdapterMethodHandle::OP_FOLD_ARGS)
-           )
-          )
-         );
-}
-
-//------------------------------------------------------------------------------
-// MethodHandles::generate_method_handle_stub
-//
-// Generate an "entry" field for a method handle.
-// This determines how the method handle will respond to calls.
-void MethodHandles::generate_method_handle_stub(MacroAssembler* _masm, MethodHandles::EntryKind ek) {
-  MethodHandles::EntryKind ek_orig = ek_original_kind(ek);
-
-  // Here is the register state during an interpreted call,
-  // as set up by generate_method_handle_interpreter_entry():
-  // - G5: garbage temp (was MethodHandle.invoke methodOop, unused)
-  // - G3: receiver method handle
-  // - O5_savedSP: sender SP (must preserve)
-
-  const Register O0_scratch = O0;
-  const Register O1_scratch = O1;
-  const Register O2_scratch = O2;
-  const Register O3_scratch = O3;
-  const Register O4_scratch = O4;
-  const Register G5_scratch = G5;
-
-  // Often used names:
-  const Register O0_argslot = O0;
-
-  // Argument registers for _raise_exception:
-  const Register O0_code     = O0;
-  const Register O1_actual   = O1;
-  const Register O2_required = O2;
-
-  guarantee(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes() != 0, "must have offsets");
-
-  // Some handy addresses:
-  Address G3_mh_vmtarget(   G3_method_handle, java_lang_invoke_MethodHandle::vmtarget_offset_in_bytes());
-
-  Address G3_dmh_vmindex(   G3_method_handle, java_lang_invoke_DirectMethodHandle::vmindex_offset_in_bytes());
-
-  Address G3_bmh_vmargslot( G3_method_handle, java_lang_invoke_BoundMethodHandle::vmargslot_offset_in_bytes());
-  Address G3_bmh_argument(  G3_method_handle, java_lang_invoke_BoundMethodHandle::argument_offset_in_bytes());
-
-  Address G3_amh_vmargslot( G3_method_handle, java_lang_invoke_AdapterMethodHandle::vmargslot_offset_in_bytes());
-  Address G3_amh_argument ( G3_method_handle, java_lang_invoke_AdapterMethodHandle::argument_offset_in_bytes());
-  Address G3_amh_conversion(G3_method_handle, java_lang_invoke_AdapterMethodHandle::conversion_offset_in_bytes());
-
-  const int java_mirror_offset = in_bytes(Klass::java_mirror_offset());
-
-  if (have_entry(ek)) {
-    __ nop();  // empty stubs make SG sick
-    return;
-  }
-
-  address interp_entry = __ pc();
-
-  trace_method_handle(_masm, entry_name(ek));
-
-  BLOCK_COMMENT(err_msg("Entry %s {", entry_name(ek)));
-
-  switch ((int) ek) {
-  case _raise_exception:
-    {
-      // Not a real MH entry, but rather shared code for raising an
-      // exception.  For sharing purposes the arguments are passed into registers
-      // and then placed in the interpreter calling convention here.
-      assert(raise_exception_method(), "must be set");
-      assert(raise_exception_method()->from_compiled_entry(), "method must be linked");
-
-      __ set(AddressLiteral((address) &_raise_exception_method), G5_method);
-      __ ld_ptr(Address(G5_method, 0), G5_method);
-
-      const int jobject_oop_offset = 0;
-      __ ld_ptr(Address(G5_method, jobject_oop_offset), G5_method);
-
-      adjust_SP_and_Gargs_down_by_slots(_masm, 3, noreg, noreg);
-
-      __ st    (O0_code,     __ argument_address(constant(2), noreg, 0));
-      __ st_ptr(O1_actual,   __ argument_address(constant(1), noreg, 0));
-      __ st_ptr(O2_required, __ argument_address(constant(0), noreg, 0));
-      jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
-    }
-    break;
-
-  case _invokestatic_mh:
-  case _invokespecial_mh:
-    {
-      __ load_heap_oop(G3_mh_vmtarget, G5_method);  // target is a methodOop
-      // Same as TemplateTable::invokestatic or invokespecial,
-      // minus the CP setup and profiling:
-      if (ek == _invokespecial_mh) {
-        // Must load & check the first argument before entering the target method.
-        __ load_method_handle_vmslots(O0_argslot, G3_method_handle, O1_scratch);
-        __ ld_ptr(__ argument_address(O0_argslot, O0_argslot, -1), G3_method_handle);
-        __ null_check(G3_method_handle);
-        __ verify_oop(G3_method_handle);
-      }
-      jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
-    }
-    break;
-
-  case _invokevirtual_mh:
-    {
-      // Same as TemplateTable::invokevirtual,
-      // minus the CP setup and profiling:
-
-      // Pick out the vtable index and receiver offset from the MH,
-      // and then we can discard it:
-      Register O2_index = O2_scratch;
-      __ load_method_handle_vmslots(O0_argslot, G3_method_handle, O1_scratch);
-      __ ldsw(G3_dmh_vmindex, O2_index);
-      // Note:  The verifier allows us to ignore G3_mh_vmtarget.
-      __ ld_ptr(__ argument_address(O0_argslot, O0_argslot, -1), G3_method_handle);
-      __ null_check(G3_method_handle, oopDesc::klass_offset_in_bytes());
-
-      // Get receiver klass:
-      Register O0_klass = O0_argslot;
-      __ load_klass(G3_method_handle, O0_klass);
-      __ verify_oop(O0_klass);
-
-      // Get target methodOop & entry point:
-      const int base = instanceKlass::vtable_start_offset() * wordSize;
-      assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below");
-
-      __ sll_ptr(O2_index, LogBytesPerWord, O2_index);
-      __ add(O0_klass, O2_index, O0_klass);
-      Address vtable_entry_addr(O0_klass, base + vtableEntry::method_offset_in_bytes());
-      __ ld_ptr(vtable_entry_addr, G5_method);
-
-      jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
-    }
-    break;
-
-  case _invokeinterface_mh:
-    {
-      // Same as TemplateTable::invokeinterface,
-      // minus the CP setup and profiling:
-      __ load_method_handle_vmslots(O0_argslot, G3_method_handle, O1_scratch);
-      Register O1_intf  = O1_scratch;
-      Register G5_index = G5_scratch;
-      __ load_heap_oop(G3_mh_vmtarget, O1_intf);
-      __ ldsw(G3_dmh_vmindex, G5_index);
-      __ ld_ptr(__ argument_address(O0_argslot, O0_argslot, -1), G3_method_handle);
-      __ null_check(G3_method_handle, oopDesc::klass_offset_in_bytes());
-
-      // Get receiver klass:
-      Register O0_klass = O0_argslot;
-      __ load_klass(G3_method_handle, O0_klass);
-      __ verify_oop(O0_klass);
-
-      // Get interface:
-      Label no_such_interface;
-      __ verify_oop(O1_intf);
-      __ lookup_interface_method(O0_klass, O1_intf,
-                                 // Note: next two args must be the same:
-                                 G5_index, G5_method,
-                                 O2_scratch,
-                                 O3_scratch,
-                                 no_such_interface);
-
-      jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
-
-      __ bind(no_such_interface);
-      // Throw an exception.
-      // For historical reasons, it will be IncompatibleClassChangeError.
-      __ unimplemented("not tested yet");
-      __ ld_ptr(Address(O1_intf, java_mirror_offset), O2_required);  // required interface
-      __ mov(   O0_klass,                             O1_actual);    // bad receiver
-      __ jump_to(AddressLiteral(from_interpreted_entry(_raise_exception)), O3_scratch);
-      __ delayed()->mov(Bytecodes::_invokeinterface,  O0_code);      // who is complaining?
-    }
-    break;
-
-  case _bound_ref_mh:
-  case _bound_int_mh:
-  case _bound_long_mh:
-  case _bound_ref_direct_mh:
-  case _bound_int_direct_mh:
-  case _bound_long_direct_mh:
-    {
-      const bool direct_to_method = (ek >= _bound_ref_direct_mh);
-      BasicType arg_type  = ek_bound_mh_arg_type(ek);
-      int       arg_slots = type2size[arg_type];
-
-      // Make room for the new argument:
-      load_vmargslot(_masm, G3_bmh_vmargslot, O0_argslot);
-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
-
-      insert_arg_slots(_masm, arg_slots * stack_move_unit(), O0_argslot, O1_scratch, O2_scratch, O3_scratch);
-
-      // Store bound argument into the new stack slot:
-      __ load_heap_oop(G3_bmh_argument, O1_scratch);
-      if (arg_type == T_OBJECT) {
-        __ st_ptr(O1_scratch, Address(O0_argslot, 0));
-      } else {
-        Address prim_value_addr(O1_scratch, java_lang_boxing_object::value_offset_in_bytes(arg_type));
-        move_typed_arg(_masm, arg_type, false,
-                       prim_value_addr,
-                       Address(O0_argslot, 0),
-                      O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
-      }
-
-      if (direct_to_method) {
-        __ load_heap_oop(G3_mh_vmtarget, G5_method);  // target is a methodOop
-        jump_from_method_handle(_masm, G5_method, O1_scratch, O2_scratch);
-      } else {
-        __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);  // target is a methodOop
-        __ verify_oop(G3_method_handle);
-        __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-      }
-    }
-    break;
-
-  case _adapter_opt_profiling:
-    if (java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes() != 0) {
-      Address G3_mh_vmcount(G3_method_handle, java_lang_invoke_CountingMethodHandle::vmcount_offset_in_bytes());
-      __ ld(G3_mh_vmcount, O1_scratch);
-      __ add(O1_scratch, 1, O1_scratch);
-      __ st(O1_scratch, G3_mh_vmcount);
-    }
-    // fall through
-
-  case _adapter_retype_only:
-  case _adapter_retype_raw:
-    // Immediately jump to the next MH layer:
-    __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-    __ verify_oop(G3_method_handle);
-    __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    // This is OK when all parameter types widen.
-    // It is also OK when a return type narrows.
-    break;
-
-  case _adapter_check_cast:
-    {
-      // Check a reference argument before jumping to the next layer of MH:
-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
-      Address vmarg = __ argument_address(O0_argslot, O0_argslot);
-
-      // What class are we casting to?
-      Register O1_klass = O1_scratch;  // Interesting AMH data.
-      __ load_heap_oop(G3_amh_argument, O1_klass);  // This is a Class object!
-      load_klass_from_Class(_masm, O1_klass, O2_scratch, O3_scratch);
-
-      Label L_done;
-      __ ld_ptr(vmarg, O2_scratch);
-      __ br_null_short(O2_scratch, Assembler::pn, L_done);  // No cast if null.
-      __ load_klass(O2_scratch, O2_scratch);
-
-      // Live at this point:
-      // - O0_argslot      :  argslot index in vmarg; may be required in the failing path
-      // - O1_klass        :  klass required by the target method
-      // - O2_scratch      :  argument klass to test
-      // - G3_method_handle:  adapter method handle
-      __ check_klass_subtype(O2_scratch, O1_klass, O3_scratch, O4_scratch, L_done);
-
-      // If we get here, the type check failed!
-      __ load_heap_oop(G3_amh_argument,        O2_required);  // required class
-      __ ld_ptr(       vmarg,                  O1_actual);    // bad object
-      __ jump_to(AddressLiteral(from_interpreted_entry(_raise_exception)), O3_scratch);
-      __ delayed()->mov(Bytecodes::_checkcast, O0_code);      // who is complaining?
-
-      __ BIND(L_done);
-      // Get the new MH:
-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    }
-    break;
-
-  case _adapter_prim_to_prim:
-  case _adapter_ref_to_prim:
-    // Handled completely by optimized cases.
-    __ stop("init_AdapterMethodHandle should not issue this");
-    break;
-
-  case _adapter_opt_i2i:        // optimized subcase of adapt_prim_to_prim
-//case _adapter_opt_f2i:        // optimized subcase of adapt_prim_to_prim
-  case _adapter_opt_l2i:        // optimized subcase of adapt_prim_to_prim
-  case _adapter_opt_unboxi:     // optimized subcase of adapt_ref_to_prim
-    {
-      // Perform an in-place conversion to int or an int subword.
-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
-      Address value;
-      Address vmarg;
-      bool value_left_justified = false;
-
-      switch (ek) {
-      case _adapter_opt_i2i:
-        value = vmarg = __ argument_address(O0_argslot, O0_argslot);
-        break;
-      case _adapter_opt_l2i:
-        {
-          // just delete the extra slot
-#ifdef _LP64
-          // In V9, longs are given 2 64-bit slots in the interpreter, but the
-          // data is passed in only 1 slot.
-          // Keep the second slot.
-          __ add(__ argument_address(O0_argslot, O0_argslot, -1), O0_argslot);
-          remove_arg_slots(_masm, -stack_move_unit(), O0_argslot, O1_scratch, O2_scratch, O3_scratch);
-          value = Address(O0_argslot, 4);  // Get least-significant 32-bit of 64-bit value.
-          vmarg = Address(O0_argslot, Interpreter::stackElementSize);
-#else
-          // Keep the first slot.
-          __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
-          remove_arg_slots(_masm, -stack_move_unit(), O0_argslot, O1_scratch, O2_scratch, O3_scratch);
-          value = Address(O0_argslot, 0);
-          vmarg = value;
-#endif
-        }
-        break;
-      case _adapter_opt_unboxi:
-        {
-          vmarg = __ argument_address(O0_argslot, O0_argslot);
-          // Load the value up from the heap.
-          __ ld_ptr(vmarg, O1_scratch);
-          int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_INT);
-#ifdef ASSERT
-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
-            if (is_subword_type(BasicType(bt)))
-              assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(BasicType(bt)), "");
-          }
-#endif
-          __ null_check(O1_scratch, value_offset);
-          value = Address(O1_scratch, value_offset);
-#ifdef _BIG_ENDIAN
-          // Values stored in objects are packed.
-          value_left_justified = true;
-#endif
-        }
-        break;
-      default:
-        ShouldNotReachHere();
-      }
-
-      // This check is required on _BIG_ENDIAN
-      Register G5_vminfo = G5_scratch;
-      __ ldsw(G3_amh_conversion, G5_vminfo);
-      assert(CONV_VMINFO_SHIFT == 0, "preshifted");
-
-      // Original 32-bit vmdata word must be of this form:
-      // | MBZ:6 | signBitCount:8 | srcDstTypes:8 | conversionOp:8 |
-      __ lduw(value, O1_scratch);
-      if (!value_left_justified)
-        __ sll(O1_scratch, G5_vminfo, O1_scratch);
-      Label zero_extend, done;
-      __ btst(CONV_VMINFO_SIGN_FLAG, G5_vminfo);
-      __ br(Assembler::zero, false, Assembler::pn, zero_extend);
-      __ delayed()->nop();
-
-      // this path is taken for int->byte, int->short
-      __ sra(O1_scratch, G5_vminfo, O1_scratch);
-      __ ba_short(done);
-
-      __ bind(zero_extend);
-      // this is taken for int->char
-      __ srl(O1_scratch, G5_vminfo, O1_scratch);
-
-      __ bind(done);
-      __ st(O1_scratch, vmarg);
-
-      // Get the new MH:
-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    }
-    break;
-
-  case _adapter_opt_i2l:        // optimized subcase of adapt_prim_to_prim
-  case _adapter_opt_unboxl:     // optimized subcase of adapt_ref_to_prim
-    {
-      // Perform an in-place int-to-long or ref-to-long conversion.
-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
-
-      // On big-endian machine we duplicate the slot and store the MSW
-      // in the first slot.
-      __ add(__ argument_address(O0_argslot, O0_argslot, 1), O0_argslot);
-
-      insert_arg_slots(_masm, stack_move_unit(), O0_argslot, O1_scratch, O2_scratch, O3_scratch);
-
-      Address arg_lsw(O0_argslot, 0);
-      Address arg_msw(O0_argslot, -Interpreter::stackElementSize);
-
-      switch (ek) {
-      case _adapter_opt_i2l:
-        {
-#ifdef _LP64
-          __ ldsw(arg_lsw, O2_scratch);                 // Load LSW sign-extended
-#else
-          __ ldsw(arg_lsw, O3_scratch);                 // Load LSW sign-extended
-          __ srlx(O3_scratch, BitsPerInt, O2_scratch);  // Move MSW value to lower 32-bits for std
-#endif
-          __ st_long(O2_scratch, arg_msw);              // Uses O2/O3 on !_LP64
-        }
-        break;
-      case _adapter_opt_unboxl:
-        {
-          // Load the value up from the heap.
-          __ ld_ptr(arg_lsw, O1_scratch);
-          int value_offset = java_lang_boxing_object::value_offset_in_bytes(T_LONG);
-          assert(value_offset == java_lang_boxing_object::value_offset_in_bytes(T_DOUBLE), "");
-          __ null_check(O1_scratch, value_offset);
-          __ ld_long(Address(O1_scratch, value_offset), O2_scratch);  // Uses O2/O3 on !_LP64
-          __ st_long(O2_scratch, arg_msw);
-        }
-        break;
-      default:
-        ShouldNotReachHere();
-      }
-
-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    }
-    break;
-
-  case _adapter_opt_f2d:        // optimized subcase of adapt_prim_to_prim
-  case _adapter_opt_d2f:        // optimized subcase of adapt_prim_to_prim
-    {
-      // perform an in-place floating primitive conversion
-      __ unimplemented(entry_name(ek));
-    }
-    break;
-
-  case _adapter_prim_to_ref:
-    __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
-    break;
-
-  case _adapter_swap_args:
-  case _adapter_rot_args:
-    // handled completely by optimized cases
-    __ stop("init_AdapterMethodHandle should not issue this");
-    break;
-
-  case _adapter_opt_swap_1:
-  case _adapter_opt_swap_2:
-  case _adapter_opt_rot_1_up:
-  case _adapter_opt_rot_1_down:
-  case _adapter_opt_rot_2_up:
-  case _adapter_opt_rot_2_down:
-    {
-      int swap_slots = ek_adapter_opt_swap_slots(ek);
-      int rotate     = ek_adapter_opt_swap_mode(ek);
-
-      // 'argslot' is the position of the first argument to swap.
-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
-      if (VerifyMethodHandles)
-        verify_argslot(_masm, O0_argslot, O2_scratch, "swap point must fall within current frame");
-
-      // 'vminfo' is the second.
-      Register O1_destslot = O1_scratch;
-      load_conversion_vminfo(_masm, G3_amh_conversion, O1_destslot);
-      __ add(__ argument_address(O1_destslot, O1_destslot), O1_destslot);
-      if (VerifyMethodHandles)
-        verify_argslot(_masm, O1_destslot, O2_scratch, "swap point must fall within current frame");
-
-      assert(Interpreter::stackElementSize == wordSize, "else rethink use of wordSize here");
-      if (!rotate) {
-        // simple swap
-        for (int i = 0; i < swap_slots; i++) {
-          __ ld_ptr(            Address(O0_argslot,  i * wordSize), O2_scratch);
-          __ ld_ptr(            Address(O1_destslot, i * wordSize), O3_scratch);
-          __ st_ptr(O3_scratch, Address(O0_argslot,  i * wordSize));
-          __ st_ptr(O2_scratch, Address(O1_destslot, i * wordSize));
-        }
-      } else {
-        // A rotate is actually a pair of moves, with an "odd slot" (or pair)
-        // changing place with a series of other slots.
-        // First, push the "odd slot", which is going to get overwritten.
-        switch (swap_slots) {
-        case 2 :  __ ld_ptr(Address(O0_argslot, 1 * wordSize), O4_scratch); // fall-thru
-        case 1 :  __ ld_ptr(Address(O0_argslot, 0 * wordSize), O3_scratch); break;
-        default:  ShouldNotReachHere();
-        }
-        if (rotate > 0) {
-          // Here is the rotate > 0 case:
-          // (low mem)                                          (high mem)
-          //     | dest:     more_slots...     | arg: odd_slot :arg+1 |
-          // =>
-          //     | dest: odd_slot | dest+1: more_slots...      :arg+1 |
-          // work argslot down to destslot, copying contiguous data upwards
-          // pseudo-code:
-          //   argslot  = src_addr - swap_bytes
-          //   destslot = dest_addr
-          //   while (argslot >= destslot) *(argslot + swap_bytes) = *(argslot + 0), argslot--;
-          move_arg_slots_up(_masm,
-                            O1_destslot,
-                            Address(O0_argslot, 0),
-                            swap_slots,
-                            O0_argslot, O2_scratch);
-        } else {
-          // Here is the other direction, rotate < 0:
-          // (low mem)                                          (high mem)
-          //     | arg: odd_slot | arg+1: more_slots...       :dest+1 |
-          // =>
-          //     | arg:    more_slots...     | dest: odd_slot :dest+1 |
-          // work argslot up to destslot, copying contiguous data downwards
-          // pseudo-code:
-          //   argslot  = src_addr + swap_bytes
-          //   destslot = dest_addr
-          //   while (argslot <= destslot) *(argslot - swap_bytes) = *(argslot + 0), argslot++;
-          // dest_slot denotes an exclusive upper limit
-          int limit_bias = OP_ROT_ARGS_DOWN_LIMIT_BIAS;
-          if (limit_bias != 0)
-            __ add(O1_destslot, - limit_bias * wordSize, O1_destslot);
-          move_arg_slots_down(_masm,
-                              Address(O0_argslot, swap_slots * wordSize),
-                              O1_destslot,
-                              -swap_slots,
-                              O0_argslot, O2_scratch);
-
-          __ sub(O1_destslot, swap_slots * wordSize, O1_destslot);
-        }
-        // pop the original first chunk into the destination slot, now free
-        switch (swap_slots) {
-        case 2 :  __ st_ptr(O4_scratch, Address(O1_destslot, 1 * wordSize)); // fall-thru
-        case 1 :  __ st_ptr(O3_scratch, Address(O1_destslot, 0 * wordSize)); break;
-        default:  ShouldNotReachHere();
-        }
-      }
-
-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    }
-    break;
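-    // The rotate above, modeled on a plain array of slots (a sketch; 'n' is
-    // the slot distance between 'arg' and 'dest', one-slot odd chunk):
-    //   intptr_t odd = slot[0];              // save the odd slot
-    //   for (int i = 0; i < n; i++)          // shift the slots in between
-    //     slot[i] = slot[i + 1];
-    //   slot[n] = odd;                       // land it in the dest slot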
-
-  case _adapter_dup_args:
-    {
-      // 'argslot' is the position of the first argument to duplicate.
-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
-
-      // 'stack_move' is negative number of words to duplicate.
-      Register O1_stack_move = O1_scratch;
-      load_stack_move(_masm, G3_amh_conversion, O1_stack_move);
-
-      if (VerifyMethodHandles) {
-        verify_argslots(_masm, O1_stack_move, O0_argslot, O2_scratch, O3_scratch, true,
-                        "copied argument(s) must fall within current frame");
-      }
-
-      // insert location is always the bottom of the argument list:
-      __ neg(O1_stack_move);
-      push_arg_slots(_masm, O0_argslot, O1_stack_move, O2_scratch, O3_scratch);
-
-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    }
-    break;
-
-  case _adapter_drop_args:
-    {
-      // 'argslot' is the position of the first argument to nuke.
-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
-
-      // 'stack_move' is number of words to drop.
-      Register O1_stack_move = O1_scratch;
-      load_stack_move(_masm, G3_amh_conversion, O1_stack_move);
-
-      remove_arg_slots(_masm, O1_stack_move, O0_argslot, O2_scratch, O3_scratch, O4_scratch);
-
-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    }
-    break;
-
-  case _adapter_collect_args:
-  case _adapter_fold_args:
-  case _adapter_spread_args:
-    // Handled completely by optimized cases.
-    __ stop("init_AdapterMethodHandle should not issue this");
-    break;
-
-  case _adapter_opt_collect_ref:
-  case _adapter_opt_collect_int:
-  case _adapter_opt_collect_long:
-  case _adapter_opt_collect_float:
-  case _adapter_opt_collect_double:
-  case _adapter_opt_collect_void:
-  case _adapter_opt_collect_0_ref:
-  case _adapter_opt_collect_1_ref:
-  case _adapter_opt_collect_2_ref:
-  case _adapter_opt_collect_3_ref:
-  case _adapter_opt_collect_4_ref:
-  case _adapter_opt_collect_5_ref:
-  case _adapter_opt_filter_S0_ref:
-  case _adapter_opt_filter_S1_ref:
-  case _adapter_opt_filter_S2_ref:
-  case _adapter_opt_filter_S3_ref:
-  case _adapter_opt_filter_S4_ref:
-  case _adapter_opt_filter_S5_ref:
-  case _adapter_opt_collect_2_S0_ref:
-  case _adapter_opt_collect_2_S1_ref:
-  case _adapter_opt_collect_2_S2_ref:
-  case _adapter_opt_collect_2_S3_ref:
-  case _adapter_opt_collect_2_S4_ref:
-  case _adapter_opt_collect_2_S5_ref:
-  case _adapter_opt_fold_ref:
-  case _adapter_opt_fold_int:
-  case _adapter_opt_fold_long:
-  case _adapter_opt_fold_float:
-  case _adapter_opt_fold_double:
-  case _adapter_opt_fold_void:
-  case _adapter_opt_fold_1_ref:
-  case _adapter_opt_fold_2_ref:
-  case _adapter_opt_fold_3_ref:
-  case _adapter_opt_fold_4_ref:
-  case _adapter_opt_fold_5_ref:
-    {
-      // Given a fresh incoming stack frame, build a new ricochet frame.
-      // On entry, the return PC is in O7 (on x86, at TOS), and FP is the
-      // caller's frame ptr.  I5_savedSP (RSI/R13 on x86) holds the caller's
-      // exact stack pointer, which we must also preserve.  G3_method_handle
-      // (RCX on x86) contains an AdapterMethodHandle of the indicated kind.
-
-      // Relevant AMH fields:
-      // amh.vmargslot:
-      //   points to the trailing edge of the arguments
-      //   to filter, collect, or fold.  For a boxing operation,
-      //   it points just after the single primitive value.
-      // amh.argument:
-      //   recursively called MH, on |collect| arguments
-      // amh.vmtarget:
-      //   final destination MH, on return value, etc.
-      // amh.conversion.dest:
-      //   tells the type of the return value
-      //   (not needed here, since dest is also derived from ek)
-      // amh.conversion.vminfo:
-      //   points to the trailing edge of the return value
-      //   when the vmtarget is to be called; this is
-      //   equal to vmargslot + (retained ? |collect| : 0)
-
-      // Pass 0 or more argument slots to the recursive target.
-      int collect_count_constant = ek_adapter_opt_collect_count(ek);
-
-      // The collected arguments are copied from the saved argument list:
-      int collect_slot_constant = ek_adapter_opt_collect_slot(ek);
-
-      assert(ek_orig == _adapter_collect_args ||
-             ek_orig == _adapter_fold_args, "");
-      bool retain_original_args = (ek_orig == _adapter_fold_args);
-
-      // The return value is replaced (or inserted) at the 'vminfo' argslot.
-      // Sometimes we can compute this statically.
-      int dest_slot_constant = -1;
-      if (!retain_original_args)
-        dest_slot_constant = collect_slot_constant;
-      else if (collect_slot_constant >= 0 && collect_count_constant >= 0)
-        // We are preserving all the arguments, and the return value is prepended,
-        // so the return slot is to the left of (above) the |collect| sequence.
-        dest_slot_constant = collect_slot_constant + collect_count_constant;
-
-      // Replace all those slots by the result of the recursive call.
-      // The result type can be one of ref, int, long, float, double, void.
-      // In the case of void, nothing is pushed on the stack after return.
-      BasicType dest = ek_adapter_opt_collect_type(ek);
-      assert(dest == type2wfield[dest], "dest is a stack slot type");
-      int dest_count = type2size[dest];
-      assert(dest_count == 1 || dest_count == 2 || (dest_count == 0 && dest == T_VOID), "dest has a size");
-
-      // Choose a return continuation.
-      EntryKind ek_ret = _adapter_opt_return_any;
-      if (dest != T_CONFLICT && OptimizeMethodHandles) {
-        switch (dest) {
-        case T_INT    : ek_ret = _adapter_opt_return_int;     break;
-        case T_LONG   : ek_ret = _adapter_opt_return_long;    break;
-        case T_FLOAT  : ek_ret = _adapter_opt_return_float;   break;
-        case T_DOUBLE : ek_ret = _adapter_opt_return_double;  break;
-        case T_OBJECT : ek_ret = _adapter_opt_return_ref;     break;
-        case T_VOID   : ek_ret = _adapter_opt_return_void;    break;
-        default       : ShouldNotReachHere();
-        }
-        if (dest == T_OBJECT && dest_slot_constant >= 0) {
-          EntryKind ek_try = EntryKind(_adapter_opt_return_S0_ref + dest_slot_constant);
-          if (ek_try <= _adapter_opt_return_LAST &&
-              ek_adapter_opt_return_slot(ek_try) == dest_slot_constant) {
-            ek_ret = ek_try;
-          }
-        }
-        assert(ek_adapter_opt_return_type(ek_ret) == dest, "");
-      }
-
-      // Already pushed:  ... keep1 | collect | keep2 |
-
-      // Push a few extra argument words, if we need them to store the return value.
-      {
-        int extra_slots = 0;
-        if (retain_original_args) {
-          extra_slots = dest_count;
-        } else if (collect_count_constant == -1) {
-          extra_slots = dest_count;  // collect_count might be zero; be generous
-        } else if (dest_count > collect_count_constant) {
-          extra_slots = (dest_count - collect_count_constant);
-        } else {
-          // we know we have enough dead space in |collect| to repurpose for return values
-        }
-        if (extra_slots != 0) {
-          __ sub(SP, round_to(extra_slots, 2) * Interpreter::stackElementSize, SP);
-        }
-      }
-
-      // Set up Ricochet Frame.
-      __ mov(SP, O5_savedSP);  // record SP for the callee
-
-      // One extra (empty) slot for outgoing target MH (see Gargs computation below).
-      __ save_frame(2);  // Note: we need to add 2 slots since frame::memory_parameter_word_sp_offset is 23.
-
-      // Note: Gargs is live throughout the following, until we make our recursive call.
-      // And the RF saves a copy in L4_saved_args_base.
-
-      RicochetFrame::enter_ricochet_frame(_masm, G3_method_handle, Gargs,
-                                          entry(ek_ret)->from_interpreted_entry());
-
-      // Compute argument base:
-      // Set up Gargs for current frame, extra (empty) slot is for outgoing target MH (space reserved by save_frame above).
-      __ add(FP, STACK_BIAS - (1 * Interpreter::stackElementSize), Gargs);
-
-      // Now pushed:  ... keep1 | collect | keep2 | extra | [RF]
-
-#ifdef ASSERT
-      if (VerifyMethodHandles && dest != T_CONFLICT) {
-        BLOCK_COMMENT("verify AMH.conv.dest {");
-        extract_conversion_dest_type(_masm, RicochetFrame::L5_conversion, O1_scratch);
-        Label L_dest_ok;
-        __ cmp(O1_scratch, (int) dest);
-        __ br(Assembler::equal, false, Assembler::pt, L_dest_ok);
-        __ delayed()->nop();
-        if (dest == T_INT) {
-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
-            if (is_subword_type(BasicType(bt))) {
-              __ cmp(O1_scratch, (int) bt);
-              __ br(Assembler::equal, false, Assembler::pt, L_dest_ok);
-              __ delayed()->nop();
-            }
-          }
-        }
-        __ stop("bad dest in AMH.conv");
-        __ BIND(L_dest_ok);
-        BLOCK_COMMENT("} verify AMH.conv.dest");
-      }
-#endif //ASSERT
-
-      // Find out where the original copy of the recursive argument sequence begins.
-      Register O0_coll = O0_scratch;
-      {
-        RegisterOrConstant collect_slot = collect_slot_constant;
-        if (collect_slot_constant == -1) {
-          load_vmargslot(_masm, G3_amh_vmargslot, O1_scratch);
-          collect_slot = O1_scratch;
-        }
-        // collect_slot might be 0, but we need the move anyway.
-        __ add(RicochetFrame::L4_saved_args_base, __ argument_offset(collect_slot, collect_slot.register_or_noreg()), O0_coll);
-        // O0_coll now points at the trailing edge of |collect| and leading edge of |keep2|
-      }
-
-      // Replace the old AMH with the recursive MH.  (No going back now.)
-      // In the case of a boxing call, the recursive call is to a 'boxer' method,
-      // such as Integer.valueOf or Long.valueOf.  In the case of a filter
-      // or collect call, it will take one or more arguments, transform them,
-      // and return some result, to store back into argument_base[vminfo].
-      __ load_heap_oop(G3_amh_argument, G3_method_handle);
-      if (VerifyMethodHandles)  verify_method_handle(_masm, G3_method_handle, O1_scratch, O2_scratch);
-
-      // Calculate |collect|, the number of arguments we are collecting.
-      Register O1_collect_count = O1_scratch;
-      RegisterOrConstant collect_count;
-      if (collect_count_constant < 0) {
-        __ load_method_handle_vmslots(O1_collect_count, G3_method_handle, O2_scratch);
-        collect_count = O1_collect_count;
-      } else {
-        collect_count = collect_count_constant;
-#ifdef ASSERT
-        if (VerifyMethodHandles) {
-          BLOCK_COMMENT("verify collect_count_constant {");
-          __ load_method_handle_vmslots(O3_scratch, G3_method_handle, O2_scratch);
-          Label L_count_ok;
-          __ cmp_and_br_short(O3_scratch, collect_count_constant, Assembler::equal, Assembler::pt, L_count_ok);
-          __ stop("bad vminfo in AMH.conv");
-          __ BIND(L_count_ok);
-          BLOCK_COMMENT("} verify collect_count_constant");
-        }
-#endif //ASSERT
-      }
-
-      // copy |collect| slots directly to TOS:
-      push_arg_slots(_masm, O0_coll, collect_count, O2_scratch, O3_scratch);
-      // Now pushed:  ... keep1 | collect | keep2 | RF... | collect |
-      // O0_coll still points at the trailing edge of |collect| and leading edge of |keep2|
-
-      // If necessary, adjust the saved arguments to make room for the eventual return value.
-      // Normal adjustment:  ... keep1 | +dest+ | -collect- | keep2 | RF... | collect |
-      // If retaining args:  ... keep1 | +dest+ |  collect  | keep2 | RF... | collect |
-      // In the non-retaining case, this might move keep2 either up or down.
-      // We don't have to copy the whole | RF... collect | complex,
-      // but we must adjust RF.saved_args_base.
-      // Also, from now on, we will forget about the original copy of |collect|.
-      // If we are retaining it, we will treat it as part of |keep2|.
-      // For clarity we will define |keep3| = |collect|keep2| or |keep2|.
-
-      BLOCK_COMMENT("adjust trailing arguments {");
-      // Compare the sizes of |+dest+| and |-collect-|, which are opposed opening and closing movements.
-      int                open_count  = dest_count;
-      RegisterOrConstant close_count = collect_count_constant;
-      Register O1_close_count = O1_collect_count;
-      if (retain_original_args) {
-        close_count = constant(0);
-      } else if (collect_count_constant == -1) {
-        close_count = O1_collect_count;
-      }
-
-      // How many slots need moving?  This is simply dest_slot (0 => no |keep3|).
-      RegisterOrConstant keep3_count;
-      Register O2_keep3_count = O2_scratch;
-      if (dest_slot_constant < 0) {
-        extract_conversion_vminfo(_masm, RicochetFrame::L5_conversion, O2_keep3_count);
-        keep3_count = O2_keep3_count;
-      } else  {
-        keep3_count = dest_slot_constant;
-#ifdef ASSERT
-        if (VerifyMethodHandles) {
-          BLOCK_COMMENT("verify dest_slot_constant {");
-          extract_conversion_vminfo(_masm, RicochetFrame::L5_conversion, O3_scratch);
-          Label L_vminfo_ok;
-          __ cmp_and_br_short(O3_scratch, dest_slot_constant, Assembler::equal, Assembler::pt, L_vminfo_ok);
-          __ stop("bad vminfo in AMH.conv");
-          __ BIND(L_vminfo_ok);
-          BLOCK_COMMENT("} verify dest_slot_constant");
-        }
-#endif //ASSERT
-      }
-
-      // tasks remaining:
-      bool move_keep3 = (!keep3_count.is_constant() || keep3_count.as_constant() != 0);
-      bool stomp_dest = (NOT_DEBUG(dest == T_OBJECT) DEBUG_ONLY(dest_count != 0));
-      bool fix_arg_base = (!close_count.is_constant() || open_count != close_count.as_constant());
-
-      // Old and new argument locations (based at slot 0).
-      // Net shift (&new_argv - &old_argv) is (close_count - open_count).
-      bool zero_open_count = (open_count == 0);  // remember this bit of info
-      if (move_keep3 && fix_arg_base) {
-        // It will be easier to have everything in one register:
-        if (close_count.is_register()) {
-          // Deduct open_count from close_count register to get a clean +/- value.
-          __ sub(close_count.as_register(), open_count, close_count.as_register());
-        } else {
-          close_count = close_count.as_constant() - open_count;
-        }
-        open_count = 0;
-      }
-      Register L4_old_argv = RicochetFrame::L4_saved_args_base;
-      Register O3_new_argv = O3_scratch;
-      if (fix_arg_base) {
-        __ add(L4_old_argv, __ argument_offset(close_count, O4_scratch), O3_new_argv,
-               -(open_count * Interpreter::stackElementSize));
-      }
-
-      // First decide if any actual data are to be moved.
-      // We can skip if (a) |keep3| is empty, or (b) the argument list size didn't change.
-      // (As it happens, all movements involve an argument list size change.)
-
-      // If there are variable parameters, use dynamic checks to skip around the whole mess.
-      Label L_done;
-      if (keep3_count.is_register()) {
-        __ cmp_and_br_short(keep3_count.as_register(), 0, Assembler::equal, Assembler::pn, L_done);
-      }
-      if (close_count.is_register()) {
-        __ cmp_and_br_short(close_count.as_register(), open_count, Assembler::equal, Assembler::pn, L_done);
-      }
-
-      if (move_keep3 && fix_arg_base) {
-        bool emit_move_down = false, emit_move_up = false, emit_guard = false;
-        if (!close_count.is_constant()) {
-          emit_move_down = emit_guard = !zero_open_count;
-          emit_move_up   = true;
-        } else if (open_count != close_count.as_constant()) {
-          emit_move_down = (open_count > close_count.as_constant());
-          emit_move_up   = !emit_move_down;
-        }
-        Label L_move_up;
-        if (emit_guard) {
-          __ cmp(close_count.as_register(), open_count);
-          __ br(Assembler::greater, false, Assembler::pn, L_move_up);
-          __ delayed()->nop();
-        }
-
-        if (emit_move_down) {
-          // Move arguments down if |+dest+| > |-collect-|
-          // (This is rare, except when arguments are retained.)
-          // This opens space for the return value.
-          if (keep3_count.is_constant()) {
-            for (int i = 0; i < keep3_count.as_constant(); i++) {
-              __ ld_ptr(            Address(L4_old_argv, i * Interpreter::stackElementSize), O4_scratch);
-              __ st_ptr(O4_scratch, Address(O3_new_argv, i * Interpreter::stackElementSize)            );
-            }
-          } else {
-            // Live: O1_close_count, O2_keep3_count, O3_new_argv
-            Register argv_top = O0_scratch;
-            __ add(L4_old_argv, __ argument_offset(keep3_count, O4_scratch), argv_top);
-            move_arg_slots_down(_masm,
-                                Address(L4_old_argv, 0),  // beginning of old argv
-                                argv_top,                 // end of old argv
-                                close_count,              // distance to move down (must be negative)
-                                O4_scratch, G5_scratch);
-          }
-        }
-
-        if (emit_guard) {
-          __ ba_short(L_done);  // assumes emit_move_up is true also
-          __ BIND(L_move_up);
-        }
-
-        if (emit_move_up) {
-          // Move arguments up if |+dest+| < |-collect-|
-          // (This is usual, except when |keep3| is empty.)
-          // This closes up the space occupied by the now-deleted collect values.
-          if (keep3_count.is_constant()) {
-            for (int i = keep3_count.as_constant() - 1; i >= 0; i--) {
-              __ ld_ptr(            Address(L4_old_argv, i * Interpreter::stackElementSize), O4_scratch);
-              __ st_ptr(O4_scratch, Address(O3_new_argv, i * Interpreter::stackElementSize)            );
-            }
-          } else {
-            Address argv_top(L4_old_argv, __ argument_offset(keep3_count, O4_scratch));
-            // Live: O1_close_count, O2_keep3_count, O3_new_argv
-            move_arg_slots_up(_masm,
-                              L4_old_argv,  // beginning of old argv
-                              argv_top,     // end of old argv
-                              close_count,  // distance to move up (must be positive)
-                              O4_scratch, G5_scratch);
-          }
-        }
-      }
-      __ BIND(L_done);
-
-      if (fix_arg_base) {
-        // adjust RF.saved_args_base
-        __ mov(O3_new_argv, RicochetFrame::L4_saved_args_base);
-      }
-
-      if (stomp_dest) {
-        // Stomp the return slot, so it doesn't hold garbage.
-        // This isn't strictly necessary, but it may help detect bugs.
-        __ set(RicochetFrame::RETURN_VALUE_PLACEHOLDER, O4_scratch);
-        __ st_ptr(O4_scratch, Address(RicochetFrame::L4_saved_args_base,
-                                      __ argument_offset(keep3_count, keep3_count.register_or_noreg())));  // uses O2_keep3_count
-      }
-      BLOCK_COMMENT("} adjust trailing arguments");
-
-      BLOCK_COMMENT("do_recursive_call");
-      __ mov(SP, O5_savedSP);  // record SP for the callee
-      __ set(ExternalAddress(SharedRuntime::ricochet_blob()->bounce_addr() - frame::pc_return_offset), O7);
-      // The globally unique bounce address has two purposes:
-      // 1. It helps the JVM recognize this frame (frame::is_ricochet_frame).
-      // 2. When returned to, it cuts back the stack and redirects control flow
-      //    to the return handler.
-      // The return handler will further cut back the stack when it takes
-      // down the RF.  Perhaps there is a way to streamline this further.
-
-      // State during recursive call:
-      // ... keep1 | dest | dest=42 | keep3 | RF... | collect | bounce_pc |
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-    }
-    break;
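-    // Net slot arithmetic behind the adjustment above (a sketch): |+dest+|
-    // opens open_count = dest_count slots and |-collect-| closes close_count
-    // slots, so |keep3| shifts by
-    //   net = close_count - open_count;  // > 0: keep3 moves up (list shrinks)
-    //                                    // < 0: keep3 moves down (list grows)
-    // which is exactly the distance handed to move_arg_slots_up/_down.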
-
-  case _adapter_opt_return_ref:
-  case _adapter_opt_return_int:
-  case _adapter_opt_return_long:
-  case _adapter_opt_return_float:
-  case _adapter_opt_return_double:
-  case _adapter_opt_return_void:
-  case _adapter_opt_return_S0_ref:
-  case _adapter_opt_return_S1_ref:
-  case _adapter_opt_return_S2_ref:
-  case _adapter_opt_return_S3_ref:
-  case _adapter_opt_return_S4_ref:
-  case _adapter_opt_return_S5_ref:
-    {
-      BasicType dest_type_constant = ek_adapter_opt_return_type(ek);
-      int       dest_slot_constant = ek_adapter_opt_return_slot(ek);
-
-      if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
-
-      if (dest_slot_constant == -1) {
-        // The current stub is a general handler for this dest_type.
-        // It can be called from _adapter_opt_return_any below.
-        // Stash the address in a little table.
-        assert((dest_type_constant & CONV_TYPE_MASK) == dest_type_constant, "oob");
-        address return_handler = __ pc();
-        _adapter_return_handlers[dest_type_constant] = return_handler;
-        if (dest_type_constant == T_INT) {
-          // do the subword types too
-          for (int bt = T_BOOLEAN; bt < T_INT; bt++) {
-            if (is_subword_type(BasicType(bt)) &&
-                _adapter_return_handlers[bt] == NULL) {
-              _adapter_return_handlers[bt] = return_handler;
-            }
-          }
-        }
-      }
-
-      // On entry to this continuation handler, make Gargs live again.
-      __ mov(RicochetFrame::L4_saved_args_base, Gargs);
-
-      Register O7_temp   = O7;
-      Register O5_vminfo = O5;
-
-      RegisterOrConstant dest_slot = dest_slot_constant;
-      if (dest_slot_constant == -1) {
-        extract_conversion_vminfo(_masm, RicochetFrame::L5_conversion, O5_vminfo);
-        dest_slot = O5_vminfo;
-      }
-      // Store the result back into the argslot.
-      // This code uses the interpreter calling sequence, in which the return value
-      // is usually left in the TOS register, as defined by InterpreterMacroAssembler::pop.
-      // There are certain irregularities with floating point values, which can be seen
-      // in TemplateInterpreterGenerator::generate_return_entry_for.
-      move_return_value(_masm, dest_type_constant, __ argument_address(dest_slot, O7_temp));
-
-      RicochetFrame::leave_ricochet_frame(_masm, G3_method_handle, I5_savedSP, I7);
-
-      // Load the final target and go.
-      if (VerifyMethodHandles)  verify_method_handle(_masm, G3_method_handle, O0_scratch, O1_scratch);
-      __ restore(I5_savedSP, G0, SP);
-      __ jump_to_method_handle_entry(G3_method_handle, O0_scratch);
-      __ illtrap(0);
-    }
-    break;
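-    // The table filled above acts as a per-type dispatch array (a sketch of
-    // what _adapter_opt_return_any does with it below):
-    //   address h = _adapter_return_handlers[dest_type];  // indexed by BasicType
-    //   goto h;   // emitted as an indirect JMP through O7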
-
-  case _adapter_opt_return_any:
-    {
-      Register O7_temp      = O7;
-      Register O5_dest_type = O5;
-
-      if (VerifyMethodHandles)  RicochetFrame::verify_clean(_masm);
-      extract_conversion_dest_type(_masm, RicochetFrame::L5_conversion, O5_dest_type);
-      __ set(ExternalAddress((address) &_adapter_return_handlers[0]), O7_temp);
-      __ sll_ptr(O5_dest_type, LogBytesPerWord, O5_dest_type);
-      __ ld_ptr(O7_temp, O5_dest_type, O7_temp);
-
-#ifdef ASSERT
-      { Label L_ok;
-        __ br_notnull_short(O7_temp, Assembler::pt, L_ok);
-        __ stop("bad method handle return");
-        __ BIND(L_ok);
-      }
-#endif //ASSERT
-      __ JMP(O7_temp, 0);
-      __ delayed()->nop();
-    }
-    break;
-
-  case _adapter_opt_spread_0:
-  case _adapter_opt_spread_1_ref:
-  case _adapter_opt_spread_2_ref:
-  case _adapter_opt_spread_3_ref:
-  case _adapter_opt_spread_4_ref:
-  case _adapter_opt_spread_5_ref:
-  case _adapter_opt_spread_ref:
-  case _adapter_opt_spread_byte:
-  case _adapter_opt_spread_char:
-  case _adapter_opt_spread_short:
-  case _adapter_opt_spread_int:
-  case _adapter_opt_spread_long:
-  case _adapter_opt_spread_float:
-  case _adapter_opt_spread_double:
-    {
-      // spread an array out into a group of arguments
-      int  length_constant    = ek_adapter_opt_spread_count(ek);
-      bool length_can_be_zero = (length_constant == 0);
-      if (length_constant < 0) {
-        // some adapters with variable length must handle the zero case
-        if (!OptimizeMethodHandles ||
-            ek_adapter_opt_spread_type(ek) != T_OBJECT)
-          length_can_be_zero = true;
-      }
-
-      // find the address of the array argument
-      load_vmargslot(_masm, G3_amh_vmargslot, O0_argslot);
-      __ add(__ argument_address(O0_argslot, O0_argslot), O0_argslot);
-
-      // O0_argslot points both to the array and to the first output arg
-      Address vmarg = Address(O0_argslot, 0);
-
-      // Get the array value.
-      Register  O1_array       = O1_scratch;
-      Register  O2_array_klass = O2_scratch;
-      BasicType elem_type      = ek_adapter_opt_spread_type(ek);
-      int       elem_slots     = type2size[elem_type];  // 1 or 2
-      int       array_slots    = 1;  // array is always a T_OBJECT
-      int       length_offset  = arrayOopDesc::length_offset_in_bytes();
-      int       elem0_offset   = arrayOopDesc::base_offset_in_bytes(elem_type);
-      __ ld_ptr(vmarg, O1_array);
-
-      Label L_array_is_empty, L_insert_arg_space, L_copy_args, L_args_done;
-      if (length_can_be_zero) {
-        // handle the null pointer case, if zero is allowed
-        Label L_skip;
-        if (length_constant < 0) {
-          load_conversion_vminfo(_masm, G3_amh_conversion, O3_scratch);
-          __ cmp_zero_and_br(Assembler::notZero, O3_scratch, L_skip);
-          __ delayed()->nop(); // to avoid back-to-back cbcond instructions
-        }
-        __ br_null_short(O1_array, Assembler::pn, L_array_is_empty);
-        __ BIND(L_skip);
-      }
-      __ null_check(O1_array, oopDesc::klass_offset_in_bytes());
-      __ load_klass(O1_array, O2_array_klass);
-
-      // Check the array type.
-      Register O3_klass = O3_scratch;
-      __ load_heap_oop(G3_amh_argument, O3_klass);  // this is a Class object!
-      load_klass_from_Class(_masm, O3_klass, O4_scratch, G5_scratch);
-
-      Label L_ok_array_klass, L_bad_array_klass, L_bad_array_length;
-      __ check_klass_subtype(O2_array_klass, O3_klass, O4_scratch, G5_scratch, L_ok_array_klass);
-      // If we get here, the type check failed!
-      __ ba_short(L_bad_array_klass);
-      __ BIND(L_ok_array_klass);
-
-      // Check length.
-      if (length_constant >= 0) {
-        __ ldsw(Address(O1_array, length_offset), O4_scratch);
-        __ cmp(O4_scratch, length_constant);
-      } else {
-        Register O3_vminfo = O3_scratch;
-        load_conversion_vminfo(_masm, G3_amh_conversion, O3_vminfo);
-        __ ldsw(Address(O1_array, length_offset), O4_scratch);
-        __ cmp(O3_vminfo, O4_scratch);
-      }
-      __ br(Assembler::notEqual, false, Assembler::pn, L_bad_array_length);
-      __ delayed()->nop();
-
-      Register O2_argslot_limit = O2_scratch;
-
-      // Array length checks out.  Now insert any required stack slots.
-      if (length_constant == -1) {
-        // Form a pointer to the end of the affected region.
-        __ add(O0_argslot, Interpreter::stackElementSize, O2_argslot_limit);
-        // 'stack_move' is negative number of words to insert
-        // This number already accounts for elem_slots.
-        Register O3_stack_move = O3_scratch;
-        load_stack_move(_masm, G3_amh_conversion, O3_stack_move);
-        __ cmp(O3_stack_move, 0);
-        assert(stack_move_unit() < 0, "else change this comparison");
-        __ br(Assembler::less, false, Assembler::pn, L_insert_arg_space);
-        __ delayed()->nop();
-        __ br(Assembler::equal, false, Assembler::pn, L_copy_args);
-        __ delayed()->nop();
-        // single argument case, with no array movement
-        __ BIND(L_array_is_empty);
-        remove_arg_slots(_masm, -stack_move_unit() * array_slots,
-                         O0_argslot, O1_scratch, O2_scratch, O3_scratch);
-        __ ba_short(L_args_done);  // no spreading to do
-        __ BIND(L_insert_arg_space);
-        // come here in the usual case, stack_move < 0 (2 or more spread arguments)
-        // Live: O1_array, O2_argslot_limit, O3_stack_move
-        insert_arg_slots(_masm, O3_stack_move,
-                         O0_argslot, O4_scratch, G5_scratch, O1_scratch);
-        // reload from O2_argslot_limit since O0_argslot is now decremented
-        __ ld_ptr(Address(O2_argslot_limit, -Interpreter::stackElementSize), O1_array);
-      } else if (length_constant >= 1) {
-        int new_slots = (length_constant * elem_slots) - array_slots;
-        insert_arg_slots(_masm, new_slots * stack_move_unit(),
-                         O0_argslot, O2_scratch, O3_scratch, O4_scratch);
-      } else if (length_constant == 0) {
-        __ BIND(L_array_is_empty);
-        remove_arg_slots(_masm, -stack_move_unit() * array_slots,
-                         O0_argslot, O1_scratch, O2_scratch, O3_scratch);
-      } else {
-        ShouldNotReachHere();
-      }
-
-      // Copy from the array to the new slots.
-      // Note: Stack change code preserves integrity of O0_argslot pointer.
-      // So even after slot insertions, O0_argslot still points to first argument.
-      // Beware:  Arguments that are shallow on the stack are deep in the array,
-      // and vice versa.  So a downward-growing stack (the usual) has to be copied
-      // elementwise in reverse order from the source array.
-      __ BIND(L_copy_args);
-      if (length_constant == -1) {
-        // [O0_argslot, O2_argslot_limit) is the area we are inserting into.
-        // Array element [0] goes at O2_argslot_limit[-wordSize].
-        Register O1_source = O1_array;
-        __ add(Address(O1_array, elem0_offset), O1_source);
-        Register O4_fill_ptr = O4_scratch;
-        __ mov(O2_argslot_limit, O4_fill_ptr);
-        Label L_loop;
-        __ BIND(L_loop);
-        __ add(O4_fill_ptr, -Interpreter::stackElementSize * elem_slots, O4_fill_ptr);
-        move_typed_arg(_masm, elem_type, true,
-                       Address(O1_source, 0), Address(O4_fill_ptr, 0),
-                       O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
-        __ add(O1_source, type2aelembytes(elem_type), O1_source);
-        __ cmp_and_brx_short(O4_fill_ptr, O0_argslot, Assembler::greaterUnsigned, Assembler::pt, L_loop);
-      } else if (length_constant == 0) {
-        // nothing to copy
-      } else {
-        int elem_offset = elem0_offset;
-        int slot_offset = length_constant * Interpreter::stackElementSize;
-        for (int index = 0; index < length_constant; index++) {
-          slot_offset -= Interpreter::stackElementSize * elem_slots;  // fill backward
-          move_typed_arg(_masm, elem_type, true,
-                         Address(O1_array, elem_offset), Address(O0_argslot, slot_offset),
-                         O2_scratch);  // must be an even register for !_LP64 long moves (uses O2/O3)
-          elem_offset += type2aelembytes(elem_type);
-        }
-      }
-      __ BIND(L_args_done);
-
-      // Arguments are spread.  Move to next method handle.
-      __ load_heap_oop(G3_mh_vmtarget, G3_method_handle);
-      __ jump_to_method_handle_entry(G3_method_handle, O1_scratch);
-
-      __ BIND(L_bad_array_klass);
-      assert(!vmarg.uses(O2_required), "must be different registers");
-      __ load_heap_oop(Address(O2_array_klass, java_mirror_offset), O2_required);  // required class
-      __ ld_ptr(       vmarg,                                       O1_actual);    // bad object
-      __ jump_to(AddressLiteral(from_interpreted_entry(_raise_exception)), O3_scratch);
-      __ delayed()->mov(Bytecodes::_aaload,                         O0_code);      // who is complaining?
-
-      __ bind(L_bad_array_length);
-      assert(!vmarg.uses(O2_required), "must be different registers");
-      __ mov(   G3_method_handle,                O2_required);  // required class
-      __ ld_ptr(vmarg,                           O1_actual);    // bad object
-      __ jump_to(AddressLiteral(from_interpreted_entry(_raise_exception)), O3_scratch);
-      __ delayed()->mov(Bytecodes::_arraylength, O0_code);      // who is complaining?
-    }
-    break;
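-    // The variable-length copy above in C form (a sketch; load_element and
-    // store_slot stand in for the typed moves): the source walks the array
-    // forward while the destination walks the stack downward, so element [0]
-    // lands just below argslot_limit and the last element at argslot:
-    //   intptr_t* fill = argslot_limit;
-    //   for (int i = 0; i < length; i++) {
-    //     fill -= elem_slots;                      // fill backward
-    //     store_slot(fill, load_element(array, i));
-    //   }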
-
-  default:
-    DEBUG_ONLY(tty->print_cr("bad ek=%d (%s)", (int)ek, entry_name(ek)));
-    ShouldNotReachHere();
-  }
-  BLOCK_COMMENT(err_msg("} Entry %s", entry_name(ek)));
-
-  address me_cookie = MethodHandleEntry::start_compiled_entry(_masm, interp_entry);
-  __ unimplemented(entry_name(ek)); // %%% FIXME: NYI
-
-  init_entry(ek, MethodHandleEntry::finish_compiled_entry(_masm, me_cookie));
-}
--- a/src/cpu/sparc/vm/methodHandles_sparc.hpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/methodHandles_sparc.hpp	Fri Sep 07 18:18:55 2012 -0700
@@ -30,186 +30,9 @@
   adapter_code_size = NOT_LP64(23000 DEBUG_ONLY(+ 40000)) LP64_ONLY(35000 DEBUG_ONLY(+ 50000))
 };
 
-public:
-
-class RicochetFrame : public ResourceObj {
-  friend class MethodHandles;
-
- private:
-  /*
-    RF field            x86                 SPARC
-    sender_pc           *(rsp+0)            I7-0x8
-    sender_link         rbp                 I6+BIAS
-    exact_sender_sp     rsi/r13             I5_savedSP
-    conversion          *(rcx+&amh_conv)    L5_conv
-    saved_args_base     rax                 L4_sab (cf. Gargs = G4)
-    saved_args_layout   #NULL               L3_sal
-    saved_target        *(rcx+&mh_vmtgt)    L2_stgt
-    continuation        #STUB_CON           L1_cont
-   */
-  static const Register L1_continuation     ;  // what to do when control gets back here
-  static const Register L2_saved_target     ;  // target method handle to invoke on saved_args
-  static const Register L3_saved_args_layout;  // caching point for MethodTypeForm.vmlayout cookie
-  static const Register L4_saved_args_base  ;  // base of pushed arguments (slot 0, arg N) (-3)
-  static const Register L5_conversion       ;  // misc. information from original AdapterMethodHandle (-2)
-
-  frame _fr;
-
-  RicochetFrame(const frame& fr) : _fr(fr) { }
-
-  intptr_t* register_addr(Register reg) const  {
-    assert((_fr.sp() + reg->sp_offset_in_saved_window()) == _fr.register_addr(reg), "must agree");
-    return _fr.register_addr(reg);
-  }
-  intptr_t  register_value(Register reg) const { return *register_addr(reg); }
-
- public:
-  intptr_t* continuation() const        { return (intptr_t*) register_value(L1_continuation); }
-  oop       saved_target() const        { return (oop)       register_value(L2_saved_target); }
-  oop       saved_args_layout() const   { return (oop)       register_value(L3_saved_args_layout); }
-  intptr_t* saved_args_base() const     { return (intptr_t*) register_value(L4_saved_args_base); }
-  intptr_t  conversion() const          { return             register_value(L5_conversion); }
-  intptr_t* exact_sender_sp() const     { return (intptr_t*) register_value(I5_savedSP); }
-  intptr_t* sender_link() const         { return _fr.sender_sp(); }  // XXX
-  address   sender_pc() const           { return _fr.sender_pc(); }
-
-  // This value is not used for much, but it apparently must be nonzero.
-  static int frame_size_in_bytes()              { return wordSize * 4; }
-
-  intptr_t* extended_sender_sp() const  { return saved_args_base(); }
-
-  intptr_t  return_value_slot_number() const {
-    return adapter_conversion_vminfo(conversion());
-  }
-  BasicType return_value_type() const {
-    return adapter_conversion_dest_type(conversion());
-  }
-  bool has_return_value_slot() const {
-    return return_value_type() != T_VOID;
-  }
-  intptr_t* return_value_slot_addr() const {
-    assert(has_return_value_slot(), "");
-    return saved_arg_slot_addr(return_value_slot_number());
-  }
-  intptr_t* saved_target_slot_addr() const {
-    return saved_arg_slot_addr(saved_args_length());
-  }
-  intptr_t* saved_arg_slot_addr(int slot) const {
-    assert(slot >= 0, "");
-    return (intptr_t*)( (address)saved_args_base() + (slot * Interpreter::stackElementSize) );
-  }
-
-  jint      saved_args_length() const;
-  jint      saved_arg_offset(int arg) const;
-
-  // GC interface
-  oop*  saved_target_addr()                     { return (oop*)register_addr(L2_saved_target); }
-  oop*  saved_args_layout_addr()                { return (oop*)register_addr(L3_saved_args_layout); }
-
-  oop  compute_saved_args_layout(bool read_cache, bool write_cache);
-
-#ifdef ASSERT
-  // The magic number is supposed to help find ricochet frames within the bytes of stack dumps.
-  enum { MAGIC_NUMBER_1 = 0xFEED03E, MAGIC_NUMBER_2 = 0xBEEF03E };
-  static const Register L0_magic_number_1   ;  // cookie for debugging, at start of RSA
-  static Address magic_number_2_addr()  { return Address(L4_saved_args_base, -wordSize); }
-  intptr_t magic_number_1() const       { return register_value(L0_magic_number_1); }
-  intptr_t magic_number_2() const       { return saved_args_base()[-1]; }
-#endif //ASSERT
-
- public:
-  enum { RETURN_VALUE_PLACEHOLDER = (NOT_DEBUG(0) DEBUG_ONLY(42)) };
-
-  void verify() const NOT_DEBUG_RETURN; // check for MAGIC_NUMBER, etc.
-
-  static void generate_ricochet_blob(MacroAssembler* _masm,
-                                     // output params:
-                                     int* bounce_offset,
-                                     int* exception_offset,
-                                     int* frame_size_in_words);
-
-  static void enter_ricochet_frame(MacroAssembler* _masm,
-                                   Register recv_reg,
-                                   Register argv_reg,
-                                   address return_handler);
-
-  static void leave_ricochet_frame(MacroAssembler* _masm,
-                                   Register recv_reg,
-                                   Register new_sp_reg,
-                                   Register sender_pc_reg);
-
-  static RicochetFrame* from_frame(const frame& fr) {
-    RicochetFrame* rf = new RicochetFrame(fr);
-    rf->verify();
-    return rf;
-  }
-
-  static void verify_clean(MacroAssembler* _masm) NOT_DEBUG_RETURN;
-
-  static void describe(const frame* fr, FrameValues& values, int frame_no) PRODUCT_RETURN;
-};
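-// Typical use of RicochetFrame (a sketch): wrap a frame already identified
-// as a ricochet frame, then read back its saved state:
-//   RicochetFrame* rf = RicochetFrame::from_frame(fr);   // verifies magic
-//   oop       target = rf->saved_target();
-//   intptr_t* argv   = rf->saved_args_base();
-//   if (rf->has_return_value_slot())  rf->return_value_slot_addr();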
-
 // Additional helper methods for MethodHandles code generation:
 public:
   static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg, Register temp_reg, Register temp2_reg);
-  static void load_conversion_vminfo(MacroAssembler* _masm, Address conversion_field_addr, Register reg);
-  static void extract_conversion_vminfo(MacroAssembler* _masm, Register conversion_field_reg, Register reg);
-  static void extract_conversion_dest_type(MacroAssembler* _masm, Register conversion_field_reg, Register reg);
-
-  static void load_stack_move(MacroAssembler* _masm,
-                              Address G3_amh_conversion,
-                              Register G5_stack_move);
-
-  static void insert_arg_slots(MacroAssembler* _masm,
-                               RegisterOrConstant arg_slots,
-                               Register argslot_reg,
-                               Register temp_reg, Register temp2_reg, Register temp3_reg);
-
-  static void remove_arg_slots(MacroAssembler* _masm,
-                               RegisterOrConstant arg_slots,
-                               Register argslot_reg,
-                               Register temp_reg, Register temp2_reg, Register temp3_reg);
-
-  static void push_arg_slots(MacroAssembler* _masm,
-                             Register argslot_reg,
-                             RegisterOrConstant slot_count,
-                             Register temp_reg, Register temp2_reg);
-
-  static void move_arg_slots_up(MacroAssembler* _masm,
-                                Register bottom_reg,  // invariant
-                                Address  top_addr,    // can use temp_reg
-                                RegisterOrConstant positive_distance_in_slots,
-                                Register temp_reg, Register temp2_reg);
-
-  static void move_arg_slots_down(MacroAssembler* _masm,
-                                  Address  bottom_addr,  // can use temp_reg
-                                  Register top_reg,      // invariant
-                                  RegisterOrConstant negative_distance_in_slots,
-                                  Register temp_reg, Register temp2_reg);
-
-  static void move_typed_arg(MacroAssembler* _masm,
-                             BasicType type, bool is_element,
-                             Address value_src, Address slot_dest,
-                             Register temp_reg);
-
-  static void move_return_value(MacroAssembler* _masm, BasicType type,
-                                Address return_slot);
-
-  static void verify_argslot(MacroAssembler* _masm, Register argslot_reg,
-                             Register temp_reg,
-                             const char* error_message) NOT_DEBUG_RETURN;
-
-  static void verify_argslots(MacroAssembler* _masm,
-                              RegisterOrConstant argslot_count,
-                              Register argslot_reg,
-                              Register temp_reg,
-                              Register temp2_reg,
-                              bool negate_argslot,
-                              const char* error_message) NOT_DEBUG_RETURN;
-
-  static void verify_stack_move(MacroAssembler* _masm,
-                                RegisterOrConstant arg_slots,
-                                int direction) NOT_DEBUG_RETURN;
 
   static void verify_klass(MacroAssembler* _masm,
                            Register obj_reg, KlassHandle klass,
@@ -223,8 +46,17 @@
                  "reference is a MH");
   }
 
+  static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN;
+
   // Similar to InterpreterMacroAssembler::jump_from_interpreted.
   // Takes care of special dispatch from single stepping too.
-  static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, Register temp2);
+  static void jump_from_method_handle(MacroAssembler* _masm, Register method,
+                                      Register temp, Register temp2,
+                                      bool for_compiler_entry);
+
+  static void jump_to_lambda_form(MacroAssembler* _masm,
+                                  Register recv, Register method_temp,
+                                  Register temp2, Register temp3,
+                                  bool for_compiler_entry);
 
   static void trace_method_handle(MacroAssembler* _masm, const char* adaptername) PRODUCT_RETURN;
--- a/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/sharedRuntime_sparc.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -400,13 +400,13 @@
     case T_LONG:                // LP64, longs compete with int args
       assert(sig_bt[i+1] == T_VOID, "");
 #ifdef _LP64
-      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
+      if (int_reg_cnt < int_reg_max)  int_reg_cnt++;
 #endif
       break;
     case T_OBJECT:
     case T_ARRAY:
     case T_ADDRESS: // Used, e.g., in slow-path locking for the lock's stack address
-      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
+      if (int_reg_cnt < int_reg_max)  int_reg_cnt++;
 #ifndef _LP64
       else                            stk_reg_pairs++;
 #endif
@@ -416,11 +416,11 @@
     case T_CHAR:
     case T_BYTE:
     case T_BOOLEAN:
-      if (int_reg_cnt < int_reg_max) int_reg_cnt++;
+      if (int_reg_cnt < int_reg_max)  int_reg_cnt++;
       else                            stk_reg_pairs++;
       break;
     case T_FLOAT:
-      if (flt_reg_cnt < flt_reg_max) flt_reg_cnt++;
+      if (flt_reg_cnt < flt_reg_max)  flt_reg_cnt++;
       else                            stk_reg_pairs++;
       break;
     case T_DOUBLE:
@@ -436,7 +436,6 @@
   // This is where the longs/doubles start on the stack.
   stk_reg_pairs = (stk_reg_pairs+1) & ~1; // Round
 
-  int int_reg_pairs = (int_reg_cnt+1) & ~1; // 32-bit 2-reg longs only
   int flt_reg_pairs = (flt_reg_cnt+1) & ~1;
 
   // int stk_reg = frame::register_save_words*(wordSize>>2);
@@ -517,24 +516,15 @@
           stk_reg_pairs += 2;
         }
 #else // COMPILER2
-        if (int_reg_pairs + 1 < int_reg_max) {
-          if (is_outgoing) {
-            regs[i].set_pair(as_oRegister(int_reg_pairs + 1)->as_VMReg(), as_oRegister(int_reg_pairs)->as_VMReg());
-          } else {
-            regs[i].set_pair(as_iRegister(int_reg_pairs + 1)->as_VMReg(), as_iRegister(int_reg_pairs)->as_VMReg());
-          }
-          int_reg_pairs += 2;
-        } else {
           regs[i].set2(VMRegImpl::stack2reg(stk_reg_pairs));
           stk_reg_pairs += 2;
-        }
 #endif // COMPILER2
 #endif // _LP64
       break;
 
     case T_FLOAT:
       if (flt_reg < flt_reg_max) regs[i].set1(as_FloatRegister(flt_reg++)->as_VMReg());
-      else                       regs[i].set1(    VMRegImpl::stack2reg(stk_reg++));
+      else                       regs[i].set1(VMRegImpl::stack2reg(stk_reg++));
       break;
     case T_DOUBLE:
       assert(sig_bt[i+1] == T_VOID, "expecting half");
@@ -886,6 +876,20 @@
   __ delayed()->add(SP, G1, Gargs);
 }
 
+static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, Register temp2_reg,
+                        address code_start, address code_end,
+                        Label& L_ok) {
+  Label L_fail;
+  __ set(ExternalAddress(code_start), temp_reg);
+  __ set(pointer_delta(code_end, code_start, 1), temp2_reg);
+  __ cmp(pc_reg, temp_reg);
+  __ brx(Assembler::lessEqualUnsigned, false, Assembler::pn, L_fail);
+  __ delayed()->add(temp_reg, temp2_reg, temp_reg);
+  __ cmp_and_brx_short(pc_reg, temp_reg, Assembler::lessUnsigned, Assembler::pt, L_ok);
+  __ bind(L_fail);
+}
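+// In effect (a sketch of the check above): control reaches L_ok iff
+//   code_start < pc_reg && pc_reg < code_start + (code_end - code_start),
+// both compares unsigned; otherwise execution falls through at L_fail.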
+
 void AdapterGenerator::gen_i2c_adapter(
                             int total_args_passed,
                             // VMReg max_arg,
@@ -907,6 +911,51 @@
   // This removes all sorts of headaches on the x86 side and also eliminates
   // the possibility of having c2i -> i2c -> c2i -> ... endless transitions.
 
+  // More detail:
+  // Adapters can be frameless because they do not require the caller
+  // to perform additional cleanup work, such as correcting the stack pointer.
+  // An i2c adapter is frameless because the *caller* frame, which is interpreted,
+  // routinely repairs its own stack pointer (from interpreter_frame_last_sp),
+  // even if a callee has modified the stack pointer.
+  // A c2i adapter is frameless because the *callee* frame, which is interpreted,
+  // routinely repairs its caller's stack pointer (from sender_sp, which is set
+  // up via the senderSP register).
+  // In other words, if *either* the caller or callee is interpreted, we can
+  // get the stack pointer repaired after a call.
+  // This is why c2i and i2c adapters cannot be indefinitely composed.
+  // In particular, if a c2i adapter were to somehow call an i2c adapter,
+  // both caller and callee would be compiled methods, and neither would
+  // clean up the stack pointer changes performed by the two adapters.
+  // If this happens, control eventually transfers back to the compiled
+  // caller, but with an uncorrected stack, causing delayed havoc.
+
+  if (VerifyAdapterCalls &&
+      (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) {
+    // So, let's test for cascading c2i/i2c adapters right now.
+    //  assert(Interpreter::contains($return_addr) ||
+    //         StubRoutines::contains($return_addr),
+    //         "i2c adapter must return to an interpreter frame");
+    __ block_comment("verify_i2c { ");
+    Label L_ok;
+    if (Interpreter::code() != NULL)
+      range_check(masm, O7, O0, O1,
+                  Interpreter::code()->code_start(), Interpreter::code()->code_end(),
+                  L_ok);
+    if (StubRoutines::code1() != NULL)
+      range_check(masm, O7, O0, O1,
+                  StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(),
+                  L_ok);
+    if (StubRoutines::code2() != NULL)
+      range_check(masm, O7, O0, O1,
+                  StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(),
+                  L_ok);
+    const char* msg = "i2c adapter must return to an interpreter frame";
+    __ block_comment(msg);
+    __ stop(msg);
+    __ bind(L_ok);
+    __ block_comment("} verify_i2ce ");
+  }
+
   // As you can see from the list of inputs & outputs there are not a lot
   // of temp registers to work with: mostly G1, G3 & G4.
 
@@ -1937,20 +1986,156 @@
   __ bind(done);
 }
 
+static void verify_oop_args(MacroAssembler* masm,
+                            int total_args_passed,
+                            const BasicType* sig_bt,
+                            const VMRegPair* regs) {
+  Register temp_reg = G5_method;  // not part of any compiled calling seq
+  if (VerifyOops) {
+    for (int i = 0; i < total_args_passed; i++) {
+      if (sig_bt[i] == T_OBJECT ||
+          sig_bt[i] == T_ARRAY) {
+        VMReg r = regs[i].first();
+        assert(r->is_valid(), "bad oop arg");
+        if (r->is_stack()) {
+          RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS;
+          ld_off = __ ensure_simm13_or_reg(ld_off, temp_reg);
+          __ ld_ptr(SP, ld_off, temp_reg);
+          __ verify_oop(temp_reg);
+        } else {
+          __ verify_oop(r->as_Register());
+        }
+      }
+    }
+  }
+}
+
+static void gen_special_dispatch(MacroAssembler* masm,
+                                 int total_args_passed,
+                                 int comp_args_on_stack,
+                                 vmIntrinsics::ID special_dispatch,
+                                 const BasicType* sig_bt,
+                                 const VMRegPair* regs) {
+  verify_oop_args(masm, total_args_passed, sig_bt, regs);
+
+  // Now write the args into the outgoing interpreter space
+  bool     has_receiver   = false;
+  Register receiver_reg   = noreg;
+  int      member_arg_pos = -1;
+  Register member_reg     = noreg;
+  int      ref_kind       = MethodHandles::signature_polymorphic_intrinsic_ref_kind(special_dispatch);
+  if (ref_kind != 0) {
+    member_arg_pos = total_args_passed - 1;  // trailing MemberName argument
+    member_reg = G5_method;  // known to be free at this point
+    has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind);
+  } else if (special_dispatch == vmIntrinsics::_invokeBasic) {
+    has_receiver = true;
+  } else {
+    fatal(err_msg("special_dispatch=%d", special_dispatch));
+  }
+
+  if (member_reg != noreg) {
+    // Load the member_arg into register, if necessary.
+    assert(member_arg_pos >= 0 && member_arg_pos < total_args_passed, "oob");
+    assert(sig_bt[member_arg_pos] == T_OBJECT, "dispatch argument must be an object");
+    VMReg r = regs[member_arg_pos].first();
+    assert(r->is_valid(), "bad member arg");
+    if (r->is_stack()) {
+      RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS;
+      ld_off = __ ensure_simm13_or_reg(ld_off, member_reg);
+      __ ld_ptr(SP, ld_off, member_reg);
+    } else {
+      // no data motion is needed
+      member_reg = r->as_Register();
+    }
+  }
+
+  if (has_receiver) {
+    // Make sure the receiver is loaded into a register.
+    assert(total_args_passed > 0, "oob");
+    assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object");
+    VMReg r = regs[0].first();
+    assert(r->is_valid(), "bad receiver arg");
+    if (r->is_stack()) {
+      // Porting note:  This assumes that compiled calling conventions always
+      // pass the receiver oop in a register.  If this is not true on some
+      // platform, pick a temp and load the receiver from stack.
+      assert(false, "receiver always in a register");
+      receiver_reg = G3_scratch;  // known to be free at this point
+      RegisterOrConstant ld_off = reg2offset(r) + STACK_BIAS;
+      ld_off = __ ensure_simm13_or_reg(ld_off, member_reg);
+      __ ld_ptr(SP, ld_off, receiver_reg);
+    } else {
+      // no data motion is needed
+      receiver_reg = r->as_Register();
+    }
+  }
+
+  // Figure out which address we are really jumping to:
+  MethodHandles::generate_method_handle_dispatch(masm, special_dispatch,
+                                                 receiver_reg, member_reg, /*for_compiler_entry:*/ true);
+}
+
 // ---------------------------------------------------------------------------
 // Generate a native wrapper for a given method.  The method takes arguments
 // in the Java compiled code convention, marshals them to the native
 // convention (handlizes oops, etc), transitions to native, makes the call,
 // returns to java state (possibly blocking), unhandlizes any result and
 // returns.
+//
+// Critical native functions are a shorthand for the use of
+// GetPrimitiveArrayCritical and disallow the use of any other JNI
+// functions.  The wrapper is expected to unpack the arguments before
+// passing them to the callee and perform checks before and after the
+// native call to ensure that the GC_locker
+// lock_critical/unlock_critical semantics are followed.  Some other
+// parts of JNI setup are skipped, like the tear-down of the JNI handle
+// block and the check for pending exceptions, since it's impossible for
+// them to be thrown.
+//
+// They are roughly structured like this:
+//    if (GC_locker::needs_gc())
+//      SharedRuntime::block_for_jni_critical();
+//    transition to thread_in_native
+//    unpack array arguments and call native entry point
+//    check for safepoint in progress
+//    check if any thread suspend flags are set
+//      call into the JVM and possibly unlock the JNI critical
+//      if a GC was suppressed while in the critical native.
+//    transition back to thread_in_Java
+//    return to caller
+//
 nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm,
                                                 methodHandle method,
                                                 int compile_id,
                                                 int total_in_args,
                                                 int comp_args_on_stack, // in VMRegStackSlots
-                                                BasicType *in_sig_bt,
-                                                VMRegPair *in_regs,
+                                                BasicType* in_sig_bt,
+                                                VMRegPair* in_regs,
                                                 BasicType ret_type) {
+  if (method->is_method_handle_intrinsic()) {
+    vmIntrinsics::ID iid = method->intrinsic_id();
+    intptr_t start = (intptr_t)__ pc();
+    int vep_offset = ((intptr_t)__ pc()) - start;
+    gen_special_dispatch(masm,
+                         total_in_args,
+                         comp_args_on_stack,
+                         method->intrinsic_id(),
+                         in_sig_bt,
+                         in_regs);
+    int frame_complete = ((intptr_t)__ pc()) - start;  // not complete, period
+    __ flush();
+    int stack_slots = SharedRuntime::out_preserve_stack_slots();  // no out slots at all, actually
+    return nmethod::new_native_nmethod(method,
+                                       compile_id,
+                                       masm->code(),
+                                       vep_offset,
+                                       frame_complete,
+                                       stack_slots / VMRegImpl::slots_per_word,
+                                       in_ByteSize(-1),
+                                       in_ByteSize(-1),
+                                       (OopMapSet*)NULL);
+  }
   bool is_critical_native = true;
   address native_func = method->critical_native_function();
   if (native_func == NULL) {
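The protocol described in the comment block above is emitted as assembly, not
shown as C++, so the following sketch restates the GC_locker handshake in
plain code. It is illustrative only: apart from GC_locker::needs_gc() and
SharedRuntime::block_for_jni_critical(), which the comment itself names, every
identifier is a stand-in for generated code.

    // Hedged sketch of the critical-native wrapper's control flow.
    void critical_native_shape(JavaThread* thread) {
      if (GC_locker::needs_gc()) {
        // A GC is pending: block here rather than entering the critical
        // region and suppressing it.
        SharedRuntime::block_for_jni_critical(thread);
      }
      // transition to _thread_in_native;
      // unpack array arguments and call the native entry point;
      // on return, check for a safepoint in progress and for suspend flags;
      // that slow path may call into the VM and unlock the JNI critical
      // region if a GC was suppressed while this thread held it;
      // transition back to _thread_in_Java and return to the caller.
    }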
--- a/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/stubGenerator_sparc.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -3404,14 +3404,6 @@
     StubRoutines::_atomic_add_ptr_entry      = StubRoutines::_atomic_add_entry;
 #endif  // COMPILER2 !=> _LP64
 
-    // Build this early so it's available for the interpreter.  The
-    // stub expects the required and actual type to already be in O1
-    // and O2 respectively.
-    StubRoutines::_throw_WrongMethodTypeException_entry =
-      generate_throw_exception("WrongMethodTypeException throw_exception",
-                               CAST_FROM_FN_PTR(address, SharedRuntime::throw_WrongMethodTypeException),
-                               G5_method_type, G3_method_handle);
-
     // Build this early so it's available for the interpreter.
     StubRoutines::_throw_StackOverflowError_entry          = generate_throw_exception("StackOverflowError throw_exception",           CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError));
   }
--- a/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/templateInterpreter_sparc.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -694,9 +694,9 @@
     // Need to differentiate between igetfield, agetfield, bgetfield etc.
     // because they are different sizes.
     // Get the type from the constant pool cache
-    __ srl(G1_scratch, ConstantPoolCacheEntry::tosBits, G1_scratch);
-    // Make sure we don't need to mask G1_scratch for tosBits after the above shift
-    ConstantPoolCacheEntry::verify_tosBits();
+    __ srl(G1_scratch, ConstantPoolCacheEntry::tos_state_shift, G1_scratch);
+    // Make sure we don't need to mask G1_scratch after the above shift
+    ConstantPoolCacheEntry::verify_tos_state_shift();
     __ cmp(G1_scratch, atos );
     __ br(Assembler::equal, true, Assembler::pt, xreturn_path);
     __ delayed()->ld_ptr(Otos_i, G3_scratch, Otos_i);
@@ -1662,7 +1662,7 @@
       int computed_sp_adjustment = (delta > 0) ? round_to(delta, WordsPerLong) : 0;
       *interpreter_frame->register_addr(I5_savedSP)    = (intptr_t) (fp + computed_sp_adjustment) - STACK_BIAS;
     } else {
-      assert(caller->is_compiled_frame() || caller->is_entry_frame() || caller->is_ricochet_frame(), "only possible cases");
+      assert(caller->is_compiled_frame() || caller->is_entry_frame(), "only possible cases");
       // Don't have Lesp available; lay out locals block in the caller
       // adjacent to the register window save area.
       //
--- a/src/cpu/sparc/vm/templateTable_sparc.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/templateTable_sparc.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -378,7 +378,7 @@
   Register Rcache = G3_scratch;
   Register Rscratch = G4_scratch;
 
-  resolve_cache_and_index(f1_oop, Otos_i, Rcache, Rscratch, wide ? sizeof(u2) : sizeof(u1));
+  resolve_cache_and_index(f12_oop, Otos_i, Rcache, Rscratch, wide ? sizeof(u2) : sizeof(u1));
 
   __ verify_oop(Otos_i);
 
@@ -2093,10 +2093,12 @@
   // Depends on cpCacheOop layout!
   Label resolved;
 
-  if (byte_no == f1_oop) {
-    // We are resolved if the f1 field contains a non-null object (CallSite, etc.)
-    // This kind of CP cache entry does not need to match the flags byte, because
+  if (byte_no == f12_oop) {
+    // We are resolved if the f1 field contains a non-null object (CallSite, MethodType, etc.)
+    // This kind of CP cache entry does not need to match bytecode_1 or bytecode_2, because
     // there is a 1-1 relation between bytecode type and CP entry type.
+    // The caller will also load a methodOop from f2.
+    assert(result != noreg, "");
     assert_different_registers(result, Rcache);
     __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size);
     __ ld_ptr(Rcache, constantPoolCacheOopDesc::base_offset() +
@@ -2123,10 +2125,13 @@
     case Bytecodes::_invokespecial  : // fall through
     case Bytecodes::_invokestatic   : // fall through
     case Bytecodes::_invokeinterface: entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invoke);  break;
+    case Bytecodes::_invokehandle   : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokehandle);  break;
     case Bytecodes::_invokedynamic  : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_invokedynamic);  break;
     case Bytecodes::_fast_aldc      : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);     break;
     case Bytecodes::_fast_aldc_w    : entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc);     break;
-    default                         : ShouldNotReachHere();                                 break;
+    default:
+      fatal(err_msg("unexpected bytecode: %s", Bytecodes::name(bytecode())));
+      break;
   }
   // first time invocation - must resolve first
   __ call_VM(noreg, entry, O1);
@@ -2139,48 +2144,54 @@
 }
 
 void TemplateTable::load_invoke_cp_cache_entry(int byte_no,
-                                               Register Rmethod,
-                                               Register Ritable_index,
-                                               Register Rflags,
+                                               Register method,
+                                               Register itable_index,
+                                               Register flags,
                                                bool is_invokevirtual,
                                                bool is_invokevfinal,
                                                bool is_invokedynamic) {
   // Uses both G3_scratch and G4_scratch
-  Register Rcache = G3_scratch;
-  Register Rscratch = G4_scratch;
-  assert_different_registers(Rcache, Rmethod, Ritable_index);
-
-  ByteSize cp_base_offset = constantPoolCacheOopDesc::base_offset();
+  Register cache = G3_scratch;
+  Register index = G4_scratch;
+  assert_different_registers(cache, method, itable_index);
 
   // determine constant pool cache field offsets
+  assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant");
   const int method_offset = in_bytes(
-    cp_base_offset +
-      (is_invokevirtual
+      constantPoolCacheOopDesc::base_offset() +
+      ((byte_no == f2_byte)
        ? ConstantPoolCacheEntry::f2_offset()
        : ConstantPoolCacheEntry::f1_offset()
       )
     );
-  const int flags_offset = in_bytes(cp_base_offset +
+  const int flags_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
                                     ConstantPoolCacheEntry::flags_offset());
   // access constant pool cache fields
-  const int index_offset = in_bytes(cp_base_offset +
+  const int index_offset = in_bytes(constantPoolCacheOopDesc::base_offset() +
                                     ConstantPoolCacheEntry::f2_offset());
 
   if (is_invokevfinal) {
-    __ get_cache_and_index_at_bcp(Rcache, Rscratch, 1);
-    __ ld_ptr(Rcache, method_offset, Rmethod);
-  } else if (byte_no == f1_oop) {
-    // Resolved f1_oop goes directly into 'method' register.
-    resolve_cache_and_index(byte_no, Rmethod, Rcache, Rscratch, sizeof(u4));
+    __ get_cache_and_index_at_bcp(cache, index, 1);
+    __ ld_ptr(Address(cache, method_offset), method);
+  } else if (byte_no == f12_oop) {
+    // Resolved f1_oop (CallSite, MethodType, etc.) goes into 'itable_index'.
+    // Resolved f2_oop (methodOop invoker) will go into 'method' (at index_offset).
+    // See ConstantPoolCacheEntry::set_dynamic_call and set_method_handle.
+    size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2));
+    resolve_cache_and_index(byte_no, itable_index, cache, index, index_size);
+    __ ld_ptr(Address(cache, index_offset), method);
+    itable_index = noreg;  // hack to disable load below
   } else {
-    resolve_cache_and_index(byte_no, noreg, Rcache, Rscratch, sizeof(u2));
-    __ ld_ptr(Rcache, method_offset, Rmethod);
+    resolve_cache_and_index(byte_no, noreg, cache, index, sizeof(u2));
+    __ ld_ptr(Address(cache, method_offset), method);
   }
 
-  if (Ritable_index != noreg) {
-    __ ld_ptr(Rcache, index_offset, Ritable_index);
+  if (itable_index != noreg) {
+    // pick up itable index from f2 also:
+    assert(byte_no == f1_byte, "already picked up f1");
+    __ ld_ptr(Address(cache, index_offset), itable_index);
   }
-  __ ld_ptr(Rcache, flags_offset, Rflags);
+  __ ld_ptr(Address(cache, flags_offset), flags);
 }
 
 // The Rcache register must be set before call
@@ -2272,7 +2283,7 @@
 
   if (__ membar_has_effect(membar_bits)) {
     // Get volatile flag
-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
+    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
     __ and3(Rflags, Lscratch, Lscratch);
   }
 
@@ -2280,9 +2291,9 @@
 
   // compute field type
   Label notByte, notInt, notShort, notChar, notLong, notFloat, notObj;
-  __ srl(Rflags, ConstantPoolCacheEntry::tosBits, Rflags);
-  // Make sure we don't need to mask Rflags for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
+  __ srl(Rflags, ConstantPoolCacheEntry::tos_state_shift, Rflags);
+  // Make sure we don't need to mask Rflags after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
 
   // Check atos before itos for getstatic, more likely (in Queens at least)
   __ cmp(Rflags, atos);
@@ -2445,7 +2456,7 @@
   if (__ membar_has_effect(membar_bits)) {
     // Get volatile flag
     __ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::f2_offset(), Rflags);
-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
+    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
   }
 
   switch (bytecode()) {
@@ -2569,9 +2580,9 @@
       Label two_word, valsizeknown;
       __ ld_ptr(G1_scratch, cp_base_offset + ConstantPoolCacheEntry::flags_offset(), Rflags);
       __ mov(Lesp, G4_scratch);
-      __ srl(Rflags, ConstantPoolCacheEntry::tosBits, Rflags);
-      // Make sure we don't need to mask Rflags for tosBits after the above shift
-      ConstantPoolCacheEntry::verify_tosBits();
+      __ srl(Rflags, ConstantPoolCacheEntry::tos_state_shift, Rflags);
+      // Make sure we don't need to mask Rflags after the above shift
+      ConstantPoolCacheEntry::verify_tos_state_shift();
       __ cmp(Rflags, ltos);
       __ br(Assembler::equal, false, Assembler::pt, two_word);
       __ delayed()->cmp(Rflags, dtos);
@@ -2625,7 +2636,7 @@
 
   Label notVolatile, checkVolatile, exit;
   if (__ membar_has_effect(read_bits) || __ membar_has_effect(write_bits)) {
-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
+    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
     __ and3(Rflags, Lscratch, Lscratch);
 
     if (__ membar_has_effect(read_bits)) {
@@ -2635,9 +2646,9 @@
     }
   }
 
-  __ srl(Rflags, ConstantPoolCacheEntry::tosBits, Rflags);
-  // Make sure we don't need to mask Rflags for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
+  __ srl(Rflags, ConstantPoolCacheEntry::tos_state_shift, Rflags);
+  // Make sure we don't need to mask Rflags after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
 
   // compute field type
   Label notInt, notShort, notChar, notObj, notByte, notLong, notFloat;
@@ -2833,7 +2844,7 @@
   Label notVolatile, checkVolatile, exit;
   if (__ membar_has_effect(read_bits) || __ membar_has_effect(write_bits)) {
     __ ld_ptr(Rcache, cp_base_offset + ConstantPoolCacheEntry::flags_offset(), Rflags);
-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
+    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
     __ and3(Rflags, Lscratch, Lscratch);
     if (__ membar_has_effect(read_bits)) {
       __ cmp_and_br_short(Lscratch, 0, Assembler::equal, Assembler::pt, notVolatile);
@@ -2916,7 +2927,7 @@
 
     // Test volatile
     Label notVolatile;
-    __ set((1 << ConstantPoolCacheEntry::volatileField), Lscratch);
+    __ set((1 << ConstantPoolCacheEntry::is_volatile_shift), Lscratch);
     __ btst(Rflags, Lscratch);
     __ br(Assembler::zero, false, Assembler::pt, notVolatile);
     __ delayed()->nop();
@@ -2936,27 +2947,82 @@
   ShouldNotReachHere();
 }
 
+
+void TemplateTable::prepare_invoke(int byte_no,
+                                   Register method,  // linked method (or i-klass)
+                                   Register ra,      // return address
+                                   Register index,   // itable index, MethodType, etc.
+                                   Register recv,    // if caller wants to see it
+                                   Register flags    // if caller wants to test it
+                                   ) {
+  // determine flags
+  const Bytecodes::Code code = bytecode();
+  const bool is_invokeinterface  = code == Bytecodes::_invokeinterface;
+  const bool is_invokedynamic    = code == Bytecodes::_invokedynamic;
+  const bool is_invokehandle     = code == Bytecodes::_invokehandle;
+  const bool is_invokevirtual    = code == Bytecodes::_invokevirtual;
+  const bool is_invokespecial    = code == Bytecodes::_invokespecial;
+  const bool load_receiver       = (recv != noreg);
+  assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), "");
+  assert(recv  == noreg || recv  == O0, "");
+  assert(flags == noreg || flags == O1, "");
+
+  // setup registers & access constant pool cache
+  if (recv  == noreg)  recv  = O0;
+  if (flags == noreg)  flags = O1;
+  const Register temp = O2;
+  assert_different_registers(method, ra, index, recv, flags, temp);
+
+  load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic);
+
+  __ mov(SP, O5_savedSP);  // record SP that we wanted the callee to restore
+
+  // maybe push appendix to arguments
+  if (is_invokedynamic || is_invokehandle) {
+    Label L_no_push;
+    __ verify_oop(index);
+    __ set((1 << ConstantPoolCacheEntry::has_appendix_shift), temp);
+    __ btst(flags, temp);
+    __ br(Assembler::zero, false, Assembler::pt, L_no_push);
+    __ delayed()->nop();
+    // Push the appendix as a trailing parameter.
+    // This must be done before we get the receiver,
+    // since the parameter_size includes it.
+    __ push_ptr(index);  // push appendix (MethodType, CallSite, etc.)
+    __ bind(L_no_push);
+  }
+
+  // load receiver if needed (after appendix is pushed so parameter size is correct)
+  if (load_receiver) {
+    __ and3(flags, ConstantPoolCacheEntry::parameter_size_mask, temp);  // get parameter size
+    __ load_receiver(temp, recv);  //  __ argument_address uses Gargs but we need Lesp
+    __ verify_oop(recv);
+  }
+
+  // compute return type
+  __ srl(flags, ConstantPoolCacheEntry::tos_state_shift, ra);
+  // Make sure we don't need to mask flags after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
+  // load return address
+  {
+    const address table_addr = (is_invokeinterface || is_invokedynamic) ?
+        (address)Interpreter::return_5_addrs_by_index_table() :
+        (address)Interpreter::return_3_addrs_by_index_table();
+    AddressLiteral table(table_addr);
+    __ set(table, temp);
+    __ sll(ra, LogBytesPerWord, ra);
+    __ ld_ptr(Address(temp, ra), ra);
+  }
+}
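The appendix handling in prepare_invoke above hides an ordering constraint
worth spelling out: parameter_size already counts the appendix, so the
appendix must be on the expression stack before the receiver is located by
that size. A toy model, purely illustrative (a std::vector standing in for
the Lesp word stack; none of these names exist in HotSpot):

    #include <cstdint>
    #include <vector>

    // Toy expression stack; the receiver is the deepest of the
    // parameter_size words, and parameter_size counts the appendix.
    intptr_t find_receiver(std::vector<intptr_t>& stack,
                           intptr_t appendix,
                           int parameter_size) {
      stack.push_back(appendix);                   // trailing hidden argument
      // Indexing before the push with this parameter_size would reach one
      // word past the bottom of the arguments -- hence push first.
      return stack[stack.size() - parameter_size];
    }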
+
+
 void TemplateTable::generate_vtable_call(Register Rrecv, Register Rindex, Register Rret) {
   Register Rtemp = G4_scratch;
   Register Rcall = Rindex;
   assert_different_registers(Rcall, G5_method, Gargs, Rret);
 
   // get target methodOop & entry point
-  const int base = instanceKlass::vtable_start_offset() * wordSize;
-  if (vtableEntry::size() % 3 == 0) {
-    // scale the vtable index by 12:
-    int one_third = vtableEntry::size() / 3;
-    __ sll(Rindex, exact_log2(one_third * 1 * wordSize), Rtemp);
-    __ sll(Rindex, exact_log2(one_third * 2 * wordSize), Rindex);
-    __ add(Rindex, Rtemp, Rindex);
-  } else {
-    // scale the vtable index by 8:
-    __ sll(Rindex, exact_log2(vtableEntry::size() * wordSize), Rindex);
-  }
-
-  __ add(Rrecv, Rindex, Rrecv);
-  __ ld_ptr(Rrecv, base + vtableEntry::method_offset_in_bytes(), G5_method);
-
+  __ lookup_virtual_method(Rrecv, Rindex, G5_method);
   __ call_from_interpreter(Rcall, Gargs, Rret);
 }
 
@@ -2965,16 +3031,16 @@
   assert(byte_no == f2_byte, "use this argument");
 
   Register Rscratch = G3_scratch;
-  Register Rtemp = G4_scratch;
-  Register Rret = Lscratch;
-  Register Rrecv = G5_method;
+  Register Rtemp    = G4_scratch;
+  Register Rret     = Lscratch;
+  Register O0_recv  = O0;
   Label notFinal;
 
   load_invoke_cp_cache_entry(byte_no, G5_method, noreg, Rret, true, false, false);
   __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore
 
   // Check for vfinal
-  __ set((1 << ConstantPoolCacheEntry::vfinalMethod), G4_scratch);
+  __ set((1 << ConstantPoolCacheEntry::is_vfinal_shift), G4_scratch);
   __ btst(Rret, G4_scratch);
   __ br(Assembler::zero, false, Assembler::pt, notFinal);
   __ delayed()->and3(Rret, 0xFF, G4_scratch);      // gets number of parameters
@@ -2986,27 +3052,27 @@
   __ bind(notFinal);
 
   __ mov(G5_method, Rscratch);  // better scratch register
-  __ load_receiver(G4_scratch, O0);  // gets receiverOop
-  // receiver is in O0
-  __ verify_oop(O0);
+  __ load_receiver(G4_scratch, O0_recv);  // gets receiverOop
+  // receiver is in O0_recv
+  __ verify_oop(O0_recv);
 
   // get return address
   AddressLiteral table(Interpreter::return_3_addrs_by_index_table());
   __ set(table, Rtemp);
-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
-  // Make sure we don't need to mask Rret for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
+  __ srl(Rret, ConstantPoolCacheEntry::tos_state_shift, Rret);          // get return type
+  // Make sure we don't need to mask Rret after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
   __ sll(Rret,  LogBytesPerWord, Rret);
   __ ld_ptr(Rtemp, Rret, Rret);         // get return address
 
   // get receiver klass
-  __ null_check(O0, oopDesc::klass_offset_in_bytes());
-  __ load_klass(O0, Rrecv);
-  __ verify_oop(Rrecv);
-
-  __ profile_virtual_call(Rrecv, O4);
-
-  generate_vtable_call(Rrecv, Rscratch, Rret);
+  __ null_check(O0_recv, oopDesc::klass_offset_in_bytes());
+  __ load_klass(O0_recv, O0_recv);
+  __ verify_oop(O0_recv);
+
+  __ profile_virtual_call(O0_recv, O4);
+
+  generate_vtable_call(O0_recv, Rscratch, Rret);
 }
 
 void TemplateTable::fast_invokevfinal(int byte_no) {
@@ -3036,9 +3102,9 @@
   // get return address
   AddressLiteral table(Interpreter::return_3_addrs_by_index_table());
   __ set(table, Rtemp);
-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
-  // Make sure we don't need to mask Rret for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
+  __ srl(Rret, ConstantPoolCacheEntry::tos_state_shift, Rret);          // get return type
+  // Make sure we don't need to mask Rret after the above shift
+  ConstantPoolCacheEntry::verify_tos_state_shift();
   __ sll(Rret,  LogBytesPerWord, Rret);
   __ ld_ptr(Rtemp, Rret, Rret);         // get return address
 
@@ -3047,65 +3113,37 @@
   __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
+
 void TemplateTable::invokespecial(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
 
-  Register Rscratch = G3_scratch;
-  Register Rtemp = G4_scratch;
-  Register Rret = Lscratch;
-
-  load_invoke_cp_cache_entry(byte_no, G5_method, noreg, Rret, /*virtual*/ false, false, false);
-  __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore
-
+  const Register Rret     = Lscratch;
+  const Register O0_recv  = O0;
+  const Register Rscratch = G3_scratch;
+
+  prepare_invoke(byte_no, G5_method, Rret, noreg, O0_recv);  // get receiver also for null check
+  __ null_check(O0_recv);
+
+  // do the call
   __ verify_oop(G5_method);
-
-  __ lduh(G5_method, in_bytes(methodOopDesc::size_of_parameters_offset()), G4_scratch);
-  __ load_receiver(G4_scratch, O0);
-
-  // receiver NULL check
-  __ null_check(O0);
-
   __ profile_call(O4);
-
-  // get return address
-  AddressLiteral table(Interpreter::return_3_addrs_by_index_table());
-  __ set(table, Rtemp);
-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
-  // Make sure we don't need to mask Rret for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
-  __ sll(Rret,  LogBytesPerWord, Rret);
-  __ ld_ptr(Rtemp, Rret, Rret);         // get return address
-
-  // do the call
   __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
+
 void TemplateTable::invokestatic(int byte_no) {
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
 
-  Register Rscratch = G3_scratch;
-  Register Rtemp = G4_scratch;
-  Register Rret = Lscratch;
-
-  load_invoke_cp_cache_entry(byte_no, G5_method, noreg, Rret, /*virtual*/ false, false, false);
-  __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore
-
+  const Register Rret     = Lscratch;
+  const Register Rscratch = G3_scratch;
+
+  prepare_invoke(byte_no, G5_method, Rret);  // get f1 methodOop
+
+  // do the call
   __ verify_oop(G5_method);
-
   __ profile_call(O4);
-
-  // get return address
-  AddressLiteral table(Interpreter::return_3_addrs_by_index_table());
-  __ set(table, Rtemp);
-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
-  // Make sure we don't need to mask Rret for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
-  __ sll(Rret,  LogBytesPerWord, Rret);
-  __ ld_ptr(Rtemp, Rret, Rret);         // get return address
-
-  // do the call
   __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
@@ -3122,7 +3160,7 @@
   Label notFinal;
 
   // Check for vfinal
-  __ set((1 << ConstantPoolCacheEntry::vfinalMethod), Rscratch);
+  __ set((1 << ConstantPoolCacheEntry::is_vfinal_shift), Rscratch);
   __ btst(Rflags, Rscratch);
   __ br(Assembler::zero, false, Assembler::pt, notFinal);
   __ delayed()->nop();
@@ -3144,53 +3182,37 @@
   transition(vtos, vtos);
   assert(byte_no == f1_byte, "use this argument");
 
-  Register Rscratch = G4_scratch;
-  Register Rret = G3_scratch;
-  Register Rindex = Lscratch;
-  Register Rinterface = G1_scratch;
-  Register RklassOop = G5_method;
-  Register Rflags = O1;
+  const Register Rinterface  = G1_scratch;
+  const Register Rret        = G3_scratch;
+  const Register Rindex      = Lscratch;
+  const Register O0_recv     = O0;
+  const Register O1_flags    = O1;
+  const Register O2_klassOop = O2;
+  const Register Rscratch    = G4_scratch;
   assert_different_registers(Rscratch, G5_method);
 
-  load_invoke_cp_cache_entry(byte_no, Rinterface, Rindex, Rflags, /*virtual*/ false, false, false);
-  __ mov(SP, O5_savedSP); // record SP that we wanted the callee to restore
-
-  // get receiver
-  __ and3(Rflags, 0xFF, Rscratch);       // gets number of parameters
-  __ load_receiver(Rscratch, O0);
-  __ verify_oop(O0);
-
-  __ mov(Rflags, Rret);
-
-  // get return address
-  AddressLiteral table(Interpreter::return_5_addrs_by_index_table());
-  __ set(table, Rscratch);
-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);          // get return type
-  // Make sure we don't need to mask Rret for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
-  __ sll(Rret,  LogBytesPerWord, Rret);
-  __ ld_ptr(Rscratch, Rret, Rret);      // get return address
+  prepare_invoke(byte_no, Rinterface, Rret, Rindex, O0_recv, O1_flags);
 
   // get receiver klass
-  __ null_check(O0, oopDesc::klass_offset_in_bytes());
-  __ load_klass(O0, RklassOop);
-  __ verify_oop(RklassOop);
+  __ null_check(O0_recv, oopDesc::klass_offset_in_bytes());
+  __ load_klass(O0_recv, O2_klassOop);
+  __ verify_oop(O2_klassOop);
 
   // Special case of invokeinterface called for virtual method of
   // java.lang.Object.  See cpCacheOop.cpp for details.
   // This code isn't produced by javac, but could be produced by
   // another compliant java compiler.
   Label notMethod;
-  __ set((1 << ConstantPoolCacheEntry::methodInterface), Rscratch);
-  __ btst(Rflags, Rscratch);
+  __ set((1 << ConstantPoolCacheEntry::is_forced_virtual_shift), Rscratch);
+  __ btst(O1_flags, Rscratch);
   __ br(Assembler::zero, false, Assembler::pt, notMethod);
   __ delayed()->nop();
 
-  invokeinterface_object_method(RklassOop, Rinterface, Rret, Rflags);
+  invokeinterface_object_method(O2_klassOop, Rinterface, Rret, O1_flags);
 
   __ bind(notMethod);
 
-  __ profile_virtual_call(RklassOop, O4);
+  __ profile_virtual_call(O2_klassOop, O4);
 
   //
   // find entry point to call
@@ -3199,9 +3221,9 @@
   // compute start of first itableOffsetEntry (which is at end of vtable)
   const int base = instanceKlass::vtable_start_offset() * wordSize;
   Label search;
-  Register Rtemp = Rflags;
-
-  __ ld(RklassOop, instanceKlass::vtable_length_offset() * wordSize, Rtemp);
+  Register Rtemp = O1_flags;
+
+  __ ld(O2_klassOop, instanceKlass::vtable_length_offset() * wordSize, Rtemp);
   if (align_object_offset(1) > 1) {
     __ round_to(Rtemp, align_object_offset(1));
   }
@@ -3212,7 +3234,7 @@
     __ set(base, Rscratch);
     __ add(Rscratch, Rtemp, Rtemp);
   }
-  __ add(RklassOop, Rtemp, Rscratch);
+  __ add(O2_klassOop, Rtemp, Rscratch);
 
   __ bind(search);
 
@@ -3244,7 +3266,7 @@
   assert(itableMethodEntry::method_offset_in_bytes() == 0, "adjust instruction below");
   __ sll(Rindex, exact_log2(itableMethodEntry::size() * wordSize), Rindex);       // Rindex *= 8;
   __ add(Rscratch, Rindex, Rscratch);
-  __ ld_ptr(RklassOop, Rscratch, G5_method);
+  __ ld_ptr(O2_klassOop, Rscratch, G5_method);
 
   // Check for abstract method error.
   {
@@ -3260,13 +3282,42 @@
 
   __ verify_oop(G5_method);
   __ call_from_interpreter(Rcall, Gargs, Rret);
-
+}
+
+
+void TemplateTable::invokehandle(int byte_no) {
+  transition(vtos, vtos);
+  assert(byte_no == f12_oop, "use this argument");
+
+  if (!EnableInvokeDynamic) {
+    // rewriter does not generate this bytecode
+    __ should_not_reach_here();
+    return;
+  }
+
+  const Register Rret       = Lscratch;
+  const Register G4_mtype   = G4_scratch;  // f1
+  const Register O0_recv    = O0;
+  const Register Rscratch   = G3_scratch;
+
+  prepare_invoke(byte_no, G5_method, Rret, G4_mtype, O0_recv);
+  __ null_check(O0_recv);
+
+  // G4: MethodType object (from f1)
+  // G5: methodOop invoker for the MethodHandle call (from f2)
+
+  // Note:  G4_mtype is already pushed (if necessary) by prepare_invoke
+
+  // do the call
+  __ verify_oop(G5_method);
+  __ profile_final_call(O4);  // FIXME: profile the LambdaForm also
+  __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
 
 
 void TemplateTable::invokedynamic(int byte_no) {
   transition(vtos, vtos);
-  assert(byte_no == f1_oop, "use this argument");
+  assert(byte_no == f12_oop, "use this argument");
 
   if (!EnableInvokeDynamic) {
     // We should not encounter this bytecode if !EnableInvokeDynamic.
@@ -3279,42 +3330,24 @@
     return;
   }
 
-  // G5: CallSite object (f1)
-  // XX: unused (f2)
-  // XX: flags (unused)
-
-  Register G5_callsite = G5_method;
-  Register Rscratch    = G3_scratch;
-  Register Rtemp       = G1_scratch;
-  Register Rret        = Lscratch;
-
-  load_invoke_cp_cache_entry(byte_no, G5_callsite, noreg, Rret,
-                             /*virtual*/ false, /*vfinal*/ false, /*indy*/ true);
-  __ mov(SP, O5_savedSP);  // record SP that we wanted the callee to restore
-
+  const Register Rret        = Lscratch;
+  const Register G4_callsite = G4_scratch;
+  const Register Rscratch    = G3_scratch;
+
+  prepare_invoke(byte_no, G5_method, Rret, G4_callsite);
+
+  // G4: CallSite object (from f1)
+  // G5: MH.linkToCallSite method (from f2)
+
+  // Note:  G4_callsite is already pushed by prepare_invoke
+
+  // %%% should make a type profile for any invokedynamic that takes a ref argument
   // profile this call
   __ profile_call(O4);
 
-  // get return address
-  AddressLiteral table(Interpreter::return_5_addrs_by_index_table());
-  __ set(table, Rtemp);
-  __ srl(Rret, ConstantPoolCacheEntry::tosBits, Rret);  // get return type
-  // Make sure we don't need to mask Rret for tosBits after the above shift
-  ConstantPoolCacheEntry::verify_tosBits();
-  __ sll(Rret, LogBytesPerWord, Rret);
-  __ ld_ptr(Rtemp, Rret, Rret);  // get return address
-
-  __ verify_oop(G5_callsite);
-  __ load_heap_oop(G5_callsite, __ delayed_value(java_lang_invoke_CallSite::target_offset_in_bytes, Rscratch), G3_method_handle);
-  __ null_check(G3_method_handle);
-  __ verify_oop(G3_method_handle);
-
-  // Adjust Rret first so Llast_SP can be same as Rret
-  __ add(Rret, -frame::pc_return_offset, O7);
-  __ add(Lesp, BytesPerWord, Gargs);  // setup parameter pointer
-  __ jump_to_method_handle_entry(G3_method_handle, Rtemp, /* emit_delayed_nop */ false);
-  // Record SP so we can remove any stack space allocated by adapter transition
-  __ delayed()->mov(SP, Llast_SP);
+  // do the call
+  __ verify_oop(G5_method);
+  __ call_from_interpreter(Rscratch, Gargs, Rret);
 }
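Both invokehandle and invokedynamic above rely on the same constant-pool-cache
layout, which these hunks only reference obliquely; restating it (assembled
from the resolve_cache_and_index and load_invoke_cp_cache_entry comments
earlier in this change, so treat it as a reading aid, not a spec):

    // f12_oop cache entry shape assumed by the new code paths:
    //   f1: resolved oop -- CallSite for invokedynamic, MethodType for
    //       invokehandle; goes to the appendix register (G4) and may be
    //       pushed as a trailing argument by prepare_invoke.
    //   f2: methodOop invoker (e.g. MH.linkToCallSite for invokedynamic),
    //       loaded into G5_method and dispatched through the ordinary
    //       call_from_interpreter path like any other invoke.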
 
 
--- a/src/cpu/sparc/vm/templateTable_sparc.hpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/templateTable_sparc.hpp	Fri Sep 07 18:18:55 2012 -0700
@@ -25,6 +25,13 @@
 #ifndef CPU_SPARC_VM_TEMPLATETABLE_SPARC_HPP
 #define CPU_SPARC_VM_TEMPLATETABLE_SPARC_HPP
 
+  static void prepare_invoke(int byte_no,
+                             Register method,         // linked method (or i-klass)
+                             Register ra,             // return address
+                             Register index = noreg,  // itable index, MethodType, etc.
+                             Register recv  = noreg,  // if caller wants to see it
+                             Register flags = noreg   // if caller wants to test it
+                             );
   // helper function
   static void invokevfinal_helper(Register Rcache, Register Rret);
   static void invokeinterface_object_method(Register RklassOop, Register Rcall,
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -106,10 +106,10 @@
     if (FLAG_IS_DEFAULT(OptoLoopAlignment)) {
       FLAG_SET_DEFAULT(OptoLoopAlignment, 4);
     }
-    // When using CMS, we cannot use memset() in BOT updates because
-    // the sun4v/CMT version in libc_psr uses BIS which exposes
-    // "phantom zeros" to concurrent readers. See 6948537.
-    if (FLAG_IS_DEFAULT(UseMemSetInBOT) && UseConcMarkSweepGC) {
+    // When using CMS or G1, we cannot use memset() in BOT updates
+    // because the sun4v/CMT version in libc_psr uses BIS which
+    // exposes "phantom zeros" to concurrent readers. See 6948537.
+    if (FLAG_IS_DEFAULT(UseMemSetInBOT) && (UseConcMarkSweepGC || UseG1GC)) {
       FLAG_SET_DEFAULT(UseMemSetInBOT, false);
     }
 #ifdef _LP64
--- a/src/cpu/sparc/vm/vm_version_sparc.hpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.hpp	Fri Sep 07 18:18:55 2012 -0700
@@ -44,10 +44,11 @@
     fmaf_instructions    = 10,
     fmau_instructions    = 11,
     vis3_instructions    = 12,
-    sparc64_family       = 13,
-    T_family             = 14,
-    T1_model             = 15,
-    cbcond_instructions  = 16
+    cbcond_instructions  = 13,
+    sparc64_family       = 14,
+    M_family             = 15,
+    T_family             = 16,
+    T1_model             = 17
   };
 
   enum Feature_Flag_Set {
@@ -67,10 +68,11 @@
     fmaf_instructions_m     = 1 << fmaf_instructions,
     fmau_instructions_m     = 1 << fmau_instructions,
     vis3_instructions_m     = 1 << vis3_instructions,
+    cbcond_instructions_m   = 1 << cbcond_instructions,
     sparc64_family_m        = 1 << sparc64_family,
+    M_family_m              = 1 << M_family,
     T_family_m              = 1 << T_family,
     T1_model_m              = 1 << T1_model,
-    cbcond_instructions_m   = 1 << cbcond_instructions,
 
     generic_v8_m        = v8_instructions_m | hardware_mul32_m | hardware_div32_m | hardware_fsmuld_m,
     generic_v9_m        = generic_v8_m | v9_instructions_m,
@@ -89,6 +91,7 @@
   static int  platform_features(int features);
 
   // Returns true if the platform is in the niagara line (T series)
+  static bool is_M_family(int features) { return (features & M_family_m) != 0; }
   static bool is_T_family(int features) { return (features & T_family_m) != 0; }
   static bool is_niagara() { return is_T_family(_features); }
   DEBUG_ONLY( static bool is_niagara(int features)  { return (features & sun4v_m) != 0; } )
--- a/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/sparc/vm/vtableStubs_sparc.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -70,7 +70,6 @@
   __ load_klass(O0, G3_scratch);
 
   // set methodOop (in case of interpreted method), and destination address
-  int entry_offset = instanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();
 #ifndef PRODUCT
   if (DebugVtables) {
     Label L;
@@ -82,13 +81,8 @@
     __ bind(L);
   }
 #endif
-  int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes();
-  if (Assembler::is_simm13(v_off)) {
-    __ ld_ptr(G3, v_off, G5_method);
-  } else {
-    __ set(v_off,G5);
-    __ ld_ptr(G3, G5, G5_method);
-  }
+
+  __ lookup_virtual_method(G3_scratch, vtable_index, G5_method);
 
 #ifndef PRODUCT
   if (DebugVtables) {
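This site and generate_vtable_call earlier now share
MacroAssembler::lookup_virtual_method, whose definition falls outside this
excerpt. Reconstructing from the deleted code, the address arithmetic it
presumably centralizes is:

    // Presumed effect of lookup_virtual_method (reconstructed, unverified):
    //   G5_method = *(methodOop*)(recv_klass
    //                 + instanceKlass::vtable_start_offset() * wordSize
    //                 + vtable_index * vtableEntry::size() * wordSize
    //                 + vtableEntry::method_offset_in_bytes());
    // with the simm13-vs-register offset split handled internally, as the
    // removed is_simm13(v_off) branch used to do here.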
--- a/src/cpu/x86/vm/assembler_x86.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -41,6 +41,15 @@
 #include "gc_implementation/g1/heapRegion.hpp"
 #endif
 
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#define STOP(error) stop(error)
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#define STOP(error) block_comment(error); stop(error)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
 // Implementation of AddressLiteral
 
 AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) {
@@ -990,32 +999,22 @@
 
 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::addsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_operand(dst, src);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::addss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::addss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_operand(dst, src);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::andl(Address dst, int32_t imm32) {
@@ -1043,36 +1042,6 @@
   emit_arith(0x23, 0xC0, dst, src);
 }
 
-void Assembler::andpd(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x54);
-  emit_operand(dst, src);
-}
-
-void Assembler::andpd(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x54);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::andps(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x54);
-  emit_operand(dst, src);
-}
-
-void Assembler::andps(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x54);
-  emit_byte(0xC0 | encode);
-}
-
 void Assembler::bsfl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   emit_byte(0x0F);
@@ -1237,61 +1206,42 @@
   // NOTE: dbx seems to decode this as comiss even though the
   // 0x66 is there. Strangely ucomisd comes out correct
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x2F);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x2F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::comiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2F);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
-  emit_byte(0xE6);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x5B);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5A);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
@@ -1303,10 +1253,7 @@
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x2A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
@@ -1318,25 +1265,17 @@
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x2A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5A);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
 }
 
 
@@ -1364,32 +1303,22 @@
 
 void Assembler::divsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::divss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::divss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::emms() {
@@ -1625,16 +1554,12 @@
 
 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x28);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x28);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
@@ -1703,24 +1628,17 @@
 
 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x6F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::movdqu(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x6F);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
-  emit_byte(0x6F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movdqu(Address dst, XMMRegister src) {
@@ -1801,10 +1719,7 @@
 // The selection is done in MacroAssembler::movdbl() and movflt().
 void Assembler::movlpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x12);
-  emit_operand(dst, src);
+  emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::movq( MMXRegister dst, Address src ) {
@@ -1861,17 +1776,12 @@
 
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x10);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::movsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F2);
-  emit_byte(0x10);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::movsd(Address dst, XMMRegister src) {
@@ -1884,17 +1794,12 @@
 
 void Assembler::movss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x10);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x10);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movss(Address dst, XMMRegister src) {
@@ -1992,32 +1897,22 @@
 
 void Assembler::mulsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_operand(dst, src);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::mulss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_operand(dst, src);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::negl(Register dst) {
@@ -2306,17 +2201,12 @@
 void Assembler::packuswb(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x67);
-  emit_operand(dst, src);
+  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x67);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
@@ -2330,7 +2220,7 @@
 
 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
   emit_byte(0x61);
   emit_byte(0xC0 | encode);
   emit_byte(imm8);
@@ -2346,7 +2236,7 @@
 
 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
   emit_byte(0x30);
   emit_byte(0xC0 | encode);
 }
@@ -2447,28 +2337,10 @@
   a_byte(p);
 }
 
-void Assembler::por(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEB);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::por(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEB);
-  emit_operand(dst, src);
-}
-
 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x70);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
   emit_byte(mode & 0xFF);
 
 }
@@ -2487,9 +2359,7 @@
 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
-  emit_byte(0x70);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
   emit_byte(mode & 0xFF);
 }
 
@@ -2504,18 +2374,6 @@
   emit_byte(mode & 0xFF);
 }
 
-void Assembler::psrlq(XMMRegister dst, int shift) {
-  // Shift 64 bit value logically right by specified number of bits.
-  // HMM Table D-1 says sse2 or mmx.
-  // Do not confuse it with psrldq SSE2 instruction which
-  // shifts 128 bit value in xmm register by number of bytes.
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
-  emit_byte(0x73);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift);
-}
-
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -2536,7 +2394,7 @@
 
 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
   emit_byte(0x17);
   emit_byte(0xC0 | encode);
 }
@@ -2544,40 +2402,28 @@
 void Assembler::punpcklbw(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x60);
-  emit_operand(dst, src);
+  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x60);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpckldq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x62);
-  emit_operand(dst, src);
+  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x62);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x6C);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::push(int32_t imm32) {
@@ -2607,22 +2453,6 @@
 }
 #endif
 
-void Assembler::pxor(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEF);
-  emit_operand(dst, src);
-}
-
-void Assembler::pxor(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEF);
-  emit_byte(0xC0 | encode);
-}
-
 void Assembler::rcll(Register dst, int imm8) {
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
@@ -2781,32 +2611,22 @@
 
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x51);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::sqrtsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x51);
-  emit_operand(dst, src);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x51);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::sqrtss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x51);
-  emit_operand(dst, src);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::stmxcsr( Address dst) {
@@ -2856,32 +2676,22 @@
 
 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::subsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::subss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::subss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::testb(Register dst, int imm8) {
@@ -2919,32 +2729,22 @@
 
 void Assembler::ucomisd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x2E);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x2E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::ucomiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2E);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
 }
 
 
@@ -2986,211 +2786,714 @@
   emit_arith(0x33, 0xC0, dst, src);
 }
 
+
+// AVX 3-operand scalar floating-point arithmetic instructions
+
+void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
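A quick illustration of what the non-destructive three-operand form buys over the two-operand SSE encodings above (a sketch only; `__` is the usual HotSpot shorthand for the active MacroAssembler, and the register choices are arbitrary):

    __ movdbl(xmm0, Address(rsp, 0));  // xmm0 = a
    __ movdbl(xmm1, Address(rsp, 8));  // xmm1 = b
    __ addsd(xmm0, xmm1);              // SSE:  xmm0 = xmm0 + xmm1, 'a' is clobbered
    __ vaddsd(xmm2, xmm0, xmm1);       // AVX:  xmm2 = xmm0 + xmm1, both sources survive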
+//====================VECTOR ARITHMETIC=====================================
+
+// Floating-point vector arithmetic
+
+void Assembler::addpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::addps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::subpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::subps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::mulps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::divpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::divps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::andpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::andps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::andps(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::andpd(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x57);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::xorpd(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x57);
-  emit_operand(dst, src);
-}
-
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
+}
 
 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x57);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::xorpd(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::xorps(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x57);
-  emit_operand(dst, src);
-}
-
-// AVX 3-operands non destructive source instructions (encoded with VEX prefix)
-
-void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_operand(dst, src);
-}
-
-void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_operand(dst, src);
-}
-
-void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
-  emit_byte(0x54);
-  emit_operand(dst, src);
-}
-
-void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
-  emit_byte(0x54);
-  emit_operand(dst, src);
-}
-
-void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
-}
-
-void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
-}
-
-void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_operand(dst, src);
-}
-
-void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_operand(dst, src);
-}
-
-void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
-}
-
-
-void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
-}
-
-void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
-}
-
-void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
-  emit_byte(0x57);
-  emit_operand(dst, src);
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256);
-  emit_byte(0x57);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
-  emit_byte(0x57);
-  emit_operand(dst, src);
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
 }
 
 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, vector256);
-  emit_byte(0x57);
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+
+// Integer vector arithmetic
+void Assembler::paddb(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddw(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
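The `vector256` flag only flips the VEX.L bit in the prefix; the same `XMMRegister` arguments name the corresponding ymm registers when it is set. A hedged usage sketch (registers arbitrary):

    __ vpaddd(xmm0, xmm1, xmm2, /* vector256 */ false); // 128-bit lanes, plain AVX
    __ vpaddd(xmm0, xmm1, xmm2, /* vector256 */ true);  // 256-bit ymm op, needs AVX2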
+void Assembler::psubb(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubw(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_sse4_1(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0x40);
   emit_byte(0xC0 | encode);
 }
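`pmulld` cannot go through `emit_simd_arith` because that helper assumes the default 0F opcode map, while the SSE4.1 multiply lives in the 0F 38 map. A byte-level sketch of what the hand-rolled body above produces (values per the Intel SDM; worth double-checking against a disassembler):

    // pmulld xmm1, xmm2  ==>  66 0F 38 40 CA
    //   66      operand-size prefix (VEX_SIMD_66)
    //   0F 38   two-byte opcode escape (VEX_OPCODE_0F_38)
    //   40      opcode
    //   CA      ModRM: mod=11, reg=001 (xmm1), rm=010 (xmm2)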
 
+void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+  emit_byte(0x40);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  InstructionMark im(this);
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+  emit_byte(0x40);
+  emit_operand(dst, src);
+}
+
+// Shift packed integers left by the specified number of bits.
+void Assembler::psllw(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  emit_byte(0x71);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::pslld(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  emit_byte(0x72);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psllq(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  emit_byte(0x73);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
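The immediate-shift forms have no register destination field to spare: the ModRM reg bits carry an opcode extension instead, and the code fakes that by passing `xmm6` (register encoding 110, i.e. /6) as the first argument. A worked sketch for `psllw(xmm1, 5)` (bytes per the Intel SDM):

    // psllw xmm1, 5  ==>  66 0F 71 F1 05
    //   F1 = ModRM 11 110 001: mod=11, reg=110 (/6 extension), rm=001 (xmm1)
    // xmm6 itself is never read or written; it only supplies the 110 bit pattern.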
+void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
+  emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
+  emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
+  emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+// Shift packed integers logically right by the specified number of bits.
+void Assembler::psrlw(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  emit_byte(0x71);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrld(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  emit_byte(0x72);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrlq(XMMRegister dst, int shift) {
+  // Do not confuse this with the psrldq SSE2 instruction, which
+  // shifts the whole 128-bit xmm register right by a number of bytes.
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  emit_byte(0x73);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
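The distinction called out in the comment above, as a usage sketch (register and counts arbitrary):

    __ psrlq(xmm0, 8);    // shifts each 64-bit lane right by 8 *bits*
    __ psrldq(xmm0, 8);   // shifts the whole 128-bit register right by 8 *bytes*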
+void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
+  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
+  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
+  emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+// Shift packed integers arithmetically right by the specified number of bits.
+void Assembler::psraw(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
+  emit_byte(0x71);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrad(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
+  emit_byte(0x72);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
+  emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
+  emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+
+// AND packed integers
+void Assembler::pand(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::por(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::pxor(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
+}
+
 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx2() || (!vector256) && VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256);
-  emit_byte(0xEF);
-  emit_byte(0xC0 | encode);
-}
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
 
 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
@@ -3796,6 +4099,49 @@
   }
 }
 
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, pre);
+  emit_byte(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
+  int encode = simd_prefix_and_encode(dst, dst, src, pre);
+  emit_byte(opcode);
+  emit_byte(0xC0 | encode);
+}
+
+// Versions with no second source register (non-destructive source).
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
+  InstructionMark im(this);
+  simd_prefix(dst, xnoreg, src, pre);
+  emit_byte(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
+  emit_byte(opcode);
+  emit_byte(0xC0 | encode);
+}
+
+// 3-operand AVX instructions
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                               Address src, VexSimdPrefix pre, bool vector256) {
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, pre, vector256);
+  emit_byte(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                               XMMRegister src, VexSimdPrefix pre, bool vector256) {
+  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
+  emit_byte(opcode);
+  emit_byte(0xC0 | encode);
+}
+
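These six helpers are the point of the refactoring above: every two-operand SSE body, non-destructive-source body, and three-operand VEX body collapses from the same repeated prefix/opcode/ModRM boilerplate into a single call. Taking `subsd` from earlier in this diff as the example:

    // Before (repeated in every instruction body):
    //   int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
    //   emit_byte(0x5C);
    //   emit_byte(0xC0 | encode);
    // After:
    //   emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);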
 #ifndef _LP64
 
 void Assembler::incl(Register dst) {
@@ -5508,23 +5854,7 @@
     // To see where a verify_oop failed, get $ebx+40/X for this frame.
     // This is the value of eip which points to where verify_oop will return.
     if (os::message_box(msg, "Execution stopped, print registers?")) {
-      ttyLocker ttyl;
-      tty->print_cr("eip = 0x%08x", eip);
-#ifndef PRODUCT
-      if ((WizardMode || Verbose) && PrintMiscellaneous) {
-        tty->cr();
-        findpc(eip);
-        tty->cr();
-      }
-#endif
-      tty->print_cr("rax = 0x%08x", rax);
-      tty->print_cr("rbx = 0x%08x", rbx);
-      tty->print_cr("rcx = 0x%08x", rcx);
-      tty->print_cr("rdx = 0x%08x", rdx);
-      tty->print_cr("rdi = 0x%08x", rdi);
-      tty->print_cr("rsi = 0x%08x", rsi);
-      tty->print_cr("rbp = 0x%08x", rbp);
-      tty->print_cr("rsp = 0x%08x", rsp);
+      print_state32(rdi, rsi, rbp, rsp, rbx, rdx, rcx, rax, eip);
       BREAKPOINT;
       assert(false, "start up GDB");
     }
@@ -5536,12 +5866,53 @@
   ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
 }
 
+void MacroAssembler::print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip) {
+  ttyLocker ttyl;
+  FlagSetting fs(Debugging, true);
+  tty->print_cr("eip = 0x%08x", eip);
+#ifndef PRODUCT
+  if ((WizardMode || Verbose) && PrintMiscellaneous) {
+    tty->cr();
+    findpc(eip);
+    tty->cr();
+  }
+#endif
+#define PRINT_REG(rax) \
+  { tty->print("%s = ", #rax); os::print_location(tty, rax); }
+  PRINT_REG(rax);
+  PRINT_REG(rbx);
+  PRINT_REG(rcx);
+  PRINT_REG(rdx);
+  PRINT_REG(rdi);
+  PRINT_REG(rsi);
+  PRINT_REG(rbp);
+  PRINT_REG(rsp);
+#undef PRINT_REG
+  // Print some words near the top of the stack.
+  int* dump_sp = (int*) rsp;
+  for (int col1 = 0; col1 < 8; col1++) {
+    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
+    os::print_location(tty, *dump_sp++);
+  }
+  for (int row = 0; row < 16; row++) {
+    tty->print("(rsp+0x%03x) 0x%08x: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (intptr_t)dump_sp);
+    for (int col = 0; col < 8; col++) {
+      tty->print(" 0x%08x", *dump_sp++);
+    }
+    tty->cr();
+  }
+  // Print some instructions around pc:
+  Disassembler::decode((address)eip-64, (address)eip);
+  tty->print_cr("--------");
+  Disassembler::decode((address)eip, (address)eip+32);
+}
+
 void MacroAssembler::stop(const char* msg) {
   ExternalAddress message((address)msg);
   // push address of message
   pushptr(message.addr());
   { Label L; call(L, relocInfo::none); bind(L); }     // push eip
-  pusha();                                           // push registers
+  pusha();                                            // push registers
   call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug32)));
   hlt();
 }
@@ -5558,6 +5929,18 @@
   pop_CPU_state();
 }
 
+void MacroAssembler::print_state() {
+  { Label L; call(L, relocInfo::none); bind(L); }     // push eip
+  pusha();                                            // push registers
+
+  push_CPU_state();
+  call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::print_state32)));
+  pop_CPU_state();
+
+  popa();
+  addl(rsp, wordSize);
+}
+
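The `{ Label L; call(L, relocInfo::none); bind(L); }` idiom used here and in `stop()` is how the 32-bit code materializes its own program counter: a call whose target is the immediately following instruction falls straight through but pushes the current eip as a side effect; the final `addl(rsp, wordSize)` discards that word again once the registers have been restored. A hypothetical use while debugging generated code:

    __ print_state();   // snapshot registers, stack words, and code around the current pc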
 #else // _LP64
 
 // 64 bit versions
@@ -6023,14 +6406,33 @@
 }
 
 void MacroAssembler::warn(const char* msg) {
-  push(rsp);
+  push(rbp);
+  movq(rbp, rsp);
   andq(rsp, -16);     // align stack as required by push_CPU_state and call
-
   push_CPU_state();   // keeps alignment at 16 bytes
   lea(c_rarg0, ExternalAddress((address) msg));
   call_VM_leaf(CAST_FROM_FN_PTR(address, warning), c_rarg0);
   pop_CPU_state();
-  pop(rsp);
+  mov(rsp, rbp);
+  pop(rbp);
+}
+
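The removed `push(rsp)`/`pop(rsp)` pair in `warn()` was subtly broken: after `andq(rsp, -16)` the stack pointer can sit below the slot the push wrote, so the balancing pop would read the saved value from the realigned location rather than the original one. Anchoring the old rsp in a conventional rbp frame makes the restore exact (a sketch of the pattern, not new code):

    push(rbp); movq(rbp, rsp);   // save incoming rsp
    andq(rsp, -16);              // rsp may now be realigned freely
    // ... aligned calls ...
    mov(rsp, rbp); pop(rbp);     // exact restore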
+void MacroAssembler::print_state() {
+  address rip = pc();
+  pusha();            // get regs on stack
+  push(rbp);
+  movq(rbp, rsp);
+  andq(rsp, -16);     // align stack as required by push_CPU_state and call
+  push_CPU_state();   // keeps alignment at 16 bytes
+
+  lea(c_rarg0, InternalAddress(rip));
+  lea(c_rarg1, Address(rbp, wordSize)); // pass pointer to regs array
+  call_VM_leaf(CAST_FROM_FN_PTR(address, MacroAssembler::print_state64), c_rarg0, c_rarg1);
+
+  pop_CPU_state();
+  mov(rsp, rbp);
+  pop(rbp);
+  popa();
 }
 
 #ifndef PRODUCT
@@ -6039,7 +6441,7 @@
 
 void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) {
   // In order to get locks to work, we need to fake an in_VM state
-  if (ShowMessageBoxOnError ) {
+  if (ShowMessageBoxOnError) {
     JavaThread* thread = JavaThread::current();
     JavaThreadState saved_state = thread->thread_state();
     thread->set_thread_state(_thread_in_vm);
@@ -6053,30 +6455,9 @@
     // XXX correct this offset for amd64
     // This is the value of eip which points to where verify_oop will return.
     if (os::message_box(msg, "Execution stopped, print registers?")) {
-      ttyLocker ttyl;
-      tty->print_cr("rip = 0x%016lx", pc);
-#ifndef PRODUCT
-      tty->cr();
-      findpc(pc);
-      tty->cr();
-#endif
-      tty->print_cr("rax = 0x%016lx", regs[15]);
-      tty->print_cr("rbx = 0x%016lx", regs[12]);
-      tty->print_cr("rcx = 0x%016lx", regs[14]);
-      tty->print_cr("rdx = 0x%016lx", regs[13]);
-      tty->print_cr("rdi = 0x%016lx", regs[8]);
-      tty->print_cr("rsi = 0x%016lx", regs[9]);
-      tty->print_cr("rbp = 0x%016lx", regs[10]);
-      tty->print_cr("rsp = 0x%016lx", regs[11]);
-      tty->print_cr("r8  = 0x%016lx", regs[7]);
-      tty->print_cr("r9  = 0x%016lx", regs[6]);
-      tty->print_cr("r10 = 0x%016lx", regs[5]);
-      tty->print_cr("r11 = 0x%016lx", regs[4]);
-      tty->print_cr("r12 = 0x%016lx", regs[3]);
-      tty->print_cr("r13 = 0x%016lx", regs[2]);
-      tty->print_cr("r14 = 0x%016lx", regs[1]);
-      tty->print_cr("r15 = 0x%016lx", regs[0]);
+      print_state64(pc, regs);
       BREAKPOINT;
+      assert(false, "start up GDB");
     }
     ThreadStateTransition::transition(thread, _thread_in_vm, saved_state);
   } else {
@@ -6087,6 +6468,54 @@
   }
 }
 
+void MacroAssembler::print_state64(int64_t pc, int64_t regs[]) {
+  ttyLocker ttyl;
+  FlagSetting fs(Debugging, true);
+  tty->print_cr("rip = 0x%016lx", pc);
+#ifndef PRODUCT
+  tty->cr();
+  findpc(pc);
+  tty->cr();
+#endif
+#define PRINT_REG(rax, value) \
+  { tty->print("%s = ", #rax); os::print_location(tty, value); }
+  PRINT_REG(rax, regs[15]);
+  PRINT_REG(rbx, regs[12]);
+  PRINT_REG(rcx, regs[14]);
+  PRINT_REG(rdx, regs[13]);
+  PRINT_REG(rdi, regs[8]);
+  PRINT_REG(rsi, regs[9]);
+  PRINT_REG(rbp, regs[10]);
+  PRINT_REG(rsp, regs[11]);
+  PRINT_REG(r8 , regs[7]);
+  PRINT_REG(r9 , regs[6]);
+  PRINT_REG(r10, regs[5]);
+  PRINT_REG(r11, regs[4]);
+  PRINT_REG(r12, regs[3]);
+  PRINT_REG(r13, regs[2]);
+  PRINT_REG(r14, regs[1]);
+  PRINT_REG(r15, regs[0]);
+#undef PRINT_REG
+  // Print some words near the top of the stack.
+  int64_t* rsp = (int64_t*) regs[11];
+  int64_t* dump_sp = rsp;
+  for (int col1 = 0; col1 < 8; col1++) {
+    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
+    os::print_location(tty, *dump_sp++);
+  }
+  for (int row = 0; row < 25; row++) {
+    tty->print("(rsp+0x%03x) 0x%016lx: ", (int)((intptr_t)dump_sp - (intptr_t)rsp), (int64_t)dump_sp);
+    for (int col = 0; col < 4; col++) {
+      tty->print(" 0x%016lx", *dump_sp++);
+    }
+    tty->cr();
+  }
+  // Print some instructions around pc:
+  Disassembler::decode((address)pc-64, (address)pc);
+  tty->print_cr("--------");
+  Disassembler::decode((address)pc, (address)pc+32);
+}
+
 #endif // _LP64
 
 // Now versions that are common to 32/64 bit
@@ -6456,7 +6885,7 @@
       get_thread(rax);
       cmpptr(java_thread, rax);
       jcc(Assembler::equal, L);
-      stop("MacroAssembler::call_VM_base: rdi not callee saved?");
+      STOP("MacroAssembler::call_VM_base: rdi not callee saved?");
       bind(L);
     }
     pop(rax);
@@ -7196,7 +7625,7 @@
       jcc(Assembler::notZero, integer);
       cmpl(tmp3, 0x80000000);
       jcc(Assembler::notZero, integer);
-      stop("integer indefinite value shouldn't be seen here");
+      STOP("integer indefinite value shouldn't be seen here");
       bind(integer);
     }
 #else
@@ -7206,7 +7635,7 @@
       shlq(tmp3, 1);
       jcc(Assembler::carryClear, integer);
       jcc(Assembler::notZero, integer);
-      stop("integer indefinite value shouldn't be seen here");
+      STOP("integer indefinite value shouldn't be seen here");
       bind(integer);
     }
 #endif
@@ -7876,21 +8305,21 @@
   }
 }
 
-void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vandpd(dst, nds, as_Address(src));
+    vandpd(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vandpd(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+    vandpd(dst, nds, Address(rscratch1, 0), vector256);
+  }
+}
+
+void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vandps(dst, nds, as_Address(src));
+    vandps(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vandps(dst, nds, Address(rscratch1, 0));
+    vandps(dst, nds, Address(rscratch1, 0), vector256);
   }
 }
 
@@ -7948,21 +8377,21 @@
   }
 }
 
-void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vxorpd(dst, nds, as_Address(src));
+    vxorpd(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vxorpd(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+    vxorpd(dst, nds, Address(rscratch1, 0), vector256);
+  }
+}
+
+void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vxorps(dst, nds, as_Address(src));
+    vxorps(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vxorps(dst, nds, Address(rscratch1, 0));
+    vxorps(dst, nds, Address(rscratch1, 0), vector256);
   }
 }
 
@@ -8388,7 +8817,7 @@
     shlptr(tsize, LogHeapWordSize);
     cmpptr(t1, tsize);
     jcc(Assembler::equal, ok);
-    stop("assert(t1 != tlab size)");
+    STOP("assert(t1 != tlab size)");
     should_not_reach_here();
 
     bind(ok);
@@ -8727,6 +9156,19 @@
 }
 
 
+// virtual method calling
+void MacroAssembler::lookup_virtual_method(Register recv_klass,
+                                           RegisterOrConstant vtable_index,
+                                           Register method_result) {
+  const int base = instanceKlass::vtable_start_offset() * wordSize;
+  assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below");
+  Address vtable_entry_addr(recv_klass,
+                            vtable_index, Address::times_ptr,
+                            base + vtableEntry::method_offset_in_bytes());
+  movptr(method_result, vtable_entry_addr);
+}
+
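In effect the load above computes (restating the code as arithmetic, nothing new):

    // method_result = *(recv_klass
    //                   + instanceKlass::vtable_start_offset() * wordSize
    //                   + vtable_index * wordSize               // Address::times_ptr
    //                   + vtableEntry::method_offset_in_bytes())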
+
 void MacroAssembler::check_klass_subtype(Register sub_klass,
                            Register super_klass,
                            Register temp_reg,
@@ -8976,6 +9418,7 @@
   // Pass register number to verify_oop_subroutine
   char* b = new char[strlen(s) + 50];
   sprintf(b, "verify_oop: %s: %s", reg->name(), s);
+  BLOCK_COMMENT("verify_oop {");
 #ifdef _LP64
   push(rscratch1);                    // save r10, trashed by movptr()
 #endif
@@ -8990,6 +9433,7 @@
   movptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address()));
   call(rax);
   // Caller pops the arguments (oop, message) and restores rax, r10
+  BLOCK_COMMENT("} verify_oop");
 }
 
 
@@ -9010,7 +9454,7 @@
       jcc(Assembler::notZero, L);
       char* buf = new char[40];
       sprintf(buf, "DelayedValue="INTPTR_FORMAT, delayed_value_addr[1]);
-      stop(buf);
+      STOP(buf);
     } else {
       jccb(Assembler::notZero, L);
       hlt();
@@ -9026,60 +9470,6 @@
 }
 
 
-// registers on entry:
-//  - rax ('check' register): required MethodType
-//  - rcx: method handle
-//  - rdx, rsi, or ?: killable temp
-void MacroAssembler::check_method_handle_type(Register mtype_reg, Register mh_reg,
-                                              Register temp_reg,
-                                              Label& wrong_method_type) {
-  Address type_addr(mh_reg, delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg));
-  // compare method type against that of the receiver
-  if (UseCompressedOops) {
-    load_heap_oop(temp_reg, type_addr);
-    cmpptr(mtype_reg, temp_reg);
-  } else {
-    cmpptr(mtype_reg, type_addr);
-  }
-  jcc(Assembler::notEqual, wrong_method_type);
-}
-
-
-// A method handle has a "vmslots" field which gives the size of its
-// argument list in JVM stack slots.  This field is either located directly
-// in every method handle, or else is indirectly accessed through the
-// method handle's MethodType.  This macro hides the distinction.
-void MacroAssembler::load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
-                                                Register temp_reg) {
-  assert_different_registers(vmslots_reg, mh_reg, temp_reg);
-  // load mh.type.form.vmslots
-  Register temp2_reg = vmslots_reg;
-  load_heap_oop(temp2_reg, Address(mh_reg,    delayed_value(java_lang_invoke_MethodHandle::type_offset_in_bytes, temp_reg)));
-  load_heap_oop(temp2_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodType::form_offset_in_bytes, temp_reg)));
-  movl(vmslots_reg, Address(temp2_reg, delayed_value(java_lang_invoke_MethodTypeForm::vmslots_offset_in_bytes, temp_reg)));
-}
-
-
-// registers on entry:
-//  - rcx: method handle
-//  - rdx: killable temp (interpreted only)
-//  - rax: killable temp (compiled only)
-void MacroAssembler::jump_to_method_handle_entry(Register mh_reg, Register temp_reg) {
-  assert(mh_reg == rcx, "caller must put MH object in rcx");
-  assert_different_registers(mh_reg, temp_reg);
-
-  // pick out the interpreted side of the handler
-  // NOTE: vmentry is not an oop!
-  movptr(temp_reg, Address(mh_reg, delayed_value(java_lang_invoke_MethodHandle::vmentry_offset_in_bytes, temp_reg)));
-
-  // off we go...
-  jmp(Address(temp_reg, MethodHandleEntry::from_interpreted_entry_offset_in_bytes()));
-
-  // for the various stubs which take control at this point,
-  // see MethodHandles::generate_method_handle_stub
-}
-
-
 Address MacroAssembler::argument_address(RegisterOrConstant arg_slot,
                                          int extra_slot_offset) {
   // cf. TemplateTable::prepare_invoke(), if (load_receiver).
@@ -9152,14 +9542,14 @@
     movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
     cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset())));
     jcc(Assembler::aboveEqual, next);
-    stop("assert(top >= start)");
+    STOP("assert(top >= start)");
     should_not_reach_here();
 
     bind(next);
     movptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset())));
     cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset())));
     jcc(Assembler::aboveEqual, ok);
-    stop("assert(top <= end)");
+    STOP("assert(top <= end)");
     should_not_reach_here();
 
     bind(ok);
@@ -9592,6 +9982,25 @@
     movptr(dst, src);
 }
 
+void MacroAssembler::cmp_heap_oop(Register src1, Address src2, Register tmp) {
+  assert_different_registers(src1, tmp);
+#ifdef _LP64
+  if (UseCompressedOops) {
+    bool did_push = false;
+    if (tmp == noreg) {
+      tmp = rax;
+      push(tmp);
+      did_push = true;
+      assert(!src2.uses(rsp), "can't push");
+    }
+    load_heap_oop(tmp, src2);
+    cmpptr(src1, tmp);
+    if (did_push)  pop(tmp);
+  } else
+#endif
+    cmpptr(src1, src2);
+}
+
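With compressed oops the in-memory operand is a narrow oop that must be decoded before a pointer compare, which is why a scratch register (or a temporarily pushed rax) is needed. An illustrative call, with made-up register choices, offset, and label:

    __ cmp_heap_oop(rax, Address(rbx, offset), rcx); // decode the narrow oop into rcx, then cmpptr
    __ jcc(Assembler::equal, L_equal);                // 'offset' and 'L_equal' are hypothetical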
 // Used for storing NULLs.
 void MacroAssembler::store_heap_oop_null(Address dst) {
 #ifdef _LP64
@@ -9622,7 +10031,7 @@
     push(rscratch1); // cmpptr trashes rscratch1
     cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_oop_base_addr()));
     jcc(Assembler::equal, ok);
-    stop(msg);
+    STOP(msg);
     bind(ok);
     pop(rscratch1);
   }
@@ -9655,7 +10064,7 @@
     Label ok;
     testq(r, r);
     jcc(Assembler::notEqual, ok);
-    stop("null oop passed to encode_heap_oop_not_null");
+    STOP("null oop passed to encode_heap_oop_not_null");
     bind(ok);
   }
 #endif
@@ -9676,7 +10085,7 @@
     Label ok;
     testq(src, src);
     jcc(Assembler::notEqual, ok);
-    stop("null oop passed to encode_heap_oop_not_null2");
+    STOP("null oop passed to encode_heap_oop_not_null2");
     bind(ok);
   }
 #endif
@@ -9867,7 +10276,7 @@
     cmpptr(rax, StackAlignmentInBytes-wordSize);
     pop(rax);
     jcc(Assembler::equal, L);
-    stop("Stack is not properly aligned!");
+    STOP("Stack is not properly aligned!");
     bind(L);
   }
 #endif
@@ -10541,13 +10950,6 @@
   bind(DONE);
 }
 
-#ifdef PRODUCT
-#define BLOCK_COMMENT(str) /* nothing */
-#else
-#define BLOCK_COMMENT(str) block_comment(str)
-#endif
-
-#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
 void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                    Register to, Register value, Register count,
                                    Register rtmp, XMMRegister xtmp) {
--- a/src/cpu/x86/vm/assembler_x86.hpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri Sep 07 18:18:55 2012 -0700
@@ -617,6 +617,7 @@
                    VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
     simd_prefix(dst, xnoreg, src, pre, opc);
   }
+
   void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
     simd_prefix(src, dst, pre);
   }
@@ -626,16 +627,10 @@
     simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
   }
 
-
   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
                              VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
                              bool rex_w = false, bool vector256 = false);
 
-  int simd_prefix_and_encode(XMMRegister dst, XMMRegister src,
-                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
-    return simd_prefix_and_encode(dst, xnoreg, src, pre, opc);
-  }
-
   // Move/convert 32-bit integer value.
   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
                              VexSimdPrefix pre) {
@@ -677,6 +672,15 @@
   void emit_arith(int op1, int op2, Register dst, jobject obj);
   void emit_arith(int op1, int op2, Register dst, Register src);
 
+  void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
+  void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
+  void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
+  void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
+  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                      Address src, VexSimdPrefix pre, bool vector256);
+  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                      XMMRegister src, VexSimdPrefix pre, bool vector256);
+
   void emit_operand(Register reg,
                     Register base, Register index, Address::ScaleFactor scale,
                     int disp,
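
The emit_simd_arith*/emit_vex_arith helpers centralize the prefix-and-encode boilerplate, so each packed instruction body shrinks to a single call carrying its opcode byte and SIMD prefix. A representative sketch of the pattern (the body is assumed from the declarations; 0x58 with the 66 prefix is the standard ADDPD encoding):

    // Sketch: after this refactoring a packed op is expected to delegate to
    // the shared helper rather than hand-rolling its prefix and ModRM bytes.
    void Assembler::addpd(XMMRegister dst, XMMRegister src) {
      NOT_LP64(assert(VM_Version::supports_sse2(), ""));
      emit_simd_arith(0x58, dst, src, VEX_SIMD_66);  // 66 0F 58 /r
    }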
@@ -891,12 +895,6 @@
   void andq(Register dst, Address src);
   void andq(Register dst, Register src);
 
-  // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
-  void andpd(XMMRegister dst, XMMRegister src);
-
-  // Bitwise Logical AND of Packed Single-Precision Floating-Point Values
-  void andps(XMMRegister dst, XMMRegister src);
-
   void bsfl(Register dst, Register src);
   void bsrl(Register dst, Register src);
 
@@ -1436,10 +1434,6 @@
   void prefetcht2(Address src);
   void prefetchw(Address src);
 
-  // POR - Bitwise logical OR
-  void por(XMMRegister dst, XMMRegister src);
-  void por(XMMRegister dst, Address src);
-
   // Shuffle Packed Doublewords
   void pshufd(XMMRegister dst, XMMRegister src, int mode);
   void pshufd(XMMRegister dst, Address src,     int mode);
@@ -1448,9 +1442,6 @@
   void pshuflw(XMMRegister dst, XMMRegister src, int mode);
   void pshuflw(XMMRegister dst, Address src,     int mode);
 
-  // Shift Right by bits Logical Quadword Immediate
-  void psrlq(XMMRegister dst, int shift);
-
   // Shift Right by bytes Logical DoubleQuadword Immediate
   void psrldq(XMMRegister dst, int shift);
 
@@ -1475,10 +1466,6 @@
 
   void pushq(Address src);
 
-  // Xor Packed Byte Integer Values
-  void pxor(XMMRegister dst, Address src);
-  void pxor(XMMRegister dst, XMMRegister src);
-
   void rcll(Register dst, int imm8);
 
   void rclq(Register dst, int imm8);
@@ -1601,15 +1588,10 @@
   void xorq(Register dst, Address src);
   void xorq(Register dst, Register src);
 
-  // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
-  void xorpd(XMMRegister dst, XMMRegister src);
-
-  // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
-  void xorps(XMMRegister dst, XMMRegister src);
-
   void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
 
   // AVX 3-operands scalar instructions (encoded with VEX prefix)
+
   void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
   void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vaddss(XMMRegister dst, XMMRegister nds, Address src);
@@ -1627,14 +1609,147 @@
   void vsubss(XMMRegister dst, XMMRegister nds, Address src);
   void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
 
-  // AVX Vector instrucitons.
-  void vandpd(XMMRegister dst, XMMRegister nds, Address src);
-  void vandps(XMMRegister dst, XMMRegister nds, Address src);
-  void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
-  void vxorps(XMMRegister dst, XMMRegister nds, Address src);
+
+  //====================VECTOR ARITHMETIC=====================================
+
+  // Add Packed Floating-Point Values
+  void addpd(XMMRegister dst, XMMRegister src);
+  void addps(XMMRegister dst, XMMRegister src);
+  void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Subtract Packed Floating-Point Values
+  void subpd(XMMRegister dst, XMMRegister src);
+  void subps(XMMRegister dst, XMMRegister src);
+  void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Multiply Packed Floating-Point Values
+  void mulpd(XMMRegister dst, XMMRegister src);
+  void mulps(XMMRegister dst, XMMRegister src);
+  void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Divide Packed Floating-Point Values
+  void divpd(XMMRegister dst, XMMRegister src);
+  void divps(XMMRegister dst, XMMRegister src);
+  void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Bitwise Logical AND of Packed Floating-Point Values
+  void andpd(XMMRegister dst, XMMRegister src);
+  void andps(XMMRegister dst, XMMRegister src);
+  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Bitwise Logical XOR of Packed Floating-Point Values
+  void xorpd(XMMRegister dst, XMMRegister src);
+  void xorps(XMMRegister dst, XMMRegister src);
   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Add packed integers
+  void paddb(XMMRegister dst, XMMRegister src);
+  void paddw(XMMRegister dst, XMMRegister src);
+  void paddd(XMMRegister dst, XMMRegister src);
+  void paddq(XMMRegister dst, XMMRegister src);
+  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Sub packed integers
+  void psubb(XMMRegister dst, XMMRegister src);
+  void psubw(XMMRegister dst, XMMRegister src);
+  void psubd(XMMRegister dst, XMMRegister src);
+  void psubq(XMMRegister dst, XMMRegister src);
+  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Multiply packed integers (only shorts and ints)
+  void pmullw(XMMRegister dst, XMMRegister src);
+  void pmulld(XMMRegister dst, XMMRegister src);
+  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Shift left packed integers
+  void psllw(XMMRegister dst, int shift);
+  void pslld(XMMRegister dst, int shift);
+  void psllq(XMMRegister dst, int shift);
+  void psllw(XMMRegister dst, XMMRegister shift);
+  void pslld(XMMRegister dst, XMMRegister shift);
+  void psllq(XMMRegister dst, XMMRegister shift);
+  void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+  // Logical shift right packed integers
+  void psrlw(XMMRegister dst, int shift);
+  void psrld(XMMRegister dst, int shift);
+  void psrlq(XMMRegister dst, int shift);
+  void psrlw(XMMRegister dst, XMMRegister shift);
+  void psrld(XMMRegister dst, XMMRegister shift);
+  void psrlq(XMMRegister dst, XMMRegister shift);
+  void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+  // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
+  void psraw(XMMRegister dst, int shift);
+  void psrad(XMMRegister dst, int shift);
+  void psraw(XMMRegister dst, XMMRegister shift);
+  void psrad(XMMRegister dst, XMMRegister shift);
+  void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+  // And packed integers
+  void pand(XMMRegister dst, XMMRegister src);
+  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Or packed integers
+  void por(XMMRegister dst, XMMRegister src);
+  void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Xor packed integers
+  void pxor(XMMRegister dst, XMMRegister src);
   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Copy the low 128 bits into the high 128 bits of YMM registers.
   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
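
Each integer operation above now comes in a legacy SSE form (two operands, 128 bits, dst doubles as the first source) and a three-operand VEX form whose vector256 flag selects XMM versus YMM width. A hedged usage sketch, assuming the usual `__` stub shorthand and arbitrary register choices:

    // Illustrative only: add two int vectors with the widest encoding the CPU
    // supports. 256-bit integer ops need AVX2 (UseAVX > 1).
    if (UseAVX > 1) {
      __ vpaddd(xmm0, xmm1, xmm2, /*vector256*/ true);
    } else if (UseAVX > 0) {
      __ vpaddd(xmm0, xmm1, xmm2, /*vector256*/ false);
    } else {
      __ movdqu(xmm0, xmm1);   // SSE2: dst is also the first source
      __ paddd(xmm0, xmm2);
    }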
 
@@ -1940,6 +2055,7 @@
   void load_heap_oop(Register dst, Address src);
   void load_heap_oop_not_null(Register dst, Address src);
   void store_heap_oop(Address dst, Register src);
+  void cmp_heap_oop(Register src1, Address src2, Register tmp = noreg);
 
   // Used for storing NULL. All other oop constants should be
   // stored using routines that take a jobject.
@@ -2117,6 +2233,11 @@
                                Register scan_temp,
                                Label& no_such_interface);
 
+  // virtual method calling
+  void lookup_virtual_method(Register recv_klass,
+                             RegisterOrConstant vtable_index,
+                             Register method_result);
+
   // Test sub_klass against super_klass, with fast and slow paths.
 
   // The fast path produces a tri-state answer: yes / no / maybe-slow.
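
The new lookup_virtual_method declaration factors the vtable load out of the invokevirtual paths. Its body is not part of this hunk; the expected shape, assuming the usual klass vtable layout, is a single scaled-index load:

    // Assumed sketch: method_result = recv_klass->vtable[vtable_index].method.
    void MacroAssembler::lookup_virtual_method(Register recv_klass,
                                               RegisterOrConstant vtable_index,
                                               Register method_result) {
      const int base = instanceKlass::vtable_start_offset() * wordSize;
      Address entry(recv_klass, vtable_index, Address::times_ptr,
                    base + vtableEntry::method_offset_in_bytes());
      movptr(method_result, entry);
    }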
@@ -2152,15 +2273,8 @@
                            Label& L_success);
 
   // method handles (JSR 292)
-  void check_method_handle_type(Register mtype_reg, Register mh_reg,
-                                Register temp_reg,
-                                Label& wrong_method_type);
-  void load_method_handle_vmslots(Register vmslots_reg, Register mh_reg,
-                                  Register temp_reg);
-  void jump_to_method_handle_entry(Register mh_reg, Register temp_reg);
   Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0);
 
-
   //----
   void set_word_if_not_zero(Register reg); // sets reg to 1 if not zero, otherwise 0
 
@@ -2179,8 +2293,13 @@
   // prints msg and continues
   void warn(const char* msg);
 
+  // dumps registers and other state
+  void print_state();
+
   static void debug32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip, char* msg);
   static void debug64(char* msg, int64_t pc, int64_t regs[]);
+  static void print_state32(int rdi, int rsi, int rbp, int rsp, int rbx, int rdx, int rcx, int rax, int eip);
+  static void print_state64(int64_t pc, int64_t regs[]);
 
   void os_breakpoint();
 
@@ -2528,11 +2647,13 @@
   void vaddss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddss(dst, nds, src); }
   void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
 
-  void vandpd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandpd(dst, nds, src); }
-  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
-  void vandps(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandps(dst, nds, src); }
-  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256)     { Assembler::vandpd(dst, nds, src, vector256); }
+  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
+
+  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
+  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256)     { Assembler::vandps(dst, nds, src, vector256); }
+  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
 
   void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
   void vdivsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivsd(dst, nds, src); }
@@ -2561,12 +2682,12 @@
   // AVX Vector instructions
 
   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
-  void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); }
-  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
+  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
 
   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
-  void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
-  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
+  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
 
   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
     if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
@@ -2574,6 +2695,12 @@
     else
       Assembler::vxorpd(dst, nds, src, vector256);
   }
+  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+    if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
+      Assembler::vpxor(dst, nds, src, vector256);
+    else
+      Assembler::vxorpd(dst, nds, src, vector256);
+  }
 
   // Move packed integer values from the low 128 bits to the high 128 bits of a 256-bit vector.
   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
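
Note the AVX1 fallback in the vpxor wrappers above: 256-bit VPXOR is an AVX2 instruction, so on AVX1-only hardware the wrapper substitutes VXORPD, which is bit-identical for xor (at worst it costs a domain-crossing bypass delay, never a wrong result). Callers can therefore request the wide form unconditionally; a one-line illustration:

    __ vpxor(xmm0, xmm0, xmm0, /*vector256*/ true);  // zero ymm0 on AVX1 or AVX2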
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -488,68 +488,6 @@
 
 }
 
-void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
-  // At this point we know that offset == referent_offset.
-  //
-  // So we might have to emit:
-  //   if (src == null) goto continuation.
-  //
-  // and we definitely have to emit:
-  //   if (klass(src).reference_type == REF_NONE) goto continuation
-  //   if (!marking_active) goto continuation
-  //   if (pre_val == null) goto continuation
-  //   call pre_barrier(pre_val)
-  //   goto continuation
-  //
-  __ bind(_entry);
-
-  assert(src()->is_register(), "sanity");
-  Register src_reg = src()->as_register();
-
-  if (gen_src_check()) {
-    // The original src operand was not a constant.
-    // Generate src == null?
-    __ cmpptr(src_reg, (int32_t) NULL_WORD);
-    __ jcc(Assembler::equal, _continuation);
-  }
-
-  // Generate src->_klass->_reference_type == REF_NONE)?
-  assert(tmp()->is_register(), "sanity");
-  Register tmp_reg = tmp()->as_register();
-
-  __ load_klass(tmp_reg, src_reg);
-
-  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
-  __ cmpb(ref_type_adr, REF_NONE);
-  __ jcc(Assembler::equal, _continuation);
-
-  // Is marking active?
-  assert(thread()->is_register(), "precondition");
-  Register thread_reg = thread()->as_pointer_register();
-
-  Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       PtrQueue::byte_offset_of_active()));
-
-  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
-    __ cmpl(in_progress, 0);
-  } else {
-    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
-    __ cmpb(in_progress, 0);
-  }
-  __ jcc(Assembler::equal, _continuation);
-
-  // val == null?
-  assert(val()->is_register(), "Precondition.");
-  Register val_reg = val()->as_register();
-
-  __ cmpptr(val_reg, (int32_t) NULL_WORD);
-  __ jcc(Assembler::equal, _continuation);
-
-  ce->store_parameter(val()->as_register(), 0);
-  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
-  __ jmp(_continuation);
-}
-
 jbyte* G1PostBarrierStub::_byte_map_base = NULL;
 
 jbyte* G1PostBarrierStub::byte_map_base_slow() {
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -3508,6 +3508,7 @@
 void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) {
   ciMethod* method = op->profiled_method();
   int bci          = op->profiled_bci();
+  ciMethod* callee = op->profiled_callee();
 
   // Update counter for all call types
   ciMethodData* md = method->method_data_or_null();
@@ -3519,9 +3520,11 @@
   __ movoop(mdo, md->constant_encoding());
   Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset()));
   Bytecodes::Code bc = method->java_code_at_bci(bci);
+  const bool callee_is_static = callee->is_loaded() && callee->is_static();
   // Perform additional virtual call profiling for invokevirtual and
   // invokeinterface bytecodes
   if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
+      !callee_is_static &&  // required for optimized MH invokes
       C1ProfileVirtualCalls) {
     assert(op->recv()->is_single_cpu(), "recv must be allocated");
     Register recv = op->recv()->as_register();
--- a/src/cpu/x86/vm/cppInterpreter_x86.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/x86/vm/cppInterpreter_x86.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -871,9 +871,9 @@
     // Need to differentiate between igetfield, agetfield, bgetfield etc.
     // because they are different sizes.
     // Use the type from the constant pool cache
-    __ shrl(rdx, ConstantPoolCacheEntry::tosBits);
-    // Make sure we don't need to mask rdx for tosBits after the above shift
-    ConstantPoolCacheEntry::verify_tosBits();
+    __ shrl(rdx, ConstantPoolCacheEntry::tos_state_shift);
+    // Make sure we don't need to mask rdx after the above shift
+    ConstantPoolCacheEntry::verify_tos_state_shift();
 #ifdef _LP64
     Label notObj;
     __ cmpl(rdx, atos);
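
The rename from tosBits to tos_state_shift reflects that the result's TosState occupies the topmost bits of the cpCache flags word, so a plain logical right shift recovers it with no mask; verify_tos_state_shift() asserts exactly that invariant. A hedged C++ equivalent of what the generated shrl computes (bit layout assumed):

    // Sketch: the shift consumes every bit below the tos state, so no
    // masking is required afterwards.
    static TosState flags_to_tos(uint32_t flags) {
      return (TosState)(flags >> ConstantPoolCacheEntry::tos_state_shift);
    }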
--- a/src/cpu/x86/vm/frame_x86.cpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/x86/vm/frame_x86.cpp	Fri Sep 07 18:18:55 2012 -0700
@@ -439,7 +439,6 @@
 // frame::sender_for_compiled_frame
 frame frame::sender_for_compiled_frame(RegisterMap* map) const {
   assert(map != NULL, "map must be set");
-  assert(!is_ricochet_frame(), "caller must handle this");
 
   // frame owned by optimizing compiler
   assert(_cb->frame_size() >= 0, "must have non-zero frame size");
@@ -483,7 +482,6 @@
   if (is_entry_frame())       return sender_for_entry_frame(map);
   if (is_interpreted_frame()) return sender_for_interpreter_frame(map);
   assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
-  if (is_ricochet_frame())    return sender_for_ricochet_frame(map);
 
   if (_cb != NULL) {
     return sender_for_compiled_frame(map);
@@ -658,9 +656,7 @@
   values.describe(frame_no, fp() + frame::name##_offset, #name)
 
 void frame::describe_pd(FrameValues& values, int frame_no) {
-  if (is_ricochet_frame()) {
-    MethodHandles::RicochetFrame::describe(this, values, frame_no);
-  } else if (is_interpreted_frame()) {
+  if (is_interpreted_frame()) {
     DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
     DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
     DESCRIBE_FP_OFFSET(interpreter_frame_method);
@@ -682,12 +678,7 @@
   if (_cb != NULL) {
     // use the frame size if valid
     int size = _cb->frame_size();
-    if ((size > 0) &&
-        (! is_ricochet_frame())) {
-      // Work-around: ricochet explicitly excluded because frame size is not
-      // constant for the ricochet blob but its frame_size could not, for
-      // some reasons, be declared as <= 0. This potentially confusing
-      // size declaration should be fixed as another CR.
+    if (size > 0) {
       return unextended_sp() + size;
     }
   }
--- a/src/cpu/x86/vm/globals_x86.hpp	Fri Aug 10 10:41:13 2012 -0700
+++ b/src/cpu/x86/vm/globals_x86.hpp	Fri Sep 07 18:18:55 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -78,4 +78,53 @@
 
 // GC Ergo Flags
 define_pd_global(intx, CMSYoungGenPerWorker, 64*M);  // default max size of CMS young gen, per GC worker thread
+
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
+                                                                            \
+  develop(bool, IEEEPrecision, true,                                        \
+          "Enables IEEE precision (for INTEL only)")                        \
+