changeset 3574:9e3ae661284d hs24-b21

Merge
author amurillo
date Fri, 24 Aug 2012 15:51:19 -0700
parents de2aa86e037d c32dee9b8023
children e8fb566b9466
files src/share/vm/gc_implementation/parNew/parGCAllocBuffer.cpp src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp
diffstat 99 files changed, 13377 insertions(+), 2511 deletions(-)
--- a/make/hotspot_version	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/hotspot_version	Fri Aug 24 15:51:19 2012 -0700
@@ -35,7 +35,7 @@
 
 HS_MAJOR_VER=24
 HS_MINOR_VER=0
-HS_BUILD_NUMBER=20
+HS_BUILD_NUMBER=21
 
 JDK_MAJOR_VER=1
 JDK_MINOR_VER=8
--- a/make/jprt.properties	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/jprt.properties	Fri Aug 24 15:51:19 2012 -0700
@@ -38,7 +38,7 @@
 
 # This tells jprt what default release we want to build
 
-jprt.hotspot.default.release=jdk7
+jprt.hotspot.default.release=jdk8
 
 jprt.tools.default.release=${jprt.submit.option.release?${jprt.submit.option.release}:${jprt.hotspot.default.release}}
 
@@ -54,77 +54,77 @@
 # Define the Solaris platforms we want for the various releases
 jprt.my.solaris.sparc.jdk8=solaris_sparc_5.10
 jprt.my.solaris.sparc.jdk7=solaris_sparc_5.10
-jprt.my.solaris.sparc.jdk7u6=${jprt.my.solaris.sparc.jdk7}
+jprt.my.solaris.sparc.jdk7u8=${jprt.my.solaris.sparc.jdk7}
 jprt.my.solaris.sparc=${jprt.my.solaris.sparc.${jprt.tools.default.release}}
 
 jprt.my.solaris.sparcv9.jdk8=solaris_sparcv9_5.10
 jprt.my.solaris.sparcv9.jdk7=solaris_sparcv9_5.10
-jprt.my.solaris.sparcv9.jdk7u6=${jprt.my.solaris.sparcv9.jdk7}
+jprt.my.solaris.sparcv9.jdk7u8=${jprt.my.solaris.sparcv9.jdk7}
 jprt.my.solaris.sparcv9=${jprt.my.solaris.sparcv9.${jprt.tools.default.release}}
 
 jprt.my.solaris.i586.jdk8=solaris_i586_5.10
 jprt.my.solaris.i586.jdk7=solaris_i586_5.10
-jprt.my.solaris.i586.jdk7u6=${jprt.my.solaris.i586.jdk7}
+jprt.my.solaris.i586.jdk7u8=${jprt.my.solaris.i586.jdk7}
 jprt.my.solaris.i586=${jprt.my.solaris.i586.${jprt.tools.default.release}}
 
 jprt.my.solaris.x64.jdk8=solaris_x64_5.10
 jprt.my.solaris.x64.jdk7=solaris_x64_5.10
-jprt.my.solaris.x64.jdk7u6=${jprt.my.solaris.x64.jdk7}
+jprt.my.solaris.x64.jdk7u8=${jprt.my.solaris.x64.jdk7}
 jprt.my.solaris.x64=${jprt.my.solaris.x64.${jprt.tools.default.release}}
 
 jprt.my.linux.i586.jdk8=linux_i586_2.6
 jprt.my.linux.i586.jdk7=linux_i586_2.6
-jprt.my.linux.i586.jdk7u6=${jprt.my.linux.i586.jdk7}
+jprt.my.linux.i586.jdk7u8=${jprt.my.linux.i586.jdk7}
 jprt.my.linux.i586=${jprt.my.linux.i586.${jprt.tools.default.release}}
 
 jprt.my.linux.x64.jdk8=linux_x64_2.6
 jprt.my.linux.x64.jdk7=linux_x64_2.6
-jprt.my.linux.x64.jdk7u6=${jprt.my.linux.x64.jdk7}
+jprt.my.linux.x64.jdk7u8=${jprt.my.linux.x64.jdk7}
 jprt.my.linux.x64=${jprt.my.linux.x64.${jprt.tools.default.release}}
 
 jprt.my.linux.ppc.jdk8=linux_ppc_2.6
 jprt.my.linux.ppc.jdk7=linux_ppc_2.6
-jprt.my.linux.ppc.jdk7u6=${jprt.my.linux.ppc.jdk7}
+jprt.my.linux.ppc.jdk7u8=${jprt.my.linux.ppc.jdk7}
 jprt.my.linux.ppc=${jprt.my.linux.ppc.${jprt.tools.default.release}}
 
 jprt.my.linux.ppcv2.jdk8=linux_ppcv2_2.6
 jprt.my.linux.ppcv2.jdk7=linux_ppcv2_2.6
-jprt.my.linux.ppcv2.jdk7u6=${jprt.my.linux.ppcv2.jdk7}
+jprt.my.linux.ppcv2.jdk7u8=${jprt.my.linux.ppcv2.jdk7}
 jprt.my.linux.ppcv2=${jprt.my.linux.ppcv2.${jprt.tools.default.release}}
 
 jprt.my.linux.ppcsflt.jdk8=linux_ppcsflt_2.6
 jprt.my.linux.ppcsflt.jdk7=linux_ppcsflt_2.6
-jprt.my.linux.ppcsflt.jdk7u6=${jprt.my.linux.ppcsflt.jdk7}
+jprt.my.linux.ppcsflt.jdk7u8=${jprt.my.linux.ppcsflt.jdk7}
 jprt.my.linux.ppcsflt=${jprt.my.linux.ppcsflt.${jprt.tools.default.release}}
 
 jprt.my.linux.armvfp.jdk8=linux_armvfp_2.6
 jprt.my.linux.armvfp.jdk7=linux_armvfp_2.6
-jprt.my.linux.armvfp.jdk7u6=${jprt.my.linux.armvfp.jdk7}
+jprt.my.linux.armvfp.jdk7u8=${jprt.my.linux.armvfp.jdk7}
 jprt.my.linux.armvfp=${jprt.my.linux.armvfp.${jprt.tools.default.release}}
 
 jprt.my.linux.armv6.jdk8=linux_armv6_2.6
 jprt.my.linux.armv6.jdk7=linux_armv6_2.6
-jprt.my.linux.armv6.jdk7u6=${jprt.my.linux.armv6.jdk7}
+jprt.my.linux.armv6.jdk7u8=${jprt.my.linux.armv6.jdk7}
 jprt.my.linux.armv6=${jprt.my.linux.armv6.${jprt.tools.default.release}}
 
 jprt.my.linux.armsflt.jdk8=linux_armsflt_2.6
 jprt.my.linux.armsflt.jdk7=linux_armsflt_2.6
-jprt.my.linux.armsflt.jdk7u6=${jprt.my.linux.armsflt.jdk7}
+jprt.my.linux.armsflt.jdk7u8=${jprt.my.linux.armsflt.jdk7}
 jprt.my.linux.armsflt=${jprt.my.linux.armsflt.${jprt.tools.default.release}}
 
 jprt.my.macosx.x64.jdk8=macosx_x64_10.7
 jprt.my.macosx.x64.jdk7=macosx_x64_10.7
-jprt.my.macosx.x64.jdk7u6=${jprt.my.macosx.x64.jdk7}
+jprt.my.macosx.x64.jdk7u8=${jprt.my.macosx.x64.jdk7}
 jprt.my.macosx.x64=${jprt.my.macosx.x64.${jprt.tools.default.release}}
 
 jprt.my.windows.i586.jdk8=windows_i586_5.1
 jprt.my.windows.i586.jdk7=windows_i586_5.1
-jprt.my.windows.i586.jdk7u6=${jprt.my.windows.i586.jdk7}
+jprt.my.windows.i586.jdk7u8=${jprt.my.windows.i586.jdk7}
 jprt.my.windows.i586=${jprt.my.windows.i586.${jprt.tools.default.release}}
 
 jprt.my.windows.x64.jdk8=windows_x64_5.2
 jprt.my.windows.x64.jdk7=windows_x64_5.2
-jprt.my.windows.x64.jdk7u6=${jprt.my.windows.x64.jdk7}
+jprt.my.windows.x64.jdk7u8=${jprt.my.windows.x64.jdk7}
 jprt.my.windows.x64=${jprt.my.windows.x64.${jprt.tools.default.release}}
 
 # Standard list of jprt build targets for this source tree
@@ -159,7 +159,7 @@
 
 jprt.build.targets.jdk8=${jprt.build.targets.all}
 jprt.build.targets.jdk7=${jprt.build.targets.all}
-jprt.build.targets.jdk7u6=${jprt.build.targets.all}
+jprt.build.targets.jdk7u8=${jprt.build.targets.all}
 jprt.build.targets=${jprt.build.targets.${jprt.tools.default.release}}
 
 # Subset lists of test targets for this source tree
@@ -452,7 +452,7 @@
 
 jprt.test.targets.jdk8=${jprt.test.targets.standard}
 jprt.test.targets.jdk7=${jprt.test.targets.standard}
-jprt.test.targets.jdk7u6=${jprt.test.targets.jdk7}
+jprt.test.targets.jdk7u8=${jprt.test.targets.jdk7}
 jprt.test.targets=${jprt.test.targets.${jprt.tools.default.release}}
 
 # The default test/Makefile targets that should be run
@@ -512,7 +512,7 @@
 
 jprt.make.rule.test.targets.jdk8=${jprt.make.rule.test.targets.standard}
 jprt.make.rule.test.targets.jdk7=${jprt.make.rule.test.targets.standard}
-jprt.make.rule.test.targets.jdk7u6=${jprt.make.rule.test.targets.jdk7}
+jprt.make.rule.test.targets.jdk7u8=${jprt.make.rule.test.targets.jdk7}
 jprt.make.rule.test.targets=${jprt.make.rule.test.targets.${jprt.tools.default.release}}
 
 # 7155453: Work-around to prevent popups on OSX from blocking test completion
--- a/make/linux/makefiles/adlc.make	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/linux/makefiles/adlc.make	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 1999, 2011, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -133,8 +133,10 @@
 # Note that product files are updated via "mv", which is atomic.
 TEMPDIR := $(OUTDIR)/mktmp$(shell echo $$$$)
 
-# Debuggable by default
-CFLAGS += -g
+ifneq ($(DEBUG_BINARIES), true)
+  # Debuggable by default (unless already done by DEBUG_BINARIES)
+  CFLAGS += -g
+endif
 
 # Pass -D flags into ADLC.
 ADLCFLAGS += $(SYSDEFS)
--- a/make/linux/makefiles/gcc.make	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/linux/makefiles/gcc.make	Fri Aug 24 15:51:19 2012 -0700
@@ -215,47 +215,46 @@
 #------------------------------------------------------------------------
 # Debug flags
 
-# Use the stabs format for debugging information (this is the default
-# on gcc-2.91). It's good enough, has all the information about line
-# numbers and local variables, and libjvm_g.so is only about 16M.
-# Change this back to "-g" if you want the most expressive format.
-# (warning: that could easily inflate libjvm_g.so to 150M!)
-# Note: The Itanium gcc compiler crashes when using -gstabs.
-DEBUG_CFLAGS/ia64  = -g
-DEBUG_CFLAGS/amd64 = -g
-DEBUG_CFLAGS/arm   = -g
-DEBUG_CFLAGS/ppc   = -g
-DEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
-ifeq ($(DEBUG_CFLAGS/$(BUILDARCH)),)
-DEBUG_CFLAGS += -gstabs
-endif
-
-ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
-  FASTDEBUG_CFLAGS/ia64  = -g
-  FASTDEBUG_CFLAGS/amd64 = -g
-  FASTDEBUG_CFLAGS/arm   = -g
-  FASTDEBUG_CFLAGS/ppc   = -g
-  FASTDEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
-  ifeq ($(FASTDEBUG_CFLAGS/$(BUILDARCH)),)
-    FASTDEBUG_CFLAGS += -gstabs
+# DEBUG_BINARIES uses full -g debug information for all configs
+ifeq ($(DEBUG_BINARIES), true)
+  CFLAGS += -g
+else
+  # Use the stabs format for debugging information (this is the default
+  # on gcc-2.91). It's good enough, has all the information about line
+  # numbers and local variables, and libjvm_g.so is only about 16M.
+  # Change this back to "-g" if you want the most expressive format.
+  # (warning: that could easily inflate libjvm_g.so to 150M!)
+  # Note: The Itanium gcc compiler crashes when using -gstabs.
+  DEBUG_CFLAGS/ia64  = -g
+  DEBUG_CFLAGS/amd64 = -g
+  DEBUG_CFLAGS/arm   = -g
+  DEBUG_CFLAGS/ppc   = -g
+  DEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
+  ifeq ($(DEBUG_CFLAGS/$(BUILDARCH)),)
+    DEBUG_CFLAGS += -gstabs
   endif
-
-  OPT_CFLAGS/ia64  = -g
-  OPT_CFLAGS/amd64 = -g
-  OPT_CFLAGS/arm   = -g
-  OPT_CFLAGS/ppc   = -g
-  OPT_CFLAGS += $(OPT_CFLAGS/$(BUILDARCH))
-  ifeq ($(OPT_CFLAGS/$(BUILDARCH)),)
-    OPT_CFLAGS += -gstabs
+  
+  ifeq ($(ENABLE_FULL_DEBUG_SYMBOLS),1)
+    FASTDEBUG_CFLAGS/ia64  = -g
+    FASTDEBUG_CFLAGS/amd64 = -g
+    FASTDEBUG_CFLAGS/arm   = -g
+    FASTDEBUG_CFLAGS/ppc   = -g
+    FASTDEBUG_CFLAGS += $(DEBUG_CFLAGS/$(BUILDARCH))
+    ifeq ($(FASTDEBUG_CFLAGS/$(BUILDARCH)),)
+      FASTDEBUG_CFLAGS += -gstabs
+    endif
+  
+    OPT_CFLAGS/ia64  = -g
+    OPT_CFLAGS/amd64 = -g
+    OPT_CFLAGS/arm   = -g
+    OPT_CFLAGS/ppc   = -g
+    OPT_CFLAGS += $(OPT_CFLAGS/$(BUILDARCH))
+    ifeq ($(OPT_CFLAGS/$(BUILDARCH)),)
+      OPT_CFLAGS += -gstabs
+    endif
   endif
 endif
 
-# DEBUG_BINARIES overrides everything, use full -g debug information
-ifeq ($(DEBUG_BINARIES), true)
-  DEBUG_CFLAGS = -g
-  CFLAGS += $(DEBUG_CFLAGS)
-endif
-
 # If we are building HEADLESS, pass on to VM
 # so it can set the java.awt.headless property
 ifdef HEADLESS
--- a/make/windows/makefiles/defs.make	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/windows/makefiles/defs.make	Fri Aug 24 15:51:19 2012 -0700
@@ -188,14 +188,22 @@
   MAKE_ARGS += JDK_BUILD_NUMBER=$(COOKED_BUILD_NUMBER)
 endif
 
-NMAKE= MAKEFLAGS= MFLAGS= nmake /NOLOGO
+NMAKE= MAKEFLAGS= MFLAGS= nmake -NOLOGO
+ifndef SYSTEM_UNAME
+  SYSTEM_UNAME := $(shell uname)
+  export SYSTEM_UNAME
+endif
 
 # Check for CYGWIN
-ifneq (,$(findstring CYGWIN,$(shell uname)))
+ifneq (,$(findstring CYGWIN,$(SYSTEM_UNAME)))
   USING_CYGWIN=true
 else
   USING_CYGWIN=false
 endif
+# Check for MinGW
+ifneq (,$(findstring MINGW,$(SYSTEM_UNAME)))
+  USING_MINGW=true
+endif
 # FIXUP: The subdirectory for a debug build is NOT the same on all platforms
 VM_DEBUG=debug
 
@@ -208,11 +216,16 @@
   ABS_BOOTDIR     := $(subst /,\\,$(shell /bin/cygpath -m -a "$(BOOTDIR)"))
   ABS_GAMMADIR    := $(subst /,\\,$(shell /bin/cygpath -m -a "$(GAMMADIR)"))
   ABS_OS_MAKEFILE := $(shell /bin/cygpath -m -a "$(HS_MAKE_DIR)/$(OSNAME)")/build.make
-else
-  ABS_OUTPUTDIR   := $(subst /,\\,$(shell $(CD) $(OUTPUTDIR);$(PWD)))
-  ABS_BOOTDIR     := $(subst /,\\,$(shell $(CD) $(BOOTDIR);$(PWD)))
-  ABS_GAMMADIR    := $(subst /,\\,$(shell $(CD) $(GAMMADIR);$(PWD)))
-  ABS_OS_MAKEFILE := $(subst /,\\,$(shell $(CD) $(HS_MAKE_DIR)/$(OSNAME);$(PWD))/build.make)
+else ifeq ($(USING_MINGW), true)
+    ABS_OUTPUTDIR   := $(shell $(CD) $(OUTPUTDIR);$(PWD))
+    ABS_BOOTDIR     := $(shell $(CD) $(BOOTDIR);$(PWD))
+    ABS_GAMMADIR    := $(shell $(CD) $(GAMMADIR);$(PWD))
+    ABS_OS_MAKEFILE := $(shell $(CD) $(HS_MAKE_DIR)/$(OSNAME);$(PWD))/build.make
+  else
+    ABS_OUTPUTDIR   := $(subst /,\\,$(shell $(CD) $(OUTPUTDIR);$(PWD)))
+    ABS_BOOTDIR     := $(subst /,\\,$(shell $(CD) $(BOOTDIR);$(PWD)))
+    ABS_GAMMADIR    := $(subst /,\\,$(shell $(CD) $(GAMMADIR);$(PWD)))
+    ABS_OS_MAKEFILE := $(subst /,\\,$(shell $(CD) $(HS_MAKE_DIR)/$(OSNAME);$(PWD))/build.make)
 endif
 
 # Disable building SA on windows until we are sure
--- a/make/windows/makefiles/rules.make	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/windows/makefiles/rules.make	Fri Aug 24 15:51:19 2012 -0700
@@ -23,14 +23,15 @@
 #
 
 # These are the commands used externally to compile and run.
-
+# The \ are used here for traditional Windows apps and " quoted to get
+# past the Unix-like shell:
 !ifdef BootStrapDir
-RUN_JAVA=$(BootStrapDir)\bin\java
-RUN_JAVAP=$(BootStrapDir)\bin\javap
-RUN_JAVAH=$(BootStrapDir)\bin\javah
-RUN_JAR=$(BootStrapDir)\bin\jar
-COMPILE_JAVAC=$(BootStrapDir)\bin\javac $(BOOTSTRAP_JAVAC_FLAGS)
-COMPILE_RMIC=$(BootStrapDir)\bin\rmic
+RUN_JAVA="$(BootStrapDir)\bin\java"
+RUN_JAVAP="$(BootStrapDir)\bin\javap"
+RUN_JAVAH="$(BootStrapDir)\bin\javah"
+RUN_JAR="$(BootStrapDir)\bin\jar"
+COMPILE_JAVAC="$(BootStrapDir)\bin\javac" $(BOOTSTRAP_JAVAC_FLAGS)
+COMPILE_RMIC="$(BootStrapDir)\bin\rmic"
 BOOT_JAVA_HOME=$(BootStrapDir)
 !else
 RUN_JAVA=java
--- a/make/windows/makefiles/sa.make	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/windows/makefiles/sa.make	Fri Aug 24 15:51:19 2012 -0700
@@ -36,37 +36,37 @@
 !include $(WorkSpace)/make/windows/makefiles/rules.make
 !include $(WorkSpace)/make/sa.files
 
-GENERATED = ..\generated
+GENERATED = ../generated
 
 # tools.jar is needed by the JDI - SA binding
-SA_CLASSPATH = $(BOOT_JAVA_HOME)\lib\tools.jar
+SA_CLASSPATH = $(BOOT_JAVA_HOME)/lib/tools.jar
 
-SA_CLASSDIR = $(GENERATED)\saclasses
+SA_CLASSDIR = $(GENERATED)/saclasses
 
 SA_BUILD_VERSION_PROP = sun.jvm.hotspot.runtime.VM.saBuildVersion=$(SA_BUILD_VERSION)
 
-SA_PROPERTIES = $(SA_CLASSDIR)\sa.properties
+SA_PROPERTIES = $(SA_CLASSDIR)/sa.properties
 
-default::  $(GENERATED)\sa-jdi.jar
+default::  $(GENERATED)/sa-jdi.jar
 
 # Remove the space between $(SA_BUILD_VERSION_PROP) and > below as it adds a white space
 # at the end of SA version string and causes a version mismatch with the target VM version.
 
-$(GENERATED)\sa-jdi.jar: $(AGENT_FILES:/=\)
-	@if not exist $(SA_CLASSDIR) mkdir $(SA_CLASSDIR)
-	@echo ...Building sa-jdi.jar
+$(GENERATED)/sa-jdi.jar: $(AGENT_FILES)
+	$(QUIETLY) mkdir -p $(SA_CLASSDIR)
+	@echo ...Building sa-jdi.jar into $(SA_CLASSDIR)
 	@echo ...$(COMPILE_JAVAC) -classpath $(SA_CLASSPATH) -d $(SA_CLASSDIR) ....
-	@$(COMPILE_JAVAC) -classpath $(SA_CLASSPATH) -sourcepath $(AGENT_SRC_DIR) -d $(SA_CLASSDIR) $(AGENT_FILES:/=\)
+	@$(COMPILE_JAVAC) -classpath $(SA_CLASSPATH) -sourcepath $(AGENT_SRC_DIR) -d $(SA_CLASSDIR) $(AGENT_FILES)
 	$(COMPILE_RMIC) -classpath $(SA_CLASSDIR) -d $(SA_CLASSDIR) sun.jvm.hotspot.debugger.remote.RemoteDebuggerServer
 	$(QUIETLY) echo $(SA_BUILD_VERSION_PROP)> $(SA_PROPERTIES)
 	$(QUIETLY) rm -f $(SA_CLASSDIR)/sun/jvm/hotspot/utilities/soql/sa.js
 	$(QUIETLY) cp $(AGENT_SRC_DIR)/sun/jvm/hotspot/utilities/soql/sa.js $(SA_CLASSDIR)/sun/jvm/hotspot/utilities/soql
 	$(QUIETLY) rm -rf $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources
-	$(QUIETLY) mkdir $(SA_CLASSDIR)\sun\jvm\hotspot\ui\resources
+	$(QUIETLY) mkdir $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources
 	$(QUIETLY) cp $(AGENT_SRC_DIR)/sun/jvm/hotspot/ui/resources/*.png $(SA_CLASSDIR)/sun/jvm/hotspot/ui/resources
 	$(QUIETLY) cp -r $(AGENT_SRC_DIR)/images/* $(SA_CLASSDIR)
 	$(RUN_JAR) cf $@ -C $(SA_CLASSDIR) .
-	$(RUN_JAR) uf $@ -C $(AGENT_SRC_DIR:/=\) META-INF\services\com.sun.jdi.connect.Connector
+	$(RUN_JAR) uf $@ -C $(AGENT_SRC_DIR) META-INF/services/com.sun.jdi.connect.Connector
 	$(RUN_JAVAH) -classpath $(SA_CLASSDIR) -jni sun.jvm.hotspot.debugger.windbg.WindbgDebuggerLocal
 	$(RUN_JAVAH) -classpath $(SA_CLASSDIR) -jni sun.jvm.hotspot.debugger.x86.X86ThreadContext 
 	$(RUN_JAVAH) -classpath $(SA_CLASSDIR) -jni sun.jvm.hotspot.debugger.ia64.IA64ThreadContext 
@@ -85,27 +85,27 @@
 # will be useful to have the assertion checks in place
 
 !if "$(BUILDARCH)" == "ia64"
-SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+SA_CFLAGS = -nologo $(MS_RUNTIME_OPTION) -W3 $(GX_OPTION) -Od -D "WIN32" -D "WIN64" -D "_WINDOWS" -D "_DEBUG" -D "_CONSOLE" -D "_MBCS" -YX -FD -c
 !elseif "$(BUILDARCH)" == "amd64"
-SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 $(GX_OPTION) /Od /D "WIN32" /D "WIN64" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /c
+SA_CFLAGS = -nologo $(MS_RUNTIME_OPTION) -W3 $(GX_OPTION) -Od -D "WIN32" -D "WIN64" -D "_WINDOWS" -D "_DEBUG" -D "_CONSOLE" -D "_MBCS" -YX -FD -c
 !if "$(COMPILER_NAME)" == "VS2005"
 # On amd64, VS2005 compiler requires bufferoverflowU.lib on the link command line, 
 # otherwise we get missing __security_check_cookie externals at link time. 
 SA_LD_FLAGS = bufferoverflowU.lib
 !endif
 !else
-SA_CFLAGS = /nologo $(MS_RUNTIME_OPTION) /W3 /Gm $(GX_OPTION) /Od /D "WIN32" /D "_WINDOWS" /D "_DEBUG" /D "_CONSOLE" /D "_MBCS" /YX /FD /GZ /c
+SA_CFLAGS = -nologo $(MS_RUNTIME_OPTION) -W3 -Gm $(GX_OPTION) -Od -D "WIN32" -D "_WINDOWS" -D "_DEBUG" -D "_CONSOLE" -D "_MBCS" -YX -FD -GZ -c
 !if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1"
-SA_CFLAGS = $(SA_CFLAGS) /ZI
+SA_CFLAGS = $(SA_CFLAGS) -ZI
 !endif
 !endif
 !if "$(MT)" != ""
-SA_LD_FLAGS = /manifest $(SA_LD_FLAGS)
+SA_LD_FLAGS = -manifest $(SA_LD_FLAGS)
 !endif
 SASRCFILE = $(AGENT_DIR)/src/os/win32/windbg/sawindbg.cpp
-SA_LFLAGS = $(SA_LD_FLAGS) /nologo /subsystem:console /machine:$(MACHINE)
+SA_LFLAGS = $(SA_LD_FLAGS) -nologo -subsystem:console -machine:$(MACHINE)
 !if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1"
-SA_LFLAGS = $(SA_LFLAGS) /map /debug
+SA_LFLAGS = $(SA_LFLAGS) -map -debug
 !endif
 
 # Note that we do not keep sawindbj.obj around as it would then
@@ -117,15 +117,15 @@
 $(SAWINDBG): $(SASRCFILE)
 	set INCLUDE=$(SA_INCLUDE)$(INCLUDE)
 	$(CXX) @<<
-	  /I"$(BootStrapDir)/include" /I"$(BootStrapDir)/include/win32" 
-	  /I"$(GENERATED)" $(SA_CFLAGS)
+	  -I"$(BootStrapDir)/include" -I"$(BootStrapDir)/include/win32" 
+	  -I"$(GENERATED)" $(SA_CFLAGS)
 	  $(SASRCFILE)
-	  /out:$*.obj
+	  -out:$*.obj
 <<
 	set LIB=$(SA_LIB)$(LIB)
-	$(LD) /out:$@ /DLL $*.obj dbgeng.lib $(SA_LFLAGS)
+	$(LD) -out:$@ -DLL $*.obj dbgeng.lib $(SA_LFLAGS)
 !if "$(MT)" != ""
-	$(MT) /manifest $(@F).manifest /outputresource:$(@F);#2
+	$(MT) -manifest $(@F).manifest -outputresource:$(@F);#2
 !endif
 !if "$(ENABLE_FULL_DEBUG_SYMBOLS)" == "1"
 !if "$(ZIP_DEBUGINFO_FILES)" == "1"
@@ -136,6 +136,6 @@
 	-@rm -f $*.obj
 
 cleanall :
-	rm -rf $(GENERATED:\=/)/saclasses
-	rm -rf $(GENERATED:\=/)/sa-jdi.jar
+	rm -rf $(GENERATED)/saclasses
+	rm -rf $(GENERATED)/sa-jdi.jar
 !endif
--- a/make/windows/makefiles/shared.make	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/windows/makefiles/shared.make	Fri Aug 24 15:51:19 2012 -0700
@@ -36,11 +36,12 @@
 
 
 !ifdef SUBDIRS
+# \ is used below because $(MAKE) is nmake here, which expects Windows paths
 $(SUBDIRS): FORCE
 	@if not exist $@ mkdir $@
-	@if not exist $@\local.make echo # Empty > $@\local.make
-	@echo nmake $(ACTION) in $(DIR)\$@
-	cd $@ && $(MAKE) /NOLOGO /f $(WorkSpace)\make\windows\makefiles\$@.make $(ACTION) DIR=$(DIR)\$@ BUILD_FLAVOR=$(BUILD_FLAVOR)
+	@if not exist $@/local.make echo # Empty > $@/local.make
+	@echo nmake $(ACTION) in $(DIR)/$@
+	cd $@ && $(MAKE) -NOLOGO -f $(WorkSpace)\make\windows\makefiles\$@.make $(ACTION) DIR=$(DIR)\$@ BUILD_FLAVOR=$(BUILD_FLAVOR)
 !endif
 
 # Creates the needed directory
--- a/make/windows/projectfiles/common/Makefile	Thu Aug 23 12:27:33 2012 -0700
+++ b/make/windows/projectfiles/common/Makefile	Fri Aug 24 15:51:19 2012 -0700
@@ -108,7 +108,7 @@
       -define              HOTSPOT_VM_DISTRO=\\\"$(HOTSPOT_VM_DISTRO)\\\"
 
 $(HOTSPOTBUILDSPACE)/$(ProjectFile): $(HOTSPOTBUILDSPACE)/classes/ProjectCreator.class
-	@$(RUN_JAVA) -Djava.class.path=$(HOTSPOTBUILDSPACE)/classes ProjectCreator WinGammaPlatform$(VcVersion) $(ProjectCreatorIDEOptions)
+	@$(RUN_JAVA) -Djava.class.path="$(HOTSPOTBUILDSPACE)/classes" ProjectCreator WinGammaPlatform$(VcVersion) $(ProjectCreatorIDEOptions)
 
 clean:
 	@rm -rf $(HOTSPOTBUILDSPACE)/classes
--- a/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/sparc/vm/c1_CodeStubs_sparc.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -435,85 +435,6 @@
 
 }
 
-void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
-  // At this point we know that offset == referent_offset.
-  //
-  // So we might have to emit:
-  //   if (src == null) goto continuation.
-  //
-  // and we definitely have to emit:
-  //   if (klass(src).reference_type == REF_NONE) goto continuation
-  //   if (!marking_active) goto continuation
-  //   if (pre_val == null) goto continuation
-  //   call pre_barrier(pre_val)
-  //   goto continuation
-  //
-  __ bind(_entry);
-
-  assert(src()->is_register(), "sanity");
-  Register src_reg = src()->as_register();
-
-  if (gen_src_check()) {
-    // The original src operand was not a constant.
-    // Generate src == null?
-    if (__ is_in_wdisp16_range(_continuation)) {
-      __ br_null(src_reg, /*annul*/false, Assembler::pt, _continuation);
-    } else {
-      __ cmp(src_reg, G0);
-      __ brx(Assembler::equal, false, Assembler::pt, _continuation);
-    }
-    __ delayed()->nop();
-  }
-
-  // Generate src->_klass->_reference_type() == REF_NONE)?
-  assert(tmp()->is_register(), "sanity");
-  Register tmp_reg = tmp()->as_register();
-
-  __ load_klass(src_reg, tmp_reg);
-
-  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
-  __ ldub(ref_type_adr, tmp_reg);
-
-  // _reference_type field is of type ReferenceType (enum)
-  assert(REF_NONE == 0, "check this code");
-  __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt);
-  __ delayed()->nop();
-
-  // Is marking active?
-  assert(thread()->is_register(), "precondition");
-  Register thread_reg = thread()->as_pointer_register();
-
-  Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       PtrQueue::byte_offset_of_active()));
-
-  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
-    __ ld(in_progress, tmp_reg);
-  } else {
-    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
-    __ ldsb(in_progress, tmp_reg);
-  }
-
-  __ cmp_zero_and_br(Assembler::equal, tmp_reg, _continuation, /*annul*/false, Assembler::pt);
-  __ delayed()->nop();
-
-  // val == null?
-  assert(val()->is_register(), "Precondition.");
-  Register val_reg = val()->as_register();
-
-  if (__ is_in_wdisp16_range(_continuation)) {
-    __ br_null(val_reg, /*annul*/false, Assembler::pt, _continuation);
-  } else {
-    __ cmp(val_reg, G0);
-    __ brx(Assembler::equal, false, Assembler::pt, _continuation);
-  }
-  __ delayed()->nop();
-
-  __ call(Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id));
-  __ delayed()->mov(val_reg, G4);
-  __ br(Assembler::always, false, Assembler::pt, _continuation);
-  __ delayed()->nop();
-}
-
 jbyte* G1PostBarrierStub::_byte_map_base = NULL;
 
 jbyte* G1PostBarrierStub::byte_map_base_slow() {
--- a/src/cpu/sparc/vm/vm_version_sparc.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/sparc/vm/vm_version_sparc.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -106,10 +106,10 @@
     if (FLAG_IS_DEFAULT(OptoLoopAlignment)) {
       FLAG_SET_DEFAULT(OptoLoopAlignment, 4);
     }
-    // When using CMS, we cannot use memset() in BOT updates because
-    // the sun4v/CMT version in libc_psr uses BIS which exposes
-    // "phantom zeros" to concurrent readers. See 6948537.
-    if (FLAG_IS_DEFAULT(UseMemSetInBOT) && UseConcMarkSweepGC) {
+    // When using CMS or G1, we cannot use memset() in BOT updates
+    // because the sun4v/CMT version in libc_psr uses BIS which
+    // exposes "phantom zeros" to concurrent readers. See 6948537.
+    if (FLAG_IS_DEFAULT(UseMemSetInBOT) && (UseConcMarkSweepGC || UseG1GC)) {
       FLAG_SET_DEFAULT(UseMemSetInBOT, false);
     }
 #ifdef _LP64
--- a/src/cpu/x86/vm/assembler_x86.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -999,32 +999,22 @@
 
 void Assembler::addsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::addsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_operand(dst, src);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::addss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::addss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_operand(dst, src);
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::andl(Address dst, int32_t imm32) {
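The hunks above and below replace each open-coded SSE sequence (simd_prefix / simd_prefix_and_encode, opcode byte, then ModRM byte or memory operand) with the new emit_simd_arith helpers. Their definitions are not part of this excerpt; the following is a minimal sketch inferred from the removed bodies, with the VexSimdPrefix parameter type assumed from the VEX_SIMD_* constants used throughout:

// Sketch only -- inferred from the removed code in this changeset, not quoted from it.
// Register-register form: emit prefix, opcode, then ModRM byte (0xC0 | encode).
void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
  int encode = simd_prefix_and_encode(dst, dst, src, pre); // dst doubles as the NDS operand
  emit_byte(opcode);
  emit_byte(0xC0 | encode);
}

// Register-memory form: mark the instruction start, then emit the memory operand.
void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
  InstructionMark im(this);
  simd_prefix(dst, dst, src, pre);
  emit_byte(opcode);
  emit_operand(dst, src);
}

emit_simd_arith_nonds, used below for instructions without a non-destructive source (movaps, ucomisd, pshufd, ...), presumably follows the same shape but calls the two-operand simd_prefix / simd_prefix_and_encode(dst, src, pre) variants, matching the removed bodies it replaces.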
@@ -1052,36 +1042,6 @@
   emit_arith(0x23, 0xC0, dst, src);
 }
 
-void Assembler::andpd(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x54);
-  emit_operand(dst, src);
-}
-
-void Assembler::andpd(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x54);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::andps(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x54);
-  emit_operand(dst, src);
-}
-
-void Assembler::andps(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x54);
-  emit_byte(0xC0 | encode);
-}
-
 void Assembler::bsfl(Register dst, Register src) {
   int encode = prefix_and_encode(dst->encoding(), src->encoding());
   emit_byte(0x0F);
@@ -1246,61 +1206,42 @@
   // NOTE: dbx seems to decode this as comiss even though the
   // 0x66 is there. Strangly ucomisd comes out correct
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x2F);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::comisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x2F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::comiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2F);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::comiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2F, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::cvtdq2pd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
-  emit_byte(0xE6);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0xE6, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::cvtdq2ps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x5B);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x5B, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5A);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::cvtsd2ss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Register src) {
@@ -1312,10 +1253,7 @@
 
 void Assembler::cvtsi2sdl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x2A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Register src) {
@@ -1327,25 +1265,17 @@
 
 void Assembler::cvtsi2ssl(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x2A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x2A, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5A);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::cvtss2sd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5A);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5A, dst, src, VEX_SIMD_F3);
 }
 
 
@@ -1373,32 +1303,22 @@
 
 void Assembler::divsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::divsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::divss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::divss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::emms() {
@@ -1634,16 +1554,12 @@
 
 void Assembler::movapd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x28);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::movaps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x28);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x28, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::movlhps(XMMRegister dst, XMMRegister src) {
@@ -1712,24 +1628,17 @@
 
 void Assembler::movdqa(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x6F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::movdqu(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x6F);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movdqu(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F3);
-  emit_byte(0x6F);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x6F, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movdqu(Address dst, XMMRegister src) {
@@ -1810,10 +1719,7 @@
 // The selection is done in MacroAssembler::movdbl() and movflt().
 void Assembler::movlpd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x12);
-  emit_operand(dst, src);
+  emit_simd_arith(0x12, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::movq( MMXRegister dst, Address src ) {
@@ -1870,17 +1776,12 @@
 
 void Assembler::movsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x10);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x10, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::movsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F2);
-  emit_byte(0x10);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::movsd(Address dst, XMMRegister src) {
@@ -1893,17 +1794,12 @@
 
 void Assembler::movss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x10);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x10, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_F3);
-  emit_byte(0x10);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x10, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::movss(Address dst, XMMRegister src) {
@@ -2001,32 +1897,22 @@
 
 void Assembler::mulsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_operand(dst, src);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::mulsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::mulss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_operand(dst, src);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::mulss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::negl(Register dst) {
@@ -2315,17 +2201,12 @@
 void Assembler::packuswb(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x67);
-  emit_operand(dst, src);
+  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::packuswb(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x67);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x67, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::pcmpestri(XMMRegister dst, Address src, int imm8) {
@@ -2339,7 +2220,7 @@
 
 void Assembler::pcmpestri(XMMRegister dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_2(), "");
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_3A);
   emit_byte(0x61);
   emit_byte(0xC0 | encode);
   emit_byte(imm8);
@@ -2355,7 +2236,7 @@
 
 void Assembler::pmovzxbw(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
   emit_byte(0x30);
   emit_byte(0xC0 | encode);
 }
@@ -2456,28 +2337,10 @@
   a_byte(p);
 }
 
-void Assembler::por(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEB);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::por(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEB);
-  emit_operand(dst, src);
-}
-
 void Assembler::pshufd(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x70);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_66);
   emit_byte(mode & 0xFF);
 
 }
@@ -2496,9 +2359,7 @@
 void Assembler::pshuflw(XMMRegister dst, XMMRegister src, int mode) {
   assert(isByte(mode), "invalid value");
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_F2);
-  emit_byte(0x70);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x70, dst, src, VEX_SIMD_F2);
   emit_byte(mode & 0xFF);
 }
 
@@ -2513,18 +2374,6 @@
   emit_byte(mode & 0xFF);
 }
 
-void Assembler::psrlq(XMMRegister dst, int shift) {
-  // Shift 64 bit value logically right by specified number of bits.
-  // HMM Table D-1 says sse2 or mmx.
-  // Do not confuse it with psrldq SSE2 instruction which
-  // shifts 128 bit value in xmm register by number of bytes.
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
-  emit_byte(0x73);
-  emit_byte(0xC0 | encode);
-  emit_byte(shift);
-}
-
 void Assembler::psrldq(XMMRegister dst, int shift) {
   // Shift 128 bit value in xmm register by number of bytes.
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
@@ -2545,7 +2394,7 @@
 
 void Assembler::ptest(XMMRegister dst, XMMRegister src) {
   assert(VM_Version::supports_sse4_1(), "");
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
   emit_byte(0x17);
   emit_byte(0xC0 | encode);
 }
@@ -2553,40 +2402,28 @@
 void Assembler::punpcklbw(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x60);
-  emit_operand(dst, src);
+  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpcklbw(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x60);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x60, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpckldq(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
   assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x62);
-  emit_operand(dst, src);
+  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpckldq(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x62);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x62, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::punpcklqdq(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x6C);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x6C, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::push(int32_t imm32) {
@@ -2616,22 +2453,6 @@
 }
 #endif
 
-void Assembler::pxor(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  assert((UseAVX > 0), "SSE mode requires address alignment 16 bytes");
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEF);
-  emit_operand(dst, src);
-}
-
-void Assembler::pxor(XMMRegister dst, XMMRegister src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0xEF);
-  emit_byte(0xC0 | encode);
-}
-
 void Assembler::rcll(Register dst, int imm8) {
   assert(isShiftCount(imm8), "illegal shift count");
   int encode = prefix_and_encode(dst->encoding());
@@ -2790,32 +2611,22 @@
 
 void Assembler::sqrtsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x51);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::sqrtsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x51);
-  emit_operand(dst, src);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::sqrtss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x51);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::sqrtss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x51);
-  emit_operand(dst, src);
+  emit_simd_arith(0x51, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::stmxcsr( Address dst) {
@@ -2865,32 +2676,22 @@
 
 void Assembler::subsd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::subsd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F2);
 }
 
 void Assembler::subss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::subss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_F3);
 }
 
 void Assembler::testb(Register dst, int imm8) {
@@ -2928,32 +2729,22 @@
 
 void Assembler::ucomisd(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_66);
-  emit_byte(0x2E);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::ucomisd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_66);
-  emit_byte(0x2E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::ucomiss(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2E);
-  emit_operand(dst, src);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::ucomiss(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, src, VEX_SIMD_NONE);
-  emit_byte(0x2E);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith_nonds(0x2E, dst, src, VEX_SIMD_NONE);
 }
 
 
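From this point on, every AVX form is emitted through emit_vex_arith, whose definition is likewise outside this excerpt. Below is a sketch inferred from the open-coded bodies removed at the end of this hunk (vex_prefix / vex_prefix_and_encode plus the opcode and operand bytes); that those emitters accept the new vector256 flag directly is an assumption, not something shown in this changeset:

// Sketch only -- the vector256 forwarding is assumed.
void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, XMMRegister src, VexSimdPrefix pre, bool vector256) {
  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
  emit_byte(opcode);
  emit_byte(0xC0 | encode);
}

void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds, Address src, VexSimdPrefix pre, bool vector256) {
  InstructionMark im(this);
  vex_prefix(dst, nds, src, pre, vector256);
  emit_byte(opcode);
  emit_operand(dst, src);
}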
@@ -2995,211 +2786,714 @@
   emit_arith(0x33, 0xC0, dst, src);
 }
 
+
+// AVX 3-operands scalar float-point arithmetic instructions
+
+void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F2, /* vector256 */ false);
+}
+
+void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_F3, /* vector256 */ false);
+}
+
+//====================VECTOR ARITHMETIC=====================================
+
+// Float-point vector arithmetic
+
+void Assembler::addpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::addps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x58, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x58, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::subpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::subps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5C, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5C, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::mulpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::mulps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x59, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x59, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::divpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::divps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x5E, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x5E, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::andpd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::andps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::andps(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::andpd(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x54, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x54, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
 void Assembler::xorpd(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x57);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::xorpd(XMMRegister dst, Address src) {
-  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_66);
-  emit_byte(0x57);
-  emit_operand(dst, src);
-}
-
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
+}
 
 void Assembler::xorps(XMMRegister dst, XMMRegister src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x57);
-  emit_byte(0xC0 | encode);
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
+}
+
+void Assembler::xorpd(XMMRegister dst, Address src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_66);
 }
 
 void Assembler::xorps(XMMRegister dst, Address src) {
   NOT_LP64(assert(VM_Version::supports_sse(), ""));
-  InstructionMark im(this);
-  simd_prefix(dst, dst, src, VEX_SIMD_NONE);
-  emit_byte(0x57);
-  emit_operand(dst, src);
-}
-
-// AVX 3-operands non destructive source instructions (encoded with VEX prefix)
-
-void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_operand(dst, src);
-}
-
-void Assembler::vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vaddss(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_operand(dst, src);
-}
-
-void Assembler::vaddss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x58);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vandpd(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
-  emit_byte(0x54);
-  emit_operand(dst, src);
-}
-
-void Assembler::vandps(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
-  emit_byte(0x54);
-  emit_operand(dst, src);
-}
-
-void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
-}
-
-void Assembler::vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vdivss(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_operand(dst, src);
-}
-
-void Assembler::vdivss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5E);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_operand(dst, src);
-}
-
-void Assembler::vmulsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vmulss(XMMRegister dst, XMMRegister nds, Address src) {
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_operand(dst, src);
-}
-
-void Assembler::vmulss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x59);
-  emit_byte(0xC0 | encode);
-}
-
-
-void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
-}
-
-void Assembler::vsubsd(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F2);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vsubss(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_operand(dst, src);
-}
-
-void Assembler::vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src) {
-  assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_F3);
-  emit_byte(0x5C);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_66); // 128-bit vector
-  emit_byte(0x57);
-  emit_operand(dst, src);
+  emit_simd_arith(0x57, dst, src, VEX_SIMD_NONE);
 }
 
 void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256);
-  emit_byte(0x57);
-  emit_byte(0xC0 | encode);
-}
-
-void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src) {
-  assert(VM_Version::supports_avx(), "");
-  InstructionMark im(this);
-  vex_prefix(dst, nds, src, VEX_SIMD_NONE); // 128-bit vector
-  emit_byte(0x57);
-  emit_operand(dst, src);
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
 }
 
 void Assembler::vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
   assert(VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_NONE, vector256);
-  emit_byte(0x57);
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+void Assembler::vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert(VM_Version::supports_avx(), "");
+  emit_vex_arith(0x57, dst, nds, src, VEX_SIMD_NONE, vector256);
+}
+
+
+// Integer vector arithmetic
+void Assembler::paddb(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFC, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddw(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFD, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFE, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::paddq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD4, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFC, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFD, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFE, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD4, dst, nds, src, VEX_SIMD_66, vector256);
+}
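
A usage sketch (illustration only, not part of this changeset): the SSE forms above are destructive, while the VEX-encoded forms take a separate non-destructive source, which is what lets generated code keep both inputs live:

    paddd(xmm0, xmm1);                               // xmm0 = xmm0 + xmm1 (xmm0 clobbered)
    vpaddd(xmm2, xmm0, xmm1, /*vector256=*/ false);  // xmm2 = xmm0 + xmm1 (inputs preserved)
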
+
+void Assembler::psubb(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF8, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubw(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF9, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubd(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFA, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::psubq(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xFB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF8, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF9, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFA, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xFB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::pmullw(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD5, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::pmulld(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_sse4_1(), "");
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66, VEX_OPCODE_0F_38);
+  emit_byte(0x40);
   emit_byte(0xC0 | encode);
 }
 
+void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256, VEX_OPCODE_0F_38);
+  emit_byte(0x40);
+  emit_byte(0xC0 | encode);
+}
+
+void Assembler::vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD5, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  InstructionMark im(this);
+  int dst_enc = dst->encoding();
+  int nds_enc = nds->is_valid() ? nds->encoding() : 0;
+  vex_prefix(src, nds_enc, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+  emit_byte(0x40);
+  emit_operand(dst, src);
+}
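
Note on the hand-written body above: pmulld and vpmulld sit in the 0F 38 opcode map, and the shared emit_simd_arith/emit_vex_arith helpers introduced by this change default to the plain 0F map, so these two spell out the prefix handling themselves. For orientation, the expected byte shapes (from the instruction set reference, not part of the patch):

    // SSE4.1:  66 0F 38 40 /r          pmulld  xmm, xmm/m128
    // AVX:     VEX.128.66.0F38 40 /r   vpmulld xmm, xmm, xmm/m128
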
+
+// Shift packed integers left by specified number of bits.
+void Assembler::psllw(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  emit_byte(0x71);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
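
The "/6" in these comments is an opcode extension: the ModRM reg field carries the digit 6 while the register being shifted goes in the r/m field, which is why xmm6 is passed as the first operand to the prefix helper. A minimal sketch of the resulting ModRM byte (illustration only; the helper name is hypothetical):

    // Register-direct ModRM with an opcode digit in the reg field.
    static int modrm_for_digit(int digit, int rm) {
      return 0xC0 | ((digit & 7) << 3) | (rm & 7);
    }
    // psllw(xmm1, 5) is then expected to emit 66 0F 71 F1 05,
    // since modrm_for_digit(6, 1) == 0xF1.
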
+
+void Assembler::pslld(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  emit_byte(0x72);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psllq(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
+  int encode = simd_prefix_and_encode(xmm6, dst, dst, VEX_SIMD_66);
+  emit_byte(0x73);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psllw(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::pslld(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psllq(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xF3, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM6 is for /6 encoding: 66 0F 71 /6 ib
+  emit_vex_arith(0x71, xmm6, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM6 is for /6 encoding: 66 0F 72 /6 ib
+  emit_vex_arith(0x72, xmm6, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM6 is for /6 encoding: 66 0F 73 /6 ib
+  emit_vex_arith(0x73, xmm6, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF2, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xF3, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+// Shift packed integers logically right by specified number of bits.
+void Assembler::psrlw(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  emit_byte(0x71);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrld(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  emit_byte(0x72);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrlq(XMMRegister dst, int shift) {
+  // Do not confuse this with the psrldq SSE2 instruction, which
+  // shifts the whole 128-bit value in an xmm register right by a number of bytes.
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
+  int encode = simd_prefix_and_encode(xmm2, dst, dst, VEX_SIMD_66);
+  emit_byte(0x73);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
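
For the distinction drawn in the comment above: psrlq and psrldq share opcode 66 0F 73 and differ only in the opcode digit (worked bytes, illustration only):

    psrlq  xmm1, 4   ->  66 0F 73 D1 04   // /2: shift each 64-bit lane right by 4 bits
    psrldq xmm1, 4   ->  66 0F 73 D9 04   // /3: shift the whole 128-bit value right by 4 bytes
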
+
+void Assembler::psrlw(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrld(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrlq(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xD3, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM2 is for /2 encoding: 66 0F 71 /2 ib
+  emit_vex_arith(0x71, xmm2, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM2 is for /2 encoding: 66 0F 72 /2 ib
+  emit_vex_arith(0x72, xmm2, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM2 is for /2 encoding: 66 0F 73 /2 ib
+  emit_vex_arith(0x73, xmm2, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD2, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xD3, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+// Shift packed integers arithmetically right by specified number of bits.
+void Assembler::psraw(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
+  emit_byte(0x71);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psrad(XMMRegister dst, int shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
+  int encode = simd_prefix_and_encode(xmm4, dst, dst, VEX_SIMD_66);
+  emit_byte(0x72);
+  emit_byte(0xC0 | encode);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::psraw(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xE1, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::psrad(XMMRegister dst, XMMRegister shift) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xE2, dst, shift, VEX_SIMD_66);
+}
+
+void Assembler::vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM4 is for /4 encoding: 66 0F 71 /4 ib
+  emit_vex_arith(0x71, xmm4, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  // XMM4 is for /4 encoding: 66 0F 72 /4 ib
+  emit_vex_arith(0x72, xmm4, dst, src, VEX_SIMD_66, vector256);
+  emit_byte(shift & 0xFF);
+}
+
+void Assembler::vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xE1, dst, src, shift, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xE2, dst, src, shift, VEX_SIMD_66, vector256);
+}
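
A worked 16-bit lane example of the arithmetic/logical distinction (illustration only); note there is no 64-bit arithmetic variant, which is why no psraq appears here:

    psraw: 0x8000 >> 3  ->  0xF000   // sign bit replicated (-32768 >> 3 == -4096)
    psrlw: 0x8000 >> 3  ->  0x1000   // zero filled
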
+
+
+// AND packed integers
+void Assembler::pand(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xDB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xDB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::por(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xEB, dst, src, VEX_SIMD_66);
+}
+
+void Assembler::vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xEB, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::pxor(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0xEF, dst, src, VEX_SIMD_66);
+}
+
 void Assembler::vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
-  assert(VM_Version::supports_avx2() || (!vector256) && VM_Version::supports_avx(), "");
-  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector256);
-  emit_byte(0xEF);
-  emit_byte(0xC0 | encode);
-}
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
+}
+
+void Assembler::vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+  assert((VM_Version::supports_avx() && !vector256) || VM_Version::supports_avx2(), "256-bit integer vectors require AVX2");
+  emit_vex_arith(0xEF, dst, nds, src, VEX_SIMD_66, vector256);
+}
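
A common use of the packed xor forms (usage sketch, not part of the changeset): xor-ing a register with itself zeroes it without loading a constant and breaks any dependency on the register's old value:

    pxor(xmm0, xmm0);                              // xmm0 = 0
    vpxor(xmm0, xmm0, xmm0, /*vector256=*/ true);  // zero 256 bits; needs AVX2 at this level, per the assert above
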
+
 
 void Assembler::vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
   assert(VM_Version::supports_avx(), "");
@@ -3805,6 +4099,49 @@
   }
 }
 
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
+  InstructionMark im(this);
+  simd_prefix(dst, dst, src, pre);
+  emit_byte(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
+  int encode = simd_prefix_and_encode(dst, dst, src, pre);
+  emit_byte(opcode);
+  emit_byte(0xC0 | encode);
+}
+
+// Versions without the 'nds' (non-destructive source) register operand.
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre) {
+  InstructionMark im(this);
+  simd_prefix(dst, xnoreg, src, pre);
+  emit_byte(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre) {
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, pre);
+  emit_byte(opcode);
+  emit_byte(0xC0 | encode);
+}
+
+// 3-operand AVX instructions
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                               Address src, VexSimdPrefix pre, bool vector256) {
+  InstructionMark im(this);
+  vex_prefix(dst, nds, src, pre, vector256);
+  emit_byte(opcode);
+  emit_operand(dst, src);
+}
+
+void Assembler::emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                               XMMRegister src, VexSimdPrefix pre, bool vector256) {
+  int encode = vex_prefix_and_encode(dst, nds, src, pre, vector256);
+  emit_byte(opcode);
+  emit_byte(0xC0 | encode);
+}
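
These helpers factor out the prefix/opcode/ModRM pattern that the hand-written bodies earlier in this file repeated. Per the definitions above, a call such as emit_simd_arith(0x57, dst, src, VEX_SIMD_66) expands to exactly the old xorpd body it replaces:

    int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_66);
    emit_byte(0x57);           // opcode
    emit_byte(0xC0 | encode);  // register-direct ModRM
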
+
 #ifndef _LP64
 
 void Assembler::incl(Register dst) {
@@ -7968,21 +8305,21 @@
   }
 }
 
-void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+void MacroAssembler::vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vandpd(dst, nds, as_Address(src));
+    vandpd(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vandpd(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+    vandpd(dst, nds, Address(rscratch1, 0), vector256);
+  }
+}
+
+void MacroAssembler::vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vandps(dst, nds, as_Address(src));
+    vandps(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vandps(dst, nds, Address(rscratch1, 0));
+    vandps(dst, nds, Address(rscratch1, 0), vector256);
   }
 }
 
@@ -8040,21 +8377,21 @@
   }
 }
 
-void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+void MacroAssembler::vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vxorpd(dst, nds, as_Address(src));
+    vxorpd(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vxorpd(dst, nds, Address(rscratch1, 0));
-  }
-}
-
-void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src) {
+    vxorpd(dst, nds, Address(rscratch1, 0), vector256);
+  }
+}
+
+void MacroAssembler::vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256) {
   if (reachable(src)) {
-    vxorps(dst, nds, as_Address(src));
+    vxorps(dst, nds, as_Address(src), vector256);
   } else {
     lea(rscratch1, src);
-    vxorps(dst, nds, Address(rscratch1, 0));
+    vxorps(dst, nds, Address(rscratch1, 0), vector256);
   }
 }
 
--- a/src/cpu/x86/vm/assembler_x86.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -617,6 +617,7 @@
                    VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
     simd_prefix(dst, xnoreg, src, pre, opc);
   }
+
   void simd_prefix(Address dst, XMMRegister src, VexSimdPrefix pre) {
     simd_prefix(src, dst, pre);
   }
@@ -626,16 +627,10 @@
     simd_prefix(dst, nds, src, pre, VEX_OPCODE_0F, rex_w);
   }
 
-
   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, XMMRegister src,
                              VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F,
                              bool rex_w = false, bool vector256 = false);
 
-  int simd_prefix_and_encode(XMMRegister dst, XMMRegister src,
-                             VexSimdPrefix pre, VexOpcode opc = VEX_OPCODE_0F) {
-    return simd_prefix_and_encode(dst, xnoreg, src, pre, opc);
-  }
-
   // Move/convert 32-bit integer value.
   int simd_prefix_and_encode(XMMRegister dst, XMMRegister nds, Register src,
                              VexSimdPrefix pre) {
@@ -677,6 +672,15 @@
   void emit_arith(int op1, int op2, Register dst, jobject obj);
   void emit_arith(int op1, int op2, Register dst, Register src);
 
+  void emit_simd_arith(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
+  void emit_simd_arith(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
+  void emit_simd_arith_nonds(int opcode, XMMRegister dst, Address src, VexSimdPrefix pre);
+  void emit_simd_arith_nonds(int opcode, XMMRegister dst, XMMRegister src, VexSimdPrefix pre);
+  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                      Address src, VexSimdPrefix pre, bool vector256);
+  void emit_vex_arith(int opcode, XMMRegister dst, XMMRegister nds,
+                      XMMRegister src, VexSimdPrefix pre, bool vector256);
+
   void emit_operand(Register reg,
                     Register base, Register index, Address::ScaleFactor scale,
                     int disp,
@@ -891,12 +895,6 @@
   void andq(Register dst, Address src);
   void andq(Register dst, Register src);
 
-  // Bitwise Logical AND of Packed Double-Precision Floating-Point Values
-  void andpd(XMMRegister dst, XMMRegister src);
-
-  // Bitwise Logical AND of Packed Single-Precision Floating-Point Values
-  void andps(XMMRegister dst, XMMRegister src);
-
   void bsfl(Register dst, Register src);
   void bsrl(Register dst, Register src);
 
@@ -1436,10 +1434,6 @@
   void prefetcht2(Address src);
   void prefetchw(Address src);
 
-  // POR - Bitwise logical OR
-  void por(XMMRegister dst, XMMRegister src);
-  void por(XMMRegister dst, Address src);
-
   // Shuffle Packed Doublewords
   void pshufd(XMMRegister dst, XMMRegister src, int mode);
   void pshufd(XMMRegister dst, Address src,     int mode);
@@ -1448,9 +1442,6 @@
   void pshuflw(XMMRegister dst, XMMRegister src, int mode);
   void pshuflw(XMMRegister dst, Address src,     int mode);
 
-  // Shift Right by bits Logical Quadword Immediate
-  void psrlq(XMMRegister dst, int shift);
-
   // Shift Right by bytes Logical DoubleQuadword Immediate
   void psrldq(XMMRegister dst, int shift);
 
@@ -1475,10 +1466,6 @@
 
   void pushq(Address src);
 
-  // Xor Packed Byte Integer Values
-  void pxor(XMMRegister dst, Address src);
-  void pxor(XMMRegister dst, XMMRegister src);
-
   void rcll(Register dst, int imm8);
 
   void rclq(Register dst, int imm8);
@@ -1601,15 +1588,10 @@
   void xorq(Register dst, Address src);
   void xorq(Register dst, Register src);
 
-  // Bitwise Logical XOR of Packed Double-Precision Floating-Point Values
-  void xorpd(XMMRegister dst, XMMRegister src);
-
-  // Bitwise Logical XOR of Packed Single-Precision Floating-Point Values
-  void xorps(XMMRegister dst, XMMRegister src);
-
   void set_byte_if_not_zero(Register dst); // sets reg to 1 if not zero, otherwise 0
 
   // AVX 3-operands scalar instructions (encoded with VEX prefix)
+
   void vaddsd(XMMRegister dst, XMMRegister nds, Address src);
   void vaddsd(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vaddss(XMMRegister dst, XMMRegister nds, Address src);
@@ -1627,14 +1609,147 @@
   void vsubss(XMMRegister dst, XMMRegister nds, Address src);
   void vsubss(XMMRegister dst, XMMRegister nds, XMMRegister src);
 
-  // AVX Vector instrucitons.
-  void vandpd(XMMRegister dst, XMMRegister nds, Address src);
-  void vandps(XMMRegister dst, XMMRegister nds, Address src);
-  void vxorpd(XMMRegister dst, XMMRegister nds, Address src);
-  void vxorps(XMMRegister dst, XMMRegister nds, Address src);
+
+  //====================VECTOR ARITHMETIC=====================================
+
+  // Add Packed Floating-Point Values
+  void addpd(XMMRegister dst, XMMRegister src);
+  void addps(XMMRegister dst, XMMRegister src);
+  void vaddpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vaddps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vaddpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vaddps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Subtract Packed Floating-Point Values
+  void subpd(XMMRegister dst, XMMRegister src);
+  void subps(XMMRegister dst, XMMRegister src);
+  void vsubpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vsubps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vsubpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vsubps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Multiply Packed Floating-Point Values
+  void mulpd(XMMRegister dst, XMMRegister src);
+  void mulps(XMMRegister dst, XMMRegister src);
+  void vmulpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vmulps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vmulpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vmulps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Divide Packed Floating-Point Values
+  void divpd(XMMRegister dst, XMMRegister src);
+  void divps(XMMRegister dst, XMMRegister src);
+  void vdivpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vdivps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vdivpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vdivps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Bitwise Logical AND of Packed Floating-Point Values
+  void andpd(XMMRegister dst, XMMRegister src);
+  void andps(XMMRegister dst, XMMRegister src);
+  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Bitwise Logical XOR of Packed Floating-Point Values
+  void xorpd(XMMRegister dst, XMMRegister src);
+  void xorps(XMMRegister dst, XMMRegister src);
   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Add packed integers
+  void paddb(XMMRegister dst, XMMRegister src);
+  void paddw(XMMRegister dst, XMMRegister src);
+  void paddd(XMMRegister dst, XMMRegister src);
+  void paddq(XMMRegister dst, XMMRegister src);
+  void vpaddb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpaddb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpaddw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpaddd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpaddq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Sub packed integers
+  void psubb(XMMRegister dst, XMMRegister src);
+  void psubw(XMMRegister dst, XMMRegister src);
+  void psubd(XMMRegister dst, XMMRegister src);
+  void psubq(XMMRegister dst, XMMRegister src);
+  void vpsubb(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubq(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpsubb(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpsubw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpsubd(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpsubq(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Multiply packed integers (only shorts and ints)
+  void pmullw(XMMRegister dst, XMMRegister src);
+  void pmulld(XMMRegister dst, XMMRegister src);
+  void vpmullw(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpmulld(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpmullw(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+  void vpmulld(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Shift left packed integers
+  void psllw(XMMRegister dst, int shift);
+  void pslld(XMMRegister dst, int shift);
+  void psllq(XMMRegister dst, int shift);
+  void psllw(XMMRegister dst, XMMRegister shift);
+  void pslld(XMMRegister dst, XMMRegister shift);
+  void psllq(XMMRegister dst, XMMRegister shift);
+  void vpsllw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpslld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsllq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsllw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpslld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsllq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+  // Logical shift right packed integers
+  void psrlw(XMMRegister dst, int shift);
+  void psrld(XMMRegister dst, int shift);
+  void psrlq(XMMRegister dst, int shift);
+  void psrlw(XMMRegister dst, XMMRegister shift);
+  void psrld(XMMRegister dst, XMMRegister shift);
+  void psrlq(XMMRegister dst, XMMRegister shift);
+  void vpsrlw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrld(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrlq(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrlw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsrld(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsrlq(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+  // Arithmetic shift right packed integers (only shorts and ints, no instructions for longs)
+  void psraw(XMMRegister dst, int shift);
+  void psrad(XMMRegister dst, int shift);
+  void psraw(XMMRegister dst, XMMRegister shift);
+  void psrad(XMMRegister dst, XMMRegister shift);
+  void vpsraw(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsrad(XMMRegister dst, XMMRegister src, int shift, bool vector256);
+  void vpsraw(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+  void vpsrad(XMMRegister dst, XMMRegister src, XMMRegister shift, bool vector256);
+
+  // And packed integers
+  void pand(XMMRegister dst, XMMRegister src);
+  void vpand(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpand(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Or packed integers
+  void por(XMMRegister dst, XMMRegister src);
+  void vpor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Xor packed integers
+  void pxor(XMMRegister dst, XMMRegister src);
   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256);
+  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256);
+
+  // Copy the low 128 bits into the high 128 bits of a YMM register.
   void vinsertf128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src);
 
@@ -2532,11 +2647,13 @@
   void vaddss(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vaddss(dst, nds, src); }
   void vaddss(XMMRegister dst, XMMRegister nds, AddressLiteral src);
 
-  void vandpd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandpd(dst, nds, src); }
-  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
-
-  void vandps(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vandps(dst, nds, src); }
-  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+  void vandpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandpd(dst, nds, src, vector256); }
+  void vandpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256)     { Assembler::vandpd(dst, nds, src, vector256); }
+  void vandpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
+
+  void vandps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vandps(dst, nds, src, vector256); }
+  void vandps(XMMRegister dst, XMMRegister nds, Address src, bool vector256)     { Assembler::vandps(dst, nds, src, vector256); }
+  void vandps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
 
   void vdivsd(XMMRegister dst, XMMRegister nds, XMMRegister src) { Assembler::vdivsd(dst, nds, src); }
   void vdivsd(XMMRegister dst, XMMRegister nds, Address src)     { Assembler::vdivsd(dst, nds, src); }
@@ -2565,12 +2682,12 @@
   // AVX Vector instructions
 
   void vxorpd(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
-  void vxorpd(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorpd(dst, nds, src); }
-  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+  void vxorpd(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorpd(dst, nds, src, vector256); }
+  void vxorpd(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
 
   void vxorps(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
-  void vxorps(XMMRegister dst, XMMRegister nds, Address src) { Assembler::vxorps(dst, nds, src); }
-  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src);
+  void vxorps(XMMRegister dst, XMMRegister nds, Address src, bool vector256) { Assembler::vxorps(dst, nds, src, vector256); }
+  void vxorps(XMMRegister dst, XMMRegister nds, AddressLiteral src, bool vector256);
 
   void vpxor(XMMRegister dst, XMMRegister nds, XMMRegister src, bool vector256) {
     if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
@@ -2578,6 +2695,12 @@
     else
       Assembler::vxorpd(dst, nds, src, vector256);
   }
+  void vpxor(XMMRegister dst, XMMRegister nds, Address src, bool vector256) {
+    if (UseAVX > 1 || !vector256) // vpxor 256 bit is available only in AVX2
+      Assembler::vpxor(dst, nds, src, vector256);
+    else
+      Assembler::vxorpd(dst, nds, src, vector256);
+  }
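
The fallback in these two wrappers is safe because xor is purely bitwise: Assembler::vxorpd computes the same result through the floating-point domain, which AVX1 can already encode at 256 bits, at worst paying a domain-crossing bypass delay on some microarchitectures. A caller therefore need not test UseAVX itself (usage sketch):

    // Zero a 256-bit register; lowers to vpxor on AVX2, vxorpd on AVX1.
    vpxor(xmm0, xmm0, xmm0, /*vector256=*/ true);
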
 
   // Move packed integer values from the low 128 bits to the high 128 bits of a 256-bit vector.
   void vinserti128h(XMMRegister dst, XMMRegister nds, XMMRegister src) {
--- a/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/x86/vm/c1_CodeStubs_x86.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -488,68 +488,6 @@
 
 }
 
-void G1UnsafeGetObjSATBBarrierStub::emit_code(LIR_Assembler* ce) {
-  // At this point we know that offset == referent_offset.
-  //
-  // So we might have to emit:
-  //   if (src == null) goto continuation.
-  //
-  // and we definitely have to emit:
-  //   if (klass(src).reference_type == REF_NONE) goto continuation
-  //   if (!marking_active) goto continuation
-  //   if (pre_val == null) goto continuation
-  //   call pre_barrier(pre_val)
-  //   goto continuation
-  //
-  __ bind(_entry);
-
-  assert(src()->is_register(), "sanity");
-  Register src_reg = src()->as_register();
-
-  if (gen_src_check()) {
-    // The original src operand was not a constant.
-    // Generate src == null?
-    __ cmpptr(src_reg, (int32_t) NULL_WORD);
-    __ jcc(Assembler::equal, _continuation);
-  }
-
-  // Generate src->_klass->_reference_type == REF_NONE)?
-  assert(tmp()->is_register(), "sanity");
-  Register tmp_reg = tmp()->as_register();
-
-  __ load_klass(tmp_reg, src_reg);
-
-  Address ref_type_adr(tmp_reg, instanceKlass::reference_type_offset());
-  __ cmpb(ref_type_adr, REF_NONE);
-  __ jcc(Assembler::equal, _continuation);
-
-  // Is marking active?
-  assert(thread()->is_register(), "precondition");
-  Register thread_reg = thread()->as_pointer_register();
-
-  Address in_progress(thread_reg, in_bytes(JavaThread::satb_mark_queue_offset() +
-                                       PtrQueue::byte_offset_of_active()));
-
-  if (in_bytes(PtrQueue::byte_width_of_active()) == 4) {
-    __ cmpl(in_progress, 0);
-  } else {
-    assert(in_bytes(PtrQueue::byte_width_of_active()) == 1, "Assumption");
-    __ cmpb(in_progress, 0);
-  }
-  __ jcc(Assembler::equal, _continuation);
-
-  // val == null?
-  assert(val()->is_register(), "Precondition.");
-  Register val_reg = val()->as_register();
-
-  __ cmpptr(val_reg, (int32_t) NULL_WORD);
-  __ jcc(Assembler::equal, _continuation);
-
-  ce->store_parameter(val()->as_register(), 0);
-  __ call(RuntimeAddress(Runtime1::entry_for(Runtime1::g1_pre_barrier_slow_id)));
-  __ jmp(_continuation);
-}
-
 jbyte* G1PostBarrierStub::_byte_map_base = NULL;
 
 jbyte* G1PostBarrierStub::byte_map_base_slow() {
--- a/src/cpu/x86/vm/x86.ad	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/x86/vm/x86.ad	Fri Aug 24 15:51:19 2012 -0700
@@ -500,6 +500,24 @@
   0  /*bottom*/
 };
 
+const bool Matcher::match_rule_supported(int opcode) {
+  if (!has_match_rule(opcode))
+    return false;
+
+  switch (opcode) {
+    case Op_PopCountI:
+    case Op_PopCountL:
+      if (!UsePopCountInstruction)
+        return false;
+      break;
+    case Op_MulVI:
+      if ((UseSSE < 4) && (UseAVX < 1)) // only with SSE4_1 or AVX
+        return false;
+    break;
+  }
+
+  return true;  // By default, match rules are supported.
+}
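
Gating another opcode would follow the same shape (hypothetical example, not part of this change); each case must end in break so one opcode's check does not fall into the next:

    case Op_AddVB:
      if (UseSSE < 2)   // hypothetical requirement, for illustration
        return false;
      break;
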
+
 // Max vector size in bytes. 0 if not supported.
 const int Matcher::vector_width_in_bytes(BasicType bt) {
   assert(is_java_primitive(bt), "only primitive type vectors");
@@ -1439,8 +1457,9 @@
   ins_cost(150);
   format %{ "vandps  $dst, $src, [0x7fffffff]\t# abs float by sign masking" %}
   ins_encode %{
+    bool vector256 = false;
     __ vandps($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(float_signmask()));
+              ExternalAddress(float_signmask()), vector256);
   %}
   ins_pipe(pipe_slow);
 %}
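
Why the masking works (worked example, illustration only): IEEE-754 keeps the sign in the top bit of each lane, so AND with 0x7fffffff clears it and XOR with 0x80000000 flips it:

    0xC0490FDB (-3.14159f)  AND 0x7FFFFFFF  ->  0x40490FDB (3.14159f)   // abs
    0x40490FDB ( 3.14159f)  XOR 0x80000000  ->  0xC0490FDB (-3.14159f)  // neg
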
@@ -1464,8 +1483,9 @@
   format %{ "vandpd  $dst, $src, [0x7fffffffffffffff]\t"
             "# abs double by sign masking" %}
   ins_encode %{
+    bool vector256 = false;
     __ vandpd($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(double_signmask()));
+              ExternalAddress(double_signmask()), vector256);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -1487,8 +1507,9 @@
   ins_cost(150);
   format %{ "vxorps  $dst, $src, [0x80000000]\t# neg float by sign flipping" %}
   ins_encode %{
+    bool vector256 = false;
     __ vxorps($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(float_signflip()));
+              ExternalAddress(float_signflip()), vector256);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -1512,8 +1533,9 @@
   format %{ "vxorpd  $dst, $src, [0x8000000000000000]\t"
             "# neg double by sign flipping" %}
   ins_encode %{
+    bool vector256 = false;
     __ vxorpd($dst$$XMMRegister, $src$$XMMRegister,
-              ExternalAddress(double_signflip()));
+              ExternalAddress(double_signflip()), vector256);
   %}
   ins_pipe(pipe_slow);
 %}
@@ -2382,3 +2404,2416 @@
   ins_pipe( fpu_reg_reg );
 %}
 
+// ====================VECTOR ARITHMETIC=======================================
+
+// --------------------------------- ADD --------------------------------------
+
+// Bytes vector add
+instruct vadd4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVB dst src));
+  format %{ "paddb   $dst,$src\t! add packed4B" %}
+  ins_encode %{
+    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed4B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (AddVB dst src));
+  format %{ "paddb   $dst,$src\t! add packed8B" %}
+  ins_encode %{
+    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed8B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (AddVB dst src));
+  format %{ "paddb   $dst,$src\t! add packed16B" %}
+  ins_encode %{
+    __ paddb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed16B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+  match(Set dst (AddVB src (LoadVector mem)));
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed16B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+  match(Set dst (AddVB src1 src2));
+  format %{ "vpaddb  $dst,$src1,$src2\t! add packed32B" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+  match(Set dst (AddVB src (LoadVector mem)));
+  format %{ "vpaddb  $dst,$src,$mem\t! add packed32B" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
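
As the operand names imply, the vector operand classes used in these rules encode width, so for AddVB the length()==16 predicates select the vecX rules and the 32-byte rules additionally require UseAVX > 1 (AVX2):

    vecS   4 bytes   length() == 4
    vecD   8 bytes   length() == 8
    vecX  16 bytes   length() == 16   (one XMM register)
    vecY  32 bytes   length() == 32   (one YMM register; AVX2 for integer ops)
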
+
+// Shorts/Chars vector add
+instruct vadd2S(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVS dst src));
+  format %{ "paddw   $dst,$src\t! add packed2S" %}
+  ins_encode %{
+    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2S_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4S(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVS dst src));
+  format %{ "paddw   $dst,$src\t! add packed4S" %}
+  ins_encode %{
+    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (AddVS dst src));
+  format %{ "paddw   $dst,$src\t! add packed8S" %}
+  ins_encode %{
+    __ paddw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8S_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVS src (LoadVector mem)));
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (AddVS src1 src2));
+  format %{ "vpaddw  $dst,$src1,$src2\t! add packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd16S_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (AddVS src (LoadVector mem)));
+  format %{ "vpaddw  $dst,$src,$mem\t! add packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integers vector add
+instruct vadd2I(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVI dst src));
+  format %{ "paddd   $dst,$src\t! add packed2I" %}
+  ins_encode %{
+    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2I_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVI src1 src2));
+  format %{ "vpaddd  $dst,$src1,$src2\t! add packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4I(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVI dst src));
+  format %{ "paddd   $dst,$src\t! add packed4I" %}
+  ins_encode %{
+    __ paddd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4I_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVI src1 src2));
+  format %{ "vpaddd  $dst,$src1,$src2\t! add packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4I_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVI src (LoadVector mem)));
+  format %{ "vpaddd  $dst,$src,$mem\t! add packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8I_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVI src1 src2));
+  format %{ "vpaddd  $dst,$src1,$src2\t! add packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8I_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVI src (LoadVector mem)));
+  format %{ "vpaddd  $dst,$src,$mem\t! add packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Longs vector add
+instruct vadd2L(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVL dst src));
+  format %{ "paddq   $dst,$src\t! add packed2L" %}
+  ins_encode %{
+    __ paddq($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2L_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVL src1 src2));
+  format %{ "vpaddq  $dst,$src1,$src2\t! add packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2L_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVL src (LoadVector mem)));
+  format %{ "vpaddq  $dst,$src,$mem\t! add packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4L_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVL src1 src2));
+  format %{ "vpaddq  $dst,$src1,$src2\t! add packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4L_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVL src (LoadVector mem)));
+  format %{ "vpaddq  $dst,$src,$mem\t! add packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpaddq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Floats vector add
+instruct vadd2F(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVF dst src));
+  format %{ "addps   $dst,$src\t! add packed2F" %}
+  ins_encode %{
+    __ addps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2F_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVF src1 src2));
+  format %{ "vaddps  $dst,$src1,$src2\t! add packed2F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVF dst src));
+  format %{ "addps   $dst,$src\t! add packed4F" %}
+  ins_encode %{
+    __ addps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVF src1 src2));
+  format %{ "vaddps  $dst,$src1,$src2\t! add packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4F_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVF src (LoadVector mem)));
+  format %{ "vaddps  $dst,$src,$mem\t! add packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8F_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVF src1 src2));
+  format %{ "vaddps  $dst,$src1,$src2\t! add packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vaddps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd8F_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (AddVF src (LoadVector mem)));
+  format %{ "vaddps  $dst,$src,$mem\t! add packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vaddps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Doubles vector add
+instruct vadd2D(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVD dst src));
+  format %{ "addpd   $dst,$src\t! add packed2D" %}
+  ins_encode %{
+    __ addpd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2D_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVD src1 src2));
+  format %{ "vaddpd  $dst,$src1,$src2\t! add packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd2D_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (AddVD src (LoadVector mem)));
+  format %{ "vaddpd  $dst,$src,$mem\t! add packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4D_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVD src1 src2));
+  format %{ "vaddpd  $dst,$src1,$src2\t! add packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vaddpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vadd4D_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (AddVD src (LoadVector mem)));
+  format %{ "vaddpd  $dst,$src,$mem\t! add packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vaddpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- SUB --------------------------------------
+
+// Bytes vector sub
+instruct vsub4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVB dst src));
+  format %{ "psubb   $dst,$src\t! sub packed4B" %}
+  ins_encode %{
+    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed4B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (SubVB dst src));
+  format %{ "psubb   $dst,$src\t! sub packed8B" %}
+  ins_encode %{
+    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed8B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 16);
+  match(Set dst (SubVB dst src));
+  format %{ "psubb   $dst,$src\t! sub packed16B" %}
+  ins_encode %{
+    __ psubb($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed16B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 16);
+  match(Set dst (SubVB src (LoadVector mem)));
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed16B" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+  match(Set dst (SubVB src1 src2));
+  format %{ "vpsubb  $dst,$src1,$src2\t! sub packed32B" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubb($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 32);
+  match(Set dst (SubVB src (LoadVector mem)));
+  format %{ "vpsubb  $dst,$src,$mem\t! sub packed32B" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubb($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Shorts/Chars vector sub
+instruct vsub2S(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVS dst src));
+  format %{ "psubw   $dst,$src\t! sub packed2S" %}
+  ins_encode %{
+    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2S_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVS dst src));
+  format %{ "psubw   $dst,$src\t! sub packed4S" %}
+  ins_encode %{
+    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (SubVS dst src));
+  format %{ "psubw   $dst,$src\t! sub packed8S" %}
+  ins_encode %{
+    __ psubw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8S_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVS src (LoadVector mem)));
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (SubVS src1 src2));
+  format %{ "vpsubw  $dst,$src1,$src2\t! sub packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub16S_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (SubVS src (LoadVector mem)));
+  format %{ "vpsubw  $dst,$src,$mem\t! sub packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integers vector sub
+instruct vsub2I(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVI dst src));
+  format %{ "psubd   $dst,$src\t! sub packed2I" %}
+  ins_encode %{
+    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2I_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVI src1 src2));
+  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4I(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVI dst src));
+  format %{ "psubd   $dst,$src\t! sub packed4I" %}
+  ins_encode %{
+    __ psubd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4I_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVI src1 src2));
+  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4I_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVI src (LoadVector mem)));
+  format %{ "vpsubd  $dst,$src,$mem\t! sub packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8I_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVI src1 src2));
+  format %{ "vpsubd  $dst,$src1,$src2\t! sub packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8I_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVI src (LoadVector mem)));
+  format %{ "vpsubd  $dst,$src,$mem\t! sub packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Longs vector sub
+instruct vsub2L(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVL dst src));
+  format %{ "psubq   $dst,$src\t! sub packed2L" %}
+  ins_encode %{
+    __ psubq($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2L_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVL src1 src2));
+  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2L_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVL src (LoadVector mem)));
+  format %{ "vpsubq  $dst,$src,$mem\t! sub packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4L_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVL src1 src2));
+  format %{ "vpsubq  $dst,$src1,$src2\t! sub packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubq($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4L_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVL src (LoadVector mem)));
+  format %{ "vpsubq  $dst,$src,$mem\t! sub packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsubq($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Floats vector sub
+instruct vsub2F(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVF dst src));
+  format %{ "subps   $dst,$src\t! sub packed2F" %}
+  ins_encode %{
+    __ subps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2F_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVF src1 src2));
+  format %{ "vsubps  $dst,$src1,$src2\t! sub packed2F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4F(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVF dst src));
+  format %{ "subps   $dst,$src\t! sub packed4F" %}
+  ins_encode %{
+    __ subps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4F_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVF src1 src2));
+  format %{ "vsubps  $dst,$src1,$src2\t! sub packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4F_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVF src (LoadVector mem)));
+  format %{ "vsubps  $dst,$src,$mem\t! sub packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8F_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVF src1 src2));
+  format %{ "vsubps  $dst,$src1,$src2\t! sub packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vsubps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub8F_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (SubVF src (LoadVector mem)));
+  format %{ "vsubps  $dst,$src,$mem\t! sub packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vsubps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Doubles vector sub
+instruct vsub2D(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVD dst src));
+  format %{ "subpd   $dst,$src\t! sub packed2D" %}
+  ins_encode %{
+    __ subpd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2D_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVD src1 src2));
+  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub2D_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (SubVD src (LoadVector mem)));
+  format %{ "vsubpd  $dst,$src,$mem\t! sub packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4D_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVD src1 src2));
+  format %{ "vsubpd  $dst,$src1,$src2\t! sub packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vsubpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsub4D_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (SubVD src (LoadVector mem)));
+  format %{ "vsubpd  $dst,$src,$mem\t! sub packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vsubpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- MUL --------------------------------------
+
+// Shorts/Chars vector mul
+instruct vmul2S(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVS dst src));
+  format %{ "pmullw $dst,$src\t! mul packed2S" %}
+  ins_encode %{
+    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2S_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (MulVS dst src));
+  format %{ "pmullw  $dst,$src\t! mul packed4S" %}
+  ins_encode %{
+    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4S_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (MulVS dst src));
+  format %{ "pmullw  $dst,$src\t! mul packed8S" %}
+  ins_encode %{
+    __ pmullw($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8S_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVS src (LoadVector mem)));
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (MulVS src1 src2));
+  format %{ "vpmullw $dst,$src1,$src2\t! mul packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpmullw($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul16S_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (MulVS src (LoadVector mem)));
+  format %{ "vpmullw $dst,$src,$mem\t! mul packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpmullw($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integers vector mul (pmulld requires SSE4.1)
+instruct vmul2I(vecD dst, vecD src) %{
+  predicate(UseSSE > 3 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVI dst src));
+  format %{ "pmulld  $dst,$src\t! mul packed2I" %}
+  ins_encode %{
+    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2I_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVI src1 src2));
+  format %{ "vpmulld $dst,$src1,$src2\t! mul packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4I(vecX dst, vecX src) %{
+  predicate(UseSSE > 3 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVI dst src));
+  format %{ "pmulld  $dst,$src\t! mul packed4I" %}
+  ins_encode %{
+    __ pmulld($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4I_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVI src1 src2));
+  format %{ "vpmulld $dst,$src1,$src2\t! mul packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4I_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVI src (LoadVector mem)));
+  format %{ "vpmulld $dst,$src,$mem\t! mul packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8I_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVI src1 src2));
+  format %{ "vpmulld $dst,$src1,$src2\t! mul packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpmulld($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8I_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVI src (LoadVector mem)));
+  format %{ "vpmulld $dst,$src,$mem\t! mul packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpmulld($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Floats vector mul
+instruct vmul2F(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVF dst src));
+  format %{ "mulps   $dst,$src\t! mul packed2F" %}
+  ins_encode %{
+    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2F_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVF src1 src2));
+  format %{ "vmulps  $dst,$src1,$src2\t! mul packed2F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4F(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (MulVF dst src));
+  format %{ "mulps   $dst,$src\t! mul packed4F" %}
+  ins_encode %{
+    __ mulps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4F_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVF src1 src2));
+  format %{ "vmulps  $dst,$src1,$src2\t! mul packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4F_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVF src (LoadVector mem)));
+  format %{ "vmulps  $dst,$src,$mem\t! mul packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8F_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVF src1 src2));
+  format %{ "vmulps  $dst,$src1,$src2\t! mul packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vmulps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul8F_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (MulVF src (LoadVector mem)));
+  format %{ "vmulps  $dst,$src,$mem\t! mul packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vmulps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Doubles vector mul
+instruct vmul2D(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (MulVD dst src));
+  format %{ "mulpd   $dst,$src\t! mul packed2D" %}
+  ins_encode %{
+    __ mulpd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2D_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVD src1 src2));
+  format %{ "vmulpd  $dst,$src1,$src2\t! mul packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul2D_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (MulVD src (LoadVector mem)));
+  format %{ "vmulpd  $dst,$src,$mem\t! mul packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4D_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVD src1 src2));
+  format %{ "vmulpd  $dst,$src1,$src2\t! mul packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vmulpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vmul4D_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (MulVD src (LoadVector mem)));
+  format %{ "vmulpd  $dst,$src,$mem\t! mul packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vmulpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- DIV --------------------------------------
+
+// Floats vector div
+instruct vdiv2F(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (DivVF dst src));
+  format %{ "divps   $dst,$src\t! div packed2F" %}
+  ins_encode %{
+    __ divps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv2F_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (DivVF src1 src2));
+  format %{ "vdivps  $dst,$src1,$src2\t! div packed2F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4F(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (DivVF dst src));
+  format %{ "divps   $dst,$src\t! div packed4F" %}
+  ins_encode %{
+    __ divps($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4F_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (DivVF src1 src2));
+  format %{ "vdivps  $dst,$src1,$src2\t! div packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4F_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (DivVF src (LoadVector mem)));
+  format %{ "vdivps  $dst,$src,$mem\t! div packed4F" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv8F_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (DivVF src1 src2));
+  format %{ "vdivps  $dst,$src1,$src2\t! div packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vdivps($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv8F_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (DivVF src (LoadVector mem)));
+  format %{ "vdivps  $dst,$src,$mem\t! div packed8F" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vdivps($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Doubles vector div
+instruct vdiv2D(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (DivVD dst src));
+  format %{ "divpd   $dst,$src\t! div packed2D" %}
+  ins_encode %{
+    __ divpd($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv2D_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (DivVD src1 src2));
+  format %{ "vdivpd  $dst,$src1,$src2\t! div packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv2D_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (DivVD src (LoadVector mem)));
+  format %{ "vdivpd  $dst,$src,$mem\t! div packed2D" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4D_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (DivVD src1 src2));
+  format %{ "vdivpd  $dst,$src1,$src2\t! div packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vdivpd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vdiv4D_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (DivVD src (LoadVector mem)));
+  format %{ "vdivpd  $dst,$src,$mem\t! div packed4D" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vdivpd($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// ------------------------------ LeftShift -----------------------------------
+
+// Shorts/Chars vector left shift
+instruct vsll2S(vecS dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_imm(vecS dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg(vecS dst, vecS src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS dst shift));
+  format %{ "psllw   $dst,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    __ psllw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (LShiftVS src shift));
+  format %{ "vpsllw  $dst,$src,$shift\t! left shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsllw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integers vector left shift
+instruct vsll2I(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVI dst shift));
+  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
+  ins_encode %{
+    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2I_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVI dst shift));
+  format %{ "pslld   $dst,$shift\t! left shift packed2I" %}
+  ins_encode %{
+    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2I_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVI dst shift));
+  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
+  ins_encode %{
+    __ pslld($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVI dst shift));
+  format %{ "pslld   $dst,$shift\t! left shift packed4I" %}
+  ins_encode %{
+    __ pslld($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8I_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (LShiftVI src shift));
+  format %{ "vpslld  $dst,$src,$shift\t! left shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpslld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Longs vector left shift
+instruct vsll2L(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL dst shift));
+  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
+  ins_encode %{
+    __ psllq($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2L_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL dst shift));
+  format %{ "psllq   $dst,$shift\t! left shift packed2L" %}
+  ins_encode %{
+    __ psllq($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2L_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL src shift));
+  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (LShiftVL src shift));
+  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4L_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVL src shift));
+  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsll4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (LShiftVL src shift));
+  format %{ "vpsllq  $dst,$src,$shift\t! left shift packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsllq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// ----------------------- LogicalRightShift -----------------------------------
+
+// Shorts/Chars vector logical right shift produces an incorrect Java
+// result for negative data because Java code converts short values into
+// int with sign extension before the shift.
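+//
+// For example, with a lane holding short s = (short)0x8000 (-32768):
+//   int j = s >>> 1;   // Java widens s to int 0xFFFF8000 first, so
+//                      // j == 0x7FFFC000 and (short)j == (short)0xC000
+//   // while a packed psrlw by 1 on the raw 16-bit lane yields 0x4000
+// The low 16 bits disagree, so no URShiftVS rules are defined here.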
+
+// Integers vector logical right shift
+instruct vsrl2I(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVI dst shift));
+  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
+  ins_encode %{
+    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2I_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVI dst shift));
+  format %{ "psrld   $dst,$shift\t! logical right shift packed2I" %}
+  ins_encode %{
+    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2I_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVI dst shift));
+  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
+  ins_encode %{
+    __ psrld($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVI dst shift));
+  format %{ "psrld   $dst,$shift\t! logical right shift packed4I" %}
+  ins_encode %{
+    __ psrld($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8I_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (URShiftVI src shift));
+  format %{ "vpsrld  $dst,$src,$shift\t! logical right shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrld($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Longs vector logical right shift
+instruct vsrl2L(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL dst shift));
+  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
+  ins_encode %{
+    __ psrlq($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2L_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL dst shift));
+  format %{ "psrlq   $dst,$shift\t! logical right shift packed2L" %}
+  ins_encode %{
+    __ psrlq($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2L_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL src shift));
+  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl2L_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (URShiftVL src shift));
+  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed2L" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4L_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVL src shift));
+  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsrl4L_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 4);
+  match(Set dst (URShiftVL src shift));
+  format %{ "vpsrlq  $dst,$src,$shift\t! logical right shift packed4L" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrlq($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// ------------------- ArithmeticRightShift -----------------------------------
+
+// Shorts/Chars vector arithmetic right shift
+instruct vsra2S(vecS dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_imm(vecS dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg(vecS dst, vecS src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2S_reg_imm(vecS dst, vecS src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed2S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4S_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed4S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS dst shift));
+  format %{ "psraw   $dst,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    __ psraw($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8S_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed8S" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra16S_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 16);
+  match(Set dst (RShiftVS src shift));
+  format %{ "vpsraw  $dst,$src,$shift\t! arithmetic right shift packed16S" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsraw($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// Integers vector arithmetic right shift
+instruct vsra2I(vecD dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI dst shift));
+  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
+  ins_encode %{
+    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2I_imm(vecD dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI dst shift));
+  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed2I" %}
+  ins_encode %{
+    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2I_reg(vecD dst, vecD src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra2I_reg_imm(vecD dst, vecD src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 2);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed2I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I(vecX dst, regF shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI dst shift));
+  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
+  ins_encode %{
+    __ psrad($dst$$XMMRegister, $shift$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I_imm(vecX dst, immI8 shift) %{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI dst shift));
+  format %{ "psrad   $dst,$shift\t! arithmetic right shift packed4I" %}
+  ins_encode %{
+    __ psrad($dst$$XMMRegister, (int)$shift$$constant);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I_reg(vecX dst, vecX src, regF shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra4I_reg_imm(vecX dst, vecX src, immI8 shift) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length() == 4);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed4I" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8I_reg(vecY dst, vecY src, regF shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, $shift$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vsra8I_reg_imm(vecY dst, vecY src, immI8 shift) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length() == 8);
+  match(Set dst (RShiftVI src shift));
+  format %{ "vpsrad  $dst,$src,$shift\t! arithmetic right shift packed8I" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpsrad($dst$$XMMRegister, $src$$XMMRegister, (int)$shift$$constant, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// There are no vector arithmetic right shift instructions for longs.
+
+
+// --------------------------------- AND --------------------------------------
+
+instruct vand4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (AndV dst src));
+  format %{ "pand    $dst,$src\t! and vectors (4 bytes)" %}
+  ins_encode %{
+    __ pand($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (AndV src1 src2));
+  format %{ "vpand   $dst,$src1,$src2\t! and vectors (4 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (AndV dst src));
+  format %{ "pand    $dst,$src\t! and vectors (8 bytes)" %}
+  ins_encode %{
+    __ pand($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (AndV src1 src2));
+  format %{ "vpand   $dst,$src1,$src2\t! and vectors (8 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (AndV dst src));
+  format %{ "pand    $dst,$src\t! and vectors (16 bytes)" %}
+  ins_encode %{
+    __ pand($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (AndV src1 src2));
+  format %{ "vpand   $dst,$src1,$src2\t! and vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (AndV src (LoadVector mem)));
+  format %{ "vpand   $dst,$src,$mem\t! and vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (AndV src1 src2));
+  format %{ "vpand   $dst,$src1,$src2\t! and vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpand($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vand32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (AndV src (LoadVector mem)));
+  format %{ "vpand   $dst,$src,$mem\t! and vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpand($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- OR ---------------------------------------
+
+instruct vor4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (OrV dst src));
+  format %{ "por     $dst,$src\t! or vectors (4 bytes)" %}
+  ins_encode %{
+    __ por($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (OrV src1 src2));
+  format %{ "vpor    $dst,$src1,$src2\t! or vectors (4 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (OrV dst src));
+  format %{ "por     $dst,$src\t! or vectors (8 bytes)" %}
+  ins_encode %{
+    __ por($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (OrV src1 src2));
+  format %{ "vpor    $dst,$src1,$src2\t! or vectors (8 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (OrV dst src));
+  format %{ "por     $dst,$src\t! or vectors (16 bytes)" %}
+  ins_encode %{
+    __ por($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (OrV src1 src2));
+  format %{ "vpor    $dst,$src1,$src2\t! or vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (OrV src (LoadVector mem)));
+  format %{ "vpor    $dst,$src,$mem\t! or vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (OrV src1 src2));
+  format %{ "vpor    $dst,$src1,$src2\t! or vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vor32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (OrV src (LoadVector mem)));
+  format %{ "vpor    $dst,$src,$mem\t! or vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// --------------------------------- XOR --------------------------------------
+
+instruct vxor4B(vecS dst, vecS src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (XorV dst src));
+  format %{ "pxor    $dst,$src\t! xor vectors (4 bytes)" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor4B_reg(vecS dst, vecS src1, vecS src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 4);
+  match(Set dst (XorV src1 src2));
+  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (4 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor8B(vecD dst, vecD src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (XorV dst src));
+  format %{ "pxor    $dst,$src\t! xor vectors (8 bytes)" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor8B_reg(vecD dst, vecD src1, vecD src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 8);
+  match(Set dst (XorV src1 src2));
+  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (8 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor16B(vecX dst, vecX src) %{
+  predicate(n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (XorV dst src));
+  format %{ "pxor    $dst,$src\t! xor vectors (16 bytes)" %}
+  ins_encode %{
+    __ pxor($dst$$XMMRegister, $src$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor16B_reg(vecX dst, vecX src1, vecX src2) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (XorV src1 src2));
+  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor16B_mem(vecX dst, vecX src, memory mem) %{
+  predicate(UseAVX > 0 && n->as_Vector()->length_in_bytes() == 16);
+  match(Set dst (XorV src (LoadVector mem)));
+  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (16 bytes)" %}
+  ins_encode %{
+    bool vector256 = false;
+    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor32B_reg(vecY dst, vecY src1, vecY src2) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (XorV src1 src2));
+  format %{ "vpxor   $dst,$src1,$src2\t! xor vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpxor($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct vxor32B_mem(vecY dst, vecY src, memory mem) %{
+  predicate(UseAVX > 1 && n->as_Vector()->length_in_bytes() == 32);
+  match(Set dst (XorV src (LoadVector mem)));
+  format %{ "vpxor   $dst,$src,$mem\t! xor vectors (32 bytes)" %}
+  ins_encode %{
+    bool vector256 = true;
+    __ vpxor($dst$$XMMRegister, $src$$XMMRegister, $mem$$Address, vector256);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
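
The ArithmeticRightShift rules above stop at ints because, as the comment in
that section notes, x86 SIMD through AVX2 has psraw/psrad but no packed-long
arithmetic right shift; when one is needed it has to be emulated from the
logical shift that does exist. A minimal C++ sketch of the standard emulation
using SSE2 intrinsics (illustrative only, not generator output):

    #include <emmintrin.h>  // SSE2
    #include <cstdint>
    #include <cstdio>

    // Arithmetic right shift of two packed 64-bit lanes by a constant n
    // (0 < n < 64), built from the logical shift SSE2 does provide:
    //   sra(x, n) == ((x >>u n) ^ m) - m,  where m = 1 << (63 - n)
    // The xor/subtract pair sign-extends lanes whose old sign bit was set
    // and leaves non-negative lanes unchanged.
    static __m128i sra_epi64(__m128i x, int n) {
      __m128i m = _mm_set1_epi64x((long long)(1ULL << (63 - n)));
      __m128i r = _mm_srli_epi64(x, n);              // zeros shifted in
      return _mm_sub_epi64(_mm_xor_si128(r, m), m);  // fix up the sign bits
    }

    int main() {
      __m128i v = _mm_set_epi64x(-256, 256);         // lanes {256, -256}
      alignas(16) int64_t out[2];
      _mm_store_si128((__m128i*)out, sra_epi64(v, 4));
      std::printf("%lld %lld\n", (long long)out[0], (long long)out[1]);  // 16 -16
    }
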
--- a/src/cpu/x86/vm/x86_32.ad	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/x86/vm/x86_32.ad	Fri Aug 24 15:51:19 2012 -0700
@@ -1367,22 +1367,6 @@
   return offset;
 }
 
-
-const bool Matcher::match_rule_supported(int opcode) {
-  if (!has_match_rule(opcode))
-    return false;
-
-  switch (opcode) {
-    case Op_PopCountI:
-    case Op_PopCountL:
-      if (!UsePopCountInstruction)
-        return false;
-    break;
-  }
-  
-  return true;  // Per default match rules are supported.
-}
-
 int Matcher::regnum_to_fpu_offset(int regnum) {
   return regnum - 32; // The FP registers are in the second chunk
 }
--- a/src/cpu/x86/vm/x86_64.ad	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/cpu/x86/vm/x86_64.ad	Fri Aug 24 15:51:19 2012 -0700
@@ -1513,22 +1513,6 @@
   return offset;
 }
 
-
-const bool Matcher::match_rule_supported(int opcode) {
-  if (!has_match_rule(opcode))
-    return false;
-
-  switch (opcode) {
-    case Op_PopCountI:
-    case Op_PopCountL:
-      if (!UsePopCountInstruction)
-        return false;
-    break;
-  }
-
-  return true;  // Per default match rules are supported.
-}
-
 int Matcher::regnum_to_fpu_offset(int regnum)
 {
   return regnum - 32; // The FP registers are in the second chunk
@@ -6427,6 +6411,31 @@
   ins_pipe(ialu_reg_reg); // XXX
 %}
 
+// Convert oop into int for vectors alignment masking
+instruct convP2I(rRegI dst, rRegP src)
+%{
+  match(Set dst (ConvL2I (CastP2X src)));
+
+  format %{ "movl    $dst, $src\t# ptr -> int" %}
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg_reg); // XXX
+%}
+
+// Convert compressed oop into int for vectors alignment masking
+// in case of 32bit oops (heap < 4Gb).
+instruct convN2I(rRegI dst, rRegN src)
+%{
+  predicate(Universe::narrow_oop_shift() == 0);
+  match(Set dst (ConvL2I (CastP2X (DecodeN src))));
+
+  format %{ "movl    $dst, $src\t# compressed ptr -> int" %}
+  ins_encode %{
+    __ movl($dst$$Register, $src$$Register);
+  %}
+  ins_pipe(ialu_reg_reg); // XXX
+%}
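
The convN2I rule is restricted to narrow_oop_shift() == 0 because only then do
the low bits of the 32-bit narrow value coincide with the low bits of the
decoded address (the heap base, when non-zero, is at least page aligned, so it
cannot disturb bits below the vector width). A reduced C++ sketch of the
decode and of the shortcut the rule enables (names are illustrative):

    #include <cstdint>

    // Compressed-oop decode, reduced to its shape:
    //   address = heap_base + ((uintptr_t)narrow << narrow_oop_shift)
    uintptr_t decode(uint32_t narrow, uintptr_t heap_base, int shift) {
      return heap_base + ((uintptr_t)narrow << shift);
    }

    // With shift == 0 an alignment mask can be taken from the narrow value
    // directly, without decoding first:
    uint32_t misalign(uint32_t narrow, uint32_t vector_bytes) {
      return narrow & (vector_bytes - 1);
    }
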
 
 // Convert oop pointer into compressed form
 instruct encodeHeapOop(rRegN dst, rRegP src, rFlagsReg cr) %{
@@ -10049,11 +10058,10 @@
   ins_pipe( pipe_slow );
 %}
 
-// The next instructions have long latency and use Int unit. Set high cost.
 instruct MoveI2F_reg_reg(regF dst, rRegI src) %{
   match(Set dst (MoveI2F src));
   effect(DEF dst, USE src);
-  ins_cost(300);
+  ins_cost(100);
   format %{ "movd    $dst,$src\t# MoveI2F" %}
   ins_encode %{
     __ movdl($dst$$XMMRegister, $src$$Register);
@@ -10064,7 +10072,7 @@
 instruct MoveL2D_reg_reg(regD dst, rRegL src) %{
   match(Set dst (MoveL2D src));
   effect(DEF dst, USE src);
-  ins_cost(300);
+  ins_cost(100);
   format %{ "movd    $dst,$src\t# MoveL2D" %}
   ins_encode %{
      __ movdq($dst$$XMMRegister, $src$$Register);
--- a/src/os/solaris/vm/dtraceJSDT_solaris.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/os/solaris/vm/dtraceJSDT_solaris.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2005, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -626,45 +626,6 @@
   }
 }
 
-/**
- * This prints out hex data in a 'windbg' or 'xxd' form, where each line is:
- *   <hex-address>: 8 * <hex-halfword> <ascii translation>
- * example:
- * 0000000: 7f44 4f46 0102 0102 0000 0000 0000 0000  .DOF............
- * 0000010: 0000 0000 0000 0040 0000 0020 0000 0005  .......@... ....
- * 0000020: 0000 0000 0000 0040 0000 0000 0000 015d  .......@.......]
- * ...
- */
-static void printDOFRawData(void* dof) {
-  size_t size = ((dof_hdr_t*)dof)->dofh_loadsz;
-  size_t limit = (size + 16) / 16 * 16;
-  for (size_t i = 0; i < limit; ++i) {
-    if (i % 16 == 0) {
-      tty->print("%07x:", i);
-    }
-    if (i % 2 == 0) {
-      tty->print(" ");
-    }
-    if (i < size) {
-      tty->print("%02x", ((unsigned char*)dof)[i]);
-    } else {
-      tty->print("  ");
-    }
-    if ((i + 1) % 16 == 0) {
-      tty->print("  ");
-      for (size_t j = 0; j < 16; ++j) {
-        size_t idx = i + j - 15;
-        char c = ((char*)dof)[idx];
-        if (idx < size) {
-          tty->print("%c", c >= 32 && c <= 126 ? c : '.');
-        }
-      }
-      tty->print_cr("");
-    }
-  }
-  tty->print_cr("");
-}
-
 static void printDOFHelper(dof_helper_t* helper) {
   tty->print_cr("// dof_helper_t {");
   tty->print_cr("//   dofhp_mod = \"%s\"", helper->dofhp_mod);
@@ -672,7 +633,8 @@
   tty->print_cr("//   dofhp_dof = 0x%016llx", helper->dofhp_dof);
   printDOF((void*)helper->dofhp_dof);
   tty->print_cr("// }");
-  printDOFRawData((void*)helper->dofhp_dof);
+  size_t len = ((dof_hdr_t*)helper->dofhp_dof)->dofh_loadsz;
+  tty->print_data((void*)helper->dofhp_dof, len, true);
 }
 
 #else // ndef HAVE_DTRACE_H
--- a/src/share/vm/c1/c1_CodeStubs.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/c1/c1_CodeStubs.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -574,71 +574,6 @@
 #endif // PRODUCT
 };
 
-// This G1 barrier code stub is used in Unsafe.getObject.
-// It generates a sequence of guards around the SATB
-// barrier code that are used to detect when we have
-// the referent field of a Reference object.
-// The first check is assumed to have been generated
-// in the code generated for Unsafe.getObject().
-
-class G1UnsafeGetObjSATBBarrierStub: public CodeStub {
- private:
-  LIR_Opr _val;
-  LIR_Opr _src;
-
-  LIR_Opr _tmp;
-  LIR_Opr _thread;
-
-  bool _gen_src_check;
-
- public:
-  // A G1 barrier that is guarded by generated guards that determine whether
-  // val (which is the result of Unsafe.getObject() should be recorded in an
-  // SATB log buffer. We could be reading the referent field of a Reference object
-  // using Unsafe.getObject() and we need to record the referent.
-  //
-  // * val is the operand returned by the unsafe.getObject routine.
-  // * src is the base object
-  // * tmp is a temp used to load the klass of src, and then reference type
-  // * thread is the thread object.
-
-  G1UnsafeGetObjSATBBarrierStub(LIR_Opr val, LIR_Opr src,
-                                LIR_Opr tmp, LIR_Opr thread,
-                                bool gen_src_check) :
-    _val(val), _src(src),
-    _tmp(tmp), _thread(thread),
-    _gen_src_check(gen_src_check)
-  {
-    assert(_val->is_register(), "should have already been loaded");
-    assert(_src->is_register(), "should have already been loaded");
-
-    assert(_tmp->is_register(), "should be a temporary register");
-  }
-
-  LIR_Opr val() const { return _val; }
-  LIR_Opr src() const { return _src; }
-
-  LIR_Opr tmp() const { return _tmp; }
-  LIR_Opr thread() const { return _thread; }
-
-  bool gen_src_check() const { return _gen_src_check; }
-
-  virtual void emit_code(LIR_Assembler* e);
-
-  virtual void visit(LIR_OpVisitState* visitor) {
-    visitor->do_slow_case();
-    visitor->do_input(_val);
-    visitor->do_input(_src);
-    visitor->do_input(_thread);
-
-    visitor->do_temp(_tmp);
-  }
-
-#ifndef PRODUCT
-  virtual void print_name(outputStream* out) const { out->print("G1UnsafeGetObjSATBBarrierStub"); }
-#endif // PRODUCT
-};
-
 class G1PostBarrierStub: public CodeStub {
  private:
   LIR_Opr _addr;
--- a/src/share/vm/c1/c1_GraphBuilder.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/c1/c1_GraphBuilder.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1646,10 +1646,6 @@
 
 
 void GraphBuilder::invoke(Bytecodes::Code code) {
-  const bool has_receiver =
-    code == Bytecodes::_invokespecial   ||
-    code == Bytecodes::_invokevirtual   ||
-    code == Bytecodes::_invokeinterface;
   const bool is_invokedynamic = (code == Bytecodes::_invokedynamic);
 
   bool will_link;
@@ -1690,8 +1686,12 @@
   // convert them directly to an invokespecial or invokestatic.
   if (target->is_loaded() && !target->is_abstract() && target->can_be_statically_bound()) {
     switch (bc_raw) {
-    case Bytecodes::_invokevirtual:  code = Bytecodes::_invokespecial;  break;
-    case Bytecodes::_invokehandle:   code = Bytecodes::_invokestatic;   break;
+    case Bytecodes::_invokevirtual:
+      code = Bytecodes::_invokespecial;
+      break;
+    case Bytecodes::_invokehandle:
+      code = target->is_static() ? Bytecodes::_invokestatic : Bytecodes::_invokespecial;
+      break;
     }
   }
 
@@ -1878,11 +1878,13 @@
   // inlining not successful => standard invoke
   bool is_loaded = target->is_loaded();
   ValueType* result_type = as_ValueType(target->return_type());
-
-  // We require the debug info to be the "state before" because
-  // invokedynamics may deoptimize.
-  ValueStack* state_before = is_invokedynamic ? copy_state_before() : copy_state_exhandling();
-
+  ValueStack* state_before = copy_state_exhandling();
+
+  // The bytecode (code) might change in this method, so we check it as late as possible.
+  const bool has_receiver =
+    code == Bytecodes::_invokespecial   ||
+    code == Bytecodes::_invokevirtual   ||
+    code == Bytecodes::_invokeinterface;
   Values* args = state()->pop_arguments(target->arg_size_no_receiver());
   Value recv = has_receiver ? apop() : NULL;
   int vtable_index = methodOopDesc::invalid_vtable_index;
@@ -3058,7 +3060,7 @@
 
   case vmIntrinsics::_Reference_get:
     {
-      if (UseG1GC) {
+      {
         // With java.lang.ref.reference.get() we must go through the
         // intrinsic - when G1 is enabled - even when get() is the root
         // method of the compile so that, if necessary, the value in
@@ -3070,6 +3072,9 @@
         // object removed from the list of discovered references during
         // reference processing.
 
+        // We also need the intrinsic to prevent commoning reads of this field
+        // across safepoints, since the GC can change its value.
+
         // Set up a stream so that appending instructions works properly.
         ciBytecodeStream s(scope->method());
         s.reset_to_bci(0);
@@ -3226,7 +3231,6 @@
 
 
 bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) {
-  if (!InlineNatives           ) INLINE_BAILOUT("intrinsic method inlining disabled");
   if (callee->is_synchronized()) {
     // We don't currently support any synchronized intrinsics
     return false;
@@ -3234,9 +3238,13 @@
 
   // callee seems like a good candidate
   // determine id
+  vmIntrinsics::ID id = callee->intrinsic_id();
+  if (!InlineNatives && id != vmIntrinsics::_Reference_get) {
+    // InlineNatives does not control Reference.get
+    INLINE_BAILOUT("intrinsic method inlining disabled");
+  }
   bool preserves_state = false;
   bool cantrap = true;
-  vmIntrinsics::ID id = callee->intrinsic_id();
   switch (id) {
     case vmIntrinsics::_arraycopy:
       if (!InlineArrayCopy) return false;
@@ -3376,11 +3384,10 @@
       return true;
 
     case vmIntrinsics::_Reference_get:
-      // It is only when G1 is enabled that we absolutely
-      // need to use the intrinsic version of Reference.get()
-      // so that the value in the referent field, if necessary,
-      // can be registered by the pre-barrier code.
-      if (!UseG1GC) return false;
+      // Use the intrinsic version of Reference.get() so that the value in
+      // the referent field can be registered by the G1 pre-barrier code.
+      // The intrinsic also prevents commoning reads of this field across
+      // safepoints, since the GC can change its value.
       preserves_state = true;
       break;
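
The commoning concern is concrete: without the intrinsic, two reads of the
referent field with a safepoint between them could be folded into one, hiding
an update made by GC reference processing. The shape of the hazard, sketched
with a stand-in type (illustrative only, not HotSpot or library code):

    // Stand-in for java.lang.ref.Reference; not a HotSpot declaration.
    struct Reference { void* referent; };

    void* two_reads(Reference* r) {
      void* a = r->referent;   // first read
      // ---- possible safepoint: GC may clear or update r->referent ----
      void* b = r->referent;   // must stay a separate load; commoning it
                               // with 'a' would hide the GC's update
      return b != nullptr ? b : a;
    }
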
 
--- a/src/share/vm/c1/c1_Instruction.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/c1/c1_Instruction.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -369,9 +369,6 @@
   _signature = new BasicTypeList(number_of_arguments() + (has_receiver() ? 1 : 0));
   if (has_receiver()) {
     _signature->append(as_BasicType(receiver()->type()));
-  } else if (is_invokedynamic()) {
-    // Add the synthetic MethodHandle argument to the signature.
-    _signature->append(T_OBJECT);
   }
   for (int i = 0; i < number_of_arguments(); i++) {
     ValueType* t = argument_at(i)->type();
--- a/src/share/vm/c1/c1_LIRAssembler.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/c1/c1_LIRAssembler.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -448,10 +448,10 @@
 
   switch (op->code()) {
   case lir_static_call:
+  case lir_dynamic_call:
     call(op, relocInfo::static_call_type);
     break;
   case lir_optvirtual_call:
-  case lir_dynamic_call:
     call(op, relocInfo::opt_virtual_call_type);
     break;
   case lir_icvirtual_call:
@@ -460,7 +460,9 @@
   case lir_virtual_call:
     vtable_call(op);
     break;
-  default: ShouldNotReachHere();
+  default:
+    fatal(err_msg_res("unexpected op code: %s", op->name()));
+    break;
   }
 
   // JSR 292
--- a/src/share/vm/c1/c1_LIRGenerator.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/c1/c1_LIRGenerator.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -920,7 +920,8 @@
 
 
 LIR_Opr LIRGenerator::force_to_spill(LIR_Opr value, BasicType t) {
-  assert(type2size[t] == type2size[value->type()], "size mismatch");
+  assert(type2size[t] == type2size[value->type()],
+         err_msg_res("size mismatch: t=%s, value->type()=%s", type2name(t), type2name(value->type())));
   if (!value->is_register()) {
     // force into a register
     LIR_Opr r = new_register(value->type());
@@ -2176,9 +2177,9 @@
   off.load_item();
   src.load_item();
 
-  LIR_Opr reg = rlock_result(x, x->basic_type());
-
-  get_Object_unsafe(reg, src.result(), off.result(), type, x->is_volatile());
+  LIR_Opr value = rlock_result(x, x->basic_type());
+
+  get_Object_unsafe(value, src.result(), off.result(), type, x->is_volatile());
 
 #ifndef SERIALGC
   // We might be reading the value of the referent field of a
@@ -2191,19 +2192,16 @@
   // if (offset == java_lang_ref_Reference::referent_offset) {
   //   if (src != NULL) {
   //     if (klass(src)->reference_type() != REF_NONE) {
-  //       pre_barrier(..., reg, ...);
+  //       pre_barrier(..., value, ...);
   //     }
   //   }
   // }
-  //
-  // The first non-constant check of either the offset or
-  // the src operand will be done here; the remainder
-  // will take place in the generated code stub.
 
   if (UseG1GC && type == T_OBJECT) {
-    bool gen_code_stub = true;       // Assume we need to generate the slow code stub.
-    bool gen_offset_check = true;       // Assume the code stub has to generate the offset guard.
-    bool gen_source_check = true;       // Assume the code stub has to check the src object for null.
+    bool gen_pre_barrier = true;     // Assume we need to generate pre_barrier.
+    bool gen_offset_check = true;    // Assume we need to generate the offset guard.
+    bool gen_source_check = true;    // Assume we need to check the src object for null.
+    bool gen_type_check = true;      // Assume we need to check the reference_type.
 
     if (off.is_constant()) {
       jlong off_con = (off.type()->is_int() ?
@@ -2215,7 +2213,7 @@
         // The constant offset is something other than referent_offset.
         // We can skip generating/checking the remaining guards and
         // skip generation of the pre-barrier.
-        gen_code_stub = false;
+        gen_pre_barrier = false;
       } else {
         // The constant offset is the same as referent_offset -
         // we do not need to generate a runtime offset check.
@@ -2224,11 +2222,11 @@
     }
 
     // We don't need to generate the pre-barrier if the source object is an array.
-    if (gen_code_stub && src.type()->is_array()) {
-      gen_code_stub = false;
+    if (gen_pre_barrier && src.type()->is_array()) {
+      gen_pre_barrier = false;
     }
 
-    if (gen_code_stub) {
+    if (gen_pre_barrier) {
       // We still need to continue with the checks.
       if (src.is_constant()) {
         ciObject* src_con = src.get_jobject_constant();
@@ -2236,7 +2234,7 @@
         if (src_con->is_null_object()) {
           // The constant src object is null - we can skip
           // generating the pre-barrier.
-          gen_code_stub = false;
+          gen_pre_barrier = false;
         } else {
           // Non-null constant source object. We still have to generate
           // the pre-barrier - but we don't need to generate the runtime
@@ -2245,20 +2243,28 @@
         }
       }
     }
-
-    if (gen_code_stub) {
-      // Temoraries.
-      LIR_Opr src_klass = new_register(T_OBJECT);
-
-      // Get the thread pointer for the pre-barrier
-      LIR_Opr thread = getThreadPointer();
-
-      CodeStub* stub;
+    if (gen_pre_barrier && !PatchALot) {
+      // Can the klass of object be statically determined to be
+      // a sub-class of Reference?
+      ciType* type = src.value()->declared_type();
+      if ((type != NULL) && type->is_loaded()) {
+        if (type->is_subtype_of(compilation()->env()->Reference_klass())) {
+          gen_type_check = false;
+        } else if (type->is_klass() &&
+                   !compilation()->env()->Object_klass()->is_subtype_of(type->as_klass())) {
+          // Not Reference and not Object klass.
+          gen_pre_barrier = false;
+        }
+      }
+    }
+
+    if (gen_pre_barrier) {
+      LabelObj* Lcont = new LabelObj();
 
       // We may need to generate one or more runtime checks here. Let's start with
       // the offset check.
       if (gen_offset_check) {
-        // if (offset == referent_offset) -> slow code stub
+        // if (offset != referent_offset) -> continue
         // If offset is an int then we can do the comparison with the
         // referent_offset constant; otherwise we need to move
         // referent_offset into a temporary register and generate
@@ -2273,43 +2279,36 @@
           referent_off = new_register(T_LONG);
           __ move(LIR_OprFact::longConst(java_lang_ref_Reference::referent_offset), referent_off);
         }
-
-        __ cmp(lir_cond_equal, off.result(), referent_off);
-
-        // Optionally generate "src == null" check.
-        stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
-                                                    src_klass, thread,
-                                                    gen_source_check);
-
-        __ branch(lir_cond_equal, as_BasicType(off.type()), stub);
-      } else {
-        if (gen_source_check) {
-          // offset is a const and equals referent offset
-          // if (source != null) -> slow code stub
-          __ cmp(lir_cond_notEqual, src.result(), LIR_OprFact::oopConst(NULL));
-
-          // Since we are generating the "if src == null" guard here,
-          // there is no need to generate the "src == null" check again.
-          stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
-                                                    src_klass, thread,
-                                                    false);
-
-          __ branch(lir_cond_notEqual, T_OBJECT, stub);
-        } else {
-          // We have statically determined that offset == referent_offset
-          // && src != null so we unconditionally branch to code stub
-          // to perform the guards and record reg in the SATB log buffer.
-
-          stub = new G1UnsafeGetObjSATBBarrierStub(reg, src.result(),
-                                                    src_klass, thread,
-                                                    false);
-
-          __ branch(lir_cond_always, T_ILLEGAL, stub);
-        }
+        __ cmp(lir_cond_notEqual, off.result(), referent_off);
+        __ branch(lir_cond_notEqual, as_BasicType(off.type()), Lcont->label());
       }
-
-      // Continuation point
-      __ branch_destination(stub->continuation());
+      if (gen_source_check) {
+        // offset is a const and equals referent offset
+        // if (source == null) -> continue
+        __ cmp(lir_cond_equal, src.result(), LIR_OprFact::oopConst(NULL));
+        __ branch(lir_cond_equal, T_OBJECT, Lcont->label());
+      }
+      LIR_Opr src_klass = new_register(T_OBJECT);
+      if (gen_type_check) {
+        // We have determined that offset == referent_offset && src != null.
+        // if (src->_klass->_reference_type == REF_NONE) -> continue
+        __ move(new LIR_Address(src.result(), oopDesc::klass_offset_in_bytes(), T_OBJECT), src_klass);
+        LIR_Address* reference_type_addr = new LIR_Address(src_klass, in_bytes(instanceKlass::reference_type_offset()), T_BYTE);
+        LIR_Opr reference_type = new_register(T_INT);
+        __ move(reference_type_addr, reference_type);
+        __ cmp(lir_cond_equal, reference_type, LIR_OprFact::intConst(REF_NONE));
+        __ branch(lir_cond_equal, T_INT, Lcont->label());
+      }
+      {
+        // We have determined that src->_klass->_reference_type != REF_NONE
+        // so register the value in the referent field with the pre-barrier.
+        pre_barrier(LIR_OprFact::illegalOpr /* addr_opr */,
+                    value  /* pre_val */,
+                    false  /* do_load */,
+                    false  /* patch */,
+                    NULL   /* info */);
+      }
+      __ branch_destination(Lcont->label());
     }
   }
 #endif // SERIALGC
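
The net effect of the hunk above: the three guards that
G1UnsafeGetObjSATBBarrierStub used to perform out of line are now emitted
inline as forward branches to Lcont. Their control flow, rewritten as a
self-contained C++ sketch (the types and the referent offset are stand-ins,
not HotSpot declarations):

    enum ReferenceType { REF_NONE, REF_OTHER };
    struct Klass   { ReferenceType reference_type; };
    struct oopDesc { Klass* klass; };

    const long referent_offset = 16;  // assumed constant for the sketch

    // Guards emitted for Unsafe.getObject(src, offset); each early return
    // corresponds to a branch to Lcont in the generated LIR.
    void g1_referent_guards(oopDesc* src, long offset, oopDesc* value,
                            void (*pre_barrier)(oopDesc* pre_val)) {
      if (offset != referent_offset) return;               // offset guard
      if (src == nullptr) return;                          // source guard
      if (src->klass->reference_type == REF_NONE) return;  // type guard
      pre_barrier(value);  // record the loaded referent in the SATB log
    }
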
@@ -2664,8 +2663,9 @@
 
 
 void LIRGenerator::invoke_load_arguments(Invoke* x, LIRItemList* args, const LIR_OprList* arg_list) {
-  int i = (x->has_receiver() || x->is_invokedynamic()) ? 1 : 0;
-  for (; i < args->length(); i++) {
+  assert(args->length() == arg_list->length(),
+         err_msg_res("args=%d, arg_list=%d", args->length(), arg_list->length()));
+  for (int i = x->has_receiver() ? 1 : 0; i < args->length(); i++) {
     LIRItem* param = args->at(i);
     LIR_Opr loc = arg_list->at(i);
     if (loc->is_register()) {
@@ -2705,15 +2705,9 @@
     LIRItem* receiver = new LIRItem(x->receiver(), this);
     argument_items->append(receiver);
   }
-  if (x->is_invokedynamic()) {
-    // Insert a dummy for the synthetic MethodHandle argument.
-    argument_items->append(NULL);
-  }
-  int idx = x->has_receiver() ? 1 : 0;
   for (int i = 0; i < x->number_of_arguments(); i++) {
     LIRItem* param = new LIRItem(x->argument_at(i), this);
     argument_items->append(param);
-    idx += (param->type()->is_double_word() ? 2 : 1);
   }
   return argument_items;
 }
@@ -2758,9 +2752,6 @@
 
   CodeEmitInfo* info = state_for(x, x->state());
 
-  // invokedynamics can deoptimize.
-  CodeEmitInfo* deopt_info = x->is_invokedynamic() ? state_for(x, x->state_before()) : NULL;
-
   invoke_load_arguments(x, args, arg_list);
 
   if (x->has_receiver()) {
@@ -2809,41 +2800,8 @@
       }
       break;
     case Bytecodes::_invokedynamic: {
-      ciBytecodeStream bcs(x->scope()->method());
-      bcs.force_bci(x->state()->bci());
-      assert(bcs.cur_bc() == Bytecodes::_invokedynamic, "wrong stream");
-      ciCPCache* cpcache = bcs.get_cpcache();
-
-      // Get CallSite offset from constant pool cache pointer.
-      int index = bcs.get_method_index();
-      size_t call_site_offset = cpcache->get_f1_offset(index);
-
-      // Load CallSite object from constant pool cache.
-      LIR_Opr call_site = new_register(objectType);
-      __ oop2reg(cpcache->constant_encoding(), call_site);
-      __ move_wide(new LIR_Address(call_site, call_site_offset, T_OBJECT), call_site);
-
-      // If this invokedynamic call site hasn't been executed yet in
-      // the interpreter, the CallSite object in the constant pool
-      // cache is still null and we need to deoptimize.
-      if (cpcache->is_f1_null_at(index)) {
-        // Only deoptimize if the CallSite object is still null; we don't
-        // recompile methods in C1 after deoptimization so this call site
-        // might be resolved the next time we execute it after OSR.
-        DeoptimizeStub* deopt_stub = new DeoptimizeStub(deopt_info);
-        __ cmp(lir_cond_equal, call_site, LIR_OprFact::oopConst(NULL));
-        __ branch(lir_cond_equal, T_OBJECT, deopt_stub);
-      }
-
-      // Use the receiver register for the synthetic MethodHandle
-      // argument.
-      receiver = LIR_Assembler::receiverOpr();
-
-      // Load target MethodHandle from CallSite object.
-      __ load(new LIR_Address(call_site, java_lang_invoke_CallSite::target_offset_in_bytes(), T_OBJECT), receiver);
-
       __ call_dynamic(target, receiver, result_register,
-                      SharedRuntime::get_resolve_opt_virtual_call_stub(),
+                      SharedRuntime::get_resolve_static_call_stub(),
                       arg_list, info);
       break;
     }
--- a/src/share/vm/c1/c1_ValueMap.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/c1/c1_ValueMap.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -190,7 +190,7 @@
   LoadField* lf = value->as_LoadField();                                                 \
   bool must_kill = lf != NULL                                                            \
                    && lf->field()->holder() == field->holder()                           \
-                   && lf->field()->offset() == field->offset();
+                   && (all_offsets || lf->field()->offset() == field->offset());
 
 #define MUST_KILL_EXCEPTION(must_kill, entry, value)                                     \
   assert(entry->nesting() < nesting(), "must not find bigger nesting than current");     \
@@ -205,7 +205,7 @@
   GENERIC_KILL_VALUE(MUST_KILL_ARRAY);
 }
 
-void ValueMap::kill_field(ciField* field) {
+void ValueMap::kill_field(ciField* field, bool all_offsets) {
   GENERIC_KILL_VALUE(MUST_KILL_FIELD);
 }
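
The new all_offsets flag arrives here from do_StoreField as
x->needs_patching(): when a store's field offset is not resolved yet, it may
alias any cached load of the same holder, so value numbering must drop them
all. The rule, reduced to a standalone C++ sketch with stand-in types:

    struct FieldRef { int holder; int offset; };

    // A cached LoadField is killed when the holders match and either the
    // offsets match or the store's offset is unknown (all_offsets).
    bool must_kill_field(const FieldRef& cached, const FieldRef& store,
                         bool all_offsets) {
      return cached.holder == store.holder &&
             (all_offsets || cached.offset == store.offset);
    }
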
 
@@ -280,9 +280,9 @@
   ValueMap* value_map_of(BlockBegin* block)      { return _gvn->value_map_of(block); }
 
   // implementation for abstract methods of ValueNumberingVisitor
-  void      kill_memory()                        { _too_complicated_loop = true; }
-  void      kill_field(ciField* field)           { current_map()->kill_field(field); };
-  void      kill_array(ValueType* type)          { current_map()->kill_array(type); };
+  void      kill_memory()                                 { _too_complicated_loop = true; }
+  void      kill_field(ciField* field, bool all_offsets)  { current_map()->kill_field(field, all_offsets); };
+  void      kill_array(ValueType* type)                   { current_map()->kill_array(type); };
 
  public:
   ShortLoopOptimizer(GlobalValueNumbering* gvn)
--- a/src/share/vm/c1/c1_ValueMap.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/c1/c1_ValueMap.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -114,7 +114,7 @@
   Value find_insert(Value x);
 
   void kill_memory();
-  void kill_field(ciField* field);
+  void kill_field(ciField* field, bool all_offsets);
   void kill_array(ValueType* type);
   void kill_exception();
   void kill_map(ValueMap* map);
@@ -136,7 +136,7 @@
  protected:
   // called by visitor functions for instructions that kill values
   virtual void kill_memory() = 0;
-  virtual void kill_field(ciField* field) = 0;
+  virtual void kill_field(ciField* field, bool all_offsets) = 0;
   virtual void kill_array(ValueType* type) = 0;
 
   // visitor functions
@@ -148,7 +148,7 @@
         x->field()->is_volatile()) {
       kill_memory();
     } else {
-      kill_field(x->field());
+      kill_field(x->field(), x->needs_patching());
     }
   }
   void do_StoreIndexed   (StoreIndexed*    x) { kill_array(x->type()); }
@@ -214,9 +214,9 @@
 
  public:
   // implementation for abstract methods of ValueNumberingVisitor
-  void          kill_memory()                    { _map->kill_memory(); }
-  void          kill_field(ciField* field)       { _map->kill_field(field); }
-  void          kill_array(ValueType* type)      { _map->kill_array(type); }
+  void          kill_memory()                                 { _map->kill_memory(); }
+  void          kill_field(ciField* field, bool all_offsets)  { _map->kill_field(field, all_offsets); }
+  void          kill_array(ValueType* type)                   { _map->kill_array(type); }
 
   ValueNumberingEffects(ValueMap* map): _map(map) {}
 };
@@ -234,9 +234,9 @@
   void          set_value_map_of(BlockBegin* block, ValueMap* map)   { assert(value_map_of(block) == NULL, ""); _value_maps.at_put(block->linear_scan_number(), map); }
 
   // implementation for abstract methods of ValueNumberingVisitor
-  void          kill_memory()                    { current_map()->kill_memory(); }
-  void          kill_field(ciField* field)       { current_map()->kill_field(field); }
-  void          kill_array(ValueType* type)      { current_map()->kill_array(type); }
+  void          kill_memory()                                 { current_map()->kill_memory(); }
+  void          kill_field(ciField* field, bool all_offsets)  { current_map()->kill_field(field, all_offsets); }
+  void          kill_array(ValueType* type)                   { current_map()->kill_array(type); }
 
   // main entry point that performs global value numbering
   GlobalValueNumbering(IR* ir);
--- a/src/share/vm/classfile/stackMapFrame.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/stackMapFrame.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -32,9 +32,9 @@
 #include "utilities/globalDefinitions.hpp"
 
 StackMapFrame::StackMapFrame(u2 max_locals, u2 max_stack, ClassVerifier* v) :
-                      _offset(0), _locals_size(0), _stack_size(0), _flags(0),
-                      _max_locals(max_locals), _max_stack(max_stack),
-                      _verifier(v) {
+                      _offset(0), _locals_size(0), _stack_size(0),
+                      _stack_mark(0), _flags(0), _max_locals(max_locals),
+                      _max_stack(max_stack), _verifier(v) {
   Thread* thr = v->thread();
   _locals = NEW_RESOURCE_ARRAY_IN_THREAD(thr, VerificationType, max_locals);
   _stack = NEW_RESOURCE_ARRAY_IN_THREAD(thr, VerificationType, max_stack);
@@ -157,17 +157,17 @@
   }
 }
 
-
-bool StackMapFrame::is_assignable_to(
+// Returns the location of the first mismatch, or 'len' if there are no
+// mismatches.
+int StackMapFrame::is_assignable_to(
     VerificationType* from, VerificationType* to, int32_t len, TRAPS) const {
-  for (int32_t i = 0; i < len; i++) {
-    bool subtype = to[i].is_assignable_from(
-      from[i], verifier(), THREAD);
-    if (!subtype) {
-      return false;
+  int32_t i = 0;
+  for (i = 0; i < len; i++) {
+    if (!to[i].is_assignable_from(from[i], verifier(), THREAD)) {
+      break;
     }
   }
-  return true;
+  return i;
 }
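
Returning the index of the first failing slot, rather than a bare bool, is
what lets the caller point the ErrorContext at the exact local or stack entry.
The scan-and-report-index shape is the same one std::mismatch provides, shown
here with plain equality for illustration:

    #include <algorithm>
    #include <cstddef>

    // First index at which two equal-length ranges disagree, or len if none --
    // the same contract as the overload above.
    size_t first_mismatch(const int* from, const int* to, size_t len) {
      return std::mismatch(from, from + len, to).first - from;
    }
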
 
 bool StackMapFrame::has_flag_match_exception(
@@ -209,50 +209,84 @@
 }
 
 bool StackMapFrame::is_assignable_to(
-    const StackMapFrame* target, bool is_exception_handler, TRAPS) const {
-  if (_max_locals != target->max_locals() ||
-      _stack_size != target->stack_size()) {
+    const StackMapFrame* target, bool is_exception_handler,
+    ErrorContext* ctx, TRAPS) const {
+  if (_max_locals != target->max_locals()) {
+    *ctx = ErrorContext::locals_size_mismatch(
+        _offset, (StackMapFrame*)this, (StackMapFrame*)target);
+    return false;
+  }
+  if (_stack_size != target->stack_size()) {
+    *ctx = ErrorContext::stack_size_mismatch(
+        _offset, (StackMapFrame*)this, (StackMapFrame*)target);
     return false;
   }
   // Only need to compare type elements up to target->locals() or target->stack().
   // The remaining type elements in this state can be ignored because they are
   // assignable to bogus type.
-  bool match_locals = is_assignable_to(
-    _locals, target->locals(), target->locals_size(), CHECK_false);
-  bool match_stack = is_assignable_to(
-    _stack, target->stack(), _stack_size, CHECK_false);
+  int mismatch_loc;
+  mismatch_loc = is_assignable_to(
+    _locals, target->locals(), target->locals_size(), THREAD);
+  if (mismatch_loc != target->locals_size()) {
+    *ctx = ErrorContext::bad_type(target->offset(),
+        TypeOrigin::local(mismatch_loc, (StackMapFrame*)this),
+        TypeOrigin::sm_local(mismatch_loc, (StackMapFrame*)target));
+    return false;
+  }
+  mismatch_loc = is_assignable_to(_stack, target->stack(), _stack_size, THREAD);
+  if (mismatch_loc != _stack_size) {
+    *ctx = ErrorContext::bad_type(target->offset(),
+        TypeOrigin::stack(mismatch_loc, (StackMapFrame*)this),
+        TypeOrigin::sm_stack(mismatch_loc, (StackMapFrame*)target));
+    return false;
+  }
+
   bool match_flags = (_flags | target->flags()) == target->flags();
-
-  return match_locals && match_stack &&
-    (match_flags || (is_exception_handler && has_flag_match_exception(target)));
+  if (match_flags || (is_exception_handler && has_flag_match_exception(target))) {
+    return true;
+  } else {
+    *ctx = ErrorContext::bad_flags(target->offset(),
+        (StackMapFrame*)this, (StackMapFrame*)target);
+    return false;
+  }
 }
 
 VerificationType StackMapFrame::pop_stack_ex(VerificationType type, TRAPS) {
   if (_stack_size <= 0) {
-    verifier()->verify_error(_offset, "Operand stack underflow");
+    verifier()->verify_error(
+        ErrorContext::stack_underflow(_offset, this),
+        "Operand stack underflow");
     return VerificationType::bogus_type();
   }
   VerificationType top = _stack[--_stack_size];
   bool subtype = type.is_assignable_from(
     top, verifier(), CHECK_(VerificationType::bogus_type()));
   if (!subtype) {
-    verifier()->verify_error(_offset, "Bad type on operand stack");
+    verifier()->verify_error(
+        ErrorContext::bad_type(_offset, stack_top_ctx(),
+            TypeOrigin::implicit(type)),
+        "Bad type on operand stack");
     return VerificationType::bogus_type();
   }
-  NOT_PRODUCT( _stack[_stack_size] = VerificationType::bogus_type(); )
   return top;
 }
 
 VerificationType StackMapFrame::get_local(
     int32_t index, VerificationType type, TRAPS) {
   if (index >= _max_locals) {
-    verifier()->verify_error(_offset, "Local variable table overflow");
+    verifier()->verify_error(
+        ErrorContext::bad_local_index(_offset, index),
+        "Local variable table overflow");
     return VerificationType::bogus_type();
   }
   bool subtype = type.is_assignable_from(_locals[index],
     verifier(), CHECK_(VerificationType::bogus_type()));
   if (!subtype) {
-    verifier()->verify_error(_offset, "Bad local variable type");
+    verifier()->verify_error(
+        ErrorContext::bad_type(_offset,
+          TypeOrigin::local(index, this),
+          TypeOrigin::implicit(type)),
+        "Bad local variable type");
     return VerificationType::bogus_type();
   }
   if(index >= _locals_size) { _locals_size = index + 1; }
@@ -264,23 +298,37 @@
   assert(type1.is_long() || type1.is_double(), "must be long/double");
   assert(type2.is_long2() || type2.is_double2(), "must be long/double_2");
   if (index >= _locals_size - 1) {
-    verifier()->verify_error(_offset, "get long/double overflows locals");
+    verifier()->verify_error(
+        ErrorContext::bad_local_index(_offset, index),
+        "get long/double overflows locals");
     return;
   }
-  bool subtype1 = type1.is_assignable_from(
-    _locals[index], verifier(), CHECK);
-  bool subtype2 = type2.is_assignable_from(
-    _locals[index+1], verifier(), CHECK);
-  if (!subtype1 || !subtype2) {
-    verifier()->verify_error(_offset, "Bad local variable type");
-    return;
+  bool subtype = type1.is_assignable_from(_locals[index], verifier(), CHECK);
+  if (!subtype) {
+    verifier()->verify_error(
+        ErrorContext::bad_type(_offset,
+            TypeOrigin::local(index, this), TypeOrigin::implicit(type1)),
+        "Bad local variable type");
+  } else {
+    subtype = type2.is_assignable_from(_locals[index + 1], verifier(), CHECK);
+    if (!subtype) {
+      /* Unreachable? All local store routines convert a split long or double
+       * into a TOP during the store.  So we should never end up seeing an
+       * orphaned half.  */
+      verifier()->verify_error(
+          ErrorContext::bad_type(_offset,
+              TypeOrigin::local(index + 1, this), TypeOrigin::implicit(type2)),
+          "Bad local variable type");
+    }
   }
 }
 
 void StackMapFrame::set_local(int32_t index, VerificationType type, TRAPS) {
   assert(!type.is_check(), "Must be a real type");
   if (index >= _max_locals) {
-    verifier()->verify_error("Local variable table overflow", _offset);
+    verifier()->verify_error(
+        ErrorContext::bad_local_index(_offset, index),
+        "Local variable table overflow");
     return;
   }
   // If type at index is double or long, set the next location to be unusable
@@ -310,7 +358,9 @@
   assert(type1.is_long() || type1.is_double(), "must be long/double");
   assert(type2.is_long2() || type2.is_double2(), "must be long/double_2");
   if (index >= _max_locals - 1) {
-    verifier()->verify_error("Local variable table overflow", _offset);
+    verifier()->verify_error(
+        ErrorContext::bad_local_index(_offset, index),
+        "Local variable table overflow");
     return;
   }
   // If type at index+1 is double or long, set the next location to be unusable
@@ -336,21 +386,30 @@
   }
 }
 
-#ifndef PRODUCT
-
-void StackMapFrame::print() const {
-  tty->print_cr("stackmap_frame[%d]:", _offset);
-  tty->print_cr("flags = 0x%x", _flags);
-  tty->print("locals[%d] = { ", _locals_size);
-  for (int32_t i = 0; i < _locals_size; i++) {
-    _locals[i].print_on(tty);
-  }
-  tty->print_cr(" }");
-  tty->print("stack[%d] = { ", _stack_size);
-  for (int32_t j = 0; j < _stack_size; j++) {
-    _stack[j].print_on(tty);
-  }
-  tty->print_cr(" }");
+TypeOrigin StackMapFrame::stack_top_ctx() {
+  return TypeOrigin::stack(_stack_size, this);
 }
 
-#endif
+void StackMapFrame::print_on(outputStream* str) const {
+  str->indent().print_cr("bci: @%d", _offset);
+  str->indent().print_cr("flags: {%s }",
+      flag_this_uninit() ? " flagThisUninit" : "");
+  str->indent().print("locals: {");
+  for (int32_t i = 0; i < _locals_size; ++i) {
+    str->print(" ");
+    _locals[i].print_on(str);
+    if (i != _locals_size - 1) {
+      str->print(",");
+    }
+  }
+  str->print_cr(" }");
+  str->indent().print("stack: {");
+  for (int32_t j = 0; j < _stack_size; ++j) {
+    str->print(" ");
+    _stack[j].print_on(str);
+    if (j != _stack_size - 1) {
+      str->print(",");
+    }
+  }
+  str->print_cr(" }");
+}
--- a/src/share/vm/classfile/stackMapFrame.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/stackMapFrame.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -34,6 +34,8 @@
 
 // A StackMapFrame represents one frame in the stack map attribute.
 
+class TypeContext;
+
 enum {
   FLAG_THIS_UNINIT = 0x01
 };
@@ -47,6 +49,10 @@
   int32_t _locals_size;  // number of valid type elements in _locals
   int32_t _stack_size;   // number of valid type elements in _stack
 
+  int32_t _stack_mark;   // Records the size of the stack prior to an
+                         // instruction modification, to allow rewinding
+                         // when/if an error occurs.
+
   int32_t _max_locals;
   int32_t _max_stack;
 
@@ -56,6 +62,31 @@
 
   ClassVerifier* _verifier;  // the verifier verifying this method
 
+  StackMapFrame(const StackMapFrame& cp) :
+      _offset(cp._offset), _locals_size(cp._locals_size),
+      _stack_size(cp._stack_size), _stack_mark(cp._stack_mark),
+      _max_locals(cp._max_locals), _max_stack(cp._max_stack),
+      _flags(cp._flags) {
+    _locals = NEW_RESOURCE_ARRAY(VerificationType, _max_locals);
+    for (int i = 0; i < _max_locals; ++i) {
+      if (i < _locals_size) {
+        _locals[i] = cp._locals[i];
+      } else {
+        _locals[i] = VerificationType::bogus_type();
+      }
+    }
+    int ss = MAX2(_stack_size, _stack_mark);
+    _stack = NEW_RESOURCE_ARRAY(VerificationType, _max_stack);
+    for (int i = 0; i < _max_stack; ++i) {
+      if (i < ss) {
+        _stack[i] = cp._stack[i];
+      } else {
+        _stack[i] = VerificationType::bogus_type();
+      }
+    }
+    _verifier = NULL;
+  }
+
  public:
   // constructors
 
@@ -77,16 +108,21 @@
                 ClassVerifier* v) : _offset(offset), _flags(flags),
                                     _locals_size(locals_size),
                                     _stack_size(stack_size),
+                                    _stack_mark(-1),
                                     _max_locals(max_locals),
                                     _max_stack(max_stack),
                                     _locals(locals), _stack(stack),
                                     _verifier(v) { }
 
+  static StackMapFrame* copy(StackMapFrame* smf) {
+    return new StackMapFrame(*smf);
+  }
+
   inline void set_offset(int32_t offset)      { _offset = offset; }
   inline void set_verifier(ClassVerifier* v)  { _verifier = v; }
   inline void set_flags(u1 flags)             { _flags = flags; }
   inline void set_locals_size(u2 locals_size) { _locals_size = locals_size; }
-  inline void set_stack_size(u2 stack_size)   { _stack_size = stack_size; }
+  inline void set_stack_size(u2 stack_size)   { _stack_size = _stack_mark = stack_size; }
   inline void clear_stack()                   { _stack_size = 0; }
   inline int32_t offset()   const             { return _offset; }
   inline ClassVerifier* verifier() const      { return _verifier; }
@@ -134,14 +170,37 @@
   void copy_stack(const StackMapFrame* src);
 
   // Return true if this stack map frame is assignable to target.
-  bool is_assignable_to(const StackMapFrame* target,
-                        bool is_exception_handler, TRAPS) const;
+  bool is_assignable_to(
+      const StackMapFrame* target, bool is_exception_handler,
+      ErrorContext* ctx, TRAPS) const;
+
+  inline void set_mark() {
+#ifdef ASSERT
+    // Put a bogus type in each popped slot to indicate it's no longer valid.
+    if (_stack_mark != -1) {
+      for (int i = _stack_mark - 1; i >= _stack_size; --i) {
+        _stack[i] = VerificationType::bogus_type();
+      }
+    }
+#endif // ASSERT
+    _stack_mark = _stack_size;
+  }
+
+  // Used when an error occurs and we want to reset the stack to the state
+  // it was before operands were popped off.
+  void restore() {
+    if (_stack_mark != -1) {
+      _stack_size = _stack_mark;
+    }
+  }
 
   // Push type into stack type array.
   inline void push_stack(VerificationType type, TRAPS) {
     assert(!type.is_check(), "Must be a real type");
     if (_stack_size >= _max_stack) {
-      verifier()->verify_error(_offset, "Operand stack overflow");
+      verifier()->verify_error(
+          ErrorContext::stack_overflow(_offset, this),
+          "Operand stack overflow");
       return;
     }
     _stack[_stack_size++] = type;
@@ -152,7 +211,9 @@
     assert(type1.is_long() || type1.is_double(), "must be long/double");
     assert(type2.is_long2() || type2.is_double2(), "must be long/double_2");
     if (_stack_size >= _max_stack - 1) {
-      verifier()->verify_error(_offset, "Operand stack overflow");
+      verifier()->verify_error(
+          ErrorContext::stack_overflow(_offset, this),
+          "Operand stack overflow");
       return;
     }
     _stack[_stack_size++] = type1;
@@ -162,13 +223,12 @@
   // Pop and return the top type on stack without verifying.
   inline VerificationType pop_stack(TRAPS) {
     if (_stack_size <= 0) {
-      verifier()->verify_error(_offset, "Operand stack underflow");
+      verifier()->verify_error(
+          ErrorContext::stack_underflow(_offset, this),
+          "Operand stack underflow");
       return VerificationType::bogus_type();
     }
-    // Put bogus type to indicate it's no longer valid.
-    // Added to make it consistent with the other pop_stack method.
     VerificationType top = _stack[--_stack_size];
-    NOT_PRODUCT( _stack[_stack_size] = VerificationType::bogus_type(); )
     return top;
   }
 
@@ -180,8 +240,7 @@
       bool subtype = type.is_assignable_from(
         top, verifier(), CHECK_(VerificationType::bogus_type()));
       if (subtype) {
-        _stack_size --;
-        NOT_PRODUCT( _stack[_stack_size] = VerificationType::bogus_type(); )
+        --_stack_size;
         return top;
       }
     }
@@ -199,8 +258,6 @@
       bool subtype2 = type2.is_assignable_from(top2, verifier(), CHECK);
       if (subtype1 && subtype2) {
         _stack_size -= 2;
-        NOT_PRODUCT( _stack[_stack_size] = VerificationType::bogus_type(); )
-        NOT_PRODUCT( _stack[_stack_size+1] = VerificationType::bogus_type(); )
         return;
       }
     }
@@ -208,6 +265,14 @@
     pop_stack_ex(type2, THREAD);
   }
 
+  VerificationType local_at(int index) {
+    return _locals[index];
+  }
+
+  VerificationType stack_at(int index) {
+    return _stack[index];
+  }
+
   // Uncommon case that throws exceptions.
   VerificationType pop_stack_ex(VerificationType type, TRAPS);
 
@@ -226,13 +291,15 @@
 
   // Private auxiliary method used only in is_assignable_to(StackMapFrame).
-  // Returns true if src is assignable to target.
-  bool is_assignable_to(
+  // Returns the first index at which src is not assignable to target, or
+  // len if src is fully assignable to target.
+  int is_assignable_to(
     VerificationType* src, VerificationType* target, int32_t len, TRAPS) const;
 
   bool has_flag_match_exception(const StackMapFrame* target) const;
 
-  // Debugging
-  void print() const PRODUCT_RETURN;
+  TypeOrigin stack_top_ctx();
+
+  void print_on(outputStream* str) const;
 };
 
 #endif // SHARE_VM_CLASSFILE_STACKMAPFRAME_HPP
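
The _stack_mark/set_mark()/restore() machinery above exists so that error
reporting can show the operand stack as it stood before the failing
instruction began popping operands. A minimal sketch of the intended pattern
(a simplification, not the verifier's actual control flow: in the real patch
verify_method() calls set_mark() once per bytecode, and the rewind is driven
through TypeOrigin::reset_frame() on the frame copy captured for the error
context):

    // Sketch only -- not the verifier's actual driver loop.
    void sketch_verify_iaload(StackMapFrame* frame, TRAPS) {
      frame->set_mark();  // snapshot stack depth before this instruction pops
      frame->pop_stack(VerificationType::integer_type(), THREAD);   // index
      VerificationType atype = frame->pop_stack(
          VerificationType::reference_check(), THREAD);             // arrayref
      if (!atype.is_int_array()) {
        frame->restore();  // stack depth back to the mark, so diagnostics
                           // print the operands that were just consumed
      }
    }
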
--- a/src/share/vm/classfile/stackMapTable.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/stackMapTable.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -46,7 +46,9 @@
       _frame_array[i] = frame;
       int offset = frame->offset();
       if (offset >= code_len || code_data[offset] == 0) {
-        frame->verifier()->verify_error("StackMapTable error: bad offset");
+        frame->verifier()->verify_error(
+            ErrorContext::bad_stackmap(i, frame),
+            "StackMapTable error: bad offset");
         return;
       }
       pre_frame = frame;
@@ -68,12 +70,9 @@
 
 bool StackMapTable::match_stackmap(
     StackMapFrame* frame, int32_t target,
-    bool match, bool update, TRAPS) const {
+    bool match, bool update, ErrorContext* ctx, TRAPS) const {
   int index = get_index_from_offset(target);
-
-  return match_stackmap(
-    frame, target, index, match,
-    update, CHECK_VERIFY_(frame->verifier(), false));
+  return match_stackmap(frame, target, index, match, update, ctx, THREAD);
 }
 
 // Match and/or update current_frame to the frame in stackmap table with
@@ -88,23 +87,23 @@
 // unconditional branch:                                 true   true
 bool StackMapTable::match_stackmap(
     StackMapFrame* frame, int32_t target, int32_t frame_index,
-    bool match, bool update, TRAPS) const {
+    bool match, bool update, ErrorContext* ctx, TRAPS) const {
   if (frame_index < 0 || frame_index >= _frame_count) {
-    frame->verifier()->verify_error(frame->offset(),
-      "Expecting a stackmap frame at branch target %d", target);
+    *ctx = ErrorContext::missing_stackmap(frame->offset());
+    frame->verifier()->verify_error(
+        *ctx, "Expecting a stackmap frame at branch target %d", target);
     return false;
   }
 
+  StackMapFrame *stackmap_frame = _frame_array[frame_index];
   bool result = true;
-  StackMapFrame *stackmap_frame = _frame_array[frame_index];
   if (match) {
     // when checking handler target, match == true && update == false
     bool is_exception_handler = !update;
     // Has direct control flow from last instruction, need to match the two
     // frames.
-    result = frame->is_assignable_to(
-      stackmap_frame, is_exception_handler,
-      CHECK_VERIFY_(frame->verifier(), false));
+    result = frame->is_assignable_to(stackmap_frame, is_exception_handler,
+        ctx, CHECK_VERIFY_(frame->verifier(), result));
   }
   if (update) {
     // Use the frame in stackmap table as current frame
@@ -125,11 +124,12 @@
 
 void StackMapTable::check_jump_target(
     StackMapFrame* frame, int32_t target, TRAPS) const {
+  ErrorContext ctx;
   bool match = match_stackmap(
-    frame, target, true, false, CHECK_VERIFY(frame->verifier()));
+    frame, target, true, false, &ctx, CHECK_VERIFY(frame->verifier()));
   if (!match || (target < 0 || target >= _code_length)) {
-    frame->verifier()->verify_error(frame->offset(),
-      "Inconsistent stackmap frames at branch target %d", target);
+    frame->verifier()->verify_error(ctx,
+        "Inconsistent stackmap frames at branch target %d", target);
     return;
   }
   // check if uninitialized objects exist on backward branches
@@ -139,25 +139,25 @@
 void StackMapTable::check_new_object(
     const StackMapFrame* frame, int32_t target, TRAPS) const {
   if (frame->offset() > target && frame->has_new_object()) {
-    frame->verifier()->verify_error(frame->offset(),
-      "Uninitialized object exists on backward branch %d", target);
+    frame->verifier()->verify_error(
+        ErrorContext::bad_code(frame->offset()),
+        "Uninitialized object exists on backward branch %d", target);
     return;
   }
 }
 
-#ifndef PRODUCT
-
-void StackMapTable::print() const {
-  tty->print_cr("StackMapTable: frame_count = %d", _frame_count);
-  tty->print_cr("table = { ");
-  for (int32_t i = 0; i < _frame_count; i++) {
-    _frame_array[i]->print();
+void StackMapTable::print_on(outputStream* str) const {
+  str->indent().print_cr("StackMapTable: frame_count = %d", _frame_count);
+  str->indent().print_cr("table = { ");
+  {
+    streamIndentor si(str);
+    for (int32_t i = 0; i < _frame_count; ++i) {
+      _frame_array[i]->print_on(str);
+    }
   }
-  tty->print_cr(" }");
+  str->print_cr(" }");
 }
 
-#endif
-
 int32_t StackMapReader::chop(
     VerificationType* locals, int32_t length, int32_t chops) {
   if (locals == NULL) return -1;
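
Taken together, the match/update flags passed to match_stackmap() encode the
caller situations enumerated in the truth-table comment earlier in this file.
A sketch of the three call shapes (simplified from the real call sites;
'table' and the bci variables are stand-ins):

    ErrorContext ctx;
    // Branch or exception-handler target: check assignability against the
    // recorded frame, but keep the current frame (match=true, update=false).
    table->match_stackmap(frame, target_bci, true, false, &ctx, THREAD);
    // Falling onto a stackmap right after an unconditional branch: nothing
    // to match, just adopt the recorded frame (match=false, update=true).
    table->match_stackmap(frame, next_bci, false, true, &ctx, THREAD);
    // Normal linear flow onto a stackmap: match and adopt (true, true).
    table->match_stackmap(frame, next_bci, true, true, &ctx, THREAD);
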
--- a/src/share/vm/classfile/stackMapTable.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/stackMapTable.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -26,6 +26,7 @@
 #define SHARE_VM_CLASSFILE_STACKMAPTABLE_HPP
 
 #include "classfile/stackMapFrame.hpp"
+#include "classfile/verifier.hpp"
 #include "memory/allocation.hpp"
 #include "oops/constantPoolOop.hpp"
 #include "oops/methodOop.hpp"
@@ -73,12 +74,12 @@
   // specified offset. Return true if the two frames match.
   bool match_stackmap(
     StackMapFrame* current_frame, int32_t offset,
-    bool match, bool update, TRAPS) const;
+    bool match, bool update, ErrorContext* ctx, TRAPS) const;
   // Match and/or update current_frame to the frame in stackmap table with
   // specified offset and frame index. Return true if the two frames match.
   bool match_stackmap(
     StackMapFrame* current_frame, int32_t offset, int32_t frame_index,
-    bool match, bool update, TRAPS) const;
+    bool match, bool update, ErrorContext* ctx, TRAPS) const;
 
   // Check jump instructions. Make sure there are no uninitialized
   // instances on backward branch.
@@ -93,8 +94,7 @@
   void check_new_object(
     const StackMapFrame* frame, int32_t target, TRAPS) const;
 
-  // Debugging
-  void print() const PRODUCT_RETURN;
+  void print_on(outputStream* str) const;
 };
 
 class StackMapStream : StackObj {
--- a/src/share/vm/classfile/stackMapTableFormat.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/stackMapTableFormat.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -135,7 +135,6 @@
                 !is_object() && !is_uninitialized()));
   }
 
-#ifdef ASSERT
   void print_on(outputStream* st) {
     switch (tag()) {
       case ITEM_Top: st->print("Top"); break;
@@ -154,14 +153,13 @@
         assert(false, "Bad verification_type_info");
     }
   }
-#endif
 };
 
 #define FOR_EACH_STACKMAP_FRAME_TYPE(macro, arg1, arg2) \
   macro(same_frame, arg1, arg2) \
   macro(same_frame_extended, arg1, arg2) \
-  macro(same_frame_1_stack_item_frame, arg1, arg2) \
-  macro(same_frame_1_stack_item_extended, arg1, arg2) \
+  macro(same_locals_1_stack_item_frame, arg1, arg2) \
+  macro(same_locals_1_stack_item_extended, arg1, arg2) \
   macro(chop_frame, arg1, arg2) \
   macro(append_frame, arg1, arg2) \
   macro(full_frame, arg1, arg2)
@@ -203,9 +201,8 @@
   // that we don't read past a particular memory limit.  It returns false
   // if any part of the data structure is outside the specified memory bounds.
   inline bool verify(address start, address end) const;
-#ifdef ASSERT
-  inline void print_on(outputStream* st) const;
-#endif
+
+  inline void print_on(outputStream* st, int current_offset) const;
 
   // Create as_xxx and is_xxx methods for the subtypes
 #define FRAME_TYPE_DECL(stackmap_frame_type, arg1, arg2) \
@@ -263,11 +260,9 @@
     return true;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("same_frame(%d)", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("same_frame(@%d)", offset_delta() + current_offset);
   }
-#endif
 };
 
 class same_frame_extended : public stack_map_frame {
@@ -311,14 +306,12 @@
     return frame_type_addr() + size() <= end;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("same_frame_extended(%d)", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("same_frame_extended(@%d)", offset_delta() + current_offset);
   }
-#endif
 };
 
-class same_frame_1_stack_item_frame : public stack_map_frame {
+class same_locals_1_stack_item_frame : public stack_map_frame {
  private:
   address type_addr() const { return frame_type_addr() + sizeof(u1); }
 
@@ -332,14 +325,14 @@
     return tag >= 64 && tag < 128;
   }
 
-  static same_frame_1_stack_item_frame* at(address addr) {
+  static same_locals_1_stack_item_frame* at(address addr) {
     assert(is_frame_type(*addr), "Wrong frame id");
-    return (same_frame_1_stack_item_frame*)addr;
+    return (same_locals_1_stack_item_frame*)addr;
   }
 
-  static same_frame_1_stack_item_frame* create_at(
+  static same_locals_1_stack_item_frame* create_at(
       address addr, int offset_delta, verification_type_info* vti) {
-    same_frame_1_stack_item_frame* sm = (same_frame_1_stack_item_frame*)addr;
+    same_locals_1_stack_item_frame* sm = (same_locals_1_stack_item_frame*)addr;
     sm->set_offset_delta(offset_delta);
     if (vti != NULL) {
       sm->set_type(vti);
@@ -382,16 +375,15 @@
     return types()->verify(start, end);
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("same_frame_1_stack_item_frame(%d,", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("same_locals_1_stack_item_frame(@%d,",
+        offset_delta() + current_offset);
     types()->print_on(st);
     st->print(")");
   }
-#endif
 };
 
-class same_frame_1_stack_item_extended : public stack_map_frame {
+class same_locals_1_stack_item_extended : public stack_map_frame {
  private:
   address offset_delta_addr() const { return frame_type_addr() + sizeof(u1); }
   address type_addr() const { return offset_delta_addr() + sizeof(u2); }
@@ -403,15 +395,15 @@
     return tag == _frame_id;
   }
 
-  static same_frame_1_stack_item_extended* at(address addr) {
+  static same_locals_1_stack_item_extended* at(address addr) {
     assert(is_frame_type(*addr), "Wrong frame id");
-    return (same_frame_1_stack_item_extended*)addr;
+    return (same_locals_1_stack_item_extended*)addr;
   }
 
-  static same_frame_1_stack_item_extended* create_at(
+  static same_locals_1_stack_item_extended* create_at(
       address addr, int offset_delta, verification_type_info* vti) {
-    same_frame_1_stack_item_extended* sm =
-       (same_frame_1_stack_item_extended*)addr;
+    same_locals_1_stack_item_extended* sm =
+       (same_locals_1_stack_item_extended*)addr;
     sm->set_frame_type(_frame_id);
     sm->set_offset_delta(offset_delta);
     if (vti != NULL) {
@@ -448,13 +440,12 @@
     return type_addr() < end && types()->verify(start, end);
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("same_frame_1_stack_item_extended(%d,", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("same_locals_1_stack_item_extended(@%d,",
+        offset_delta() + current_offset);
     types()->print_on(st);
     st->print(")");
   }
-#endif
 };
 
 class chop_frame : public stack_map_frame {
@@ -517,11 +508,9 @@
     return frame_type_addr() + size() <= end;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("chop_frame(%d,%d)", offset_delta(), chops());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("chop_frame(@%d,%d)", offset_delta() + current_offset, chops());
   }
-#endif
 };
 
 class append_frame : public stack_map_frame {
@@ -618,9 +607,8 @@
     return false;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("append_frame(%d,", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("append_frame(@%d,", offset_delta() + current_offset);
     verification_type_info* vti = types();
     for (int i = 0; i < number_of_types(); ++i) {
       vti->print_on(st);
@@ -631,7 +619,6 @@
     }
     st->print(")");
   }
-#endif
 };
 
 class full_frame : public stack_map_frame {
@@ -774,9 +761,8 @@
     return true;
   }
 
-#ifdef ASSERT
-  void print_on(outputStream* st) const {
-    st->print("full_frame(%d,{", offset_delta());
+  void print_on(outputStream* st, int current_offset = -1) const {
+    st->print("full_frame(@%d,{", offset_delta() + current_offset);
     verification_type_info* vti = locals();
     for (int i = 0; i < num_locals(); ++i) {
       vti->print_on(st);
@@ -798,7 +784,6 @@
     }
     st->print("})");
   }
-#endif
 };
 
 #define VIRTUAL_DISPATCH(stack_frame_type, func_name, args) \
@@ -852,11 +837,9 @@
   return false;
 }
 
-#ifdef ASSERT
-void stack_map_frame::print_on(outputStream* st) const {
-  FOR_EACH_STACKMAP_FRAME_TYPE(VOID_VIRTUAL_DISPATCH, print_on, (st));
+void stack_map_frame::print_on(outputStream* st, int offs = -1) const {
+  FOR_EACH_STACKMAP_FRAME_TYPE(VOID_VIRTUAL_DISPATCH, print_on, (st, offs));
 }
-#endif
 
 #undef VIRTUAL_DISPATCH
 #undef VOID_VIRTUAL_DISPATCH
@@ -873,16 +856,46 @@
 FOR_EACH_STACKMAP_FRAME_TYPE(AS_SUBTYPE_DEF, x, x)
 #undef AS_SUBTYPE_DEF
 
+class stack_map_table {
+ private:
+  address number_of_entries_addr() const {
+    return (address)this;
+  }
+  address entries_addr() const {
+    return number_of_entries_addr() + sizeof(u2);
+  }
+
+ protected:
+  // No constructors  - should be 'private', but GCC issues a warning if it is
+  stack_map_table() {}
+  stack_map_table(const stack_map_table&) {}
+
+ public:
+
+  static stack_map_table* at(address addr) {
+    return (stack_map_table*)addr;
+  }
+
+  u2 number_of_entries() const {
+    return Bytes::get_Java_u2(number_of_entries_addr());
+  }
+  stack_map_frame* entries() const {
+    return stack_map_frame::at(entries_addr());
+  }
+
+  void set_number_of_entries(u2 num) {
+    Bytes::put_Java_u2(number_of_entries_addr(), num);
+  }
+};
+
 class stack_map_table_attribute {
  private:
   address name_index_addr() const {
       return (address)this; }
   address attribute_length_addr() const {
       return name_index_addr() + sizeof(u2); }
-  address number_of_entries_addr() const {
+  address stack_map_table_addr() const {
       return attribute_length_addr() + sizeof(u4); }
-  address entries_addr() const {
-      return number_of_entries_addr() + sizeof(u2); }
 
  protected:
   // No constructors  - should be 'private', but GCC issues a warning if it is
@@ -896,17 +909,11 @@
   }
 
   u2 name_index() const {
-       return Bytes::get_Java_u2(name_index_addr()); }
+    return Bytes::get_Java_u2(name_index_addr()); }
   u4 attribute_length() const {
-      return Bytes::get_Java_u4(attribute_length_addr()); }
-  u2 number_of_entries() const {
-      return Bytes::get_Java_u2(number_of_entries_addr()); }
-  stack_map_frame* entries() const {
-    return stack_map_frame::at(entries_addr());
-  }
-
-  static size_t header_size() {
-      return sizeof(u2) + sizeof(u4);
+    return Bytes::get_Java_u4(attribute_length_addr()); }
+  stack_map_table* table() const {
+    return stack_map_table::at(stack_map_table_addr());
   }
 
   void set_name_index(u2 idx) {
@@ -915,9 +922,8 @@
   void set_attribute_length(u4 len) {
     Bytes::put_Java_u4(attribute_length_addr(), len);
   }
-  void set_number_of_entries(u2 num) {
-    Bytes::put_Java_u2(number_of_entries_addr(), num);
-  }
 };
 
+#undef FOR_EACH_STACKMAP_FRAME_TYPE
+
 #endif // SHARE_VM_CLASSFILE_STACKMAPTABLEFORMAT_HPP
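
Because the new stack_map_table class is an overlay -- never constructed,
only cast over the raw attribute bytes, with every field read through
big-endian accessors such as Bytes::get_Java_u2() -- a method's table can be
walked in place. A sketch mirroring what ErrorContext::stackmap_details() in
verifier.cpp does with it (assumes 'attr_data' already points at the
number_of_entries field):

    void sketch_dump_table(address attr_data, outputStream* st) {
      stack_map_table* table = stack_map_table::at(attr_data);
      stack_map_frame* frame = table->entries();
      int bci = -1;  // the offset_delta() accessors fold in the +1 bias,
                     // which is why the running offset starts at -1
      for (u2 i = 0; i < table->number_of_entries(); ++i) {
        frame->print_on(st, bci);   // prints the absolute target bci as @<n>
        st->cr();
        bci += frame->offset_delta();
        frame = frame->next();
      }
    }
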
--- a/src/share/vm/classfile/verificationType.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/verificationType.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -110,34 +110,34 @@
   }
 }
 
-#ifndef PRODUCT
-
 void VerificationType::print_on(outputStream* st) const {
   switch (_u._data) {
-    case Bogus:            st->print(" bogus "); break;
-    case Category1:        st->print(" category1 "); break;
-    case Category2:        st->print(" category2 "); break;
-    case Category2_2nd:    st->print(" category2_2nd "); break;
-    case Boolean:          st->print(" boolean "); break;
-    case Byte:             st->print(" byte "); break;
-    case Short:            st->print(" short "); break;
-    case Char:             st->print(" char "); break;
-    case Integer:          st->print(" integer "); break;
-    case Float:            st->print(" float "); break;
-    case Long:             st->print(" long "); break;
-    case Double:           st->print(" double "); break;
-    case Long_2nd:         st->print(" long_2nd "); break;
-    case Double_2nd:       st->print(" double_2nd "); break;
-    case Null:             st->print(" null "); break;
+    case Bogus:            st->print("top"); break;
+    case Category1:        st->print("category1"); break;
+    case Category2:        st->print("category2"); break;
+    case Category2_2nd:    st->print("category2_2nd"); break;
+    case Boolean:          st->print("boolean"); break;
+    case Byte:             st->print("byte"); break;
+    case Short:            st->print("short"); break;
+    case Char:             st->print("char"); break;
+    case Integer:          st->print("integer"); break;
+    case Float:            st->print("float"); break;
+    case Long:             st->print("long"); break;
+    case Double:           st->print("double"); break;
+    case Long_2nd:         st->print("long_2nd"); break;
+    case Double_2nd:       st->print("double_2nd"); break;
+    case Null:             st->print("null"); break;
+    case ReferenceQuery:   st->print("reference type"); break;
+    case Category1Query:   st->print("category1 type"); break;
+    case Category2Query:   st->print("category2 type"); break;
+    case Category2_2ndQuery: st->print("category2_2nd type"); break;
     default:
       if (is_uninitialized_this()) {
-        st->print(" uninitializedThis ");
+        st->print("uninitializedThis");
       } else if (is_uninitialized()) {
-        st->print(" uninitialized %d ", bci());
+        st->print("uninitialized %d", bci());
       } else {
-        st->print(" class %s ", name()->as_klass_external_name());
+        name()->print_value_on(st);
       }
   }
 }
-
-#endif
--- a/src/share/vm/classfile/verificationType.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/verificationType.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -157,7 +157,7 @@
 
   // For reference types, store the actual Symbol
   static VerificationType reference_type(Symbol* sh) {
-      assert(((uintptr_t)sh & 0x3) == 0, "Oops must be aligned");
+      assert(((uintptr_t)sh & 0x3) == 0, "Symbols must be aligned");
       // If the above assert fails in the future because oop* isn't aligned,
       // then this type encoding system will have to change to have a tag value
       // to discriminate between oops and primitives.
@@ -303,7 +303,7 @@
     return index;
   }
 
-  void print_on(outputStream* st) const PRODUCT_RETURN;
+  void print_on(outputStream* st) const;
 
  private:
 
--- a/src/share/vm/classfile/verifier.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/verifier.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -26,9 +26,12 @@
 #include "classfile/classFileStream.hpp"
 #include "classfile/javaClasses.hpp"
 #include "classfile/stackMapTable.hpp"
+#include "classfile/stackMapFrame.hpp"
+#include "classfile/stackMapTableFormat.hpp"
 #include "classfile/systemDictionary.hpp"
 #include "classfile/verifier.hpp"
 #include "classfile/vmSymbols.hpp"
+#include "interpreter/bytecodes.hpp"
 #include "interpreter/bytecodeStream.hpp"
 #include "memory/oopFactory.hpp"
 #include "memory/resourceArea.hpp"
@@ -110,8 +113,11 @@
   Symbol* exception_name = NULL;
   const size_t message_buffer_len = klass->name()->utf8_length() + 1024;
   char* message_buffer = NEW_RESOURCE_ARRAY(char, message_buffer_len);
+  char* exception_message = message_buffer;
 
   const char* klassName = klass->external_name();
+  bool can_failover = FailOverToOldVerifier &&
+      klass->major_version() < NOFAILOVER_MAJOR_VERSION;
 
   // If the class should be verified, first see if we can use the split
   // verifier.  If not, or if verification fails and FailOverToOldVerifier
@@ -122,27 +128,28 @@
     }
     if (UseSplitVerifier &&
         klass->major_version() >= STACKMAP_ATTRIBUTE_MAJOR_VERSION) {
-        ClassVerifier split_verifier(
-          klass, message_buffer, message_buffer_len, THREAD);
-        split_verifier.verify_class(THREAD);
-        exception_name = split_verifier.result();
-      if (klass->major_version() < NOFAILOVER_MAJOR_VERSION &&
-          FailOverToOldVerifier && !HAS_PENDING_EXCEPTION &&
+      ClassVerifier split_verifier(klass, THREAD);
+      split_verifier.verify_class(THREAD);
+      exception_name = split_verifier.result();
+      if (can_failover && !HAS_PENDING_EXCEPTION &&
           (exception_name == vmSymbols::java_lang_VerifyError() ||
            exception_name == vmSymbols::java_lang_ClassFormatError())) {
-        if (TraceClassInitialization) {
+        if (TraceClassInitialization || VerboseVerification) {
           tty->print_cr(
             "Fail over class verification to old verifier for: %s", klassName);
         }
         exception_name = inference_verify(
           klass, message_buffer, message_buffer_len, THREAD);
       }
+      if (exception_name != NULL) {
+        exception_message = split_verifier.exception_message();
+      }
     } else {
       exception_name = inference_verify(
           klass, message_buffer, message_buffer_len, THREAD);
     }
 
-    if (TraceClassInitialization) {
+    if (TraceClassInitialization || VerboseVerification) {
       if (HAS_PENDING_EXCEPTION) {
         tty->print("Verification for %s has", klassName);
         tty->print_cr(" exception pending %s ",
@@ -173,7 +180,7 @@
       kls = kls->super();
     }
     message_buffer[message_buffer_len - 1] = '\0'; // just to be sure
-    THROW_MSG_(exception_name, message_buffer, false);
+    THROW_MSG_(exception_name, exception_message, false);
   }
 }
 
@@ -221,7 +228,7 @@
   }
 
   ResourceMark rm(THREAD);
-  if (ClassVerifier::_verify_verbose) {
+  if (VerboseVerification) {
     tty->print_cr("Verifying class %s with old format", klass->external_name());
   }
 
@@ -265,14 +272,252 @@
   }
 }
 
+TypeOrigin TypeOrigin::null() {
+  return TypeOrigin();
+}
+TypeOrigin TypeOrigin::local(u2 index, StackMapFrame* frame) {
+  assert(frame != NULL, "Must have a frame");
+  return TypeOrigin(CF_LOCALS, index, StackMapFrame::copy(frame),
+     frame->local_at(index));
+}
+TypeOrigin TypeOrigin::stack(u2 index, StackMapFrame* frame) {
+  assert(frame != NULL, "Must have a frame");
+  return TypeOrigin(CF_STACK, index, StackMapFrame::copy(frame),
+      frame->stack_at(index));
+}
+TypeOrigin TypeOrigin::sm_local(u2 index, StackMapFrame* frame) {
+  assert(frame != NULL, "Must have a frame");
+  return TypeOrigin(SM_LOCALS, index, StackMapFrame::copy(frame),
+      frame->local_at(index));
+}
+TypeOrigin TypeOrigin::sm_stack(u2 index, StackMapFrame* frame) {
+  assert(frame != NULL, "Must have a frame");
+  return TypeOrigin(SM_STACK, index, StackMapFrame::copy(frame),
+      frame->stack_at(index));
+}
+TypeOrigin TypeOrigin::bad_index(u2 index) {
+  return TypeOrigin(BAD_INDEX, index, NULL, VerificationType::bogus_type());
+}
+TypeOrigin TypeOrigin::cp(u2 index, VerificationType vt) {
+  return TypeOrigin(CONST_POOL, index, NULL, vt);
+}
+TypeOrigin TypeOrigin::signature(VerificationType vt) {
+  return TypeOrigin(SIG, 0, NULL, vt);
+}
+TypeOrigin TypeOrigin::implicit(VerificationType t) {
+  return TypeOrigin(IMPLICIT, 0, NULL, t);
+}
+TypeOrigin TypeOrigin::frame(StackMapFrame* frame) {
+  return TypeOrigin(FRAME_ONLY, 0, StackMapFrame::copy(frame),
+                    VerificationType::bogus_type());
+}
+
+void TypeOrigin::reset_frame() {
+  if (_frame != NULL) {
+    _frame->restore();
+  }
+}
+
+void TypeOrigin::details(outputStream* ss) const {
+  _type.print_on(ss);
+  switch (_origin) {
+    case CF_LOCALS:
+      ss->print(" (current frame, locals[%d])", _index);
+      break;
+    case CF_STACK:
+      ss->print(" (current frame, stack[%d])", _index);
+      break;
+    case SM_LOCALS:
+      ss->print(" (stack map, locals[%d])", _index);
+      break;
+    case SM_STACK:
+      ss->print(" (stack map, stack[%d])", _index);
+      break;
+    case CONST_POOL:
+      ss->print(" (constant pool %d)", _index);
+      break;
+    case SIG:
+      ss->print(" (from method signature)");
+      break;
+    case IMPLICIT:
+    case FRAME_ONLY:
+    case NONE:
+    default:
+      ;
+  }
+}
+
+#ifdef ASSERT
+void TypeOrigin::print_on(outputStream* str) const {
+  str->print("{%d,%d,%p:", _origin, _index, _frame);
+  if (_frame != NULL) {
+    _frame->print_on(str);
+  } else {
+    str->print("null");
+  }
+  str->print(",");
+  _type.print_on(str);
+  str->print("}");
+}
+#endif
+
+void ErrorContext::details(outputStream* ss, methodOop method) const {
+  if (is_valid()) {
+    ss->print_cr("");
+    ss->print_cr("Exception Details:");
+    location_details(ss, method);
+    reason_details(ss);
+    frame_details(ss);
+    bytecode_details(ss, method);
+    handler_details(ss, method);
+    stackmap_details(ss, method);
+  }
+}
+
+void ErrorContext::reason_details(outputStream* ss) const {
+  streamIndentor si(ss);
+  ss->indent().print_cr("Reason:");
+  streamIndentor si2(ss);
+  ss->indent().print("");
+  switch (_fault) {
+    case INVALID_BYTECODE:
+      ss->print("Error exists in the bytecode");
+      break;
+    case WRONG_TYPE:
+      if (_expected.is_valid()) {
+        ss->print("Type ");
+        _type.details(ss);
+        ss->print(" is not assignable to ");
+        _expected.details(ss);
+      } else {
+        ss->print("Invalid type: ");
+        _type.details(ss);
+      }
+      break;
+    case FLAGS_MISMATCH:
+      if (_expected.is_valid()) {
+        ss->print("Current frame's flags are not assignable "
+                  "to stack map frame's.");
+      } else {
+        ss->print("Current frame's flags are invalid in this context.");
+      }
+      break;
+    case BAD_CP_INDEX:
+      ss->print("Constant pool index %d is invalid", _type.index());
+      break;
+    case BAD_LOCAL_INDEX:
+      ss->print("Local index %d is invalid", _type.index());
+      break;
+    case LOCALS_SIZE_MISMATCH:
+      ss->print("Current frame's local size doesn't match stackmap.");
+      break;
+    case STACK_SIZE_MISMATCH:
+      ss->print("Current frame's stack size doesn't match stackmap.");
+      break;
+    case STACK_OVERFLOW:
+      ss->print("Exceeded max stack size.");
+      break;
+    case STACK_UNDERFLOW:
+      ss->print("Attempt to pop empty stack.");
+      break;
+    case MISSING_STACKMAP:
+      ss->print("Expected stackmap frame at this location.");
+      break;
+    case BAD_STACKMAP:
+      ss->print("Invalid stackmap specification.");
+      break;
+    case UNKNOWN:
+    default:
+      ShouldNotReachHere();
+      ss->print_cr("Unknown");
+  }
+  ss->print_cr("");
+}
+
+void ErrorContext::location_details(outputStream* ss, methodOop method) const {
+  if (_bci != -1 && method != NULL) {
+    streamIndentor si(ss);
+    const char* bytecode_name = "<invalid>";
+    if (method->validate_bci_from_bcx(_bci) != -1) {
+      Bytecodes::Code code = Bytecodes::code_or_bp_at(method->bcp_from(_bci));
+      if (Bytecodes::is_defined(code)) {
+        bytecode_name = Bytecodes::name(code);
+      } else {
+        bytecode_name = "<illegal>";
+      }
+    }
+    instanceKlass* ik = instanceKlass::cast(method->method_holder());
+    ss->indent().print_cr("Location:");
+    streamIndentor si2(ss);
+    ss->indent().print_cr("%s.%s%s @%d: %s",
+        ik->name()->as_C_string(), method->name()->as_C_string(),
+        method->signature()->as_C_string(), _bci, bytecode_name);
+  }
+}
+
+void ErrorContext::frame_details(outputStream* ss) const {
+  streamIndentor si(ss);
+  if (_type.is_valid() && _type.frame() != NULL) {
+    ss->indent().print_cr("Current Frame:");
+    streamIndentor si2(ss);
+    _type.frame()->print_on(ss);
+  }
+  if (_expected.is_valid() && _expected.frame() != NULL) {
+    ss->indent().print_cr("Stackmap Frame:");
+    streamIndentor si2(ss);
+    _expected.frame()->print_on(ss);
+  }
+}
+
+void ErrorContext::bytecode_details(outputStream* ss, methodOop method) const {
+  if (method != NULL) {
+    streamIndentor si(ss);
+    ss->indent().print_cr("Bytecode:");
+    streamIndentor si2(ss);
+    ss->print_data(method->code_base(), method->code_size(), false);
+  }
+}
+
+void ErrorContext::handler_details(outputStream* ss, methodOop method) const {
+  if (method != NULL) {
+    streamIndentor si(ss);
+    ExceptionTable table(method);
+    if (table.length() > 0) {
+      ss->indent().print_cr("Exception Handler Table:");
+      streamIndentor si2(ss);
+      for (int i = 0; i < table.length(); ++i) {
+        ss->indent().print_cr("bci [%d, %d] => handler: %d", table.start_pc(i),
+            table.end_pc(i), table.handler_pc(i));
+      }
+    }
+  }
+}
+
+void ErrorContext::stackmap_details(outputStream* ss, methodOop method) const {
+  if (method != NULL && method->has_stackmap_table()) {
+    streamIndentor si(ss);
+    ss->indent().print_cr("Stackmap Table:");
+    typeArrayOop data = method->stackmap_data();
+    stack_map_table* sm_table =
+        stack_map_table::at((address)data->byte_at_addr(0));
+    stack_map_frame* sm_frame = sm_table->entries();
+    streamIndentor si2(ss);
+    int current_offset = -1;
+    for (u2 i = 0; i < sm_table->number_of_entries(); ++i) {
+      ss->indent();
+      sm_frame->print_on(ss, current_offset);
+      ss->print_cr("");
+      current_offset += sm_frame->offset_delta();
+      sm_frame = sm_frame->next();
+    }
+  }
+}
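
Assembled, these detail sections yield VerifyError messages of roughly the
following shape (an illustrative reconstruction from the format strings
above -- class Foo and its method are made up, not captured output):

    java.lang.VerifyError: Bad type on operand stack in aaload
    Exception Details:
      Location:
        Foo.get([II)Ljava/lang/Object; @2: aaload
      Reason:
        Type '[I' (current frame, stack[0]) is not assignable to reference type
      Current Frame:
        bci: @2
        flags: { }
        locals: { 'Foo', '[I', integer }
        stack: { '[I', integer }
      Bytecode:
        0000000: 2b1c 32b0
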
+
 // Methods in ClassVerifier
 
-bool ClassVerifier::_verify_verbose = false;
-
 ClassVerifier::ClassVerifier(
-    instanceKlassHandle klass, char* msg, size_t msg_len, TRAPS)
-    : _thread(THREAD), _exception_type(NULL), _message(msg),
-      _message_buffer_len(msg_len), _klass(klass) {
+    instanceKlassHandle klass, TRAPS)
+    : _thread(THREAD), _exception_type(NULL), _message(NULL), _klass(klass) {
   _this_type = VerificationType::reference_type(klass->name());
   // Create list to hold symbols in reference area.
   _symbols = new GrowableArray<Symbol*>(100, 0, NULL);
@@ -290,8 +535,14 @@
   return VerificationType::reference_type(vmSymbols::java_lang_Object());
 }
 
+TypeOrigin ClassVerifier::ref_ctx(const char* sig, TRAPS) {
+  VerificationType vt = VerificationType::reference_type(
+      create_temporary_symbol(sig, (int)strlen(sig), THREAD));
+  return TypeOrigin::implicit(vt);
+}
+
 void ClassVerifier::verify_class(TRAPS) {
-  if (_verify_verbose) {
+  if (VerboseVerification) {
     tty->print_cr("Verifying class %s with new format",
       _klass->external_name());
   }
@@ -312,7 +563,7 @@
     verify_method(methodHandle(THREAD, m), CHECK_VERIFY(this));
   }
 
-  if (_verify_verbose || TraceClassInitialization) {
+  if (VerboseVerification || TraceClassInitialization) {
     if (was_recursively_verified())
       tty->print_cr("Recursive verification detected for: %s",
           _klass->external_name());
@@ -321,7 +572,7 @@
 
 void ClassVerifier::verify_method(methodHandle m, TRAPS) {
   _method = m;   // initialize _method
-  if (_verify_verbose) {
+  if (VerboseVerification) {
     tty->print_cr("Verifying method %s", m->name_and_sig_as_C_string());
   }
 
@@ -368,8 +619,8 @@
   StackMapTable stackmap_table(&reader, &current_frame, max_locals, max_stack,
                                code_data, code_length, CHECK_VERIFY(this));
 
-  if (_verify_verbose) {
-    stackmap_table.print();
+  if (VerboseVerification) {
+    stackmap_table.print_on(tty);
   }
 
   RawBytecodeStream bcs(m);
@@ -388,6 +639,7 @@
 
     // Set current frame's offset to bci
     current_frame.set_offset(bci);
+    current_frame.set_mark();
 
     // Make sure every offset in stackmap table point to the beginning to
     // an instruction. Match current_frame to stackmap_table entry with
@@ -396,6 +648,7 @@
       stackmap_index, bci, &current_frame, &stackmap_table,
       no_control_flow, CHECK_VERIFY(this));
 
+
     bool this_uninit = false;  // Set to true when invokespecial <init> initialized 'this'
 
     // Merge with the next instruction
@@ -406,8 +659,8 @@
       VerificationType atype;
 
 #ifndef PRODUCT
-      if (_verify_verbose) {
-        current_frame.print();
+      if (VerboseVerification) {
+        current_frame.print_on(tty);
         tty->print_cr("offset = %d,  opcode = %s", bci, Bytecodes::name(opcode));
       }
 #endif
@@ -420,7 +673,10 @@
             opcode != Bytecodes::_lstore && opcode != Bytecodes::_fload  &&
             opcode != Bytecodes::_dload  && opcode != Bytecodes::_fstore &&
             opcode != Bytecodes::_dstore) {
-          verify_error(bci, "Bad wide instruction");
+          /* Unreachable?  RawBytecodeStream's raw_next() returns 'illegal'
+           * if we encounter a wide instruction that modifies an invalid
+           * opcode (not one of the ones listed above) */
+          verify_error(ErrorContext::bad_code(bci), "Bad wide instruction");
           return;
         }
       }
@@ -532,7 +788,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_int_array()) {
-            verify_error(bci, bad_type_msg, "iaload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[I", THREAD)),
+                bad_type_msg, "iaload");
             return;
           }
           current_frame.push_stack(
@@ -544,7 +802,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_bool_array() && !atype.is_byte_array()) {
-            verify_error(bci, bad_type_msg, "baload");
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "baload");
             return;
           }
           current_frame.push_stack(
@@ -556,7 +816,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_char_array()) {
-            verify_error(bci, bad_type_msg, "caload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[C", THREAD)),
+                bad_type_msg, "caload");
             return;
           }
           current_frame.push_stack(
@@ -568,7 +830,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_short_array()) {
-            verify_error(bci, bad_type_msg, "saload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[S", THREAD)),
+                bad_type_msg, "saload");
             return;
           }
           current_frame.push_stack(
@@ -580,7 +844,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_long_array()) {
-            verify_error(bci, bad_type_msg, "laload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[J", THREAD)),
+                bad_type_msg, "laload");
             return;
           }
           current_frame.push_stack_2(
@@ -593,7 +859,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_float_array()) {
-            verify_error(bci, bad_type_msg, "faload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[F", THREAD)),
+                bad_type_msg, "faload");
             return;
           }
           current_frame.push_stack(
@@ -605,7 +873,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_double_array()) {
-            verify_error(bci, bad_type_msg, "daload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[D", THREAD)),
+                bad_type_msg, "daload");
             return;
           }
           current_frame.push_stack_2(
@@ -618,7 +888,10 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_reference_array()) {
-            verify_error(bci, bad_type_msg, "aaload");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(),
+                TypeOrigin::implicit(VerificationType::reference_check())),
+                bad_type_msg, "aaload");
             return;
           }
           if (atype.is_null()) {
@@ -689,7 +962,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_int_array()) {
-            verify_error(bci, bad_type_msg, "iastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[I", THREAD)),
+                bad_type_msg, "iastore");
             return;
           }
           no_control_flow = false; break;
@@ -701,7 +976,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_bool_array() && !atype.is_byte_array()) {
-            verify_error(bci, bad_type_msg, "bastore");
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "bastore");
             return;
           }
           no_control_flow = false; break;
@@ -713,7 +990,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_char_array()) {
-            verify_error(bci, bad_type_msg, "castore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[C", THREAD)),
+                bad_type_msg, "castore");
             return;
           }
           no_control_flow = false; break;
@@ -725,7 +1004,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_short_array()) {
-            verify_error(bci, bad_type_msg, "sastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[S", THREAD)),
+                bad_type_msg, "sastore");
             return;
           }
           no_control_flow = false; break;
@@ -738,7 +1019,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_long_array()) {
-            verify_error(bci, bad_type_msg, "lastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[J", THREAD)),
+                bad_type_msg, "lastore");
             return;
           }
           no_control_flow = false; break;
@@ -750,7 +1033,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_float_array()) {
-            verify_error(bci, bad_type_msg, "fastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[F", THREAD)),
+                bad_type_msg, "fastore");
             return;
           }
           no_control_flow = false; break;
@@ -763,7 +1048,9 @@
           atype = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!atype.is_double_array()) {
-            verify_error(bci, bad_type_msg, "dastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(), ref_ctx("[D", THREAD)),
+                bad_type_msg, "dastore");
             return;
           }
           no_control_flow = false; break;
@@ -775,7 +1062,10 @@
             VerificationType::reference_check(), CHECK_VERIFY(this));
           // more type-checking is done at runtime
           if (!atype.is_reference_array()) {
-            verify_error(bci, bad_type_msg, "aastore");
+            verify_error(ErrorContext::bad_type(bci,
+                current_frame.stack_top_ctx(),
+                TypeOrigin::implicit(VerificationType::reference_check())),
+                bad_type_msg, "aastore");
             return;
           }
           // 4938384: relaxed constraint in JVMS 3rd edition.
@@ -793,7 +1083,11 @@
             current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "pop2");
+            /* Unreachable? Would need a category2_1st on TOS
+             * which does not appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "pop2");
             return;
           }
           no_control_flow = false; break;
@@ -825,7 +1119,10 @@
             type3 = current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup_x2");
+            /* Unreachable? Would need a category2_1st at stack depth 2 with
+             * a category1 on TOS which does not appear possible. */
+            verify_error(ErrorContext::bad_type(
+                bci, current_frame.stack_top_ctx()), bad_type_msg, "dup_x2");
             return;
           }
           current_frame.push_stack(type, CHECK_VERIFY(this));
@@ -843,7 +1140,11 @@
             type2 = current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup2");
+            /* Unreachable?  Would need a category2_1st on TOS which does not
+             * appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "dup2");
             return;
           }
           current_frame.push_stack(type2, CHECK_VERIFY(this));
@@ -858,11 +1159,15 @@
           if (type.is_category1()) {
             type2 = current_frame.pop_stack(
               VerificationType::category1_check(), CHECK_VERIFY(this));
-          } else if(type.is_category2_2nd()) {
-            type2 = current_frame.pop_stack
-              (VerificationType::category2_check(), CHECK_VERIFY(this));
+          } else if (type.is_category2_2nd()) {
+            type2 = current_frame.pop_stack(
+              VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup2_x1");
+            /* Unreachable?  Would need a category2_1st on TOS which does
+             * not appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "dup2_x1");
             return;
           }
           type3 = current_frame.pop_stack(
@@ -885,7 +1190,11 @@
             type2 = current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup2_x2");
+            /* Unreachable?  Would need a category2_1st on TOS which does
+             * not appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "dup2_x2");
             return;
           }
           type3 = current_frame.pop_stack(CHECK_VERIFY(this));
@@ -896,7 +1205,12 @@
             type4 = current_frame.pop_stack(
               VerificationType::category2_check(), CHECK_VERIFY(this));
           } else {
-            verify_error(bci, bad_type_msg, "dup2_x2");
+            /* Unreachable?  Would need a category2_1st on TOS after popping
+             * a long/double or two category 1's, which does not
+             * appear possible. */
+            verify_error(
+                ErrorContext::bad_type(bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "dup2_x2");
             return;
           }
           current_frame.push_stack(type2, CHECK_VERIFY(this));
@@ -1176,43 +1490,50 @@
         case Bytecodes::_ireturn :
           type = current_frame.pop_stack(
             VerificationType::integer_type(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_lreturn :
           type2 = current_frame.pop_stack(
             VerificationType::long2_type(), CHECK_VERIFY(this));
           type = current_frame.pop_stack(
             VerificationType::long_type(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_freturn :
           type = current_frame.pop_stack(
             VerificationType::float_type(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_dreturn :
           type2 = current_frame.pop_stack(
             VerificationType::double2_type(),  CHECK_VERIFY(this));
           type = current_frame.pop_stack(
             VerificationType::double_type(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_areturn :
           type = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
-          verify_return_value(return_type, type, bci, CHECK_VERIFY(this));
+          verify_return_value(return_type, type, bci,
+                              &current_frame, CHECK_VERIFY(this));
           no_control_flow = true; break;
         case Bytecodes::_return :
           if (return_type != VerificationType::bogus_type()) {
-            verify_error(bci, "Method expects no return value");
+            verify_error(ErrorContext::bad_code(bci),
+                         "Method expects a return value");
             return;
           }
           // Make sure "this" has been initialized if current method is an
           // <init>
           if (_method->name() == vmSymbols::object_initializer_name() &&
               current_frame.flag_this_uninit()) {
-            verify_error(bci,
-              "Constructor must call super() or this() before return");
+            verify_error(ErrorContext::bad_code(bci),
+                         "Constructor must call super() or this() "
+                         "before return");
             return;
           }
           no_control_flow = true; break;
@@ -1239,11 +1560,13 @@
         case Bytecodes::_new :
         {
           index = bcs.get_index_u2();
-          verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+          verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
           VerificationType new_class_type =
             cp_index_to_type(index, cp, CHECK_VERIFY(this));
           if (!new_class_type.is_object()) {
-            verify_error(bci, "Illegal new instruction");
+            verify_error(ErrorContext::bad_type(bci,
+                TypeOrigin::cp(index, new_class_type)),
+                "Illegal new instruction");
             return;
           }
           type = VerificationType::uninitialized_type(bci);
@@ -1258,13 +1581,15 @@
           no_control_flow = false; break;
         case Bytecodes::_anewarray :
           verify_anewarray(
-            bcs.get_index_u2(), cp, &current_frame, CHECK_VERIFY(this));
+            bci, bcs.get_index_u2(), cp, &current_frame, CHECK_VERIFY(this));
           no_control_flow = false; break;
         case Bytecodes::_arraylength :
           type = current_frame.pop_stack(
             VerificationType::reference_check(), CHECK_VERIFY(this));
           if (!(type.is_null() || type.is_array())) {
-            verify_error(bci, bad_type_msg, "arraylength");
+            verify_error(ErrorContext::bad_type(
+                bci, current_frame.stack_top_ctx()),
+                bad_type_msg, "arraylength");
           }
           current_frame.push_stack(
             VerificationType::integer_type(), CHECK_VERIFY(this));
@@ -1272,7 +1597,7 @@
         case Bytecodes::_checkcast :
         {
           index = bcs.get_index_u2();
-          verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+          verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
           current_frame.pop_stack(object_type(), CHECK_VERIFY(this));
           VerificationType klass_type = cp_index_to_type(
             index, cp, CHECK_VERIFY(this));
@@ -1281,7 +1606,7 @@
         }
         case Bytecodes::_instanceof : {
           index = bcs.get_index_u2();
-          verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+          verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
           current_frame.pop_stack(object_type(), CHECK_VERIFY(this));
           current_frame.push_stack(
             VerificationType::integer_type(), CHECK_VERIFY(this));
@@ -1296,17 +1621,18 @@
         {
           index = bcs.get_index_u2();
           u2 dim = *(bcs.bcp()+3);
-          verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+          verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
           VerificationType new_array_type =
             cp_index_to_type(index, cp, CHECK_VERIFY(this));
           if (!new_array_type.is_array()) {
-            verify_error(bci,
-              "Illegal constant pool index in multianewarray instruction");
+            verify_error(ErrorContext::bad_type(bci,
+                TypeOrigin::cp(index, new_array_type)),
+                "Illegal constant pool index in multianewarray instruction");
             return;
           }
           if (dim < 1 || new_array_type.dimensions() < dim) {
-            verify_error(bci,
-              "Illegal dimension in multianewarray instruction");
+            verify_error(ErrorContext::bad_code(bci),
+                "Illegal dimension in multianewarray instruction: %d", dim);
             return;
           }
           for (int i = 0; i < dim; i++) {
@@ -1324,7 +1650,8 @@
         default:
           // We only need to check the bytecodes that are valid in a class
           // file; jsr and ret are not in the new class file format in JDK 1.5.
-          verify_error(bci, "Bad instruction");
+          verify_error(ErrorContext::bad_code(bci),
+              "Bad instruction: %02x", opcode);
           no_control_flow = false;
           return;
       }  // end switch
@@ -1340,7 +1667,8 @@
 
   // Make sure that control flow does not fall through end of the method
   if (!no_control_flow) {
-    verify_error(code_length, "Control flow falls through code end");
+    verify_error(ErrorContext::bad_code(code_length),
+        "Control flow falls through code end");
     return;
   }
 }
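
For orientation: the hunks above all live inside verify_method()'s per-bytecode
dispatch loop, which classifies each instruction, records whether it can fall
through, and ends with the fall-through check just rewritten. A minimal
standalone sketch of that shape, assuming one-byte instructions (the real loop
advances by each instruction's actual length via RawBytecodeStream and covers
the full opcode set):

    #include <cstddef>
    #include <cstdint>
    #include <cstdio>
    #include <vector>

    // Simplified analogue of the verifier's dispatch loop: classify each
    // opcode, track whether the last instruction can fall through, and
    // reject methods whose execution could run off the end of the code.
    bool check_fall_through(const std::vector<uint8_t>& code) {
      bool no_control_flow = false;  // true after return/goto/athrow etc.
      for (size_t bci = 0; bci < code.size(); bci++) {
        switch (code[bci]) {
          case 0xb0:                 // areturn
          case 0xb1:                 // return
          case 0xbf:                 // athrow
            no_control_flow = true;
            break;
          default:
            no_control_flow = false;
            break;
        }
      }
      if (!no_control_flow) {
        std::printf("Control flow falls through code end\n");
        return false;
      }
      return true;
    }
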
@@ -1359,7 +1687,7 @@
         code_data[bci] = BYTECODE_OFFSET;
       }
     } else {
-      verify_error(bcs.bci(), "Bad instruction");
+      verify_error(ErrorContext::bad_code(bcs.bci()), "Bad instruction");
       return NULL;
     }
   }
@@ -1402,9 +1730,11 @@
         catch_type, this, CHECK_VERIFY(this));
       if (!is_subclass) {
         // 4286534: should throw VerifyError according to recent spec change
-        verify_error(
-          "Catch type is not a subclass of Throwable in handler %d",
-          handler_pc);
+        verify_error(ErrorContext::bad_type(handler_pc,
+            TypeOrigin::cp(catch_type_index, catch_type),
+            TypeOrigin::implicit(throwable)),
+            "Catch type is not a subclass "
+            "of Throwable in exception handler %d", handler_pc);
         return;
       }
     }
@@ -1444,19 +1774,21 @@
   if (stackmap_index < stackmap_table->get_frame_count()) {
     u2 this_offset = stackmap_table->get_offset(stackmap_index);
     if (no_control_flow && this_offset > bci) {
-      verify_error(bci, "Expecting a stack map frame");
+      verify_error(ErrorContext::missing_stackmap(bci),
+                   "Expecting a stack map frame");
       return 0;
     }
     if (this_offset == bci) {
+      ErrorContext ctx;
       // See if current stack map can be assigned to the frame in table.
       // current_frame is the stackmap frame obtained from the last instruction.
       // If matched, current_frame will be updated by this method.
-      bool match = stackmap_table->match_stackmap(
+      bool matches = stackmap_table->match_stackmap(
         current_frame, this_offset, stackmap_index,
-        !no_control_flow, true, CHECK_VERIFY_(this, 0));
-      if (!match) {
+        !no_control_flow, true, &ctx, CHECK_VERIFY_(this, 0));
+      if (!matches) {
         // report type error
-        verify_error(bci, "Instruction type does not match stack map");
+        verify_error(ctx, "Instruction type does not match stack map");
         return 0;
       }
       stackmap_index++;
@@ -1466,7 +1798,7 @@
       return 0;
     }
   } else if (no_control_flow) {
-    verify_error(bci, "Expecting a stack map frame");
+    verify_error(ErrorContext::bad_code(bci), "Expecting a stack map frame");
     return 0;
   }
   return stackmap_index;
@@ -1498,29 +1830,31 @@
           VerificationType::reference_type(vmSymbols::java_lang_Throwable());
         new_frame->push_stack(throwable, CHECK_VERIFY(this));
       }
-      bool match = stackmap_table->match_stackmap(
-        new_frame, handler_pc, true, false, CHECK_VERIFY(this));
-      if (!match) {
-        verify_error(bci,
-          "Stack map does not match the one at exception handler %d",
-          handler_pc);
+      ErrorContext ctx;
+      bool matches = stackmap_table->match_stackmap(
+        new_frame, handler_pc, true, false, &ctx, CHECK_VERIFY(this));
+      if (!matches) {
+        verify_error(ctx, "Stack map does not match the one at "
+            "exception handler %d", handler_pc);
         return;
       }
     }
   }
 }
 
-void ClassVerifier::verify_cp_index(constantPoolHandle cp, int index, TRAPS) {
+void ClassVerifier::verify_cp_index(
+    u2 bci, constantPoolHandle cp, int index, TRAPS) {
   int nconstants = cp->length();
   if ((index <= 0) || (index >= nconstants)) {
-    verify_error("Illegal constant pool index %d in class %s",
-      index, instanceKlass::cast(cp->pool_holder())->external_name());
+    verify_error(ErrorContext::bad_cp_index(bci, index),
+        "Illegal constant pool index %d in class %s",
+        index, instanceKlass::cast(cp->pool_holder())->external_name());
     return;
   }
 }
 
 void ClassVerifier::verify_cp_type(
-    int index, constantPoolHandle cp, unsigned int types, TRAPS) {
+    u2 bci, int index, constantPoolHandle cp, unsigned int types, TRAPS) {
 
   // In some situations, bytecode rewriting may occur while we're verifying.
   // In this case, a constant pool cache exists and some indices refer to that
@@ -1528,10 +1862,10 @@
   // We must check was_recursively_verified() before we get here.
   guarantee(cp->cache() == NULL, "not rewritten yet");
 
-  verify_cp_index(cp, index, CHECK_VERIFY(this));
+  verify_cp_index(bci, cp, index, CHECK_VERIFY(this));
   unsigned int tag = cp->tag_at(index).value();
   if ((types & (1 << tag)) == 0) {
-    verify_error(
+    verify_error(ErrorContext::bad_cp_index(bci, index),
       "Illegal type at constant pool entry %d in class %s",
       index, instanceKlass::cast(cp->pool_holder())->external_name());
     return;
@@ -1539,51 +1873,46 @@
 }
 
 void ClassVerifier::verify_cp_class_type(
-    int index, constantPoolHandle cp, TRAPS) {
-  verify_cp_index(cp, index, CHECK_VERIFY(this));
+    u2 bci, int index, constantPoolHandle cp, TRAPS) {
+  verify_cp_index(bci, cp, index, CHECK_VERIFY(this));
   constantTag tag = cp->tag_at(index);
   if (!tag.is_klass() && !tag.is_unresolved_klass()) {
-    verify_error("Illegal type at constant pool entry %d in class %s",
-      index, instanceKlass::cast(cp->pool_holder())->external_name());
+    verify_error(ErrorContext::bad_cp_index(bci, index),
+        "Illegal type at constant pool entry %d in class %s",
+        index, instanceKlass::cast(cp->pool_holder())->external_name());
     return;
   }
 }
 
-void ClassVerifier::format_error_message(
-    const char* fmt, int offset, va_list va) {
-  ResourceMark rm(_thread);
-  stringStream message(_message, _message_buffer_len);
-  message.vprint(fmt, va);
-  if (!_method.is_null()) {
-    message.print(" in method %s", _method->name_and_sig_as_C_string());
-  }
-  if (offset != -1) {
-    message.print(" at offset %d", offset);
-  }
-}
+void ClassVerifier::verify_error(ErrorContext ctx, const char* msg, ...) {
+  stringStream ss;
 
-void ClassVerifier::verify_error(u2 offset, const char* fmt, ...) {
+  ctx.reset_frames();
   _exception_type = vmSymbols::java_lang_VerifyError();
+  _error_context = ctx;
   va_list va;
-  va_start(va, fmt);
-  format_error_message(fmt, offset, va);
+  va_start(va, msg);
+  ss.vprint(msg, va);
   va_end(va);
-}
-
-void ClassVerifier::verify_error(const char* fmt, ...) {
-  _exception_type = vmSymbols::java_lang_VerifyError();
-  va_list va;
-  va_start(va, fmt);
-  format_error_message(fmt, -1, va);
-  va_end(va);
+  _message = ss.as_string();
+#ifdef ASSERT
+  ResourceMark rm;
+  const char* exception_name = _exception_type->as_C_string();
+  Exceptions::debug_check_abort(exception_name, NULL);
+#endif // ASSERT
 }
 
 void ClassVerifier::class_format_error(const char* msg, ...) {
+  stringStream ss;
   _exception_type = vmSymbols::java_lang_ClassFormatError();
   va_list va;
   va_start(va, msg);
-  format_error_message(msg, -1, va);
+  ss.vprint(msg, va);
   va_end(va);
+  if (!_method.is_null()) {
+    ss.print(" in method %s", _method->name_and_sig_as_C_string());
+  }
+  _message = ss.as_string();
 }
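
The restructuring above replaces eager formatting into a caller-supplied fixed
buffer with deferred formatting: verify_error() and class_format_error()
capture the printf-style message (and, for VerifyError, the ErrorContext)
immediately, and the full text is only assembled when exception_message() is
called (see verifier.hpp below). A standalone sketch of the capture pattern,
with hypothetical names:

    #include <cstdarg>
    #include <cstdio>
    #include <string>

    // Hypothetical sketch: format the varargs message now, append the
    // expensive context details only when the message is finally consumed.
    class DeferredError {
      std::string _message;
     public:
      void record(const char* fmt, ...) {
        char buf[512];
        va_list va;
        va_start(va, fmt);
        vsnprintf(buf, sizeof(buf), fmt, va);  // like stringStream::vprint
        va_end(va);
        _message = buf;
      }
      std::string message_with(const std::string& context_details) const {
        return _message + context_details;     // like exception_message()
      }
    };
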
 
 klassOop ClassVerifier::load_class(Symbol* name, TRAPS) {
@@ -1619,7 +1948,7 @@
     }
   } else {
     klassOop member_klass = target_instance->find_field(field_name, field_sig, &fd);
-    if(member_klass != NULL && fd.is_protected()) {
+    if (member_klass != NULL && fd.is_protected()) {
       if (!this_class->is_same_class_package(member_klass)) {
         return true;
       }
@@ -1629,9 +1958,9 @@
 }
 
 void ClassVerifier::verify_ldc(
-    int opcode, u2 index, StackMapFrame *current_frame,
-     constantPoolHandle cp, u2 bci, TRAPS) {
-  verify_cp_index(cp, index, CHECK_VERIFY(this));
+    int opcode, u2 index, StackMapFrame* current_frame,
+    constantPoolHandle cp, u2 bci, TRAPS) {
+  verify_cp_index(bci, cp, index, CHECK_VERIFY(this));
   constantTag tag = cp->tag_at(index);
   unsigned int types;
   if (opcode == Bytecodes::_ldc || opcode == Bytecodes::_ldc_w) {
@@ -1641,12 +1970,12 @@
             | (1 << JVM_CONSTANT_MethodHandle) | (1 << JVM_CONSTANT_MethodType);
       // Note:  The class file parser already verified the legality of
       // MethodHandle and MethodType constants.
-      verify_cp_type(index, cp, types, CHECK_VERIFY(this));
+      verify_cp_type(bci, index, cp, types, CHECK_VERIFY(this));
     }
   } else {
     assert(opcode == Bytecodes::_ldc2_w, "must be ldc2_w");
     types = (1 << JVM_CONSTANT_Double) | (1 << JVM_CONSTANT_Long);
-    verify_cp_type(index, cp, types, CHECK_VERIFY(this));
+    verify_cp_type(bci, index, cp, types, CHECK_VERIFY(this));
   }
   if (tag.is_string() && cp->is_pseudo_string_at(index)) {
     current_frame->push_stack(object_type(), CHECK_VERIFY(this));
@@ -1681,7 +2010,9 @@
       VerificationType::reference_type(
         vmSymbols::java_lang_invoke_MethodType()), CHECK_VERIFY(this));
   } else {
-    verify_error(bci, "Invalid index in ldc");
+    /* Unreachable? verify_cp_type has already validated the cp type. */
+    verify_error(
+        ErrorContext::bad_cp_index(bci, index), "Invalid index in ldc");
     return;
   }
 }
@@ -1697,7 +2028,8 @@
   u2 padding_offset = 1;
   while ((bcp + padding_offset) < aligned_bcp) {
     if(*(bcp + padding_offset) != 0) {
-      verify_error(bci, "Nonzero padding byte in lookswitch or tableswitch");
+      verify_error(ErrorContext::bad_code(bci),
+                   "Nonzero padding byte in lookupswitch or tableswitch");
       return;
     }
     padding_offset++;
@@ -1710,20 +2042,21 @@
     jint low = (jint)Bytes::get_Java_u4(aligned_bcp + jintSize);
     jint high = (jint)Bytes::get_Java_u4(aligned_bcp + 2*jintSize);
     if (low > high) {
-      verify_error(bci,
-        "low must be less than or equal to high in tableswitch");
+      verify_error(ErrorContext::bad_code(bci),
+          "low must be less than or equal to high in tableswitch");
       return;
     }
     keys = high - low + 1;
     if (keys < 0) {
-      verify_error(bci, "too many keys in tableswitch");
+      verify_error(ErrorContext::bad_code(bci), "too many keys in tableswitch");
       return;
     }
     delta = 1;
   } else {
     keys = (int)Bytes::get_Java_u4(aligned_bcp + jintSize);
     if (keys < 0) {
-      verify_error(bci, "number of keys in lookupswitch less than 0");
+      verify_error(ErrorContext::bad_code(bci),
+                   "number of keys in lookupswitch less than 0");
       return;
     }
     delta = 2;
@@ -1732,7 +2065,8 @@
       jint this_key = Bytes::get_Java_u4(aligned_bcp + (2+2*i)*jintSize);
       jint next_key = Bytes::get_Java_u4(aligned_bcp + (2+2*i+2)*jintSize);
       if (this_key >= next_key) {
-        verify_error(bci, "Bad lookupswitch instruction");
+        verify_error(ErrorContext::bad_code(bci),
+                     "Bad lookupswitch instruction");
         return;
       }
     }
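
The padding and key checks above mirror the switch instructions' encoding:
after the opcode, operands are aligned to a four-byte boundary from the start
of the method's code, with the skipped bytes required to be zero; tableswitch
then carries default/low/high as big-endian 32-bit words, and lookupswitch
carries default/npairs followed by strictly ascending (match, offset) pairs.
A standalone sketch of the tableswitch arithmetic (the real code uses
Bytes::get_Java_u4 on an aligned bcp):

    #include <cstddef>
    #include <cstdint>

    // Big-endian 32-bit read, as class files store multi-byte operands.
    static int32_t read_java_u4(const uint8_t* p) {
      return (int32_t)(((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
                       ((uint32_t)p[2] << 8) | (uint32_t)p[3]);
    }

    // Returns the number of jump-table keys, or -1 if malformed.
    // 'code' is the method's bytecode; 'bci' indexes the tableswitch opcode.
    int64_t tableswitch_keys(const uint8_t* code, size_t bci) {
      // Operands begin at the next multiple of 4; padding must be zero.
      size_t operands = (bci + 4) & ~(size_t)3;
      for (size_t i = bci + 1; i < operands; i++) {
        if (code[i] != 0) return -1;   // nonzero padding byte
      }
      int32_t low  = read_java_u4(code + operands + 4);  // word 0 is default
      int32_t high = read_java_u4(code + operands + 8);
      if (low > high) return -1;       // low must be <= high
      // 64-bit math sidesteps the overflow that the 'keys < 0' check catches.
      return (int64_t)high - (int64_t)low + 1;
    }
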
@@ -1767,7 +2101,8 @@
                                               constantPoolHandle cp,
                                               TRAPS) {
   u2 index = bcs->get_index_u2();
-  verify_cp_type(index, cp, 1 << JVM_CONSTANT_Fieldref, CHECK_VERIFY(this));
+  verify_cp_type(bcs->bci(), index, cp,
+      1 << JVM_CONSTANT_Fieldref, CHECK_VERIFY(this));
 
   // Get field name and signature
   Symbol* field_name = cp->name_ref_at(index);
@@ -1784,9 +2119,11 @@
   VerificationType ref_class_type = cp_ref_index_to_type(
     index, cp, CHECK_VERIFY(this));
   if (!ref_class_type.is_object()) {
-    verify_error(
-      "Expecting reference to class in class %s at constant pool index %d",
-      _klass->external_name(), index);
+    /* Unreachable?  Class file parser verifies Fieldref contents */
+    verify_error(ErrorContext::bad_type(bcs->bci(),
+        TypeOrigin::cp(index, ref_class_type)),
+        "Expecting reference to class in class %s at constant pool index %d",
+        _klass->external_name(), index);
     return;
   }
   VerificationType target_class_type = ref_class_type;
@@ -1844,7 +2181,10 @@
       is_assignable = target_class_type.is_assignable_from(
         stack_object_type, this, CHECK_VERIFY(this));
       if (!is_assignable) {
-        verify_error(bci, "Bad type on operand stack in putfield");
+        verify_error(ErrorContext::bad_type(bci,
+            current_frame->stack_top_ctx(),
+            TypeOrigin::cp(index, target_class_type)),
+            "Bad type on operand stack in putfield");
         return;
       }
     }
@@ -1868,7 +2208,10 @@
         is_assignable = current_type().is_assignable_from(
           stack_object_type, this, CHECK_VERIFY(this));
         if (!is_assignable) {
-          verify_error(bci, "Bad access to protected data in getfield");
+          verify_error(ErrorContext::bad_type(bci,
+              current_frame->stack_top_ctx(),
+              TypeOrigin::implicit(current_type())),
+              "Bad access to protected data in getfield");
           return;
         }
       }
@@ -1879,7 +2222,7 @@
 }
 
 void ClassVerifier::verify_invoke_init(
-    RawBytecodeStream* bcs, VerificationType ref_class_type,
+    RawBytecodeStream* bcs, u2 ref_class_index, VerificationType ref_class_type,
     StackMapFrame* current_frame, u4 code_length, bool *this_uninit,
     constantPoolHandle cp, TRAPS) {
   u2 bci = bcs->bci();
@@ -1890,7 +2233,10 @@
     klassOop superk = current_class()->super();
     if (ref_class_type.name() != current_class()->name() &&
         ref_class_type.name() != superk->klass_part()->name()) {
-      verify_error(bci, "Bad <init> method call");
+      verify_error(ErrorContext::bad_type(bci,
+          TypeOrigin::implicit(ref_class_type),
+          TypeOrigin::implicit(current_type())),
+          "Bad <init> method call");
       return;
     }
     current_frame->initialize_object(type, current_type());
@@ -1899,17 +2245,23 @@
     u2 new_offset = type.bci();
     address new_bcp = bcs->bcp() - bci + new_offset;
     if (new_offset > (code_length - 3) || (*new_bcp) != Bytecodes::_new) {
-      verify_error(new_offset, "Expecting new instruction");
+      /* Unreachable?  Stack map parsing ensures the type is valid and
+       * that new instructions have a valid BCI. */
+      verify_error(ErrorContext::bad_code(new_offset),
+                   "Expecting new instruction");
       return;
     }
     u2 new_class_index = Bytes::get_Java_u2(new_bcp + 1);
-    verify_cp_class_type(new_class_index, cp, CHECK_VERIFY(this));
+    verify_cp_class_type(bci, new_class_index, cp, CHECK_VERIFY(this));
 
     // The method must be an <init> method of the indicated class
     VerificationType new_class_type = cp_index_to_type(
       new_class_index, cp, CHECK_VERIFY(this));
     if (!new_class_type.equals(ref_class_type)) {
-      verify_error(bci, "Call to wrong <init> method");
+      verify_error(ErrorContext::bad_type(bci,
+          TypeOrigin::cp(new_class_index, new_class_type),
+          TypeOrigin::cp(ref_class_index, ref_class_type)),
+          "Call to wrong <init> method");
       return;
     }
     // According to the VM spec, if the referent class is a superclass of the
@@ -1928,14 +2280,18 @@
         bool assignable = current_type().is_assignable_from(
           objectref_type, this, CHECK_VERIFY(this));
         if (!assignable) {
-          verify_error(bci, "Bad access to protected <init> method");
+          verify_error(ErrorContext::bad_type(bci,
+              TypeOrigin::cp(new_class_index, objectref_type),
+              TypeOrigin::implicit(current_type())),
+              "Bad access to protected <init> method");
           return;
         }
       }
     }
     current_frame->initialize_object(type, new_class_type);
   } else {
-    verify_error(bci, "Bad operand type when invoking <init>");
+    verify_error(ErrorContext::bad_type(bci, current_frame->stack_top_ctx()),
+        "Bad operand type when invoking <init>");
     return;
   }
 }
@@ -1952,7 +2308,7 @@
                       : opcode == Bytecodes::_invokedynamic
                                 ? 1 << JVM_CONSTANT_InvokeDynamic
                                 : 1 << JVM_CONSTANT_Methodref);
-  verify_cp_type(index, cp, types, CHECK_VERIFY(this));
+  verify_cp_type(bcs->bci(), index, cp, types, CHECK_VERIFY(this));
 
   // Get method name and signature
   Symbol* method_name = cp->name_ref_at(index);
@@ -2029,11 +2385,13 @@
     // the difference between the size of the operand stack before and after the instruction
     // executes.
     if (*(bcp+3) != (nargs+1)) {
-      verify_error(bci, "Inconsistent args count operand in invokeinterface");
+      verify_error(ErrorContext::bad_code(bci),
+          "Inconsistent args count operand in invokeinterface");
       return;
     }
     if (*(bcp+4) != 0) {
-      verify_error(bci, "Fourth operand byte of invokeinterface must be zero");
+      verify_error(ErrorContext::bad_code(bci),
+          "Fourth operand byte of invokeinterface must be zero");
       return;
     }
   }
@@ -2041,7 +2399,8 @@
   if (opcode == Bytecodes::_invokedynamic) {
     address bcp = bcs->bcp();
     if (*(bcp+3) != 0 || *(bcp+4) != 0) {
-      verify_error(bci, "Third and fourth operand bytes of invokedynamic must be zero");
+      verify_error(ErrorContext::bad_code(bci),
+          "Third and fourth operand bytes of invokedynamic must be zero");
       return;
     }
   }
@@ -2050,7 +2409,8 @@
     // Make sure <init> can only be invoked by invokespecial
     if (opcode != Bytecodes::_invokespecial ||
         method_name != vmSymbols::object_initializer_name()) {
-      verify_error(bci, "Illegal call to internal method");
+      verify_error(ErrorContext::bad_code(bci),
+          "Illegal call to internal method");
       return;
     }
   } else if (opcode == Bytecodes::_invokespecial
@@ -2060,7 +2420,8 @@
     bool subtype = ref_class_type.is_assignable_from(
       current_type(), this, CHECK_VERIFY(this));
     if (!subtype) {
-      verify_error(bci, "Bad invokespecial instruction: "
+      verify_error(ErrorContext::bad_code(bci),
+          "Bad invokespecial instruction: "
           "current class isn't assignable to reference class.");
        return;
     }
@@ -2073,7 +2434,7 @@
   if (opcode != Bytecodes::_invokestatic &&
       opcode != Bytecodes::_invokedynamic) {
     if (method_name == vmSymbols::object_initializer_name()) {  // <init> method
-      verify_invoke_init(bcs, ref_class_type, current_frame,
+      verify_invoke_init(bcs, index, ref_class_type, current_frame,
         code_length, this_uninit, cp, CHECK_VERIFY(this));
     } else {   // other methods
       // Ensures that target class is assignable to method class.
@@ -2103,8 +2464,10 @@
                   // Special case: arrays pretend to implement public Object
                   // clone().
                 } else {
-                  verify_error(bci,
-                    "Bad access to protected data in invokevirtual");
+                  verify_error(ErrorContext::bad_type(bci,
+                      current_frame->stack_top_ctx(),
+                      TypeOrigin::implicit(current_type())),
+                      "Bad access to protected data in invokevirtual");
                   return;
                 }
               }
@@ -2121,7 +2484,10 @@
   if (sig_stream.type() != T_VOID) {
     if (method_name == vmSymbols::object_initializer_name()) {
       // <init> method must have a void return type
-      verify_error(bci, "Return type must be void in <init> method");
+      /* Unreachable?  The class file parser verifies that methods whose
+       * names begin with '<' have a void return type. */
+      verify_error(ErrorContext::bad_code(bci),
+          "Return type must be void in <init> method");
       return;
     }
     VerificationType return_type[2];
@@ -2139,7 +2505,7 @@
     NULL, NULL, NULL, NULL, "[Z", "[C", "[F", "[D", "[B", "[S", "[I", "[J",
   };
   if (index < T_BOOLEAN || index > T_LONG) {
-    verify_error(bci, "Illegal newarray instruction");
+    verify_error(ErrorContext::bad_code(bci), "Illegal newarray instruction");
     return VerificationType::bogus_type();
   }
 
@@ -2150,8 +2516,9 @@
 }
 
 void ClassVerifier::verify_anewarray(
-    u2 index, constantPoolHandle cp, StackMapFrame* current_frame, TRAPS) {
-  verify_cp_class_type(index, cp, CHECK_VERIFY(this));
+    u2 bci, u2 index, constantPoolHandle cp,
+    StackMapFrame* current_frame, TRAPS) {
+  verify_cp_class_type(bci, index, cp, CHECK_VERIFY(this));
   current_frame->pop_stack(
     VerificationType::integer_type(), CHECK_VERIFY(this));
 
@@ -2264,14 +2631,19 @@
 }
 
 void ClassVerifier::verify_return_value(
-    VerificationType return_type, VerificationType type, u2 bci, TRAPS) {
+    VerificationType return_type, VerificationType type, u2 bci,
+    StackMapFrame* current_frame, TRAPS) {
   if (return_type == VerificationType::bogus_type()) {
-    verify_error(bci, "Method expects a return value");
+    verify_error(ErrorContext::bad_type(bci,
+        current_frame->stack_top_ctx(), TypeOrigin::signature(return_type)),
+        "Method expects a return value");
     return;
   }
   bool match = return_type.is_assignable_from(type, this, CHECK_VERIFY(this));
   if (!match) {
-    verify_error(bci, "Bad return type");
+    verify_error(ErrorContext::bad_type(bci,
+        current_frame->stack_top_ctx(), TypeOrigin::signature(return_type)),
+        "Bad return type");
     return;
   }
 }
--- a/src/share/vm/classfile/verifier.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/classfile/verifier.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -88,18 +88,178 @@
 #define CHECK_VERIFY_(verifier, result) \
   CHECK_(result)); if ((verifier)->has_error()) return (result); (0
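
The unbalanced parentheses in these macros are deliberate: each is spliced in
as the final "argument" of a call, and the call's own closing paren and the
macro body complete one another. Assuming the conventional CHECK_ definition
from utilities/exceptions.hpp (roughly: THREAD); if (HAS_PENDING_EXCEPTION)
return result; (void)(0 ), a call such as

    bool match = table->match_stackmap(frame, CHECK_VERIFY_(this, 0));

expands approximately to

    bool match = table->match_stackmap(frame, THREAD);
    if (HAS_PENDING_EXCEPTION) return 0;    // from CHECK_(0)
    (void)(0);                              // closes CHECK_'s dangling paren
    if ((this)->has_error()) return (0);    // the verifier-specific check
    (0);                                    // absorbed by the call's ');'

so every CHECK_VERIFY-guarded call bails out on both pending exceptions and
recorded verifier errors.
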
 
+class TypeOrigin VALUE_OBJ_CLASS_SPEC {
+ private:
+  typedef enum {
+    CF_LOCALS,  // Comes from the current frame locals
+    CF_STACK,   // Comes from the current frame expression stack
+    SM_LOCALS,  // Comes from stackmap locals
+    SM_STACK,   // Comes from stackmap expression stack
+    CONST_POOL, // Comes from the constant pool
+    SIG,        // Comes from method signature
+    IMPLICIT,   // Comes implicitly from code or context
+    BAD_INDEX,  // No type, but the index is bad
+    FRAME_ONLY, // No type, context just contains the frame
+    NONE
+  } Origin;
+
+  Origin _origin;
+  u2 _index;              // local, stack, or constant pool index
+  StackMapFrame* _frame;  // source frame if CF or SM
+  VerificationType _type; // The actual type
+
+  TypeOrigin(
+      Origin origin, u2 index, StackMapFrame* frame, VerificationType type)
+      : _origin(origin), _index(index), _frame(frame), _type(type) {}
+
+ public:
+  TypeOrigin() : _origin(NONE), _index(0), _frame(NULL) {}
+
+  static TypeOrigin null();
+  static TypeOrigin local(u2 index, StackMapFrame* frame);
+  static TypeOrigin stack(u2 index, StackMapFrame* frame);
+  static TypeOrigin sm_local(u2 index, StackMapFrame* frame);
+  static TypeOrigin sm_stack(u2 index, StackMapFrame* frame);
+  static TypeOrigin cp(u2 index, VerificationType vt);
+  static TypeOrigin signature(VerificationType vt);
+  static TypeOrigin bad_index(u2 index);
+  static TypeOrigin implicit(VerificationType t);
+  static TypeOrigin frame(StackMapFrame* frame);
+
+  void reset_frame();
+  void details(outputStream* ss) const;
+  void print_frame(outputStream* ss) const;
+  const StackMapFrame* frame() const { return _frame; }
+  bool is_valid() const { return _origin != NONE; }
+  u2 index() const { return _index; }
+
+#ifdef ASSERT
+  void print_on(outputStream* str) const;
+#endif
+};
+
+class ErrorContext VALUE_OBJ_CLASS_SPEC {
+ private:
+  typedef enum {
+    INVALID_BYTECODE,     // There was a problem with the bytecode
+    WRONG_TYPE,           // Type value was not as expected
+    FLAGS_MISMATCH,       // Frame flags are not assignable
+    BAD_CP_INDEX,         // Invalid constant pool index
+    BAD_LOCAL_INDEX,      // Invalid local index
+    LOCALS_SIZE_MISMATCH, // Frames have differing local counts
+    STACK_SIZE_MISMATCH,  // Frames have different stack sizes
+    STACK_OVERFLOW,       // Attempt to push onto a full expression stack
+    STACK_UNDERFLOW,      // Attempt to pop an empty expression stack
+    MISSING_STACKMAP,     // No stackmap for this location although one is required
+    BAD_STACKMAP,         // Format error in stackmap
+    NO_FAULT,             // No error
+    UNKNOWN
+  } FaultType;
+
+  int _bci;
+  FaultType _fault;
+  TypeOrigin _type;
+  TypeOrigin _expected;
+
+  ErrorContext(int bci, FaultType fault) :
+      _bci(bci), _fault(fault)  {}
+  ErrorContext(int bci, FaultType fault, TypeOrigin type) :
+      _bci(bci), _fault(fault), _type(type)  {}
+  ErrorContext(int bci, FaultType fault, TypeOrigin type, TypeOrigin exp) :
+      _bci(bci), _fault(fault), _type(type), _expected(exp)  {}
+
+ public:
+  ErrorContext() : _bci(-1), _fault(NO_FAULT) {}
+
+  static ErrorContext bad_code(u2 bci) {
+    return ErrorContext(bci, INVALID_BYTECODE);
+  }
+  static ErrorContext bad_type(u2 bci, TypeOrigin type) {
+    return ErrorContext(bci, WRONG_TYPE, type);
+  }
+  static ErrorContext bad_type(u2 bci, TypeOrigin type, TypeOrigin exp) {
+    return ErrorContext(bci, WRONG_TYPE, type, exp);
+  }
+  static ErrorContext bad_flags(u2 bci, StackMapFrame* frame) {
+    return ErrorContext(bci, FLAGS_MISMATCH, TypeOrigin::frame(frame));
+  }
+  static ErrorContext bad_flags(u2 bci, StackMapFrame* cur, StackMapFrame* sm) {
+    return ErrorContext(bci, FLAGS_MISMATCH,
+                        TypeOrigin::frame(cur), TypeOrigin::frame(sm));
+  }
+  static ErrorContext bad_cp_index(u2 bci, u2 index) {
+    return ErrorContext(bci, BAD_CP_INDEX, TypeOrigin::bad_index(index));
+  }
+  static ErrorContext bad_local_index(u2 bci, u2 index) {
+    return ErrorContext(bci, BAD_LOCAL_INDEX, TypeOrigin::bad_index(index));
+  }
+  static ErrorContext locals_size_mismatch(
+      u2 bci, StackMapFrame* frame0, StackMapFrame* frame1) {
+    return ErrorContext(bci, LOCALS_SIZE_MISMATCH,
+        TypeOrigin::frame(frame0), TypeOrigin::frame(frame1));
+  }
+  static ErrorContext stack_size_mismatch(
+      u2 bci, StackMapFrame* frame0, StackMapFrame* frame1) {
+    return ErrorContext(bci, STACK_SIZE_MISMATCH,
+        TypeOrigin::frame(frame0), TypeOrigin::frame(frame1));
+  }
+  static ErrorContext stack_overflow(u2 bci, StackMapFrame* frame) {
+    return ErrorContext(bci, STACK_OVERFLOW, TypeOrigin::frame(frame));
+  }
+  static ErrorContext stack_underflow(u2 bci, StackMapFrame* frame) {
+    return ErrorContext(bci, STACK_UNDERFLOW, TypeOrigin::frame(frame));
+  }
+  static ErrorContext missing_stackmap(u2 bci) {
+    return ErrorContext(bci, MISSING_STACKMAP);
+  }
+  static ErrorContext bad_stackmap(int index, StackMapFrame* frame) {
+    return ErrorContext(0, BAD_STACKMAP, TypeOrigin::frame(frame));
+  }
+
+  bool is_valid() const { return _fault != NO_FAULT; }
+  int bci() const { return _bci; }
+
+  void reset_frames() {
+    _type.reset_frame();
+    _expected.reset_frame();
+  }
+
+  void details(outputStream* ss, methodOop method) const;
+
+#ifdef ASSERT
+  void print_on(outputStream* str) const {
+    str->print("error_context(%d, %d,", _bci, _fault);
+    _type.print_on(str);
+    str->print(",");
+    _expected.print_on(str);
+    str->print(")");
+  }
+#endif
+
+ private:
+  void location_details(outputStream* ss, methodOop method) const;
+  void reason_details(outputStream* ss) const;
+  void frame_details(outputStream* ss) const;
+  void bytecode_details(outputStream* ss, methodOop method) const;
+  void handler_details(outputStream* ss, methodOop method) const;
+  void stackmap_details(outputStream* ss, methodOop method) const;
+};
+
 // A new instance of this class is created for each class being verified
 class ClassVerifier : public StackObj {
  private:
   Thread* _thread;
+  GrowableArray<Symbol*>* _symbols;  // keep a list of symbols created
+
   Symbol* _exception_type;
   char* _message;
-  size_t _message_buffer_len;
-  GrowableArray<Symbol*>* _symbols;  // keep a list of symbols created
+
+  ErrorContext _error_context;  // contains information about an error
 
   void verify_method(methodHandle method, TRAPS);
   char* generate_code_data(methodHandle m, u4 code_length, TRAPS);
-  void verify_exception_handler_table(u4 code_length, char* code_data, int& min, int& max, TRAPS);
+  void verify_exception_handler_table(u4 code_length, char* code_data,
+                                      int& min, int& max, TRAPS);
   void verify_local_variable_table(u4 code_length, char* code_data, TRAPS);
 
   VerificationType cp_ref_index_to_type(
@@ -111,10 +271,10 @@
     instanceKlassHandle this_class, klassOop target_class,
     Symbol* field_name, Symbol* field_sig, bool is_method);
 
-  void verify_cp_index(constantPoolHandle cp, int index, TRAPS);
-  void verify_cp_type(
-    int index, constantPoolHandle cp, unsigned int types, TRAPS);
-  void verify_cp_class_type(int index, constantPoolHandle cp, TRAPS);
+  void verify_cp_index(u2 bci, constantPoolHandle cp, int index, TRAPS);
+  void verify_cp_type(u2 bci, int index, constantPoolHandle cp,
+      unsigned int types, TRAPS);
+  void verify_cp_class_type(u2 bci, int index, constantPoolHandle cp, TRAPS);
 
   u2 verify_stackmap_table(
     u2 stackmap_index, u2 bci, StackMapFrame* current_frame,
@@ -137,7 +297,7 @@
     constantPoolHandle cp, TRAPS);
 
   void verify_invoke_init(
-    RawBytecodeStream* bcs, VerificationType ref_class_type,
+    RawBytecodeStream* bcs, u2 ref_index, VerificationType ref_class_type,
     StackMapFrame* current_frame, u4 code_length, bool* this_uninit,
     constantPoolHandle cp, TRAPS);
 
@@ -147,10 +307,11 @@
     constantPoolHandle cp, TRAPS);
 
   VerificationType get_newarray_type(u2 index, u2 bci, TRAPS);
-  void verify_anewarray(
-    u2 index, constantPoolHandle cp, StackMapFrame* current_frame, TRAPS);
+  void verify_anewarray(u2 bci, u2 index, constantPoolHandle cp,
+      StackMapFrame* current_frame, TRAPS);
   void verify_return_value(
-    VerificationType return_type, VerificationType type, u2 offset, TRAPS);
+      VerificationType return_type, VerificationType type, u2 offset,
+      StackMapFrame* current_frame, TRAPS);
 
   void verify_iload (u2 index, StackMapFrame* current_frame, TRAPS);
   void verify_lload (u2 index, StackMapFrame* current_frame, TRAPS);
@@ -189,7 +350,7 @@
   };
 
   // constructor
-  ClassVerifier(instanceKlassHandle klass, char* msg, size_t msg_len, TRAPS);
+  ClassVerifier(instanceKlassHandle klass, TRAPS);
 
   // destructor
   ~ClassVerifier();
@@ -207,13 +368,17 @@
   // Return status modes
   Symbol* result() const { return _exception_type; }
   bool has_error() const { return result() != NULL; }
+  char* exception_message() {
+    stringStream ss;
+    ss.print("%s", _message);
+    _error_context.details(&ss, _method());
+    return ss.as_string();
+  }
 
   // Called when verify or class format errors are encountered.
   // May throw an exception based upon the mode.
-  void verify_error(u2 offset, const char* fmt, ...);
-  void verify_error(const char* fmt, ...);
+  void verify_error(ErrorContext ctx, const char* fmt, ...);
   void class_format_error(const char* fmt, ...);
-  void format_error_message(const char* fmt, int offset, va_list args);
 
   klassOop load_class(Symbol* name, TRAPS);
 
@@ -228,10 +393,11 @@
   // their reference counts need to be decremented when the verifier object
   // goes out of scope.  Since these symbols escape the scope in which they're
   // created, we can't use a TempNewSymbol.
-  Symbol* create_temporary_symbol(const Symbol* s, int begin, int end, TRAPS);
+  Symbol* create_temporary_symbol(
+      const Symbol* s, int begin, int end, TRAPS);
   Symbol* create_temporary_symbol(const char *s, int length, TRAPS);
 
-  static bool _verify_verbose;  // for debugging
+  TypeOrigin ref_ctx(const char* str, TRAPS);
 };
 
 inline int ClassVerifier::change_sig_to_verificationType(
--- a/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1BlockOffsetTable.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -159,14 +159,30 @@
            "right address out of range");
     assert(left  < right, "Heap addresses out of order");
     size_t num_cards = pointer_delta(right, left) >> LogN_words;
-    memset(&_offset_array[index_for(left)], offset, num_cards);
+    if (UseMemSetInBOT) {
+      memset(&_offset_array[index_for(left)], offset, num_cards);
+    } else {
+      size_t i = index_for(left);
+      const size_t end = i + num_cards;
+      for (; i < end; i++) {
+        _offset_array[i] = offset;
+      }
+    }
   }
 
   void set_offset_array(size_t left, size_t right, u_char offset) {
     assert(right < _vs.committed_size(), "right address out of range");
-    assert(left  <= right, "indexes out of order");
+    assert(left <= right, "indexes out of order");
     size_t num_cards = right - left + 1;
-    memset(&_offset_array[left], offset, num_cards);
+    if (UseMemSetInBOT) {
+      memset(&_offset_array[left], offset, num_cards);
+    } else {
+      size_t i = left;
+      const size_t end = i + num_cards;
+      for (; i < end; i++) {
+        _offset_array[i] = offset;
+      }
+    }
   }
 
   void check_offset_array(size_t index, HeapWord* high, HeapWord* low) const {
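
This change guards the memset() fast path behind UseMemSetInBOT and otherwise
falls back to an explicit store loop; the motivation is the sun4v memset
instability warned about in g1CollectedHeap.cpp below. The shape of the
workaround as a standalone sketch (flag and array types simplified):

    #include <cstddef>
    #include <cstring>

    static bool use_memset = true;  // stands in for the UseMemSetInBOT flag

    // Fill offset-table entries either with memset or, when the flag is
    // off, with a plain loop that avoids the problematic block stores.
    void fill_offset_array(unsigned char* array, size_t from, size_t n,
                           unsigned char value) {
      if (use_memset) {
        memset(&array[from], value, n);
      } else {
        for (size_t i = from; i < from + n; i++) {
          array[i] = value;  // same effect, one store at a time
        }
      }
    }
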
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1891,6 +1891,8 @@
   _young_list(new YoungList(this)),
   _gc_time_stamp(0),
   _retained_old_gc_alloc_region(NULL),
+  _survivor_plab_stats(YoungPLABSize, PLABWeight),
+  _old_plab_stats(OldPLABSize, PLABWeight),
   _expand_heap_after_alloc_failure(true),
   _surviving_young_words(NULL),
   _old_marking_cycles_started(0),
@@ -1932,6 +1934,14 @@
   clear_cset_start_regions();
 
   guarantee(_task_queues != NULL, "task_queues allocation failure.");
+#ifdef SPARC
+  // Issue a stern warning, but allow use for experimentation and debugging.
+  if (VM_Version::is_sun4v() && UseMemSetInBOT) {
+    assert(!FLAG_IS_DEFAULT(UseMemSetInBOT), "Error");
+    warning("Experimental flag -XX:+UseMemSetInBOT is known to cause instability"
+            " on sun4v; please understand that you are using at your own risk!");
+  }
+#endif
 }
 
 jint G1CollectedHeap::initialize() {
@@ -3580,15 +3590,11 @@
   DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
   size_t buffer_size = dcqs.buffer_size();
   size_t buffer_num = dcqs.completed_buffers_num();
-  return buffer_size * buffer_num + extra_cards;
-}
-
-size_t G1CollectedHeap::max_pending_card_num() {
-  DirtyCardQueueSet& dcqs = JavaThread::dirty_card_queue_set();
-  size_t buffer_size = dcqs.buffer_size();
-  size_t buffer_num  = dcqs.completed_buffers_num();
-  int thread_num  = Threads::number_of_threads();
-  return (buffer_num + thread_num) * buffer_size;
+
+  // PtrQueueSet::buffer_size() and PtrQueue::size() return sizes
+  // in bytes - not the number of 'entries'. We need to convert
+  // into a number of cards.
+  return (buffer_size * buffer_num + extra_cards) / oopSize;
 }
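
The division by oopSize is the substance of this fix: each queued entry is a
card pointer occupying oopSize bytes, while buffer_size() reports bytes, so
the old return value overstated the card count by a factor of the pointer
size. A worked example with hypothetical figures (8-byte pointers, 2048-byte
buffers, 10 completed buffers):

    #include <cassert>
    #include <cstddef>

    int main() {
      const size_t oop_size    = 8;     // hypothetical 64-bit pointer size
      const size_t buffer_size = 2048;  // bytes per buffer (assumed)
      const size_t buffer_num  = 10;
      const size_t extra_cards = 0;
      // Entries are pointers: divide the byte total by the pointer size.
      size_t cards = (buffer_size * buffer_num + extra_cards) / oop_size;
      assert(cards == 2560);            // the old code would have said 20480
      return 0;
    }
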
 
 size_t G1CollectedHeap::cards_scanned() {
@@ -4099,17 +4105,22 @@
   size_t gclab_word_size;
   switch (purpose) {
     case GCAllocForSurvived:
-      gclab_word_size = YoungPLABSize;
+      gclab_word_size = _survivor_plab_stats.desired_plab_sz();
       break;
     case GCAllocForTenured:
-      gclab_word_size = OldPLABSize;
+      gclab_word_size = _old_plab_stats.desired_plab_sz();
       break;
     default:
       assert(false, "unknown GCAllocPurpose");
-      gclab_word_size = OldPLABSize;
+      gclab_word_size = _old_plab_stats.desired_plab_sz();
       break;
   }
-  return gclab_word_size;
+
+  // Prevent humongous PLAB sizes for two reasons:
+  // * PLABs are allocated using similar paths to oops, but should
+  //   never be in a humongous region
+  // * Allowing humongous PLABs needlessly churns the region free lists
+  return MIN2(_humongous_object_threshold_in_words, gclab_word_size);
 }
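
Because the desired size now comes from adaptive PLABStats rather than the
fixed YoungPLABSize/OldPLABSize, it needs the clamp added above: in G1 the
humongous threshold is half a region, and a PLAB must never be allocated as a
humongous object. A sketch of the clamp with hypothetical figures (1 MiB
regions, 8-byte heap words):

    #include <algorithm>
    #include <cstddef>

    const size_t region_size_words         = (1024 * 1024) / 8;   // 131072
    const size_t humongous_threshold_words = region_size_words / 2;

    // Equivalent to MIN2(_humongous_object_threshold_in_words, gclab_word_size).
    size_t clamp_plab_size(size_t desired_words) {
      return std::min(humongous_threshold_words, desired_words);
    }
    // clamp_plab_size(4096)   == 4096   (normal adaptive size passes through)
    // clamp_plab_size(100000) == 65536  (runaway size capped below humongous)
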
 
 void G1CollectedHeap::init_mutator_alloc_region() {
@@ -4165,6 +4176,11 @@
   // want either way so no reason to check explicitly for either
   // condition.
   _retained_old_gc_alloc_region = _old_gc_alloc_region.release();
+
+  if (ResizePLAB) {
+    _survivor_plab_stats.adjust_desired_plab_sz();
+    _old_plab_stats.adjust_desired_plab_sz();
+  }
 }
 
 void G1CollectedHeap::abandon_gc_alloc_regions() {
--- a/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -33,7 +33,7 @@
 #include "gc_implementation/g1/heapRegionSeq.hpp"
 #include "gc_implementation/g1/heapRegionSets.hpp"
 #include "gc_implementation/shared/hSpaceCounters.hpp"
-#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #include "memory/barrierSet.hpp"
 #include "memory/memRegion.hpp"
 #include "memory/sharedHeap.hpp"
@@ -278,10 +278,33 @@
   // survivor objects.
   SurvivorGCAllocRegion _survivor_gc_alloc_region;
 
+  // PLAB sizing policy for survivors.
+  PLABStats _survivor_plab_stats;
+
   // Alloc region used to satisfy allocation requests by the GC for
   // old objects.
   OldGCAllocRegion _old_gc_alloc_region;
 
+  // PLAB sizing policy for tenured objects.
+  PLABStats _old_plab_stats;
+
+  PLABStats* stats_for_purpose(GCAllocPurpose purpose) {
+    PLABStats* stats = NULL;
+
+    switch (purpose) {
+    case GCAllocForSurvived:
+      stats = &_survivor_plab_stats;
+      break;
+    case GCAllocForTenured:
+      stats = &_old_plab_stats;
+      break;
+    default:
+      assert(false, "unrecognized GCAllocPurpose");
+    }
+
+    return stats;
+  }
+
   // The last old region we allocated to during the last GC.
   // Typically, it is not full so we should re-use it during the next GC.
   HeapRegion* _retained_old_gc_alloc_region;
@@ -314,7 +337,7 @@
   G1MonitoringSupport* _g1mm;
 
   // Determines PLAB size for a particular allocation purpose.
-  static size_t desired_plab_sz(GCAllocPurpose purpose);
+  size_t desired_plab_sz(GCAllocPurpose purpose);
 
   // Outside of GC pauses, the number of bytes used in all regions other
   // than the current allocation region.
@@ -1683,7 +1706,6 @@
   void stop_conc_gc_threads();
 
   size_t pending_card_num();
-  size_t max_pending_card_num();
   size_t cards_scanned();
 
 protected:
@@ -1811,19 +1833,19 @@
   }
 
   HeapWord* allocate_slow(GCAllocPurpose purpose, size_t word_sz) {
-
     HeapWord* obj = NULL;
     size_t gclab_word_size = _g1h->desired_plab_sz(purpose);
     if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) {
       G1ParGCAllocBuffer* alloc_buf = alloc_buffer(purpose);
-      assert(gclab_word_size == alloc_buf->word_sz(),
-             "dynamic resizing is not supported");
       add_to_alloc_buffer_waste(alloc_buf->words_remaining());
-      alloc_buf->retire(false, false);
+      alloc_buf->flush_stats_and_retire(_g1h->stats_for_purpose(purpose),
+                                        false /* end_of_gc */,
+                                        false /* retain */);
 
       HeapWord* buf = _g1h->par_allocate_during_gc(purpose, gclab_word_size);
       if (buf == NULL) return NULL; // Let caller handle allocation failure.
       // Otherwise.
+      alloc_buf->set_word_size(gclab_word_size);
       alloc_buf->set_buf(buf);
 
       obj = alloc_buf->allocate(word_sz);
@@ -1908,7 +1930,9 @@
     for (int ap = 0; ap < GCAllocPurposeCount; ++ap) {
       size_t waste = _alloc_buffers[ap]->words_remaining();
       add_to_alloc_buffer_waste(waste);
-      _alloc_buffers[ap]->retire(true, false);
+      _alloc_buffers[ap]->flush_stats_and_retire(_g1h->stats_for_purpose((GCAllocPurpose)ap),
+                                                 true /* end_of_gc */,
+                                                 false /* retain */);
     }
   }
 
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -90,7 +90,6 @@
 
   _alloc_rate_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _prev_collection_pause_end_ms(0.0),
-  _pending_card_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
   _rs_length_diff_seq(new TruncatedSeq(TruncatedSeqLength)),
   _cost_per_card_ms_seq(new TruncatedSeq(TruncatedSeqLength)),
   _young_cards_per_entry_ratio_seq(new TruncatedSeq(TruncatedSeqLength)),
@@ -197,7 +196,6 @@
 
   int index = MIN2(_parallel_gc_threads - 1, 7);
 
-  _pending_card_diff_seq->add(0.0);
   _rs_length_diff_seq->add(rs_length_diff_defaults[index]);
   _cost_per_card_ms_seq->add(cost_per_card_ms_defaults[index]);
   _young_cards_per_entry_ratio_seq->add(
@@ -657,7 +655,7 @@
   for (HeapRegion * r = _recorded_survivor_head;
        r != NULL && r != _recorded_survivor_tail->get_next_young_region();
        r = r->get_next_young_region()) {
-    survivor_regions_evac_time += predict_region_elapsed_time_ms(r, true);
+    survivor_regions_evac_time += predict_region_elapsed_time_ms(r, gcs_are_young());
   }
   return survivor_regions_evac_time;
 }
@@ -801,9 +799,8 @@
   _cur_collection_pause_used_at_start_bytes = start_used;
   _cur_collection_pause_used_regions_at_start = _g1->used_regions();
   _pending_cards = _g1->pending_card_num();
-  _max_pending_cards = _g1->max_pending_card_num();
 
-  _bytes_in_collection_set_before_gc = 0;
+  _collection_set_bytes_used_before = 0;
   _bytes_copied_during_gc = 0;
 
   YoungList* young_list = _g1->young_list();
@@ -1036,12 +1033,6 @@
   // do that for any other surv rate groups
 
   if (update_stats) {
-    size_t diff = 0;
-    if (_max_pending_cards >= _pending_cards) {
-      diff = _max_pending_cards - _pending_cards;
-    }
-    _pending_card_diff_seq->add((double) diff);
-
     double cost_per_card_ms = 0.0;
     if (_pending_cards > 0) {
       cost_per_card_ms = phase_times()->_update_rs_time / (double) _pending_cards;
@@ -1126,9 +1117,9 @@
     _constant_other_time_ms_seq->add(constant_other_time_ms);
 
     double survival_ratio = 0.0;
-    if (_bytes_in_collection_set_before_gc > 0) {
+    if (_collection_set_bytes_used_before > 0) {
       survival_ratio = (double) _bytes_copied_during_gc /
-                                   (double) _bytes_in_collection_set_before_gc;
+                                   (double) _collection_set_bytes_used_before;
     }
 
     _pending_cards_seq->add((double) _pending_cards);
@@ -1229,6 +1220,15 @@
 }
 
 double
+G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards,
+                                                size_t scanned_cards) {
+  return
+    predict_rs_update_time_ms(pending_cards) +
+    predict_rs_scan_time_ms(scanned_cards) +
+    predict_constant_other_time_ms();
+}
+
+double
 G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards) {
   size_t rs_length = predict_rs_length_diff();
   size_t card_num;
@@ -1240,39 +1240,6 @@
   return predict_base_elapsed_time_ms(pending_cards, card_num);
 }
 
-double
-G1CollectorPolicy::predict_base_elapsed_time_ms(size_t pending_cards,
-                                                size_t scanned_cards) {
-  return
-    predict_rs_update_time_ms(pending_cards) +
-    predict_rs_scan_time_ms(scanned_cards) +
-    predict_constant_other_time_ms();
-}
-
-double
-G1CollectorPolicy::predict_region_elapsed_time_ms(HeapRegion* hr,
-                                                  bool young) {
-  size_t rs_length = hr->rem_set()->occupied();
-  size_t card_num;
-  if (gcs_are_young()) {
-    card_num = predict_young_card_num(rs_length);
-  } else {
-    card_num = predict_non_young_card_num(rs_length);
-  }
-  size_t bytes_to_copy = predict_bytes_to_copy(hr);
-
-  double region_elapsed_time_ms =
-    predict_rs_scan_time_ms(card_num) +
-    predict_object_copy_time_ms(bytes_to_copy);
-
-  if (young)
-    region_elapsed_time_ms += predict_young_other_time_ms(1);
-  else
-    region_elapsed_time_ms += predict_non_young_other_time_ms(1);
-
-  return region_elapsed_time_ms;
-}
-
 size_t G1CollectorPolicy::predict_bytes_to_copy(HeapRegion* hr) {
   size_t bytes_to_copy;
   if (hr->is_marked())
@@ -1286,6 +1253,35 @@
   return bytes_to_copy;
 }
 
+double
+G1CollectorPolicy::predict_region_elapsed_time_ms(HeapRegion* hr,
+                                                  bool for_young_gc) {
+  size_t rs_length = hr->rem_set()->occupied();
+  size_t card_num;
+
+  // The predicted number of cards depends on the type of GC we are
+  // predicting for.
+  if (for_young_gc) {
+    card_num = predict_young_card_num(rs_length);
+  } else {
+    card_num = predict_non_young_card_num(rs_length);
+  }
+  size_t bytes_to_copy = predict_bytes_to_copy(hr);
+
+  double region_elapsed_time_ms =
+    predict_rs_scan_time_ms(card_num) +
+    predict_object_copy_time_ms(bytes_to_copy);
+
+  // The prediction of the "other" time for this region is based
+  // upon the region type and NOT the GC type.
+  if (hr->is_young()) {
+    region_elapsed_time_ms += predict_young_other_time_ms(1);
+  } else {
+    region_elapsed_time_ms += predict_non_young_other_time_ms(1);
+  }
+  return region_elapsed_time_ms;
+}
+
 void
 G1CollectorPolicy::init_cset_region_lengths(uint eden_cset_region_length,
                                             uint survivor_cset_region_length) {
@@ -1342,22 +1338,6 @@
   }
 }
 
-class CountCSClosure: public HeapRegionClosure {
-  G1CollectorPolicy* _g1_policy;
-public:
-  CountCSClosure(G1CollectorPolicy* g1_policy) :
-    _g1_policy(g1_policy) {}
-  bool doHeapRegion(HeapRegion* r) {
-    _g1_policy->_bytes_in_collection_set_before_gc += r->used();
-    return false;
-  }
-};
-
-void G1CollectorPolicy::count_CS_bytes_used() {
-  CountCSClosure cs_closure(this);
-  _g1->collection_set_iterate(&cs_closure);
-}
-
 void G1CollectorPolicy::print_tracing_info() const {
   _trace_gen0_time_data.print();
   _trace_gen1_time_data.print();
@@ -1696,7 +1676,7 @@
   // retiring the current allocation region) or a concurrent
   // refine thread (RSet sampling).
 
-  double region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, true);
+  double region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, gcs_are_young());
   size_t used_bytes = hr->used();
   _inc_cset_recorded_rs_lengths += rs_length;
   _inc_cset_predicted_elapsed_time_ms += region_elapsed_time_ms;
@@ -1731,7 +1711,7 @@
   _inc_cset_recorded_rs_lengths_diffs += rs_lengths_diff;
 
   double old_elapsed_time_ms = hr->predicted_elapsed_time_ms();
-  double new_region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, true);
+  double new_region_elapsed_time_ms = predict_region_elapsed_time_ms(hr, gcs_are_young());
   double elapsed_ms_diff = new_region_elapsed_time_ms - old_elapsed_time_ms;
   _inc_cset_predicted_elapsed_time_ms_diffs += elapsed_ms_diff;
 
@@ -1854,8 +1834,7 @@
 }
 
 void G1CollectorPolicy::finalize_cset(double target_pause_time_ms) {
-  // Set this here - in case we're not doing young collections.
-  double non_young_start_time_sec = os::elapsedTime();
+  double young_start_time_sec = os::elapsedTime();
 
   YoungList* young_list = _g1->young_list();
   finalize_incremental_cset_building();
@@ -1869,17 +1848,14 @@
   double predicted_pause_time_ms = base_time_ms;
   double time_remaining_ms = target_pause_time_ms - base_time_ms;
 
-  ergo_verbose3(ErgoCSetConstruction | ErgoHigh,
+  ergo_verbose4(ErgoCSetConstruction | ErgoHigh,
                 "start choosing CSet",
+                ergo_format_size("_pending_cards")
                 ergo_format_ms("predicted base time")
                 ergo_format_ms("remaining time")
                 ergo_format_ms("target pause time"),
-                base_time_ms, time_remaining_ms, target_pause_time_ms);
+                _pending_cards, base_time_ms, time_remaining_ms, target_pause_time_ms);
 
-  HeapRegion* hr;
-  double young_start_time_sec = os::elapsedTime();
-
-  _collection_set_bytes_used_before = 0;
   _last_gc_was_young = gcs_are_young() ? true : false;
 
   if (_last_gc_was_young) {
@@ -1895,7 +1871,8 @@
   uint survivor_region_length = young_list->survivor_length();
   uint eden_region_length = young_list->length() - survivor_region_length;
   init_cset_region_lengths(eden_region_length, survivor_region_length);
-  hr = young_list->first_survivor_region();
+
+  HeapRegion* hr = young_list->first_survivor_region();
   while (hr != NULL) {
     assert(hr->is_survivor(), "badly formed young list");
     hr->set_young();
@@ -1926,8 +1903,8 @@
   phase_times()->_recorded_young_cset_choice_time_ms =
     (young_end_time_sec - young_start_time_sec) * 1000.0;
 
-  // We are doing young collections so reset this.
-  non_young_start_time_sec = young_end_time_sec;
+  // Set the start of the non-young choice time.
+  double non_young_start_time_sec = young_end_time_sec;
 
   if (!gcs_are_young()) {
     CollectionSetChooser* cset_chooser = _collectionSetChooser;
@@ -1937,6 +1914,7 @@
 
     uint expensive_region_num = 0;
     bool check_time_remaining = adaptive_young_list_length();
+
     HeapRegion* hr = cset_chooser->peek();
     while (hr != NULL) {
       if (old_cset_region_length() >= max_old_cset_length) {
@@ -1950,7 +1928,7 @@
         break;
       }
 
-      double predicted_time_ms = predict_region_elapsed_time_ms(hr, false);
+      double predicted_time_ms = predict_region_elapsed_time_ms(hr, gcs_are_young());
       if (check_time_remaining) {
         if (predicted_time_ms > time_remaining_ms) {
           // Too expensive for the current CSet.
@@ -2025,8 +2003,6 @@
 
   stop_incremental_cset_building();
 
-  count_CS_bytes_used();
-
   ergo_verbose5(ErgoCSetConstruction,
                 "finish choosing CSet",
                 ergo_format_region("eden")
--- a/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1CollectorPolicy.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -228,7 +228,6 @@
   TruncatedSeq* _alloc_rate_ms_seq;
   double        _prev_collection_pause_end_ms;
 
-  TruncatedSeq* _pending_card_diff_seq;
   TruncatedSeq* _rs_length_diff_seq;
   TruncatedSeq* _cost_per_card_ms_seq;
   TruncatedSeq* _young_cards_per_entry_ratio_seq;
@@ -295,7 +294,6 @@
   double _pause_time_target_ms;
 
   size_t _pending_cards;
-  size_t _max_pending_cards;
 
 public:
   // Accessors
@@ -325,28 +323,6 @@
     _max_rs_lengths = rs_lengths;
   }
 
-  size_t predict_pending_card_diff() {
-    double prediction = get_new_neg_prediction(_pending_card_diff_seq);
-    if (prediction < 0.00001) {
-      return 0;
-    } else {
-      return (size_t) prediction;
-    }
-  }
-
-  size_t predict_pending_cards() {
-    size_t max_pending_card_num = _g1->max_pending_card_num();
-    size_t diff = predict_pending_card_diff();
-    size_t prediction;
-    if (diff > max_pending_card_num) {
-      prediction = max_pending_card_num;
-    } else {
-      prediction = max_pending_card_num - diff;
-    }
-
-    return prediction;
-  }
-
   size_t predict_rs_length_diff() {
     return (size_t) get_new_prediction(_rs_length_diff_seq);
   }
@@ -439,7 +415,7 @@
   double predict_base_elapsed_time_ms(size_t pending_cards,
                                       size_t scanned_cards);
   size_t predict_bytes_to_copy(HeapRegion* hr);
-  double predict_region_elapsed_time_ms(HeapRegion* hr, bool young);
+  double predict_region_elapsed_time_ms(HeapRegion* hr, bool for_young_gc);
 
   void set_recorded_rs_lengths(size_t rs_lengths);
 
@@ -495,12 +471,6 @@
   }
 
 private:
-  size_t _bytes_in_collection_set_before_gc;
-  size_t _bytes_copied_during_gc;
-
-  // Used to count used bytes in CS.
-  friend class CountCSClosure;
-
   // Statistics kept per GC stoppage, pause or full.
   TruncatedSeq* _recent_prev_end_times_for_all_gcs_sec;
 
@@ -514,9 +484,13 @@
 
   // The number of bytes in the collection set before the pause. Set from
   // the incrementally built collection set at the start of an evacuation
-  // pause.
+  // pause, and incremented in finalize_cset() when adding old regions
+  // (if any) to the collection set.
   size_t _collection_set_bytes_used_before;
 
+  // The number of bytes copied during the GC.
+  size_t _bytes_copied_during_gc;
+
   // The associated information that is maintained while the incremental
   // collection set is being built with young regions. Used to populate
   // the recorded info for the evacuation pause.
@@ -646,9 +620,6 @@
   bool predict_will_fit(uint young_length, double base_time_ms,
                         uint base_free_regions, double target_pause_time_ms);
 
-  // Count the number of bytes used in the CS.
-  void count_CS_bytes_used();
-
 public:
 
   G1CollectorPolicy();
@@ -666,10 +637,6 @@
   // higher, recalculate the young list target length prediction.
   void revise_young_list_target_length_if_necessary();
 
-  size_t bytes_in_collection_set() {
-    return _bytes_in_collection_set_before_gc;
-  }
-
   // This should be called after the heap is resized.
   void record_new_heap_size(uint new_number_of_regions);
 
--- a/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1ErgoVerbose.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -125,6 +125,7 @@
 #define ergo_format_double(_name_)   ", " _name_ ": %1.2f"
 #define ergo_format_perc(_name_)     ", " _name_ ": %1.2f %%"
 #define ergo_format_ms(_name_)       ", " _name_ ": %1.2f ms"
+#define ergo_format_size(_name_)     ", " _name_ ": "SIZE_FORMAT
 
 // Double parameter format strings
 #define ergo_format_byte_perc(_name_)                                   \
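
The new ergo_format_size macro slots into the same string-literal-concatenation scheme as the %1.2f helpers above it: each ergo_format_* fragment expands to adjacent string literals, which the preprocessor fuses into a single printf-style format. A self-contained sketch, substituting the standard "%zu" for HotSpot's platform-specific SIZE_FORMAT (an assumption made for portability):

#include <cstdio>
#include <cstddef>

#define SIZE_FORMAT "%zu"  // stand-in for HotSpot's size_t specifier
#define ergo_format_size(_name_)     ", " _name_ ": " SIZE_FORMAT

int main() {
  size_t pending_cards = 1234;
  // Adjacent literals concatenate to: "request, pending cards: %zu\n"
  printf("request" ergo_format_size("pending cards") "\n", pending_cards);
  return 0;
}
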
--- a/src/share/vm/gc_implementation/g1/g1_globals.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/g1_globals.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -287,17 +287,17 @@
           "The number of times we'll force an overflow during "             \
           "concurrent marking")                                             \
                                                                             \
-  develop(uintx, G1DefaultMinNewGenPercent, 20,                             \
+  experimental(uintx, G1DefaultMinNewGenPercent, 20,                        \
           "Percentage (0-100) of the heap size to use as minimum "          \
           "young gen size.")                                                \
                                                                             \
-  develop(uintx, G1DefaultMaxNewGenPercent, 80,                             \
+  experimental(uintx, G1DefaultMaxNewGenPercent, 80,                        \
           "Percentage (0-100) of the heap size to use as maximum "          \
           "young gen size.")                                                \
                                                                             \
-  develop(uintx, G1OldCSetRegionLiveThresholdPercent, 90,                   \
+  experimental(uintx, G1OldCSetRegionLiveThresholdPercent, 90,              \
           "Threshold for regions to be added to the collection set. "       \
-          "Regions with more live bytes that this will not be collected.")  \
+          "Regions with more live bytes than this will not be collected.")  \
                                                                             \
   product(uintx, G1HeapWastePercent, 5,                                     \
           "Amount of space, expressed as a percentage of the heap size, "   \
@@ -306,7 +306,7 @@
   product(uintx, G1MixedGCCountTarget, 4,                                   \
           "The target number of mixed GCs after a marking cycle.")          \
                                                                             \
-  develop(uintx, G1OldCSetRegionThresholdPercent, 10,                       \
+  experimental(uintx, G1OldCSetRegionThresholdPercent, 10,                  \
           "An upper bound for the number of old CSet regions expressed "    \
           "as a percentage of the heap size.")                              \
                                                                             \
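
Note the practical effect of retagging these four flags from develop to experimental: they now exist in product builds, but the VM rejects them unless the experimental gate is unlocked. An illustrative invocation (MyApp is a placeholder):

  java -XX:+UseG1GC -XX:+UnlockExperimentalVMOptions -XX:G1OldCSetRegionThresholdPercent=10 MyApp
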
--- a/src/share/vm/gc_implementation/g1/heapRegion.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/g1/heapRegion.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -384,10 +384,17 @@
 }
 
 void HeapRegion::calc_gc_efficiency() {
+  // GC efficiency is the ratio of how much space would be
+  // reclaimed over how long we predict it would take to reclaim it.
   G1CollectedHeap* g1h = G1CollectedHeap::heap();
   G1CollectorPolicy* g1p = g1h->g1_policy();
-  _gc_efficiency = (double) reclaimable_bytes() /
-                            g1p->predict_region_elapsed_time_ms(this, false);
+
+  // Retrieve a prediction of the elapsed time for this region for
+  // a mixed gc because the region will only be evacuated during a
+  // mixed gc.
+  double region_elapsed_time_ms =
+    g1p->predict_region_elapsed_time_ms(this, false /* for_young_gc */);
+  _gc_efficiency = (double) reclaimable_bytes() / region_elapsed_time_ms;
 }
 
 void HeapRegion::set_startsHumongous(HeapWord* new_top, HeapWord* new_end) {
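
As a worked illustration of the efficiency ratio computed above, with hypothetical numbers in place of real region data:

#include <cstdio>

int main() {
  // Assumed inputs: 4 MB reclaimable, 8 ms predicted mixed-gc copy time.
  double reclaimable_bytes = 4.0 * 1024 * 1024;
  double region_elapsed_time_ms = 8.0;
  double gc_efficiency = reclaimable_bytes / region_elapsed_time_ms;
  printf("gc efficiency: %.0f bytes/ms\n", gc_efficiency);  // 524288
  return 0;
}

Halving the predicted evacuation time doubles a region's efficiency; the policy uses this ratio to rank old regions when choosing collection set candidates.
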
--- a/src/share/vm/gc_implementation/parNew/parGCAllocBuffer.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,344 +0,0 @@
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#include "precompiled.hpp"
-#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
-#include "memory/sharedHeap.hpp"
-#include "oops/arrayOop.hpp"
-#include "oops/oop.inline.hpp"
-
-ParGCAllocBuffer::ParGCAllocBuffer(size_t desired_plab_sz_) :
-  _word_sz(desired_plab_sz_), _bottom(NULL), _top(NULL),
-  _end(NULL), _hard_end(NULL),
-  _retained(false), _retained_filler(),
-  _allocated(0), _wasted(0)
-{
-  assert (min_size() > AlignmentReserve, "Inconsistency!");
-  // arrayOopDesc::header_size depends on command line initialization.
-  FillerHeaderSize = align_object_size(arrayOopDesc::header_size(T_INT));
-  AlignmentReserve = oopDesc::header_size() > MinObjAlignment ? FillerHeaderSize : 0;
-}
-
-size_t ParGCAllocBuffer::FillerHeaderSize;
-
-// If the minimum object size is greater than MinObjAlignment, we can
-// end up with a shard at the end of the buffer that's smaller than
-// the smallest object.  We can't allow that because the buffer must
-// look like it's full of objects when we retire it, so we make
-// sure we have enough space for a filler int array object.
-size_t ParGCAllocBuffer::AlignmentReserve;
-
-void ParGCAllocBuffer::retire(bool end_of_gc, bool retain) {
-  assert(!retain || end_of_gc, "Can only retain at GC end.");
-  if (_retained) {
-    // If the buffer had been retained, shorten the previous filler object.
-    assert(_retained_filler.end() <= _top, "INVARIANT");
-    CollectedHeap::fill_with_object(_retained_filler);
-    // Wasted space book-keeping, otherwise (normally) done in invalidate()
-    _wasted += _retained_filler.word_size();
-    _retained = false;
-  }
-  assert(!end_of_gc || !_retained, "At this point, end_of_gc ==> !_retained.");
-  if (_top < _hard_end) {
-    CollectedHeap::fill_with_object(_top, _hard_end);
-    if (!retain) {
-      invalidate();
-    } else {
-      // Is there wasted space we'd like to retain for the next GC?
-      if (pointer_delta(_end, _top) > FillerHeaderSize) {
-        _retained = true;
-        _retained_filler = MemRegion(_top, FillerHeaderSize);
-        _top = _top + FillerHeaderSize;
-      } else {
-        invalidate();
-      }
-    }
-  }
-}
-
-void ParGCAllocBuffer::flush_stats(PLABStats* stats) {
-  assert(ResizePLAB, "Wasted work");
-  stats->add_allocated(_allocated);
-  stats->add_wasted(_wasted);
-  stats->add_unused(pointer_delta(_end, _top));
-}
-
-// Compute desired plab size and latch result for later
-// use. This should be called once at the end of parallel
-// scavenge; it clears the sensor accumulators.
-void PLABStats::adjust_desired_plab_sz() {
-  assert(ResizePLAB, "Not set");
-  if (_allocated == 0) {
-    assert(_unused == 0, "Inconsistency in PLAB stats");
-    _allocated = 1;
-  }
-  double wasted_frac    = (double)_unused/(double)_allocated;
-  size_t target_refills = (size_t)((wasted_frac*TargetSurvivorRatio)/
-                                   TargetPLABWastePct);
-  if (target_refills == 0) {
-    target_refills = 1;
-  }
-  _used = _allocated - _wasted - _unused;
-  size_t plab_sz = _used/(target_refills*ParallelGCThreads);
-  if (PrintPLAB) gclog_or_tty->print(" (plab_sz = %d ", plab_sz);
-  // Take historical weighted average
-  _filter.sample(plab_sz);
-  // Clip from above and below, and align to object boundary
-  plab_sz = MAX2(min_size(), (size_t)_filter.average());
-  plab_sz = MIN2(max_size(), plab_sz);
-  plab_sz = align_object_size(plab_sz);
-  // Latch the result
-  if (PrintPLAB) gclog_or_tty->print(" desired_plab_sz = %d) ", plab_sz);
-  if (ResizePLAB) {
-    _desired_plab_sz = plab_sz;
-  }
-  // Now clear the accumulators for next round:
-  // note this needs to be fixed in the case where we
-  // are retaining across scavenges. FIX ME !!! XXX
-  _allocated = 0;
-  _wasted    = 0;
-  _unused    = 0;
-}
-
-#ifndef PRODUCT
-void ParGCAllocBuffer::print() {
-  gclog_or_tty->print("parGCAllocBuffer: _bottom: %p  _top: %p  _end: %p  _hard_end: %p"
-             "_retained: %c _retained_filler: [%p,%p)\n",
-             _bottom, _top, _end, _hard_end,
-             "FT"[_retained], _retained_filler.start(), _retained_filler.end());
-}
-#endif // !PRODUCT
-
-const size_t ParGCAllocBufferWithBOT::ChunkSizeInWords =
-MIN2(CardTableModRefBS::par_chunk_heapword_alignment(),
-     ((size_t)Generation::GenGrain)/HeapWordSize);
-const size_t ParGCAllocBufferWithBOT::ChunkSizeInBytes =
-MIN2(CardTableModRefBS::par_chunk_heapword_alignment() * HeapWordSize,
-     (size_t)Generation::GenGrain);
-
-ParGCAllocBufferWithBOT::ParGCAllocBufferWithBOT(size_t word_sz,
-                                                 BlockOffsetSharedArray* bsa) :
-  ParGCAllocBuffer(word_sz),
-  _bsa(bsa),
-  _bt(bsa, MemRegion(_bottom, _hard_end)),
-  _true_end(_hard_end)
-{}
-
-// The buffer comes with its own BOT, with a shared (obviously) underlying
-// BlockOffsetSharedArray. We manipulate this BOT in the normal way
-// as we would for any contiguous space. However, on occasion we
-// need to do some buffer surgery at the extremities before we
-// start using the body of the buffer for allocations. Such surgery
-// (as explained elsewhere) is to prevent allocation on a card that
-// is in the process of being walked concurrently by another GC thread.
-// When such surgery happens at a point that is far removed (to the
-// right of the current allocation point, top), we use the "contig"
-// parameter below to directly manipulate the shared array without
-// modifying the _next_threshold state in the BOT.
-void ParGCAllocBufferWithBOT::fill_region_with_block(MemRegion mr,
-                                                     bool contig) {
-  CollectedHeap::fill_with_object(mr);
-  if (contig) {
-    _bt.alloc_block(mr.start(), mr.end());
-  } else {
-    _bt.BlockOffsetArray::alloc_block(mr.start(), mr.end());
-  }
-}
-
-HeapWord* ParGCAllocBufferWithBOT::allocate_slow(size_t word_sz) {
-  HeapWord* res = NULL;
-  if (_true_end > _hard_end) {
-    assert((HeapWord*)align_size_down(intptr_t(_hard_end),
-                                      ChunkSizeInBytes) == _hard_end,
-           "or else _true_end should be equal to _hard_end");
-    assert(_retained, "or else _true_end should be equal to _hard_end");
-    assert(_retained_filler.end() <= _top, "INVARIANT");
-    CollectedHeap::fill_with_object(_retained_filler);
-    if (_top < _hard_end) {
-      fill_region_with_block(MemRegion(_top, _hard_end), true);
-    }
-    HeapWord* next_hard_end = MIN2(_true_end, _hard_end + ChunkSizeInWords);
-    _retained_filler = MemRegion(_hard_end, FillerHeaderSize);
-    _bt.alloc_block(_retained_filler.start(), _retained_filler.word_size());
-    _top      = _retained_filler.end();
-    _hard_end = next_hard_end;
-    _end      = _hard_end - AlignmentReserve;
-    res       = ParGCAllocBuffer::allocate(word_sz);
-    if (res != NULL) {
-      _bt.alloc_block(res, word_sz);
-    }
-  }
-  return res;
-}
-
-void
-ParGCAllocBufferWithBOT::undo_allocation(HeapWord* obj, size_t word_sz) {
-  ParGCAllocBuffer::undo_allocation(obj, word_sz);
-  // This may back us up beyond the previous threshold, so reset.
-  _bt.set_region(MemRegion(_top, _hard_end));
-  _bt.initialize_threshold();
-}
-
-void ParGCAllocBufferWithBOT::retire(bool end_of_gc, bool retain) {
-  assert(!retain || end_of_gc, "Can only retain at GC end.");
-  if (_retained) {
-    // We're about to make the retained_filler into a block.
-    _bt.BlockOffsetArray::alloc_block(_retained_filler.start(),
-                                      _retained_filler.end());
-  }
-  // Reset _hard_end to _true_end (and update _end)
-  if (retain && _hard_end != NULL) {
-    assert(_hard_end <= _true_end, "Invariant.");
-    _hard_end = _true_end;
-    _end      = MAX2(_top, _hard_end - AlignmentReserve);
-    assert(_end <= _hard_end, "Invariant.");
-  }
-  _true_end = _hard_end;
-  HeapWord* pre_top = _top;
-
-  ParGCAllocBuffer::retire(end_of_gc, retain);
-  // Now any old _retained_filler is cut back to size, the free part is
-  // filled with a filler object, and top is past the header of that
-  // object.
-
-  if (retain && _top < _end) {
-    assert(end_of_gc && retain, "Or else retain should be false.");
-    // If the lab does not start on a card boundary, we don't want to
-    // allocate onto that card, since that might lead to concurrent
-    // allocation and card scanning, which we don't support.  So we fill
-    // the first card with a garbage object.
-    size_t first_card_index = _bsa->index_for(pre_top);
-    HeapWord* first_card_start = _bsa->address_for_index(first_card_index);
-    if (first_card_start < pre_top) {
-      HeapWord* second_card_start =
-        _bsa->inc_by_region_size(first_card_start);
-
-      // Ensure enough room to fill with the smallest block
-      second_card_start = MAX2(second_card_start, pre_top + AlignmentReserve);
-
-      // If the end is already in the first card, don't go beyond it!
-      // Or if the remainder is too small for a filler object, gobble it up.
-      if (_hard_end < second_card_start ||
-          pointer_delta(_hard_end, second_card_start) < AlignmentReserve) {
-        second_card_start = _hard_end;
-      }
-      if (pre_top < second_card_start) {
-        MemRegion first_card_suffix(pre_top, second_card_start);
-        fill_region_with_block(first_card_suffix, true);
-      }
-      pre_top = second_card_start;
-      _top = pre_top;
-      _end = MAX2(_top, _hard_end - AlignmentReserve);
-    }
-
-    // If the lab does not end on a card boundary, we don't want to
-    // allocate onto that card, since that might lead to concurrent
-    // allocation and card scanning, which we don't support.  So we fill
-    // the last card with a garbage object.
-    size_t last_card_index = _bsa->index_for(_hard_end);
-    HeapWord* last_card_start = _bsa->address_for_index(last_card_index);
-    if (last_card_start < _hard_end) {
-
-      // Ensure enough room to fill with the smallest block
-      last_card_start = MIN2(last_card_start, _hard_end - AlignmentReserve);
-
-      // If the top is already in the last card, don't go back beyond it!
-      // Or if the remainder is too small for a filler object, gobble it up.
-      if (_top > last_card_start ||
-          pointer_delta(last_card_start, _top) < AlignmentReserve) {
-        last_card_start = _top;
-      }
-      if (last_card_start < _hard_end) {
-        MemRegion last_card_prefix(last_card_start, _hard_end);
-        fill_region_with_block(last_card_prefix, false);
-      }
-      _hard_end = last_card_start;
-      _end      = MAX2(_top, _hard_end - AlignmentReserve);
-      _true_end = _hard_end;
-      assert(_end <= _hard_end, "Invariant.");
-    }
-
-    // At this point:
-    //   1) we had a filler object from the original top to hard_end.
-    //   2) We've filled in any partial cards at the front and back.
-    if (pre_top < _hard_end) {
-      // Now we can reset the _bt to do allocation in the given area.
-      MemRegion new_filler(pre_top, _hard_end);
-      fill_region_with_block(new_filler, false);
-      _top = pre_top + ParGCAllocBuffer::FillerHeaderSize;
-      // If there's no space left, don't retain.
-      if (_top >= _end) {
-        _retained = false;
-        invalidate();
-        return;
-      }
-      _retained_filler = MemRegion(pre_top, _top);
-      _bt.set_region(MemRegion(_top, _hard_end));
-      _bt.initialize_threshold();
-      assert(_bt.threshold() > _top, "initialize_threshold failed!");
-
-      // There may be other reasons for queries into the middle of the
-      // filler object.  When such queries are done in parallel with
-      // allocation, bad things can happen, if the query involves object
-      // iteration.  So we ensure that such queries do not involve object
-      // iteration, by putting another filler object on the boundaries of
-      // such queries.  One such is the object spanning a parallel card
-      // chunk boundary.
-
-      // "chunk_boundary" is the address of the first chunk boundary less
-      // than "hard_end".
-      HeapWord* chunk_boundary =
-        (HeapWord*)align_size_down(intptr_t(_hard_end-1), ChunkSizeInBytes);
-      assert(chunk_boundary < _hard_end, "Or else above did not work.");
-      assert(pointer_delta(_true_end, chunk_boundary) >= AlignmentReserve,
-             "Consequence of last card handling above.");
-
-      if (_top <= chunk_boundary) {
-        assert(_true_end == _hard_end, "Invariant.");
-        while (_top <= chunk_boundary) {
-          assert(pointer_delta(_hard_end, chunk_boundary) >= AlignmentReserve,
-                 "Consequence of last card handling above.");
-          _bt.BlockOffsetArray::alloc_block(chunk_boundary, _hard_end);
-          CollectedHeap::fill_with_object(chunk_boundary, _hard_end);
-          _hard_end = chunk_boundary;
-          chunk_boundary -= ChunkSizeInWords;
-        }
-        _end = _hard_end - AlignmentReserve;
-        assert(_top <= _end, "Invariant.");
-        // Now reset the initial filler chunk so it doesn't overlap with
-        // the one(s) inserted above.
-        MemRegion new_filler(pre_top, _hard_end);
-        fill_region_with_block(new_filler, false);
-      }
-    } else {
-      _retained = false;
-      invalidate();
-    }
-  } else {
-    assert(!end_of_gc ||
-           (!_retained && _true_end == _hard_end), "Checking.");
-  }
-  assert(_end <= _hard_end, "Invariant.");
-  assert(_top < _end || _top == _hard_end, "Invariant");
-}
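
The card-level "surgery" in the retire() code above exists because allocation into a card that another GC thread is concurrently walking is unsupported, so partial first and last cards get plugged with filler objects. A minimal sketch of the boundary tests involved, assuming the conventional 512-byte card size (the real code derives boundaries via BlockOffsetSharedArray::index_for and address_for_index):

#include <cstdint>

const uintptr_t kCardSize = 512;  // assumption: the usual HotSpot card size

// Start of the card containing p -- what address_for_index(index_for(p))
// yields in the code above.
inline uintptr_t card_start(uintptr_t p) {
  return p & ~(kCardSize - 1);
}

// True when a buffer beginning at p does not start on a card boundary,
// i.e. the case where retire() fills the first partial card.
inline bool starts_mid_card(uintptr_t p) {
  return card_start(p) != p;
}
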
--- a/src/share/vm/gc_implementation/parNew/parGCAllocBuffer.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,249 +0,0 @@
-/*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- *
- */
-
-#ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
-#define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
-
-#include "memory/allocation.hpp"
-#include "memory/blockOffsetTable.hpp"
-#include "memory/threadLocalAllocBuffer.hpp"
-#include "utilities/globalDefinitions.hpp"
-
-// Forward decl.
-
-class PLABStats;
-
-// A per-thread allocation buffer used during GC.
-class ParGCAllocBuffer: public CHeapObj<mtGC> {
-protected:
-  char head[32];
-  size_t _word_sz;          // in HeapWord units
-  HeapWord* _bottom;
-  HeapWord* _top;
-  HeapWord* _end;       // last allocatable address + 1
-  HeapWord* _hard_end;  // _end + AlignmentReserve
-  bool      _retained;  // whether we hold a _retained_filler
-  MemRegion _retained_filler;
-  // In support of ergonomic sizing of PLAB's
-  size_t    _allocated;     // in HeapWord units
-  size_t    _wasted;        // in HeapWord units
-  char tail[32];
-  static size_t FillerHeaderSize;
-  static size_t AlignmentReserve;
-
-public:
-  // Initializes the buffer to be empty, but with the given "word_sz".
-  // Must get initialized with "set_buf" for an allocation to succeed.
-  ParGCAllocBuffer(size_t word_sz);
-
-  static const size_t min_size() {
-    return ThreadLocalAllocBuffer::min_size();
-  }
-
-  static const size_t max_size() {
-    return ThreadLocalAllocBuffer::max_size();
-  }
-
-  // If an allocation of the given "word_sz" can be satisfied within the
-  // buffer, do the allocation, returning a pointer to the start of the
-  // allocated block.  If the allocation request cannot be satisfied,
-  // return NULL.
-  HeapWord* allocate(size_t word_sz) {
-    HeapWord* res = _top;
-    if (pointer_delta(_end, _top) >= word_sz) {
-      _top = _top + word_sz;
-      return res;
-    } else {
-      return NULL;
-    }
-  }
-
-  // Undo the last allocation in the buffer, which is required to be of the
-  // "obj" of the given "word_sz".
-  void undo_allocation(HeapWord* obj, size_t word_sz) {
-    assert(pointer_delta(_top, _bottom) >= word_sz, "Bad undo");
-    assert(pointer_delta(_top, obj)     == word_sz, "Bad undo");
-    _top = obj;
-  }
-
-  // The total (word) size of the buffer, including both allocated and
-  // unallocated space.
-  size_t word_sz() { return _word_sz; }
-
-  // Should only be done if we are about to reset with a new buffer of the
-  // given size.
-  void set_word_size(size_t new_word_sz) {
-    assert(new_word_sz > AlignmentReserve, "Too small");
-    _word_sz = new_word_sz;
-  }
-
-  // The number of words of unallocated space remaining in the buffer.
-  size_t words_remaining() {
-    assert(_end >= _top, "Negative buffer");
-    return pointer_delta(_end, _top, HeapWordSize);
-  }
-
-  bool contains(void* addr) {
-    return (void*)_bottom <= addr && addr < (void*)_hard_end;
-  }
-
-  // Sets the space of the buffer to be [buf, buf+word_sz()).
-  void set_buf(HeapWord* buf) {
-    _bottom   = buf;
-    _top      = _bottom;
-    _hard_end = _bottom + word_sz();
-    _end      = _hard_end - AlignmentReserve;
-    assert(_end >= _top, "Negative buffer");
-    // In support of ergonomic sizing
-    _allocated += word_sz();
-  }
-
-  // Flush the stats supporting ergonomic sizing of PLAB's
-  void flush_stats(PLABStats* stats);
-  void flush_stats_and_retire(PLABStats* stats, bool retain) {
-    // We flush the stats first in order to get a reading of
-    // unused space in the last buffer.
-    if (ResizePLAB) {
-      flush_stats(stats);
-    }
-    // Retire the last allocation buffer.
-    retire(true, retain);
-  }
-
-  // Force future allocations to fail and queries for contains()
-  // to return false
-  void invalidate() {
-    assert(!_retained, "Shouldn't retain an invalidated buffer.");
-    _end    = _hard_end;
-    _wasted += pointer_delta(_end, _top);  // unused  space
-    _top    = _end;      // force future allocations to fail
-    _bottom = _end;      // force future contains() queries to return false
-  }
-
-  // Fills in the unallocated portion of the buffer with a garbage object.
-  // If "end_of_gc" is TRUE, is after the last use in the GC.  IF "retain"
-  // is true, attempt to re-use the unused portion in the next GC.
-  void retire(bool end_of_gc, bool retain);
-
-  void print() PRODUCT_RETURN;
-};
-
-// PLAB stats book-keeping
-class PLABStats VALUE_OBJ_CLASS_SPEC {
-  size_t _allocated;      // total allocated
-  size_t _wasted;         // of which wasted (internal fragmentation)
-  size_t _unused;         // Unused in last buffer
-  size_t _used;           // derived = allocated - wasted - unused
-  size_t _desired_plab_sz;// output of filter (below), suitably trimmed and quantized
-  AdaptiveWeightedAverage
-         _filter;         // integrator with decay
-
- public:
-  PLABStats(size_t desired_plab_sz_, unsigned wt) :
-    _allocated(0),
-    _wasted(0),
-    _unused(0),
-    _used(0),
-    _desired_plab_sz(desired_plab_sz_),
-    _filter(wt)
-  {
-    size_t min_sz = min_size();
-    size_t max_sz = max_size();
-    size_t aligned_min_sz = align_object_size(min_sz);
-    size_t aligned_max_sz = align_object_size(max_sz);
-    assert(min_sz <= aligned_min_sz && max_sz >= aligned_max_sz &&
-           min_sz <= max_sz,
-           "PLAB clipping computation in adjust_desired_plab_sz()"
-           " may be incorrect");
-  }
-
-  static const size_t min_size() {
-    return ParGCAllocBuffer::min_size();
-  }
-
-  static const size_t max_size() {
-    return ParGCAllocBuffer::max_size();
-  }
-
-  size_t desired_plab_sz() {
-    return _desired_plab_sz;
-  }
-
-  void adjust_desired_plab_sz(); // filter computation, latches output to
-                                 // _desired_plab_sz, clears sensor accumulators
-
-  void add_allocated(size_t v) {
-    Atomic::add_ptr(v, &_allocated);
-  }
-
-  void add_unused(size_t v) {
-    Atomic::add_ptr(v, &_unused);
-  }
-
-  void add_wasted(size_t v) {
-    Atomic::add_ptr(v, &_wasted);
-  }
-};
-
-class ParGCAllocBufferWithBOT: public ParGCAllocBuffer {
-  BlockOffsetArrayContigSpace _bt;
-  BlockOffsetSharedArray*     _bsa;
-  HeapWord*                   _true_end;  // end of the whole ParGCAllocBuffer
-
-  static const size_t ChunkSizeInWords;
-  static const size_t ChunkSizeInBytes;
-  HeapWord* allocate_slow(size_t word_sz);
-
-  void fill_region_with_block(MemRegion mr, bool contig);
-
-public:
-  ParGCAllocBufferWithBOT(size_t word_sz, BlockOffsetSharedArray* bsa);
-
-  HeapWord* allocate(size_t word_sz) {
-    HeapWord* res = ParGCAllocBuffer::allocate(word_sz);
-    if (res != NULL) {
-      _bt.alloc_block(res, word_sz);
-    } else {
-      res = allocate_slow(word_sz);
-    }
-    return res;
-  }
-
-  void undo_allocation(HeapWord* obj, size_t word_sz);
-
-  void set_buf(HeapWord* buf_start) {
-    ParGCAllocBuffer::set_buf(buf_start);
-    _true_end = _hard_end;
-    _bt.set_region(MemRegion(buf_start, word_sz()));
-    _bt.initialize_threshold();
-  }
-
-  void retire(bool end_of_gc, bool retain);
-
-  MemRegion range() {
-    return MemRegion(_top, _true_end);
-  }
-};
-
-#endif // SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARGCALLOCBUFFER_HPP
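
The allocate()/undo_allocation() pair in the header removed above is a plain bump-the-pointer allocator. A self-contained sketch of the same fast path, with raw char* standing in for HeapWord* (an illustrative simplification):

#include <cstddef>

struct PlabSketch {
  char* _top;  // next free byte
  char* _end;  // last allocatable byte + 1
  // Bump _top when the request fits; otherwise report failure so the
  // caller can refill the buffer or allocate directly in the heap.
  char* allocate(size_t bytes) {
    if (static_cast<size_t>(_end - _top) >= bytes) {
      char* res = _top;
      _top += bytes;
      return res;
    }
    return NULL;
  }
  // Roll back the most recent allocation, mirroring undo_allocation()
  // above: obj + bytes is required to equal the current _top.
  void undo_allocation(char* obj, size_t /*bytes*/) {
    _top = obj;
  }
};
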
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -24,11 +24,11 @@
 
 #include "precompiled.hpp"
 #include "gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp"
-#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
 #include "gc_implementation/parNew/parNewGeneration.hpp"
 #include "gc_implementation/parNew/parOopClosures.inline.hpp"
 #include "gc_implementation/shared/adaptiveSizePolicy.hpp"
 #include "gc_implementation/shared/ageTable.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #include "gc_implementation/shared/spaceDecorator.hpp"
 #include "memory/defNewGeneration.inline.hpp"
 #include "memory/genCollectedHeap.hpp"
@@ -453,7 +453,8 @@
     // retire the last buffer.
     par_scan_state.to_space_alloc_buffer()->
       flush_stats_and_retire(_gen.plab_stats(),
-                             false /* !retain */);
+                             true /* end_of_gc */,
+                             false /* retain */);
 
     // Every thread has its own age table.  We need to merge
     // them all into one.
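
The updated call above passes an explicit end_of_gc argument where the old parNew header hard-coded true inside flush_stats_and_retire(). The new declaration lives in the shared header, which is not part of the hunks shown here, but the shape implied by this callsite would be, as a sketch:

// Inferred from the callsite; not the verbatim shared header.
void flush_stats_and_retire(PLABStats* stats, bool end_of_gc, bool retain) {
  // Flush first so the stats see the unused space in the last buffer.
  if (ResizePLAB) {
    flush_stats(stats);
  }
  retire(end_of_gc, retain);
}
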
--- a/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Thu Aug 23 12:27:33 2012 -0700
+++ b/src/share/vm/gc_implementation/parNew/parNewGeneration.hpp	Fri Aug 24 15:51:19 2012 -0700
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -25,7 +25,7 @@
 #ifndef SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARNEWGENERATION_HPP
 #define SHARE_VM_GC_IMPLEMENTATION_PARNEW_PARNEWGENERATION_HPP
 
-#include "gc_implementation/parNew/parGCAllocBuffer.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
 #include "memory/defNewGeneration.hpp"
 #include "utilities/taskqueue.hpp"
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/share/vm/gc_implementation/shared/parGCAllocBuffer.cpp	Fri Aug 24 15:51:19 2012 -0700
@@ -0,0 +1,342 @@
+/*
+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "gc_implementation/shared/parGCAllocBuffer.hpp"
+#include "memory/sharedHeap.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/oop.inline.hpp"
+
+ParGCAllocBuffer::ParGCAllocBuffer(size_t desired_plab_sz_) :
+  _word_sz(desired_plab_sz_), _bottom(NULL), _top(NULL),
+  _end(NULL), _hard_end(NULL),
+  _retained(false), _retained_filler(),
+  _allocated(0), _wasted(0)
+{
+  assert (min_size() > AlignmentReserve, "Inconsistency!");
+  // arrayOopDesc::header_size depends on command line initialization.
+  FillerHeaderSize = align_object_size(arrayOopDesc::header_size(T_INT));
+  AlignmentReserve = oopDesc::header_size() > MinObjAlignment ? FillerHeaderSize : 0;
+}
+
+size_t ParGCAllocBuffer::FillerHeaderSize;
+
+// If the minimum object size is greater than MinObjAlignment, we can
+// end up with a shard at the end of the buffer that's smaller than
+// the smallest object.  We can't allow that because the buffer must
+// look like it's full of objects when we retire it, so we make
+// sure we have enough space for a filler int array object.
+size_t ParGCAllocBuffer::AlignmentReserve;
+
+void ParGCAllocBuffer::retire(bool end_of_gc, bool retain) {
+  assert(!retain || end_of_gc, "Can only retain at GC end.");
+  if (_retained) {
+    // If the buffer had been retained, shorten the previous filler object.
+    assert(_retained_filler.end() <= _top, "INVARIANT");
+    CollectedHeap::fill_with_object(_retained_filler);
+    // Wasted space book-keeping, otherwise (normally) done in invalidate()
+    _wasted += _retained_filler.word_size();
+    _retained = false;
+  }
+  assert(!end_of_gc || !_retained, "At this point, end_of_gc ==> !_retained.");
+  if (_top < _hard_end) {
+    CollectedHeap::fill_with_object(_top, _hard_end);
+    if (!retain) {
+      invalidate();
+    } else {
+      // Is there wasted space we'd like to retain for the next GC?
+      if (pointer_delta(_end, _top) > FillerHeaderSize) {
+        _retained = true;
+        _retained_filler = MemRegion(_top, FillerHeaderSize);
+        _top = _top + FillerHeaderSize;
+      } else {
+        invalidate();
+      }
+    }
+  }
+}
+
+void ParGCAllocBuffer::flush_stats(PLABStats* stats) {
+  assert(ResizePLAB, "Wasted work");
+  stats->add_allocated(_allocated);
+  stats->add_wasted(_wasted);
+  stats->add_unused(pointer_delta(_end, _top));
+}
+
+// Compute desired plab size and latch result for later
+// use. This should be called once at the end of parallel
+// scavenge; it clears the sensor accumulators.
+void PLABStats::adjust_desired_plab_sz() {
+  assert(ResizePLAB, "Not set");
+  if (_allocated == 0) {
+    assert(_unused == 0, "Inconsistency in PLAB stats");
+    _allocated = 1;
+  }
+  double wasted_frac    = (double)_unused/(double)_allocated;
+  size_t target_refills = (size_t)((wasted_frac*TargetSurvivorRatio)/
+                                   TargetPLABWastePct);
+  if (target_refills == 0) {
+    target_refills = 1;
+  }
+  _used = _allocated - _wasted - _unused;
+  size_t plab_sz = _used/(target_refills*ParallelGCThreads);
+  if (PrintPLAB) gclog_or_tty->print(" (plab_sz = " SIZE_FORMAT " ", plab_sz);
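
To make the sizing arithmetic above concrete, here is a worked pass using the default TargetSurvivorRatio (50) and TargetPLABWastePct (10) with hypothetical per-pause totals (all sizes in words):

#include <cstdio>
#include <cstddef>

int main() {
  size_t allocated = 100000, wasted = 2000, unused = 8000;    // hypothetical
  size_t parallel_gc_threads = 4;                             // hypothetical
  double wasted_frac = (double)unused / (double)allocated;    // 0.08
  size_t target_refills = (size_t)((wasted_frac * 50) / 10);  // 0.4 -> 0
  if (target_refills == 0) target_refills = 1;                // clamp, as above
  size_t used = allocated - wasted - unused;                  // 90000
  size_t plab_sz = used / (target_refills * parallel_gc_threads);
  printf("desired plab_sz = %zu words\n", plab_sz);           // 22500
  return 0;
}

The raw result is then smoothed through the weighted-average filter and clipped to [min_size, max_size] before being latched into _desired_plab_sz.
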