changeset 9416:552ad0474d5b

Merge
author coleenp
date Wed, 11 Nov 2015 23:51:57 -0500
parents a7ffcce47ffb e1a7a5dbdf59
children e18b241d1f30
files src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/AbstractAddress.java src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/StackSlotValue.java src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/UnsignedMath.java src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/VirtualStackSlot.java src/jdk.vm.ci/share/classes/jdk.vm.ci.compiler/src/jdk/vm/ci/compiler/Compiler.java src/jdk.vm.ci/share/classes/jdk.vm.ci.compiler/src/jdk/vm/ci/compiler/CompilerFactory.java src/jdk.vm.ci/share/classes/jdk.vm.ci.compiler/src/jdk/vm/ci/compiler/StartupEventListener.java src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/SuppressFBWarnings.java src/jdk.vm.ci/share/classes/jdk.vm.ci.options/src/jdk/vm/ci/options/JVMCIJarsOptionDescriptorsProvider.java test/compiler/jvmci/common/services/jdk.vm.ci.compiler.Compiler test/compiler/jvmci/common/services/jdk.vm.ci.compiler.CompilerFactory test/compiler/jvmci/events/JvmciCompleteInitializationTest.config test/compiler/jvmci/events/JvmciCompleteInitializationTest.java
diffstat 464 files changed, 19484 insertions(+), 5342 deletions(-)
--- a/.hgignore	Wed Nov 11 18:04:33 2015 -0500
+++ b/.hgignore	Wed Nov 11 23:51:57 2015 -0500
@@ -11,3 +11,12 @@
 ^.hgtip
 .DS_Store
 \.class$
+^\.?mx.jvmci/
+^src/jdk.vm.ci/share/classes/\w[\w\.]*/.*\.xml
+^src/jdk.vm.ci/share/classes/\w[\w\.]*/.*\.iml
+^src/jdk.vm.ci/share/classes/\w[\w\.]*/nbproject
+^src/jdk.vm.ci/share/classes/\w[\w\.]*/\..*
+^test/compiler/jvmci/\w[\w\.]*/.*\.xml
+^test/compiler/jvmci/\w[\w\.]*/.*\.iml
+^test/compiler/jvmci/\w[\w\.]*/nbproject
+^test/compiler/jvmci/\w[\w\.]*/\..*
--- a/.hgtags	Wed Nov 11 18:04:33 2015 -0500
+++ b/.hgtags	Wed Nov 11 23:51:57 2015 -0500
@@ -491,3 +491,6 @@
 1ae4191359d811a51512f17dca80ffe79837a5ff jdk9-b86
 d7ffd16382fe7071181b967932b47cff6d1312e1 jdk9-b87
 bc48b669bc6610fac97e16593050c0f559cf6945 jdk9-b88
+20dff0211deda8d5877fda0e80b6d165ab93c6c2 jdk9-b89
+7fe46dc64bb3a8df554b24cde0153ffb24f39c5e jdk9-b90
+3fd5c2ca4c20c183628b6dbeb8df821a961419e3 jdk9-b91
--- a/agent/src/share/classes/sun/jvm/hotspot/oops/OopUtilities.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/oops/OopUtilities.java	Wed Nov 11 23:51:57 2015 -0500
@@ -40,8 +40,7 @@
   // FIXME: access should be synchronized and cleared when VM is
   // resumed
   // String fields
-  private static IntField offsetField;
-  private static IntField countField;
+  private static ByteField coderField;
   private static OopField valueField;
   // ThreadGroup fields
   private static OopField threadGroupParentField;
@@ -96,20 +95,30 @@
     if (charArray == null) {
       return null;
     }
-    return charArrayToString(charArray, 0, (int) charArray.getLength());
+    int length = (int)charArray.getLength();
+    StringBuffer buf = new StringBuffer(length);
+    for (int i = 0; i < length; i++) {
+      buf.append(charArray.getCharAt(i));
+    }
+    return buf.toString();
   }
 
-  public static String charArrayToString(TypeArray charArray, int offset, int length) {
-    if (charArray == null) {
+  public static String byteArrayToString(TypeArray byteArray, byte coder) {
+    if (byteArray == null) {
       return null;
     }
-    final int limit = offset + length;
-    if (Assert.ASSERTS_ENABLED) {
-      Assert.that(offset >= 0 && limit <= charArray.getLength(), "out of bounds");
-    }
+    int length = (int)byteArray.getLength() >> coder;
     StringBuffer buf = new StringBuffer(length);
-    for (int i = offset; i < limit; i++) {
-      buf.append(charArray.getCharAt(i));
+    if (coder == 0) {
+      // Latin1 encoded
+      for (int i = 0; i < length; i++) {
+        buf.append((char)(byteArray.getByteAt(i) & 0xff));
+      }
+    } else {
+      // UTF16 encoded
+      for (int i = 0; i < length; i++) {
+        buf.append(byteArray.getCharAt(i));
+      }
     }
     return buf.toString();
   }
@@ -141,21 +150,14 @@
   }
 
   public static String stringOopToString(Oop stringOop) {
-    if (offsetField == null) {
-      InstanceKlass k = (InstanceKlass) stringOop.getKlass();
-      offsetField = (IntField) k.findField("offset", "I");   // optional
-      countField  = (IntField) k.findField("count",  "I");   // optional
-      valueField  = (OopField) k.findField("value",  "[C");
-      if (Assert.ASSERTS_ENABLED) {
-         Assert.that(valueField != null, "Field \'value\' of java.lang.String not found");
-      }
+    InstanceKlass k = (InstanceKlass) stringOop.getKlass();
+    coderField  = (ByteField) k.findField("coder", "B");
+    valueField  = (OopField) k.findField("value",  "[B");
+    if (Assert.ASSERTS_ENABLED) {
+       Assert.that(coderField != null, "Field \'coder\' of java.lang.String not found");
+       Assert.that(valueField != null, "Field \'value\' of java.lang.String not found");
     }
-    if (offsetField != null && countField != null) {
-      return charArrayToString((TypeArray) valueField.getValue(stringOop),
-                               offsetField.getValue(stringOop),
-                               countField.getValue(stringOop));
-    }
-    return  charArrayToString((TypeArray) valueField.getValue(stringOop));
+    return byteArrayToString((TypeArray) valueField.getValue(stringOop), coderField.getValue(stringOop));
   }
 
   public static String stringOopToEscapedString(Oop stringOop) {
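
The hunks above move the Serviceability Agent from the old String layout (a char[] value plus optional offset/count fields) to the Compact Strings layout, where value is a byte[] and a one-byte coder selects the encoding (0 = Latin-1, 1 = UTF-16). For illustration only, a minimal stand-alone Java sketch of the same decoding rule; the helper name is made up, and the UTF-16 byte pairing is assumed big-endian here, whereas the SA code above reads chars through getCharAt:

    // Illustrative only: decode a Compact Strings value array the way
    // byteArrayToString above does (coder 0 = Latin-1, 1 = UTF-16).
    static String decodeValue(byte[] value, byte coder) {
        int length = value.length >> coder;           // UTF-16 uses two bytes per char
        StringBuilder buf = new StringBuilder(length);
        if (coder == 0) {
            for (int i = 0; i < length; i++) {
                buf.append((char) (value[i] & 0xff)); // Latin-1: one byte per char
            }
        } else {
            for (int i = 0; i < length; i++) {
                // assumed big-endian byte pairing, purely for illustration
                buf.append((char) (((value[2 * i] & 0xff) << 8) | (value[2 * i + 1] & 0xff)));
            }
        }
        return buf.toString();
    }
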
--- a/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/tools/HeapSummary.java	Wed Nov 11 23:51:57 2015 -0500
@@ -268,8 +268,8 @@
             VM vm = VM.getVM();
             SystemDictionary sysDict = vm.getSystemDictionary();
             InstanceKlass strKlass = sysDict.getStringKlass();
-            // String has a field named 'value' of type 'char[]'.
-            stringValueField = (OopField) strKlass.findField("value", "[C");
+            // String has a field named 'value' of type 'byte[]'.
+            stringValueField = (OopField) strKlass.findField("value", "[B");
          }
 
          private long stringSize(Instance instance) {
--- a/agent/src/share/classes/sun/jvm/hotspot/utilities/Hashtable.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/agent/src/share/classes/sun/jvm/hotspot/utilities/Hashtable.java	Wed Nov 11 23:51:57 2015 -0500
@@ -61,9 +61,8 @@
     long h = 0;
     int s = 0;
     int len = buf.length;
-    // Emulate the unsigned int in java_lang_String::hash_code
     while (len-- > 0) {
-      h = 31*h + (0xFFFFFFFFL & buf[s]);
+      h = 31*h + (0xFFL & buf[s]);
       s++;
     }
     return h & 0xFFFFFFFFL;
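
The loop above is the usual 31-based polynomial hash, now taken over unsigned byte values (0xFF mask) instead of emulating the unsigned char arithmetic of java_lang_String::hash_code. A minimal Java sketch of the same computation, for illustration only (the method name is made up):

    // Illustrative only: polynomial hash over unsigned bytes, truncated to 32 bits.
    static long byteHash(byte[] buf) {
        long h = 0;
        for (byte b : buf) {
            h = 31 * h + (b & 0xFF);  // mask keeps the byte's unsigned value
        }
        return h & 0xFFFFFFFFL;
    }
    // For the Latin-1 bytes of "ab": 31*0 + 97 = 97, then 31*97 + 98 = 3105,
    // which matches "ab".hashCode().
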
--- a/make/bsd/makefiles/gcc.make	Wed Nov 11 18:04:33 2015 -0500
+++ b/make/bsd/makefiles/gcc.make	Wed Nov 11 23:51:57 2015 -0500
@@ -97,6 +97,7 @@
   # prints the numbers (e.g. "2.95", "3.2.1")
   CC_VER_MAJOR := $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f1)
   CC_VER_MINOR := $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f2)
+  CC_VER_MICRO := $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f3)
 endif
 
 ifeq ($(USE_CLANG), true)
@@ -333,6 +334,10 @@
     $(error "Update compiler workarounds for Clang $(CC_VER_MAJOR).$(CC_VER_MINOR)")
   endif
 else
+  # Do not allow GCC 4.1.1
+  ifeq ($(shell expr $(CC_VER_MAJOR) = 4 \& $(CC_VER_MINOR) = 1 \& $(CC_VER_MICRO) = 1), 1)
+    $(error "GCC $(CC_VER_MAJOR).$(CC_VER_MINOR).$(CC_VER_MICRO) not supported because of https://gcc.gnu.org/bugzilla/show_bug.cgi?id=27724")
+  endif
   # 6835796. Problem in GCC 4.3.0 with mulnode.o optimized compilation.
   ifeq ($(shell expr $(CC_VER_MAJOR) = 4 \& $(CC_VER_MINOR) = 3), 1)
     OPT_CFLAGS/mulnode.o += $(OPT_CFLAGS/NOOPT)
--- a/make/gensrc/Gensrc-jdk.vm.ci.gmk	Wed Nov 11 18:04:33 2015 -0500
+++ b/make/gensrc/Gensrc-jdk.vm.ci.gmk	Wed Nov 11 23:51:57 2015 -0500
@@ -56,10 +56,10 @@
 ################################################################################
 
 PROC_SRC_SUBDIRS := \
-    jdk.vm.ci.compiler \
     jdk.vm.ci.hotspot \
     jdk.vm.ci.hotspot.amd64 \
     jdk.vm.ci.hotspot.sparc \
+    jdk.vm.ci.runtime \
     #
 
 PROC_SRC_DIRS := $(patsubst %, $(SRC_DIR)/%/src, $(PROC_SRC_SUBDIRS))
@@ -94,11 +94,7 @@
 $(GENSRC_DIR)/META-INF/services/jdk.vm.ci.options.OptionDescriptors: \
     $(GENSRC_DIR)/_gensrc_proc_done
 	$(MKDIR) -p $(@D)
-	($(CD) $(GENSRC_DIR)/META-INF/jvmci.options && \
-	    $(RM) -f $@; \
-	    for i in $$(ls); do \
-	      echo $${i}_OptionDescriptors >> $@; \
-	    done)
+	$(FIND) $(GENSRC_DIR) -name '*_OptionDescriptors.java' | $(SED) 's:.*/jdk\.vm\.ci/\(.*\)\.java:\1:' | $(TR) '/' '.' > $@
 
 TARGETS += $(GENSRC_DIR)/META-INF/services/jdk.vm.ci.options.OptionDescriptors
 
@@ -109,7 +105,11 @@
 	($(CD) $(GENSRC_DIR)/META-INF/jvmci.providers && \
 	    for i in $$($(LS)); do \
 	      c=$$($(CAT) $$i | $(TR) -d '\n\r'); \
-	      $(ECHO) $$i >> $(GENSRC_DIR)/META-INF/services/$$c; \
+	      $(ECHO) $$i >> $(GENSRC_DIR)/META-INF/services/$$c.tmp; \
+	    done)
+	($(CD) $(GENSRC_DIR)/META-INF/services && \
+	    for i in $$($(LS) *.tmp); do \
+	      $(MV) $$i $${i%.tmp}; \
 	    done)
 	$(TOUCH) $@
 
--- a/make/linux/makefiles/gcc.make	Wed Nov 11 18:04:33 2015 -0500
+++ b/make/linux/makefiles/gcc.make	Wed Nov 11 23:51:57 2015 -0500
@@ -60,6 +60,12 @@
   # prints the numbers (e.g. "2.95", "3.2.1")
   CC_VER_MAJOR := $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f1)
   CC_VER_MINOR := $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f2)
+  CC_VER_MICRO := $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f3)
+  # Workaround Ubuntu bug where -dumpversion doesn't print a micro version
+  # https://bugs.launchpad.net/ubuntu/+source/gcc-4.8/+bug/1360404
+  ifeq ($(CC_VER_MICRO),)
+    CC_VER_MICRO := "0"
+  endif
 endif
 
 ifeq ($(USE_CLANG), true)
@@ -266,6 +272,10 @@
     OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT)
   endif
 else
+  # Do not allow GCC 4.1.1
+  ifeq ($(shell expr $(CC_VER_MAJOR) = 4 \& $(CC_VER_MINOR) = 1 \& $(CC_VER_MICRO) = 1), 1)
+    $(error "GCC $(CC_VER_MAJOR).$(CC_VER_MINOR).$(CC_VER_MICRO) not supported because of https://gcc.gnu.org/bugzilla/show_bug.cgi?id=27724")
+  endif
   # 6835796. Problem in GCC 4.3.0 with mulnode.o optimized compilation.
   ifeq ($(shell expr $(CC_VER_MAJOR) = 4 \& $(CC_VER_MINOR) = 3), 1)
     OPT_CFLAGS/mulnode.o += $(OPT_CFLAGS/NOOPT)
--- a/make/solaris/makefiles/gcc.make	Wed Nov 11 18:04:33 2015 -0500
+++ b/make/solaris/makefiles/gcc.make	Wed Nov 11 23:51:57 2015 -0500
@@ -39,6 +39,7 @@
 # prints the numbers (e.g. "2.95", "3.2.1")
 CC_VER_MAJOR := $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f1)
 CC_VER_MINOR := $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f2)
+CC_VER_MICRO := $(shell $(CC) -dumpversion | sed 's/egcs-//' | cut -d'.' -f3)
 
 # Check for the versions of C++ and C compilers ($CXX and $CC) used.
 
@@ -160,6 +161,10 @@
     OPT_CFLAGS/loopTransform.o += $(OPT_CFLAGS/NOOPT)
   endif
 else
+  # Do not allow GCC 4.1.1
+  ifeq ($(shell expr $(CC_VER_MAJOR) = 4 \& $(CC_VER_MINOR) = 1 \& $(CC_VER_MICRO) = 1), 1)
+    $(error "GCC $(CC_VER_MAJOR).$(CC_VER_MINOR).$(CC_VER_MICRO) not supported because of https://gcc.gnu.org/bugzilla/show_bug.cgi?id=27724")
+  endif
   # 6835796. Problem in GCC 4.3.0 with mulnode.o optimized compilation.
   ifeq ($(shell expr $(CC_VER_MAJOR) = 4 \& $(CC_VER_MINOR) = 3), 1)
     OPT_CFLAGS/mulnode.o += $(OPT_CFLAGS/NOOPT)
--- a/src/cpu/aarch64/vm/aarch64.ad	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/aarch64/vm/aarch64.ad	Wed Nov 11 23:51:57 2015 -0500
@@ -14150,6 +14150,7 @@
 instruct string_compare(iRegP_R1 str1, iRegI_R2 cnt1, iRegP_R3 str2, iRegI_R4 cnt2,
                         iRegI_R0 result, iRegP_R10 tmp1, rFlagsReg cr)
 %{
+  predicate(!CompactStrings);
   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(KILL tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
 
@@ -14165,6 +14166,7 @@
 instruct string_indexof(iRegP_R1 str1, iRegI_R4 cnt1, iRegP_R3 str2, iRegI_R2 cnt2,
        iRegI_R0 result, iRegI tmp1, iRegI tmp2, iRegI tmp3, iRegI tmp4, rFlagsReg cr)
 %{
+  predicate(!CompactStrings);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2,
          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
@@ -14184,6 +14186,7 @@
                  immI_le_4 int_cnt2, iRegI_R0 result, iRegI tmp1, iRegI tmp2,
                  iRegI tmp3, iRegI tmp4, rFlagsReg cr)
 %{
+  predicate(!CompactStrings);
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1,
          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr);
@@ -14203,6 +14206,7 @@
 instruct string_equals(iRegP_R1 str1, iRegP_R3 str2, iRegI_R4 cnt,
                         iRegI_R0 result, iRegP_R10 tmp, rFlagsReg cr)
 %{
+  predicate(!CompactStrings);
   match(Set result (StrEquals (Binary str1 str2) cnt));
   effect(KILL tmp, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr);
 
@@ -14218,6 +14222,7 @@
 instruct array_equals(iRegP_R1 ary1, iRegP_R2 ary2, iRegI_R0 result,
                       iRegP_R10 tmp, rFlagsReg cr)
 %{
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
   match(Set result (AryEq ary1 ary2));
   effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, KILL cr);
 
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -483,15 +483,6 @@
   return offset;
 }
 
-
-// This is the fast version of java.lang.String.compare; it has not
-// OSR-entry and therefore, we generate a slow version for OSR's
-void LIR_Assembler::emit_string_compare(LIR_Opr arg0, LIR_Opr arg1, LIR_Opr dst, CodeEmitInfo* info)  {
-  __ mov(r2, (address)__FUNCTION__);
-  __ call_Unimplemented();
-}
-
-
 void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) {
   _masm->code_section()->relocate(adr, relocInfo::poll_type);
   int pc_offset = code_offset();
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/aarch64/vm/globals_aarch64.hpp	Wed Nov 11 23:51:57 2015 -0500
@@ -59,9 +59,9 @@
 #define DEFAULT_STACK_RED_PAGES (1)
 #define DEFAULT_STACK_SHADOW_PAGES (4 DEBUG_ONLY(+5))
 
-#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES
-#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES
-#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES
+#define MIN_STACK_YELLOW_PAGES 1
+#define MIN_STACK_RED_PAGES    1
+#define MIN_STACK_SHADOW_PAGES 1
 
 define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES);
 define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES);
@@ -79,6 +79,9 @@
 
 define_pd_global(uintx, TypeProfileLevel, 111);
 
+// No performance work done here yet.
+define_pd_global(bool, CompactStrings, false);
+
 // avoid biased locking while we are bootstrapping the aarch64 build
 define_pd_global(bool, UseBiasedLocking, false);
 
--- a/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/aarch64/vm/jvmciCodeInstaller_aarch64.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -38,11 +38,11 @@
   Unimplemented();
 }
 
-void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
+void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
   Unimplemented();
 }
 
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -1777,7 +1777,7 @@
   const Register obj_reg  = r19;  // Will contain the oop
   const Register lock_reg = r13;  // Address of compiler lock object (BasicLock)
   const Register old_hdr  = r13;  // value of old header at unlock time
-  const Register tmp = c_rarg3;
+  const Register tmp = lr;
 
   Label slow_path_lock;
   Label lock_done;
--- a/src/cpu/ppc/vm/globals_ppc.hpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/ppc/vm/globals_ppc.hpp	Wed Nov 11 23:51:57 2015 -0500
@@ -72,6 +72,9 @@
 
 define_pd_global(uintx, TypeProfileLevel, 111);
 
+// No performance work done here yet.
+define_pd_global(bool, CompactStrings, false);
+
 // Platform dependent flag handling: flags only defined on this platform.
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint)  \
                                                                             \
--- a/src/cpu/ppc/vm/jvmciCodeInstaller_ppc.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/ppc/vm/jvmciCodeInstaller_ppc.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -38,11 +38,11 @@
   Unimplemented();
 }
 
-void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
   Unimplemented();
 }
 
-void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
+void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
   Unimplemented();
 }
 
--- a/src/cpu/ppc/vm/ppc.ad	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/ppc/vm/ppc.ad	Wed Nov 11 23:51:57 2015 -0500
@@ -2054,11 +2054,11 @@
     return (UsePopCountInstruction && VM_Version::has_popcntw());
 
   case Op_StrComp:
-    return SpecialStringCompareTo;
+    return SpecialStringCompareTo && !CompactStrings;
   case Op_StrEquals:
-    return SpecialStringEquals;
+    return SpecialStringEquals && !CompactStrings;
   case Op_StrIndexOf:
-    return SpecialStringIndexOf;
+    return SpecialStringIndexOf && !CompactStrings;
   }
 
   return true;  // Per default match rules are supported.
@@ -11076,7 +11076,7 @@
                                   immP needleImm, immL offsetImm, immI_1 needlecntImm,
                                   iRegIdst tmp1, iRegIdst tmp2,
                                   flagsRegCR0 cr0, flagsRegCR1 cr1) %{
-  predicate(SpecialStringIndexOf);  // type check implicit by parameter type, See Matcher::match_rule_supported
+  predicate(SpecialStringIndexOf && !CompactStrings);  // type check implicit by parameter type, See Matcher::match_rule_supported
   match(Set result (StrIndexOf (Binary haystack haycnt) (Binary (AddP needleImm offsetImm) needlecntImm)));
 
   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, KILL cr0, KILL cr1);
@@ -11119,7 +11119,7 @@
   effect(USE_KILL needle, /* TDEF needle, */ TEMP_DEF result,
          TEMP tmp1, TEMP tmp2);
   // Required for EA: check if it is still a type_array.
-  predicate(SpecialStringIndexOf && n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
+  predicate(SpecialStringIndexOf && !CompactStrings && n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
   ins_cost(180);
 
@@ -11166,7 +11166,7 @@
   effect(USE_KILL haycnt, /* better: TDEF haycnt, */ TEMP_DEF result,
          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, KILL cr0, KILL cr1, KILL cr6);
   // Required for EA: check if it is still a type_array.
-  predicate(SpecialStringIndexOf && n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
+  predicate(SpecialStringIndexOf && !CompactStrings && n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop() &&
             n->in(3)->in(1)->bottom_type()->is_aryptr()->const_oop()->is_type_array());
   ins_cost(250);
 
@@ -11199,7 +11199,7 @@
   effect(USE_KILL haycnt, USE_KILL needlecnt, /*better: TDEF haycnt, TDEF needlecnt,*/
          TEMP_DEF result,
          TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr0, KILL cr1, KILL cr6);
-  predicate(SpecialStringIndexOf);  // See Matcher::match_rule_supported.
+  predicate(SpecialStringIndexOf && !CompactStrings);  // See Matcher::match_rule_supported.
   ins_cost(300);
 
   ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
@@ -11223,7 +11223,7 @@
   match(Set result (StrEquals (Binary str1 str2) cntImm));
   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2,
          KILL cr0, KILL cr6, KILL ctr);
-  predicate(SpecialStringEquals);  // See Matcher::match_rule_supported.
+  predicate(SpecialStringEquals && !CompactStrings);  // See Matcher::match_rule_supported.
   ins_cost(250);
 
   ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
@@ -11246,7 +11246,7 @@
   match(Set result (StrEquals (Binary str1 str2) cnt));
   effect(TEMP_DEF result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5,
          KILL cr0, KILL cr1, KILL cr6, KILL ctr);
-  predicate(SpecialStringEquals);  // See Matcher::match_rule_supported.
+  predicate(SpecialStringEquals && !CompactStrings);  // See Matcher::match_rule_supported.
   ins_cost(300);
 
   ins_alignment(8); // 'compute_padding()' gets called, up to this number-1 nops will get inserted.
@@ -11266,6 +11266,7 @@
 // Use dst register classes if register gets killed, as it is the case for TEMP operands!
 instruct string_compare(rarg1RegP str1, rarg2RegP str2, rarg3RegI cnt1, rarg4RegI cnt2, iRegIdst result,
                         iRegPdst tmp, flagsRegCR0 cr0, regCTR ctr) %{
+  predicate(!CompactStrings);
   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(USE_KILL cnt1, USE_KILL cnt2, USE_KILL str1, USE_KILL str2, TEMP_DEF result, TEMP tmp, KILL cr0, KILL ctr);
   ins_cost(300);
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Wed Nov 11 23:51:57 2015 -0500
@@ -124,6 +124,8 @@
     impdep1_op3  = 0x36,
     aes3_op3     = 0x36,
     sha_op3      = 0x36,
+    bmask_op3    = 0x36,
+    bshuffle_op3   = 0x36,
     alignaddr_op3  = 0x36,
     faligndata_op3 = 0x36,
     flog3_op3    = 0x36,
@@ -194,6 +196,7 @@
     fnegd_opf          = 0x06,
 
     alignaddr_opf      = 0x18,
+    bmask_opf          = 0x19,
 
     fadds_opf          = 0x41,
     faddd_opf          = 0x42,
@@ -204,6 +207,7 @@
 
     fmuls_opf          = 0x49,
     fmuld_opf          = 0x4a,
+    bshuffle_opf       = 0x4c,
     fdivs_opf          = 0x4d,
     fdivd_opf          = 0x4e,
 
@@ -1226,6 +1230,9 @@
 
   void edge8n( Register s1, Register s2, Register d ) { vis2_only(); emit_int32( op(arith_op) | rd(d) | op3(edge_op3) | rs1(s1) | opf(edge8n_opf) | rs2(s2)); }
 
+  void bmask( Register s1, Register s2, Register d ) { vis2_only(); emit_int32( op(arith_op) | rd(d) | op3(bmask_op3) | rs1(s1) | opf(bmask_opf) | rs2(s2)); }
+  void bshuffle( FloatRegister s1, FloatRegister s2, FloatRegister d ) { vis2_only(); emit_int32( op(arith_op) | fd(d, FloatRegisterImpl::D) | op3(bshuffle_op3) | fs1(s1, FloatRegisterImpl::D) | opf(bshuffle_opf) | fs2(s2, FloatRegisterImpl::D)); }
+
   // VIS3 instructions
 
   void movstosw( FloatRegister s, Register d ) { vis3_only();  emit_int32( op(arith_op) | rd(d) | op3(mftoi_op3) | opf(mstosw_opf) | fs2(s, FloatRegisterImpl::S)); }
--- a/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/sparc/vm/c1_LIRAssembler_sparc.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -232,118 +232,6 @@
 }
 
 
-// Optimized Library calls
-// This is the fast version of java.lang.String.compare; it has not
-// OSR-entry and therefore, we generate a slow version for OSR's
-void LIR_Assembler::emit_string_compare(LIR_Opr left, LIR_Opr right, LIR_Opr dst, CodeEmitInfo* info) {
-  Register str0 = left->as_register();
-  Register str1 = right->as_register();
-
-  Label Ldone;
-
-  Register result = dst->as_register();
-  {
-    // Get a pointer to the first character of string0 in tmp0
-    //   and get string0.length() in str0
-    // Get a pointer to the first character of string1 in tmp1
-    //   and get string1.length() in str1
-    // Also, get string0.length()-string1.length() in
-    //   o7 and get the condition code set
-    // Note: some instructions have been hoisted for better instruction scheduling
-
-    Register tmp0 = L0;
-    Register tmp1 = L1;
-    Register tmp2 = L2;
-
-    int  value_offset = java_lang_String:: value_offset_in_bytes(); // char array
-    if (java_lang_String::has_offset_field()) {
-      int offset_offset = java_lang_String::offset_offset_in_bytes(); // first character position
-      int  count_offset = java_lang_String:: count_offset_in_bytes();
-      __ load_heap_oop(str0, value_offset, tmp0);
-      __ ld(str0, offset_offset, tmp2);
-      __ add(tmp0, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp0);
-      __ ld(str0, count_offset, str0);
-      __ sll(tmp2, exact_log2(sizeof(jchar)), tmp2);
-    } else {
-      __ load_heap_oop(str0, value_offset, tmp1);
-      __ add(tmp1, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp0);
-      __ ld(tmp1, arrayOopDesc::length_offset_in_bytes(), str0);
-    }
-
-    // str1 may be null
-    add_debug_info_for_null_check_here(info);
-
-    if (java_lang_String::has_offset_field()) {
-      int offset_offset = java_lang_String::offset_offset_in_bytes(); // first character position
-      int  count_offset = java_lang_String:: count_offset_in_bytes();
-      __ load_heap_oop(str1, value_offset, tmp1);
-      __ add(tmp0, tmp2, tmp0);
-
-      __ ld(str1, offset_offset, tmp2);
-      __ add(tmp1, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp1);
-      __ ld(str1, count_offset, str1);
-      __ sll(tmp2, exact_log2(sizeof(jchar)), tmp2);
-      __ add(tmp1, tmp2, tmp1);
-    } else {
-      __ load_heap_oop(str1, value_offset, tmp2);
-      __ add(tmp2, arrayOopDesc::base_offset_in_bytes(T_CHAR), tmp1);
-      __ ld(tmp2, arrayOopDesc::length_offset_in_bytes(), str1);
-    }
-    __ subcc(str0, str1, O7);
-  }
-
-  {
-    // Compute the minimum of the string lengths, scale it and store it in limit
-    Register count0 = I0;
-    Register count1 = I1;
-    Register limit  = L3;
-
-    Label Lskip;
-    __ sll(count0, exact_log2(sizeof(jchar)), limit);             // string0 is shorter
-    __ br(Assembler::greater, true, Assembler::pt, Lskip);
-    __ delayed()->sll(count1, exact_log2(sizeof(jchar)), limit);  // string1 is shorter
-    __ bind(Lskip);
-
-    // If either string is empty (or both of them) the result is the difference in lengths
-    __ cmp(limit, 0);
-    __ br(Assembler::equal, true, Assembler::pn, Ldone);
-    __ delayed()->mov(O7, result);  // result is difference in lengths
-  }
-
-  {
-    // Neither string is empty
-    Label Lloop;
-
-    Register base0 = L0;
-    Register base1 = L1;
-    Register chr0  = I0;
-    Register chr1  = I1;
-    Register limit = L3;
-
-    // Shift base0 and base1 to the end of the arrays, negate limit
-    __ add(base0, limit, base0);
-    __ add(base1, limit, base1);
-    __ neg(limit);  // limit = -min{string0.length(), string1.length()}
-
-    __ lduh(base0, limit, chr0);
-    __ bind(Lloop);
-    __ lduh(base1, limit, chr1);
-    __ subcc(chr0, chr1, chr0);
-    __ br(Assembler::notZero, false, Assembler::pn, Ldone);
-    assert(chr0 == result, "result must be pre-placed");
-    __ delayed()->inccc(limit, sizeof(jchar));
-    __ br(Assembler::notZero, true, Assembler::pt, Lloop);
-    __ delayed()->lduh(base0, limit, chr0);
-  }
-
-  // If strings are equal up to min length, return the length difference.
-  __ mov(O7, result);
-
-  // Otherwise, return the difference between the first mismatched chars.
-  __ bind(Ldone);
-}
-
-
 // --------------------------------------------------------------------------------------------
 
 void LIR_Assembler::monitorexit(LIR_Opr obj_opr, LIR_Opr lock_opr, Register hdr, int monitor_no) {
--- a/src/cpu/sparc/vm/globals_sparc.hpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/sparc/vm/globals_sparc.hpp	Wed Nov 11 23:51:57 2015 -0500
@@ -86,6 +86,8 @@
 
 define_pd_global(uintx, TypeProfileLevel, 111);
 
+define_pd_global(bool, CompactStrings, true);
+
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
                                                                             \
   product(intx, UseVIS, 99,                                                 \
--- a/src/cpu/sparc/vm/jvmciCodeInstaller_sparc.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/sparc/vm/jvmciCodeInstaller_sparc.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -66,6 +66,25 @@
   }
 }
 
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+  address pc = _instructions->start() + pc_offset;
+  if (HotSpotMetaspaceConstantImpl::compressed(constant)) {
+#ifdef _LP64
+    NativeMovConstReg32* move = nativeMovConstReg32_at(pc);
+    narrowKlass narrowOop = record_narrow_metadata_reference(constant);
+    move->set_data((intptr_t)narrowOop);
+    TRACE_jvmci_3("relocating (narrow metaspace constant) at %p/%p", pc, narrowOop);
+#else
+    fatal("compressed Klass* on 32bit");
+#endif
+  } else {
+    NativeMovConstReg* move = nativeMovConstReg_at(pc);
+    Metadata* reference = record_metadata_reference(constant);
+    move->set_data((intptr_t)reference);
+    TRACE_jvmci_3("relocating (metaspace constant) at %p/%p", pc, reference);
+  }
+}
+
 void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
   address pc = _instructions->start() + pc_offset;
   NativeInstruction* inst = nativeInstruction_at(pc);
@@ -87,10 +106,6 @@
   }
 }
 
-void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
-  fatal("CodeInstaller::pd_relocate_CodeBlob - sparc unimp");
-}
-
 void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
   address pc = (address) inst;
   if (inst->is_call()) {
@@ -168,16 +183,25 @@
 
 // convert JVMCI register indices (as used in oop maps) to HotSpot registers
 VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg) {
-  if (jvmci_reg < RegisterImpl::number_of_registers) {
+  // JVMCI Registers are numbered as follows:
+  //   0..31: Thirty-two General Purpose registers (CPU Registers)
+  //   32..63: Thirty-two single precision float registers
+  //   64..95: Thirty-two double precision float registers
+  //   96..111: Sixteen quad precision float registers
+  if (jvmci_reg < 32) {
     return as_Register(jvmci_reg)->as_VMReg();
   } else {
-    jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers;
-    floatRegisterNumber += MAX2(0, floatRegisterNumber-32); // Beginning with f32, only every second register is going to be addressed
-    if (floatRegisterNumber < FloatRegisterImpl::number_of_registers) {
-      return as_FloatRegister(floatRegisterNumber)->as_VMReg();
+    jint floatRegisterNumber;
+    if(jvmci_reg < 64) { // Single precision
+      floatRegisterNumber = jvmci_reg - 32;
+    } else if(jvmci_reg < 96) {
+      floatRegisterNumber = 2 * (jvmci_reg - 64);
+    } else if(jvmci_reg < 112) {
+      floatRegisterNumber = 4 * (jvmci_reg - 96);
+    } else {
+      fatal("Unknown jvmci register");
     }
-    ShouldNotReachHere();
-    return NULL;
+    return as_FloatRegister(floatRegisterNumber)->as_VMReg();
   }
 }
 
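The rewritten get_hotspot_reg above maps the flat JVMCI numbering (0..31 CPU, 32..63 single, 64..95 double, 96..111 quad) onto SPARC float register numbers: doubles land on even registers and quads on multiples of four. A sketch of just the index arithmetic, in Java for illustration only:

    // Illustrative only: mirrors the index arithmetic in get_hotspot_reg above
    // for JVMCI register indices >= 32 (float registers).
    static int floatRegisterNumber(int jvmciReg) {
        if (jvmciReg < 64) {          // 32..63: single precision -> f0..f31
            return jvmciReg - 32;
        } else if (jvmciReg < 96) {   // 64..95: double precision -> f0, f2, f4, ...
            return 2 * (jvmciReg - 64);
        } else if (jvmciReg < 112) {  // 96..111: quad precision -> f0, f4, f8, ...
            return 4 * (jvmciReg - 96);
        }
        throw new IllegalArgumentException("Unknown jvmci register " + jvmciReg);
    }
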
--- a/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -44,6 +44,9 @@
 #include "gc/g1/g1SATBCardTableModRefBS.hpp"
 #include "gc/g1/heapRegion.hpp"
 #endif // INCLUDE_ALL_GCS
+#ifdef COMPILER2
+#include "opto/intrinsicnode.hpp"
+#endif
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
@@ -4253,27 +4256,385 @@
   }
 }
 
-// Compare char[] arrays aligned to 4 bytes.
-void MacroAssembler::char_arrays_equals(Register ary1, Register ary2,
-                                        Register limit, Register result,
-                                        Register chr1, Register chr2, Label& Ldone) {
-  Label Lvector, Lloop;
-  assert(chr1 == result, "should be the same");
-
-  // Note: limit contains number of bytes (2*char_elements) != 0.
-  andcc(limit, 0x2, chr1); // trailing character ?
+#ifdef COMPILER2
+
+// Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure.
+void MacroAssembler::string_compress_16(Register src, Register dst, Register cnt, Register result,
+                                        Register tmp1, Register tmp2, Register tmp3, Register tmp4,
+                                        FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone) {
+  Label Lloop, Lslow;
+  assert(UseVIS >= 3, "VIS3 is required");
+  assert_different_registers(src, dst, cnt, tmp1, tmp2, tmp3, tmp4, result);
+  assert_different_registers(ftmp1, ftmp2, ftmp3);
+
+  // Check if cnt >= 8 (= 16 bytes)
+  cmp(cnt, 8);
+  br(Assembler::less, false, Assembler::pn, Lslow);
+  delayed()->mov(cnt, result); // copy count
+
+  // Check for 8-byte alignment of src and dst
+  or3(src, dst, tmp1);
+  andcc(tmp1, 7, G0);
+  br(Assembler::notZero, false, Assembler::pn, Lslow);
+  delayed()->nop();
+
+  // Set mask for bshuffle instruction
+  Register mask = tmp4;
+  set(0x13579bdf, mask);
+  bmask(mask, G0, G0);
+
+  // Set mask to 0xff00 ff00 ff00 ff00 to check for non-latin1 characters
+  Assembler::sethi(0xff00fc00, mask); // mask = 0x0000 0000 ff00 fc00
+  add(mask, 0x300, mask);             // mask = 0x0000 0000 ff00 ff00
+  sllx(mask, 32, tmp1);               // tmp1 = 0xff00 ff00 0000 0000
+  or3(mask, tmp1, mask);              // mask = 0xff00 ff00 ff00 ff00
+
+  // Load first 8 bytes
+  ldx(src, 0, tmp1);
+
+  bind(Lloop);
+  // Load next 8 bytes
+  ldx(src, 8, tmp2);
+
+  // Check for non-latin1 character by testing if the most significant byte of a char is set.
+  // Although we have to move the data between integer and floating point registers, this is
+  // still faster than the corresponding VIS instructions (ford/fand/fcmpd).
+  or3(tmp1, tmp2, tmp3);
+  btst(tmp3, mask);
+  // annul zeroing if branch is not taken to preserve original count
+  brx(Assembler::notZero, true, Assembler::pn, Ldone);
+  delayed()->mov(G0, result); // 0 - failed
+
+  // Move bytes into float register
+  movxtod(tmp1, ftmp1);
+  movxtod(tmp2, ftmp2);
+
+  // Compress by copying one byte per char from ftmp1 and ftmp2 to ftmp3
+  bshuffle(ftmp1, ftmp2, ftmp3);
+  stf(FloatRegisterImpl::D, ftmp3, dst, 0);
+
+  // Increment addresses and decrement count
+  inc(src, 16);
+  inc(dst, 8);
+  dec(cnt, 8);
+
+  cmp(cnt, 8);
+  // annul LDX if branch is not taken to prevent access past end of string
+  br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
+  delayed()->ldx(src, 0, tmp1);
+
+  // Fallback to slow version
+  bind(Lslow);
+}
+
+// Compress char[] to byte[]. Return 0 on failure.
+void MacroAssembler::string_compress(Register src, Register dst, Register cnt, Register result, Register tmp, Label& Ldone) {
+  Label Lloop;
+  assert_different_registers(src, dst, cnt, tmp, result);
+
+  lduh(src, 0, tmp);
+
+  bind(Lloop);
+  inc(src, sizeof(jchar));
+  cmp(tmp, 0xff);
+  // annul zeroing if branch is not taken to preserve original count
+  br(Assembler::greater, true, Assembler::pn, Ldone); // don't check xcc
+  delayed()->mov(G0, result); // 0 - failed
+  deccc(cnt);
+  stb(tmp, dst, 0);
+  inc(dst);
+  // annul LDUH if branch is not taken to prevent access past end of string
+  br(Assembler::notZero, true, Assembler::pt, Lloop);
+  delayed()->lduh(src, 0, tmp); // hoisted
+}
+
+// Inflate byte[] to char[] by inflating 16 bytes at once.
+void MacroAssembler::string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
+                                       FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone) {
+  Label Lloop, Lslow;
+  assert(UseVIS >= 3, "VIS3 is required");
+  assert_different_registers(src, dst, cnt, tmp);
+  assert_different_registers(ftmp1, ftmp2, ftmp3, ftmp4);
+
+  // Check if cnt >= 8 (= 16 bytes)
+  cmp(cnt, 8);
+  br(Assembler::less, false, Assembler::pn, Lslow);
+  delayed()->nop();
+
+  // Check for 8-byte alignment of src and dst
+  or3(src, dst, tmp);
+  andcc(tmp, 7, G0);
+  br(Assembler::notZero, false, Assembler::pn, Lslow);
+  // Initialize float register to zero
+  FloatRegister zerof = ftmp4;
+  delayed()->fzero(FloatRegisterImpl::D, zerof);
+
+  // Load first 8 bytes
+  ldf(FloatRegisterImpl::D, src, 0, ftmp1);
+
+  bind(Lloop);
+  inc(src, 8);
+  dec(cnt, 8);
+
+  // Inflate the string by interleaving each byte from the source array
+  // with a zero byte and storing the result in the destination array.
+  fpmerge(zerof, ftmp1->successor(), ftmp2);
+  stf(FloatRegisterImpl::D, ftmp2, dst, 8);
+  fpmerge(zerof, ftmp1, ftmp3);
+  stf(FloatRegisterImpl::D, ftmp3, dst, 0);
+
+  inc(dst, 16);
+
+  cmp(cnt, 8);
+  // annul LDX if branch is not taken to prevent access past end of string
+  br(Assembler::greaterEqual, true, Assembler::pt, Lloop);
+  delayed()->ldf(FloatRegisterImpl::D, src, 0, ftmp1);
+
+  // Fallback to slow version
+  bind(Lslow);
+}
+
+// Inflate byte[] to char[].
+void MacroAssembler::string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone) {
+  Label Loop;
+  assert_different_registers(src, dst, cnt, tmp);
+
+  ldub(src, 0, tmp);
+  bind(Loop);
+  inc(src);
+  deccc(cnt);
+  sth(tmp, dst, 0);
+  inc(dst, sizeof(jchar));
+  // annul LDUB if branch is not taken to prevent access past end of string
+  br(Assembler::notZero, true, Assembler::pt, Loop);
+  delayed()->ldub(src, 0, tmp); // hoisted
+}
+
+void MacroAssembler::string_compare(Register str1, Register str2,
+                                    Register cnt1, Register cnt2,
+                                    Register tmp1, Register tmp2,
+                                    Register result, int ae) {
+  Label Ldone, Lloop;
+  assert_different_registers(str1, str2, cnt1, cnt2, tmp1, result);
+  int stride1, stride2;
+
+  // Note: Making use of the fact that compareTo(a, b) == -compareTo(b, a)
+  // we interchange str1 and str2 in the UL case and negate the result.
+  // Like this, str1 is always latin1 encoded, except for the UU case.
+
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    srl(cnt2, 1, cnt2);
+  }
+
+  // See if the lengths are different, and calculate min in cnt1.
+  // Save diff in case we need it for a tie-breaker.
+  Label Lskip;
+  Register diff = tmp1;
+  subcc(cnt1, cnt2, diff);
+  br(Assembler::greater, true, Assembler::pt, Lskip);
+  // cnt2 is shorter, so use its count:
+  delayed()->mov(cnt2, cnt1);
+  bind(Lskip);
+
+  // Rename registers
+  Register limit1 = cnt1;
+  Register limit2 = limit1;
+  Register chr1   = result;
+  Register chr2   = cnt2;
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    // We need an additional register to keep track of two limits
+    assert_different_registers(str1, str2, cnt1, cnt2, tmp1, tmp2, result);
+    limit2 = tmp2;
+  }
+
+  // Is the minimum length zero?
+  cmp(limit1, (int)0); // use cast to resolve overloading ambiguity
+  br(Assembler::equal, true, Assembler::pn, Ldone);
+  // result is difference in lengths
+  if (ae == StrIntrinsicNode::UU) {
+    delayed()->sra(diff, 1, result);  // Divide by 2 to get number of chars
+  } else {
+    delayed()->mov(diff, result);
+  }
+
+  // Load first characters
+  if (ae == StrIntrinsicNode::LL) {
+    stride1 = stride2 = sizeof(jbyte);
+    ldub(str1, 0, chr1);
+    ldub(str2, 0, chr2);
+  } else if (ae == StrIntrinsicNode::UU) {
+    stride1 = stride2 = sizeof(jchar);
+    lduh(str1, 0, chr1);
+    lduh(str2, 0, chr2);
+  } else {
+    stride1 = sizeof(jbyte);
+    stride2 = sizeof(jchar);
+    ldub(str1, 0, chr1);
+    lduh(str2, 0, chr2);
+  }
+
+  // Compare first characters
+  subcc(chr1, chr2, chr1);
+  br(Assembler::notZero, false, Assembler::pt, Ldone);
+  assert(chr1 == result, "result must be pre-placed");
+  delayed()->nop();
+
+  // Check if the strings start at same location
+  cmp(str1, str2);
+  brx(Assembler::equal, true, Assembler::pn, Ldone);
+  delayed()->mov(G0, result);  // result is zero
+
+  // We have no guarantee that on 64 bit the higher half of limit is 0
+  signx(limit1);
+
+  // Get limit
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    sll(limit1, 1, limit2);
+    subcc(limit2, stride2, chr2);
+  }
+  subcc(limit1, stride1, chr1);
+  br(Assembler::zero, true, Assembler::pn, Ldone);
+  // result is difference in lengths
+  if (ae == StrIntrinsicNode::UU) {
+    delayed()->sra(diff, 1, result);  // Divide by 2 to get number of chars
+  } else {
+    delayed()->mov(diff, result);
+  }
+
+  // Shift str1 and str2 to the end of the arrays, negate limit
+  add(str1, limit1, str1);
+  add(str2, limit2, str2);
+  neg(chr1, limit1);  // limit1 = -(limit1-stride1)
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    neg(chr2, limit2);  // limit2 = -(limit2-stride2)
+  }
+
+  // Compare the rest of the characters
+  if (ae == StrIntrinsicNode::UU) {
+    lduh(str1, limit1, chr1);
+  } else {
+    ldub(str1, limit1, chr1);
+  }
+
+  bind(Lloop);
+  if (ae == StrIntrinsicNode::LL) {
+    ldub(str2, limit2, chr2);
+  } else {
+    lduh(str2, limit2, chr2);
+  }
+
+  subcc(chr1, chr2, chr1);
+  br(Assembler::notZero, false, Assembler::pt, Ldone);
+  assert(chr1 == result, "result must be pre-placed");
+  delayed()->inccc(limit1, stride1);
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    inccc(limit2, stride2);
+  }
+
+  // annul LDUB if branch is not taken to prevent access past end of string
+  br(Assembler::notZero, true, Assembler::pt, Lloop);
+  if (ae == StrIntrinsicNode::UU) {
+    delayed()->lduh(str1, limit2, chr1);
+  } else {
+    delayed()->ldub(str1, limit1, chr1);
+  }
+
+  // If strings are equal up to min length, return the length difference.
+  if (ae == StrIntrinsicNode::UU) {
+    // Divide by 2 to get number of chars
+    sra(diff, 1, result);
+  } else {
+    mov(diff, result);
+  }
+
+  // Otherwise, return the difference between the first mismatched chars.
+  bind(Ldone);
+  if(ae == StrIntrinsicNode::UL) {
+    // Negate result (see note above)
+    neg(result);
+  }
+}
+
+void MacroAssembler::array_equals(bool is_array_equ, Register ary1, Register ary2,
+                                  Register limit, Register tmp, Register result, bool is_byte) {
+  Label Ldone, Lvector, Lloop;
+  assert_different_registers(ary1, ary2, limit, tmp, result);
+
+  int length_offset  = arrayOopDesc::length_offset_in_bytes();
+  int base_offset    = arrayOopDesc::base_offset_in_bytes(is_byte ? T_BYTE : T_CHAR);
+
+  if (is_array_equ) {
+    // return true if the same array
+    cmp(ary1, ary2);
+    brx(Assembler::equal, true, Assembler::pn, Ldone);
+    delayed()->add(G0, 1, result); // equal
+
+    br_null(ary1, true, Assembler::pn, Ldone);
+    delayed()->mov(G0, result);    // not equal
+
+    br_null(ary2, true, Assembler::pn, Ldone);
+    delayed()->mov(G0, result);    // not equal
+
+    // load the lengths of arrays
+    ld(Address(ary1, length_offset), limit);
+    ld(Address(ary2, length_offset), tmp);
+
+    // return false if the two arrays are not equal length
+    cmp(limit, tmp);
+    br(Assembler::notEqual, true, Assembler::pn, Ldone);
+    delayed()->mov(G0, result);    // not equal
+  }
+
+  cmp_zero_and_br(Assembler::zero, limit, Ldone, true, Assembler::pn);
+  delayed()->add(G0, 1, result); // zero-length arrays are equal
+
+  if (is_array_equ) {
+    // load array addresses
+    add(ary1, base_offset, ary1);
+    add(ary2, base_offset, ary2);
+  } else {
+    // We have no guarantee that on 64 bit the higher half of limit is 0
+    signx(limit);
+  }
+
+  if (is_byte) {
+    Label Lskip;
+    // check for trailing byte
+    andcc(limit, 0x1, tmp);
+    br(Assembler::zero, false, Assembler::pt, Lskip);
+    delayed()->nop();
+
+    // compare the trailing byte
+    sub(limit, sizeof(jbyte), limit);
+    ldub(ary1, limit, result);
+    ldub(ary2, limit, tmp);
+    cmp(result, tmp);
+    br(Assembler::notEqual, true, Assembler::pt, Ldone);
+    delayed()->mov(G0, result);    // not equal
+
+    // only one byte?
+    cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
+    delayed()->add(G0, 1, result); // zero-length arrays are equal
+    bind(Lskip);
+  } else if (is_array_equ) {
+    // set byte count
+    sll(limit, exact_log2(sizeof(jchar)), limit);
+  }
+
+  // check for trailing character
+  andcc(limit, 0x2, tmp);
   br(Assembler::zero, false, Assembler::pt, Lvector);
   delayed()->nop();
 
   // compare the trailing char
   sub(limit, sizeof(jchar), limit);
-  lduh(ary1, limit, chr1);
-  lduh(ary2, limit, chr2);
-  cmp(chr1, chr2);
+  lduh(ary1, limit, result);
+  lduh(ary2, limit, tmp);
+  cmp(result, tmp);
   br(Assembler::notEqual, true, Assembler::pt, Ldone);
   delayed()->mov(G0, result);     // not equal
 
-  // only one char ?
+  // only one char?
   cmp_zero_and_br(zero, limit, Ldone, true, Assembler::pn);
   delayed()->add(G0, 1, result); // zero-length arrays are equal
 
@@ -4284,21 +4645,23 @@
   add(ary2, limit, ary2);
   neg(limit, limit);
 
-  lduw(ary1, limit, chr1);
+  lduw(ary1, limit, result);
   bind(Lloop);
-  lduw(ary2, limit, chr2);
-  cmp(chr1, chr2);
+  lduw(ary2, limit, tmp);
+  cmp(result, tmp);
   br(Assembler::notEqual, true, Assembler::pt, Ldone);
   delayed()->mov(G0, result);     // not equal
   inccc(limit, 2*sizeof(jchar));
   // annul LDUW if branch is not taken to prevent access past end of array
   br(Assembler::notZero, true, Assembler::pt, Lloop);
-  delayed()->lduw(ary1, limit, chr1); // hoisted
-
-  // Caller should set it:
-  // add(G0, 1, result); // equals
+  delayed()->lduw(ary1, limit, result); // hoisted
+
+  add(G0, 1, result); // equals
+  bind(Ldone);
 }
 
+#endif
+
 // Use BIS for zeroing (count is in bytes).
 void MacroAssembler::bis_zeroing(Register to, Register count, Register temp, Label& Ldone) {
   assert(UseBlockZeroing && VM_Version::has_block_zeroing(), "only works with BIS zeroing");
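
string_compress/string_inflate above are the SPARC Compact Strings intrinsics: compression stores one byte per char and bails out (result 0) as soon as a char does not fit in Latin-1, inflation widens each byte back to a char, and the _16 variants handle 16 bytes per iteration using VIS3. A minimal Java sketch of the contract the assembly implements, for illustration only (method names are hypothetical):

    // Illustrative only: semantics of the string_compress intrinsic above.
    // Returns the number of chars compressed, or 0 if any char is not Latin-1.
    static int compress(char[] src, byte[] dst, int cnt) {
        for (int i = 0; i < cnt; i++) {
            char c = src[i];
            if (c > 0xFF) {
                return 0;          // failure: caller keeps the UTF-16 form
            }
            dst[i] = (byte) c;
        }
        return cnt;
    }

    // Inflation is the inverse: widen each Latin-1 byte back to a char.
    static void inflate(byte[] src, char[] dst, int cnt) {
        for (int i = 0; i < cnt; i++) {
            dst[i] = (char) (src[i] & 0xFF);
        }
    }
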
--- a/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.hpp	Wed Nov 11 23:51:57 2015 -0500
@@ -1433,10 +1433,31 @@
   void inc_counter(address counter_addr, Register Rtmp1, Register Rtmp2);
   void inc_counter(int*    counter_addr, Register Rtmp1, Register Rtmp2);
 
-  // Compare char[] arrays aligned to 4 bytes.
-  void char_arrays_equals(Register ary1, Register ary2,
-                          Register limit, Register result,
-                          Register chr1, Register chr2, Label& Ldone);
+#ifdef COMPILER2
+  // Compress char[] to byte[] by compressing 16 bytes at once. Return 0 on failure.
+  void string_compress_16(Register src, Register dst, Register cnt, Register result,
+                          Register tmp1, Register tmp2, Register tmp3, Register tmp4,
+                          FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, Label& Ldone);
+
+  // Compress char[] to byte[]. Return 0 on failure.
+  void string_compress(Register src, Register dst, Register cnt, Register tmp, Register result, Label& Ldone);
+
+  // Inflate byte[] to char[] by inflating 16 bytes at once.
+  void string_inflate_16(Register src, Register dst, Register cnt, Register tmp,
+                         FloatRegister ftmp1, FloatRegister ftmp2, FloatRegister ftmp3, FloatRegister ftmp4, Label& Ldone);
+
+  // Inflate byte[] to char[].
+  void string_inflate(Register src, Register dst, Register cnt, Register tmp, Label& Ldone);
+
+  void string_compare(Register str1, Register str2,
+                      Register cnt1, Register cnt2,
+                      Register tmp1, Register tmp2,
+                      Register result, int ae);
+
+  void array_equals(bool is_array_equ, Register ary1, Register ary2,
+                    Register limit, Register tmp, Register result, bool is_byte);
+#endif
+
   // Use BIS for zeroing
   void bis_zeroing(Register to, Register count, Register temp, Label& Ldone);
 
--- a/src/cpu/sparc/vm/nativeInst_sparc.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/sparc/vm/nativeInst_sparc.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -417,6 +417,67 @@
 
 //-------------------------------------------------------------------
 
+void NativeMovConstReg32::verify() {
+  NativeInstruction::verify();
+  // make sure code pattern is actually a "set_metadata" synthetic instruction
+  // see MacroAssembler::set_oop()
+  int i0 = long_at(sethi_offset);
+  int i1 = long_at(add_offset);
+
+  // verify the pattern "sethi %hi22(imm), reg ;  add reg, %lo10(imm), reg"
+  Register rd = inv_rd(i0);
+  if (!is_op2(i0, Assembler::sethi_op2) && rd != G0 ) {
+    fatal("not a set_metadata");
+  }
+}
+
+
+void NativeMovConstReg32::print() {
+  tty->print_cr(INTPTR_FORMAT ": mov reg, " INTPTR_FORMAT, instruction_address(), data());
+}
+
+
+intptr_t NativeMovConstReg32::data() const {
+  return data32(long_at(sethi_offset), long_at(add_offset));
+}
+
+
+void NativeMovConstReg32::set_data(intptr_t x) {
+  set_long_at(sethi_offset, set_data32_sethi(  long_at(sethi_offset), x));
+  set_long_at(add_offset,   set_data32_simm13( long_at(add_offset),   x));
+
+  // also store the value into an oop_Relocation cell, if any
+  CodeBlob* cb = CodeCache::find_blob(instruction_address());
+  nmethod*  nm = cb ? cb->as_nmethod_or_null() : NULL;
+  if (nm != NULL) {
+    RelocIterator iter(nm, instruction_address(), next_instruction_address());
+    oop* oop_addr = NULL;
+    Metadata** metadata_addr = NULL;
+    while (iter.next()) {
+      if (iter.type() == relocInfo::oop_type) {
+        oop_Relocation *r = iter.oop_reloc();
+        if (oop_addr == NULL) {
+          oop_addr = r->oop_addr();
+          *oop_addr = cast_to_oop(x);
+        } else {
+          assert(oop_addr == r->oop_addr(), "must be only one set-oop here");
+        }
+      }
+      if (iter.type() == relocInfo::metadata_type) {
+        metadata_Relocation *r = iter.metadata_reloc();
+        if (metadata_addr == NULL) {
+          metadata_addr = r->metadata_addr();
+          *metadata_addr = (Metadata*)x;
+        } else {
+          assert(metadata_addr == r->metadata_addr(), "must be only one set-metadata here");
+        }
+      }
+    }
+  }
+}
+
+//-------------------------------------------------------------------
+
 void NativeMovConstRegPatching::verify() {
   NativeInstruction::verify();
   // Make sure code pattern is sethi/nop/add.
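
NativeMovConstReg32 above patches the two-instruction sethi/add pair that materializes a 32-bit immediate: sethi supplies the upper 22 bits and the add the low 10. For illustration only, the split and reassembly in plain Java (helper names are made up):

    // Illustrative only: how a 32-bit value is divided across the sethi/add pair
    // that NativeMovConstReg32::set_data patches.
    static int hi22(int x) { return x >>> 10; }   // sethi operand (bits 31..10)
    static int lo10(int x) { return x & 0x3ff; }  // add operand (bits 9..0)
    // Reassembly: (hi22(x) << 10) | lo10(x) == x for any 32-bit x.
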
--- a/src/cpu/sparc/vm/nativeInst_sparc.hpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/sparc/vm/nativeInst_sparc.hpp	Wed Nov 11 23:51:57 2015 -0500
@@ -518,6 +518,46 @@
 
 #endif // _LP64
 
+// An interface for accessing/manipulating 32 bit native set_metadata imm, reg instructions
+// (used to manipulate inlined data references, etc.)
+//      set_metadata imm, reg
+//      == sethi %hi22(imm), reg ;  add reg, %lo10(imm), reg
+class NativeMovConstReg32;
+inline NativeMovConstReg32* nativeMovConstReg32_at(address address);
+class NativeMovConstReg32: public NativeInstruction {
+ public:
+  enum Sparc_specific_constants {
+    sethi_offset           = 0,
+    add_offset             = 4,
+    instruction_size       = 8
+  };
+
+  address instruction_address() const       { return addr_at(0); }
+  address next_instruction_address() const  { return addr_at(instruction_size); }
+
+  // (The [set_]data accessor respects oop_type relocs also.)
+  intptr_t data() const;
+  void set_data(intptr_t x);
+
+  // report the destination register
+  Register destination() { return inv_rd(long_at(sethi_offset)); }
+
+  void  verify();
+  void  print();
+
+  // unit test stuff
+  static void test();
+
+  // Creation
+  friend inline NativeMovConstReg32* nativeMovConstReg32_at(address address) {
+    NativeMovConstReg32* test = (NativeMovConstReg32*)address;
+    #ifdef ASSERT
+      test->verify();
+    #endif
+    return test;
+  }
+};
+
 // An interface for accessing/manipulating native set_metadata imm, reg instructions.
 // (used to manipulate inlined data references, etc.)
 //      set_metadata imm, reg
--- a/src/cpu/sparc/vm/sparc.ad	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/sparc/vm/sparc.ad	Wed Nov 11 23:51:57 2015 -0500
@@ -2905,232 +2905,6 @@
     __ float_cmp( $primary, -1, Fsrc1, Fsrc2, Rdst);
   %}
 
-
-  enc_class enc_String_Compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result) %{
-    Label Ldone, Lloop;
-    MacroAssembler _masm(&cbuf);
-
-    Register   str1_reg = reg_to_register_object($str1$$reg);
-    Register   str2_reg = reg_to_register_object($str2$$reg);
-    Register   cnt1_reg = reg_to_register_object($cnt1$$reg);
-    Register   cnt2_reg = reg_to_register_object($cnt2$$reg);
-    Register result_reg = reg_to_register_object($result$$reg);
-
-    assert(result_reg != str1_reg &&
-           result_reg != str2_reg &&
-           result_reg != cnt1_reg &&
-           result_reg != cnt2_reg ,
-           "need different registers");
-
-    // Compute the minimum of the string lengths(str1_reg) and the
-    // difference of the string lengths (stack)
-
-    // See if the lengths are different, and calculate min in str1_reg.
-    // Stash diff in O7 in case we need it for a tie-breaker.
-    Label Lskip;
-    __ subcc(cnt1_reg, cnt2_reg, O7);
-    __ sll(cnt1_reg, exact_log2(sizeof(jchar)), cnt1_reg); // scale the limit
-    __ br(Assembler::greater, true, Assembler::pt, Lskip);
-    // cnt2 is shorter, so use its count:
-    __ delayed()->sll(cnt2_reg, exact_log2(sizeof(jchar)), cnt1_reg); // scale the limit
-    __ bind(Lskip);
-
-    // reallocate cnt1_reg, cnt2_reg, result_reg
-    // Note:  limit_reg holds the string length pre-scaled by 2
-    Register limit_reg =   cnt1_reg;
-    Register  chr2_reg =   cnt2_reg;
-    Register  chr1_reg = result_reg;
-    // str{12} are the base pointers
-
-    // Is the minimum length zero?
-    __ cmp(limit_reg, (int)(0 * sizeof(jchar))); // use cast to resolve overloading ambiguity
-    __ br(Assembler::equal, true, Assembler::pn, Ldone);
-    __ delayed()->mov(O7, result_reg);  // result is difference in lengths
-
-    // Load first characters
-    __ lduh(str1_reg, 0, chr1_reg);
-    __ lduh(str2_reg, 0, chr2_reg);
-
-    // Compare first characters
-    __ subcc(chr1_reg, chr2_reg, chr1_reg);
-    __ br(Assembler::notZero, false, Assembler::pt,  Ldone);
-    assert(chr1_reg == result_reg, "result must be pre-placed");
-    __ delayed()->nop();
-
-    {
-      // Check after comparing first character to see if strings are equivalent
-      Label LSkip2;
-      // Check if the strings start at same location
-      __ cmp(str1_reg, str2_reg);
-      __ brx(Assembler::notEqual, true, Assembler::pt, LSkip2);
-      __ delayed()->nop();
-
-      // Check if the length difference is zero (in O7)
-      __ cmp(G0, O7);
-      __ br(Assembler::equal, true, Assembler::pn, Ldone);
-      __ delayed()->mov(G0, result_reg);  // result is zero
-
-      // Strings might not be equal
-      __ bind(LSkip2);
-    }
-
-    // We have no guarantee that on 64 bit the higher half of limit_reg is 0
-    __ signx(limit_reg);
-
-    __ subcc(limit_reg, 1 * sizeof(jchar), chr1_reg);
-    __ br(Assembler::equal, true, Assembler::pn, Ldone);
-    __ delayed()->mov(O7, result_reg);  // result is difference in lengths
-
-    // Shift str1_reg and str2_reg to the end of the arrays, negate limit
-    __ add(str1_reg, limit_reg, str1_reg);
-    __ add(str2_reg, limit_reg, str2_reg);
-    __ neg(chr1_reg, limit_reg);  // limit = -(limit-2)
-
-    // Compare the rest of the characters
-    __ lduh(str1_reg, limit_reg, chr1_reg);
-    __ bind(Lloop);
-    // __ lduh(str1_reg, limit_reg, chr1_reg); // hoisted
-    __ lduh(str2_reg, limit_reg, chr2_reg);
-    __ subcc(chr1_reg, chr2_reg, chr1_reg);
-    __ br(Assembler::notZero, false, Assembler::pt, Ldone);
-    assert(chr1_reg == result_reg, "result must be pre-placed");
-    __ delayed()->inccc(limit_reg, sizeof(jchar));
-    // annul LDUH if branch is not taken to prevent access past end of string
-    __ br(Assembler::notZero, true, Assembler::pt, Lloop);
-    __ delayed()->lduh(str1_reg, limit_reg, chr1_reg); // hoisted
-
-    // If strings are equal up to min length, return the length difference.
-    __ mov(O7, result_reg);
-
-    // Otherwise, return the difference between the first mismatched chars.
-    __ bind(Ldone);
-  %}
-
-enc_class enc_String_Equals(o0RegP str1, o1RegP str2, g3RegI cnt, notemp_iRegI result) %{
-    Label Lchar, Lchar_loop, Ldone;
-    MacroAssembler _masm(&cbuf);
-
-    Register   str1_reg = reg_to_register_object($str1$$reg);
-    Register   str2_reg = reg_to_register_object($str2$$reg);
-    Register    cnt_reg = reg_to_register_object($cnt$$reg);
-    Register   tmp1_reg = O7;
-    Register result_reg = reg_to_register_object($result$$reg);
-
-    assert(result_reg != str1_reg &&
-           result_reg != str2_reg &&
-           result_reg !=  cnt_reg &&
-           result_reg != tmp1_reg ,
-           "need different registers");
-
-    __ cmp(str1_reg, str2_reg); //same char[] ?
-    __ brx(Assembler::equal, true, Assembler::pn, Ldone);
-    __ delayed()->add(G0, 1, result_reg);
-
-    __ cmp_zero_and_br(Assembler::zero, cnt_reg, Ldone, true, Assembler::pn);
-    __ delayed()->add(G0, 1, result_reg); // count == 0
-
-    //rename registers
-    Register limit_reg =    cnt_reg;
-    Register  chr1_reg = result_reg;
-    Register  chr2_reg =   tmp1_reg;
-
-    // We have no guarantee that on 64 bit the higher half of limit_reg is 0
-    __ signx(limit_reg);
-
-    //check for alignment and position the pointers to the ends
-    __ or3(str1_reg, str2_reg, chr1_reg);
-    __ andcc(chr1_reg, 0x3, chr1_reg);
-    // notZero means at least one not 4-byte aligned.
-    // We could optimize the case when both arrays are not aligned
-    // but it is not frequent case and it requires additional checks.
-    __ br(Assembler::notZero, false, Assembler::pn, Lchar); // char by char compare
-    __ delayed()->sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg); // set byte count
-
-    // Compare char[] arrays aligned to 4 bytes.
-    __ char_arrays_equals(str1_reg, str2_reg, limit_reg, result_reg,
-                          chr1_reg, chr2_reg, Ldone);
-    __ ba(Ldone);
-    __ delayed()->add(G0, 1, result_reg);
-
-    // char by char compare
-    __ bind(Lchar);
-    __ add(str1_reg, limit_reg, str1_reg);
-    __ add(str2_reg, limit_reg, str2_reg);
-    __ neg(limit_reg); //negate count
-
-    __ lduh(str1_reg, limit_reg, chr1_reg);
-    // Lchar_loop
-    __ bind(Lchar_loop);
-    __ lduh(str2_reg, limit_reg, chr2_reg);
-    __ cmp(chr1_reg, chr2_reg);
-    __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
-    __ delayed()->mov(G0, result_reg); //not equal
-    __ inccc(limit_reg, sizeof(jchar));
-    // annul LDUH if branch is not taken to prevent access past end of string
-    __ br(Assembler::notZero, true, Assembler::pt, Lchar_loop);
-    __ delayed()->lduh(str1_reg, limit_reg, chr1_reg); // hoisted
-
-    __ add(G0, 1, result_reg);  //equal
-
-    __ bind(Ldone);
-  %}
-
-enc_class enc_Array_Equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, notemp_iRegI result) %{
-    Label Lvector, Ldone, Lloop;
-    MacroAssembler _masm(&cbuf);
-
-    Register   ary1_reg = reg_to_register_object($ary1$$reg);
-    Register   ary2_reg = reg_to_register_object($ary2$$reg);
-    Register   tmp1_reg = reg_to_register_object($tmp1$$reg);
-    Register   tmp2_reg = O7;
-    Register result_reg = reg_to_register_object($result$$reg);
-
-    int length_offset  = arrayOopDesc::length_offset_in_bytes();
-    int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
-
-    // return true if the same array
-    __ cmp(ary1_reg, ary2_reg);
-    __ brx(Assembler::equal, true, Assembler::pn, Ldone);
-    __ delayed()->add(G0, 1, result_reg); // equal
-
-    __ br_null(ary1_reg, true, Assembler::pn, Ldone);
-    __ delayed()->mov(G0, result_reg);    // not equal
-
-    __ br_null(ary2_reg, true, Assembler::pn, Ldone);
-    __ delayed()->mov(G0, result_reg);    // not equal
-
-    //load the lengths of arrays
-    __ ld(Address(ary1_reg, length_offset), tmp1_reg);
-    __ ld(Address(ary2_reg, length_offset), tmp2_reg);
-
-    // return false if the two arrays are not equal length
-    __ cmp(tmp1_reg, tmp2_reg);
-    __ br(Assembler::notEqual, true, Assembler::pn, Ldone);
-    __ delayed()->mov(G0, result_reg);     // not equal
-
-    __ cmp_zero_and_br(Assembler::zero, tmp1_reg, Ldone, true, Assembler::pn);
-    __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal
-
-    // load array addresses
-    __ add(ary1_reg, base_offset, ary1_reg);
-    __ add(ary2_reg, base_offset, ary2_reg);
-
-    // renaming registers
-    Register chr1_reg  =  result_reg; // for characters in ary1
-    Register chr2_reg  =  tmp2_reg;   // for characters in ary2
-    Register limit_reg =  tmp1_reg;   // length
-
-    // set byte count
-    __ sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg);
-
-    // Compare char[] arrays aligned to 4 bytes.
-    __ char_arrays_equals(ary1_reg, ary2_reg, limit_reg, result_reg,
-                          chr1_reg, chr2_reg, Ldone);
-    __ add(G0, 1, result_reg); // equals
-
-    __ bind(Ldone);
-  %}
-
   enc_class enc_rethrow() %{
     cbuf.set_insts_mark();
     Register temp_reg = G3;
@@ -10275,33 +10049,204 @@
   ins_pipe(long_memory_op);
 %}
 
-instruct string_compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result,
-                        o7RegI tmp, flagsReg ccr) %{
+instruct string_compareL(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result,
+                         o7RegI tmp, flagsReg ccr) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ccr, KILL tmp);
   ins_cost(300);
-  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp" %}
-  ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result) );
+  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $tmp$$Register, $tmp$$Register,
+                      $result$$Register, StrIntrinsicNode::LL);
+  %}
   ins_pipe(long_memory_op);
 %}
 
-instruct string_equals(o0RegP str1, o1RegP str2, g3RegI cnt, notemp_iRegI result,
-                       o7RegI tmp, flagsReg ccr) %{
+instruct string_compareU(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result,
+                         o7RegI tmp, flagsReg ccr) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ccr, KILL tmp);
+  ins_cost(300);
+  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $tmp$$Register, $tmp$$Register,
+                      $result$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+instruct string_compareLU(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result,
+                          o7RegI tmp1, g1RegI tmp2, flagsReg ccr) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ccr, KILL tmp1, KILL tmp2);
+  ins_cost(300);
+  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1,$tmp2" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $result$$Register, StrIntrinsicNode::LU);
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+instruct string_compareUL(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result,
+                          o7RegI tmp1, g1RegI tmp2, flagsReg ccr) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ccr, KILL tmp1, KILL tmp2);
+  ins_cost(300);
+  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1,$tmp2" %}
+  ins_encode %{
+    __ string_compare($str2$$Register, $str1$$Register,
+                      $cnt2$$Register, $cnt1$$Register,
+                      $tmp1$$Register, $tmp2$$Register,
+                      $result$$Register, StrIntrinsicNode::UL);
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+instruct string_equalsL(o0RegP str1, o1RegP str2, g3RegI cnt, notemp_iRegI result,
+                        o7RegI tmp, flagsReg ccr) %{
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result (StrEquals (Binary str1 str2) cnt));
   effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp, KILL ccr);
   ins_cost(300);
-  format %{ "String Equals $str1,$str2,$cnt -> $result   // KILL $tmp" %}
-  ins_encode( enc_String_Equals(str1, str2, cnt, result) );
+  format %{ "String Equals byte[] $str1,$str2,$cnt -> $result   // KILL $tmp" %}
+  ins_encode %{
+    __ array_equals(false, $str1$$Register, $str2$$Register,
+                    $cnt$$Register, $tmp$$Register,
+                    $result$$Register, true /* byte */);
+  %}
   ins_pipe(long_memory_op);
 %}
 
-instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegI tmp1, notemp_iRegI result,
-                      o7RegI tmp2, flagsReg ccr) %{
+instruct string_equalsU(o0RegP str1, o1RegP str2, g3RegI cnt, notemp_iRegI result,
+                        o7RegI tmp, flagsReg ccr) %{
+  predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU);
+  match(Set result (StrEquals (Binary str1 str2) cnt));
+  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp, KILL ccr);
+  ins_cost(300);
+  format %{ "String Equals char[]  $str1,$str2,$cnt -> $result   // KILL $tmp" %}
+  ins_encode %{
+    __ array_equals(false, $str1$$Register, $str2$$Register,
+                    $cnt$$Register, $tmp$$Register,
+                    $result$$Register, false /* byte */);
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+instruct array_equalsB(o0RegP ary1, o1RegP ary2, g3RegI tmp1, notemp_iRegI result,
+                       o7RegI tmp2, flagsReg ccr) %{
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result (AryEq ary1 ary2));
   effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL ccr);
   ins_cost(300);
   format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1,$tmp2" %}
-  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, result));
+  ins_encode %{
+    __ array_equals(true, $ary1$$Register, $ary2$$Register,
+                    $tmp1$$Register, $tmp2$$Register,
+                    $result$$Register, true /* byte */);
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+instruct array_equalsC(o0RegP ary1, o1RegP ary2, g3RegI tmp1, notemp_iRegI result,
+                       o7RegI tmp2, flagsReg ccr) %{
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
+  match(Set result (AryEq ary1 ary2));
+  effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL ccr);
+  ins_cost(300);
+  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1,$tmp2" %}
+  ins_encode %{
+    __ array_equals(true, $ary1$$Register, $ary2$$Register,
+                    $tmp1$$Register, $tmp2$$Register,
+                    $result$$Register, false /* byte */);
+  %}
+  ins_pipe(long_memory_op);
+%}
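
For orientation, a sketch of the argument-encoding tags that the predicates above dispatch on. It is illustrative only: the enumerator names are assumptions, and the real definitions live in StrIntrinsicNode (opto/intrinsicnode.hpp), which is not shown in this excerpt.

// Illustrative only -- assumed names, not HotSpot source.
enum ArgEncodingSketch {
  LL_sketch,   // both arguments Latin-1 byte[]  (one byte per element)
  UU_sketch,   // both arguments UTF-16 char[]   (two bytes per element)
  LU_sketch,   // first argument Latin-1, second UTF-16
  UL_sketch    // first argument UTF-16, second Latin-1
};
// Note: string_compareUL above passes its operands swapped (str2/cnt2 first);
// how that is consumed lives in MacroAssembler::string_compare, whose body is
// outside this excerpt.
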
+
+// char[] to byte[] compression
+instruct string_compress(o0RegP src, o1RegP dst, g3RegI len, notemp_iRegI result, iRegL tmp, flagsReg ccr) %{
+  predicate(UseVIS < 3);
+  match(Set result (StrCompressedCopy src (Binary dst len)));
+  effect(TEMP result, TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL ccr);
+  ins_cost(300);
+  format %{ "String Compress $src,$dst,$len -> $result    // KILL $tmp" %}
+  ins_encode %{
+    Label Ldone;
+    __ signx($len$$Register);
+    __ cmp_zero_and_br(Assembler::zero, $len$$Register, Ldone, false, Assembler::pn);
+    __ delayed()->mov($len$$Register, $result$$Register); // copy count
+    __ string_compress($src$$Register, $dst$$Register, $len$$Register, $result$$Register, $tmp$$Register, Ldone);
+    __ bind(Ldone);
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+// fast char[] to byte[] compression using VIS instructions
+instruct string_compress_fast(o0RegP src, o1RegP dst, g3RegI len, notemp_iRegI result,
+                              iRegL tmp1, iRegL tmp2, iRegL tmp3, iRegL tmp4,
+                              regD ftmp1, regD ftmp2, regD ftmp3, flagsReg ccr) %{
+  predicate(UseVIS >= 3);
+  match(Set result (StrCompressedCopy src (Binary dst len)));
+  effect(TEMP result, TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP ftmp1, TEMP ftmp2, TEMP ftmp3, USE_KILL src, USE_KILL dst, USE_KILL len, KILL ccr);
+  ins_cost(300);
+  format %{ "String Compress Fast $src,$dst,$len -> $result    // KILL $tmp1,$tmp2,$tmp3,$tmp4,$ftmp1,$ftmp2,$ftmp3" %}
+  ins_encode %{
+    Label Ldone;
+    __ signx($len$$Register);
+    __ string_compress_16($src$$Register, $dst$$Register, $len$$Register, $result$$Register,
+                          $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, $tmp4$$Register,
+                          $ftmp1$$FloatRegister, $ftmp2$$FloatRegister, $ftmp3$$FloatRegister, Ldone);
+    __ cmp_and_brx_short($len$$Register, 0, Assembler::equal, Assembler::pn, Ldone);
+    __ string_compress($src$$Register, $dst$$Register, $len$$Register, $result$$Register, $tmp1$$Register, Ldone);
+    __ bind(Ldone);
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+// byte[] to char[] inflation
+instruct string_inflate(Universe dummy, o0RegP src, o1RegP dst, g3RegI len,
+                        iRegL tmp, flagsReg ccr) %{
+  match(Set dummy (StrInflatedCopy src (Binary dst len)));
+  effect(TEMP tmp, USE_KILL src, USE_KILL dst, USE_KILL len, KILL ccr);
+  ins_cost(300);
+  format %{ "String Inflate $src,$dst,$len    // KILL $tmp" %}
+  ins_encode %{
+    Label Ldone;
+    __ signx($len$$Register);
+    __ cmp_and_brx_short($len$$Register, 0, Assembler::equal, Assembler::pn, Ldone);
+    __ string_inflate($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register, Ldone);
+    __ bind(Ldone);
+  %}
+  ins_pipe(long_memory_op);
+%}
+
+// fast byte[] to char[] inflation using VIS instructions
+instruct string_inflate_fast(Universe dummy, o0RegP src, o1RegP dst, g3RegI len,
+                             iRegL tmp, regD ftmp1, regD ftmp2, regD ftmp3, regD ftmp4, flagsReg ccr) %{
+  predicate(UseVIS >= 3);
+  match(Set dummy (StrInflatedCopy src (Binary dst len)));
+  effect(TEMP tmp, TEMP ftmp1, TEMP ftmp2, TEMP ftmp3, TEMP ftmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL ccr);
+  ins_cost(300);
+  format %{ "String Inflate Fast $src,$dst,$len    // KILL $tmp,$ftmp1,$ftmp2,$ftmp3,$ftmp4" %}
+  ins_encode %{
+    Label Ldone;
+    __ signx($len$$Register);
+    __ string_inflate_16($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register,
+                         $ftmp1$$FloatRegister, $ftmp2$$FloatRegister, $ftmp3$$FloatRegister, $ftmp4$$FloatRegister, Ldone);
+    __ cmp_and_brx_short($len$$Register, 0, Assembler::equal, Assembler::pn, Ldone);
+    __ string_inflate($src$$Register, $dst$$Register, $len$$Register, $tmp$$Register, Ldone);
+    __ bind(Ldone);
+  %}
   ins_pipe(long_memory_op);
 %}
 
--- a/src/cpu/sparc/vm/vmStructs_sparc.hpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/sparc/vm/vmStructs_sparc.hpp	Wed Nov 11 23:51:57 2015 -0500
@@ -83,7 +83,26 @@
   declare_constant(VM_Version::vis1_instructions_m)                       \
   declare_constant(VM_Version::vis2_instructions_m)                       \
   declare_constant(VM_Version::vis3_instructions_m)                       \
-  declare_constant(VM_Version::cbcond_instructions_m)
+  declare_constant(VM_Version::cbcond_instructions_m)                     \
+  declare_constant(VM_Version::v8_instructions_m)                         \
+  declare_constant(VM_Version::hardware_mul32_m)                          \
+  declare_constant(VM_Version::hardware_div32_m)                          \
+  declare_constant(VM_Version::hardware_fsmuld_m)                         \
+  declare_constant(VM_Version::hardware_popc_m)                           \
+  declare_constant(VM_Version::v9_instructions_m)                         \
+  declare_constant(VM_Version::sun4v_m)                                   \
+  declare_constant(VM_Version::blk_init_instructions_m)                   \
+  declare_constant(VM_Version::fmaf_instructions_m)                       \
+  declare_constant(VM_Version::fmau_instructions_m)                       \
+  declare_constant(VM_Version::sparc64_family_m)                          \
+  declare_constant(VM_Version::M_family_m)                                \
+  declare_constant(VM_Version::T_family_m)                                \
+  declare_constant(VM_Version::T1_model_m)                                \
+  declare_constant(VM_Version::sparc5_instructions_m)                     \
+  declare_constant(VM_Version::aes_instructions_m)                        \
+  declare_constant(VM_Version::sha1_instruction_m)                        \
+  declare_constant(VM_Version::sha256_instruction_m)                      \
+  declare_constant(VM_Version::sha512_instruction_m)
 
 #define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
 
--- a/src/cpu/x86/vm/assembler_x86.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/assembler_x86.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -1539,7 +1539,6 @@
   emit_arith(0x3B, 0xC0, dst, src);
 }
 
-
 void Assembler::cmpl(Register dst, Address  src) {
   InstructionMark im(this);
   prefix(src, dst);
@@ -2125,6 +2124,16 @@
   emit_operand(dst, src);
 }
 
+void Assembler::movddup(XMMRegister dst, XMMRegister src) {
+  _instruction_uses_vl = true;
+  NOT_LP64(assert(VM_Version::supports_sse3(), ""));
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_F2, /* no_mask_reg */ false, VEX_OPCODE_0F,
+                                      /* rex_w */ VM_Version::supports_evex(), AVX_128bit, /* legacy_mode */ false);
+  emit_int8(0x12);
+  emit_int8(0xC0 | encode);
+
+}
+
 void Assembler::kmovql(KRegister dst, KRegister src) {
   NOT_LP64(assert(VM_Version::supports_evex(), ""));
   int encode = kreg_prefix_and_encode(dst, knoreg, src, VEX_SIMD_NONE,
@@ -3027,6 +3036,35 @@
   emit_int8(imm8);
 }
 
+void Assembler::pcmpeqw(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse2(), ""));
+  emit_simd_arith(0x75, dst, src, VEX_SIMD_66,
+                  false, (VM_Version::supports_avx512dq() == false));
+}
+
+void Assembler::vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len) {
+  assert(UseAVX > 0, "some form of AVX must be enabled");
+  emit_vex_arith(0x75, dst, nds, src, VEX_SIMD_66, vector_len,
+                 false, (VM_Version::supports_avx512dq() == false));
+}
+
+void Assembler::pmovmskb(Register dst, XMMRegister src) {
+  assert(VM_Version::supports_sse2(), "");
+  int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, true, VEX_OPCODE_0F,
+                                      false, AVX_128bit, (VM_Version::supports_avx512dq() == false));
+  emit_int8((unsigned char)0xD7);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
+void Assembler::vpmovmskb(Register dst, XMMRegister src) {
+  assert(VM_Version::supports_avx2(), "");
+  int vector_len = AVX_256bit;
+  int encode = vex_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66,
+                                     vector_len, VEX_OPCODE_0F, true, false);
+  emit_int8((unsigned char)0xD7);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 void Assembler::pextrd(Register dst, XMMRegister src, int imm8) {
   assert(VM_Version::supports_sse4_1(), "");
   int encode = simd_prefix_and_encode(as_XMMRegister(dst->encoding()), xnoreg, src, VEX_SIMD_66, /* no_mask_reg */ true,
@@ -3099,6 +3137,17 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::vpmovzxbw(XMMRegister dst, Address src) {
+  assert(VM_Version::supports_avx(), "");
+  InstructionMark im(this);
+  bool vector256 = true;
+  assert(dst != xnoreg, "sanity");
+  int dst_enc = dst->encoding();
+  vex_prefix(src, 0, dst_enc, VEX_SIMD_66, VEX_OPCODE_0F_38, false, vector256);
+  emit_int8(0x30);
+  emit_operand(dst, src);
+}
+
 // generic
 void Assembler::pop(Register dst) {
   int encode = prefix_and_encode(dst->encoding());
@@ -3403,6 +3452,20 @@
   }
 }
 
+void Assembler::rcpps(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  int encode = simd_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_NONE, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  emit_int8(0x53);
+  emit_int8(0xC0 | encode);
+}
+
+void Assembler::rcpss(XMMRegister dst, XMMRegister src) {
+  NOT_LP64(assert(VM_Version::supports_sse(), ""));
+  int encode = simd_prefix_and_encode(dst, dst, src, VEX_SIMD_F3, /* no_mask_reg */ false, VEX_OPCODE_0F, /* rex_w */ false, AVX_128bit, /* legacy_mode */ true);
+  emit_int8(0x53);
+  emit_int8(0xC0 | encode);
+}
+
 void Assembler::rdtsc() {
   emit_int8((unsigned char)0x0F);
   emit_int8((unsigned char)0x31);
@@ -5347,6 +5410,16 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+// duplicate 2-byte integer data from src into 16 locations in dest
+void Assembler::vpbroadcastw(XMMRegister dst, XMMRegister src) {
+  assert(VM_Version::supports_avx2(), "");
+  int vector_len = AVX_256bit;
+  int encode = vex_prefix_and_encode(dst, xnoreg, src, VEX_SIMD_66,
+                                     vector_len, VEX_OPCODE_0F_38, false);
+  emit_int8(0x79);
+  emit_int8((unsigned char)(0xC0 | encode));
+}
+
 // duplicate 1-byte integer data from src into 16|32|64 locations in dest : requires AVX512BW and AVX512VL
 void Assembler::evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len) {
   _instruction_uses_vl = true;
@@ -6326,6 +6399,26 @@
   emit_int8((unsigned char)(0xC0 | encode));
 }
 
+void Assembler::cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
+  assert(!VM_Version::supports_evex(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src, VEX_SIMD_66, vector_len, VEX_OPCODE_0F, /* no_mask_reg */ false);
+  emit_int8((unsigned char)0xC2);
+  emit_int8((unsigned char)(0xC0 | encode));
+  emit_int8((unsigned char)(0xF & cop));
+}
+
+void Assembler::vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len) {
+  assert(VM_Version::supports_avx(), "");
+  assert(!VM_Version::supports_evex(), "");
+  int encode = vex_prefix_and_encode(dst, nds, src1, VEX_SIMD_66, vector_len, VEX_OPCODE_0F_3A, /* no_mask_reg */ false);
+  emit_int8((unsigned char)0x4B);
+  emit_int8((unsigned char)(0xC0 | encode));
+  int src2_enc = src2->encoding();
+  emit_int8((unsigned char)(0xF0 & src2_enc<<4));
+}
+
+
 #ifndef _LP64
 
 void Assembler::incl(Register dst) {
--- a/src/cpu/x86/vm/assembler_x86.hpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/assembler_x86.hpp	Wed Nov 11 23:51:57 2015 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -1504,6 +1504,8 @@
   void movb(Address dst, int imm8);
   void movb(Register dst, Address src);
 
+  void movddup(XMMRegister dst, XMMRegister src);
+
   void kmovql(KRegister dst, KRegister src);
   void kmovql(KRegister dst, Register src);
   void kmovdl(KRegister dst, Register src);
@@ -1680,6 +1682,12 @@
   void pcmpestri(XMMRegister xmm1, XMMRegister xmm2, int imm8);
   void pcmpestri(XMMRegister xmm1, Address src, int imm8);
 
+  void pcmpeqw(XMMRegister dst, XMMRegister src);
+  void vpcmpeqw(XMMRegister dst, XMMRegister nds, XMMRegister src, int vector_len);
+
+  void pmovmskb(Register dst, XMMRegister src);
+  void vpmovmskb(Register dst, XMMRegister src);
+
   // SSE 4.1 extract
   void pextrd(Register dst, XMMRegister src, int imm8);
   void pextrq(Register dst, XMMRegister src, int imm8);
@@ -1696,6 +1704,8 @@
   void pmovzxbw(XMMRegister dst, XMMRegister src);
   void pmovzxbw(XMMRegister dst, Address src);
 
+  void vpmovzxbw(XMMRegister dst, Address src);
+
 #ifndef _LP64 // no 32bit push/pop on amd64
   void popl(Address dst);
 #endif
@@ -1768,6 +1778,10 @@
 
   void rcrq(Register dst, int imm8);
 
+  void rcpps(XMMRegister dst, XMMRegister src);
+
+  void rcpss(XMMRegister dst, XMMRegister src);
+
   void rdtsc();
 
   void ret(int imm16);
@@ -2110,6 +2124,9 @@
   // duplicate 4-bytes integer data from src into 8 locations in dest
   void vpbroadcastd(XMMRegister dst, XMMRegister src);
 
+  // duplicate 2-byte integer data from src into 16 locations in dest
+  void vpbroadcastw(XMMRegister dst, XMMRegister src);
+
   // duplicate n-bytes integer data from src into vector_len locations in dest
   void evpbroadcastb(XMMRegister dst, XMMRegister src, int vector_len);
   void evpbroadcastb(XMMRegister dst, Address src, int vector_len);
@@ -2141,6 +2158,11 @@
   // runtime code and native libraries.
   void vzeroupper();
 
+  // AVX support for vectorized conditional move (double). The following two instructions are only used together, as a pair.
+  void cmppd(XMMRegister dst, XMMRegister nds, XMMRegister src, int cop, int vector_len);
+  void vpblendd(XMMRegister dst, XMMRegister nds, XMMRegister src1, XMMRegister src2, int vector_len);
+
+
  protected:
   // Next instructions require 16-byte address alignment in SSE mode.
   // They should be called only from corresponding MacroAssembler instructions.
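
A hedged usage sketch for the cmppd/vpblendd pair declared above; the helper name and the operand roles are my reading of the encodings added in assembler_x86.cpp, not code from this changeset.

// Sketch only: a per-lane conditional move of doubles built from the two new
// instructions.  'cop' is the packed-compare predicate immediate.
static void cmov_double_sketch(MacroAssembler* masm,
                               XMMRegister dst, XMMRegister mask,
                               XMMRegister a, XMMRegister b,
                               int cop, int vector_len) {
  masm->cmppd(mask, a, b, cop, vector_len);      // mask lane = (a cop b) ? ~0 : 0
  masm->vpblendd(dst, b, a, mask, vector_len);   // dst lane  = mask ? a : b
}
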
--- a/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/c1_LIRAssembler_x86.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -509,86 +509,6 @@
 }
 
 
-// This is the fast version of java.lang.String.compare; it has not
-// OSR-entry and therefore, we generate a slow version for OSR's
-void LIR_Assembler::emit_string_compare(LIR_Opr arg0, LIR_Opr arg1, LIR_Opr dst, CodeEmitInfo* info) {
-  __ movptr (rbx, rcx); // receiver is in rcx
-  __ movptr (rax, arg1->as_register());
-
-  // Get addresses of first characters from both Strings
-  __ load_heap_oop(rsi, Address(rax, java_lang_String::value_offset_in_bytes()));
-  if (java_lang_String::has_offset_field()) {
-    __ movptr     (rcx, Address(rax, java_lang_String::offset_offset_in_bytes()));
-    __ movl       (rax, Address(rax, java_lang_String::count_offset_in_bytes()));
-    __ lea        (rsi, Address(rsi, rcx, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
-  } else {
-    __ movl       (rax, Address(rsi, arrayOopDesc::length_offset_in_bytes()));
-    __ lea        (rsi, Address(rsi, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
-  }
-
-  // rbx, may be NULL
-  add_debug_info_for_null_check_here(info);
-  __ load_heap_oop(rdi, Address(rbx, java_lang_String::value_offset_in_bytes()));
-  if (java_lang_String::has_offset_field()) {
-    __ movptr     (rcx, Address(rbx, java_lang_String::offset_offset_in_bytes()));
-    __ movl       (rbx, Address(rbx, java_lang_String::count_offset_in_bytes()));
-    __ lea        (rdi, Address(rdi, rcx, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
-  } else {
-    __ movl       (rbx, Address(rdi, arrayOopDesc::length_offset_in_bytes()));
-    __ lea        (rdi, Address(rdi, arrayOopDesc::base_offset_in_bytes(T_CHAR)));
-  }
-
-  // compute minimum length (in rax) and difference of lengths (on top of stack)
-  __ mov   (rcx, rbx);
-  __ subptr(rbx, rax); // subtract lengths
-  __ push  (rbx);      // result
-  __ cmov  (Assembler::lessEqual, rax, rcx);
-
-  // is minimum length 0?
-  Label noLoop, haveResult;
-  __ testptr (rax, rax);
-  __ jcc (Assembler::zero, noLoop);
-
-  // compare first characters
-  __ load_unsigned_short(rcx, Address(rdi, 0));
-  __ load_unsigned_short(rbx, Address(rsi, 0));
-  __ subl(rcx, rbx);
-  __ jcc(Assembler::notZero, haveResult);
-  // starting loop
-  __ decrement(rax); // we already tested index: skip one
-  __ jcc(Assembler::zero, noLoop);
-
-  // set rsi.edi to the end of the arrays (arrays have same length)
-  // negate the index
-
-  __ lea(rsi, Address(rsi, rax, Address::times_2, type2aelembytes(T_CHAR)));
-  __ lea(rdi, Address(rdi, rax, Address::times_2, type2aelembytes(T_CHAR)));
-  __ negptr(rax);
-
-  // compare the strings in a loop
-
-  Label loop;
-  __ align(wordSize);
-  __ bind(loop);
-  __ load_unsigned_short(rcx, Address(rdi, rax, Address::times_2, 0));
-  __ load_unsigned_short(rbx, Address(rsi, rax, Address::times_2, 0));
-  __ subl(rcx, rbx);
-  __ jcc(Assembler::notZero, haveResult);
-  __ increment(rax);
-  __ jcc(Assembler::notZero, loop);
-
-  // strings are equal up to min length
-
-  __ bind(noLoop);
-  __ pop(rax);
-  return_op(LIR_OprFact::illegalOpr);
-
-  __ bind(haveResult);
-  // leave instruction is going to discard the TOS value
-  __ mov (rax, rcx); // result of call is in rax,
-}
-
-
 void LIR_Assembler::return_op(LIR_Opr result) {
   assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == rax, "word returns are in rax,");
   if (!result->is_illegal() && result->is_float_kind() && !result->is_xmm_register()) {
@@ -2441,7 +2361,6 @@
   } else if (value->is_double_fpu()) {
     assert(value->fpu_regnrLo() == 0 && dest->fpu_regnrLo() == 0, "both must be on TOS");
     switch(code) {
-      case lir_log   : __ flog() ; break;
       case lir_log10 : __ flog10() ; break;
       case lir_abs   : __ fabs() ; break;
       case lir_sqrt  : __ fsqrt(); break;
--- a/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -809,8 +809,8 @@
 void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
   assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type");
 
-  if (x->id() == vmIntrinsics::_dexp) {
-    do_ExpIntrinsic(x);
+  if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog) {
+    do_LibmIntrinsic(x);
     return;
   }
 
@@ -822,7 +822,6 @@
       case vmIntrinsics::_dsin:
       case vmIntrinsics::_dcos:
       case vmIntrinsics::_dtan:
-      case vmIntrinsics::_dlog:
       case vmIntrinsics::_dlog10:
       case vmIntrinsics::_dpow:
         use_fpu = true;
@@ -873,7 +872,6 @@
     case vmIntrinsics::_dsin:   __ sin  (calc_input, calc_result, tmp1, tmp2);              break;
     case vmIntrinsics::_dcos:   __ cos  (calc_input, calc_result, tmp1, tmp2);              break;
     case vmIntrinsics::_dtan:   __ tan  (calc_input, calc_result, tmp1, tmp2);              break;
-    case vmIntrinsics::_dlog:   __ log  (calc_input, calc_result, tmp1);                    break;
     case vmIntrinsics::_dlog10: __ log10(calc_input, calc_result, tmp1);                    break;
     case vmIntrinsics::_dpow:   __ pow  (calc_input, calc_input2, calc_result, tmp1, tmp2, FrameMap::rax_opr, FrameMap::rcx_opr, FrameMap::rdx_opr); break;
     default:                    ShouldNotReachHere();
@@ -884,7 +882,7 @@
   }
 }
 
-void LIRGenerator::do_ExpIntrinsic(Intrinsic* x) {
+void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) {
   LIRItem value(x->argument_at(0), this);
   value.set_destroys_register();
 
@@ -900,13 +898,33 @@
 #ifndef _LP64
   LIR_Opr tmp = FrameMap::fpu0_double_opr;
   result_reg = tmp;
-  if (VM_Version::supports_sse2()) {
-    __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
-  } else {
-    __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
+  switch(x->id()) {
+    case vmIntrinsics::_dexp:
+      if (VM_Version::supports_sse2()) {
+        __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
+      } else {
+        __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args());
+      }
+      break;
+    case vmIntrinsics::_dlog:
+      if (VM_Version::supports_sse2()) {
+        __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
+      }
+      else {
+        __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args());
+      }
+      break;
+    default:  ShouldNotReachHere();
   }
 #else
-  __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
+  switch (x->id()) {
+    case vmIntrinsics::_dexp:
+      __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args());
+      break;
+    case vmIntrinsics::_dlog:
+      __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args());
+      break;
+  }
 #endif
   __ move(result_reg, calc_result);
 }
--- a/src/cpu/x86/vm/c1_LinearScan_x86.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/c1_LinearScan_x86.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -786,7 +786,6 @@
       break;
     }
 
-    case lir_log:
     case lir_log10: {
       // log10 needs one temporary fpu stack slot, so
       // there is one temporary register stored in temp of the
--- a/src/cpu/x86/vm/globals_x86.hpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/globals_x86.hpp	Wed Nov 11 23:51:57 2015 -0500
@@ -91,6 +91,8 @@
 
 define_pd_global(uintx, TypeProfileLevel, 111);
 
+define_pd_global(bool, CompactStrings, true);
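
The flag above gates the JDK 9 compact string representation (a byte[] value plus a coder selecting Latin-1 or UTF-16). The sketch below is a data-level illustration only; the names and layout are assumptions, not HotSpot or JDK source.

// Illustrative only -- assumed names; ignores byte order and alignment details.
enum CoderSketch { LATIN1_sketch = 0, UTF16_sketch = 1 };

static jchar char_at_sketch(const jbyte* value, int coder, int index) {
  if (coder == LATIN1_sketch) {
    return (jchar)(value[index] & 0xFF);      // one byte per char
  }
  const jchar* chars = (const jchar*)value;   // two bytes per char
  return chars[index];
}
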
+
 define_pd_global(bool, PreserveFramePointer, false);
 
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint) \
--- a/src/cpu/x86/vm/interpreter_x86_32.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/interpreter_x86_32.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -132,10 +132,15 @@
         __ fabs();
         break;
     case Interpreter::java_lang_math_log:
-        __ flog();
-        // Store to stack to convert 80bit precision back to 64bits
-        __ push_fTOS();
-        __ pop_fTOS();
+        __ subptr(rsp, 2 * wordSize);
+        __ fstp_d(Address(rsp, 0));
+        if (VM_Version::supports_sse2()) {
+          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
+        }
+        else {
+          __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)));
+        }
+        __ addptr(rsp, 2 * wordSize);
         break;
     case Interpreter::java_lang_math_log10:
         __ flog10();
--- a/src/cpu/x86/vm/interpreter_x86_64.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/interpreter_x86_64.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -253,6 +253,9 @@
   } else if (kind == Interpreter::java_lang_math_exp) {
     __ movdbl(xmm0, Address(rsp, wordSize));
     __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp())));
+  } else if (kind == Interpreter::java_lang_math_log) {
+    __ movdbl(xmm0, Address(rsp, wordSize));
+    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog())));
   } else {
     __ fld_d(Address(rsp, wordSize));
     switch (kind) {
@@ -268,9 +271,6 @@
       case Interpreter::java_lang_math_abs:
           __ fabs();
           break;
-      case Interpreter::java_lang_math_log:
-          __ flog();
-          break;
       case Interpreter::java_lang_math_log10:
           __ flog10();
           break;
--- a/src/cpu/x86/vm/jvmciCodeInstaller_x86.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/jvmciCodeInstaller_x86.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -85,6 +85,23 @@
   }
 }
 
+void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle& constant) {
+  address pc = _instructions->start() + pc_offset;
+  if (HotSpotMetaspaceConstantImpl::compressed(constant)) {
+#ifdef _LP64
+    address operand = Assembler::locate_operand(pc, Assembler::narrow_oop_operand);
+    *((narrowKlass*) operand) = record_narrow_metadata_reference(constant);
+    TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
+#else
+    fatal("compressed Klass* on 32bit");
+#endif
+  } else {
+    address operand = Assembler::locate_operand(pc, Assembler::imm_operand);
+    *((Metadata**) operand) = record_metadata_reference(constant);
+    TRACE_jvmci_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(operand));
+  }
+}
+
 void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset) {
   address pc = _instructions->start() + pc_offset;
 
@@ -100,16 +117,6 @@
   TRACE_jvmci_3("relocating at " PTR_FORMAT "/" PTR_FORMAT " with destination at " PTR_FORMAT " (%d)", p2i(pc), p2i(operand), p2i(dest), data_offset);
 }
 
-void CodeInstaller::pd_relocate_CodeBlob(CodeBlob* cb, NativeInstruction* inst) {
-  if (cb->is_nmethod()) {
-    nmethod* nm = (nmethod*) cb;
-    nativeJump_at((address)inst)->set_jump_destination(nm->verified_entry_point());
-  } else {
-    nativeJump_at((address)inst)->set_jump_destination(cb->code_begin());
-  }
-  _instructions->relocate((address)inst, runtime_call_Relocation::spec(), Assembler::call32_operand);
-}
-
 void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination) {
   address pc = (address) inst;
   if (inst->is_call()) {
--- a/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/macroAssembler_x86.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -46,6 +46,9 @@
 #include "gc/g1/heapRegion.hpp"
 #endif // INCLUDE_ALL_GCS
 #include "crc32c.h"
+#ifdef COMPILER2
+#include "opto/intrinsicnode.hpp"
+#endif
 
 #ifdef PRODUCT
 #define BLOCK_COMMENT(str) /* nothing */
@@ -6299,25 +6302,34 @@
   }
 }
 
+#ifdef COMPILER2
+
 // IndexOf for constant substrings with size >= 8 chars
 // which don't need to be loaded through stack.
 void MacroAssembler::string_indexofC8(Register str1, Register str2,
                                       Register cnt1, Register cnt2,
                                       int int_cnt2,  Register result,
-                                      XMMRegister vec, Register tmp) {
+                                      XMMRegister vec, Register tmp,
+                                      int ae) {
   ShortBranchVerifier sbv(this);
   assert(UseSSE42Intrinsics, "SSE4.2 is required");
-
-  // This method uses pcmpestri instruction with bound registers
+  assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
+
+  // This method uses the pcmpestri instruction with bound registers
   //   inputs:
   //     xmm - substring
   //     rax - substring length (elements count)
   //     mem - scanned string
   //     rdx - string length (elements count)
   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
+  //     0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
   //   outputs:
   //     rcx - matched index in string
   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
+  int mode   = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
+  int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; // UU, UL -> 8
+  Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
+  Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
 
   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR,
         RET_FOUND, RET_NOT_FOUND, EXIT, FOUND_SUBSTR,
@@ -6326,20 +6338,28 @@
   // Note, inline_string_indexOf() generates checks:
   // if (substr.count > string.count) return -1;
   // if (substr.count == 0) return 0;
-  assert(int_cnt2 >= 8, "this code isused only for cnt2 >= 8 chars");
+  assert(int_cnt2 >= stride, "this code is used only for cnt2 >= 8 chars");
 
   // Load substring.
-  movdqu(vec, Address(str2, 0));
+  if (ae == StrIntrinsicNode::UL) {
+    pmovzxbw(vec, Address(str2, 0));
+  } else {
+    movdqu(vec, Address(str2, 0));
+  }
   movl(cnt2, int_cnt2);
   movptr(result, str1); // string addr
 
-  if (int_cnt2 > 8) {
+  if (int_cnt2 > stride) {
     jmpb(SCAN_TO_SUBSTR);
 
     // Reload substr for rescan, this code
     // is executed only for large substrings (> 8 chars)
     bind(RELOAD_SUBSTR);
-    movdqu(vec, Address(str2, 0));
+    if (ae == StrIntrinsicNode::UL) {
+      pmovzxbw(vec, Address(str2, 0));
+    } else {
+      movdqu(vec, Address(str2, 0));
+    }
     negptr(cnt2); // Jumped here with negative cnt2, convert to positive
 
     bind(RELOAD_STR);
@@ -6358,15 +6378,15 @@
     cmpl(cnt1, cnt2);
     jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
 
-    addptr(result, 2);
+    addptr(result, (1<<scale1));
 
   } // (int_cnt2 > 8)
 
   // Scan string for start of substr in 16-byte vectors
   bind(SCAN_TO_SUBSTR);
-  pcmpestri(vec, Address(result, 0), 0x0d);
+  pcmpestri(vec, Address(result, 0), mode);
   jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
-  subl(cnt1, 8);
+  subl(cnt1, stride);
   jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
   cmpl(cnt1, cnt2);
   jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
@@ -6376,19 +6396,19 @@
   // Found a potential substr
   bind(FOUND_CANDIDATE);
   // Matched whole vector if first element matched (tmp(rcx) == 0).
-  if (int_cnt2 == 8) {
+  if (int_cnt2 == stride) {
     jccb(Assembler::overflow, RET_FOUND);    // OF == 1
   } else { // int_cnt2 > 8
     jccb(Assembler::overflow, FOUND_SUBSTR);
   }
   // After pcmpestri tmp(rcx) contains matched element index
   // Compute start addr of substr
-  lea(result, Address(result, tmp, Address::times_2));
+  lea(result, Address(result, tmp, scale1));
 
   // Make sure string is still long enough
   subl(cnt1, tmp);
   cmpl(cnt1, cnt2);
-  if (int_cnt2 == 8) {
+  if (int_cnt2 == stride) {
     jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
   } else { // int_cnt2 > 8
     jccb(Assembler::greaterEqual, MATCH_SUBSTR_HEAD);
@@ -6399,11 +6419,11 @@
   movl(result, -1);
   jmpb(EXIT);
 
-  if (int_cnt2 > 8) {
+  if (int_cnt2 > stride) {
     // This code is optimized for the case when whole substring
     // is matched if its head is matched.
     bind(MATCH_SUBSTR_HEAD);
-    pcmpestri(vec, Address(result, 0), 0x0d);
+    pcmpestri(vec, Address(result, 0), mode);
     // Reload only the string if it does not match
     jccb(Assembler::noOverflow, RELOAD_STR); // OF == 0
 
@@ -6412,31 +6432,41 @@
     bind(FOUND_SUBSTR);
     // First 8 chars are already matched.
     negptr(cnt2);
-    addptr(cnt2, 8);
+    addptr(cnt2, stride);
 
     bind(SCAN_SUBSTR);
-    subl(cnt1, 8);
-    cmpl(cnt2, -8); // Do not read beyond substring
+    subl(cnt1, stride);
+    cmpl(cnt2, -stride); // Do not read beyond substring
     jccb(Assembler::lessEqual, CONT_SCAN_SUBSTR);
     // Back-up strings to avoid reading beyond substring:
     // cnt1 = cnt1 - cnt2 + 8
     addl(cnt1, cnt2); // cnt2 is negative
-    addl(cnt1, 8);
-    movl(cnt2, 8); negptr(cnt2);
+    addl(cnt1, stride);
+    movl(cnt2, stride); negptr(cnt2);
     bind(CONT_SCAN_SUBSTR);
     if (int_cnt2 < (int)G) {
-      movdqu(vec, Address(str2, cnt2, Address::times_2, int_cnt2*2));
-      pcmpestri(vec, Address(result, cnt2, Address::times_2, int_cnt2*2), 0x0d);
+      int tail_off1 = int_cnt2<<scale1;
+      int tail_off2 = int_cnt2<<scale2;
+      if (ae == StrIntrinsicNode::UL) {
+        pmovzxbw(vec, Address(str2, cnt2, scale2, tail_off2));
+      } else {
+        movdqu(vec, Address(str2, cnt2, scale2, tail_off2));
+      }
+      pcmpestri(vec, Address(result, cnt2, scale1, tail_off1), mode);
     } else {
       // calculate index in register to avoid integer overflow (int_cnt2*2)
       movl(tmp, int_cnt2);
       addptr(tmp, cnt2);
-      movdqu(vec, Address(str2, tmp, Address::times_2, 0));
-      pcmpestri(vec, Address(result, tmp, Address::times_2, 0), 0x0d);
+      if (ae == StrIntrinsicNode::UL) {
+        pmovzxbw(vec, Address(str2, tmp, scale2, 0));
+      } else {
+        movdqu(vec, Address(str2, tmp, scale2, 0));
+      }
+      pcmpestri(vec, Address(result, tmp, scale1, 0), mode);
     }
     // Need to reload strings pointers if not matched whole vector
     jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
-    addptr(cnt2, 8);
+    addptr(cnt2, stride);
     jcc(Assembler::negative, SCAN_SUBSTR);
     // Fall through if found full substring
 
@@ -6446,7 +6476,9 @@
   // Found result if we matched full small substring.
   // Compute substr offset
   subptr(result, str1);
-  shrl(result, 1); // index
+  if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
+    shrl(result, 1); // index
+  }
   bind(EXIT);
 
 } // string_indexofC8
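
For orientation, how the pcmpestri immediate used above decomposes. The bit values come from the mode comments in the hunks above; the constant names below are invented for illustration.

// Illustrative constants only.
const int SUBSTR_SEARCH   = 0x0c;  // 1100b: "equal ordered" aggregation
const int UNSIGNED_BYTES  = 0x00;  // source elements are bytes  (Latin-1)
const int UNSIGNED_SHORTS = 0x01;  // source elements are shorts (UTF-16 chars)

const int mode_LL = SUBSTR_SEARCH | UNSIGNED_BYTES;   // 0x0c, ae == LL
const int mode_U  = SUBSTR_SEARCH | UNSIGNED_SHORTS;  // 0x0d, ae == UU or UL
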
@@ -6455,9 +6487,12 @@
 void MacroAssembler::string_indexof(Register str1, Register str2,
                                     Register cnt1, Register cnt2,
                                     int int_cnt2,  Register result,
-                                    XMMRegister vec, Register tmp) {
+                                    XMMRegister vec, Register tmp,
+                                    int ae) {
   ShortBranchVerifier sbv(this);
   assert(UseSSE42Intrinsics, "SSE4.2 is required");
+  assert(ae != StrIntrinsicNode::LU, "Invalid encoding");
+
   //
   // int_cnt2 is length of small (< 8 chars) constant substring
   // or (-1) for non constant substring in which case its length
@@ -6467,18 +6502,22 @@
   // if (substr.count > string.count) return -1;
   // if (substr.count == 0) return 0;
   //
-  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < 8), "should be != 0");
-
-  // This method uses pcmpestri instruction with bound registers
+  int stride = (ae == StrIntrinsicNode::LL) ? 16 : 8; // UU, UL -> 8
+  assert(int_cnt2 == -1 || (0 < int_cnt2 && int_cnt2 < stride), "should be != 0");
+  // This method uses the pcmpestri instruction with bound registers
   //   inputs:
   //     xmm - substring
   //     rax - substring length (elements count)
   //     mem - scanned string
   //     rdx - string length (elements count)
   //     0xd - mode: 1100 (substring search) + 01 (unsigned shorts)
+  //     0xc - mode: 1100 (substring search) + 00 (unsigned bytes)
   //   outputs:
   //     rcx - matched index in string
   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
+  int mode = (ae == StrIntrinsicNode::LL) ? 0x0c : 0x0d; // bytes or shorts
+  Address::ScaleFactor scale1 = (ae == StrIntrinsicNode::LL) ? Address::times_1 : Address::times_2;
+  Address::ScaleFactor scale2 = (ae == StrIntrinsicNode::UL) ? Address::times_1 : scale1;
 
   Label RELOAD_SUBSTR, SCAN_TO_SUBSTR, SCAN_SUBSTR, ADJUST_STR,
         RET_FOUND, RET_NOT_FOUND, CLEANUP, FOUND_SUBSTR,
@@ -6492,23 +6531,40 @@
     movptr(tmp, rsp); // save old SP
 
     if (int_cnt2 > 0) {     // small (< 8 chars) constant substring
-      if (int_cnt2 == 1) {  // One char
+      if (int_cnt2 == (1>>scale2)) { // One byte
+        assert((ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL), "Only possible for latin1 encoding");
+        load_unsigned_byte(result, Address(str2, 0));
+        movdl(vec, result); // move 32 bits
+      } else if (ae == StrIntrinsicNode::LL && int_cnt2 == 3) {  // Three bytes
+        // Not enough header space in 32-bit VM: 12+3 = 15.
+        movl(result, Address(str2, -1));
+        shrl(result, 8);
+        movdl(vec, result); // move 32 bits
+      } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (2>>scale2)) {  // One char
         load_unsigned_short(result, Address(str2, 0));
         movdl(vec, result); // move 32 bits
-      } else if (int_cnt2 == 2) { // Two chars
+      } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (4>>scale2)) { // Two chars
         movdl(vec, Address(str2, 0)); // move 32 bits
-      } else if (int_cnt2 == 4) { // Four chars
+      } else if (ae != StrIntrinsicNode::UL && int_cnt2 == (8>>scale2)) { // Four chars
         movq(vec, Address(str2, 0));  // move 64 bits
-      } else { // cnt2 = { 3, 5, 6, 7 }
+      } else { // cnt2 = { 3, 5, 6, 7 } || (ae == StrIntrinsicNode::UL && cnt2 = {2, ..., 7})
         // Array header size is 12 bytes in 32-bit VM
         // + 6 bytes for 3 chars == 18 bytes,
         // enough space to load vec and shift.
         assert(HeapWordSize*TypeArrayKlass::header_size() >= 12,"sanity");
-        movdqu(vec, Address(str2, (int_cnt2*2)-16));
-        psrldq(vec, 16-(int_cnt2*2));
+        if (ae == StrIntrinsicNode::UL) {
+          int tail_off = int_cnt2-8;
+          pmovzxbw(vec, Address(str2, tail_off));
+          psrldq(vec, -2*tail_off);
+        }
+        else {
+          int tail_off = int_cnt2*(1<<scale2);
+          movdqu(vec, Address(str2, tail_off-16));
+          psrldq(vec, 16-tail_off);
+        }
       }
     } else { // not constant substring
-      cmpl(cnt2, 8);
+      cmpl(cnt2, stride);
       jccb(Assembler::aboveEqual, BIG_STRINGS); // Both strings are big enough
 
       // We can read beyond string if srt+16 does not cross page boundary
@@ -6521,12 +6577,17 @@
 
       // Move small strings to stack to allow load 16 bytes into vec.
       subptr(rsp, 16);
-      int stk_offset = wordSize-2;
+      int stk_offset = wordSize-(1<<scale2);
       push(cnt2);
 
       bind(COPY_SUBSTR);
-      load_unsigned_short(result, Address(str2, cnt2, Address::times_2, -2));
-      movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
+      if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UL) {
+        load_unsigned_byte(result, Address(str2, cnt2, scale2, -1));
+        movb(Address(rsp, cnt2, scale2, stk_offset), result);
+      } else if (ae == StrIntrinsicNode::UU) {
+        load_unsigned_short(result, Address(str2, cnt2, scale2, -2));
+        movw(Address(rsp, cnt2, scale2, stk_offset), result);
+      }
       decrement(cnt2);
       jccb(Assembler::notZero, COPY_SUBSTR);
 
@@ -6535,7 +6596,7 @@
     } // non constant
 
     bind(CHECK_STR);
-    cmpl(cnt1, 8);
+    cmpl(cnt1, stride);
     jccb(Assembler::aboveEqual, BIG_STRINGS);
 
     // Check cross page boundary.
@@ -6545,7 +6606,7 @@
     jccb(Assembler::belowEqual, BIG_STRINGS);
 
     subptr(rsp, 16);
-    int stk_offset = -2;
+    int stk_offset = -(1<<scale1);
     if (int_cnt2 < 0) { // not constant
       push(cnt2);
       stk_offset += wordSize;
@@ -6553,8 +6614,13 @@
     movl(cnt2, cnt1);
 
     bind(COPY_STR);
-    load_unsigned_short(result, Address(str1, cnt2, Address::times_2, -2));
-    movw(Address(rsp, cnt2, Address::times_2, stk_offset), result);
+    if (ae == StrIntrinsicNode::LL) {
+      load_unsigned_byte(result, Address(str1, cnt2, scale1, -1));
+      movb(Address(rsp, cnt2, scale1, stk_offset), result);
+    } else {
+      load_unsigned_short(result, Address(str1, cnt2, scale1, -2));
+      movw(Address(rsp, cnt2, scale1, stk_offset), result);
+    }
     decrement(cnt2);
     jccb(Assembler::notZero, COPY_STR);
 
@@ -6566,7 +6632,11 @@
     bind(BIG_STRINGS);
     // Load substring.
     if (int_cnt2 < 0) { // -1
-      movdqu(vec, Address(str2, 0));
+      if (ae == StrIntrinsicNode::UL) {
+        pmovzxbw(vec, Address(str2, 0));
+      } else {
+        movdqu(vec, Address(str2, 0));
+      }
       push(cnt2);       // substr count
       push(str2);       // substr addr
       push(str1);       // string addr
@@ -6597,37 +6667,43 @@
     bind(RELOAD_SUBSTR);
     movptr(str2, Address(rsp, 2*wordSize));
     movl(cnt2, Address(rsp, 3*wordSize));
-    movdqu(vec, Address(str2, 0));
+    if (ae == StrIntrinsicNode::UL) {
+      pmovzxbw(vec, Address(str2, 0));
+    } else {
+      movdqu(vec, Address(str2, 0));
+    }
     // We came here after the beginning of the substring was
     // matched but the rest of it was not, so we need to search
     // again. Start from the next element after the previous match.
     subptr(str1, result); // Restore counter
-    shrl(str1, 1);
+    if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
+      shrl(str1, 1);
+    }
     addl(cnt1, str1);
     decrementl(cnt1);   // Shift to next element
     cmpl(cnt1, cnt2);
     jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
 
-    addptr(result, 2);
+    addptr(result, (1<<scale1));
   } // non constant
 
   // Scan string for start of substr in 16-byte vectors
   bind(SCAN_TO_SUBSTR);
   assert(cnt1 == rdx && cnt2 == rax && tmp == rcx, "pcmpestri");
-  pcmpestri(vec, Address(result, 0), 0x0d);
+  pcmpestri(vec, Address(result, 0), mode);
   jccb(Assembler::below, FOUND_CANDIDATE);   // CF == 1
-  subl(cnt1, 8);
+  subl(cnt1, stride);
   jccb(Assembler::lessEqual, RET_NOT_FOUND); // Scanned full string
   cmpl(cnt1, cnt2);
   jccb(Assembler::negative, RET_NOT_FOUND);  // Left less than substring
   addptr(result, 16);
 
   bind(ADJUST_STR);
-  cmpl(cnt1, 8); // Do not read beyond string
+  cmpl(cnt1, stride); // Do not read beyond string
   jccb(Assembler::greaterEqual, SCAN_TO_SUBSTR);
   // Back-up string to avoid reading beyond string.
-  lea(result, Address(result, cnt1, Address::times_2, -16));
-  movl(cnt1, 8);
+  lea(result, Address(result, cnt1, scale1, -16));
+  movl(cnt1, stride);
   jmpb(SCAN_TO_SUBSTR);
 
   // Found a potential substr
@@ -6646,13 +6722,12 @@
 
   bind(FOUND_SUBSTR);
   // Compute start addr of substr
-  lea(result, Address(result, tmp, Address::times_2));
-
+  lea(result, Address(result, tmp, scale1));
   if (int_cnt2 > 0) { // Constant substring
     // Repeat search for small substring (< 8 chars)
     // from new point without reloading substring.
     // Have to check that we don't read beyond string.
-    cmpl(tmp, 8-int_cnt2);
+    cmpl(tmp, stride-int_cnt2);
     jccb(Assembler::greater, ADJUST_STR);
     // Fall through if matched whole substring.
   } else { // non constant
@@ -6660,12 +6735,12 @@
 
     addl(tmp, cnt2);
     // Found result if we matched whole substring.
-    cmpl(tmp, 8);
+    cmpl(tmp, stride);
     jccb(Assembler::lessEqual, RET_FOUND);
 
     // Repeat search for small substring (<= 8 chars)
     // from new point 'str1' without reloading substring.
-    cmpl(cnt2, 8);
+    cmpl(cnt2, stride);
     // Have to check that we don't read beyond string.
     jccb(Assembler::lessEqual, ADJUST_STR);
 
@@ -6678,26 +6753,40 @@
     jccb(Assembler::equal, CHECK_NEXT);
 
     bind(SCAN_SUBSTR);
-    pcmpestri(vec, Address(str1, 0), 0x0d);
+    pcmpestri(vec, Address(str1, 0), mode);
     // Need to reload strings pointers if not matched whole vector
     jcc(Assembler::noOverflow, RELOAD_SUBSTR); // OF == 0
 
     bind(CHECK_NEXT);
-    subl(cnt2, 8);
+    subl(cnt2, stride);
     jccb(Assembler::lessEqual, RET_FOUND_LONG); // Found full substring
     addptr(str1, 16);
-    addptr(str2, 16);
-    subl(cnt1, 8);
-    cmpl(cnt2, 8); // Do not read beyond substring
+    if (ae == StrIntrinsicNode::UL) {
+      addptr(str2, 8);
+    } else {
+      addptr(str2, 16);
+    }
+    subl(cnt1, stride);
+    cmpl(cnt2, stride); // Do not read beyond substring
     jccb(Assembler::greaterEqual, CONT_SCAN_SUBSTR);
     // Back-up strings to avoid reading beyond substring.
-    lea(str2, Address(str2, cnt2, Address::times_2, -16));
-    lea(str1, Address(str1, cnt2, Address::times_2, -16));
+
+    if (ae == StrIntrinsicNode::UL) {
+      lea(str2, Address(str2, cnt2, scale2, -8));
+      lea(str1, Address(str1, cnt2, scale1, -16));
+    } else {
+      lea(str2, Address(str2, cnt2, scale2, -16));
+      lea(str1, Address(str1, cnt2, scale1, -16));
+    }
     subl(cnt1, cnt2);
-    movl(cnt2, 8);
-    addl(cnt1, 8);
+    movl(cnt2, stride);
+    addl(cnt1, stride);
     bind(CONT_SCAN_SUBSTR);
-    movdqu(vec, Address(str2, 0));
+    if (ae == StrIntrinsicNode::UL) {
+      pmovzxbw(vec, Address(str2, 0));
+    } else {
+      movdqu(vec, Address(str2, 0));
+    }
     jmpb(SCAN_SUBSTR);
 
     bind(RET_FOUND_LONG);
@@ -6707,20 +6796,143 @@
   bind(RET_FOUND);
   // Compute substr offset
   subptr(result, str1);
-  shrl(result, 1); // index
-
+  if (ae == StrIntrinsicNode::UU || ae == StrIntrinsicNode::UL) {
+    shrl(result, 1); // index
+  }
   bind(CLEANUP);
   pop(rsp); // restore SP
 
 } // string_indexof
 
-// Compare strings.
+void MacroAssembler::string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
+                                         XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp) {
+  ShortBranchVerifier sbv(this);
+  assert(UseSSE42Intrinsics, "SSE4.2 is required");
+
+  int stride = 8;
+
+  Label FOUND_CHAR, SCAN_TO_CHAR, SCAN_TO_CHAR_LOOP,
+        SCAN_TO_8_CHAR, SCAN_TO_8_CHAR_LOOP, SCAN_TO_16_CHAR_LOOP,
+        RET_NOT_FOUND, SCAN_TO_8_CHAR_INIT,
+        FOUND_SEQ_CHAR, DONE_LABEL;
+
+  movptr(result, str1);
+  if (UseAVX >= 2) {
+    cmpl(cnt1, stride);
+    jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
+    cmpl(cnt1, 2*stride);
+    jccb(Assembler::less, SCAN_TO_8_CHAR_INIT);
+    movdl(vec1, ch);
+    vpbroadcastw(vec1, vec1);
+    vpxor(vec2, vec2);
+    movl(tmp, cnt1);
+    andl(tmp, 0xFFFFFFF0);   // vector count (in chars)
+    andl(cnt1, 0x0000000F);  // tail count (in chars)
+
+    bind(SCAN_TO_16_CHAR_LOOP);
+    vmovdqu(vec3, Address(result, 0));
+    vpcmpeqw(vec3, vec3, vec1, true);
+    vptest(vec2, vec3);
+    jcc(Assembler::carryClear, FOUND_CHAR);
+    addptr(result, 32);
+    subl(tmp, 2*stride);
+    jccb(Assembler::notZero, SCAN_TO_16_CHAR_LOOP);
+    jmp(SCAN_TO_8_CHAR);
+    bind(SCAN_TO_8_CHAR_INIT);
+    movdl(vec1, ch);
+    pshuflw(vec1, vec1, 0x00);
+    pshufd(vec1, vec1, 0);
+    pxor(vec2, vec2);
+  }
+  if (UseAVX >= 2 || UseSSE42Intrinsics) {
+    bind(SCAN_TO_8_CHAR);
+    cmpl(cnt1, stride);
+    if (UseAVX >= 2) {
+      jccb(Assembler::less, SCAN_TO_CHAR);
+    }
+    if (!(UseAVX >= 2)) {
+      jccb(Assembler::less, SCAN_TO_CHAR_LOOP);
+      movdl(vec1, ch);
+      pshuflw(vec1, vec1, 0x00);
+      pshufd(vec1, vec1, 0);
+      pxor(vec2, vec2);
+    }
+    movl(tmp, cnt1);
+    andl(tmp, 0xFFFFFFF8);   // vector count (in chars)
+    andl(cnt1, 0x00000007);  // tail count (in chars)
+
+    bind(SCAN_TO_8_CHAR_LOOP);
+    movdqu(vec3, Address(result, 0));
+    pcmpeqw(vec3, vec1);
+    ptest(vec2, vec3);
+    jcc(Assembler::carryClear, FOUND_CHAR);
+    addptr(result, 16);
+    subl(tmp, stride);
+    jccb(Assembler::notZero, SCAN_TO_8_CHAR_LOOP);
+  }
+  bind(SCAN_TO_CHAR);
+  testl(cnt1, cnt1);
+  jcc(Assembler::zero, RET_NOT_FOUND);
+
+  bind(SCAN_TO_CHAR_LOOP);
+  load_unsigned_short(tmp, Address(result, 0));
+  cmpl(ch, tmp);
+  jccb(Assembler::equal, FOUND_SEQ_CHAR);
+  addptr(result, 2);
+  subl(cnt1, 1);
+  jccb(Assembler::zero, RET_NOT_FOUND);
+  jmp(SCAN_TO_CHAR_LOOP);
+
+  bind(RET_NOT_FOUND);
+  movl(result, -1);
+  jmpb(DONE_LABEL);
+
+  if (UseAVX >= 2 || UseSSE42Intrinsics) {
+    bind(FOUND_CHAR);
+    if (UseAVX >= 2) {
+      vpmovmskb(tmp, vec3);
+    } else {
+      pmovmskb(tmp, vec3);
+    }
+    bsfl(ch, tmp);
+    addl(result, ch);
+  }
+
+  bind(FOUND_SEQ_CHAR);
+  subptr(result, str1);
+  shrl(result, 1);
+
+  bind(DONE_LABEL);
+} // string_indexof_char
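For reference, a minimal scalar sketch of what string_indexof_char() computes (hypothetical helper, not part of this changeset):

    // Sketch only: return the index of the first occurrence of ch in the
    // cnt1-element UTF-16 array str1, or -1 if it does not occur.
    static int indexof_char_scalar(const jchar* str1, int cnt1, jchar ch) {
      for (int i = 0; i < cnt1; i++) {
        if (str1[i] == ch) return i;
      }
      return -1;
    }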
+
+// helper function for string_compare
+void MacroAssembler::load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
+                                        Address::ScaleFactor scale, Address::ScaleFactor scale1,
+                                        Address::ScaleFactor scale2, Register index, int ae) {
+  if (ae == StrIntrinsicNode::LL) {
+    load_unsigned_byte(elem1, Address(str1, index, scale, 0));
+    load_unsigned_byte(elem2, Address(str2, index, scale, 0));
+  } else if (ae == StrIntrinsicNode::UU) {
+    load_unsigned_short(elem1, Address(str1, index, scale, 0));
+    load_unsigned_short(elem2, Address(str2, index, scale, 0));
+  } else {
+    load_unsigned_byte(elem1, Address(str1, index, scale1, 0));
+    load_unsigned_short(elem2, Address(str2, index, scale2, 0));
+  }
+}
+
+// Compare strings, used for char[] and byte[].
 void MacroAssembler::string_compare(Register str1, Register str2,
                                     Register cnt1, Register cnt2, Register result,
-                                    XMMRegister vec1) {
+                                    XMMRegister vec1, int ae) {
   ShortBranchVerifier sbv(this);
   Label LENGTH_DIFF_LABEL, POP_LABEL, DONE_LABEL, WHILE_HEAD_LABEL;
-
+  int stride, stride2, adr_stride, adr_stride1, adr_stride2;
+  Address::ScaleFactor scale, scale1, scale2;
+
+  if (ae == StrIntrinsicNode::LU || ae == StrIntrinsicNode::UL) {
+    shrl(cnt2, 1);
+  }
   // Compute the minimum of the string lengths and the
   // difference of the string lengths (stack).
   // Do the conditional move stuff
@@ -6732,32 +6944,68 @@
   // Is the minimum length zero?
   testl(cnt2, cnt2);
   jcc(Assembler::zero, LENGTH_DIFF_LABEL);
-
-  // Compare first characters
-  load_unsigned_short(result, Address(str1, 0));
-  load_unsigned_short(cnt1, Address(str2, 0));
+  if (ae == StrIntrinsicNode::LL) {
+    // Load first bytes
+    load_unsigned_byte(result, Address(str1, 0));
+    load_unsigned_byte(cnt1, Address(str2, 0));
+  } else if (ae == StrIntrinsicNode::UU) {
+    // Load first characters
+    load_unsigned_short(result, Address(str1, 0));
+    load_unsigned_short(cnt1, Address(str2, 0));
+  } else {
+    load_unsigned_byte(result, Address(str1, 0));
+    load_unsigned_short(cnt1, Address(str2, 0));
+  }
   subl(result, cnt1);
   jcc(Assembler::notZero,  POP_LABEL);
+
+  if (ae == StrIntrinsicNode::UU) {
+    // Divide length by 2 to get number of chars
+    shrl(cnt2, 1);
+  }
   cmpl(cnt2, 1);
   jcc(Assembler::equal, LENGTH_DIFF_LABEL);
 
-  // Check if the strings start at the same location.
-  cmpptr(str1, str2);
-  jcc(Assembler::equal, LENGTH_DIFF_LABEL);
-
-  Address::ScaleFactor scale = Address::times_2;
-  int stride = 8;
+  // Check if the strings start at the same location and setup scale and stride
+  if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+    cmpptr(str1, str2);
+    jcc(Assembler::equal, LENGTH_DIFF_LABEL);
+    if (ae == StrIntrinsicNode::LL) {
+      scale = Address::times_1;
+      stride = 16;
+    } else {
+      scale = Address::times_2;
+      stride = 8;
+    }
+  } else {
+    scale = Address::no_scale;  // not used
+    scale1 = Address::times_1;
+    scale2 = Address::times_2;
+    stride = 8;
+  }
 
   if (UseAVX >= 2 && UseSSE42Intrinsics) {
     Label COMPARE_WIDE_VECTORS, VECTOR_NOT_EQUAL, COMPARE_WIDE_TAIL, COMPARE_SMALL_STR;
     Label COMPARE_WIDE_VECTORS_LOOP, COMPARE_16_CHARS, COMPARE_INDEX_CHAR;
     Label COMPARE_TAIL_LONG;
     int pcmpmask = 0x19;
+    if (ae == StrIntrinsicNode::LL) {
+      pcmpmask &= ~0x01;
+    }
 
     // Setup to compare 16-chars (32-bytes) vectors,
     // start from first character again because it has aligned address.
-    int stride2 = 16;
-    int adr_stride  = stride  << scale;
+    if (ae == StrIntrinsicNode::LL) {
+      stride2 = 32;
+    } else {
+      stride2 = 16;
+    }
+    if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+      adr_stride = stride << scale;
+    } else {
+      adr_stride1 = 8;  // stride << scale1;
+      adr_stride2 = 16; // stride << scale2;
+    }
 
     assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
     // rax and rdx are used by pcmpestri as elements counters
@@ -6767,26 +7015,39 @@
 
     // fast path : compare first 2 8-char vectors.
     bind(COMPARE_16_CHARS);
-    movdqu(vec1, Address(str1, 0));
+    if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+      movdqu(vec1, Address(str1, 0));
+    } else {
+      pmovzxbw(vec1, Address(str1, 0));
+    }
     pcmpestri(vec1, Address(str2, 0), pcmpmask);
     jccb(Assembler::below, COMPARE_INDEX_CHAR);
 
-    movdqu(vec1, Address(str1, adr_stride));
-    pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
+    if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+      movdqu(vec1, Address(str1, adr_stride));
+      pcmpestri(vec1, Address(str2, adr_stride), pcmpmask);
+    } else {
+      pmovzxbw(vec1, Address(str1, adr_stride1));
+      pcmpestri(vec1, Address(str2, adr_stride2), pcmpmask);
+    }
     jccb(Assembler::aboveEqual, COMPARE_WIDE_VECTORS);
     addl(cnt1, stride);
 
     // Compare the characters at index in cnt1
-    bind(COMPARE_INDEX_CHAR); //cnt1 has the offset of the mismatching character
-    load_unsigned_short(result, Address(str1, cnt1, scale));
-    load_unsigned_short(cnt2, Address(str2, cnt1, scale));
+    bind(COMPARE_INDEX_CHAR); // cnt1 has the offset of the mismatching character
+    load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae);
     subl(result, cnt2);
     jmp(POP_LABEL);
 
     // Setup the registers to start vector comparison loop
     bind(COMPARE_WIDE_VECTORS);
-    lea(str1, Address(str1, result, scale));
-    lea(str2, Address(str2, result, scale));
+    if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+      lea(str1, Address(str1, result, scale));
+      lea(str2, Address(str2, result, scale));
+    } else {
+      lea(str1, Address(str1, result, scale1));
+      lea(str2, Address(str2, result, scale2));
+    }
     subl(result, stride2);
     subl(cnt2, stride2);
     jccb(Assembler::zero, COMPARE_WIDE_TAIL);
@@ -6794,8 +7055,13 @@
 
     //  In a loop, compare 16-chars (32-bytes) at once using (vpxor+vptest)
     bind(COMPARE_WIDE_VECTORS_LOOP);
-    vmovdqu(vec1, Address(str1, result, scale));
-    vpxor(vec1, Address(str2, result, scale));
+    if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+      vmovdqu(vec1, Address(str1, result, scale));
+      vpxor(vec1, Address(str2, result, scale));
+    } else {
+      vpmovzxbw(vec1, Address(str1, result, scale1));
+      vpxor(vec1, Address(str2, result, scale2));
+    }
     vptest(vec1, vec1);
     jccb(Assembler::notZero, VECTOR_NOT_EQUAL);
     addptr(result, stride2);
@@ -6818,8 +7084,13 @@
     bind(VECTOR_NOT_EQUAL);
     // clean upper bits of YMM registers
     vpxor(vec1, vec1);
-    lea(str1, Address(str1, result, scale));
-    lea(str2, Address(str2, result, scale));
+    if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+      lea(str1, Address(str1, result, scale));
+      lea(str2, Address(str2, result, scale));
+    } else {
+      lea(str1, Address(str1, result, scale1));
+      lea(str2, Address(str2, result, scale2));
+    }
     jmp(COMPARE_16_CHARS);
 
     // Compare tail chars, length between 1 to 15 chars
@@ -6828,13 +7099,22 @@
     cmpl(cnt2, stride);
     jccb(Assembler::less, COMPARE_SMALL_STR);
 
-    movdqu(vec1, Address(str1, 0));
+    if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+      movdqu(vec1, Address(str1, 0));
+    } else {
+      pmovzxbw(vec1, Address(str1, 0));
+    }
     pcmpestri(vec1, Address(str2, 0), pcmpmask);
     jcc(Assembler::below, COMPARE_INDEX_CHAR);
     subptr(cnt2, stride);
     jccb(Assembler::zero, LENGTH_DIFF_LABEL);
-    lea(str1, Address(str1, result, scale));
-    lea(str2, Address(str2, result, scale));
+    if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+      lea(str1, Address(str1, result, scale));
+      lea(str2, Address(str2, result, scale));
+    } else {
+      lea(str1, Address(str1, result, scale1));
+      lea(str2, Address(str2, result, scale2));
+    }
     negptr(cnt2);
     jmpb(WHILE_HEAD_LABEL);
 
@@ -6846,10 +7126,17 @@
     // start from first character again because it has aligned address.
     movl(result, cnt2);
     andl(cnt2, ~(stride - 1));   // cnt2 holds the vector count
+    if (ae == StrIntrinsicNode::LL) {
+      pcmpmask &= ~0x01;
+    }
     jccb(Assembler::zero, COMPARE_TAIL);
-
-    lea(str1, Address(str1, result, scale));
-    lea(str2, Address(str2, result, scale));
+    if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+      lea(str1, Address(str1, result, scale));
+      lea(str2, Address(str2, result, scale));
+    } else {
+      lea(str1, Address(str1, result, scale1));
+      lea(str2, Address(str2, result, scale2));
+    }
     negptr(result);
 
     // pcmpestri
@@ -6865,8 +7152,13 @@
     assert(result == rax && cnt2 == rdx && cnt1 == rcx, "pcmpestri");
 
     bind(COMPARE_WIDE_VECTORS);
-    movdqu(vec1, Address(str1, result, scale));
-    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
+    if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+      movdqu(vec1, Address(str1, result, scale));
+      pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
+    } else {
+      pmovzxbw(vec1, Address(str1, result, scale1));
+      pcmpestri(vec1, Address(str2, result, scale2), pcmpmask);
+    }
     // After pcmpestri cnt1(rcx) contains mismatched element index
 
     jccb(Assembler::below, VECTOR_NOT_EQUAL);  // CF==1
@@ -6881,15 +7173,19 @@
     movl(cnt2, stride);
     movl(result, stride);
     negptr(result);
-    movdqu(vec1, Address(str1, result, scale));
-    pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
+    if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+      movdqu(vec1, Address(str1, result, scale));
+      pcmpestri(vec1, Address(str2, result, scale), pcmpmask);
+    } else {
+      pmovzxbw(vec1, Address(str1, result, scale1));
+      pcmpestri(vec1, Address(str2, result, scale2), pcmpmask);
+    }
     jccb(Assembler::aboveEqual, LENGTH_DIFF_LABEL);
 
     // Mismatched characters in the vectors
     bind(VECTOR_NOT_EQUAL);
     addptr(cnt1, result);
-    load_unsigned_short(result, Address(str1, cnt1, scale));
-    load_unsigned_short(cnt2, Address(str2, cnt1, scale));
+    load_next_elements(result, cnt2, str1, str2, scale, scale1, scale2, cnt1, ae);
     subl(result, cnt2);
     jmpb(POP_LABEL);
 
@@ -6898,15 +7194,19 @@
     // Fallthru to tail compare
   }
   // Shift str2 and str1 to the end of the arrays, negate min
-  lea(str1, Address(str1, cnt2, scale));
-  lea(str2, Address(str2, cnt2, scale));
+  if (ae == StrIntrinsicNode::LL || ae == StrIntrinsicNode::UU) {
+    lea(str1, Address(str1, cnt2, scale));
+    lea(str2, Address(str2, cnt2, scale));
+  } else {
+    lea(str1, Address(str1, cnt2, scale1));
+    lea(str2, Address(str2, cnt2, scale2));
+  }
   decrementl(cnt2);  // first character was compared already
   negptr(cnt2);
 
   // Compare the rest of the elements
   bind(WHILE_HEAD_LABEL);
-  load_unsigned_short(result, Address(str1, cnt2, scale, 0));
-  load_unsigned_short(cnt1, Address(str2, cnt2, scale, 0));
+  load_next_elements(result, cnt1, str1, str2, scale, scale1, scale2, cnt2, ae);
   subl(result, cnt1);
   jccb(Assembler::notZero, POP_LABEL);
   increment(cnt2);
@@ -6915,6 +7215,10 @@
   // Strings are equal up to min length.  Return the length difference.
   bind(LENGTH_DIFF_LABEL);
   pop(result);
+  if (ae == StrIntrinsicNode::UU) {
+    // Divide diff by 2 to get number of chars
+    sarl(result, 1);
+  }
   jmpb(DONE_LABEL);
 
   // Discard the stored length difference
@@ -6923,23 +7227,164 @@
 
   // That's it
   bind(DONE_LABEL);
-}
-
-// Compare char[] arrays aligned to 4 bytes or substrings.
-void MacroAssembler::char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
-                                        Register limit, Register result, Register chr,
-                                        XMMRegister vec1, XMMRegister vec2) {
+  if (ae == StrIntrinsicNode::UL) {
+    negl(result);
+  }
+}
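For reference, a minimal scalar sketch of the comparison string_compare() performs for the same-encoding cases (hypothetical helper, not part of this changeset; the LU/UL cases differ only in how each element is loaded and widened, plus the final negation for UL):

    // Sketch only: compare the common prefix element by element; if it is
    // identical, the length difference decides the result.
    template <typename T>
    static int string_compare_scalar(const T* str1, int cnt1,
                                     const T* str2, int cnt2) {
      int min_len = (cnt1 < cnt2) ? cnt1 : cnt2;
      for (int i = 0; i < min_len; i++) {
        int diff = (int)str1[i] - (int)str2[i];
        if (diff != 0) return diff;   // first mismatching element decides
      }
      return cnt1 - cnt2;             // equal prefix: length difference decides
    }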
+
+// Search for a non-ASCII character (i.e. a negative byte value) in a byte array,
+// returning true if one is found and false otherwise.
+void MacroAssembler::has_negatives(Register ary1, Register len,
+                                   Register result, Register tmp1,
+                                   XMMRegister vec1, XMMRegister vec2) {
+
+  // rsi: byte array
+  // rcx: len
+  // rax: result
   ShortBranchVerifier sbv(this);
-  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR;
+  assert_different_registers(ary1, len, result, tmp1);
+  assert_different_registers(vec1, vec2);
+  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE;
+
+  // len == 0
+  testl(len, len);
+  jcc(Assembler::zero, FALSE_LABEL);
+
+  movl(result, len); // copy
+
+  if (UseAVX >= 2) {
+    // With AVX2, use 32-byte vector compare
+    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
+
+    // Compare 32-byte vectors
+    andl(result, 0x0000001f);  //   tail count (in bytes)
+    andl(len, 0xffffffe0);   // vector count (in bytes)
+    jccb(Assembler::zero, COMPARE_TAIL);
+
+    lea(ary1, Address(ary1, len, Address::times_1));
+    negptr(len);
+
+    movl(tmp1, 0x80808080);   // create mask to test for negative bytes in vector
+    movdl(vec2, tmp1);
+    vpbroadcastd(vec2, vec2);
+
+    bind(COMPARE_WIDE_VECTORS);
+    vmovdqu(vec1, Address(ary1, len, Address::times_1));
+    vptest(vec1, vec2);
+    jccb(Assembler::notZero, TRUE_LABEL);
+    addptr(len, 32);
+    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
+
+    testl(result, result);
+    jccb(Assembler::zero, FALSE_LABEL);
+
+    vmovdqu(vec1, Address(ary1, result, Address::times_1, -32));
+    vptest(vec1, vec2);
+    jccb(Assembler::notZero, TRUE_LABEL);
+    jmpb(FALSE_LABEL);
+
+    bind(COMPARE_TAIL); // len is zero
+    movl(len, result);
+    // Fallthru to tail compare
+  } else if (UseSSE42Intrinsics) {
+    // With SSE4.2, use double quad vector compare
+    Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
+
+    // Compare 16-byte vectors
+    andl(result, 0x0000000f);  //   tail count (in bytes)
+    andl(len, 0xfffffff0);   // vector count (in bytes)
+    jccb(Assembler::zero, COMPARE_TAIL);
+
+    lea(ary1, Address(ary1, len, Address::times_1));
+    negptr(len);
+
+    movl(tmp1, 0x80808080);
+    movdl(vec2, tmp1);
+    pshufd(vec2, vec2, 0);
+
+    bind(COMPARE_WIDE_VECTORS);
+    movdqu(vec1, Address(ary1, len, Address::times_1));
+    ptest(vec1, vec2);
+    jccb(Assembler::notZero, TRUE_LABEL);
+    addptr(len, 16);
+    jcc(Assembler::notZero, COMPARE_WIDE_VECTORS);
+
+    testl(result, result);
+    jccb(Assembler::zero, FALSE_LABEL);
+
+    movdqu(vec1, Address(ary1, result, Address::times_1, -16));
+    ptest(vec1, vec2);
+    jccb(Assembler::notZero, TRUE_LABEL);
+    jmpb(FALSE_LABEL);
+
+    bind(COMPARE_TAIL); // len is zero
+    movl(len, result);
+    // Fallthru to tail compare
+  }
+
+  // Compare 4-byte vectors
+  andl(len, 0xfffffffc); // vector count (in bytes)
+  jccb(Assembler::zero, COMPARE_CHAR);
+
+  lea(ary1, Address(ary1, len, Address::times_1));
+  negptr(len);
+
+  bind(COMPARE_VECTORS);
+  movl(tmp1, Address(ary1, len, Address::times_1));
+  andl(tmp1, 0x80808080);
+  jccb(Assembler::notZero, TRUE_LABEL);
+  addptr(len, 4);
+  jcc(Assembler::notZero, COMPARE_VECTORS);
+
+  // Compare trailing char (final 2 bytes), if any
+  bind(COMPARE_CHAR);
+  testl(result, 0x2);   // tail  char
+  jccb(Assembler::zero, COMPARE_BYTE);
+  load_unsigned_short(tmp1, Address(ary1, 0));
+  andl(tmp1, 0x00008080);
+  jccb(Assembler::notZero, TRUE_LABEL);
+  subptr(result, 2);
+  lea(ary1, Address(ary1, 2));
+
+  bind(COMPARE_BYTE);
+  testl(result, 0x1);   // tail  byte
+  jccb(Assembler::zero, FALSE_LABEL);
+  load_unsigned_byte(tmp1, Address(ary1, 0));
+  andl(tmp1, 0x00000080);
+  jccb(Assembler::notEqual, TRUE_LABEL);
+  jmpb(FALSE_LABEL);
+
+  bind(TRUE_LABEL);
+  movl(result, 1);   // return true
+  jmpb(DONE);
+
+  bind(FALSE_LABEL);
+  xorl(result, result); // return false
+
+  // That's it
+  bind(DONE);
+  if (UseAVX >= 2) {
+    // clean upper bits of YMM registers
+    vpxor(vec1, vec1);
+    vpxor(vec2, vec2);
+  }
+}
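For reference, a minimal scalar sketch of what has_negatives() computes (hypothetical helper, not part of this changeset):

    // Sketch only: result is 1 if any byte has its sign bit set (i.e. the
    // array contains a non-ASCII value), 0 otherwise.
    static int has_negatives_scalar(const jbyte* ary1, int len) {
      for (int i = 0; i < len; i++) {
        if (ary1[i] < 0) return 1;
      }
      return 0;
    }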
+
+// Compare char[] or byte[] arrays aligned to 4 bytes or substrings.
+void MacroAssembler::arrays_equals(bool is_array_equ, Register ary1, Register ary2,
+                                   Register limit, Register result, Register chr,
+                                   XMMRegister vec1, XMMRegister vec2, bool is_char) {
+  ShortBranchVerifier sbv(this);
+  Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_VECTORS, COMPARE_CHAR, COMPARE_BYTE;
 
   int length_offset  = arrayOopDesc::length_offset_in_bytes();
-  int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
-
-  // Check the input args
-  cmpptr(ary1, ary2);
-  jcc(Assembler::equal, TRUE_LABEL);
+  int base_offset    = arrayOopDesc::base_offset_in_bytes(is_char ? T_CHAR : T_BYTE);
 
   if (is_array_equ) {
+    // Check the input args
+    cmpptr(ary1, ary2);
+    jcc(Assembler::equal, TRUE_LABEL);
+
     // Need additional checks for arrays_equals.
     testptr(ary1, ary1);
     jcc(Assembler::zero, FALSE_LABEL);
@@ -6962,7 +7407,10 @@
     lea(ary2, Address(ary2, base_offset));
   }
 
-  shll(limit, 1);      // byte count != 0
+  if (is_array_equ && is_char) {
+    // arrays_equals when used for char[].
+    shll(limit, 1);      // byte count != 0
+  }
   movl(result, limit); // copy
 
   if (UseAVX >= 2) {
@@ -6970,7 +7418,7 @@
     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
 
     // Compare 32-byte vectors
-    andl(result, 0x0000001e);  //   tail count (in bytes)
+    andl(result, 0x0000001f);  //   tail count (in bytes)
     andl(limit, 0xffffffe0);   // vector count (in bytes)
     jccb(Assembler::zero, COMPARE_TAIL);
 
@@ -7007,7 +7455,7 @@
     Label COMPARE_WIDE_VECTORS, COMPARE_TAIL;
 
     // Compare 16-byte vectors
-    andl(result, 0x0000000e);  //   tail count (in bytes)
+    andl(result, 0x0000000f);  //   tail count (in bytes)
     andl(limit, 0xfffffff0);   // vector count (in bytes)
     jccb(Assembler::zero, COMPARE_TAIL);
 
@@ -7059,12 +7507,26 @@
   // Compare trailing char (final 2 bytes), if any
   bind(COMPARE_CHAR);
   testl(result, 0x2);   // tail  char
-  jccb(Assembler::zero, TRUE_LABEL);
+  jccb(Assembler::zero, COMPARE_BYTE);
   load_unsigned_short(chr, Address(ary1, 0));
   load_unsigned_short(limit, Address(ary2, 0));
   cmpl(chr, limit);
   jccb(Assembler::notEqual, FALSE_LABEL);
 
+  if (is_array_equ && is_char) {
+    bind(COMPARE_BYTE);
+  } else {
+    lea(ary1, Address(ary1, 2));
+    lea(ary2, Address(ary2, 2));
+
+    bind(COMPARE_BYTE);
+    testl(result, 0x1);   // tail  byte
+    jccb(Assembler::zero, TRUE_LABEL);
+    load_unsigned_byte(chr, Address(ary1, 0));
+    load_unsigned_byte(limit, Address(ary2, 0));
+    cmpl(chr, limit);
+    jccb(Assembler::notEqual, FALSE_LABEL);
+  }
   bind(TRUE_LABEL);
   movl(result, 1);   // return true
   jmpb(DONE);
@@ -7081,6 +7543,8 @@
   }
 }
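For reference, a minimal scalar sketch of what arrays_equals() computes (hypothetical helper, not part of this changeset; the is_array_equ path additionally checks for identical, null, or unequal-length arrays before comparing contents):

    // Sketch only: element-wise comparison of 'limit' elements; T is jbyte or
    // jchar depending on is_char.
    template <typename T>
    static bool arrays_equals_scalar(const T* ary1, const T* ary2, int limit) {
      for (int i = 0; i < limit; i++) {
        if (ary1[i] != ary2[i]) return false;
      }
      return true;
    }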
 
+#endif
+
 void MacroAssembler::generate_fill(BasicType t, bool aligned,
                                    Register to, Register value, Register count,
                                    Register rtmp, XMMRegister xtmp) {
@@ -9085,6 +9549,179 @@
 #undef BLOCK_COMMENT
 
 
+// Compress char[] array to byte[].
+void MacroAssembler::char_array_compress(Register src, Register dst, Register len,
+                                         XMMRegister tmp1Reg, XMMRegister tmp2Reg,
+                                         XMMRegister tmp3Reg, XMMRegister tmp4Reg,
+                                         Register tmp5, Register result) {
+  Label copy_chars_loop, return_length, return_zero, done;
+
+  // rsi: src
+  // rdi: dst
+  // rdx: len
+  // rcx: tmp5
+  // rax: result
+
+  // rsi holds start addr of source char[] to be compressed
+  // rdi holds start addr of destination byte[]
+  // rdx holds length
+
+  assert(len != result, "");
+
+  // save length for return
+  push(len);
+
+  if (UseSSE42Intrinsics) {
+    Label copy_32_loop, copy_16, copy_tail;
+
+    movl(result, len);
+    movl(tmp5, 0xff00ff00);   // create mask to test for Unicode chars in vectors
+
+    // vectored compression
+    andl(len, 0xfffffff0);    // vector count (in chars)
+    andl(result, 0x0000000f);    // tail count (in chars)
+    testl(len, len);
+    jccb(Assembler::zero, copy_16);
+
+    // compress 16 chars per iter
+    movdl(tmp1Reg, tmp5);
+    pshufd(tmp1Reg, tmp1Reg, 0);   // store Unicode mask in tmp1Reg
+    pxor(tmp4Reg, tmp4Reg);
+
+    lea(src, Address(src, len, Address::times_2));
+    lea(dst, Address(dst, len, Address::times_1));
+    negptr(len);
+
+    bind(copy_32_loop);
+    movdqu(tmp2Reg, Address(src, len, Address::times_2));     // load 1st 8 characters
+    por(tmp4Reg, tmp2Reg);
+    movdqu(tmp3Reg, Address(src, len, Address::times_2, 16)); // load next 8 characters
+    por(tmp4Reg, tmp3Reg);
+    ptest(tmp4Reg, tmp1Reg);       // check for Unicode chars in next vector
+    jcc(Assembler::notZero, return_zero);
+    packuswb(tmp2Reg, tmp3Reg);    // only LATIN1 chars; compress each to 1 byte
+    movdqu(Address(dst, len, Address::times_1), tmp2Reg);
+    addptr(len, 16);
+    jcc(Assembler::notZero, copy_32_loop);
+
+    // compress next vector of 8 chars (if any)
+    bind(copy_16);
+    movl(len, result);
+    andl(len, 0xfffffff8);    // vector count (in chars)
+    andl(result, 0x00000007);    // tail count (in chars)
+    testl(len, len);
+    jccb(Assembler::zero, copy_tail);
+
+    movdl(tmp1Reg, tmp5);
+    pshufd(tmp1Reg, tmp1Reg, 0);   // store Unicode mask in tmp1Reg
+    pxor(tmp3Reg, tmp3Reg);
+
+    movdqu(tmp2Reg, Address(src, 0));
+    ptest(tmp2Reg, tmp1Reg);       // check for Unicode chars in vector
+    jccb(Assembler::notZero, return_zero);
+    packuswb(tmp2Reg, tmp3Reg);    // only LATIN1 chars; compress each to 1 byte
+    movq(Address(dst, 0), tmp2Reg);
+    addptr(src, 16);
+    addptr(dst, 8);
+
+    bind(copy_tail);
+    movl(len, result);
+  }
+  // compress 1 char per iter
+  testl(len, len);
+  jccb(Assembler::zero, return_length);
+  lea(src, Address(src, len, Address::times_2));
+  lea(dst, Address(dst, len, Address::times_1));
+  negptr(len);
+
+  bind(copy_chars_loop);
+  load_unsigned_short(result, Address(src, len, Address::times_2));
+  testl(result, 0xff00);      // check if char is above the Latin-1 range
+  jccb(Assembler::notZero, return_zero);
+  movb(Address(dst, len, Address::times_1), result);  // LATIN1 char; compress to 1 byte
+  increment(len);
+  jcc(Assembler::notZero, copy_chars_loop);
+
+  // if compression succeeded, return length
+  bind(return_length);
+  pop(result);
+  jmpb(done);
+
+  // if compression failed, return 0
+  bind(return_zero);
+  xorl(result, result);
+  addptr(rsp, wordSize);
+
+  bind(done);
+}
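For reference, a minimal scalar sketch of the contract char_array_compress() implements (hypothetical helper, not part of this changeset):

    // Sketch only: compression succeeds only if every char fits in one byte
    // (Latin-1); on success the original length is returned, on failure 0.
    static int char_array_compress_scalar(const jchar* src, jbyte* dst, int len) {
      for (int i = 0; i < len; i++) {
        if (src[i] & 0xff00) return 0;   // char needs more than one byte
        dst[i] = (jbyte)src[i];
      }
      return len;
    }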
+
+// Inflate byte[] array to char[].
+void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len,
+                                        XMMRegister tmp1, Register tmp2) {
+  Label copy_chars_loop, done;
+
+  // rsi: src
+  // rdi: dst
+  // rdx: len
+  // rcx: tmp2
+
+  // rsi holds start addr of source byte[] to be inflated
+  // rdi holds start addr of destination char[]
+  // rdx holds length
+  assert_different_registers(src, dst, len, tmp2);
+
+  if (UseSSE42Intrinsics) {
+    Label copy_8_loop, copy_bytes, copy_tail;
+
+    movl(tmp2, len);
+    andl(tmp2, 0x00000007);   // tail count (in chars)
+    andl(len, 0xfffffff8);    // vector count (in chars)
+    jccb(Assembler::zero, copy_tail);
+
+    // vectored inflation
+    lea(src, Address(src, len, Address::times_1));
+    lea(dst, Address(dst, len, Address::times_2));
+    negptr(len);
+
+    // inflate 8 chars per iter
+    bind(copy_8_loop);
+    pmovzxbw(tmp1, Address(src, len, Address::times_1));  // unpack to 8 words
+    movdqu(Address(dst, len, Address::times_2), tmp1);
+    addptr(len, 8);
+    jcc(Assembler::notZero, copy_8_loop);
+
+    bind(copy_tail);
+    movl(len, tmp2);
+
+    cmpl(len, 4);
+    jccb(Assembler::less, copy_bytes);
+
+    movdl(tmp1, Address(src, 0));  // load 4 byte chars
+    pmovzxbw(tmp1, tmp1);
+    movq(Address(dst, 0), tmp1);
+    subptr(len, 4);
+    addptr(src, 4);
+    addptr(dst, 8);
+
+    bind(copy_bytes);
+  }
+  testl(len, len);
+  jccb(Assembler::zero, done);
+  lea(src, Address(src, len, Address::times_1));
+  lea(dst, Address(dst, len, Address::times_2));
+  negptr(len);
+
+  // inflate 1 char per iter
+  bind(copy_chars_loop);
+  load_unsigned_byte(tmp2, Address(src, len, Address::times_1));  // load byte char
+  movw(Address(dst, len, Address::times_2), tmp2);  // inflate byte char to word
+  increment(len);
+  jcc(Assembler::notZero, copy_chars_loop);
+
+  bind(done);
+}
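For reference, a minimal scalar sketch of what byte_array_inflate() computes (hypothetical helper, not part of this changeset):

    // Sketch only: zero-extend each Latin-1 byte into a full UTF-16 char.
    static void byte_array_inflate_scalar(const jbyte* src, jchar* dst, int len) {
      for (int i = 0; i < len; i++) {
        dst[i] = (jchar)(src[i] & 0xff);
      }
    }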
+
+
 Assembler::Condition MacroAssembler::negate_condition(Assembler::Condition cond) {
   switch (cond) {
     // Note some conditions are synonyms for others
--- a/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/macroAssembler_x86.hpp	Wed Nov 11 23:51:57 2015 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -29,7 +29,6 @@
 #include "utilities/macros.hpp"
 #include "runtime/rtmLocking.hpp"
 
-
 // MacroAssembler extends Assembler by frequently used macros.
 //
 // Instructions for which a 'better' code sequence exists depending
@@ -56,6 +55,8 @@
   #define VIRTUAL virtual
 #endif
 
+#define COMMA ,
+
   VIRTUAL void call_VM_leaf_base(
     address entry_point,               // the entry point
     int     number_of_arguments        // the number of arguments to pop after the call
@@ -910,6 +911,11 @@
   void fast_exp(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
                 XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
                 Register rax, Register rcx, Register rdx, Register tmp);
+
+  void fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3,
+                XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7,
+                Register rax, Register rcx, Register rdx, Register tmp1 LP64_ONLY(COMMA Register tmp2));
+
   void increase_precision();
   void restore_precision();
 
@@ -1205,32 +1211,50 @@
   // clear memory of size 'cnt' qwords, starting at 'base'.
   void clear_mem(Register base, Register cnt, Register rtmp);
 
+#ifdef COMPILER2
+  void string_indexof_char(Register str1, Register cnt1, Register ch, Register result,
+                           XMMRegister vec1, XMMRegister vec2, XMMRegister vec3, Register tmp);
+
   // IndexOf strings.
   // Small strings are loaded through stack if they cross page boundary.
   void string_indexof(Register str1, Register str2,
                       Register cnt1, Register cnt2,
                       int int_cnt2,  Register result,
-                      XMMRegister vec, Register tmp);
+                      XMMRegister vec, Register tmp,
+                      int ae);
 
   // IndexOf for constant substrings with size >= 8 elements
   // which don't need to be loaded through stack.
   void string_indexofC8(Register str1, Register str2,
                       Register cnt1, Register cnt2,
                       int int_cnt2,  Register result,
-                      XMMRegister vec, Register tmp);
+                      XMMRegister vec, Register tmp,
+                      int ae);
 
     // Smallest code: we don't need to load through stack,
     // check string tail.
 
+  // helper function for string_compare
+  void load_next_elements(Register elem1, Register elem2, Register str1, Register str2,
+                          Address::ScaleFactor scale, Address::ScaleFactor scale1,
+                          Address::ScaleFactor scale2, Register index, int ae);
   // Compare strings.
   void string_compare(Register str1, Register str2,
                       Register cnt1, Register cnt2, Register result,
-                      XMMRegister vec1);
+                      XMMRegister vec1, int ae);
 
-  // Compare char[] arrays.
-  void char_arrays_equals(bool is_array_equ, Register ary1, Register ary2,
-                          Register limit, Register result, Register chr,
-                          XMMRegister vec1, XMMRegister vec2);
+  // Search for a non-ASCII character (i.e. a negative byte value) in a byte array,
+  // returning true if one is found and false otherwise.
+  void has_negatives(Register ary1, Register len,
+                     Register result, Register tmp1,
+                     XMMRegister vec1, XMMRegister vec2);
+
+  // Compare char[] or byte[] arrays.
+  void arrays_equals(bool is_array_equ, Register ary1, Register ary2,
+                     Register limit, Register result, Register chr,
+                     XMMRegister vec1, XMMRegister vec2, bool is_char);
+
+#endif
 
   // Fill primitive arrays
   void generate_fill(BasicType t, bool aligned,
@@ -1325,6 +1349,15 @@
   void fold_8bit_crc32(Register crc, Register table, Register tmp);
   void fold_8bit_crc32(XMMRegister crc, Register table, XMMRegister xtmp, Register tmp);
 
+  // Compress char[] array to byte[].
+  void char_array_compress(Register src, Register dst, Register len,
+                           XMMRegister tmp1, XMMRegister tmp2, XMMRegister tmp3,
+                           XMMRegister tmp4, Register tmp5, Register result);
+
+  // Inflate byte[] array to char[].
+  void byte_array_inflate(Register src, Register dst, Register len,
+                          XMMRegister tmp1, Register tmp2);
+
 #undef VIRTUAL
 
 };
--- a/src/cpu/x86/vm/macroAssembler_x86_libm.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/macroAssembler_x86_libm.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -24,8 +24,19 @@
  *
  */
 
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "macroAssembler_x86.hpp"
+
+#ifdef _MSC_VER
+#define ALIGNED_(x) __declspec(align(x))
+#else
+#define ALIGNED_(x) __attribute__ ((aligned(x)))
+#endif
+
 /******************************************************************************/
-//                     ALGORITHM DESCRIPTION
+//                     ALGORITHM DESCRIPTION - EXP()
 //                     ---------------------
 //
 // Description:
@@ -58,18 +69,6 @@
 //
 /******************************************************************************/
 
-
-#include "precompiled.hpp"
-#include "asm/assembler.hpp"
-#include "asm/assembler.inline.hpp"
-#include "macroAssembler_x86.hpp"
-
-#ifdef _MSC_VER
-#define ALIGNED_(x) __declspec(align(x))
-#else
-#define ALIGNED_(x) __attribute__ ((aligned(x)))
-#endif
-
 #ifdef _LP64
 
 ALIGNED_(16) juint _cv[] =
@@ -409,6 +408,7 @@
   bind(B1_5);
   addq(rsp, 24);
 }
+
 #endif
 
 #ifndef _LP64
@@ -675,3 +675,614 @@
 }
 
 #endif
+
+/******************************************************************************/
+//                     ALGORITHM DESCRIPTION - LOG()
+//                     ---------------------
+//
+//    x=2^k * mx, mx in [1,2)
+//
+//    Get B~1/mx based on the output of rcpss instruction (B0)
+//    B = int((B0*2^7+0.5))/2^7
+//
+//    Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts)
+//
+//    Result:  k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6)  and
+//             p(r) is a degree 7 polynomial
+//             -log(B) read from data table (high, low parts)
+//             Result is formed from high and low parts
+//
+// Special cases:
+//  log(NaN) = quiet NaN, and raise invalid exception
+//  log(+INF) = that INF
+//  log(0) = -INF with divide-by-zero exception raised
+//  log(1) = +0
+//  log(x) = NaN with invalid exception raised if x < -0, including -INF
+//
+/******************************************************************************/
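For reference, a minimal sketch of the argument reduction described above, written in plain C with libm standing in for the table lookups and the polynomial p(r) (the helper and its use of frexp are illustrative only and are not the fast_log() implementation):

    #include <math.h>
    // Sketch only: split x into 2^k * mx with mx in [1,2), then combine
    // k*log(2) with log(mx); fast_log() instead derives B ~ 1/mx via rcpss,
    // forms r = B*mx - 1 and evaluates a degree-7 polynomial p(r).
    static double log_by_reduction(double x) {
      int k;
      double mx = frexp(x, &k);   // x = 2^k * mx, mx in [0.5, 1)
      mx *= 2.0;                  // renormalize so that mx is in [1, 2)
      k  -= 1;
      return k * 0.6931471805599453 + log(mx);   // k*log(2) + log(mx)
    }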
+
+#ifdef _LP64
+
+ALIGNED_(16) juint _L_tbl[] =
+{
+  0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
+  0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
+  0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
+  0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
+  0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
+  0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
+  0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
+  0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
+  0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
+  0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
+  0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
+  0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
+  0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
+  0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
+  0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
+  0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
+  0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
+  0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
+  0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
+  0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
+  0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
+  0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
+  0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
+  0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
+  0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
+  0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
+  0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
+  0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
+  0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
+  0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
+  0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
+  0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
+  0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
+  0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
+  0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
+  0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
+  0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
+  0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
+  0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
+  0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
+  0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
+  0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
+  0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
+  0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
+  0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
+  0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
+  0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
+  0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
+  0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
+  0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
+  0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
+  0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
+  0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
+  0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
+  0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
+  0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
+  0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
+  0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
+  0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
+  0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
+  0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
+  0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
+  0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
+  0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
+  0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
+  0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
+  0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
+  0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
+  0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
+  0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
+  0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
+  0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
+  0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
+  0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
+  0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
+  0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
+  0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
+  0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
+  0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
+  0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
+  0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
+  0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
+  0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
+  0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
+  0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
+  0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
+  0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
+  0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
+  0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
+  0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
+  0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
+  0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
+  0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
+  0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
+  0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
+  0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
+  0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
+  0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
+  0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
+  0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
+  0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
+  0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
+  0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
+  0x80000000UL
+};
+
+ALIGNED_(16) juint _log2[] =
+{
+  0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL
+};
+
+ALIGNED_(16) juint _coeff[] =
+{
+  0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
+  0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
+  0x00000000UL, 0xbfe00000UL
+};
+
+// Registers:
+// input: xmm0
+// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
+//          rax, rdx, rcx, r8, r11
+
+void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp1, Register tmp2) {
+  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
+  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
+  Label L_2TAG_PACKET_8_0_2;
+  Label L_2TAG_PACKET_12_0_2, L_2TAG_PACKET_13_0_2, B1_3, B1_5, start;
+
+  assert_different_registers(tmp1, tmp2, eax, ecx, edx);
+  jmp(start);
+  address L_tbl = (address)_L_tbl;
+  address log2 = (address)_log2;
+  address coeff = (address)_coeff;
+
+  bind(start);
+  subq(rsp, 24);
+  movsd(Address(rsp, 0), xmm0);
+  mov64(rax, 0x3ff0000000000000);
+  movdq(xmm2, rax);
+  mov64(rdx, 0x77f0000000000000);
+  movdq(xmm3, rdx);
+  movl(ecx, 32768);
+  movdl(xmm4, rcx);
+  mov64(tmp1, 0xffffe00000000000);
+  movdq(xmm5, tmp1);
+  movdqu(xmm1, xmm0);
+  pextrw(eax, xmm0, 3);
+  por(xmm0, xmm2);
+  movl(ecx, 16352);
+  psrlq(xmm0, 27);
+  lea(tmp2, ExternalAddress(L_tbl));
+  psrld(xmm0, 2);
+  rcpps(xmm0, xmm0);
+  psllq(xmm1, 12);
+  pshufd(xmm6, xmm5, 228);
+  psrlq(xmm1, 12);
+  subl(eax, 16);
+  cmpl(eax, 32736);
+  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
+
+  bind(L_2TAG_PACKET_1_0_2);
+  paddd(xmm0, xmm4);
+  por(xmm1, xmm3);
+  movdl(edx, xmm0);
+  psllq(xmm0, 29);
+  pand(xmm5, xmm1);
+  pand(xmm0, xmm6);
+  subsd(xmm1, xmm5);
+  mulpd(xmm5, xmm0);
+  andl(eax, 32752);
+  subl(eax, ecx);
+  cvtsi2sdl(xmm7, eax);
+  mulsd(xmm1, xmm0);
+  movq(xmm6, ExternalAddress(log2));       // 0xfefa3800UL, 0x3fa62e42UL
+  movdqu(xmm3, ExternalAddress(coeff));    // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
+  subsd(xmm5, xmm2);
+  andl(edx, 16711680);
+  shrl(edx, 12);
+  movdqu(xmm0, Address(tmp2, edx));
+  movdqu(xmm4, ExternalAddress(16 + coeff)); // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
+  addsd(xmm1, xmm5);
+  movdqu(xmm2, ExternalAddress(32 + coeff)); // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
+  mulsd(xmm6, xmm7);
+  movddup(xmm5, xmm1);
+  mulsd(xmm7, ExternalAddress(8 + log2));    // 0x93c76730UL, 0x3ceef357UL
+  mulsd(xmm3, xmm1);
+  addsd(xmm0, xmm6);
+  mulpd(xmm4, xmm5);
+  mulpd(xmm5, xmm5);
+  movddup(xmm6, xmm0);
+  addsd(xmm0, xmm1);
+  addpd(xmm4, xmm2);
+  mulpd(xmm3, xmm5);
+  subsd(xmm6, xmm0);
+  mulsd(xmm4, xmm1);
+  pshufd(xmm2, xmm0, 238);
+  addsd(xmm1, xmm6);
+  mulsd(xmm5, xmm5);
+  addsd(xmm7, xmm2);
+  addpd(xmm4, xmm3);
+  addsd(xmm1, xmm7);
+  mulpd(xmm4, xmm5);
+  addsd(xmm1, xmm4);
+  pshufd(xmm5, xmm4, 238);
+  addsd(xmm1, xmm5);
+  addsd(xmm0, xmm1);
+  jmp(B1_5);
+
+  bind(L_2TAG_PACKET_0_0_2);
+  movq(xmm0, Address(rsp, 0));
+  movq(xmm1, Address(rsp, 0));
+  addl(eax, 16);
+  cmpl(eax, 32768);
+  jcc(Assembler::aboveEqual, L_2TAG_PACKET_2_0_2);
+  cmpl(eax, 16);
+  jcc(Assembler::below, L_2TAG_PACKET_3_0_2);
+
+  bind(L_2TAG_PACKET_4_0_2);
+  addsd(xmm0, xmm0);
+  jmp(B1_5);
+
+  bind(L_2TAG_PACKET_5_0_2);
+  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
+  cmpl(edx, 0);
+  jcc(Assembler::above, L_2TAG_PACKET_4_0_2);
+  jmp(L_2TAG_PACKET_6_0_2);
+
+  bind(L_2TAG_PACKET_3_0_2);
+  xorpd(xmm1, xmm1);
+  addsd(xmm1, xmm0);
+  movdl(edx, xmm1);
+  psrlq(xmm1, 32);
+  movdl(ecx, xmm1);
+  orl(edx, ecx);
+  cmpl(edx, 0);
+  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
+  xorpd(xmm1, xmm1);
+  movl(eax, 18416);
+  pinsrw(xmm1, eax, 3);
+  mulsd(xmm0, xmm1);
+  movdqu(xmm1, xmm0);
+  pextrw(eax, xmm0, 3);
+  por(xmm0, xmm2);
+  psrlq(xmm0, 27);
+  movl(ecx, 18416);
+  psrld(xmm0, 2);
+  rcpps(xmm0, xmm0);
+  psllq(xmm1, 12);
+  pshufd(xmm6, xmm5, 228);
+  psrlq(xmm1, 12);
+  jmp(L_2TAG_PACKET_1_0_2);
+
+  bind(L_2TAG_PACKET_2_0_2);
+  movdl(edx, xmm1);
+  psrlq(xmm1, 32);
+  movdl(ecx, xmm1);
+  addl(ecx, ecx);
+  cmpl(ecx, -2097152);
+  jcc(Assembler::aboveEqual, L_2TAG_PACKET_5_0_2);
+  orl(edx, ecx);
+  cmpl(edx, 0);
+  jcc(Assembler::equal, L_2TAG_PACKET_7_0_2);
+
+  bind(L_2TAG_PACKET_6_0_2);
+  xorpd(xmm1, xmm1);
+  xorpd(xmm0, xmm0);
+  movl(eax, 32752);
+  pinsrw(xmm1, eax, 3);
+  mulsd(xmm0, xmm1);
+  movl(Address(rsp, 16), 3);
+  jmp(L_2TAG_PACKET_8_0_2);
+  bind(L_2TAG_PACKET_7_0_2);
+  xorpd(xmm1, xmm1);
+  xorpd(xmm0, xmm0);
+  movl(eax, 49136);
+  pinsrw(xmm0, eax, 3);
+  divsd(xmm0, xmm1);
+  movl(Address(rsp, 16), 2);
+
+  bind(L_2TAG_PACKET_8_0_2);
+  movq(Address(rsp, 8), xmm0);
+
+  bind(B1_3);
+  movq(xmm0, Address(rsp, 8));
+
+  bind(B1_5);
+  addq(rsp, 24);
+}
+
+#endif
+
+#ifndef _LP64
+
+ALIGNED_(16) juint _static_const_table_log[] =
+{
+  0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL,
+  0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL,
+  0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL,
+  0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL,
+  0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL,
+  0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL,
+  0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL,
+  0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL,
+  0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL,
+  0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL,
+  0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL,
+  0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL,
+  0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL,
+  0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL,
+  0x6e48667bUL, 0x3d12fc06UL, 0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL,
+  0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL,
+  0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL,
+  0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL,
+  0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL,
+  0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL,
+  0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL,
+  0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL,
+  0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL,
+  0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL,
+  0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL,
+  0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL,
+  0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL,
+  0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL,
+  0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL,
+  0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL,
+  0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL,
+  0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL,
+  0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL,
+  0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL,
+  0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL,
+  0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL,
+  0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL,
+  0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL,
+  0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL,
+  0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL,
+  0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL,
+  0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL,
+  0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL,
+  0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL,
+  0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL,
+  0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL,
+  0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL,
+  0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL,
+  0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL,
+  0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL,
+  0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL,
+  0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL,
+  0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL,
+  0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL,
+  0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL,
+  0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL,
+  0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL,
+  0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL,
+  0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL,
+  0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL,
+  0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL,
+  0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL,
+  0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL,
+  0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL,
+  0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL,
+  0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL,
+  0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL,
+  0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL,
+  0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL,
+  0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL,
+  0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL,
+  0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL,
+  0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL,
+  0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL,
+  0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL,
+  0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL,
+  0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL,
+  0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL,
+  0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL,
+  0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL,
+  0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL,
+  0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL,
+  0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL,
+  0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL,
+  0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL,
+  0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL,
+  0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL,
+  0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL,
+  0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL,
+  0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL,
+  0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL,
+  0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL,
+  0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL,
+  0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL,
+  0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL,
+  0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL,
+  0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL,
+  0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL,
+  0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL,
+  0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL,
+  0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL,
+  0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL,
+  0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL,
+  0x80000000UL, 0xfefa3800UL, 0x3fa62e42UL, 0x93c76730UL, 0x3ceef357UL,
+  0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL, 0x3d6fb175UL,
+  0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL, 0x9999999aUL, 0x3fc99999UL,
+  0x00000000UL, 0xbfe00000UL, 0x00000000UL, 0xffffe000UL, 0x00000000UL,
+  0xffffe000UL
+};
+// Registers:
+// input: xmm0
+// scratch: xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7
+//          rax, rdx, rcx, rbx (tmp)
+
+void MacroAssembler::fast_log(XMMRegister xmm0, XMMRegister xmm1, XMMRegister xmm2, XMMRegister xmm3, XMMRegister xmm4, XMMRegister xmm5, XMMRegister xmm6, XMMRegister xmm7, Register eax, Register ecx, Register edx, Register tmp) {
+  Label L_2TAG_PACKET_0_0_2, L_2TAG_PACKET_1_0_2, L_2TAG_PACKET_2_0_2, L_2TAG_PACKET_3_0_2;
+  Label L_2TAG_PACKET_4_0_2, L_2TAG_PACKET_5_0_2, L_2TAG_PACKET_6_0_2, L_2TAG_PACKET_7_0_2;
+  Label L_2TAG_PACKET_8_0_2, L_2TAG_PACKET_9_0_2;
+  Label L_2TAG_PACKET_10_0_2, start;
+
+  assert_different_registers(tmp, eax, ecx, edx);
+  jmp(start);
+  address static_const_table = (address)_static_const_table_log;
+
+  bind(start);
+  subl(rsp, 104);
+  movl(Address(rsp, 40), tmp);
+  lea(tmp, ExternalAddress(static_const_table));
+  xorpd(xmm2, xmm2);
+  movl(eax, 16368);
+  pinsrw(xmm2, eax, 3);
+  xorpd(xmm3, xmm3);
+  movl(edx, 30704);
+  pinsrw(xmm3, edx, 3);
+  movsd(xmm0, Address(rsp, 112));
+  movapd(xmm1, xmm0);
+  movl(ecx, 32768);
+  movdl(xmm4, ecx);
+  movsd(xmm5, Address(tmp, 2128));         // 0x00000000UL, 0xffffe000UL
+  pextrw(eax, xmm0, 3);
+  por(xmm0, xmm2);
+  psllq(xmm0, 5);
+  movl(ecx, 16352);
+  psrlq(xmm0, 34);
+  rcpss(xmm0, xmm0);
+  psllq(xmm1, 12);
+  pshufd(xmm6, xmm5, 228);
+  psrlq(xmm1, 12);
+  subl(eax, 16);
+  cmpl(eax, 32736);
+  jcc(Assembler::aboveEqual, L_2TAG_PACKET_0_0_2);
+
+  bind(L_2TAG_PACKET_1_0_2);
+  paddd(xmm0, xmm4);
+  por(xmm1, xmm3);
+  movdl(edx, xmm0);
+  psllq(xmm0, 29);
+  pand(xmm5, xmm1);
+  pand(xmm0, xmm6);
+  subsd(xmm1, xmm5);
+  mulpd(xmm5, xmm0);
+  andl(eax, 32752);
+  subl(eax, ecx);
+  cvtsi2sdl(xmm7, eax);
+  mulsd(xmm1, xmm0);
+  movsd(xmm6, Address(tmp, 2064));         // 0xfefa3800UL, 0x3fa62e42UL
+  movdqu(xmm3, Address(tmp, 2080));        // 0x92492492UL, 0x3fc24924UL, 0x00000000UL, 0xbfd00000UL
+  subsd(xmm5, xmm2);
+  andl(edx, 16711680);
+  shrl(edx, 12);
+  movdqu(xmm0, Address(tmp, edx));
+  movdqu(xmm4, Address(tmp, 2096));        // 0x3d6fb175UL, 0xbfc5555eUL, 0x55555555UL, 0x3fd55555UL
+  addsd(xmm1, xmm5);
+  movdqu(xmm2, Address(tmp, 2112));        // 0x9999999aUL, 0x3fc99999UL, 0x00000000UL, 0xbfe00000UL
+  mulsd(xmm6, xmm7);
+  pshufd(xmm5, xmm1, 68);
+  mulsd(xmm7, Address(tmp, 2072));         // 0x93c76730UL, 0x3ceef357UL, 0x92492492UL, 0x3fc24924UL
+  mulsd(xmm3, xmm1);
+  addsd(xmm0, xmm6);
+  mulpd(xmm4, xmm5);
+  mulpd(xmm5, xmm5);
+  pshufd(xmm6, xmm0, 228);
+  addsd(xmm0, xmm1);
+  addpd(xmm4, xmm2);
+  mulpd(xmm3, xmm5);
+  subsd(xmm6, xmm0);
+  mulsd(xmm4, xmm1);
+  pshufd(xmm2, xmm0, 238);
+  addsd(xmm1, xmm6);
+  mulsd(xmm5, xmm5);
+  addsd(xmm7, xmm2);
+  addpd(xmm4, xmm3);
+  addsd(xmm1, xmm7);
+  mulpd(xmm4, xmm5);
+  addsd(xmm1, xmm4);
+  pshufd(xmm5, xmm4, 238);
+  addsd(xmm1, xmm5);
+  addsd(xmm0, xmm1);
+  jmp(L_2TAG_PACKET_2_0_2);
+
+  bind(L_2TAG_PACKET_0_0_2);
+  movsd(xmm0, Address(rsp, 112));
+  movdqu(xmm1, xmm0);
+  addl(eax, 16);
+  cmpl(eax, 32768);
+  jcc(Assembler::aboveEqual, L_2TAG_PACKET_3_0_2);
+  cmpl(eax, 16);
+  jcc(Assembler::below, L_2TAG_PACKET_4_0_2);
+
+  bind(L_2TAG_PACKET_5_0_2);
+  addsd(xmm0, xmm0);
+  jmp(L_2TAG_PACKET_2_0_2);
+
+  bind(L_2TAG_PACKET_6_0_2);
+  jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
+  cmpl(edx, 0);
+  jcc(Assembler::above, L_2TAG_PACKET_5_0_2);
+  jmp(L_2TAG_PACKET_7_0_2);
+
+  bind(L_2TAG_PACKET_3_0_2);
+  movdl(edx, xmm1);
+  psrlq(xmm1, 32);
+  movdl(ecx, xmm1);
+  addl(ecx, ecx);
+  cmpl(ecx, -2097152);
+  jcc(Assembler::aboveEqual, L_2TAG_PACKET_6_0_2);
+  orl(edx, ecx);
+  cmpl(edx, 0);
+  jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
+
+  bind(L_2TAG_PACKET_7_0_2);
+  xorpd(xmm1, xmm1);
+  xorpd(xmm0, xmm0);
+  movl(eax, 32752);
+  pinsrw(xmm1, eax, 3);
+  movl(edx, 3);
+  mulsd(xmm0, xmm1);
+
+  bind(L_2TAG_PACKET_9_0_2);
+  movsd(Address(rsp, 0), xmm0);
+  movsd(xmm0, Address(rsp, 112));
+  fld_d(Address(rsp, 0));
+  jmp(L_2TAG_PACKET_10_0_2);
+
+  bind(L_2TAG_PACKET_8_0_2);
+  xorpd(xmm1, xmm1);
+  xorpd(xmm0, xmm0);
+  movl(eax, 49136);
+  pinsrw(xmm0, eax, 3);
+  divsd(xmm0, xmm1);
+  movl(edx, 2);
+  jmp(L_2TAG_PACKET_9_0_2);
+
+  bind(L_2TAG_PACKET_4_0_2);
+  movdl(edx, xmm1);
+  psrlq(xmm1, 32);
+  movdl(ecx, xmm1);
+  orl(edx, ecx);
+  cmpl(edx, 0);
+  jcc(Assembler::equal, L_2TAG_PACKET_8_0_2);
+  xorpd(xmm1, xmm1);
+  movl(eax, 18416);
+  pinsrw(xmm1, eax, 3);
+  mulsd(xmm0, xmm1);
+  movapd(xmm1, xmm0);
+  pextrw(eax, xmm0, 3);
+  por(xmm0, xmm2);
+  psllq(xmm0, 5);
+  movl(ecx, 18416);
+  psrlq(xmm0, 34);
+  rcpss(xmm0, xmm0);
+  psllq(xmm1, 12);
+  pshufd(xmm6, xmm5, 228);
+  psrlq(xmm1, 12);
+  jmp(L_2TAG_PACKET_1_0_2);
+
+  bind(L_2TAG_PACKET_2_0_2);
+  movsd(Address(rsp, 24), xmm0);
+  fld_d(Address(rsp, 24));
+
+  bind(L_2TAG_PACKET_10_0_2);
+  movl(tmp, Address(rsp, 40));
+}
+
+#endif
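For reference, a minimal Java sketch of the user-visible corner cases the fast_log stub above has to preserve. Mapping L_2TAG_PACKET_8_0_2 to a zero argument, L_2TAG_PACKET_7_0_2 to negative/NaN inputs, and L_2TAG_PACKET_4_0_2 to denormal inputs is an assumption; the patch itself does not label these paths.

    public class LogEdgeCases {
        public static void main(String[] args) {
            System.out.println(Math.log(1.0));                      // 0.0
            System.out.println(Math.log(0.0));                      // -Infinity
            System.out.println(Math.log(-1.0));                     // NaN
            System.out.println(Math.log(Double.POSITIVE_INFINITY)); // Infinity
            System.out.println(Math.log(Double.MIN_VALUE));         // finite, ~ -744.44 (denormal input)
        }
    }
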
--- a/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/stubGenerator_x86_32.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -2095,14 +2095,6 @@
 
   void generate_math_stubs() {
     {
-      StubCodeMark mark(this, "StubRoutines", "log");
-      StubRoutines::_intrinsic_log = (double (*)(double)) __ pc();
-
-      __ fld_d(Address(rsp, 4));
-      __ flog();
-      __ ret(0);
-    }
-    {
       StubCodeMark mark(this, "StubRoutines", "log10");
       StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
 
@@ -3065,6 +3057,32 @@
 
   }
 
+  address generate_libmLog() {
+    address start = __ pc();
+
+    const XMMRegister x0 = xmm0;
+    const XMMRegister x1 = xmm1;
+    const XMMRegister x2 = xmm2;
+    const XMMRegister x3 = xmm3;
+
+    const XMMRegister x4 = xmm4;
+    const XMMRegister x5 = xmm5;
+    const XMMRegister x6 = xmm6;
+    const XMMRegister x7 = xmm7;
+
+    const Register tmp = rbx;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    __ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+
+  }
+
+
 
   // Safefetch stubs.
   void generate_safefetch(const char* name, int size, address* entry,
@@ -3288,6 +3306,7 @@
     }
     if (VM_Version::supports_sse2()) {
       StubRoutines::_dexp = generate_libmExp();
+      StubRoutines::_dlog = generate_libmLog();
     }
   }
 
--- a/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/stubGenerator_x86_64.cpp	Wed Nov 11 23:51:57 2015 -0500
@@ -2974,19 +2974,6 @@
 
   void generate_math_stubs() {
     {
-      StubCodeMark mark(this, "StubRoutines", "log");
-      StubRoutines::_intrinsic_log = (double (*)(double)) __ pc();
-
-      __ subq(rsp, 8);
-      __ movdbl(Address(rsp, 0), xmm0);
-      __ fld_d(Address(rsp, 0));
-      __ flog();
-      __ fstp_d(Address(rsp, 0));
-      __ movdbl(xmm0, Address(rsp, 0));
-      __ addq(rsp, 8);
-      __ ret(0);
-    }
-    {
       StubCodeMark mark(this, "StubRoutines", "log10");
       StubRoutines::_intrinsic_log10 = (double (*)(double)) __ pc();
 
@@ -4187,15 +4174,58 @@
 
 #ifdef _WIN64
     // save the xmm registers which must be preserved 6-7
-    __ movdqu(xmm_save(6), as_XMMRegister(6));
-    __ movdqu(xmm_save(7), as_XMMRegister(7));
+    __ subptr(rsp, 4 * wordSize);
+    __ movdqu(Address(rsp, 0), xmm6);
+    __ movdqu(Address(rsp, 2 * wordSize), xmm7);
 #endif
       __ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp);
 
 #ifdef _WIN64
     // restore xmm regs belonging to calling function
-    __ movdqu(as_XMMRegister(6), xmm_save(6));
-    __ movdqu(as_XMMRegister(7), xmm_save(7));
+      __ movdqu(xmm6, Address(rsp, 0));
+      __ movdqu(xmm7, Address(rsp, 2 * wordSize));
+      __ addptr(rsp, 4 * wordSize);
+#endif
+
+    __ leave(); // required for proper stackwalking of RuntimeStub frame
+    __ ret(0);
+
+    return start;
+
+  }
+
+  address generate_libmLog() {
+    address start = __ pc();
+
+    const XMMRegister x0 = xmm0;
+    const XMMRegister x1 = xmm1;
+    const XMMRegister x2 = xmm2;
+    const XMMRegister x3 = xmm3;
+
+    const XMMRegister x4 = xmm4;
+    const XMMRegister x5 = xmm5;
+    const XMMRegister x6 = xmm6;
+    const XMMRegister x7 = xmm7;
+
+    const Register tmp1 = r11;
+    const Register tmp2 = r8;
+
+    BLOCK_COMMENT("Entry:");
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+#ifdef _WIN64
+    // save the xmm registers which must be preserved 6-7
+    __ subptr(rsp, 4 * wordSize);
+    __ movdqu(Address(rsp, 0), xmm6);
+    __ movdqu(Address(rsp, 2 * wordSize), xmm7);
+#endif
+    __ fast_log(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2);
+
+#ifdef _WIN64
+    // restore xmm regs belonging to calling function
+    __ movdqu(xmm6, Address(rsp, 0));
+    __ movdqu(xmm7, Address(rsp, 2 * wordSize));
+    __ addptr(rsp, 4 * wordSize);
 #endif
 
     __ leave(); // required for proper stackwalking of RuntimeStub frame
@@ -4392,7 +4422,10 @@
       StubRoutines::_crc32c_table_addr = (address)StubRoutines::x86::_crc32c_table;
       StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(supports_clmul);
     }
-    StubRoutines::_dexp = generate_libmExp();
+    if (VM_Version::supports_sse2()) {
+      StubRoutines::_dexp = generate_libmExp();
+      StubRoutines::_dlog = generate_libmLog();
+    }
   }
 
   void generate_all() {
--- a/src/cpu/x86/vm/vmStructs_x86.hpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/vmStructs_x86.hpp	Wed Nov 11 23:51:57 2015 -0500
@@ -82,6 +82,7 @@
   declare_constant(VM_Version::CPU_AVX512CD)                        \
   declare_constant(VM_Version::CPU_AVX512BW)
 
-#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant)
+#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \
+  declare_preprocessor_constant("VM_Version::CPU_AVX512VL", CPU_AVX512VL)
 
 #endif // CPU_X86_VM_VMSTRUCTS_X86_HPP
--- a/src/cpu/x86/vm/x86.ad	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/x86.ad	Wed Nov 11 23:51:57 2015 -0500
@@ -1707,6 +1707,10 @@
       if (!VM_Version::supports_cx8())
         ret_value = false;
       break;
+    case Op_CMoveVD:
+      if (UseAVX < 1 || UseAVX > 2)
+        ret_value = false;
+      break;
   }
 
   return ret_value;  // Per default match rules are supported.
@@ -2089,6 +2093,29 @@
   interface(REG_INTER);
 %}
 
+// Comparison Code for FP conditional move
+operand cmpOp_vcmppd() %{
+  match(Bool);
+
+  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
+            n->as_Bool()->_test._test != BoolTest::no_overflow);
+  format %{ "" %}
+  interface(COND_INTER) %{
+    equal        (0x0, "eq");
+    less         (0x1, "lt");
+    less_equal   (0x2, "le");
+    not_equal    (0xC, "ne");
+    greater_equal(0xD, "ge");
+    greater      (0xE, "gt");
+    // TODO: adlc cannot compile this operand without the next two lines; it fails with:
+    // x86_64.ad(13987) Syntax Error: :In operand cmpOp_vcmppd: Do not support this encode constant: ' %{
+    // equal' for overflow.
+    overflow     (0x20, "o");  // not really supported by the instruction
+    no_overflow  (0x21, "no"); // not really supported by the instruction
+  %}
+%}
+
+
 // INSTRUCTIONS -- Platform independent definitions (same for 32- and 64-bit)
 
 // ============================================================================
@@ -7393,6 +7420,22 @@
   ins_pipe( pipe_slow );
 %}
 
+instruct vcmov4D_reg(vecY dst, vecY src1, vecY src2, immI8 cop, cmpOp_vcmppd copnd) %{
+  predicate(UseAVX > 0 && UseAVX < 3 && n->as_Vector()->length() == 4);
+  match(Set dst (CMoveVD (Binary copnd cop) (Binary src1 src2)));
+  effect(TEMP dst, USE src1, USE src2);
+  format %{ "cmppd.$copnd  $dst, $src1, $src2  ! vcmovevd, cond=$cop\n\t"
+            "vpblendd $dst,$src1,$src2,$dst ! vcmovevd\n\t"
+         %}
+  ins_encode %{
+    int vector_len = 1;
+    int cond = (Assembler::Condition)($copnd$$cmpcode);
+    __ cmppd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, cond, vector_len);
+    __ vpblendd($dst$$XMMRegister, $src1$$XMMRegister, $src2$$XMMRegister, $dst$$XMMRegister, vector_len);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 // --------------------------------- DIV --------------------------------------
 
 // Floats vector div
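The vcmov4D_reg rule above combines cmppd with vpblendd to give CMoveVD a 4-element double implementation on AVX/AVX2 (UseAVX 1 or 2). A hedged Java sketch of the kind of loop C2's SuperWord pass may turn into CMoveVD nodes; whether it actually vectorizes depends on flags, loop shape and hardware, so this is an illustration rather than a guarantee:

    public class CMoveLoop {
        // Per-element select between two double arrays driven by a third array;
        // the ternary can compile to a conditional move of doubles rather than a
        // branch, which, when vectorized, is the CMoveVD shape this rule matches.
        static void select(double[] mask, double[] a, double[] b, double[] out) {
            for (int i = 0; i < out.length; i++) {
                out[i] = (mask[i] > 0.0) ? a[i] : b[i];
            }
        }

        public static void main(String[] args) {
            double[] mask = { 1.0, -1.0, 1.0, -1.0 };
            double[] a    = { 10.0, 20.0, 30.0, 40.0 };
            double[] b    = {  1.0,  2.0,  3.0,  4.0 };
            double[] out  = new double[4];
            select(mask, a, b, out);
            System.out.println(java.util.Arrays.toString(out)); // [10.0, 2.0, 30.0, 4.0]
        }
    }
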
--- a/src/cpu/x86/vm/x86_32.ad	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/x86_32.ad	Wed Nov 11 23:51:57 2015 -0500
@@ -9950,41 +9950,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
-  predicate (UseSSE<=1);
-  // The source Double operand on FPU stack
-  match(Set dst (LogD src));
-  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
-  // fxch         ; swap ST(0) with ST(1)
-  // fyl2x        ; compute log_e(2) * log_2(x)
-  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
-            "FXCH   \n\t"
-            "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
-         %}
-  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
-              Opcode(0xD9), Opcode(0xC9),   // fxch
-              Opcode(0xD9), Opcode(0xF1));  // fyl2x
-
-  ins_pipe( pipe_slow );
-%}
-
-instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
-  predicate (UseSSE>=2);
-  effect(KILL cr);
-  // The source and result Double operands in XMM registers
-  match(Set dst (LogD src));
-  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
-  // fyl2x        ; compute log_e(2) * log_2(x)
-  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
-            "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
-         %}
-  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
-              Push_SrcD(src),
-              Opcode(0xD9), Opcode(0xF1),   // fyl2x
-              Push_ResultD(dst));
-  ins_pipe( pipe_slow );
-%}
-
 //-------------Float Instructions-------------------------------
 // Float Math
 
@@ -11470,16 +11435,62 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
-                        eAXRegI result, regD tmp1, eFlagsReg cr) %{
+instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
+                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
 
-  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
+  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
   ins_encode %{
     __ string_compare($str1$$Register, $str2$$Register,
                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
-                      $tmp1$$XMMRegister);
+                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
+                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
+                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
+                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
+  ins_encode %{
+    __ string_compare($str2$$Register, $str1$$Register,
+                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
+                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -11492,21 +11503,50 @@
 
   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
   ins_encode %{
-    __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
-                          $cnt$$Register, $result$$Register, $tmp3$$Register,
-                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
-  %}
+    __ arrays_equals(false, $str1$$Register, $str2$$Register,
+                     $cnt$$Register, $result$$Register, $tmp3$$Register,
+                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
+  %}
+
   ins_pipe( pipe_slow );
 %}
 
 // fast search of substring with known size.
-instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
-                            eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
-  predicate(UseSSE42Intrinsics);
+instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
+                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
+  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
 
-  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
+  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
+  ins_encode %{
+    int icnt2 = (int)$int_cnt2$$constant;
+    if (icnt2 >= 16) {
+      // IndexOf for constant substrings with size >= 16 elements
+      // which don't need to be loaded through stack.
+      __ string_indexofC8($str1$$Register, $str2$$Register,
+                          $cnt1$$Register, $cnt2$$Register,
+                          icnt2, $result$$Register,
+                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
+    } else {
+      // Small strings are loaded through stack if they cross page boundary.
+      __ string_indexof($str1$$Register, $str2$$Register,
+                        $cnt1$$Register, $cnt2$$Register,
+                        icnt2, $result$$Register,
+                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
+    }
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// fast search of substring with known size.
+instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
+                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
+  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
+
+  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
   ins_encode %{
     int icnt2 = (int)$int_cnt2$$constant;
     if (icnt2 >= 8) {
@@ -11515,47 +11555,182 @@
       __ string_indexofC8($str1$$Register, $str2$$Register,
                           $cnt1$$Register, $cnt2$$Register,
                           icnt2, $result$$Register,
-                          $vec$$XMMRegister, $tmp$$Register);
+                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
     } else {
       // Small strings are loaded through stack if they cross page boundary.
       __ string_indexof($str1$$Register, $str2$$Register,
                         $cnt1$$Register, $cnt2$$Register,
                         icnt2, $result$$Register,
-                        $vec$$XMMRegister, $tmp$$Register);
+                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
     }
   %}
   ins_pipe( pipe_slow );
 %}
 
-instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
-                        eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
-  predicate(UseSSE42Intrinsics);
+// fast search of substring with known size.
+instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
+                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
+  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
+
+  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
+  ins_encode %{
+    int icnt2 = (int)$int_cnt2$$constant;
+    if (icnt2 >= 8) {
+      // IndexOf for constant substrings with size >= 8 elements
+      // which don't need to be loaded through stack.
+      __ string_indexofC8($str1$$Register, $str2$$Register,
+                          $cnt1$$Register, $cnt2$$Register,
+                          icnt2, $result$$Register,
+                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
+    } else {
+      // Small strings are loaded through stack if they cross page boundary.
+      __ string_indexof($str1$$Register, $str2$$Register,
+                        $cnt1$$Register, $cnt2$$Register,
+                        icnt2, $result$$Register,
+                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
+    }
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
+                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
+  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
 
-  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
+  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
   ins_encode %{
     __ string_indexof($str1$$Register, $str2$$Register,
                       $cnt1$$Register, $cnt2$$Register,
                       (-1), $result$$Register,
-                      $vec$$XMMRegister, $tmp$$Register);
+                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
+                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
+  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
+
+  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
+  ins_encode %{
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      (-1), $result$$Register,
+                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
+                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
+  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
+
+  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
+  ins_encode %{
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      (-1), $result$$Register,
+                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
+                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
+  predicate(UseSSE42Intrinsics);
+  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
+  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
+  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
+  ins_encode %{
+    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
+                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
   %}
   ins_pipe( pipe_slow );
 %}
 
 // fast array equals
-instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
-                      regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
+instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
+                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
 %{
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result (AryEq ary1 ary2));
   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
   //ins_cost(300);
 
-  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
-  ins_encode %{
-    __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
-                          $tmp3$$Register, $result$$Register, $tmp4$$Register,
-                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
+  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
+  ins_encode %{
+    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
+                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
+                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
+                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
+%{
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
+  match(Set result (AryEq ary1 ary2));
+  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
+  //ins_cost(300);
+
+  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
+  ins_encode %{
+    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
+                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
+                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
+                      regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
+%{
+  match(Set result (HasNegatives ary1 len));
+  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
+
+  format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
+  ins_encode %{
+    __ has_negatives($ary1$$Register, $len$$Register,
+                     $result$$Register, $tmp3$$Register,
+                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// fast char[] to byte[] compression
+instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
+                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
+  match(Set result (StrCompressedCopy src (Binary dst len)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
+
+  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
+  ins_encode %{
+    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
+                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
+                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// fast byte[] to char[] inflation
+instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
+                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
+  match(Set dummy (StrInflatedCopy src (Binary dst len)));
+  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
+
+  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
+  ins_encode %{
+    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
+                          $tmp1$$XMMRegister, $tmp2$$Register);
   %}
   ins_pipe( pipe_slow );
 %}
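The string_compare and string_indexof rules are now split by operand encoding for Compact Strings: LL (both operands Latin-1 byte[] data), UU (both UTF-16), and the mixed LU/UL forms, where the UL variant swaps the operand and register assignments before calling string_compare. A hedged Java illustration of string pairs that would exercise each combination, assuming the JDK 9 default -XX:+CompactStrings (which encoding a given String actually uses is a VM implementation detail):

    public class CompareEncodings {
        public static void main(String[] args) {
            String latin1 = "abcdef";          // representable in Latin-1
            String utf16  = "abc\u4e2d\u6587"; // contains chars > 0xFF, needs UTF-16

            System.out.println(latin1.compareTo(latin1)); // both Latin-1   -> LL shape
            System.out.println(utf16.compareTo(utf16));   // both UTF-16    -> UU shape
            System.out.println(latin1.compareTo(utf16));  // mixed          -> LU shape
            System.out.println(utf16.compareTo(latin1));  // mixed, swapped -> UL shape
        }
    }
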
--- a/src/cpu/x86/vm/x86_64.ad	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/x86/vm/x86_64.ad	Wed Nov 11 23:51:57 2015 -0500
@@ -9870,21 +9870,6 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct logD_reg(regD dst) %{
-  // The source and result Double operands in XMM registers
-  match(Set dst (LogD dst));
-  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
-  // fyl2x        ; compute log_e(2) * log_2(x)
-  format %{ "fldln2\t\t\t#Log_e\n\t"
-            "fyl2x\t\t\t# Q=Log_e*Log_2(x)\n\t"
-         %}
-  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
-              Push_SrcXD(dst),
-              Opcode(0xD9), Opcode(0xF1),   // fyl2x
-              Push_ResultXD(dst));
-  ins_pipe( pipe_slow );
-%}
-
 instruct powD_reg(regD dst, regD src0, regD src1, rax_RegI rax, rdx_RegI rdx, rcx_RegI rcx, rFlagsReg cr) %{
   match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
   effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
@@ -10462,30 +10447,108 @@
   ins_pipe( pipe_slow );
 %}
 
-instruct string_compare(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
-                        rax_RegI result, regD tmp1, rFlagsReg cr)
-%{
+instruct string_compareL(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
+                         rax_RegI result, regD tmp1, rFlagsReg cr)
+%{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
 
-  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
+  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
   ins_encode %{
     __ string_compare($str1$$Register, $str2$$Register,
                       $cnt1$$Register, $cnt2$$Register, $result$$Register,
-                      $tmp1$$XMMRegister);
+                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
   %}
   ins_pipe( pipe_slow );
 %}
 
+instruct string_compareU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
+                         rax_RegI result, regD tmp1, rFlagsReg cr)
+%{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_compareLU(rdi_RegP str1, rcx_RegI cnt1, rsi_RegP str2, rdx_RegI cnt2,
+                          rax_RegI result, regD tmp1, rFlagsReg cr)
+%{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
+  ins_encode %{
+    __ string_compare($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
+                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_compareUL(rsi_RegP str1, rdx_RegI cnt1, rdi_RegP str2, rcx_RegI cnt2,
+                          rax_RegI result, regD tmp1, rFlagsReg cr)
+%{
+  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
+  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
+
+  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
+  ins_encode %{
+    __ string_compare($str2$$Register, $str1$$Register,
+                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
+                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
 // fast search of substring with known size.
-instruct string_indexof_con(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
-                            rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
-%{
-  predicate(UseSSE42Intrinsics);
+instruct string_indexof_conL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
+                             rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
+%{
+  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
 
-  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
+  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
+  ins_encode %{
+    int icnt2 = (int)$int_cnt2$$constant;
+    if (icnt2 >= 16) {
+      // IndexOf for constant substrings with size >= 16 elements
+      // which don't need to be loaded through stack.
+      __ string_indexofC8($str1$$Register, $str2$$Register,
+                          $cnt1$$Register, $cnt2$$Register,
+                          icnt2, $result$$Register,
+                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
+    } else {
+      // Small strings are loaded through stack if they cross page boundary.
+      __ string_indexof($str1$$Register, $str2$$Register,
+                        $cnt1$$Register, $cnt2$$Register,
+                        icnt2, $result$$Register,
+                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
+    }
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// fast search of substring with known size.
+instruct string_indexof_conU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
+                             rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
+%{
+  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
+
+  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
   ins_encode %{
     int icnt2 = (int)$int_cnt2$$constant;
     if (icnt2 >= 8) {
@@ -10494,31 +10557,108 @@
       __ string_indexofC8($str1$$Register, $str2$$Register,
                           $cnt1$$Register, $cnt2$$Register,
                           icnt2, $result$$Register,
-                          $vec$$XMMRegister, $tmp$$Register);
+                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
     } else {
       // Small strings are loaded through stack if they cross page boundary.
       __ string_indexof($str1$$Register, $str2$$Register,
                         $cnt1$$Register, $cnt2$$Register,
                         icnt2, $result$$Register,
-                        $vec$$XMMRegister, $tmp$$Register);
+                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
     }
   %}
   ins_pipe( pipe_slow );
 %}
 
-instruct string_indexof(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
-                        rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
-%{
-  predicate(UseSSE42Intrinsics);
+// fast search of substring with known size.
+instruct string_indexof_conUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, immI int_cnt2,
+                             rbx_RegI result, regD vec, rax_RegI cnt2, rcx_RegI tmp, rFlagsReg cr)
+%{
+  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
+  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
+
+  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
+  ins_encode %{
+    int icnt2 = (int)$int_cnt2$$constant;
+    if (icnt2 >= 8) {
+      // IndexOf for constant substrings with size >= 8 elements
+      // which don't need to be loaded through stack.
+      __ string_indexofC8($str1$$Register, $str2$$Register,
+                          $cnt1$$Register, $cnt2$$Register,
+                          icnt2, $result$$Register,
+                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
+    } else {
+      // Small strings are loaded through stack if they cross page boundary.
+      __ string_indexof($str1$$Register, $str2$$Register,
+                        $cnt1$$Register, $cnt2$$Register,
+                        icnt2, $result$$Register,
+                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
+    }
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_indexofL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
+                         rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
+%{
+  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
   match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
   effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
 
-  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
+  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
   ins_encode %{
     __ string_indexof($str1$$Register, $str2$$Register,
                       $cnt1$$Register, $cnt2$$Register,
                       (-1), $result$$Register,
-                      $vec$$XMMRegister, $tmp$$Register);
+                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_indexofU(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
+                         rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
+%{
+  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
+
+  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
+  ins_encode %{
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      (-1), $result$$Register,
+                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_indexofUL(rdi_RegP str1, rdx_RegI cnt1, rsi_RegP str2, rax_RegI cnt2,
+                         rbx_RegI result, regD vec, rcx_RegI tmp, rFlagsReg cr)
+%{
+  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
+  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
+  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
+
+  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
+  ins_encode %{
+    __ string_indexof($str1$$Register, $str2$$Register,
+                      $cnt1$$Register, $cnt2$$Register,
+                      (-1), $result$$Register,
+                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct string_indexofU_char(rdi_RegP str1, rdx_RegI cnt1, rax_RegI ch,
+                              rbx_RegI result, regD vec1, regD vec2, regD vec3, rcx_RegI tmp, rFlagsReg cr)
+%{
+  predicate(UseSSE42Intrinsics);
+  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
+  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
+  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
+  ins_encode %{
+    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
+                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
   %}
   ins_pipe( pipe_slow );
 %}
@@ -10532,26 +10672,86 @@
 
   format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
   ins_encode %{
-    __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
-                          $cnt$$Register, $result$$Register, $tmp3$$Register,
-                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
+    __ arrays_equals(false, $str1$$Register, $str2$$Register,
+                     $cnt$$Register, $result$$Register, $tmp3$$Register,
+                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
   %}
   ins_pipe( pipe_slow );
 %}
 
 // fast array equals
-instruct array_equals(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
-                      regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
-%{
+instruct array_equalsB(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
+                       regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
+%{
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
   match(Set result (AryEq ary1 ary2));
   effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
-  //ins_cost(300);
-
-  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
-  ins_encode %{
-    __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
-                          $tmp3$$Register, $result$$Register, $tmp4$$Register,
-                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
+
+  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
+  ins_encode %{
+    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
+                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
+                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct array_equalsC(rdi_RegP ary1, rsi_RegP ary2, rax_RegI result,
+                      regD tmp1, regD tmp2, rcx_RegI tmp3, rbx_RegI tmp4, rFlagsReg cr)
+%{
+  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
+  match(Set result (AryEq ary1 ary2));
+  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
+
+  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
+  ins_encode %{
+    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
+                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
+                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+instruct has_negatives(rsi_RegP ary1, rcx_RegI len, rax_RegI result,
+                      regD tmp1, regD tmp2, rbx_RegI tmp3, rFlagsReg cr)
+%{
+  match(Set result (HasNegatives ary1 len));
+  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
+
+  format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
+  ins_encode %{
+    __ has_negatives($ary1$$Register, $len$$Register,
+                     $result$$Register, $tmp3$$Register,
+                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// fast char[] to byte[] compression
+instruct string_compress(rsi_RegP src, rdi_RegP dst, rdx_RegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
+                         rcx_RegI tmp5, rax_RegI result, rFlagsReg cr) %{
+  match(Set result (StrCompressedCopy src (Binary dst len)));
+  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
+
+  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
+  ins_encode %{
+    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
+                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
+                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
+  %}
+  ins_pipe( pipe_slow );
+%}
+
+// fast byte[] to char[] inflation
+instruct string_inflate(Universe dummy, rsi_RegP src, rdi_RegP dst, rdx_RegI len,
+                        regD tmp1, rcx_RegI tmp2, rFlagsReg cr) %{
+  match(Set dummy (StrInflatedCopy src (Binary dst len)));
+  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
+
+  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
+  ins_encode %{
+    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
+                          $tmp1$$XMMRegister, $tmp2$$Register);
   %}
   ins_pipe( pipe_slow );
 %}
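The new string_compress and string_inflate rules back the StrCompressedCopy and StrInflatedCopy nodes that Compact Strings uses when converting between byte[] (Latin-1) and char[] (UTF-16) data. A plain-Java sketch of the element-wise copies involved; the early-return value on the first non-Latin-1 character is an assumption, since this hunk does not show the intrinsic's exact return contract:

    public class CompressInflateSketch {
        // char[] -> byte[]: copy while every char fits in Latin-1, stopping at the
        // first char that does not and reporting how many chars were copied.
        static int compress(char[] src, byte[] dst, int len) {
            for (int i = 0; i < len; i++) {
                char c = src[i];
                if (c > 0xFF) {
                    return i; // caller keeps the UTF-16 representation instead
                }
                dst[i] = (byte) c;
            }
            return len;
        }

        // byte[] -> char[]: zero-extend each Latin-1 byte back to a char.
        static void inflate(byte[] src, char[] dst, int len) {
            for (int i = 0; i < len; i++) {
                dst[i] = (char) (src[i] & 0xFF);
            }
        }

        public static void main(String[] args) {
            char[] chars = "hello".toCharArray();
            byte[] bytes = new byte[chars.length];
            System.out.println(compress(chars, bytes, chars.length)); // 5
            char[] back  = new char[bytes.length];
            inflate(bytes, back, bytes.length);
            System.out.println(new String(back));                     // hello
        }
    }
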
--- a/src/cpu/zero/vm/globals_zero.hpp	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/cpu/zero/vm/globals_zero.hpp	Wed Nov 11 23:51:57 2015 -0500
@@ -69,6 +69,9 @@
 
 define_pd_global(bool, PreserveFramePointer, false);
 
+// No performance work done here yet.
+define_pd_global(bool, CompactStrings, false);
+
 #define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct, range, constraint)  \
                                                                             \
   product(bool, UseFastEmptyMethods, true,                                  \
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.amd64/src/jdk/vm/ci/amd64/AMD64.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.amd64/src/jdk/vm/ci/amd64/AMD64.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,15 +22,18 @@
  */
 package jdk.vm.ci.amd64;
 
-import static jdk.vm.ci.code.MemoryBarriers.*;
-import static jdk.vm.ci.code.Register.*;
+import static jdk.vm.ci.code.MemoryBarriers.LOAD_STORE;
+import static jdk.vm.ci.code.MemoryBarriers.STORE_STORE;
+import static jdk.vm.ci.code.Register.SPECIAL;
 
-import java.nio.*;
-import java.util.*;
+import java.nio.ByteOrder;
+import java.util.EnumSet;
 
-import jdk.vm.ci.code.*;
+import jdk.vm.ci.code.Architecture;
+import jdk.vm.ci.code.Register;
 import jdk.vm.ci.code.Register.RegisterCategory;
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.PlatformKind;
 
 /**
  * Represents the AMD64 architecture.
@@ -65,9 +68,7 @@
         r8, r9, r10, r11, r12, r13, r14, r15
     };
 
-    private static final int XMM_REFERENCE_MAP_SHIFT = 2;
-
-    public static final RegisterCategory XMM = new RegisterCategory("XMM", cpuRegisters.length, XMM_REFERENCE_MAP_SHIFT);
+    public static final RegisterCategory XMM = new RegisterCategory("XMM");
 
     // XMM registers
     public static final Register xmm0 = new Register(16, 0, "xmm0", XMM);
@@ -79,8 +80,8 @@
     public static final Register xmm6 = new Register(22, 6, "xmm6", XMM);
     public static final Register xmm7 = new Register(23, 7, "xmm7", XMM);
 
-    public static final Register xmm8 =  new Register(24,  8, "xmm8",  XMM);
-    public static final Register xmm9 =  new Register(25,  9, "xmm9",  XMM);
+    public static final Register xmm8  = new Register(24,  8, "xmm8",  XMM);
+    public static final Register xmm9  = new Register(25,  9, "xmm9",  XMM);
     public static final Register xmm10 = new Register(26, 10, "xmm10", XMM);
     public static final Register xmm11 = new Register(27, 11, "xmm11", XMM);
     public static final Register xmm12 = new Register(28, 12, "xmm12", XMM);
@@ -88,28 +89,77 @@
     public static final Register xmm14 = new Register(30, 14, "xmm14", XMM);
     public static final Register xmm15 = new Register(31, 15, "xmm15", XMM);
 
-    public static final Register[] xmmRegisters = {
+    public static final Register xmm16 = new Register(32, 16, "xmm16", XMM);
+    public static final Register xmm17 = new Register(33, 17, "xmm17", XMM);
+    public static final Register xmm18 = new Register(34, 18, "xmm18", XMM);
+    public static final Register xmm19 = new Register(35, 19, "xmm19", XMM);
+    public static final Register xmm20 = new Register(36, 20, "xmm20", XMM);
+    public static final Register xmm21 = new Register(37, 21, "xmm21", XMM);
+    public static final Register xmm22 = new Register(38, 22, "xmm22", XMM);
+    public static final Register xmm23 = new Register(39, 23, "xmm23", XMM);
+
+    public static final Register xmm24 = new Register(40, 24, "xmm24", XMM);
+    public static final Register xmm25 = new Register(41, 25, "xmm25", XMM);
+    public static final Register xmm26 = new Register(42, 26, "xmm26", XMM);
+    public static final Register xmm27 = new Register(43, 27, "xmm27", XMM);
+    public static final Register xmm28 = new Register(44, 28, "xmm28", XMM);
+    public static final Register xmm29 = new Register(45, 29, "xmm29", XMM);
+    public static final Register xmm30 = new Register(46, 30, "xmm30", XMM);
+    public static final Register xmm31 = new Register(47, 31, "xmm31", XMM);
+
+    public static final Register[] xmmRegistersSSE = {
         xmm0, xmm1, xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
         xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15
     };
 
-    public static final Register[] cpuxmmRegisters = {
+    public static final Register[] xmmRegistersAVX512 = {
+        xmm0, xmm1, xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
+        xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
+        xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23,
+        xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31
+    };
+
+    public static final RegisterCategory MASK = new RegisterCategory("MASK", false);
+
+    public static final Register k0 = new Register(48, 0, "k0", MASK);
+    public static final Register k1 = new Register(49, 1, "k1", MASK);
+    public static final Register k2 = new Register(50, 2, "k2", MASK);
+    public static final Register k3 = new Register(51, 3, "k3", MASK);
+    public static final Register k4 = new Register(52, 4, "k4", MASK);
+    public static final Register k5 = new Register(53, 5, "k5", MASK);
+    public static final Register k6 = new Register(54, 6, "k6", MASK);
+    public static final Register k7 = new Register(55, 7, "k7", MASK);
+
+    public static final Register[] valueRegistersSSE = {
         rax,  rcx,  rdx,   rbx,   rsp,   rbp,   rsi,   rdi,
         r8,   r9,   r10,   r11,   r12,   r13,   r14,   r15,
         xmm0, xmm1, xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
         xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15
     };
 
+    public static final Register[] valueRegistersAVX512 = {
+        rax,  rcx,  rdx,   rbx,   rsp,   rbp,   rsi,   rdi,
+        r8,   r9,   r10,   r11,   r12,   r13,   r14,   r15,
+        xmm0, xmm1, xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
+        xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
+        xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23,
+        xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31,
+        k0, k1, k2, k3, k4, k5, k6, k7
+    };
+
     /**
      * Register used to construct an instruction-relative address.
      */
-    public static final Register rip = new Register(32, -1, "rip", SPECIAL);
+    public static final Register rip = new Register(56, -1, "rip", SPECIAL);
 
     public static final Register[] allRegisters = {
         rax,  rcx,  rdx,   rbx,   rsp,   rbp,   rsi,   rdi,
         r8,   r9,   r10,   r11,   r12,   r13,   r14,   r15,
         xmm0, xmm1, xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
         xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15,
+        xmm16, xmm17, xmm18, xmm19, xmm20, xmm21, xmm22, xmm23,
+        xmm24, xmm25, xmm26, xmm27, xmm28, xmm29, xmm30, xmm31,
+        k0, k1, k2, k3, k4, k5, k6, k7,
         rip
     };
 
@@ -151,7 +201,8 @@
         AVX512PF,
         AVX512ER,
         AVX512CD,
-        AVX512BW
+        AVX512BW,
+        AVX512VL
     }
 
     private final EnumSet<CPUFeature> features;
@@ -166,11 +217,21 @@
 
     private final EnumSet<Flag> flags;
 
+    private final AMD64Kind largestKind;
+
     public AMD64(EnumSet<CPUFeature> features, EnumSet<Flag> flags) {
-        super("AMD64", JavaKind.Long, ByteOrder.LITTLE_ENDIAN, true, allRegisters, LOAD_STORE | STORE_STORE, 1, cpuRegisters.length + (xmmRegisters.length << XMM_REFERENCE_MAP_SHIFT), 8);
+        super("AMD64", AMD64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, LOAD_STORE | STORE_STORE, 1, 8);
         this.features = features;
         this.flags = flags;
         assert features.contains(CPUFeature.SSE2) : "minimum config for x64";
+
+        if (features.contains(CPUFeature.AVX512F)) {
+            largestKind = AMD64Kind.V512_QWORD;
+        } else if (features.contains(CPUFeature.AVX)) {
+            largestKind = AMD64Kind.V256_QWORD;
+        } else {
+            largestKind = AMD64Kind.V128_QWORD;
+        }
     }
 
     public EnumSet<CPUFeature> getFeatures() {
@@ -182,50 +243,60 @@
     }
 
     @Override
+    public Register[] getAvailableValueRegisters() {
+        if (features.contains(CPUFeature.AVX512F)) {
+            return valueRegistersAVX512;
+        } else {
+            return valueRegistersSSE;
+        }
+    }
+
+    @Override
     public PlatformKind getPlatformKind(JavaKind javaKind) {
-        if (javaKind.isObject()) {
-            return getWordKind();
-        } else {
-            return javaKind;
+        switch (javaKind) {
+            case Boolean:
+            case Byte:
+                return AMD64Kind.BYTE;
+            case Short:
+            case Char:
+                return AMD64Kind.WORD;
+            case Int:
+                return AMD64Kind.DWORD;
+            case Long:
+            case Object:
+                return AMD64Kind.QWORD;
+            case Float:
+                return AMD64Kind.SINGLE;
+            case Double:
+                return AMD64Kind.DOUBLE;
+            default:
+                return null;
         }
     }
 
     @Override
     public boolean canStoreValue(RegisterCategory category, PlatformKind platformKind) {
-        if (!(platformKind instanceof JavaKind)) {
-            return false;
+        AMD64Kind kind = (AMD64Kind) platformKind;
+        if (kind.isInteger()) {
+            return category.equals(CPU);
+        } else if (kind.isXMM()) {
+            return category.equals(XMM);
+        } else {
+            assert kind.isMask();
+            return category.equals(MASK);
         }
-
-        JavaKind kind = (JavaKind) platformKind;
-        if (category.equals(CPU)) {
-            switch (kind) {
-                case Boolean:
-                case Byte:
-                case Char:
-                case Short:
-                case Int:
-                case Long:
-                    return true;
-            }
-        } else if (category.equals(XMM)) {
-            switch (kind) {
-                case Float:
-                case Double:
-                    return true;
-            }
-        }
-
-        return false;
     }
 
     @Override
-    public PlatformKind getLargestStorableKind(RegisterCategory category) {
+    public AMD64Kind getLargestStorableKind(RegisterCategory category) {
         if (category.equals(CPU)) {
-            return JavaKind.Long;
+            return AMD64Kind.QWORD;
         } else if (category.equals(XMM)) {
-            return JavaKind.Double;
+            return largestKind;
+        } else if (category.equals(MASK)) {
+            return AMD64Kind.MASK64;
         } else {
-            return JavaKind.Illegal;
+            return null;
         }
     }
 }
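
Illustrative sketch (not part of the changeset): with the changes above, a client maps JavaKinds to the new AMD64Kinds and asks for the widest storable vector kind. The static XMM register-category field is assumed to be the one referenced inside AMD64.java.

    import jdk.vm.ci.amd64.AMD64;
    import jdk.vm.ci.amd64.AMD64Kind;
    import jdk.vm.ci.meta.JavaKind;

    class AMD64KindQueries {
        // `arch` is an AMD64 instance obtained from the JVMCI runtime's target description.
        static void probe(AMD64 arch) {
            AMD64Kind intKind = (AMD64Kind) arch.getPlatformKind(JavaKind.Int);    // DWORD
            AMD64Kind refKind = (AMD64Kind) arch.getPlatformKind(JavaKind.Object); // QWORD (uncompressed oop)
            AMD64Kind widest = arch.getLargestStorableKind(AMD64.XMM);             // V128/V256/V512_QWORD, by CPU feature
            System.out.println(intKind + " " + refKind + " " + widest);
        }
    }
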
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.amd64/src/jdk/vm/ci/amd64/AMD64Kind.java	Wed Nov 11 23:51:57 2015 -0500
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.vm.ci.amd64;
+
+import jdk.vm.ci.meta.PlatformKind;
+
+public enum AMD64Kind implements PlatformKind {
+
+    // scalar
+    BYTE(1),
+    WORD(2),
+    DWORD(4),
+    QWORD(8),
+    SINGLE(4),
+    DOUBLE(8),
+
+    // SSE2
+    V32_BYTE(4, BYTE),
+    V32_WORD(4, WORD),
+    V64_BYTE(8, BYTE),
+    V64_WORD(8, WORD),
+    V64_DWORD(8, DWORD),
+    V128_BYTE(16, BYTE),
+    V128_WORD(16, WORD),
+    V128_DWORD(16, DWORD),
+    V128_QWORD(16, QWORD),
+    V128_SINGLE(16, SINGLE),
+    V128_DOUBLE(16, DOUBLE),
+
+    // AVX
+    V256_BYTE(32, BYTE),
+    V256_WORD(32, WORD),
+    V256_DWORD(32, DWORD),
+    V256_QWORD(32, QWORD),
+    V256_SINGLE(32, SINGLE),
+    V256_DOUBLE(32, DOUBLE),
+
+    // AVX512
+    V512_BYTE(64, BYTE),
+    V512_WORD(64, WORD),
+    V512_DWORD(64, DWORD),
+    V512_QWORD(64, QWORD),
+    V512_SINGLE(64, SINGLE),
+    V512_DOUBLE(64, DOUBLE),
+
+    MASK8(1),
+    MASK16(2),
+    MASK32(4),
+    MASK64(8);
+
+    private final int size;
+    private final int vectorLength;
+
+    private final AMD64Kind scalar;
+    private final EnumKey<AMD64Kind> key = new EnumKey<>(this);
+
+    private AMD64Kind(int size) {
+        this.size = size;
+        this.scalar = this;
+        this.vectorLength = 1;
+    }
+
+    private AMD64Kind(int size, AMD64Kind scalar) {
+        this.size = size;
+        this.scalar = scalar;
+
+        assert size % scalar.size == 0;
+        this.vectorLength = size / scalar.size;
+    }
+
+    public AMD64Kind getScalar() {
+        return scalar;
+    }
+
+    public int getSizeInBytes() {
+        return size;
+    }
+
+    public int getVectorLength() {
+        return vectorLength;
+    }
+
+    public Key getKey() {
+        return key;
+    }
+
+    public boolean isInteger() {
+        switch (this) {
+            case BYTE:
+            case WORD:
+            case DWORD:
+            case QWORD:
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    public boolean isXMM() {
+        switch (this) {
+            case SINGLE:
+            case DOUBLE:
+            case V32_BYTE:
+            case V32_WORD:
+            case V64_BYTE:
+            case V64_WORD:
+            case V64_DWORD:
+            case V128_BYTE:
+            case V128_WORD:
+            case V128_DWORD:
+            case V128_QWORD:
+            case V128_SINGLE:
+            case V128_DOUBLE:
+            case V256_BYTE:
+            case V256_WORD:
+            case V256_DWORD:
+            case V256_QWORD:
+            case V256_SINGLE:
+            case V256_DOUBLE:
+            case V512_BYTE:
+            case V512_WORD:
+            case V512_DWORD:
+            case V512_QWORD:
+            case V512_SINGLE:
+            case V512_DOUBLE:
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    public boolean isMask() {
+        switch (this) {
+            case MASK8:
+            case MASK16:
+            case MASK32:
+            case MASK64:
+                return true;
+            default:
+                return false;
+        }
+    }
+
+    public char getTypeChar() {
+        switch (this) {
+            case BYTE:
+                return 'b';
+            case WORD:
+                return 'w';
+            case DWORD:
+                return 'd';
+            case QWORD:
+                return 'q';
+            case SINGLE:
+                return 'S';
+            case DOUBLE:
+                return 'D';
+            case V32_BYTE:
+            case V32_WORD:
+            case V64_BYTE:
+            case V64_WORD:
+            case V64_DWORD:
+                return 'v';
+            case V128_BYTE:
+            case V128_WORD:
+            case V128_DWORD:
+            case V128_QWORD:
+            case V128_SINGLE:
+            case V128_DOUBLE:
+                return 'x';
+            case V256_BYTE:
+            case V256_WORD:
+            case V256_DWORD:
+            case V256_QWORD:
+            case V256_SINGLE:
+            case V256_DOUBLE:
+                return 'y';
+            case V512_BYTE:
+            case V512_WORD:
+            case V512_DWORD:
+            case V512_QWORD:
+            case V512_SINGLE:
+            case V512_DOUBLE:
+                return 'z';
+            case MASK8:
+            case MASK16:
+            case MASK32:
+            case MASK64:
+                return 'k';
+            default:
+                return '-';
+        }
+    }
+}
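
A minimal usage sketch for the new enum (illustrative only): each constant knows its byte size, scalar element, vector length, and type character.

    import jdk.vm.ci.amd64.AMD64Kind;

    class AMD64KindDemo {
        public static void main(String[] args) {
            AMD64Kind k = AMD64Kind.V256_SINGLE;               // 256-bit vector of floats
            System.out.println(k.getScalar());                 // SINGLE
            System.out.println(k.getSizeInBytes());            // 32
            System.out.println(k.getVectorLength());           // 8 (= 32 / 4)
            System.out.println(k.getTypeChar());               // 'y'
            System.out.println(k.isXMM() + " " + k.isMask());  // true false
        }
    }
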
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/AbstractAddress.java	Wed Nov 11 18:04:33 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-/*
- * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package jdk.vm.ci.code;
-
-/**
- * Abstract base class that represents a platform specific address.
- */
-public abstract class AbstractAddress {
-}
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/Architecture.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/Architecture.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,11 +22,12 @@
  */
 package jdk.vm.ci.code;
 
-import java.nio.*;
-import java.util.*;
+import java.nio.ByteOrder;
+import java.util.Arrays;
 
 import jdk.vm.ci.code.Register.RegisterCategory;
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.PlatformKind;
 
 /**
  * Represents a CPU architecture, including information such as its endianness, CPU registers, word
@@ -35,13 +36,6 @@
 public abstract class Architecture {
 
     /**
-     * The number of entries required in a {@link ReferenceMap} covering all the registers that may
-     * store references. The index of a register in the reference map is given by
-     * {@link Register#getReferenceMapIndex()}.
-     */
-    private final int registerReferenceMapSize;
-
-    /**
      * The architecture specific type of a native word.
      */
     private final PlatformKind wordKind;
@@ -85,7 +79,7 @@
     private final int returnAddressSize;
 
     protected Architecture(String name, PlatformKind wordKind, ByteOrder byteOrder, boolean unalignedMemoryAccess, Register[] registers, int implicitMemoryBarriers, int nativeCallDisplacementOffset,
-                    int registerReferenceMapSize, int returnAddressSize) {
+                    int returnAddressSize) {
         this.name = name;
         this.registers = registers;
         this.wordKind = wordKind;
@@ -93,7 +87,6 @@
         this.unalignedMemoryAccess = unalignedMemoryAccess;
         this.implicitMemoryBarriers = implicitMemoryBarriers;
         this.machineCodeCallDisplacementOffset = nativeCallDisplacementOffset;
-        this.registerReferenceMapSize = registerReferenceMapSize;
         this.returnAddressSize = returnAddressSize;
     }
 
@@ -107,10 +100,6 @@
         return getName().toLowerCase();
     }
 
-    public int getRegisterReferenceMapSize() {
-        return registerReferenceMapSize;
-    }
-
     /**
      * Gets the natural size of words (typically registers and pointers) of this architecture, in
      * bytes.
@@ -131,13 +120,23 @@
     }
 
     /**
-     * Gets an array of all available registers on this architecture. The index of each register in
-     * this array is equal to its {@linkplain Register#number number}.
+     * Gets an array of all registers that exist on this architecture. This contains all registers
+     * that exist in the specification of this architecture. Not all of them may be available on
+     * this particular architecture instance. The index of each register in this array is equal to
+     * its {@linkplain Register#number number}.
      */
     public Register[] getRegisters() {
         return registers.clone();
     }
 
+    /**
+     * Gets an array of all registers available for storing values on this architecture. This may be
+     * a subset of {@link #getRegisters()}, depending on the capabilities of this particular CPU.
+     */
+    public Register[] getAvailableValueRegisters() {
+        return getRegisters();
+    }
+
     public ByteOrder getByteOrder() {
         return byteOrder;
     }
@@ -207,7 +206,6 @@
                 assert this.byteOrder.equals(that.byteOrder);
                 assert this.implicitMemoryBarriers == that.implicitMemoryBarriers;
                 assert this.machineCodeCallDisplacementOffset == that.machineCodeCallDisplacementOffset;
-                assert this.registerReferenceMapSize == that.registerReferenceMapSize;
                 assert Arrays.equals(this.registers, that.registers);
                 assert this.returnAddressSize == that.returnAddressSize;
                 assert this.unalignedMemoryAccess == that.unalignedMemoryAccess;
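
To illustrate the distinction drawn by the new javadoc (sketch only, not from the changeset): getRegisters() is the full architecture specification, while getAvailableValueRegisters() is what the current CPU can actually use for values.

    import jdk.vm.ci.code.Architecture;
    import jdk.vm.ci.code.Register;

    class RegisterSets {
        static void report(Architecture arch) {
            Register[] specified = arch.getRegisters();             // every register defined by the architecture
            Register[] usable = arch.getAvailableValueRegisters();  // possibly a subset on this particular CPU
            System.out.println(usable.length + " of " + specified.length + " registers usable for values");
        }
    }
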
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/BailoutException.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/BailoutException.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,7 +22,7 @@
  */
 package jdk.vm.ci.code;
 
-import java.util.*;
+import java.util.Locale;
 
 /**
  * Exception thrown when the compiler refuses to compile a method because of problems with the
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/BytecodeFrame.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/BytecodeFrame.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,9 +22,12 @@
  */
 package jdk.vm.ci.code;
 
-import java.util.*;
+import java.util.Arrays;
 
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.JavaValue;
+import jdk.vm.ci.meta.ResolvedJavaMethod;
+import jdk.vm.ci.meta.Value;
 
 /**
  * Represents the Java bytecode frame state(s) at a given position including {@link Value locations}
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/BytecodePosition.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/BytecodePosition.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,9 +22,9 @@
  */
 package jdk.vm.ci.code;
 
-import java.util.*;
+import java.util.Objects;
 
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.meta.ResolvedJavaMethod;
 
 /**
  * Represents a code position, that is, a chain of inlined methods with bytecode locations, that is
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/CallingConvention.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/CallingConvention.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,9 +22,10 @@
  */
 package jdk.vm.ci.code;
 
-import static jdk.vm.ci.code.ValueUtil.*;
-
-import jdk.vm.ci.meta.*;
+import static jdk.vm.ci.code.ValueUtil.isAllocatableValue;
+import static jdk.vm.ci.code.ValueUtil.isStackSlot;
+import jdk.vm.ci.meta.AllocatableValue;
+import jdk.vm.ci.meta.Value;
 
 /**
  * A calling convention describes the locations in which the arguments for a call are placed and the
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/CodeCacheProvider.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/CodeCacheProvider.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,9 +22,14 @@
  */
 package jdk.vm.ci.code;
 
-import jdk.vm.ci.code.CompilationResult.*;
-import jdk.vm.ci.code.DataSection.*;
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.code.CompilationResult.Call;
+import jdk.vm.ci.code.CompilationResult.DataPatch;
+import jdk.vm.ci.code.CompilationResult.Mark;
+import jdk.vm.ci.code.DataSection.Data;
+import jdk.vm.ci.meta.Constant;
+import jdk.vm.ci.meta.JavaConstant;
+import jdk.vm.ci.meta.ResolvedJavaMethod;
+import jdk.vm.ci.meta.SpeculationLog;
 
 /**
  * Access to code cache related details and requirements.
@@ -32,26 +37,62 @@
 public interface CodeCacheProvider {
 
     /**
-     * Adds the given compilation result as an implementation of the given method without making it
-     * the default implementation.
+     * Installs code for a given method based on a given compilation result without making it the
+     * default implementation of the method.
      *
-     * @param method a method to which the executable code is begin added
+     * @param method a method implemented by the installed code
      * @param compResult the compilation result to be added
-     * @param speculationLog the speculation log to be used
-     * @return a reference to the compiled and ready-to-run code or throws a
-     *         {@link BailoutException} if the code installation failed
+     * @param log the speculation log to be used
+     * @param installedCode a predefined {@link InstalledCode} object to use as a reference to the
+     *            installed code. If {@code null}, a new {@link InstalledCode} object will be
+     *            created.
+     * @return a reference to the ready-to-run code
+     * @throws BailoutException if the code installation failed
      */
-    InstalledCode addMethod(ResolvedJavaMethod method, CompilationResult compResult, SpeculationLog speculationLog, InstalledCode predefinedInstalledCode);
+    default InstalledCode addCode(ResolvedJavaMethod method, CompilationResult compResult, SpeculationLog log, InstalledCode installedCode) {
+        return installCode(new CompilationRequest(method), compResult, installedCode, log, false);
+    }
 
     /**
-     * Sets the given compilation result as the default implementation of the given method.
+     * Installs code for a given method based on a given compilation result and makes it the default
+     * implementation of the method.
      *
-     * @param method a method to which the executable code is begin added
+     * @param method a method implemented by the installed code and for which the installed code
+     *            becomes the default implementation
      * @param compResult the compilation result to be added
-     * @return a reference to the compiled and ready-to-run code or null if the code installation
-     *         failed
+     * @return a reference to the ready-to-run code
+     * @throws BailoutException if the code installation failed
      */
-    InstalledCode setDefaultMethod(ResolvedJavaMethod method, CompilationResult compResult);
+    default InstalledCode setDefaultCode(ResolvedJavaMethod method, CompilationResult compResult) {
+        return installCode(new CompilationRequest(method), compResult, null, null, true);
+    }
+
+    /**
+     * Installs code based on a given compilation result.
+     *
+     * @param compRequest details of the method compiled to produce {@code compResult} or
+     *            {@code null} if the input to {@code compResult} was not a
+     *            {@link ResolvedJavaMethod}
+     * @param compResult the compilation result to be added
+     * @param installedCode a pre-allocated {@link InstalledCode} object to use as a reference to
+     *            the installed code. If {@code null}, a new {@link InstalledCode} object will be
+     *            created.
+     * @param log the speculation log to be used
+     * @param isDefault specifies if the installed code should be made the default implementation of
+     *            {@code compRequest.getMethod()}. The default implementation for a method is the
+     *            code executed for standard calls to the method. This argument is ignored if
+     *            {@code compRequest == null}.
+     * @return a reference to the compiled and ready-to-run installed code
+     * @throws BailoutException if the code installation failed
+     */
+    InstalledCode installCode(CompilationRequest compRequest, CompilationResult compResult, InstalledCode installedCode, SpeculationLog log, boolean isDefault);
+
+    /**
+     * Invalidates {@code installedCode} such that {@link InvalidInstalledCodeException} will be
+     * raised the next time {@code installedCode} is
+     * {@linkplain InstalledCode#executeVarargs(Object...) executed}.
+     */
+    void invalidateInstalledCode(InstalledCode installedCode);
 
     /**
      * Gets a name for a {@link Mark} mark.
@@ -102,4 +143,16 @@
      * Create a new speculation log for the target runtime.
      */
     SpeculationLog createSpeculationLog();
+
+    /**
+     * Returns the maximum absolute offset of a PC relative call to a given address from any
+     * position in the code cache or -1 when not applicable. Intended for determining the required
+     * size of address/offset fields.
+     */
+    long getMaxCallTargetOffset(long address);
+
+    /**
+     * Determines if debug info should also be emitted at non-safepoint locations.
+     */
+    boolean shouldDebugNonSafepoints();
 }
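
A hedged sketch of the new installation entry points (the helper class and null speculation log are illustrative): addCode() and setDefaultCode() are thin default wrappers around installCode().

    import jdk.vm.ci.code.CodeCacheProvider;
    import jdk.vm.ci.code.CompilationResult;
    import jdk.vm.ci.code.InstalledCode;
    import jdk.vm.ci.meta.ResolvedJavaMethod;

    class CodeInstaller {
        // Installs code without making it the method's default implementation.
        static InstalledCode installAuxiliary(CodeCacheProvider codeCache, ResolvedJavaMethod method, CompilationResult result) {
            return codeCache.addCode(method, result, null, null); // no speculation log; let JVMCI allocate the InstalledCode
        }

        // Installs code and makes it the implementation used by normal calls to `method`.
        static InstalledCode installDefault(CodeCacheProvider codeCache, ResolvedJavaMethod method, CompilationResult result) {
            return codeCache.setDefaultCode(method, result);
        }
    }
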
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/CodeUtil.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/CodeUtil.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,9 +22,15 @@
  */
 package jdk.vm.ci.code;
 
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Map;
 
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.meta.JavaType;
+import jdk.vm.ci.meta.MetaUtil;
+import jdk.vm.ci.meta.ResolvedJavaMethod;
+import jdk.vm.ci.meta.Signature;
 
 /**
  * Miscellaneous collection of utility methods used by {@code jdk.vm.ci.code} and its clients.
@@ -323,49 +329,12 @@
     public interface RefMapFormatter {
 
         String formatStackSlot(int frameRefMapIndex);
-
-        String formatRegister(int regRefMapIndex);
     }
 
     /**
-     * Formats a location in a register reference map.
+     * Formats a location present in a reference map.
      */
-    public static class DefaultRegFormatter implements RefMapFormatter {
-
-        private final Register[] registers;
-
-        public DefaultRegFormatter(Architecture arch) {
-            registers = new Register[arch.getRegisterReferenceMapSize()];
-            for (Register r : arch.getRegisters()) {
-                if (r.getReferenceMapIndex() >= 0) {
-                    registers[r.getReferenceMapIndex()] = r;
-                }
-            }
-        }
-
-        public String formatStackSlot(int frameRefMapIndex) {
-            return null;
-        }
-
-        public String formatRegister(int regRefMapIndex) {
-            int i = regRefMapIndex;
-            int idx = 0;
-            while (registers[i] == null) {
-                i--;
-                idx++;
-            }
-            if (idx == 0) {
-                return registers[i].toString();
-            } else {
-                return String.format("%s+%d", registers[i].toString(), idx);
-            }
-        }
-    }
-
-    /**
-     * Formats a location present in a register or frame reference map.
-     */
-    public static class DefaultRefMapFormatter extends DefaultRegFormatter {
+    public static class DefaultRefMapFormatter implements RefMapFormatter {
 
         /**
          * The size of a stack slot.
@@ -383,8 +352,7 @@
          */
         public final int refMapToFPOffset;
 
-        public DefaultRefMapFormatter(Architecture arch, int slotSize, Register fp, int refMapToFPOffset) {
-            super(arch);
+        public DefaultRefMapFormatter(int slotSize, Register fp, int refMapToFPOffset) {
             this.slotSize = slotSize;
             this.fp = fp;
             this.refMapToFPOffset = refMapToFPOffset;
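
With the register half of the formatter removed, a DefaultRefMapFormatter is built from frame layout alone. A minimal sketch; the 8-byte slot size and zero FP offset are assumptions for illustration.

    import jdk.vm.ci.code.CodeUtil.DefaultRefMapFormatter;
    import jdk.vm.ci.code.Register;

    class RefMapPrinting {
        static String describeSlot(Register framePointer, int frameRefMapIndex) {
            DefaultRefMapFormatter fmt = new DefaultRefMapFormatter(8, framePointer, 0); // slotSize, fp, refMapToFPOffset
            return fmt.formatStackSlot(frameRefMapIndex);
        }
    }
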
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/CompilationRequest.java	Wed Nov 11 23:51:57 2015 -0500
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package jdk.vm.ci.code;
+
+import jdk.vm.ci.meta.ResolvedJavaMethod;
+
+/**
+ * Represents a request to compile a method.
+ */
+public class CompilationRequest {
+
+    private final ResolvedJavaMethod method;
+
+    private final int entryBCI;
+
+    /**
+     * Creates a request to compile a method starting at its entry point.
+     *
+     * @param method the method to be compiled
+     */
+    public CompilationRequest(ResolvedJavaMethod method) {
+        this(method, -1);
+    }
+
+    /**
+     * Creates a request to compile a method starting at a given BCI.
+     *
+     * @param method the method to be compiled
+     * @param entryBCI the bytecode index (BCI) at which to start compiling, where -1 denotes the
+     *            method's entry point
+     */
+    public CompilationRequest(ResolvedJavaMethod method, int entryBCI) {
+        assert method != null;
+        this.method = method;
+        this.entryBCI = entryBCI;
+    }
+
+    /**
+     * Gets the method to be compiled.
+     */
+    public ResolvedJavaMethod getMethod() {
+        return method;
+    }
+
+    /**
+     * Gets the bytecode index (BCI) at which to start compiling, where -1 denotes a non-OSR
+     * compilation request and all other values denote an on stack replacement (OSR) compilation
+     * request.
+     */
+    public int getEntryBCI() {
+        return entryBCI;
+    }
+
+    @Override
+    public String toString() {
+        return method.format("%H.%n(%p)@" + entryBCI);
+    }
+}
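
Illustrative only: the two constructors distinguish a standard compilation from an on-stack-replacement (OSR) request.

    import jdk.vm.ci.code.CompilationRequest;
    import jdk.vm.ci.meta.ResolvedJavaMethod;

    class Requests {
        static CompilationRequest standard(ResolvedJavaMethod method) {
            return new CompilationRequest(method);                 // entryBCI == -1: start at the method entry point
        }

        static CompilationRequest osr(ResolvedJavaMethod method, int loopHeaderBci) {
            return new CompilationRequest(method, loopHeaderBci);  // any other BCI denotes an OSR compilation
        }
    }
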
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/CompilationResult.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/CompilationResult.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,13 +22,24 @@
  */
 package jdk.vm.ci.code;
 
-import static java.util.Collections.*;
-import static jdk.vm.ci.meta.MetaUtil.*;
+import static java.util.Collections.emptyList;
+import static java.util.Collections.unmodifiableList;
+import static jdk.vm.ci.meta.MetaUtil.identityHashCodeString;
 
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
 
-import jdk.vm.ci.meta.*;
-import jdk.vm.ci.meta.Assumptions.*;
+import jdk.vm.ci.meta.Assumptions.Assumption;
+import jdk.vm.ci.meta.InvokeTarget;
+import jdk.vm.ci.meta.JavaConstant;
+import jdk.vm.ci.meta.MetaUtil;
+import jdk.vm.ci.meta.ResolvedJavaMethod;
+import jdk.vm.ci.meta.VMConstant;
 
 /**
  * Represents the output from compiling a method, including the compiled machine code, associated
@@ -115,8 +126,8 @@
 
     public enum MetaSpaceAccessType {
         Move,
-        Store, // store only works for compressed oops (memory <- 32bit value). Compressed oops is
-               // not supported using AOT. TODO: Look at HotSpotStoreConstantOp
+        Store,  // store only works for compressed oops (memory <- 32bit value). Compressed oops is
+        // not supported using AOT. TODO: Look at HotSpotStoreConstantOp
         Compare; // HotSpotCompareMemoryConstantOp, HotSpotCompareConstantOp
 
         private MetaSpaceAccessType() {
@@ -128,13 +139,11 @@
      */
     public static final class MetaSpaceAccess extends Infopoint {
 
-        private static final long serialVersionUID = 1701958512608684706L;
-
         /**
          * Metaspace reference.
          */
         public final Object reference; // Object here is a HotSpotResolvedObjectType or a
-                                       // HotSpotMetaSpaceConstant
+        // HotSpotMetaSpaceConstant
 
         public final MetaSpaceAccessType type;
 
@@ -296,6 +305,15 @@
             }
             return false;
         }
+
+        @Override
+        public String toString() {
+            if (initialized) {
+                return String.format("DataSection[0x%x]", offset);
+            } else {
+                return "DataSection[?]";
+            }
+        }
     }
 
     /**
@@ -528,8 +546,6 @@
         }
     }
 
-    private int id = -1;
-
     /**
      * Specifies whether this compilation is a {@code +ImmutableCode} {@code +GeneratePIC}
      * compilation.
@@ -612,7 +628,6 @@
             CompilationResult that = (CompilationResult) obj;
             // @formatter:off
             if (this.entryBCI == that.entryBCI &&
-                this.id == that.id &&
                 this.customStackAreaOffset == that.customStackAreaOffset &&
                 this.totalFrameSize == that.totalFrameSize &&
                 this.targetCodeSize == that.targetCodeSize &&
@@ -633,20 +648,6 @@
     }
 
     /**
-     * @return the compile id
-     */
-    public int getId() {
-        return id;
-    }
-
-    /**
-     * @param id the compile id to set
-     */
-    public void setId(int id) {
-        this.id = id;
-    }
-
-    /**
      * @return true is this is a {@code +ImmutableCode} {@code +GeneratePIC} compilation, false
      *         otherwise.
      */
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/DataSection.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/DataSection.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,15 +22,18 @@
  */
 package jdk.vm.ci.code;
 
-import static jdk.vm.ci.meta.MetaUtil.*;
+import static jdk.vm.ci.meta.MetaUtil.identityHashCodeString;
 
-import java.nio.*;
-import java.util.*;
-import java.util.function.*;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.Objects;
+import java.util.function.Consumer;
 
-import jdk.vm.ci.code.CompilationResult.*;
-import jdk.vm.ci.code.DataSection.*;
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.code.CompilationResult.DataPatch;
+import jdk.vm.ci.code.CompilationResult.DataSectionReference;
+import jdk.vm.ci.code.DataSection.Data;
+import jdk.vm.ci.meta.SerializableConstant;
 
 public final class DataSection implements Iterable<Data> {
 
@@ -176,11 +179,27 @@
      */
     public DataSectionReference insertData(Data data) {
         assert !finalLayout;
-        if (data.ref == null) {
-            data.ref = new DataSectionReference();
+        synchronized (data) {
+            if (data.ref == null) {
+                data.ref = new DataSectionReference();
+                dataItems.add(data);
+            }
+            return data.ref;
+        }
+    }
+
+    /**
+     * Transfers all {@link Data} from the provided other {@link DataSection} to this
+     * {@link DataSection}, and empties the other section.
+     */
+    public void addAll(DataSection other) {
+        assert !finalLayout && !other.finalLayout;
+
+        for (Data data : other.dataItems) {
+            assert data.ref != null;
             dataItems.add(data);
         }
-        return data.ref;
+        other.dataItems.clear();
     }
 
     /**
@@ -195,14 +214,16 @@
         dataItems.sort((a, b) -> a.alignment - b.alignment);
 
         int position = 0;
+        int alignment = 1;
         for (Data d : dataItems) {
-            sectionAlignment = lcm(sectionAlignment, d.alignment);
+            alignment = lcm(alignment, d.alignment);
             position = align(position, d.alignment);
 
             d.ref.setOffset(position);
             position += d.size;
         }
 
+        sectionAlignment = alignment;
         sectionSize = position;
     }
 
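
A sketch of the new contract (names are illustrative): insertData() now synchronizes on the Data item so the same constant can be shared between sections built concurrently, and addAll() merges one section into another and empties the source.

    import jdk.vm.ci.code.CompilationResult.DataSectionReference;
    import jdk.vm.ci.code.DataSection;
    import jdk.vm.ci.code.DataSection.Data;

    class DataSectionUse {
        static DataSectionReference record(DataSection section, Data constant) {
            return section.insertData(constant); // returns the existing reference if `constant` was inserted before
        }

        static void mergeStubData(DataSection target, DataSection stubSection) {
            target.addAll(stubSection);          // transfers every Data item and clears `stubSection`
        }
    }
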
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/DebugInfo.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/DebugInfo.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,7 +22,7 @@
  */
 package jdk.vm.ci.code;
 
-import java.util.*;
+import java.util.Objects;
 
 /**
  * Represents the debugging information for a particular point of execution. This information
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/InstalledCode.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/InstalledCode.java	Wed Nov 11 23:51:57 2015 -0500
@@ -29,14 +29,19 @@
 public class InstalledCode {
 
     /**
-     * Raw address of this code blob.
+     * Raw address of the entity representing this installed code.
      */
-    private long address;
+    protected long address;
+
+    /**
+     * Raw address of the entry point of this installed code.
+     */
+    protected long entryPoint;
 
     /**
      * Counts how often the address field was reassigned.
      */
-    private long version;
+    protected long version;
 
     protected final String name;
 
@@ -44,27 +49,29 @@
         this.name = name;
     }
 
-    public final void setAddress(long address) {
-        this.address = address;
-        version++;
-    }
-
     /**
-     * @return the address of this code blob
+     * @return the address of entity representing this installed code.
      */
     public final long getAddress() {
         return address;
     }
 
     /**
-     * @return the address of this code blob
+     * @return the address of the normal entry point of the installed code.
+     */
+    public final long getEntryPoint() {
+        return entryPoint;
+    }
+
+    /**
+     * @return the version number of this installed code
      */
     public final long getVersion() {
         return version;
     }
 
     /**
-     * Returns the name of this code blob.
+     * Returns the name of this installed code.
      */
     public String getName() {
         return name;
@@ -79,10 +86,19 @@
     }
 
     /**
-     * Returns the number of instruction bytes for this code.
+     * @return true if the code represented by this object is still valid for invocation, false
+     *         otherwise (may happen due to deopt, etc.)
      */
-    public long getCodeSize() {
-        return 0;
+    public boolean isValid() {
+        return entryPoint != 0;
+    }
+
+    /**
+     * @return true if the code represented by this object still exists and might have live
+     *         activations, false otherwise (may happen due to deopt, etc.)
+     */
+    public boolean isAlive() {
+        return address != 0;
     }
 
     /**
@@ -93,17 +109,9 @@
     }
 
     /**
-     * @return true if the code represented by this object is still valid, false otherwise (may
-     *         happen due to deopt, etc.)
-     */
-    public boolean isValid() {
-        return address != 0;
-    }
-
-    /**
      * Invalidates this installed code such that any subsequent
      * {@linkplain #executeVarargs(Object...) invocation} will throw an
-     * {@link InvalidInstalledCodeException}.
+     * {@link InvalidInstalledCodeException} and all existing invocations will be deoptimized.
      */
     public void invalidate() {
         throw new UnsupportedOperationException();
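
A hedged sketch of the new validity states (the helper is illustrative): isValid() tests the entry point, while isAlive() only tests that the underlying entity still exists.

    import jdk.vm.ci.code.InstalledCode;
    import jdk.vm.ci.code.InvalidInstalledCodeException;

    class InstalledCodeUse {
        static Object callIfPossible(InstalledCode code, Object... args) throws InvalidInstalledCodeException {
            if (code.isValid()) {       // entry point still set: safe to invoke
                return code.executeVarargs(args);
            }
            if (code.isAlive()) {       // invalidated, but activations may still be running
                return null;
            }
            return null;                // fully dead
        }
    }
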
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/Location.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/Location.java	Wed Nov 11 23:51:57 2015 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/Register.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/Register.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,7 +22,8 @@
  */
 package jdk.vm.ci.code;
 
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.LIRKind;
 
 /**
  * Represents a target machine register.
@@ -80,22 +81,15 @@
     public static class RegisterCategory {
 
         private final String name;
-
-        private final int referenceMapOffset;
-        private final int referenceMapShift;
+        private final boolean mayContainReference;
 
         public RegisterCategory(String name) {
-            this(name, 0, 0);
+            this(name, true);
         }
 
-        public RegisterCategory(String name, int referenceMapOffset) {
-            this(name, referenceMapOffset, 0);
-        }
-
-        public RegisterCategory(String name, int referenceMapOffset, int referenceMapShift) {
+        public RegisterCategory(String name, boolean mayContainReference) {
             this.name = name;
-            this.referenceMapOffset = referenceMapOffset;
-            this.referenceMapShift = referenceMapShift;
+            this.mayContainReference = mayContainReference;
         }
 
         @Override
@@ -112,7 +106,7 @@
         public boolean equals(Object obj) {
             if (obj instanceof RegisterCategory) {
                 RegisterCategory that = (RegisterCategory) obj;
-                return this.referenceMapOffset == that.referenceMapOffset && this.referenceMapShift == that.referenceMapShift && this.name.equals(that.name);
+                return this.name.equals(that.name);
             }
             return false;
         }
@@ -138,10 +132,10 @@
     }
 
     /**
-     * Get the start index of this register in the {@link ReferenceMap}.
+     * Determine whether this register needs to be part of the reference map.
      */
-    public int getReferenceMapIndex() {
-        return (encoding << registerCategory.referenceMapShift) + registerCategory.referenceMapOffset;
+    public boolean mayContainReference() {
+        return registerCategory.mayContainReference;
     }
 
     /**
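
Illustrative sketch of the simplified RegisterCategory: the reference-map index bookkeeping is replaced by a single per-category flag (the category names below are hypothetical).

    import jdk.vm.ci.code.Register;
    import jdk.vm.ci.code.Register.RegisterCategory;

    class Categories {
        static final RegisterCategory GPR = new RegisterCategory("GPR");          // mayContainReference defaults to true
        static final RegisterCategory MASK = new RegisterCategory("MASK", false); // mask registers never hold oops

        static boolean needsRefMapEntry(Register r) {
            return r.mayContainReference(); // replaces the old getReferenceMapIndex() lookup
        }
    }
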
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/RegisterAttributes.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/RegisterAttributes.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,7 +22,7 @@
  */
 package jdk.vm.ci.code;
 
-import java.util.*;
+import java.util.Arrays;
 
 /**
  * A collection of register attributes. The specific attribute values for a register may be local to
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/RegisterConfig.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/RegisterConfig.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,8 +22,10 @@
  */
 package jdk.vm.ci.code;
 
-import jdk.vm.ci.code.CallingConvention.*;
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.code.CallingConvention.Type;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.JavaType;
+import jdk.vm.ci.meta.PlatformKind;
 
 /**
  * A register configuration binds roles and {@linkplain RegisterAttributes attributes} to physical
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/RegisterSaveLayout.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/RegisterSaveLayout.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,7 +22,11 @@
  */
 package jdk.vm.ci.code;
 
-import java.util.*;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.TreeMap;
 
 /**
  * A map from registers to frame slots. This can be used to describe where callee saved registers
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/RegisterValue.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/RegisterValue.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,7 +22,9 @@
  */
 package jdk.vm.ci.code;
 
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.meta.AllocatableValue;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.LIRKind;
 
 /**
  * Denotes a register that stores a value of a fixed kind. There is exactly one (canonical) instance
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/SourceStackTrace.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/SourceStackTrace.java	Wed Nov 11 23:51:57 2015 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2015, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/StackLockValue.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/StackLockValue.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,9 +22,9 @@
  */
 package jdk.vm.ci.code;
 
-import static jdk.vm.ci.code.ValueUtil.*;
-
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.meta.AllocatableValue;
+import jdk.vm.ci.meta.JavaValue;
+import jdk.vm.ci.meta.Value;
 
 /**
  * Represents lock information in the debug information.
@@ -32,10 +32,10 @@
 public final class StackLockValue implements JavaValue {
 
     private JavaValue owner;
-    private StackSlotValue slot;
+    private AllocatableValue slot;
     private final boolean eliminated;
 
-    public StackLockValue(JavaValue object, StackSlotValue slot, boolean eliminated) {
+    public StackLockValue(JavaValue object, AllocatableValue slot, boolean eliminated) {
         this.owner = object;
         this.slot = slot;
         this.eliminated = eliminated;
@@ -81,8 +81,7 @@
         return false;
     }
 
-    public void setSlot(StackSlotValue stackSlot) {
-        assert slot == null || (isVirtualStackSlot(slot) && (slot.equals(stackSlot) || isStackSlot(stackSlot))) : String.format("Can not set slot for %s to %s", this, stackSlot);
+    public void setSlot(AllocatableValue stackSlot) {
         slot = stackSlot;
     }
 }
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/StackSlot.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/StackSlot.java	Wed Nov 11 23:51:57 2015 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2010, 2014, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2010, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -22,13 +22,14 @@
  */
 package jdk.vm.ci.code;
 
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.meta.AllocatableValue;
+import jdk.vm.ci.meta.LIRKind;
 
 /**
  * Represents a compiler spill slot or an outgoing stack-based argument in a method's frame or an
  * incoming stack-based argument in a method's {@linkplain #isInCallerFrame() caller's frame}.
  */
-public final class StackSlot extends StackSlotValue {
+public final class StackSlot extends AllocatableValue {
 
     private final int offset;
     private final boolean addFrameSize;
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/StackSlotValue.java	Wed Nov 11 18:04:33 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-/*
- * Copyright (c) 2014, 2014, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package jdk.vm.ci.code;
-
-import jdk.vm.ci.meta.*;
-
-/**
- * Common base class for {@linkplain StackSlot real} and {@linkplain VirtualStackSlot virtual} stack
- * slots.
- */
-public abstract class StackSlotValue extends AllocatableValue {
-
-    public StackSlotValue(LIRKind lirKind) {
-        super(lirKind);
-    }
-
-}
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/TargetDescription.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/TargetDescription.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,9 +22,10 @@
  */
 package jdk.vm.ci.code;
 
-import static jdk.vm.ci.meta.MetaUtil.*;
-
-import jdk.vm.ci.meta.*;
+import static jdk.vm.ci.meta.MetaUtil.identityHashCodeString;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.LIRKind;
+import jdk.vm.ci.meta.PlatformKind;
 
 /**
  * Represents the target machine for a compiler, including the CPU architecture, the size of
@@ -50,9 +51,9 @@
     public final int wordSize;
 
     /**
-     * The kind to be used for representing raw pointers and CPU registers.
+     * The {@link JavaKind} to be used for representing raw pointers and CPU registers in Java code.
      */
-    public final JavaKind wordKind;
+    public final JavaKind wordJavaKind;
 
     /**
      * The stack alignment requirement of the platform. For example, from Appendix D of <a
@@ -78,10 +79,12 @@
         this.arch = arch;
         this.isMP = isMP;
         this.wordSize = arch.getWordSize();
-        this.wordKind = JavaKind.fromWordSize(wordSize);
+        this.wordJavaKind = JavaKind.fromWordSize(wordSize);
         this.stackAlignment = stackAlignment;
         this.implicitNullCheckLimit = implicitNullCheckLimit;
         this.inlineObjects = inlineObjects;
+
+        assert arch.getPlatformKind(wordJavaKind).equals(arch.getWordKind());
     }
 
     @Override
@@ -101,7 +104,7 @@
                 this.inlineObjects == that.inlineObjects &&
                 this.isMP == that.isMP &&
                 this.stackAlignment == that.stackAlignment &&
-                this.wordKind.equals(that.wordKind) &&
+                this.wordJavaKind.equals(that.wordJavaKind) &&
                 this.wordSize == that.wordSize &&
                 this.arch.equals(that.arch)) {
                 return true;
@@ -116,10 +119,6 @@
         return identityHashCodeString(this);
     }
 
-    public int getSizeInBytes(PlatformKind kind) {
-        return kind.getSizeInBytes();
-    }
-
     public LIRKind getLIRKind(JavaKind javaKind) {
         PlatformKind platformKind = arch.getPlatformKind(javaKind);
         if (javaKind.isObject()) {
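
Sketch only (not from the changeset): callers now use the renamed wordJavaKind field and getLIRKind() to obtain the platform kind for a given JavaKind.

    import jdk.vm.ci.code.TargetDescription;
    import jdk.vm.ci.meta.JavaKind;
    import jdk.vm.ci.meta.LIRKind;

    class TargetUse {
        static void show(TargetDescription target) {
            System.out.println(target.wordJavaKind);               // renamed from wordKind
            LIRKind refKind = target.getLIRKind(JavaKind.Object);  // LIRKind corresponding to an object reference
            System.out.println(refKind);
        }
    }
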
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/UnsignedMath.java	Wed Nov 11 18:04:33 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2011, 2011, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package jdk.vm.ci.code;
-
-import java.math.*;
-
-//JaCoCo Exclude
-
-/**
- * Utilities for unsigned comparisons. All methods have correct, but slow, standard Java
- * implementations so that they can be used with compilers not supporting the intrinsics.
- */
-public class UnsignedMath {
-
-    private static final long MASK = 0xffffffffL;
-
-    /**
-     * Unsigned comparison aboveThan for two numbers.
-     */
-    public static boolean aboveThan(int a, int b) {
-        return (a & MASK) > (b & MASK);
-    }
-
-    /**
-     * Unsigned comparison aboveOrEqual for two numbers.
-     */
-    public static boolean aboveOrEqual(int a, int b) {
-        return (a & MASK) >= (b & MASK);
-    }
-
-    /**
-     * Unsigned comparison belowThan for two numbers.
-     */
-    public static boolean belowThan(int a, int b) {
-        return (a & MASK) < (b & MASK);
-    }
-
-    /**
-     * Unsigned comparison belowOrEqual for two numbers.
-     */
-    public static boolean belowOrEqual(int a, int b) {
-        return (a & MASK) <= (b & MASK);
-    }
-
-    /**
-     * Unsigned comparison aboveThan for two numbers.
-     */
-    public static boolean aboveThan(long a, long b) {
-        return (a > b) ^ ((a < 0) != (b < 0));
-    }
-
-    /**
-     * Unsigned comparison aboveOrEqual for two numbers.
-     */
-    public static boolean aboveOrEqual(long a, long b) {
-        return (a >= b) ^ ((a < 0) != (b < 0));
-    }
-
-    /**
-     * Unsigned comparison belowThan for two numbers.
-     */
-    public static boolean belowThan(long a, long b) {
-        return (a < b) ^ ((a < 0) != (b < 0));
-    }
-
-    /**
-     * Unsigned comparison belowOrEqual for two numbers.
-     */
-    public static boolean belowOrEqual(long a, long b) {
-        return (a <= b) ^ ((a < 0) != (b < 0));
-    }
-
-    /**
-     * Unsigned division for two numbers.
-     */
-    public static int divide(int a, int b) {
-        return (int) ((a & MASK) / (b & MASK));
-    }
-
-    /**
-     * Unsigned remainder for two numbers.
-     */
-    public static int remainder(int a, int b) {
-        return (int) ((a & MASK) % (b & MASK));
-    }
-
-    /**
-     * Unsigned division for two numbers.
-     */
-    public static long divide(long a, long b) {
-        return bi(a).divide(bi(b)).longValue();
-    }
-
-    /**
-     * Unsigned remainder for two numbers.
-     */
-    public static long remainder(long a, long b) {
-        return bi(a).remainder(bi(b)).longValue();
-    }
-
-    private static BigInteger bi(long unsigned) {
-        return unsigned >= 0 ? BigInteger.valueOf(unsigned) : BigInteger.valueOf(unsigned & 0x7fffffffffffffffL).setBit(63);
-    }
-}
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/ValueUtil.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/ValueUtil.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,9 +22,14 @@
  */
 package jdk.vm.ci.code;
 
-import java.util.*;
+import java.util.ArrayList;
+import java.util.List;
 
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.meta.AllocatableValue;
+import jdk.vm.ci.meta.JavaConstant;
+import jdk.vm.ci.meta.JavaValue;
+import jdk.vm.ci.meta.PlatformKind;
+import jdk.vm.ci.meta.Value;
 
 /**
  * Utility class for working with the {@link Value} class and its subclasses.
@@ -60,6 +65,11 @@
         return value instanceof JavaConstant;
     }
 
+    public static JavaConstant asConstantJavaValue(JavaValue value) {
+        assert value != null;
+        return (JavaConstant) value;
+    }
+
     public static boolean isAllocatableValue(Value value) {
         assert value != null;
         return value instanceof AllocatableValue;
@@ -80,26 +90,6 @@
         return (StackSlot) value;
     }
 
-    public static boolean isStackSlotValue(Value value) {
-        assert value != null;
-        return value instanceof StackSlotValue;
-    }
-
-    public static StackSlotValue asStackSlotValue(Value value) {
-        assert value != null;
-        return (StackSlotValue) value;
-    }
-
-    public static boolean isVirtualStackSlot(Value value) {
-        assert value != null;
-        return value instanceof VirtualStackSlot;
-    }
-
-    public static VirtualStackSlot asVirtualStackSlot(Value value) {
-        assert value != null;
-        return (VirtualStackSlot) value;
-    }
-
     public static boolean isRegister(Value value) {
         assert value != null;
         return value instanceof RegisterValue;
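
With StackSlotValue and VirtualStackSlot removed, call sites dispatch directly on StackSlot through the remaining helpers; a minimal sketch:

    import static jdk.vm.ci.code.ValueUtil.asStackSlot;
    import static jdk.vm.ci.code.ValueUtil.isRegister;
    import static jdk.vm.ci.code.ValueUtil.isStackSlot;

    import jdk.vm.ci.code.StackSlot;
    import jdk.vm.ci.meta.Value;

    class ValueDispatch {
        static String describe(Value v) {
            if (isRegister(v)) {
                return "register";
            } else if (isStackSlot(v)) {
                StackSlot slot = asStackSlot(v);
                return "stack slot " + slot;
            }
            return "other";
        }
    }
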
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/VirtualObject.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/VirtualObject.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,9 +22,15 @@
  */
 package jdk.vm.ci.code;
 
-import java.util.*;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.IdentityHashMap;
+import java.util.Set;
 
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.JavaValue;
+import jdk.vm.ci.meta.ResolvedJavaField;
+import jdk.vm.ci.meta.ResolvedJavaType;
 
 /**
  * An instance of this class represents an object whose allocation was removed by escape analysis.
@@ -134,45 +140,6 @@
         return id;
     }
 
-    private boolean checkValues() {
-        assert (values == null) == (slotKinds == null);
-        if (values != null) {
-            assert values.length == slotKinds.length;
-            if (!type.isArray()) {
-                ResolvedJavaField[] fields = type.getInstanceFields(true);
-                int fieldIndex = 0;
-                for (int i = 0; i < values.length; i++) {
-                    ResolvedJavaField field = fields[fieldIndex++];
-                    JavaKind valKind = slotKinds[i].getStackKind();
-                    if (field.getJavaKind() == JavaKind.Object) {
-                        assert valKind.isObject() : field + ": " + valKind + " != " + field.getJavaKind();
-                    } else {
-                        if ((valKind == JavaKind.Double || valKind == JavaKind.Long) && field.getJavaKind() == JavaKind.Int) {
-                            assert fields[fieldIndex].getJavaKind() == JavaKind.Int;
-                            fieldIndex++;
-                        } else {
-                            assert valKind == field.getJavaKind().getStackKind() : field + ": " + valKind + " != " + field.getJavaKind();
-                        }
-                    }
-                }
-                assert fields.length == fieldIndex : type + ": fields=" + Arrays.toString(fields) + ", field values=" + Arrays.toString(values);
-            } else {
-                JavaKind componentKind = type.getComponentType().getJavaKind().getStackKind();
-                if (componentKind == JavaKind.Object) {
-                    for (int i = 0; i < values.length; i++) {
-                        assert slotKinds[i].isObject() : slotKinds[i] + " != " + componentKind;
-                    }
-                } else {
-                    for (int i = 0; i < values.length; i++) {
-                        assert slotKinds[i] == componentKind || componentKind.getBitCount() >= slotKinds[i].getBitCount() ||
-                                        (componentKind == JavaKind.Int && slotKinds[i].getBitCount() >= JavaKind.Int.getBitCount()) : slotKinds[i] + " != " + componentKind;
-                    }
-                }
-            }
-        }
-        return true;
-    }
-
     /**
      * Overwrites the current set of values with a new one.
      *
@@ -183,7 +150,6 @@
     public void setValues(JavaValue[] values, JavaKind[] slotKinds) {
         this.values = values;
         this.slotKinds = slotKinds;
-        assert checkValues();
     }
 
     @Override
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/VirtualStackSlot.java	Wed Nov 11 18:04:33 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2014, 2014, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package jdk.vm.ci.code;
-
-import jdk.vm.ci.meta.*;
-
-/**
- * {@link VirtualStackSlot}s are stack slots that are not yet fixed to specific frame offset. They
- * are replaced by real {@link StackSlot}s with a fixed position in the frame before code emission.
- */
-public abstract class VirtualStackSlot extends StackSlotValue {
-
-    private final int id;
-
-    public VirtualStackSlot(int id, LIRKind lirKind) {
-        super(lirKind);
-        this.id = id;
-    }
-
-    public int getId() {
-        return id;
-    }
-
-    @Override
-    public String toString() {
-        return "vstack:" + id + getKindSuffix();
-    }
-
-    @Override
-    public int hashCode() {
-        final int prime = 31;
-        int result = super.hashCode();
-        result = prime * result + id;
-        return result;
-    }
-
-    @Override
-    public boolean equals(Object obj) {
-        if (this == obj) {
-            return true;
-        }
-        if (!super.equals(obj)) {
-            return false;
-        }
-        if (getClass() != obj.getClass()) {
-            return false;
-        }
-        VirtualStackSlot other = (VirtualStackSlot) obj;
-        if (id != other.id) {
-            return false;
-        }
-        return true;
-    }
-
-}
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/package-info.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/package-info.java	Wed Nov 11 23:51:57 2015 -0500
@@ -18,10 +18,10 @@
  * if you need additional information or have any questions.
  */
 /**
- * Package that defines the interface between a Java application that wants to install code and the
- * runtime. The runtime provides in implementation of the {@link jdk.vm.ci.code.CodeCacheProvider}
- * interface. The method
- * {@link jdk.vm.ci.code.CodeCacheProvider#addMethod(jdk.vm.ci.meta.ResolvedJavaMethod, CompilationResult, jdk.vm.ci.meta.SpeculationLog, InstalledCode)}
- * can be used to install code for a given method.
+ * Package that defines the interface between a Java application that wants to install code and the runtime.
+ * The runtime provides an implementation of the {@link jdk.vm.ci.code.CodeCacheProvider} interface.
+ * The method {@link jdk.vm.ci.code.CodeCacheProvider#addCode(jdk.vm.ci.meta.ResolvedJavaMethod, CompilationResult, jdk.vm.ci.meta.SpeculationLog, InstalledCode)}
+ * can be used to install code.
  */
 package jdk.vm.ci.code;
+
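The rewritten package javadoc above now points callers at CodeCacheProvider#addCode. A minimal sketch of that call, assuming the caller already has a CodeCacheProvider, a ResolvedJavaMethod and a finished CompilationResult in hand (how those are obtained is outside this changeset); passing null for the last InstalledCode parameter is likewise an assumption made only for illustration:

    import jdk.vm.ci.code.CodeCacheProvider;
    import jdk.vm.ci.code.CompilationResult;
    import jdk.vm.ci.meta.ResolvedJavaMethod;
    import jdk.vm.ci.meta.SpeculationLog;

    class CodeInstallSketch {
        // Hands finished machine code to the runtime; all inputs are produced elsewhere.
        static void install(CodeCacheProvider codeCache, ResolvedJavaMethod method,
                        CompilationResult result, SpeculationLog log) {
            // Passing null for the predefined InstalledCode parameter is an assumption here.
            codeCache.addCode(method, result, log, null);
        }
    }
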
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/stack/InspectedFrame.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/stack/InspectedFrame.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,7 +22,7 @@
  */
 package jdk.vm.ci.code.stack;
 
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.meta.ResolvedJavaMethod;
 
 public interface InspectedFrame {
 
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/stack/StackIntrospection.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.code/src/jdk/vm/ci/code/stack/StackIntrospection.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,7 +22,7 @@
  */
 package jdk.vm.ci.code.stack;
 
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.meta.ResolvedJavaMethod;
 
 public interface StackIntrospection {
 
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.common/src/jdk/vm/ci/common/JVMCIError.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.common/src/jdk/vm/ci/common/JVMCIError.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,7 +22,8 @@
  */
 package jdk.vm.ci.common;
 
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Locale;
 
 /**
  * Indicates a condition in JVMCI related code that should never occur during normal operation.
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.compiler/src/jdk/vm/ci/compiler/Compiler.java	Wed Nov 11 18:04:33 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package jdk.vm.ci.compiler;
-
-import jdk.vm.ci.meta.*;
-import jdk.vm.ci.options.*;
-
-public interface Compiler {
-    int INVOCATION_ENTRY_BCI = -1;
-
-    @Option(help = "", type = OptionType.Debug) OptionValue<String> PrintFilter = new OptionValue<>(null);
-    @Option(help = "", type = OptionType.Debug) OptionValue<Boolean> PrintCompilation = new OptionValue<>(false);
-    @Option(help = "", type = OptionType.Debug) OptionValue<Boolean> PrintAfterCompilation = new OptionValue<>(false);
-    @Option(help = "", type = OptionType.Debug) OptionValue<Boolean> PrintBailout = new OptionValue<>(false);
-    @Option(help = "", type = OptionType.Debug) OptionValue<Boolean> ExitVMOnBailout = new OptionValue<>(false);
-    @Option(help = "", type = OptionType.Debug) OptionValue<Boolean> ExitVMOnException = new OptionValue<>(true);
-    @Option(help = "", type = OptionType.Debug) OptionValue<Boolean> PrintStackTraceOnException = new OptionValue<>(false);
-
-    /**
-     * Request the compilation of a method by this JVMCI compiler. The compiler should compile the
-     * method to machine code and install it in the code cache if the compilation is successful.
-     *
-     * @param method the method that should be compiled
-     * @param entryBCI the BCI at which to start compiling where -1 denotes a non-OSR compilation
-     *            request and all other values denote an OSR compilation request
-     * @param jvmciEnv pointer to native {@code JVMCIEnv} object
-     * @param id a unique identifier for this compilation
-     */
-    void compileMethod(ResolvedJavaMethod method, int entryBCI, long jvmciEnv, int id);
-}
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.compiler/src/jdk/vm/ci/compiler/CompilerFactory.java	Wed Nov 11 18:04:33 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2015, 2015, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package jdk.vm.ci.compiler;
-
-import jdk.vm.ci.code.*;
-import jdk.vm.ci.runtime.*;
-
-/**
- * Factory for a JVMCI compiler.
- */
-public interface CompilerFactory {
-
-    /**
-     * Get the name of this compiler. The compiler will be selected when the jvmci.compiler system
-     * property is equal to this name.
-     */
-    String getCompilerName();
-
-    /**
-     * Initialize an {@link Architecture}. The compiler has the opportunity to extend the
-     * {@link Architecture} description with a custom subclass.
-     */
-    Architecture initializeArchitecture(Architecture arch);
-
-    /**
-     * Create a new instance of the {@link Compiler}.
-     */
-    Compiler createCompiler(JVMCIRuntime runtime);
-}
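Compiler.java and CompilerFactory.java above are removed outright, so the following sketch only records the shape of the SPI that is going away; MyCompilerFactory and the "my-compiler" name are invented for illustration and correspond to nothing in this changeset:

    import jdk.vm.ci.code.Architecture;
    import jdk.vm.ci.compiler.Compiler;
    import jdk.vm.ci.compiler.CompilerFactory;
    import jdk.vm.ci.runtime.JVMCIRuntime;

    // Illustrative only: both interfaces are deleted by this changeset.
    class MyCompilerFactory implements CompilerFactory {

        public String getCompilerName() {
            return "my-compiler";            // matched against the jvmci.compiler system property
        }

        public Architecture initializeArchitecture(Architecture arch) {
            return arch;                     // no architecture-specific extension in this sketch
        }

        public Compiler createCompiler(JVMCIRuntime runtime) {
            // Compiler has a single abstract method, so a lambda suffices for the sketch.
            return (method, entryBCI, jvmciEnv, id) -> {
                // A real compiler would compile 'method' here and install the result.
            };
        }
    }
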
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.compiler/src/jdk/vm/ci/compiler/StartupEventListener.java	Wed Nov 11 18:04:33 2015 -0500
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,31 +0,0 @@
-/*
- * Copyright (c) 2015, 2015, Oracle and/or its affiliates. All rights reserved.
- * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
- *
- * This code is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License version 2 only, as
- * published by the Free Software Foundation.
- *
- * This code is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
- * version 2 for more details (a copy is included in the LICENSE file that
- * accompanied this code).
- *
- * You should have received a copy of the GNU General Public License version
- * 2 along with this work; if not, write to the Free Software Foundation,
- * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
- *
- * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
- * or visit www.oracle.com if you need additional information or have any
- * questions.
- */
-package jdk.vm.ci.compiler;
-
-public interface StartupEventListener {
-
-    /**
-     * This method is called before any of the {@link CompilerFactory} methods.
-     */
-    void beforeJVMCIStartup();
-}
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot.amd64/src/jdk/vm/ci/hotspot/amd64/AMD64HotSpotJVMCIBackendFactory.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot.amd64/src/jdk/vm/ci/hotspot/amd64/AMD64HotSpotJVMCIBackendFactory.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,18 +22,26 @@
  */
 package jdk.vm.ci.hotspot.amd64;
 
-import static jdk.vm.ci.inittimer.InitTimer.*;
+import static jdk.vm.ci.inittimer.InitTimer.timer;
 
-import java.util.*;
+import java.util.EnumSet;
 
-import jdk.vm.ci.amd64.*;
-import jdk.vm.ci.code.*;
-import jdk.vm.ci.compiler.*;
-import jdk.vm.ci.hotspot.*;
-import jdk.vm.ci.inittimer.*;
-import jdk.vm.ci.meta.*;
-import jdk.vm.ci.runtime.*;
-import jdk.vm.ci.service.*;
+import jdk.vm.ci.amd64.AMD64;
+import jdk.vm.ci.code.Architecture;
+import jdk.vm.ci.code.RegisterConfig;
+import jdk.vm.ci.code.TargetDescription;
+import jdk.vm.ci.code.stack.StackIntrospection;
+import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider;
+import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider;
+import jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory;
+import jdk.vm.ci.hotspot.HotSpotJVMCIRuntimeProvider;
+import jdk.vm.ci.hotspot.HotSpotMetaAccessProvider;
+import jdk.vm.ci.hotspot.HotSpotStackIntrospection;
+import jdk.vm.ci.hotspot.HotSpotVMConfig;
+import jdk.vm.ci.inittimer.InitTimer;
+import jdk.vm.ci.meta.ConstantReflectionProvider;
+import jdk.vm.ci.runtime.JVMCIBackend;
+import jdk.vm.ci.service.ServiceProvider;
 
 @ServiceProvider(HotSpotJVMCIBackendFactory.class)
 public class AMD64HotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFactory {
@@ -68,6 +76,9 @@
         if ((config.x86CPUFeatures & config.cpuLZCNT) != 0) {
             features.add(AMD64.CPUFeature.LZCNT);
         }
+        if ((config.x86CPUFeatures & config.cpuERMS) != 0) {
+            features.add(AMD64.CPUFeature.ERMS);
+        }
         if ((config.x86CPUFeatures & config.cpuAVX) != 0) {
             features.add(AMD64.CPUFeature.AVX);
         }
@@ -77,12 +88,42 @@
         if ((config.x86CPUFeatures & config.cpuAES) != 0) {
             features.add(AMD64.CPUFeature.AES);
         }
-        if ((config.x86CPUFeatures & config.cpuERMS) != 0) {
-            features.add(AMD64.CPUFeature.ERMS);
+        if ((config.x86CPUFeatures & config.cpu3DNOWPREFETCH) != 0) {
+            features.add(AMD64.CPUFeature.AMD_3DNOW_PREFETCH);
         }
         if ((config.x86CPUFeatures & config.cpuBMI1) != 0) {
             features.add(AMD64.CPUFeature.BMI1);
         }
+        if ((config.x86CPUFeatures & config.cpuBMI2) != 0) {
+            features.add(AMD64.CPUFeature.BMI2);
+        }
+        if ((config.x86CPUFeatures & config.cpuRTM) != 0) {
+            features.add(AMD64.CPUFeature.RTM);
+        }
+        if ((config.x86CPUFeatures & config.cpuADX) != 0) {
+            features.add(AMD64.CPUFeature.ADX);
+        }
+        if ((config.x86CPUFeatures & config.cpuAVX512F) != 0) {
+            features.add(AMD64.CPUFeature.AVX512F);
+        }
+        if ((config.x86CPUFeatures & config.cpuAVX512DQ) != 0) {
+            features.add(AMD64.CPUFeature.AVX512DQ);
+        }
+        if ((config.x86CPUFeatures & config.cpuAVX512PF) != 0) {
+            features.add(AMD64.CPUFeature.AVX512PF);
+        }
+        if ((config.x86CPUFeatures & config.cpuAVX512ER) != 0) {
+            features.add(AMD64.CPUFeature.AVX512ER);
+        }
+        if ((config.x86CPUFeatures & config.cpuAVX512CD) != 0) {
+            features.add(AMD64.CPUFeature.AVX512CD);
+        }
+        if ((config.x86CPUFeatures & config.cpuAVX512BW) != 0) {
+            features.add(AMD64.CPUFeature.AVX512BW);
+        }
+        if ((config.x86CPUFeatures & config.cpuAVX512VL) != 0) {
+            features.add(AMD64.CPUFeature.AVX512VL);
+        }
         return features;
     }
 
@@ -97,12 +138,12 @@
         return flags;
     }
 
-    protected TargetDescription createTarget(HotSpotVMConfig config, CompilerFactory compilerFactory) {
+    protected TargetDescription createTarget(HotSpotVMConfig config) {
         final int stackFrameAlignment = 16;
         final int implicitNullCheckLimit = 4096;
         final boolean inlineObjects = true;
         Architecture arch = new AMD64(computeFeatures(config), computeFlags(config));
-        return new TargetDescription(compilerFactory.initializeArchitecture(arch), true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects);
+        return new TargetDescription(arch, true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects);
     }
 
     protected HotSpotConstantReflectionProvider createConstantReflection(HotSpotJVMCIRuntimeProvider runtime) {
@@ -132,15 +173,16 @@
     }
 
     @SuppressWarnings("try")
-    public JVMCIBackend createJVMCIBackend(HotSpotJVMCIRuntimeProvider runtime, CompilerFactory compilerFactory, JVMCIBackend host) {
+    public JVMCIBackend createJVMCIBackend(HotSpotJVMCIRuntimeProvider runtime, JVMCIBackend host) {
 
         assert host == null;
-        TargetDescription target = createTarget(runtime.getConfig(), compilerFactory);
+        TargetDescription target = createTarget(runtime.getConfig());
 
         RegisterConfig regConfig;
         HotSpotCodeCacheProvider codeCache;
         ConstantReflectionProvider constantReflection;
         HotSpotMetaAccessProvider metaAccess;
+        StackIntrospection stackIntrospection;
         try (InitTimer t = timer("create providers")) {
             try (InitTimer rt = timer("create MetaAccess provider")) {
                 metaAccess = createMetaAccess(runtime);
@@ -154,13 +196,16 @@
             try (InitTimer rt = timer("create ConstantReflection provider")) {
                 constantReflection = createConstantReflection(runtime);
             }
+            try (InitTimer rt = timer("create StackIntrospection provider")) {
+                stackIntrospection = new HotSpotStackIntrospection(runtime);
+            }
         }
         try (InitTimer rt = timer("instantiate backend")) {
-            return createBackend(metaAccess, codeCache, constantReflection);
+            return createBackend(metaAccess, codeCache, constantReflection, stackIntrospection);
         }
     }
 
-    protected JVMCIBackend createBackend(HotSpotMetaAccessProvider metaAccess, HotSpotCodeCacheProvider codeCache, ConstantReflectionProvider constantReflection) {
-        return new JVMCIBackend(metaAccess, codeCache, constantReflection);
+    protected JVMCIBackend createBackend(HotSpotMetaAccessProvider metaAccess, HotSpotCodeCacheProvider codeCache, ConstantReflectionProvider constantReflection, StackIntrospection stackIntrospection) {
+        return new JVMCIBackend(metaAccess, codeCache, constantReflection, stackIntrospection);
     }
 }
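Every hunk added to computeFeatures above repeats one pattern: test a flag bit in config.x86CPUFeatures and, when set, add the corresponding AMD64.CPUFeature constant. A self-contained sketch of that bitmask-to-EnumSet translation, with made-up flag constants and enum values standing in for the HotSpotVMConfig fields:

    import java.util.EnumSet;

    class CpuFeatureSketch {
        enum Feature { SSE, AVX, AVX2 }          // stand-ins for AMD64.CPUFeature constants

        // Stand-ins for the HotSpotVMConfig flag fields; the real bit values differ.
        static final long CPU_SSE  = 1L << 0;
        static final long CPU_AVX  = 1L << 1;
        static final long CPU_AVX2 = 1L << 2;

        static EnumSet<Feature> computeFeatures(long cpuFeatureWord) {
            EnumSet<Feature> features = EnumSet.noneOf(Feature.class);
            if ((cpuFeatureWord & CPU_SSE) != 0) {
                features.add(Feature.SSE);
            }
            if ((cpuFeatureWord & CPU_AVX) != 0) {
                features.add(Feature.AVX);
            }
            if ((cpuFeatureWord & CPU_AVX2) != 0) {
                features.add(Feature.AVX2);
            }
            return features;
        }

        public static void main(String[] args) {
            System.out.println(computeFeatures(CPU_SSE | CPU_AVX2));  // [SSE, AVX2]
        }
    }
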
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot.amd64/src/jdk/vm/ci/hotspot/amd64/AMD64HotSpotRegisterConfig.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot.amd64/src/jdk/vm/ci/hotspot/amd64/AMD64HotSpotRegisterConfig.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,16 +22,47 @@
  */
 package jdk.vm.ci.hotspot.amd64;
 
-import static jdk.vm.ci.amd64.AMD64.*;
+import static jdk.vm.ci.amd64.AMD64.r12;
+import static jdk.vm.ci.amd64.AMD64.r15;
+import static jdk.vm.ci.amd64.AMD64.r8;
+import static jdk.vm.ci.amd64.AMD64.r9;
+import static jdk.vm.ci.amd64.AMD64.rax;
+import static jdk.vm.ci.amd64.AMD64.rcx;
+import static jdk.vm.ci.amd64.AMD64.rdi;
+import static jdk.vm.ci.amd64.AMD64.rdx;
+import static jdk.vm.ci.amd64.AMD64.rsi;
+import static jdk.vm.ci.amd64.AMD64.rsp;
+import static jdk.vm.ci.amd64.AMD64.xmm0;
+import static jdk.vm.ci.amd64.AMD64.xmm1;
+import static jdk.vm.ci.amd64.AMD64.xmm2;
+import static jdk.vm.ci.amd64.AMD64.xmm3;
+import static jdk.vm.ci.amd64.AMD64.xmm4;
+import static jdk.vm.ci.amd64.AMD64.xmm5;
+import static jdk.vm.ci.amd64.AMD64.xmm6;
+import static jdk.vm.ci.amd64.AMD64.xmm7;
 
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
 
-import jdk.vm.ci.amd64.*;
-import jdk.vm.ci.code.*;
-import jdk.vm.ci.code.CallingConvention.*;
-import jdk.vm.ci.common.*;
-import jdk.vm.ci.hotspot.*;
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.code.Architecture;
+import jdk.vm.ci.code.CallingConvention;
+import jdk.vm.ci.code.CallingConvention.Type;
+import jdk.vm.ci.code.Register;
+import jdk.vm.ci.code.RegisterAttributes;
+import jdk.vm.ci.code.RegisterConfig;
+import jdk.vm.ci.code.StackSlot;
+import jdk.vm.ci.code.TargetDescription;
+import jdk.vm.ci.common.JVMCIError;
+import jdk.vm.ci.hotspot.HotSpotVMConfig;
+import jdk.vm.ci.meta.AllocatableValue;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.JavaType;
+import jdk.vm.ci.meta.LIRKind;
+import jdk.vm.ci.meta.PlatformKind;
+import jdk.vm.ci.meta.Value;
 
 public class AMD64HotSpotRegisterConfig implements RegisterConfig {
 
@@ -86,28 +117,30 @@
      */
     private final boolean needsNativeStackHomeSpace;
 
-    private static Register[] initAllocatable(boolean reserveForHeapBase) {
-        Register[] registers = null;
-        // @formatter:off
-        if (reserveForHeapBase) {
-            registers = new Register[] {
-                        rax, rbx, rcx, rdx, /*rsp,*/ rbp, rsi, rdi, r8, r9,  r10, r11, /*r12,*/ r13, r14, /*r15, */
-                        xmm0, xmm1, xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
-                        xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15
-                      };
-        } else {
-            registers = new Register[] {
-                        rax, rbx, rcx, rdx, /*rsp,*/ rbp, rsi, rdi, r8, r9,  r10, r11, r12, r13, r14, /*r15, */
-                        xmm0, xmm1, xmm2,  xmm3,  xmm4,  xmm5,  xmm6,  xmm7,
-                        xmm8, xmm9, xmm10, xmm11, xmm12, xmm13, xmm14, xmm15
-                      };
+    private static Register[] initAllocatable(Architecture arch, boolean reserveForHeapBase) {
+        Register[] allRegisters = arch.getAvailableValueRegisters();
+        Register[] registers = new Register[allRegisters.length - (reserveForHeapBase ? 3 : 2)];
+
+        int idx = 0;
+        for (Register reg : allRegisters) {
+            if (reg.equals(rsp) || reg.equals(r15)) {
+                // skip stack pointer and thread register
+                continue;
+            }
+            if (reserveForHeapBase && reg.equals(r12)) {
+                // skip heap base register
+                continue;
+            }
+
+            registers[idx++] = reg;
         }
-       // @formatter:on
+
+        assert idx == registers.length;
         return registers;
     }
 
     public AMD64HotSpotRegisterConfig(Architecture architecture, HotSpotVMConfig config) {
-        this(architecture, config, initAllocatable(config.useCompressedOops));
+        this(architecture, config, initAllocatable(architecture, config.useCompressedOops));
         assert callerSaved.length >= allocatable.length;
     }
 
@@ -125,7 +158,7 @@
             this.needsNativeStackHomeSpace = false;
         }
 
-        this.allocatable = allocatable.clone();
+        this.allocatable = allocatable;
         Set<Register> callerSaveSet = new HashSet<>();
         Collections.addAll(callerSaveSet, allocatable);
         Collections.addAll(callerSaveSet, xmmParameterRegisters);
@@ -134,7 +167,7 @@
         callerSaved = callerSaveSet.toArray(new Register[callerSaveSet.size()]);
 
         allAllocatableAreCallerSaved = true;
-        attributesMap = RegisterAttributes.createMap(this, AMD64.allRegisters);
+        attributesMap = RegisterAttributes.createMap(this, architecture.getRegisters());
     }
 
     @Override
@@ -221,7 +254,7 @@
             if (locations[i] == null) {
                 LIRKind lirKind = target.getLIRKind(kind);
                 locations[i] = StackSlot.get(lirKind, currentStackOffset, !type.out);
-                currentStackOffset += Math.max(target.getSizeInBytes(lirKind.getPlatformKind()), target.wordSize);
+                currentStackOffset += Math.max(lirKind.getPlatformKind().getSizeInBytes(), target.wordSize);
             }
         }
 
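The new initAllocatable above derives the allocatable set from Architecture.getAvailableValueRegisters() instead of a hard-coded list, sizing the result array up front and skipping the reserved registers. A simplified, self-contained version of the same filter, using strings in place of jdk.vm.ci.code.Register:

    import java.util.Arrays;

    class AllocatableSketch {
        // Filters reserved registers out of the full set, mirroring initAllocatable above.
        static String[] initAllocatable(String[] allRegisters, boolean reserveForHeapBase) {
            String[] registers = new String[allRegisters.length - (reserveForHeapBase ? 3 : 2)];
            int idx = 0;
            for (String reg : allRegisters) {
                if (reg.equals("rsp") || reg.equals("r15")) {
                    continue; // stack pointer and thread register are never allocatable
                }
                if (reserveForHeapBase && reg.equals("r12")) {
                    continue; // heap base register is reserved when compressed oops are used
                }
                registers[idx++] = reg;
            }
            assert idx == registers.length;
            return registers;
        }

        public static void main(String[] args) {
            String[] all = {"rax", "rsp", "r12", "r15", "rbx"};
            System.out.println(Arrays.toString(initAllocatable(all, true)));  // [rax, rbx]
        }
    }
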
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot.sparc/src/jdk/vm/ci/hotspot/sparc/SPARCHotSpotJVMCIBackendFactory.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot.sparc/src/jdk/vm/ci/hotspot/sparc/SPARCHotSpotJVMCIBackendFactory.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,28 +22,36 @@
  */
 package jdk.vm.ci.hotspot.sparc;
 
-import static jdk.vm.ci.inittimer.InitTimer.*;
+import static jdk.vm.ci.inittimer.InitTimer.timer;
 
-import java.util.*;
+import java.util.EnumSet;
 
-import jdk.vm.ci.code.*;
-import jdk.vm.ci.compiler.*;
-import jdk.vm.ci.hotspot.*;
-import jdk.vm.ci.inittimer.*;
-import jdk.vm.ci.runtime.*;
-import jdk.vm.ci.service.*;
-import jdk.vm.ci.sparc.*;
+import jdk.vm.ci.code.Architecture;
+import jdk.vm.ci.code.RegisterConfig;
+import jdk.vm.ci.code.TargetDescription;
+import jdk.vm.ci.code.stack.StackIntrospection;
+import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider;
+import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider;
+import jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory;
+import jdk.vm.ci.hotspot.HotSpotJVMCIRuntimeProvider;
+import jdk.vm.ci.hotspot.HotSpotMetaAccessProvider;
+import jdk.vm.ci.hotspot.HotSpotStackIntrospection;
+import jdk.vm.ci.hotspot.HotSpotVMConfig;
+import jdk.vm.ci.inittimer.InitTimer;
+import jdk.vm.ci.runtime.JVMCIBackend;
+import jdk.vm.ci.service.ServiceProvider;
+import jdk.vm.ci.sparc.SPARC;
 import jdk.vm.ci.sparc.SPARC.CPUFeature;
 
 @ServiceProvider(HotSpotJVMCIBackendFactory.class)
 public class SPARCHotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFactory {
 
-    protected TargetDescription createTarget(HotSpotVMConfig config, CompilerFactory compilerFactory) {
+    protected TargetDescription createTarget(HotSpotVMConfig config) {
         final int stackFrameAlignment = 16;
         final int implicitNullCheckLimit = 4096;
         final boolean inlineObjects = false;
         Architecture arch = new SPARC(computeFeatures(config));
-        return new TargetDescription(compilerFactory.initializeArchitecture(arch), true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects);
+        return new TargetDescription(arch, true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects);
     }
 
     protected HotSpotCodeCacheProvider createCodeCache(HotSpotJVMCIRuntimeProvider runtime, TargetDescription target, RegisterConfig regConfig) {
@@ -64,8 +72,62 @@
         if ((config.sparcFeatures & config.cbcondInstructions) != 0) {
             features.add(CPUFeature.CBCOND);
         }
-        if (config.useBlockZeroing) {
-            features.add(CPUFeature.BLOCK_ZEROING);
+        if ((config.sparcFeatures & config.v8Instructions) != 0) {
+            features.add(CPUFeature.V8);
+        }
+        if ((config.sparcFeatures & config.hardwareMul32) != 0) {
+            features.add(CPUFeature.HARDWARE_MUL32);
+        }
+        if ((config.sparcFeatures & config.hardwareDiv32) != 0) {
+            features.add(CPUFeature.HARDWARE_DIV32);
+        }
+        if ((config.sparcFeatures & config.hardwareFsmuld) != 0) {
+            features.add(CPUFeature.HARDWARE_FSMULD);
+        }
+        if ((config.sparcFeatures & config.hardwarePopc) != 0) {
+            features.add(CPUFeature.HARDWARE_POPC);
+        }
+        if ((config.sparcFeatures & config.v9Instructions) != 0) {
+            features.add(CPUFeature.V9);
+        }
+        if ((config.sparcFeatures & config.sun4v) != 0) {
+            features.add(CPUFeature.SUN4V);
+        }
+        if ((config.sparcFeatures & config.blkInitInstructions) != 0) {
+            features.add(CPUFeature.BLK_INIT_INSTRUCTIONS);
+        }
+        if ((config.sparcFeatures & config.fmafInstructions) != 0) {
+            features.add(CPUFeature.FMAF);
+        }
+        if ((config.sparcFeatures & config.fmauInstructions) != 0) {
+            features.add(CPUFeature.FMAU);
+        }
+        if ((config.sparcFeatures & config.sparc64Family) != 0) {
+            features.add(CPUFeature.SPARC64_FAMILY);
+        }
+        if ((config.sparcFeatures & config.mFamily) != 0) {
+            features.add(CPUFeature.M_FAMILY);
+        }
+        if ((config.sparcFeatures & config.tFamily) != 0) {
+            features.add(CPUFeature.T_FAMILY);
+        }
+        if ((config.sparcFeatures & config.t1Model) != 0) {
+            features.add(CPUFeature.T1_MODEL);
+        }
+        if ((config.sparcFeatures & config.sparc5Instructions) != 0) {
+            features.add(CPUFeature.SPARC5);
+        }
+        if ((config.sparcFeatures & config.aesInstructions) != 0) {
+            features.add(CPUFeature.SPARC64_FAMILY);
+        }
+        if ((config.sparcFeatures & config.sha1Instruction) != 0) {
+            features.add(CPUFeature.SHA1);
+        }
+        if ((config.sparcFeatures & config.sha256Instruction) != 0) {
+            features.add(CPUFeature.SHA256);
+        }
+        if ((config.sparcFeatures & config.sha512Instruction) != 0) {
+            features.add(CPUFeature.SHA512);
         }
         return features;
     }
@@ -81,20 +143,22 @@
     }
 
     @SuppressWarnings("try")
-    public JVMCIBackend createJVMCIBackend(HotSpotJVMCIRuntimeProvider runtime, CompilerFactory compilerFactory, JVMCIBackend host) {
+    public JVMCIBackend createJVMCIBackend(HotSpotJVMCIRuntimeProvider runtime, JVMCIBackend host) {
         assert host == null;
-        TargetDescription target = createTarget(runtime.getConfig(), compilerFactory);
+        TargetDescription target = createTarget(runtime.getConfig());
 
         HotSpotMetaAccessProvider metaAccess = new HotSpotMetaAccessProvider(runtime);
-        RegisterConfig regConfig = new SPARCHotSpotRegisterConfig(target, runtime.getConfig());
+        RegisterConfig regConfig = new SPARCHotSpotRegisterConfig(target.arch, runtime.getConfig());
         HotSpotCodeCacheProvider codeCache = createCodeCache(runtime, target, regConfig);
         HotSpotConstantReflectionProvider constantReflection = new HotSpotConstantReflectionProvider(runtime);
+        StackIntrospection stackIntrospection = new HotSpotStackIntrospection(runtime);
         try (InitTimer rt = timer("instantiate backend")) {
-            return createBackend(metaAccess, codeCache, constantReflection);
+            return createBackend(metaAccess, codeCache, constantReflection, stackIntrospection);
         }
     }
 
-    protected JVMCIBackend createBackend(HotSpotMetaAccessProvider metaAccess, HotSpotCodeCacheProvider codeCache, HotSpotConstantReflectionProvider constantReflection) {
-        return new JVMCIBackend(metaAccess, codeCache, constantReflection);
+    protected JVMCIBackend createBackend(HotSpotMetaAccessProvider metaAccess, HotSpotCodeCacheProvider codeCache, HotSpotConstantReflectionProvider constantReflection,
+                    StackIntrospection stackIntrospection) {
+        return new JVMCIBackend(metaAccess, codeCache, constantReflection, stackIntrospection);
     }
 }
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot.sparc/src/jdk/vm/ci/hotspot/sparc/SPARCHotSpotRegisterConfig.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot.sparc/src/jdk/vm/ci/hotspot/sparc/SPARCHotSpotRegisterConfig.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,16 +22,72 @@
  */
 package jdk.vm.ci.hotspot.sparc;
 
-import static jdk.vm.ci.sparc.SPARC.*;
+import static jdk.vm.ci.code.CallingConvention.Type.JavaCall;
+import static jdk.vm.ci.code.CallingConvention.Type.JavaCallee;
+import static jdk.vm.ci.code.CallingConvention.Type.NativeCall;
+import static jdk.vm.ci.meta.JavaKind.Void;
+import static jdk.vm.ci.meta.Value.ILLEGAL;
+import static jdk.vm.ci.sparc.SPARC.REGISTER_SAFE_AREA_SIZE;
+import static jdk.vm.ci.sparc.SPARC.d0;
+import static jdk.vm.ci.sparc.SPARC.d2;
+import static jdk.vm.ci.sparc.SPARC.d4;
+import static jdk.vm.ci.sparc.SPARC.d6;
+import static jdk.vm.ci.sparc.SPARC.f0;
+import static jdk.vm.ci.sparc.SPARC.f1;
+import static jdk.vm.ci.sparc.SPARC.f2;
+import static jdk.vm.ci.sparc.SPARC.f3;
+import static jdk.vm.ci.sparc.SPARC.f4;
+import static jdk.vm.ci.sparc.SPARC.f5;
+import static jdk.vm.ci.sparc.SPARC.f6;
+import static jdk.vm.ci.sparc.SPARC.f7;
+import static jdk.vm.ci.sparc.SPARC.g0;
+import static jdk.vm.ci.sparc.SPARC.g2;
+import static jdk.vm.ci.sparc.SPARC.g6;
+import static jdk.vm.ci.sparc.SPARC.i0;
+import static jdk.vm.ci.sparc.SPARC.i1;
+import static jdk.vm.ci.sparc.SPARC.i2;
+import static jdk.vm.ci.sparc.SPARC.i3;
+import static jdk.vm.ci.sparc.SPARC.i4;
+import static jdk.vm.ci.sparc.SPARC.i5;
+import static jdk.vm.ci.sparc.SPARC.i6;
+import static jdk.vm.ci.sparc.SPARC.i7;
+import static jdk.vm.ci.sparc.SPARC.l0;
+import static jdk.vm.ci.sparc.SPARC.l1;
+import static jdk.vm.ci.sparc.SPARC.l2;
+import static jdk.vm.ci.sparc.SPARC.l3;
+import static jdk.vm.ci.sparc.SPARC.l4;
+import static jdk.vm.ci.sparc.SPARC.l5;
+import static jdk.vm.ci.sparc.SPARC.l6;
+import static jdk.vm.ci.sparc.SPARC.l7;
+import static jdk.vm.ci.sparc.SPARC.o0;
+import static jdk.vm.ci.sparc.SPARC.o1;
+import static jdk.vm.ci.sparc.SPARC.o2;
+import static jdk.vm.ci.sparc.SPARC.o3;
+import static jdk.vm.ci.sparc.SPARC.o4;
+import static jdk.vm.ci.sparc.SPARC.o5;
+import static jdk.vm.ci.sparc.SPARC.sp;
 
-import java.util.*;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
 
-import jdk.vm.ci.code.*;
-import jdk.vm.ci.code.CallingConvention.*;
-import jdk.vm.ci.common.*;
-import jdk.vm.ci.hotspot.*;
-import jdk.vm.ci.meta.*;
-import jdk.vm.ci.sparc.*;
+import jdk.vm.ci.code.Architecture;
+import jdk.vm.ci.code.CallingConvention;
+import jdk.vm.ci.code.CallingConvention.Type;
+import jdk.vm.ci.code.Register;
+import jdk.vm.ci.code.RegisterAttributes;
+import jdk.vm.ci.code.RegisterConfig;
+import jdk.vm.ci.code.StackSlot;
+import jdk.vm.ci.code.TargetDescription;
+import jdk.vm.ci.common.JVMCIError;
+import jdk.vm.ci.hotspot.HotSpotVMConfig;
+import jdk.vm.ci.meta.AllocatableValue;
+import jdk.vm.ci.meta.JavaKind;
+import jdk.vm.ci.meta.JavaType;
+import jdk.vm.ci.meta.LIRKind;
+import jdk.vm.ci.meta.PlatformKind;
+import jdk.vm.ci.sparc.SPARC;
 
 public class SPARCHotSpotRegisterConfig implements RegisterConfig {
 
@@ -41,6 +97,11 @@
 
     private final RegisterAttributes[] attributesMap;
 
+    /**
+     * Does native code (C++ code) spill arguments in registers to the parent frame?
+     */
+    private final boolean addNativeRegisterArgumentSlots;
+
     @Override
     public Register[] getAllocatableRegisters() {
         return allocatable.clone();
@@ -50,22 +111,9 @@
         ArrayList<Register> list = new ArrayList<>();
         for (Register reg : registers) {
             if (architecture.canStoreValue(reg.getRegisterCategory(), kind)) {
-                // Special treatment for double precision
-                // TODO: This is wasteful it uses only half of the registers as float.
-                if (kind == JavaKind.Double) {
-                    if (reg.getRegisterCategory().equals(FPUd)) {
-                        list.add(reg);
-                    }
-                } else if (kind == JavaKind.Float) {
-                    if (reg.getRegisterCategory().equals(FPUs)) {
-                        list.add(reg);
-                    }
-                } else {
-                    list.add(reg);
-                }
+                list.add(reg);
             }
         }
-
         Register[] ret = list.toArray(new Register[list.size()]);
         return ret;
     }
@@ -78,76 +126,57 @@
     private final Register[] cpuCallerParameterRegisters = {o0, o1, o2, o3, o4, o5};
     private final Register[] cpuCalleeParameterRegisters = {i0, i1, i2, i3, i4, i5};
 
-    private final Register[] fpuParameterRegisters = {f0, f1, f2, f3, f4, f5, f6, f7};
+    private final Register[] fpuFloatParameterRegisters = {f0, f1, f2, f3, f4, f5, f6, f7};
     private final Register[] fpuDoubleParameterRegisters = {d0, null, d2, null, d4, null, d6, null};
+
     // @formatter:off
-    private final Register[] callerSaveRegisters =
-                   {g1, g2, g3, g4, g5, g6, g7,
-                    o0, o1, o2, o3, o4, o5, o7,
-                    f0,  f1,  f2,  f3,  f4,  f5,  f6,  f7,
-                    f8,  f9,  f10, f11, f12, f13, f14, f15,
-                    f16, f17, f18, f19, f20, f21, f22, f23,
-                    f24, f25, f26, f27, f28, f29, f30, f31,
-                    d32, d34, d36, d38, d40, d42, d44, d46,
-                    d48, d50, d52, d54, d56, d58, d60, d62};
-    // @formatter:on
+    private final Register[] callerSaveRegisters;
 
     /**
      * Registers saved by the callee. This lists all L and I registers which are saved in the
      * register window.
      */
-    private final Register[] calleeSaveRegisters = {l0, l1, l2, l3, l4, l5, l6, l7, i0, i1, i2, i3, i4, i5, i6, i7};
+    private final Register[] calleeSaveRegisters = {
+                    l0, l1, l2, l3, l4, l5, l6, l7,
+                    i0, i1, i2, i3, i4, i5, i6, i7};
+    // @formatter:on
 
-    private static Register[] initAllocatable(boolean reserveForHeapBase) {
-        Register[] registers = null;
-        if (reserveForHeapBase) {
-            // @formatter:off
-            registers = new Register[]{
-                        // TODO this is not complete
-                        // o7 cannot be used as register because it is always overwritten on call
-                        // and the current register handler would ignore this fact if the called
-                        // method still does not modify registers, in fact o7 is modified by the Call instruction
-                        // There would be some extra handlin necessary to be able to handle the o7 properly for local usage
-                        g1, g4, g5,
-                        o0, o1, o2, o3, o4, o5, /*o6,o7,*/
-                        l0, l1, l2, l3, l4, l5, l6, l7,
-                        i0, i1, i2, i3, i4, i5, /*i6,*/ /*i7,*/
-                        //f0, f1, f2, f3, f4, f5, f6, f7,
-                        f8,  f9,  f10, f11, f12, f13, f14, f15,
-                        f16, f17, f18, f19, f20, f21, f22, f23,
-                        f24, f25, f26, f27, f28, f29, f30, f31,
-                        d32, d34, d36, d38, d40, d42, d44, d46,
-                        d48, d50, d52, d54, d56, d58, d60, d62
-            };
-            // @formatter:on
-        } else {
-            // @formatter:off
-            registers = new Register[]{
-                        // TODO this is not complete
-                        g1, g4, g5,
-                        o0, o1, o2, o3, o4, o5, /*o6, o7,*/
-                        l0, l1, l2, l3, l4, l5, l6, l7,
-                        i0, i1, i2, i3, i4, i5, /*i6,*/ /*i7,*/
-//                        f0, f1, f2, f3, f4, f5, f6, f7
-                        f8,  f9,  f10, f11, f12, f13, f14, f15,
-                        f16, f17, f18, f19, f20, f21, f22, f23,
-                        f24, f25, f26, f27, f28, f29, f30, f31,
-                        d32, d34, d36, d38, d40, d42, d44, d46,
-                        d48, d50, d52, d54, d56, d58, d60, d62
-            };
-            // @formatter:on
+    private static Register[] initAllocatable(Architecture arch, boolean reserveForHeapBase) {
+        Register[] allRegisters = arch.getAvailableValueRegisters();
+        Register[] registers = new Register[allRegisters.length - (reserveForHeapBase ? 4 : 3)];
+
+        int idx = 0;
+        for (Register reg : allRegisters) {
+            if (reg.equals(sp) || reg.equals(g2) || reg.equals(g0)) {
+                // skip g0, stack pointer and thread register
+                continue;
+            }
+            if (reserveForHeapBase && reg.equals(g6)) {
+                // skip heap base register
+                continue;
+            }
+
+            registers[idx++] = reg;
         }
 
+        assert idx == registers.length;
         return registers;
     }
 
-    public SPARCHotSpotRegisterConfig(TargetDescription target, HotSpotVMConfig config) {
-        this(target, initAllocatable(config.useCompressedOops));
+    public SPARCHotSpotRegisterConfig(Architecture arch, HotSpotVMConfig config) {
+        this(arch, initAllocatable(arch, config.useCompressedOops), config);
     }
 
-    public SPARCHotSpotRegisterConfig(TargetDescription target, Register[] allocatable) {
-        this.architecture = target.arch;
+    public SPARCHotSpotRegisterConfig(Architecture arch, Register[] allocatable, HotSpotVMConfig config) {
+        this.architecture = arch;
         this.allocatable = allocatable.clone();
+        this.addNativeRegisterArgumentSlots = config.linuxOs;
+        HashSet<Register> callerSaveSet = new HashSet<>();
+        Collections.addAll(callerSaveSet, arch.getAvailableValueRegisters());
+        for (Register cs : calleeSaveRegisters) {
+            callerSaveSet.remove(cs);
+        }
+        this.callerSaveRegisters = callerSaveSet.toArray(new Register[callerSaveSet.size()]);
         attributesMap = RegisterAttributes.createMap(this, SPARC.allRegisters);
     }
 
@@ -172,21 +201,31 @@
 
     @Override
     public CallingConvention getCallingConvention(Type type, JavaType returnType, JavaType[] parameterTypes, TargetDescription target, boolean stackOnly) {
-        if (type == Type.JavaCall || type == Type.NativeCall) {
+        if (type == JavaCall || type == NativeCall) {
             return callingConvention(cpuCallerParameterRegisters, returnType, parameterTypes, type, target, stackOnly);
         }
-        if (type == Type.JavaCallee) {
+        if (type == JavaCallee) {
             return callingConvention(cpuCalleeParameterRegisters, returnType, parameterTypes, type, target, stackOnly);
         }
         throw JVMCIError.shouldNotReachHere();
     }
 
     public Register[] getCallingConventionRegisters(Type type, JavaKind kind) {
-        if (architecture.canStoreValue(FPUs, kind) || architecture.canStoreValue(FPUd, kind)) {
-            return fpuParameterRegisters;
+        switch (kind) {
+            case Boolean:
+            case Byte:
+            case Short:
+            case Char:
+            case Int:
+            case Long:
+            case Object:
+                return type == Type.JavaCallee ? cpuCalleeParameterRegisters : cpuCallerParameterRegisters;
+            case Double:
+            case Float:
+                return fpuFloatParameterRegisters;
+            default:
+                throw JVMCIError.shouldNotReachHere("Unknown JavaKind " + kind);
         }
-        assert architecture.canStoreValue(CPU, kind);
-        return type == Type.JavaCallee ? cpuCalleeParameterRegisters : cpuCallerParameterRegisters;
     }
 
     private CallingConvention callingConvention(Register[] generalParameterRegisters, JavaType returnType, JavaType[] parameterTypes, Type type, TargetDescription target, boolean stackOnly) {
@@ -213,7 +252,7 @@
                     }
                     break;
                 case Double:
-                    if (!stackOnly && currentFloating < fpuParameterRegisters.length) {
+                    if (!stackOnly && currentFloating < fpuFloatParameterRegisters.length) {
                         if (currentFloating % 2 != 0) {
                             // Make register number even to be a double reg
                             currentFloating++;
@@ -224,8 +263,8 @@
                     }
                     break;
                 case Float:
-                    if (!stackOnly && currentFloating < fpuParameterRegisters.length) {
-                        Register register = fpuParameterRegisters[currentFloating++];
+                    if (!stackOnly && currentFloating < fpuFloatParameterRegisters.length) {
+                        Register register = fpuFloatParameterRegisters[currentFloating++];
                         locations[i] = register.asValue(target.getLIRKind(kind));
                     }
                     break;
@@ -234,20 +273,27 @@
             }
 
             if (locations[i] == null) {
+                LIRKind lirKind = target.getLIRKind(kind);
                 // Stack slot is always aligned to its size in bytes but minimum wordsize
-                int typeSize = SPARC.spillSlotSize(target, kind);
+                int typeSize = lirKind.getPlatformKind().getSizeInBytes();
                 currentStackOffset = roundUp(currentStackOffset, typeSize);
-                int slotOffset = currentStackOffset + SPARC.REGISTER_SAFE_AREA_SIZE;
-                locations[i] = StackSlot.get(target.getLIRKind(kind.getStackKind()), slotOffset, !type.out);
+                int slotOffset = currentStackOffset + REGISTER_SAFE_AREA_SIZE;
+                locations[i] = StackSlot.get(lirKind, slotOffset, !type.out);
                 currentStackOffset += typeSize;
             }
         }
 
-        JavaKind returnKind = returnType == null ? JavaKind.Void : returnType.getJavaKind();
-        AllocatableValue returnLocation = returnKind == JavaKind.Void ? Value.ILLEGAL : getReturnRegister(returnKind, type).asValue(target.getLIRKind(returnKind.getStackKind()));
-        // Space where callee may spill outgoing parameters o0...o5
-        int lowerOutgoingSpace = Math.min(locations.length, 6) * target.wordSize;
-        return new CallingConvention(currentStackOffset + lowerOutgoingSpace, returnLocation, locations);
+        JavaKind returnKind = returnType == null ? Void : returnType.getJavaKind();
+        AllocatableValue returnLocation = returnKind == Void ? ILLEGAL : getReturnRegister(returnKind, type).asValue(target.getLIRKind(returnKind.getStackKind()));
+
+        int outArgSpillArea;
+        if (type == NativeCall && addNativeRegisterArgumentSlots) {
+            // Space for native callee which may spill our outgoing arguments
+            outArgSpillArea = Math.min(locations.length, generalParameterRegisters.length) * target.wordSize;
+        } else {
+            outArgSpillArea = 0;
+        }
+        return new CallingConvention(currentStackOffset + outArgSpillArea, returnLocation, locations);
     }
 
     private static int roundUp(int number, int mod) {
@@ -256,7 +302,7 @@
 
     @Override
     public Register getReturnRegister(JavaKind kind) {
-        return getReturnRegister(kind, Type.JavaCallee);
+        return getReturnRegister(kind, JavaCallee);
     }
 
     private static Register getReturnRegister(JavaKind kind, Type type) {
@@ -268,7 +314,7 @@
             case Int:
             case Long:
             case Object:
-                return type == Type.JavaCallee ? i0 : o0;
+                return type == JavaCallee ? i0 : o0;
             case Float:
                 return f0;
             case Double:
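In the reworked callingConvention above, each stack argument is first aligned to its own size with roundUp and the running offset is then advanced by that size. roundUp's body lies outside this hunk, so the version below is an assumed typical implementation, shown with a small worked offset sequence:

    class StackOffsetSketch {
        // Rounds 'number' up to the next multiple of 'mod' (assumes mod > 0).
        static int roundUp(int number, int mod) {
            return ((number + mod - 1) / mod) * mod;
        }

        public static void main(String[] args) {
            int currentStackOffset = 0;
            int[] argSizes = {4, 8, 4, 8};       // e.g. int, long, float, double
            for (int typeSize : argSizes) {
                currentStackOffset = roundUp(currentStackOffset, typeSize);
                System.out.println("slot at offset " + currentStackOffset + " size " + typeSize);
                currentStackOffset += typeSize;
            }
            // prints offsets 0, 8, 16, 24: the 8-byte arguments force alignment padding
        }
    }
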
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/CompilerToVM.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/CompilerToVM.java	Wed Nov 11 23:51:57 2015 -0500
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2015, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -23,6 +23,7 @@
 
 package jdk.vm.ci.hotspot;
 
+import static jdk.vm.ci.hotspot.HotSpotJVMCIRuntime.runtime;
 import static jdk.vm.ci.inittimer.InitTimer.timer;
 
 import java.lang.reflect.Constructor;
@@ -36,7 +37,6 @@
 import jdk.vm.ci.meta.JavaType;
 import jdk.vm.ci.meta.ResolvedJavaMethod;
 import jdk.vm.ci.meta.ResolvedJavaType;
-import jdk.vm.ci.meta.SpeculationLog;
 import sun.misc.Unsafe;
 
 /**
@@ -44,7 +44,7 @@
  * pointer as an argument (e.g., {@link #getSymbol(long)}) is undefined if the argument does not
  * denote a valid native object.
  */
-public final class CompilerToVM {
+final class CompilerToVM {
     /**
      * Initializes the native part of the JVMCI runtime.
      */
@@ -62,6 +62,14 @@
     }
 
     /**
+     * Gets the {@link CompilerToVM} instance associated with the singleton
+     * {@link HotSpotJVMCIRuntime} instance.
+     */
+    public static CompilerToVM compilerToVM() {
+        return runtime().getCompilerToVM();
+    }
+
+    /**
      * Copies the original bytecode of {@code method} into a new byte array and returns it.
      *
      * @return a new byte array containing the original bytecode of {@code method}
@@ -301,7 +309,7 @@
      *         {@link HotSpotVMConfig#codeInstallResultDependenciesFailed} or
      *         {@link HotSpotVMConfig#codeInstallResultDependenciesInvalid}.
      */
-    public native int installCode(TargetDescription target, HotSpotCompiledCode compiledCode, InstalledCode code, SpeculationLog speculationLog);
+    native int installCode(TargetDescription target, HotSpotCompiledCode compiledCode, InstalledCode code, HotSpotSpeculationLog speculationLog);
 
     public native int getMetadata(TargetDescription target, HotSpotCompiledCode compiledCode, HotSpotMetaData metaData);
 
@@ -317,18 +325,18 @@
      * @param timeUnitsPerSecond the granularity of the units for the {@code time} value
      * @param installedCode the nmethod installed as a result of the compilation
      */
-    public synchronized native void notifyCompilationStatistics(int id, HotSpotResolvedJavaMethodImpl method, boolean osr, int processedBytecodes, long time, long timeUnitsPerSecond,
+    synchronized native void notifyCompilationStatistics(int id, HotSpotResolvedJavaMethodImpl method, boolean osr, int processedBytecodes, long time, long timeUnitsPerSecond,
                     InstalledCode installedCode);
 
     /**
      * Resets all compilation statistics.
      */
-    public native void resetCompilationStatistics();
+    native void resetCompilationStatistics();
 
     /**
      * Initializes the fields of {@code config}.
      */
-    native long initializeConfiguration();
+    native long initializeConfiguration(HotSpotVMConfig config);
 
     /**
      * Resolves the implementation of {@code method} for virtual dispatches on objects of dynamic
@@ -367,7 +375,7 @@
      * @param address an address that may be called from any code in the code cache
      * @return -1 if {@code address == 0}
      */
-    public native long getMaxCallTargetOffset(long address);
+    native long getMaxCallTargetOffset(long address);
 
     /**
      * Gets a textual disassembly of {@code codeBlob}.
@@ -376,7 +384,7 @@
      *         {@code codeBlob} could not be disassembled for some reason
      */
     // The HotSpot disassembler seems not to be thread safe so it's better to synchronize its usage
-    public synchronized native String disassembleCodeBlob(long codeBlob);
+    synchronized native String disassembleCodeBlob(InstalledCode installedCode);
 
     /**
      * Gets a stack trace element for {@code method} at bytecode index {@code bci}.
@@ -454,12 +462,12 @@
      * Invalidates {@code installedCode} such that {@link InvalidInstalledCodeException} will be
      * raised the next time {@code installedCode} is executed.
      */
-    public native void invalidateInstalledCode(InstalledCode installedCode);
+    native void invalidateInstalledCode(InstalledCode installedCode);
 
     /**
      * Collects the current values of all JVMCI benchmark counters, summed up over all threads.
      */
-    public native long[] collectCounters();
+    native long[] collectCounters();
 
     /**
      * Determines if {@code metaspaceMethodData} is mature.
@@ -489,7 +497,7 @@
      * @param methods the methods to look for, where {@code null} means that any frame is returned
      * @return the frame, or {@code null} if the end of the stack was reached during the search
      */
-    public native HotSpotStackFrameReference getNextStackFrame(HotSpotStackFrameReference frame, HotSpotResolvedJavaMethodImpl[] methods, int initialSkip);
+    native HotSpotStackFrameReference getNextStackFrame(HotSpotStackFrameReference frame, ResolvedJavaMethod[] methods, int initialSkip);
 
     /**
      * Materializes all virtual objects within {@code stackFrame} updates its locals.
@@ -512,30 +520,34 @@
     /**
      * Determines if debug info should also be emitted at non-safepoint locations.
      */
-    public native boolean shouldDebugNonSafepoints();
+
+    native boolean shouldDebugNonSafepoints();
 
     /**
      * Writes {@code length} bytes from {@code bytes} starting at offset {@code offset} to the
      * HotSpot's log stream.
      *
-     * @exception NullPointerException if <code>bytes</code> is <code>null</code>.
+     * @exception NullPointerException if {@code bytes == null}
      * @exception IndexOutOfBoundsException if copying would cause access of data outside array
-     *                bounds.
+     *                bounds
      */
-    public native void writeDebugOutput(byte[] bytes, int offset, int length);
+    native void writeDebugOutput(byte[] bytes, int offset, int length);
 
     /**
      * Flush HotSpot's log stream.
      */
-    public native void flushDebugOutput();
+    native void flushDebugOutput();
 
     /**
-     * Read a value representing a metaspace Method* and return the
-     * {@link HotSpotResolvedJavaMethodImpl} wrapping it. This method does no checking that the
-     * location actually contains a valid Method*. If the {@code base} object is a
+     * Read a HotSpot Method* value from the memory location described by {@code base} plus
+     * {@code displacement} and return the {@link HotSpotResolvedJavaMethodImpl} wrapping it. This
+     * method does no checking that the memory location actually contains a valid pointer and may
+     * crash the VM if an invalid location is provided. If the {@code base} is null then
+     * {@code displacement} is used by itself. If {@code base} is a
      * {@link HotSpotResolvedJavaMethodImpl}, {@link HotSpotConstantPool} or
      * {@link HotSpotResolvedObjectTypeImpl} then the metaspace pointer is fetched from that object
-     * and used as the base. Otherwise the object itself is used as the base.
+     * and added to {@code displacement}. Any other non-null object type causes an
+     * {@link IllegalArgumentException} to be thrown.
      *
      * @param base an object to read from or null
      * @param displacement
@@ -544,12 +556,14 @@
     native HotSpotResolvedJavaMethodImpl getResolvedJavaMethod(Object base, long displacement);
 
     /**
-     * Read a value representing a metaspace ConstantPool* and return the
-     * {@link HotSpotConstantPool} wrapping it. This method does no checking that the location
-     * actually contains a valid ConstantPool*. If the {@code base} object is a
-     * {@link HotSpotResolvedJavaMethodImpl}, {@link HotSpotConstantPool} or
-     * {@link HotSpotResolvedObjectTypeImpl} then the metaspace pointer is fetched from that object
-     * and used as the base. Otherwise the object itself is used as the base.
+     * Read a HotSpot ConstantPool* value from the memory location described by {@code base} plus
+     * {@code displacement} and return the {@link HotSpotConstantPool} wrapping it. This method does
+     * no checking that the memory location actually contains a valid pointer and may crash the VM
+     * if an invalid location is provided. If the {@code base} is null then {@code displacement} is
+     * used by itself. If {@code base} is a {@link HotSpotResolvedJavaMethodImpl},
+     * {@link HotSpotConstantPool} or {@link HotSpotResolvedObjectTypeImpl} then the metaspace
+     * pointer is fetched from that object and added to {@code displacement}. Any other non-null
+     * object type causes an {@link IllegalArgumentException} to be thrown.
      *
      * @param base an object to read from or null
      * @param displacement
@@ -558,12 +572,15 @@
     native HotSpotConstantPool getConstantPool(Object base, long displacement);
 
     /**
-     * Read a value representing a metaspace Klass* and return the
-     * {@link HotSpotResolvedObjectTypeImpl} wrapping it. The method does no checking that the
-     * location actually contains a valid Klass*. If the {@code base} object is a
+     * Read a HotSpot Klass* value from the memory location described by {@code base} plus
+     * {@code displacement} and return the {@link HotSpotResolvedObjectTypeImpl} wrapping it. This
+     * method does no checking that the memory location actually contains a valid pointer and may
+     * crash the VM if an invalid location is provided. If the {@code base} is null then
+     * {@code displacement} is used by itself. If {@code base} is a
      * {@link HotSpotResolvedJavaMethodImpl}, {@link HotSpotConstantPool} or
      * {@link HotSpotResolvedObjectTypeImpl} then the metaspace pointer is fetched from that object
-     * and used as the base. Otherwise the object itself is used as the base.
+     * and added to {@code displacement}. Any other non-null object type causes an
+     * {@link IllegalArgumentException} to be thrown.
      *
      * @param base an object to read from or null
      * @param displacement
@@ -571,4 +588,17 @@
      * @return null or the resolved method for this location
      */
     native HotSpotResolvedObjectTypeImpl getResolvedJavaType(Object base, long displacement, boolean compressed);
+
+    /**
+     * Return the size of the HotSpot ProfileData* pointed at by {@code position}. If
+     * {@code position} is outside the space of the MethodData then an
+     * {@link IllegalArgumentException} is thrown. A {@code position} inside the MethodData that
+     * does not point at a valid ProfileData will crash the VM.
+     *
+     * @param metaspaceMethodData
+     * @param position
+     * @return the size of the ProfileData item pointed at by {@code position}
+     * @throws IllegalArgumentException if an out of range position is given
+     */
+    native int methodDataProfileDataSize(long metaspaceMethodData, int position);
 }
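CompilerToVM becomes package-private above and is reached through the new static compilerToVM() accessor, which delegates to the HotSpotJVMCIRuntime singleton. The placeholder classes below sketch that accessor pattern in plain Java; RuntimeSketch and NativeBridge are illustrative names only, not JVMCI types:

    /** Placeholder for the runtime singleton that owns the native bridge. */
    class RuntimeSketch {
        private static final RuntimeSketch INSTANCE = new RuntimeSketch();
        private final NativeBridge bridge = new NativeBridge();

        static RuntimeSketch runtime() {
            return INSTANCE;
        }

        NativeBridge getBridge() {
            return bridge;
        }
    }

    /** Placeholder for a package-private CompilerToVM-style class reached only via its accessor. */
    class NativeBridge {
        static NativeBridge bridge() {
            return RuntimeSketch.runtime().getBridge();
        }
    }
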
--- a/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotCodeCacheProvider.java	Wed Nov 11 18:04:33 2015 -0500
+++ b/src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspot/src/jdk/vm/ci/hotspot/HotSpotCodeCacheProvider.java	Wed Nov 11 23:51:57 2015 -0500
@@ -22,15 +22,30 @@
  */
 package jdk.vm.ci.hotspot;
 
-import static jdk.vm.ci.hotspot.HotSpotCompressedNullConstant.*;
+import static jdk.vm.ci.hotspot.HotSpotCompressedNullConstant.COMPRESSED_NULL;
 
-import java.lang.reflect.*;
+import java.lang.reflect.Field;
 
-import jdk.vm.ci.code.*;
-import jdk.vm.ci.code.CompilationResult.*;
-import jdk.vm.ci.code.DataSection.*;
-import jdk.vm.ci.common.*;
-import jdk.vm.ci.meta.*;
+import jdk.vm.ci.code.BailoutException;
+import jdk.vm.ci.code.CodeCacheProvider;
+import jdk.vm.ci.code.CompilationRequest;
+import jdk.vm.ci.code.CompilationResult;
+import jdk.vm.ci.code.CompilationResult.Call;
+import jdk.vm.ci.code.CompilationResult.ConstantReference;
+import jdk.vm.ci.code.CompilationResult.DataPatch;
+import jdk.vm.ci.code.CompilationResult.Mark;
+import jdk.vm.ci.code.DataSection;
+import jdk.vm.ci.code.DataSection.Data;
+import jdk.vm.ci.code.DataSection.DataBuilder;
+import jdk.vm.ci.code.InstalledCode;
+import jdk.vm.ci.code.RegisterConfig;
+import jdk.vm.ci.code.TargetDescription;
+import jdk.vm.ci.common.JVMCIError;
+import jdk.vm.ci.meta.Constant