changeset 27326:612c725d7a6a

Merge
author dsamersoff
date Sat, 16 Jan 2016 12:04:47 +0100
parents d06ef31f563b 77ccddf2c10b
children 24059544e015
files src/jdk.vm.ci/share/classes/jdk.vm.ci.hotspotvmconfig/src/jdk/vm/ci/hotspotvmconfig/HotSpotVMManual.java src/jdk.vm.ci/share/classes/jdk.vm.ci.options.processor/src/META-INF/services/javax.annotation.processing.Processor src/jdk.vm.ci/share/classes/jdk.vm.ci.options.processor/src/jdk/vm/ci/options/processor/OptionProcessor.java src/jdk.vm.ci/share/classes/jdk.vm.ci.options/src/jdk/vm/ci/options/DerivedOptionValue.java src/jdk.vm.ci/share/classes/jdk.vm.ci.options/src/jdk/vm/ci/options/NestedBooleanOptionValue.java src/jdk.vm.ci/share/classes/jdk.vm.ci.options/src/jdk/vm/ci/options/Option.java src/jdk.vm.ci/share/classes/jdk.vm.ci.options/src/jdk/vm/ci/options/OptionDescriptor.java src/jdk.vm.ci/share/classes/jdk.vm.ci.options/src/jdk/vm/ci/options/OptionDescriptors.java src/jdk.vm.ci/share/classes/jdk.vm.ci.options/src/jdk/vm/ci/options/OptionType.java src/jdk.vm.ci/share/classes/jdk.vm.ci.options/src/jdk/vm/ci/options/OptionValue.java src/jdk.vm.ci/share/classes/jdk.vm.ci.options/src/jdk/vm/ci/options/OptionsLoader.java src/jdk.vm.ci/share/classes/jdk.vm.ci.options/src/jdk/vm/ci/options/OptionsParser.java src/jdk.vm.ci/share/classes/jdk.vm.ci.options/src/jdk/vm/ci/options/StableOptionValue.java src/share/vm/oops/typeArrayOop.cpp test/compiler/jvmci/jdk.vm.ci.options.test/src/jdk/vm/ci/options/test/NestedBooleanOptionValueTest.java test/compiler/jvmci/jdk.vm.ci.options.test/src/jdk/vm/ci/options/test/TestOptionValue.java test/gc/6581734/Test6581734.java test/gc/6845368/bigobj.java test/gc/7072527/TestFullGCCount.java
diffstat 595 files changed, 101560 insertions(+), 72966 deletions(-) [+]
line wrap: on
line diff
--- a/.hgtags	Sat Jan 16 13:56:49 2016 +0300
+++ b/.hgtags	Sat Jan 16 12:04:47 2016 +0100
@@ -502,3 +502,5 @@
 de592ea5f7ba0f8a8c5afc03bd169f7690c72b6f jdk-9+97
 e5b1a23be1e105417ba1c4c576ab373eb3fa2c2b jdk-9+98
 f008e8cc10d5b3212fb22d58c96fa01d38654f19 jdk-9+99
+bdb0acafc63c42e84d9d8195bf2e2b25ee9c3306 jdk-9+100
+9f45d3d57d6948cf526fbc2e2891a9a74ac6941a jdk-9+101
--- a/.mx.jvmci/mx_jvmci.py	Sat Jan 16 13:56:49 2016 +0300
+++ b/.mx.jvmci/mx_jvmci.py	Sat Jan 16 12:04:47 2016 +0100
@@ -677,12 +677,6 @@
                 assert service
                 self.services.setdefault(service, []).append(provider)
             return True
-        elif arcname.endswith('_OptionDescriptors.class'):
-            # Need to create service files for the providers of the
-            # jdk.vm.ci.options.Options service created by
-            # jdk.vm.ci.options.processor.OptionProcessor.
-            provider = arcname[:-len('.class'):].replace('/', '.')
-            self.services.setdefault('jdk.vm.ci.options.OptionDescriptors', []).append(provider)
         return False
 
     def __addsrc__(self, arcname, contents):
@@ -761,21 +755,6 @@
         if jacocoArgs:
             args = jacocoArgs + args
 
-        # Support for -G: options
-        def translateGOption(arg):
-            if arg.startswith('-G:+'):
-                if '=' in arg:
-                    mx.abort('Mixing + and = in -G: option specification: ' + arg)
-                arg = '-Djvmci.option.' + arg[len('-G:+'):] + '=true'
-            elif arg.startswith('-G:-'):
-                if '=' in arg:
-                    mx.abort('Mixing - and = in -G: option specification: ' + arg)
-                arg = '-Djvmci.option.' + arg[len('-G:+'):] + '=false'
-            elif arg.startswith('-G:'):
-                arg = '-Djvmci.option.' + arg[len('-G:'):]
-            return arg
-        args = map(translateGOption, args)
-
         args = ['-Xbootclasspath/p:' + dep.classpath_repr() for dep in _jvmci_bootclasspath_prepends] + args
 
         jvmciModeArgs = _jvmciModes[_vm.jvmciMode]
--- a/.mx.jvmci/suite.py	Sat Jan 16 13:56:49 2016 +0300
+++ b/.mx.jvmci/suite.py	Sat Jan 16 12:04:47 2016 +0100
@@ -109,7 +109,6 @@
         "jdk.vm.ci.code",
       ],
       "checkstyle" : "jdk.vm.ci.service",
-      "annotationProcessors" : ["JVMCI_OPTIONS_PROCESSOR"],
       "javaCompliance" : "1.8",
       "workingSets" : "API,JVMCI",
     },
@@ -135,40 +134,17 @@
       "workingSets" : "JVMCI",
     },
 
-    "jdk.vm.ci.options" : {
+    # ------------- JVMCI:HotSpot -------------
+
+    "jdk.vm.ci.aarch64" : {
       "subDir" : "src/jdk.vm.ci/share/classes",
       "sourceDirs" : ["src"],
-      "checkstyle" : "jdk.vm.ci.service",
-      "dependencies" : ["jdk.vm.ci.inittimer"],
-      "javaCompliance" : "1.8",
-      "workingSets" : "JVMCI",
-    },
-
-    "jdk.vm.ci.options.processor" : {
-      "subDir" : "src/jdk.vm.ci/share/classes",
-      "sourceDirs" : ["src"],
-      "dependencies" : [
-        "jdk.vm.ci.options",
-      ],
+      "dependencies" : ["jdk.vm.ci.code"],
       "checkstyle" : "jdk.vm.ci.service",
       "javaCompliance" : "1.8",
-      "workingSets" : "JVMCI,Codegen",
+      "workingSets" : "JVMCI,AArch64",
     },
 
-    "jdk.vm.ci.options.test" : {
-      "subDir" : "test/compiler/jvmci",
-      "sourceDirs" : ["src"],
-      "dependencies" : [
-        "jdk.vm.ci.options",
-        "mx:JUNIT",
-      ],
-      "checkstyle" : "jdk.vm.ci.service",
-      "javaCompliance" : "1.8",
-      "workingSets" : "JVMCI",
-    },
-
-    # ------------- JVMCI:HotSpot -------------
-
     "jdk.vm.ci.amd64" : {
       "subDir" : "src/jdk.vm.ci/share/classes",
       "sourceDirs" : ["src"],
@@ -191,15 +167,12 @@
       "subDir" : "src/jdk.vm.ci/share/classes",
       "sourceDirs" : ["src"],
       "dependencies" : [
-        "jdk.vm.ci.options",
         "jdk.vm.ci.hotspotvmconfig",
         "jdk.vm.ci.common",
+        "jdk.vm.ci.inittimer",
         "jdk.vm.ci.runtime",
         "jdk.vm.ci.service",
       ],
-      "annotationProcessors" : [
-        "JVMCI_OPTIONS_PROCESSOR",
-      ],
       "checkstyle" : "jdk.vm.ci.service",
       "javaCompliance" : "1.8",
       "workingSets" : "JVMCI",
@@ -213,6 +186,21 @@
       "workingSets" : "JVMCI,HotSpot",
     },
 
+    "jdk.vm.ci.hotspot.aarch64" : {
+      "subDir" : "src/jdk.vm.ci/share/classes",
+      "sourceDirs" : ["src"],
+      "dependencies" : [
+        "jdk.vm.ci.aarch64",
+        "jdk.vm.ci.hotspot",
+      ],
+      "checkstyle" : "jdk.vm.ci.service",
+      "annotationProcessors" : [
+        "JVMCI_SERVICE_PROCESSOR",
+      ],
+      "javaCompliance" : "1.8",
+      "workingSets" : "JVMCI,HotSpot,AArch64",
+    },
+
     "jdk.vm.ci.hotspot.amd64" : {
       "subDir" : "src/jdk.vm.ci/share/classes",
       "sourceDirs" : ["src"],
@@ -258,22 +246,17 @@
       "dependencies" : ["jdk.vm.ci.service"],
     },
 
-    "JVMCI_OPTIONS" : {
-      "subDir" : "src/jdk.vm.ci/share/classes",
-      "dependencies" : ["jdk.vm.ci.options"],
-    },
-
     "JVMCI_API" : {
       "subDir" : "src/jdk.vm.ci/share/classes",
       "dependencies" : [
         "jdk.vm.ci.inittimer",
         "jdk.vm.ci.runtime",
         "jdk.vm.ci.common",
+        "jdk.vm.ci.aarch64",
         "jdk.vm.ci.amd64",
         "jdk.vm.ci.sparc",
       ],
       "distDependencies" : [
-        "JVMCI_OPTIONS",
         "JVMCI_SERVICE",
       ],
     },
@@ -288,6 +271,7 @@
     "JVMCI_HOTSPOT" : {
       "subDir" : "src/jdk.vm.ci/share/classes",
       "dependencies" : [
+        "jdk.vm.ci.hotspot.aarch64",
         "jdk.vm.ci.hotspot.amd64",
         "jdk.vm.ci.hotspot.sparc",
       ],
@@ -301,7 +285,6 @@
     "JVMCI_TEST" : {
       "subDir" : "test/compiler/jvmci",
       "dependencies" : [
-        "jdk.vm.ci.options.test",
         "jdk.vm.ci.runtime.test",
       ],
       "distDependencies" : [
@@ -310,13 +293,6 @@
       "exclude" : ["mx:JUNIT"],
     },
 
-    "JVMCI_OPTIONS_PROCESSOR" : {
-      "subDir" : "src/jdk.vm.ci/share/classes",
-      "dependencies" : ["jdk.vm.ci.options.processor"],
-      "distDependencies" : [
-        "JVMCI_OPTIONS",
-      ],
-    },
 
     "JVMCI_SERVICE_PROCESSOR" : {
       "subDir" : "src/jdk.vm.ci/share/classes",
@@ -332,25 +308,23 @@
       "subDir" : "src/jdk.vm.ci/share/classes",
       "overlaps" : [
         "JVMCI_API",
-        "JVMCI_OPTIONS",
         "JVMCI_SERVICE",
         "JVMCI_HOTSPOT",
         "JVMCI_HOTSPOTVMCONFIG",
         "JVMCI_SERVICE_PROCESSOR",
-        "JVMCI_OPTIONS_PROCESSOR"
       ],
       "dependencies" : [
-        "jdk.vm.ci.options",
         "jdk.vm.ci.service",
         "jdk.vm.ci.inittimer",
         "jdk.vm.ci.runtime",
         "jdk.vm.ci.common",
+        "jdk.vm.ci.aarch64",
         "jdk.vm.ci.amd64",
         "jdk.vm.ci.sparc",
         "jdk.vm.ci.hotspotvmconfig",
+        "jdk.vm.ci.hotspot.aarch64",
         "jdk.vm.ci.hotspot.amd64",
         "jdk.vm.ci.hotspot.sparc",
-        "jdk.vm.ci.options.processor",
         "jdk.vm.ci.service.processor"
       ],
     },
--- a/make/aix/Makefile	Sat Jan 16 13:56:49 2016 +0300
+++ b/make/aix/Makefile	Sat Jan 16 12:04:47 2016 +0100
@@ -1,6 +1,6 @@
 #
 # Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
-# Copyright 2012, 2013 SAP AG. All rights reserved.
+# Copyright 2012, 2015 SAP AG. All rights reserved.
 # DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 #
 # This code is free software; you can redistribute it and/or modify it
@@ -61,10 +61,6 @@
     FORCE_TIERED=1
   endif
 endif
-# C1 is not ported on ppc64(le), so we cannot build a tiered VM:
-ifneq (,$(filter $(ARCH),ppc64 pp64le))
-  FORCE_TIERED=0
-endif
 
 ifdef LP64
   ifeq ("$(filter $(LP64_ARCH),$(BUILDARCH))","")
--- a/make/aix/makefiles/fastdebug.make	Sat Jan 16 13:56:49 2016 +0300
+++ b/make/aix/makefiles/fastdebug.make	Sat Jan 16 12:04:47 2016 +0100
@@ -68,5 +68,5 @@
 LFLAGS_QIPA=
 
 VERSION = optimized
-SYSDEFS += -DASSERT -DFASTDEBUG
+SYSDEFS += -DASSERT
 PICFLAGS = DEFAULT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/make/aix/makefiles/tiered.make	Sat Jan 16 12:04:47 2016 +0100
@@ -0,0 +1,32 @@
+#
+# Copyright (c) 2006, 2015, Oracle and/or its affiliates. All rights reserved.
+# Copyright 2012, 2015 SAP AG. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+#
+#
+
+# Sets the make macros for building the tiered version of the VM
+
+TYPE=TIERED
+
+VM_SUBDIR = server
+
+CFLAGS += -DCOMPILER2 -DCOMPILER1
--- a/make/excludeSrc.make	Sat Jan 16 13:56:49 2016 +0300
+++ b/make/excludeSrc.make	Sat Jan 16 12:04:47 2016 +0100
@@ -107,8 +107,8 @@
 	 memTracker.cpp nmtDCmd.cpp mallocSiteTable.cpp
 endif
 
-ifneq (,$(findstring $(Platform_arch_model), x86_64, sparc))
-      # JVMCI is supported only on x86_64 and SPARC.
+ifneq (,$(findstring $(Platform_arch_model), aarch64, arm_64, sparc, x86_64))
+      # JVMCI is supported only on aarch64, arm_64, sparc and x86_64.
 else
       INCLUDE_JVMCI := false
 endif
--- a/make/gensrc/Gensrc-jdk.vm.ci.gmk	Sat Jan 16 13:56:49 2016 +0300
+++ b/make/gensrc/Gensrc-jdk.vm.ci.gmk	Sat Jan 16 12:04:47 2016 +0100
@@ -36,15 +36,6 @@
 ################################################################################
 # Compile the annotation processor
 
-$(eval $(call SetupJavaCompilation, BUILD_JVMCI_OPTIONS, \
-    SETUP := GENERATE_OLDBYTECODE, \
-    SRC := $(SRC_DIR)/jdk.vm.ci.options/src \
-        $(SRC_DIR)/jdk.vm.ci.options.processor/src \
-        $(SRC_DIR)/jdk.vm.ci.inittimer/src, \
-    BIN := $(BUILDTOOLS_OUTPUTDIR)/jvmci_options, \
-    JAR := $(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.options.jar, \
-))
-
 $(eval $(call SetupJavaCompilation, BUILD_JVMCI_SERVICE, \
     SETUP := GENERATE_OLDBYTECODE, \
     SRC := $(SRC_DIR)/jdk.vm.ci.service/src \
@@ -57,6 +48,7 @@
 
 PROC_SRC_SUBDIRS := \
     jdk.vm.ci.hotspot \
+    jdk.vm.ci.hotspot.aarch64 \
     jdk.vm.ci.hotspot.amd64 \
     jdk.vm.ci.hotspot.sparc \
     jdk.vm.ci.runtime \
@@ -69,15 +61,15 @@
 ALL_SRC_DIRS := $(wildcard $(SRC_DIR)/*/src)
 SOURCEPATH := $(call PathList, $(ALL_SRC_DIRS))
 PROCESSOR_PATH := $(call PathList, \
-    $(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.options.jar \
     $(BUILDTOOLS_OUTPUTDIR)/jdk.vm.ci.service.jar)
 
 $(GENSRC_DIR)/_gensrc_proc_done: $(PROC_SRCS) \
-    $(BUILD_JVMCI_OPTIONS) $(BUILD_JVMCI_SERVICE)
+    $(BUILD_JVMCI_SERVICE)
 	$(MKDIR) -p $(@D)
 	$(eval $(call ListPathsSafely,PROC_SRCS,$(@D)/_gensrc_proc_files))
 	$(JAVA_SMALL) $(NEW_JAVAC) \
 	    -XDignore.symbol.file \
+            -bootclasspath $(JDK_OUTPUTDIR)/modules/java.base \
 	    -sourcepath $(SOURCEPATH) \
 	    -implicit:none \
 	    -proc:only \
@@ -91,15 +83,6 @@
 
 ################################################################################
 
-$(GENSRC_DIR)/META-INF/services/jdk.vm.ci.options.OptionDescriptors: \
-    $(GENSRC_DIR)/_gensrc_proc_done
-	$(MKDIR) -p $(@D)
-	$(FIND) $(GENSRC_DIR) -name '*_OptionDescriptors.java' | $(SED) 's:.*/jdk\.vm\.ci/\(.*\)\.java:\1:' | $(TR) '/' '.' > $@
-
-TARGETS += $(GENSRC_DIR)/META-INF/services/jdk.vm.ci.options.OptionDescriptors
-
-################################################################################
-
 $(GENSRC_DIR)/_providers_converted: $(GENSRC_DIR)/_gensrc_proc_done
 	$(MKDIR) -p $(GENSRC_DIR)/META-INF/services
 	($(CD) $(GENSRC_DIR)/META-INF/jvmci.providers && \
--- a/make/linux/Makefile	Sat Jan 16 13:56:49 2016 +0300
+++ b/make/linux/Makefile	Sat Jan 16 12:04:47 2016 +0100
@@ -57,14 +57,6 @@
     FORCE_TIERED=1
   endif
 endif
-# C1 is not ported on ppc64, so we cannot build a tiered VM:
-# Notice: after 8046471 ARCH will be 'ppc' for top-level ppc64 builds but
-# 'ppc64' for HotSpot-only ppc64 builds. Need to detect both variants here!
-ifneq (,$(findstring $(ARCH), ppc ppc64))
-  ifeq ($(ARCH_DATA_MODEL), 64)
-    FORCE_TIERED=0
-  endif
-endif
 
 ifdef LP64
   ifeq ("$(filter $(LP64_ARCH),$(BUILDARCH))","")
--- a/make/test/JtregNative.gmk	Sat Jan 16 13:56:49 2016 +0300
+++ b/make/test/JtregNative.gmk	Sat Jan 16 12:04:47 2016 +0100
@@ -46,6 +46,8 @@
     $(HOTSPOT_TOPDIR)/test/runtime/jni/8033445 \
     $(HOTSPOT_TOPDIR)/test/runtime/jni/ToStringInInterfaceTest \
     $(HOTSPOT_TOPDIR)/test/runtime/SameObject \
+    $(HOTSPOT_TOPDIR)/test/compiler/floatingpoint/ \
+    $(HOTSPOT_TOPDIR)/test/compiler/calls \
     #
 
 # Add conditional directories here when needed.
--- a/src/cpu/aarch64/vm/aarch64.ad	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/aarch64.ad	Sat Jan 16 12:04:47 2016 +0100
@@ -3484,10 +3484,14 @@
   return 0;
 }
 
-bool Matcher::is_short_branch_offset(int rule, int br_size, int offset)
-{
-  Unimplemented();
-  return false;
+// Is this branch offset short enough that a short branch can be used?
+//
+// NOTE: If the platform does not provide any short branch variants, then
+//       this method should return false for offset 0.
+bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
+  // The passed offset is relative to address of the branch.
+
+  return (-32768 <= offset && offset < 32768);
 }
 
 const bool Matcher::isSimpleConstant64(jlong value) {
@@ -4667,17 +4671,12 @@
     if (!_method) {
       // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap.
       call = __ trampoline_call(Address(addr, relocInfo::runtime_call_type), &cbuf);
-    } else if (_optimized_virtual) {
-      call = __ trampoline_call(Address(addr, relocInfo::opt_virtual_call_type), &cbuf);
     } else {
-      call = __ trampoline_call(Address(addr, relocInfo::static_call_type), &cbuf);
-    }
-    if (call == NULL) {
-      ciEnv::current()->record_failure("CodeCache is full");
-      return;
-    }
-
-    if (_method) {
+      int method_index = resolved_method_index(cbuf);
+      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
+                                                  : static_call_Relocation::spec(method_index);
+      call = __ trampoline_call(Address(addr, rspec), &cbuf);
+
       // Emit stub for static call
       address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
       if (stub == NULL) {
@@ -4685,11 +4684,16 @@
         return;
       }
     }
+    if (call == NULL) {
+      ciEnv::current()->record_failure("CodeCache is full");
+      return;
+    }
   %}
 
   enc_class aarch64_enc_java_dynamic_call(method meth) %{
     MacroAssembler _masm(&cbuf);
-    address call = __ ic_call((address)$meth$$method);
+    int method_index = resolved_method_index(cbuf);
+    address call = __ ic_call((address)$meth$$method, method_index);
     if (call == NULL) {
       ciEnv::current()->record_failure("CodeCache is full");
       return;
@@ -13845,7 +13849,8 @@
 
 // Test bit and Branch
 
-instruct cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl, rFlagsReg cr) %{
+// Patterns for short (< 32KiB) variants
+instruct cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
   match(If cmp (CmpL op1 op2));
   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
             || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
@@ -13855,16 +13860,15 @@
   format %{ "cb$cmp   $op1, $labl # long" %}
   ins_encode %{
     Label* L = $labl$$label;
-    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
-    if (cond == Assembler::LT)
-      __ tbnz($op1$$Register, 63, *L);
-    else
-      __ tbz($op1$$Register, 63, *L);
+    Assembler::Condition cond =
+      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
+    __ tbr(cond, $op1$$Register, 63, *L);
   %}
   ins_pipe(pipe_cmp_branch);
-%}
-
-instruct cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl, rFlagsReg cr) %{
+  ins_short_branch(1);
+%}
+
+instruct cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
   match(If cmp (CmpI op1 op2));
   predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
             || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
@@ -13874,16 +13878,15 @@
   format %{ "cb$cmp   $op1, $labl # int" %}
   ins_encode %{
     Label* L = $labl$$label;
-    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
-    if (cond == Assembler::LT)
-      __ tbnz($op1$$Register, 31, *L);
-    else
-      __ tbz($op1$$Register, 31, *L);
+    Assembler::Condition cond =
+      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
+    __ tbr(cond, $op1$$Register, 31, *L);
   %}
   ins_pipe(pipe_cmp_branch);
-%}
-
-instruct cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl, rFlagsReg cr) %{
+  ins_short_branch(1);
+%}
+
+instruct cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
   match(If cmp (CmpL (AndL op1 op2) op3));
   predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
             || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
@@ -13896,15 +13899,13 @@
     Label* L = $labl$$label;
     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
     int bit = exact_log2($op2$$constant);
-    if (cond == Assembler::EQ)
-      __ tbz($op1$$Register, bit, *L);
-    else
-      __ tbnz($op1$$Register, bit, *L);
+    __ tbr(cond, $op1$$Register, bit, *L);
   %}
   ins_pipe(pipe_cmp_branch);
-%}
-
-instruct cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl, rFlagsReg cr) %{
+  ins_short_branch(1);
+%}
+
+instruct cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
   match(If cmp (CmpI (AndI op1 op2) op3));
   predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
             || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
@@ -13917,10 +13918,79 @@
     Label* L = $labl$$label;
     Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
     int bit = exact_log2($op2$$constant);
-    if (cond == Assembler::EQ)
-      __ tbz($op1$$Register, bit, *L);
-    else
-      __ tbnz($op1$$Register, bit, *L);
+    __ tbr(cond, $op1$$Register, bit, *L);
+  %}
+  ins_pipe(pipe_cmp_branch);
+  ins_short_branch(1);
+%}
+
+// And far variants
+instruct far_cmpL_branch_sign(cmpOp cmp, iRegL op1, immL0 op2, label labl) %{
+  match(If cmp (CmpL op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
+            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $op1, $labl # long" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond =
+      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
+    __ tbr(cond, $op1$$Register, 63, *L, /*far*/true);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpI_branch_sign(cmpOp cmp, iRegIorL2I op1, immI0 op2, label labl) %{
+  match(If cmp (CmpI op1 op2));
+  predicate(n->in(1)->as_Bool()->_test._test == BoolTest::lt
+            || n->in(1)->as_Bool()->_test._test == BoolTest::ge);
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "cb$cmp   $op1, $labl # int" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond =
+      ((Assembler::Condition)$cmp$$cmpcode == Assembler::LT) ? Assembler::NE : Assembler::EQ;
+    __ tbr(cond, $op1$$Register, 31, *L, /*far*/true);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpL_branch_bit(cmpOp cmp, iRegL op1, immL op2, immL0 op3, label labl) %{
+  match(If cmp (CmpL (AndL op1 op2) op3));
+  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
+            && is_power_of_2(n->in(2)->in(1)->in(2)->get_long()));
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "tb$cmp   $op1, $op2, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    int bit = exact_log2($op2$$constant);
+    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
+  %}
+  ins_pipe(pipe_cmp_branch);
+%}
+
+instruct far_cmpI_branch_bit(cmpOp cmp, iRegIorL2I op1, immI op2, immI0 op3, label labl) %{
+  match(If cmp (CmpI (AndI op1 op2) op3));
+  predicate((n->in(1)->as_Bool()->_test._test == BoolTest::ne
+            || n->in(1)->as_Bool()->_test._test == BoolTest::eq)
+            && is_power_of_2(n->in(2)->in(1)->in(2)->get_int()));
+  effect(USE labl);
+
+  ins_cost(BRANCH_COST);
+  format %{ "tb$cmp   $op1, $op2, $labl" %}
+  ins_encode %{
+    Label* L = $labl$$label;
+    Assembler::Condition cond = (Assembler::Condition)$cmp$$cmpcode;
+    int bit = exact_log2($op2$$constant);
+    __ tbr(cond, $op1$$Register, bit, *L, /*far*/true);
   %}
   ins_pipe(pipe_cmp_branch);
 %}
@@ -15318,6 +15388,124 @@
   ins_pipe(pipe_class_default);
 %}
 
+// --------------------------------- MLA --------------------------------------
+
+instruct vmla4S(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length() == 2 ||
+            n->as_Vector()->length() == 4);
+  match(Set dst (AddVS dst (MulVS src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlav  $dst,$src1,$src2\t# vector (4H)" %}
+  ins_encode %{
+    __ mlav(as_FloatRegister($dst$$reg), __ T4H,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vmla8S(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (AddVS dst (MulVS src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlav  $dst,$src1,$src2\t# vector (8H)" %}
+  ins_encode %{
+    __ mlav(as_FloatRegister($dst$$reg), __ T8H,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vmla2I(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (AddVI dst (MulVI src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlav  $dst,$src1,$src2\t# vector (2S)" %}
+  ins_encode %{
+    __ mlav(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vmla4I(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (AddVI dst (MulVI src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlav  $dst,$src1,$src2\t# vector (4S)" %}
+  ins_encode %{
+    __ mlav(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+// --------------------------------- MLS --------------------------------------
+
+instruct vmls4S(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length() == 2 ||
+            n->as_Vector()->length() == 4);
+  match(Set dst (SubVS dst (MulVS src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlsv  $dst,$src1,$src2\t# vector (4H)" %}
+  ins_encode %{
+    __ mlsv(as_FloatRegister($dst$$reg), __ T4H,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vmls8S(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length() == 8);
+  match(Set dst (SubVS dst (MulVS src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlsv  $dst,$src1,$src2\t# vector (8H)" %}
+  ins_encode %{
+    __ mlsv(as_FloatRegister($dst$$reg), __ T8H,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vmls2I(vecD dst, vecD src1, vecD src2)
+%{
+  predicate(n->as_Vector()->length() == 2);
+  match(Set dst (SubVI dst (MulVI src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlsv  $dst,$src1,$src2\t# vector (2S)" %}
+  ins_encode %{
+    __ mlsv(as_FloatRegister($dst$$reg), __ T2S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
+instruct vmls4I(vecX dst, vecX src1, vecX src2)
+%{
+  predicate(n->as_Vector()->length() == 4);
+  match(Set dst (SubVI dst (MulVI src1 src2)));
+  ins_cost(INSN_COST);
+  format %{ "mlsv  $dst,$src1,$src2\t# vector (4S)" %}
+  ins_encode %{
+    __ mlsv(as_FloatRegister($dst$$reg), __ T4S,
+            as_FloatRegister($src1$$reg),
+            as_FloatRegister($src2$$reg));
+  %}
+  ins_pipe(pipe_class_default);
+%}
+
 // --------------------------------- DIV --------------------------------------
 
 instruct vdiv2F(vecD dst, vecD src1, vecD src2)
--- a/src/cpu/aarch64/vm/assembler_aarch64.hpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -135,15 +135,10 @@
 // bytecode pointer
 REGISTER_DECLARATION(Register, rbcp,      r22);
 // Dispatch table base
-REGISTER_DECLARATION(Register, rdispatch,      r21);
+REGISTER_DECLARATION(Register, rdispatch, r21);
 // Java stack pointer
 REGISTER_DECLARATION(Register, esp,      r20);
 
-// TODO : x86 uses rbp to save SP in method handle code
-// we may need to do the same with fp
-// JSR 292 fixed register usages:
-//REGISTER_DECLARATION(Register, r_mh_SP_save, r29);
-
 #define assert_cond(ARG1) assert(ARG1, #ARG1)
 
 namespace asm_util {
@@ -551,6 +546,7 @@
         size = 0; break;
       default:
         ShouldNotReachHere();
+        size = 0;  // unreachable
       }
     } else {
       size = i->get(31, 31);
@@ -2041,6 +2037,8 @@
   INSN(addv, 0, 0b100001);
   INSN(subv, 1, 0b100001);
   INSN(mulv, 0, 0b100111);
+  INSN(mlav, 0, 0b100101);
+  INSN(mlsv, 1, 0b100101);
   INSN(sshl, 0, 0b010001);
   INSN(ushl, 1, 0b010001);
 
--- a/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/c1_LIRAssembler_aarch64.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -173,6 +173,7 @@
     break;
   default:
     ShouldNotReachHere();
+    result = 0;  // unreachable
   }
   return result;
 }
@@ -720,6 +721,7 @@
     break;
   default:
     ShouldNotReachHere();
+    insn = &Assembler::str;  // unreachable
   }
 
   if (info) add_debug_info_for_null_check_here(info);
@@ -1110,6 +1112,7 @@
       case lir_cond_greaterEqual: acond = (is_unordered ? Assembler::HS : Assembler::GE); break;
       case lir_cond_greater:      acond = (is_unordered ? Assembler::HI : Assembler::GT); break;
       default:                    ShouldNotReachHere();
+        acond = Assembler::EQ;  // unreachable
       }
     } else {
       switch (op->cond()) {
@@ -1121,7 +1124,8 @@
         case lir_cond_greater:      acond = Assembler::GT; break;
         case lir_cond_belowEqual:   acond = Assembler::LS; break;
         case lir_cond_aboveEqual:   acond = Assembler::HS; break;
-        default:                         ShouldNotReachHere();
+        default:                    ShouldNotReachHere();
+          acond = Assembler::EQ;  // unreachable
       }
     }
     __ br(acond,*(op->label()));
@@ -1313,7 +1317,9 @@
   ciMethodData* md;
   ciProfileData* data;
 
-  if (op->should_profile()) {
+  const bool should_profile = op->should_profile();
+
+  if (should_profile) {
     ciMethod* method = op->profiled_method();
     assert(method != NULL, "Should have method");
     int bci = op->profiled_bci();
@@ -1324,8 +1330,8 @@
     assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
   }
   Label profile_cast_success, profile_cast_failure;
-  Label *success_target = op->should_profile() ? &profile_cast_success : success;
-  Label *failure_target = op->should_profile() ? &profile_cast_failure : failure;
+  Label *success_target = should_profile ? &profile_cast_success : success;
+  Label *failure_target = should_profile ? &profile_cast_failure : failure;
 
   if (obj == k_RInfo) {
     k_RInfo = dst;
@@ -1341,7 +1347,7 @@
 
   assert_different_registers(obj, k_RInfo, klass_RInfo);
 
-    if (op->should_profile()) {
+    if (should_profile) {
       Label not_null;
       __ cbnz(obj, not_null);
       // Object is null; update MDO and exit
@@ -1413,7 +1419,7 @@
       // successful cast, fall through to profile or jump
     }
   }
-  if (op->should_profile()) {
+  if (should_profile) {
     Register mdo  = klass_RInfo, recv = k_RInfo;
     __ bind(profile_cast_success);
     __ mov_metadata(mdo, md->constant_encoding());
@@ -1438,6 +1444,8 @@
 
 
 void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
+  const bool should_profile = op->should_profile();
+
   LIR_Code code = op->code();
   if (code == lir_store_check) {
     Register value = op->object()->as_register();
@@ -1452,7 +1460,7 @@
     ciMethodData* md;
     ciProfileData* data;
 
-    if (op->should_profile()) {
+    if (should_profile) {
       ciMethod* method = op->profiled_method();
       assert(method != NULL, "Should have method");
       int bci = op->profiled_bci();
@@ -1463,10 +1471,10 @@
       assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
     }
     Label profile_cast_success, profile_cast_failure, done;
-    Label *success_target = op->should_profile() ? &profile_cast_success : &done;
-    Label *failure_target = op->should_profile() ? &profile_cast_failure : stub->entry();
-
-    if (op->should_profile()) {
+    Label *success_target = should_profile ? &profile_cast_success : &done;
+    Label *failure_target = should_profile ? &profile_cast_failure : stub->entry();
+
+    if (should_profile) {
       Label not_null;
       __ cbnz(value, not_null);
       // Object is null; update MDO and exit
@@ -1502,7 +1510,7 @@
     __ cbzw(k_RInfo, *failure_target);
     // fall through to the success case
 
-    if (op->should_profile()) {
+    if (should_profile) {
       Register mdo  = klass_RInfo, recv = k_RInfo;
       __ bind(profile_cast_success);
       __ mov_metadata(mdo, md->constant_encoding());
@@ -1621,9 +1629,10 @@
   case lir_cond_lessEqual:    acond = Assembler::LE; ncond = Assembler::GT; break;
   case lir_cond_greaterEqual: acond = Assembler::GE; ncond = Assembler::LT; break;
   case lir_cond_greater:      acond = Assembler::GT; ncond = Assembler::LE; break;
-  case lir_cond_belowEqual:   Unimplemented(); break;
-  case lir_cond_aboveEqual:   Unimplemented(); break;
+  case lir_cond_belowEqual:
+  case lir_cond_aboveEqual:
   default:                    ShouldNotReachHere();
+    acond = Assembler::EQ; ncond = Assembler::NE;  // unreachable
   }
 
   assert(result->is_single_cpu() || result->is_double_cpu(),
@@ -1724,6 +1733,7 @@
         break;
       default:
         ShouldNotReachHere();
+        c = 0;  // unreachable
         break;
       }
 
@@ -1926,6 +1936,7 @@
         break;
       default:
         ShouldNotReachHere();
+        imm = 0;  // unreachable
         break;
       }
 
@@ -3123,6 +3134,9 @@
     break;
   default:
     ShouldNotReachHere();
+    lda = &MacroAssembler::ldaxr;
+    add = &MacroAssembler::add;
+    stl = &MacroAssembler::stlxr;  // unreachable
   }
 
   switch (code) {
--- a/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -238,6 +238,7 @@
     }
   } else {
     ShouldNotReachHere();
+    r = NULL;  // unreachable
   }
   return r;
 }
--- a/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/c1_MacroAssembler_aarch64.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -27,6 +27,7 @@
 #define CPU_AARCH64_VM_C1_MACROASSEMBLER_AARCH64_HPP
 
 using MacroAssembler::build_frame;
+using MacroAssembler::null_check;
 
 // C1_MacroAssembler contains high-level macros for C1
 
--- a/src/cpu/aarch64/vm/frame_aarch64.cpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/frame_aarch64.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -433,11 +433,11 @@
   // This is the sp before any possible extension (adapter/locals).
   intptr_t* unextended_sp = interpreter_frame_sender_sp();
 
-#ifdef COMPILER2
+#if defined(COMPILER2) || INCLUDE_JVMCI
   if (map->update_map()) {
     update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset));
   }
-#endif // COMPILER2
+#endif // COMPILER2 || INCLUDE_JVMCI
 
   return frame(sender_sp, unextended_sp, link(), sender_pc());
 }
--- a/src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -28,6 +28,10 @@
 
 const int StackAlignmentInBytes  = 16;
 
+// Indicates whether the C calling conventions require that
+// 32-bit integer argument values are extended to 64 bits.
+const bool CCallingConventionRequiresIntsAsLongs = false;
+
 #define SUPPORTS_NATIVE_CX8
 
 // The maximum B/BL offset range on AArch64 is 128MB.
--- a/src/cpu/aarch64/vm/globals_aarch64.hpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/globals_aarch64.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -40,14 +40,7 @@
 define_pd_global(bool, TrapBasedNullChecks,  false);
 define_pd_global(bool, UncommonNullCast,         true);  // Uncommon-trap NULLs past to check cast
 
-// See 4827828 for this change. There is no globals_core_i486.hpp. I can't
-// assign a different value for C2 without touching a number of files. Use
-// #ifdef to minimize the change as it's late in Mantis. -- FIXME.
-// c1 doesn't have this problem because the fix to 4858033 assures us
-// the the vep is aligned at CodeEntryAlignment whereas c2 only aligns
-// the uep and the vep doesn't get real alignment but just slops on by
-// only assured that the entry instruction meets the 5 byte size requirement.
-#ifdef COMPILER2
+#if defined(COMPILER2) || INCLUDE_JVMCI
 define_pd_global(intx, CodeEntryAlignment,       64);
 #else
 define_pd_global(intx, CodeEntryAlignment,       16);
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.cpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -1054,13 +1054,39 @@
     bind(skip_receiver_profile);
 
     // The method data pointer needs to be updated to reflect the new target.
+#if INCLUDE_JVMCI
+    if (MethodProfileWidth == 0) {
+      update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));
+    }
+#else // INCLUDE_JVMCI
     update_mdp_by_constant(mdp,
                            in_bytes(VirtualCallData::
                                     virtual_call_data_size()));
+#endif // INCLUDE_JVMCI
     bind(profile_continue);
   }
 }
 
+#if INCLUDE_JVMCI
+void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) {
+  assert_different_registers(method, mdp, reg2);  // reg2 is overwritten by the row helper, so it must not alias the inputs
+  if (ProfileInterpreter && MethodProfileWidth > 0) {  // emits nothing unless method-profile rows are configured
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    Label done;  // the row helper branches here once it has bumped a counter
+    record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth,
+      &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset()));
+    bind(done);
+
+    update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size()));  // step mdp by one VirtualCallData record
+    bind(profile_continue);
+  }
+}
+#endif // INCLUDE_JVMCI
+
 // This routine creates a state machine for updating the multi-row
 // type profile at a virtual call site (or other type-sensitive bytecode).
 // The machine visits each row (of receiver/count) until the receiver type
@@ -1080,14 +1106,36 @@
     if (is_virtual_call) {
       increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
     }
-    return;
+#if INCLUDE_JVMCI
+    else if (EnableJVMCI) {
+      increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()));
+    }
+#endif // INCLUDE_JVMCI
+  } else {
+    int non_profiled_offset = -1;
+    if (is_virtual_call) {
+      non_profiled_offset = in_bytes(CounterData::count_offset());
+    }
+#if INCLUDE_JVMCI
+    else if (EnableJVMCI) {
+      non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset());
+    }
+#endif // INCLUDE_JVMCI
+
+    record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth,
+        &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset);
   }
+}
 
-  int last_row = VirtualCallData::row_limit() - 1;
+void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp,
+                                        Register reg2, int start_row, Label& done, int total_rows,
+                                        OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
+                                        int non_profiled_offset) {
+  int last_row = total_rows - 1;
   assert(start_row <= last_row, "must be work left to do");
-  // Test this row for both the receiver and for null.
+  // Test this row for both the item and for null.
   // Take any of three different outcomes:
-  //   1. found receiver => increment count and goto done
+  //   1. found item => increment count and goto done
   //   2. found null => keep looking for case 1, maybe allocate this cell
   //   3. found something else => keep looking for cases 1 and 2
   // Case 3 is handled by a recursive call.
@@ -1095,55 +1143,56 @@
     Label next_test;
     bool test_for_null_also = (row == start_row);
 
-    // See if the receiver is receiver[n].
-    int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
-    test_mdp_data_at(mdp, recvr_offset, receiver,
+    // See if the item is item[n].
+    int item_offset = in_bytes(item_offset_fn(row));
+    test_mdp_data_at(mdp, item_offset, item,
                      (test_for_null_also ? reg2 : noreg),
                      next_test);
-    // (Reg2 now contains the receiver from the CallData.)
+    // (Reg2 now contains the item from the CallData.)
 
-    // The receiver is receiver[n].  Increment count[n].
-    int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
+    // The item is item[n].  Increment count[n].
+    int count_offset = in_bytes(item_count_offset_fn(row));
     increment_mdp_data_at(mdp, count_offset);
     b(done);
     bind(next_test);
 
     if (test_for_null_also) {
       Label found_null;
-      // Failed the equality check on receiver[n]...  Test for null.
+      // Failed the equality check on item[n]...  Test for null.
       if (start_row == last_row) {
         // The only thing left to do is handle the null case.
-        if (is_virtual_call) {
+        if (non_profiled_offset >= 0) {
           cbz(reg2, found_null);
-          // Receiver did not match any saved receiver and there is no empty row for it.
+          // Item did not match any saved item and there is no empty row for it.
           // Increment total counter to indicate polymorphic case.
-          increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+          increment_mdp_data_at(mdp, non_profiled_offset);
           b(done);
           bind(found_null);
         } else {
-          cbz(reg2, done);
+          cbnz(reg2, done);
         }
         break;
       }
       // Since null is rare, make it be the branch-taken case.
-      cbz(reg2,found_null);
+      cbz(reg2, found_null);
 
       // Put all the "Case 3" tests here.
-      record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call);
+      record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows,
+        item_offset_fn, item_count_offset_fn, non_profiled_offset);
 
-      // Found a null.  Keep searching for a matching receiver,
+      // Found a null.  Keep searching for a matching item,
       // but remember that this is an empty (unused) slot.
       bind(found_null);
     }
   }
 
-  // In the fall-through case, we found no matching receiver, but we
-  // observed the receiver[start_row] is NULL.
+  // In the fall-through case, we found no matching item, but we
+  // observed the item[start_row] is NULL.
 
-  // Fill in the receiver field and increment the count.
-  int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
-  set_mdp_data_at(mdp, recvr_offset, receiver);
-  int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
+  // Fill in the item field and increment the count.
+  int item_offset = in_bytes(item_offset_fn(start_row));
+  set_mdp_data_at(mdp, item_offset, item);
+  int count_offset = in_bytes(item_count_offset_fn(start_row));
   mov(reg2, DataLayout::counter_increment);
   set_mdp_data_at(mdp, count_offset, reg2);
   if (start_row > 0) {
@@ -1347,9 +1396,8 @@
   // the code to check if the event should be sent.
   if (JvmtiExport::can_post_interpreter_events()) {
     Label L;
-    ldr(r3, Address(rthread, JavaThread::interp_only_mode_offset()));
-    tst(r3, ~0);
-    br(Assembler::EQ, L);
+    ldrw(r3, Address(rthread, JavaThread::interp_only_mode_offset()));
+    cbzw(r3, L);
     call_VM(noreg, CAST_FROM_FN_PTR(address,
                                     InterpreterRuntime::post_method_entry));
     bind(L);
--- a/src/cpu/aarch64/vm/interp_masm_aarch64.hpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -33,6 +33,7 @@
 
-// This file specializes the assember with interpreter-specific macros
+// This file specializes the assembler with interpreter-specific macros
 
+typedef ByteSize (*OffsetFunction)(uint);
 
 class InterpreterMacroAssembler: public MacroAssembler {
  protected:
@@ -234,6 +235,10 @@
   void record_klass_in_profile_helper(Register receiver, Register mdp,
                                       Register reg2, int start_row,
                                       Label& done, bool is_virtual_call);
+  void record_item_in_profile_helper(Register item, Register mdp,
+                                     Register reg2, int start_row, Label& done, int total_rows,
+                                     OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn,
+                                     int non_profiled_offset);
 
   void update_mdp_by_offset(Register mdp_in, int offset_of_offset);
   void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp);
@@ -247,6 +252,7 @@
   void profile_virtual_call(Register receiver, Register mdp,
                             Register scratch2,
                             bool receiver_can_be_null = false);
+  void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN;
   void profile_ret(Register return_bci, Register mdp);
   void profile_null_seen(Register mdp);
   void profile_typecheck(Register mdp, Register klass, Register scratch);
--- a/src/cpu/aarch64/vm/jniFastGetField_aarch64.cpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/jniFastGetField_aarch64.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -61,6 +61,7 @@
     case T_FLOAT:   name = "jni_fast_GetFloatField";   break;
     case T_DOUBLE:  name = "jni_fast_GetDoubleField";  break;
     default:        ShouldNotReachHere();
+      name = NULL;  // unreachable
   }
   ResourceMark rm;
   BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE);
@@ -125,6 +126,7 @@
     case T_FLOAT:   slow_case_addr = jni_GetFloatField_addr();   break;
     case T_DOUBLE:  slow_case_addr = jni_GetDoubleField_addr();  break;
     default:        ShouldNotReachHere();
+      slow_case_addr = NULL;  // unreachable
   }
 
   {
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -678,7 +678,7 @@
 
   if (cbuf) cbuf->set_insts_mark();
   relocate(entry.rspec());
-  if (Assembler::reachable_from_branch_at(pc(), entry.target())) {
+  if (!far_branches()) {
     bl(entry.target());
   } else {
     bl(pc());
@@ -733,8 +733,8 @@
   return stub;
 }
 
-address MacroAssembler::ic_call(address entry) {
-  RelocationHolder rh = virtual_call_Relocation::spec(pc());
+address MacroAssembler::ic_call(address entry, jint method_index) {
+  RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index);
   // address const_ptr = long_constant((jlong)Universe::non_oop_word());
   // unsigned long offset;
   // ldr_constant(rscratch2, const_ptr);
--- a/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -410,7 +410,7 @@
 
 #define WRAP(INSN)                                                            \
   void INSN(Register Rd, Register Rn, Register Rm, Register Ra) {             \
-    if ((VM_Version::cpu_cpuFeatures() & VM_Version::CPU_A53MAC) && Ra != zr) \
+    if ((VM_Version::features() & VM_Version::CPU_A53MAC) && Ra != zr)        \
       nop();                                                                  \
     Assembler::INSN(Rd, Rn, Rm, Ra);                                          \
   }
@@ -480,6 +480,32 @@
     orr(Vd, T, Vn, Vn);
   }
 
+public:
+
+  // Generalized Test Bit And Branch, including a "far" variety which
+  // spans more than 32KiB.
+  void tbr(Condition cond, Register Rt, int bitpos, Label &dest, bool far = false) {
+    assert(cond == EQ || cond == NE, "must be");  // EQ selects tbz, NE selects tbnz (see below)
+
+    if (far)
+      cond = ~cond;  // far form tests the opposite condition (presumably operator~ maps EQ<->NE) to skip b(dest)
+
+    void (Assembler::* branch)(Register Rt, int bitpos, Label &L);  // pointer to the tbz/tbnz emitter chosen below
+    if (cond == Assembler::EQ)
+      branch = &Assembler::tbz;
+    else
+      branch = &Assembler::tbnz;
+
+    if (far) {
+      Label L;
+      (this->*branch)(Rt, bitpos, L);  // inverted test hops over the unconditional branch
+      b(dest);  // the unconditional branch is what actually reaches dest
+      bind(L);
+    } else {
+      (this->*branch)(Rt, bitpos, dest);  // near form: a single test-bit branch straight to dest
+    }
+  }
+
   // macro instructions for accessing and updating floating point
   // status register
   //
@@ -976,7 +1002,7 @@
   }
 
   // Emit the CompiledIC call idiom
-  address ic_call(address entry);
+  address ic_call(address entry, jint method_index = 0);
 
 public:
 
--- a/src/cpu/aarch64/vm/nativeInst_aarch64.hpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/nativeInst_aarch64.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -62,7 +62,6 @@
   inline bool is_jump_or_nop();
   inline bool is_cond_jump();
   bool is_safepoint_poll();
-  inline bool is_mov_literal64();
   bool is_movz();
   bool is_movk();
   bool is_sigill_zombie_not_entrant();
@@ -98,6 +97,14 @@
   static bool is_ldr_literal_at(address instr);
   static bool is_ldrw_to_zr(address instr);
 
+  static bool is_call_at(address instr) {
+    const uint32_t insn = (*(uint32_t*)instr);  // read the 32-bit instruction word stored at instr
+    return (insn >> 26) == 0b100101;  // top six bits 100101 are the A64 BL (branch with link) opcode
+  }
+  bool is_call() {
+    return is_call_at(addr_at(0));  // same test at addr_at(0) -- presumably this instruction's own address
+  }
+
   static bool maybe_cpool_ref(address instr) {
     return is_adrp_at(instr) || is_ldr_literal_at(instr);
   }
@@ -157,11 +164,6 @@
   inline friend NativeCall* nativeCall_at(address address);
   inline friend NativeCall* nativeCall_before(address return_address);
 
-  static bool is_call_at(address instr) {
-    const uint32_t insn = (*(uint32_t*)instr);
-    return (insn >> 26) == 0b100101;
-  }
-
   static bool is_call_before(address return_address) {
     return is_call_at(return_address - NativeCall::return_address_offset);
   }
--- a/src/cpu/aarch64/vm/relocInfo_aarch64.cpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/relocInfo_aarch64.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -59,14 +59,20 @@
 
 address Relocation::pd_call_destination(address orig_addr) {
   assert(is_call(), "should be a call here");
-  if (is_call()) {
+  if (NativeCall::is_call_at(addr())) {  // test the instruction word at addr(); is_call() is already asserted above
     address trampoline = nativeCall_at(addr())->get_trampoline();
     if (trampoline) {
       return nativeCallTrampolineStub_at(trampoline)->destination();
     }
   }
   if (orig_addr != NULL) {
-    return MacroAssembler::pd_call_destination(orig_addr);
+    address new_addr = MacroAssembler::pd_call_destination(orig_addr);  // target decoded from the instruction at its old location
+    // If call is branch to self, don't try to relocate it, just leave it
+    // as branch to self. This happens during code generation if the code
+    // buffer expands. It will be relocated to the trampoline above once
+    // code generation is complete.
+    new_addr = (new_addr == orig_addr) ? addr() : new_addr;  // self-branch at the old address becomes self-branch at addr()
+    return new_addr;
   }
   return MacroAssembler::pd_call_destination(addr());
 }
--- a/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -39,10 +39,13 @@
 #ifdef COMPILER1
 #include "c1/c1_Runtime1.hpp"
 #endif
-#ifdef COMPILER2
+#if defined(COMPILER2) || INCLUDE_JVMCI
 #include "adfiles/ad_aarch64.hpp"
 #include "opto/runtime.hpp"
 #endif
+#if INCLUDE_JVMCI
+#include "jvmci/jvmciJavaClasses.hpp"
+#endif
 
 #ifdef BUILTIN_SIM
 #include "../../../../../../simulator/simulator.hpp"
@@ -109,14 +112,14 @@
 };
 
 OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {
-#ifdef COMPILER2
+#if defined(COMPILER2) || INCLUDE_JVMCI
   if (save_vectors) {
     // Save upper half of vector registers
     int vect_words = 32 * 8 / wordSize;
     additional_frame_words += vect_words;
   }
 #else
-  assert(!save_vectors, "vectors are generated only by C2");
+  assert(!save_vectors, "vectors are generated only by C2 and JVMCI");
 #endif
 
   int frame_size_in_bytes = round_to(additional_frame_words*wordSize +
@@ -166,7 +169,7 @@
 
 void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {
 #ifndef COMPILER2
-  assert(!restore_vectors, "vectors are generated only by C2");
+  assert(!restore_vectors, "vectors are generated only by C2 and JVMCI");
 #endif
   __ pop_CPU_state(restore_vectors);
   __ leave();
@@ -547,6 +550,18 @@
   // Pre-load the register-jump target early, to schedule it better.
   __ ldr(rscratch1, Address(rmethod, in_bytes(Method::from_compiled_offset())));
 
+#if INCLUDE_JVMCI
+  if (EnableJVMCI) {
+    // check if this call should be routed towards a specific entry point
+    __ ldr(rscratch2, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
+    Label no_alternative_target;
+    __ cbz(rscratch2, no_alternative_target);
+    __ mov(rscratch1, rscratch2);
+    __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())));
+    __ bind(no_alternative_target);
+  }
+#endif // INCLUDE_JVMCI
+
   // Now generate the shuffle code.
   for (int i = 0; i < total_args_passed; i++) {
     if (sig_bt[i] == T_VOID) {
@@ -1075,7 +1090,7 @@
 }
 
 
-// Check GC_locker::needs_gc and enter the runtime if it's true.  This
+// Check GCLocker::needs_gc and enter the runtime if it's true.  This
 // keeps a new JNI critical region from starting until a GC has been
 // forced.  Save down any oops in registers and describe them in an
 // OopMap.
@@ -1257,14 +1272,14 @@
-// GetPrimtiveArrayCritical and disallow the use of any other JNI
+// GetPrimitiveArrayCritical and disallow the use of any other JNI
 // functions.  The wrapper is expected to unpack the arguments before
 // passing them to the callee and perform checks before and after the
-// native call to ensure that they GC_locker
+// native call to ensure that the GCLocker
 // lock_critical/unlock_critical semantics are followed.  Some other
 // parts of JNI setup are skipped like the tear down of the JNI handle
 // block and the check for pending exceptions it's impossible for them
 // to be thrown.
 //
 // They are roughly structured like this:
-//    if (GC_locker::needs_gc())
+//    if (GCLocker::needs_gc())
 //      SharedRuntime::block_for_jni_critical();
-//    tranistion to thread_in_native
-//    unpack arrray arguments and call native entry point
+//    transition to thread_in_native
+//    unpack array arguments and call native entry point
@@ -2237,7 +2252,13 @@
   // Allocate space for the code
   ResourceMark rm;
   // Setup code generation tools
-  CodeBuffer buffer("deopt_blob", 2048, 1024);
+  int pad = 0;
+#if INCLUDE_JVMCI
+  if (EnableJVMCI) {
+    pad += 512; // Increase the buffer size when compiling for JVMCI
+  }
+#endif
+  CodeBuffer buffer("deopt_blob", 2048+pad, 1024);
   MacroAssembler* masm = new MacroAssembler(&buffer);
   int frame_size_in_words;
   OopMap* map = NULL;
@@ -2294,6 +2315,12 @@
   __ b(cont);
 
   int reexecute_offset = __ pc() - start;
+#if INCLUDE_JVMCI && !defined(COMPILER1)  // test the macro's value, as the other JVMCI guards in this change do
+  if (EnableJVMCI && UseJVMCICompiler) {
+    // JVMCI does not use this kind of deoptimization
+    __ should_not_reach_here();
+  }
+#endif
 
   // Reexecute case
   // return address is the pc describes what bci to do re-execute at
@@ -2304,6 +2331,44 @@
   __ movw(rcpool, Deoptimization::Unpack_reexecute); // callee-saved
   __ b(cont);
 
+#if INCLUDE_JVMCI
+  Label after_fetch_unroll_info_call;
+  int implicit_exception_uncommon_trap_offset = 0;
+  int uncommon_trap_offset = 0;
+
+  if (EnableJVMCI) {
+    implicit_exception_uncommon_trap_offset = __ pc() - start;
+
+    __ ldr(lr, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
+    __ str(zr, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset())));
+
+    uncommon_trap_offset = __ pc() - start;
+
+    // Save everything in sight.
+    RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words);
+    // fetch_unroll_info needs to call last_java_frame()
+    Label retaddr;
+    __ set_last_Java_frame(sp, noreg, retaddr, rscratch1);
+
+    __ ldrw(c_rarg1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset())));
+    __ movw(rscratch1, -1);
+    __ strw(rscratch1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset())));
+
+    __ movw(rcpool, (int32_t)Deoptimization::Unpack_reexecute);
+    __ mov(c_rarg0, rthread);
+    __ lea(rscratch1,
+           RuntimeAddress(CAST_FROM_FN_PTR(address,
+                                           Deoptimization::uncommon_trap)));
+    __ blrt(rscratch1, 2, 0, MacroAssembler::ret_type_integral);
+    __ bind(retaddr);
+    oop_maps->add_gc_map( __ pc()-start, map->deep_copy());
+
+    __ reset_last_Java_frame(false, false);
+
+    __ b(after_fetch_unroll_info_call);
+  } // EnableJVMCI
+#endif // INCLUDE_JVMCI
+
   int exception_offset = __ pc() - start;
 
   // Prolog for exception case
@@ -2395,7 +2460,13 @@
 
   __ reset_last_Java_frame(false, true);
 
-  // Load UnrollBlock* into rdi
+#if INCLUDE_JVMCI
+  if (EnableJVMCI) {
+    __ bind(after_fetch_unroll_info_call);
+  }
+#endif
+
+  // Load UnrollBlock* into r5
   __ mov(r5, r0);
 
   __ ldrw(rcpool, Address(r5, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()));
@@ -2547,7 +2618,12 @@
 
   _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words);
   _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset);
-
+#if INCLUDE_JVMCI
+  if (EnableJVMCI) {
+    _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset);
+    _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset);
+  }
+#endif
 #ifdef BUILTIN_SIM
   if (NotifySimulator) {
     unsigned char *base = _deopt_blob->code_begin();
@@ -2560,7 +2636,7 @@
   return 0;
 }
 
-#ifdef COMPILER2
+#if defined(COMPILER2) || INCLUDE_JVMCI
 //------------------------------generate_uncommon_trap_blob--------------------
 void SharedRuntime::generate_uncommon_trap_blob() {
   // Allocate space for the code
@@ -2943,7 +3019,7 @@
 }
 
 
-#ifdef COMPILER2
+#if defined(COMPILER2) || INCLUDE_JVMCI
 // This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame
 //
 //------------------------------generate_exception_blob---------------------------
--- a/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -958,8 +958,8 @@
     const Register t0 = r3, t1 = r4;
 
     if (is_backwards) {
-      __ lea(s, Address(s, count, Address::uxtw(exact_log2(-step))));
-      __ lea(d, Address(d, count, Address::uxtw(exact_log2(-step))));
+      __ lea(s, Address(s, count, Address::lsl(exact_log2(-step))));
+      __ lea(d, Address(d, count, Address::lsl(exact_log2(-step))));
     }
 
     Label done, tail;
@@ -1051,10 +1051,10 @@
     __ cmp(rscratch2, count);
     __ br(Assembler::HS, end);
     if (size == (size_t)wordSize) {
-      __ ldr(temp, Address(a, rscratch2, Address::uxtw(exact_log2(size))));
+      __ ldr(temp, Address(a, rscratch2, Address::lsl(exact_log2(size))));
       __ verify_oop(temp);
     } else {
-      __ ldrw(r16, Address(a, rscratch2, Address::uxtw(exact_log2(size))));
+      __ ldrw(r16, Address(a, rscratch2, Address::lsl(exact_log2(size))));
       __ decode_heap_oop(temp); // calls verify_oop
     }
     __ add(rscratch2, rscratch2, size);
@@ -1087,12 +1087,14 @@
     __ align(CodeEntryAlignment);
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
+    __ enter();
+
     if (entry != NULL) {
       *entry = __ pc();
       // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
       BLOCK_COMMENT("Entry:");
     }
-    __ enter();
+
     if (is_oop) {
       __ push(RegSet::of(d, count), sp);
       // no registers are destroyed by this call
@@ -1104,10 +1106,11 @@
       if (VerifyOops)
         verify_oop_array(size, d, count, r16);
       __ sub(count, count, 1); // make an inclusive end pointer
-      __ lea(count, Address(d, count, Address::uxtw(exact_log2(size))));
+      __ lea(count, Address(d, count, Address::lsl(exact_log2(size))));
       gen_write_ref_array_post_barrier(d, count, rscratch1);
     }
     __ leave();
+    __ mov(r0, zr); // return 0
     __ ret(lr);
 #ifdef BUILTIN_SIM
     {
@@ -1140,11 +1143,16 @@
 
     StubCodeMark mark(this, "StubRoutines", name);
     address start = __ pc();
-
+    __ enter();
+
+    if (entry != NULL) {
+      *entry = __ pc();
+      // caller can pass a 64-bit byte count here (from Unsafe.copyMemory)
+      BLOCK_COMMENT("Entry:");
+    }
     __ cmp(d, s);
     __ br(Assembler::LS, nooverlap_target);
 
-    __ enter();
     if (is_oop) {
       __ push(RegSet::of(d, count), sp);
       // no registers are destroyed by this call
@@ -1160,6 +1168,7 @@
       gen_write_ref_array_post_barrier(d, count, rscratch1);
     }
     __ leave();
+    __ mov(r0, zr); // return 0
     __ ret(lr);
 #ifdef BUILTIN_SIM
     {
@@ -1559,7 +1568,29 @@
                               Register dst_pos, // destination position (c_rarg3)
                               Register length,
                               Register temp,
-                              Label& L_failed) { Unimplemented(); }
+                              Label& L_failed) {
+    BLOCK_COMMENT("arraycopy_range_checks:");
+
+    assert_different_registers(rscratch1, temp);
+
+    //  if (src_pos + length > arrayOop(src)->length())  FAIL;
+    __ ldrw(rscratch1, Address(src, arrayOopDesc::length_offset_in_bytes()));
+    __ addw(temp, length, src_pos);
+    __ cmpw(temp, rscratch1);
+    __ br(Assembler::HI, L_failed);
+
+    //  if (dst_pos + length > arrayOop(dst)->length())  FAIL;
+    __ ldrw(rscratch1, Address(dst, arrayOopDesc::length_offset_in_bytes()));
+    __ addw(temp, length, dst_pos);
+    __ cmpw(temp, rscratch1);
+    __ br(Assembler::HI, L_failed);
+
+    // Have to clean up high 32 bits of 'src_pos' and 'dst_pos'.
+    __ movw(src_pos, src_pos);
+    __ movw(dst_pos, dst_pos);
+
+    BLOCK_COMMENT("arraycopy_range_checks done");
+  }
 
   // These stubs get called from some dumb test routine.
   // I'll write them properly when they're called from
@@ -1569,6 +1600,309 @@
   }
 
 
+  //
+  //  Generate 'unsafe' array copy stub
+  //  Though just as safe as the other stubs, it takes an unscaled
+  //  size_t argument instead of an element count.
+  //
+  //  Input:
+  //    c_rarg0   - source array address
+  //    c_rarg1   - destination array address
+  //    c_rarg2   - byte count, treated as ssize_t, can be zero
+  //
+  // No alignment-based dispatch is done here: the copy is always
+  // performed by the byte copy code reached through byte_copy_entry.
+  //
+  address generate_unsafe_copy(const char *name,
+                               address byte_copy_entry) {
+#ifdef PRODUCT
+    return StubRoutines::_jbyte_arraycopy; // product build: no code is emitted, the jbyte copy stub is returned as-is
+#else
+    __ align(CodeEntryAlignment);
+    StubCodeMark mark(this, "StubRoutines", name);
+    address start = __ pc(); // address of the first instruction of this stub
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+    // bump this on entry, not on exit:
+    __ lea(rscratch2, ExternalAddress((address)&SharedRuntime::_unsafe_array_copy_ctr));
+    __ incrementw(Address(rscratch2));
+    __ b(RuntimeAddress(byte_copy_entry)); // branch into the byte copy code; no leave()/ret is emitted by this stub
+    return start;
+#endif
+  }
+
+  //
+  //  Generate generic array copy stubs
+  //  Checks the arguments, then branches to the matching typed copy stub.
+  //  Input:
+  //    c_rarg0    -  src oop
+  //    c_rarg1    -  src_pos (32-bits)
+  //    c_rarg2    -  dst oop
+  //    c_rarg3    -  dst_pos (32-bits)
+  //    c_rarg4    -  element count (32-bits)
+  //
+  //  Output:
+  //    r0 ==  0  -  success
+  //    r0 == -1^K - failure, where K is partial transfer count
+  //
+  address generate_generic_copy(const char *name,
+                                address byte_copy_entry, address short_copy_entry,
+                                address int_copy_entry, address oop_copy_entry,
+                                address long_copy_entry, address checkcast_copy_entry) {
+
+    Label L_failed, L_objArray;
+    Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs;
+
+    // Input registers
+    const Register src        = c_rarg0;  // source array oop
+    const Register src_pos    = c_rarg1;  // source position
+    const Register dst        = c_rarg2;  // destination array oop
+    const Register dst_pos    = c_rarg3;  // destination position
+    const Register length     = c_rarg4;
+
+    StubCodeMark mark(this, "StubRoutines", name);
+
+    __ align(CodeEntryAlignment);
+    address start = __ pc();
+
+    __ enter(); // required for proper stackwalking of RuntimeStub frame
+
+    // bump this on entry, not on exit:
+    inc_counter_np(SharedRuntime::_generic_array_copy_ctr);
+
+    //-----------------------------------------------------------------------
+    // Assembler stub will be used for this call to arraycopy
+    // if the following conditions are met:
+    //
+    // (1) src and dst must not be null.
+    // (2) src_pos must not be negative.
+    // (3) dst_pos must not be negative.
+    // (4) length  must not be negative.
+    // (5) src klass and dst klass should be the same and not NULL.
+    // (6) src and dst should be arrays.
+    // (7) src_pos + length must not exceed length of src.
+    // (8) dst_pos + length must not exceed length of dst.
+    //
+
+    //  if (src == NULL) return -1;
+    __ cbz(src, L_failed);
+
+    //  if (src_pos < 0) return -1;
+    __ tbnz(src_pos, 31, L_failed);  // i.e. sign bit set
+
+    //  if (dst == NULL) return -1;
+    __ cbz(dst, L_failed);
+
+    //  if (dst_pos < 0) return -1;
+    __ tbnz(dst_pos, 31, L_failed);  // i.e. sign bit set
+
+    // registers used as temp
+    const Register scratch_length    = r16; // elements count to copy
+    const Register scratch_src_klass = r17; // array klass
+    const Register lh                = r18; // layout helper
+
+    //  if (length < 0) return -1;
+    __ movw(scratch_length, length);        // length (elements count, 32-bits value)
+    __ tbnz(scratch_length, 31, L_failed);  // i.e. sign bit set
+
+    __ load_klass(scratch_src_klass, src);
+#ifdef ASSERT
+    //  assert(src->klass() != NULL);
+    {
+      BLOCK_COMMENT("assert klasses not null {");
+      Label L1, L2;
+      __ cbnz(scratch_src_klass, L2);   // it is broken if klass is NULL
+      __ bind(L1);
+      __ stop("broken null klass");
+      __ bind(L2);
+      __ load_klass(rscratch1, dst);
+      __ cbz(rscratch1, L1);     // this would be broken also
+      BLOCK_COMMENT("} assert klasses not null done");
+    }
+#endif
+
+    // Load layout helper (32-bits)
+    //
+    //  |array_tag|     | header_size | element_type |     |log2_element_size|
+    // 32        30    24            16              8     2                 0
+    //
+    //   array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0
+    //
+
+    const int lh_offset = in_bytes(Klass::layout_helper_offset());
+
+    // Handle objArrays completely differently...
+    const jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+    __ ldrw(lh, Address(scratch_src_klass, lh_offset));
+    __ movw(rscratch1, objArray_lh);
+    __ eorw(rscratch2, lh, rscratch1);
+    __ cbzw(rscratch2, L_objArray);  // lh == objArray_lh: src is an object array
+
+    //  if (src->klass() != dst->klass()) return -1;
+    __ load_klass(rscratch2, dst);
+    __ eor(rscratch2, rscratch2, scratch_src_klass);
+    __ cbnz(rscratch2, L_failed);
+
+    //  if (!src->is_Array()) return -1;
+    __ tbz(lh, 31, L_failed);  // i.e. (lh >= 0)
+
+    // At this point, it is known to be a typeArray (array_tag 0x3).
+#ifdef ASSERT
+    {
+      BLOCK_COMMENT("assert primitive array {");
+      Label L;
+      __ movw(rscratch2, Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift);
+      __ cmpw(lh, rscratch2);
+      __ br(Assembler::GE, L);
+      __ stop("must be a primitive array");
+      __ bind(L);
+      BLOCK_COMMENT("} assert primitive array done");
+    }
+#endif
+
+    arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
+                           rscratch2, L_failed);
+
+    // TypeArrayKlass
+    //
+    // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize);
+    // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize);
+    //
+
+    const Register rscratch1_offset = rscratch1;    // array offset
+    const Register r18_elsize = lh; // element size
+
+    __ ubfx(rscratch1_offset, lh, Klass::_lh_header_size_shift,
+           exact_log2(Klass::_lh_header_size_mask+1));   // array_offset
+    __ add(src, src, rscratch1_offset);           // src array offset
+    __ add(dst, dst, rscratch1_offset);           // dst array offset
+    BLOCK_COMMENT("choose copy loop based on element size");
+
+    // next registers should be set before the jump to corresponding stub
+    const Register from     = c_rarg0;  // source array address
+    const Register to       = c_rarg1;  // destination array address
+    const Register count    = c_rarg2;  // elements count
+
+    // 'from', 'to', 'count' registers should be set in such order
+    // since they are the same as 'src', 'src_pos', 'dst'.
+
+    assert(Klass::_lh_log2_element_size_shift == 0, "fix this code");
+
+    // The possible values of elsize are 0-3, i.e. exact_log2(element
+    // size in bytes).  We do a simple bitwise binary search.
+  __ BIND(L_copy_bytes);
+    __ tbnz(r18_elsize, 1, L_copy_ints);    // bit 1 set: elsize is 2 or 3
+    __ tbnz(r18_elsize, 0, L_copy_shorts);  // bit 0 set: elsize is 1
+    __ lea(from, Address(src, src_pos));// src_addr
+    __ lea(to,   Address(dst, dst_pos));// dst_addr
+    __ movw(count, scratch_length); // length
+    __ b(RuntimeAddress(byte_copy_entry));
+
+  __ BIND(L_copy_shorts);
+    __ lea(from, Address(src, src_pos, Address::lsl(1)));// src_addr
+    __ lea(to,   Address(dst, dst_pos, Address::lsl(1)));// dst_addr
+    __ movw(count, scratch_length); // length
+    __ b(RuntimeAddress(short_copy_entry));
+
+  __ BIND(L_copy_ints);
+    __ tbnz(r18_elsize, 0, L_copy_longs);   // bit 0 set as well: elsize is 3
+    __ lea(from, Address(src, src_pos, Address::lsl(2)));// src_addr
+    __ lea(to,   Address(dst, dst_pos, Address::lsl(2)));// dst_addr
+    __ movw(count, scratch_length); // length
+    __ b(RuntimeAddress(int_copy_entry));
+
+  __ BIND(L_copy_longs);
+#ifdef ASSERT
+    {
+      BLOCK_COMMENT("assert long copy {");
+      Label L;
+      __ andw(lh, lh, Klass::_lh_log2_element_size_mask); // lh -> r18_elsize
+      __ cmpw(r18_elsize, LogBytesPerLong);
+      __ br(Assembler::EQ, L);
+      __ stop("must be long copy, but elsize is wrong");
+      __ bind(L);
+      BLOCK_COMMENT("} assert long copy done");
+    }
+#endif
+    __ lea(from, Address(src, src_pos, Address::lsl(3)));// src_addr
+    __ lea(to,   Address(dst, dst_pos, Address::lsl(3)));// dst_addr
+    __ movw(count, scratch_length); // length
+    __ b(RuntimeAddress(long_copy_entry));
+
+    // ObjArrayKlass
+  __ BIND(L_objArray);
+    // live at this point:  scratch_src_klass, scratch_length, src[_pos], dst[_pos]
+
+    Label L_plain_copy, L_checkcast_copy;
+    //  test array classes for subtyping
+    __ load_klass(r18, dst);
+    __ cmp(scratch_src_klass, r18); // usual case is exact equality
+    __ br(Assembler::NE, L_checkcast_copy);
+
+    // Identically typed arrays can be copied without element-wise checks.
+    arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
+                           rscratch2, L_failed);
+
+    __ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop))); // oop elements are 4 bytes with compressed oops, else 8
+    __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+    __ lea(to, Address(dst, dst_pos, Address::lsl(LogBytesPerHeapOop)));
+    __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+    __ movw(count, scratch_length); // length
+  __ BIND(L_plain_copy);
+    __ b(RuntimeAddress(oop_copy_entry));
+
+  __ BIND(L_checkcast_copy);
+    // live at this point:  scratch_src_klass, scratch_length, r18 (dst_klass)
+    {
+      // Before looking at dst.length, make sure dst is also an objArray.
+      __ ldrw(rscratch1, Address(r18, lh_offset));
+      __ movw(rscratch2, objArray_lh);
+      __ eorw(rscratch1, rscratch1, rscratch2);
+      __ cbnzw(rscratch1, L_failed);
+
+      // It is safe to examine both src.length and dst.length.
+      arraycopy_range_checks(src, src_pos, dst, dst_pos, scratch_length,
+                             r18, L_failed);
+
+      const Register rscratch2_dst_klass = rscratch2;
+      __ load_klass(rscratch2_dst_klass, dst); // reload
+
+      // Marshal the base address arguments now, freeing registers.
+      __ lea(from, Address(src, src_pos, Address::lsl(LogBytesPerHeapOop))); // scale by the heap oop size, not a fixed 8
+      __ add(from, from, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+      __ lea(to, Address(dst, dst_pos, Address::lsl(LogBytesPerHeapOop)));
+      __ add(to, to, arrayOopDesc::base_offset_in_bytes(T_OBJECT));
+      __ movw(count, length);           // length (reloaded)
+      Register sco_temp = c_rarg3;      // this register is free now
+      assert_different_registers(from, to, count, sco_temp,
+                                 rscratch2_dst_klass, scratch_src_klass);
+      // assert_clean_int(count, sco_temp);
+
+      // Generate the type check.
+      const int sco_offset = in_bytes(Klass::super_check_offset_offset());
+      __ ldrw(sco_temp, Address(rscratch2_dst_klass, sco_offset));
+      // assert_clean_int(sco_temp, r18);
+      generate_type_check(scratch_src_klass, sco_temp, rscratch2_dst_klass, L_plain_copy);
+
+      // Fetch destination element klass from the ObjArrayKlass header.
+      int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
+      __ ldr(rscratch2_dst_klass, Address(rscratch2_dst_klass, ek_offset));
+      __ ldrw(sco_temp, Address(rscratch2_dst_klass, sco_offset));
+
+      // the checkcast_copy loop needs two extra arguments:
+      assert(c_rarg3 == sco_temp, "#3 already in place");
+      // Set up arguments for checkcast_copy_entry.
+      __ mov(c_rarg4, rscratch2_dst_klass);  // dst.klass.element_klass
+      __ b(RuntimeAddress(checkcast_copy_entry));
+    }
+
+  __ BIND(L_failed);
+    __ mov(r0, -1);  // failure result for every argument check above
+    __ leave();   // required for proper stackwalking of RuntimeStub frame
+    __ ret(lr);
+
+    return start;
+  }
+
   void generate_arraycopy_stubs() {
     address entry;
     address entry_jbyte_arraycopy;
@@ -1655,6 +1989,18 @@
     StubRoutines::_checkcast_arraycopy        = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy);
     StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL,
                                                                         /*dest_uninitialized*/true);
+
+    StubRoutines::_unsafe_arraycopy    = generate_unsafe_copy("unsafe_arraycopy",
+                                                              entry_jbyte_arraycopy);
+
+    StubRoutines::_generic_arraycopy   = generate_generic_copy("generic_arraycopy",
+                                                               entry_jbyte_arraycopy,
+                                                               entry_jshort_arraycopy,
+                                                               entry_jint_arraycopy,
+                                                               entry_oop_arraycopy,
+                                                               entry_jlong_arraycopy,
+                                                               entry_checkcast_arraycopy);
+
   }
 
   void generate_math_stubs() { Unimplemented(); }
@@ -1973,7 +2319,7 @@
   //   c_rarg4   - input length
   //
   // Output:
-  //   x0        - input length
+  //   r0        - input length
   //
   address generate_cipherBlockChaining_decryptAESCrypt() {
     assert(UseAES, "need AES instructions and misaligned SSE support");
--- a/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -248,6 +248,7 @@
     break;
   default:
     ShouldNotReachHere();
+    fn = NULL;  // unreachable
   }
   const int gpargs = 0, rtype = 3;
   __ mov(rscratch1, fn);
@@ -436,6 +437,19 @@
   __ restore_constant_pool_cache();
   __ get_method(rmethod);
 
+#if INCLUDE_JVMCI
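+  // Check if we need to take lock at entry of synchronized method. NOTE(review): the load below tests pending_exception, yet the flag cleared is pending_monitorenter -- confirm which field should be tested.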
+  if (UseJVMCICompiler) {
+    Label L;
+    __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset()));
+    __ cbz(rscratch1, L);
+    // Clear flag.
+    __ strb(zr, Address(rthread, JavaThread::pending_monitorenter_offset()));
+    // Take lock.
+    lock_method();
+    __ bind(L);
+  }
+#endif
   // handle exceptions
   {
     Label L;
@@ -580,7 +594,7 @@
       __ br(Assembler::LT, *profile_method_continue);
 
       // if no method data exists, go to profile_method
-      __ test_method_data_pointer(r0, *profile_method);
+      __ test_method_data_pointer(rscratch2, *profile_method);
     }
 
     {
--- a/src/cpu/aarch64/vm/vmStructs_aarch64.hpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/vmStructs_aarch64.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2001, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved.
  * Copyright (c) 2014, Red Hat Inc. All rights reserved. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
@@ -30,16 +30,8 @@
 // constants required by the Serviceability Agent. This file is
 // referenced by vmStructs.cpp.
 
-#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field)            \
-                                                                                                                                     \
-  /******************************/                                                                                                   \
-  /* JavaCallWrapper            */                                                                                                   \
-  /******************************/                                                                                                   \
-  /******************************/                                                                                                   \
-  /* JavaFrameAnchor            */                                                                                                   \
-  /******************************/                                                                                                   \
-  volatile_nonstatic_field(JavaFrameAnchor,     _last_Java_fp,                                    intptr_t*)
-
+#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \
+  volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) // single entry; of the eight macro parameters only volatile_nonstatic_field is used
 
 #define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type)
 
--- a/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -67,8 +67,6 @@
 int VM_Version::_variant;
 int VM_Version::_revision;
 int VM_Version::_stepping;
-int VM_Version::_cpuFeatures;
-const char*           VM_Version::_features_str = "";
 
 static BufferBlob* stub_blob;
 static const int stub_size = 550;
@@ -129,7 +127,7 @@
 
   char buf[512];
 
-  _cpuFeatures = auxv;
+  _features = auxv;
 
   int cpu_lines = 0;
   if (FILE *f = fopen("/proc/cpuinfo", "r")) {
@@ -154,12 +152,12 @@
   }
 
   // Enable vendor specific features
-  if (_cpu == CPU_CAVIUM && _variant == 0) _cpuFeatures |= CPU_DMB_ATOMICS;
-  if (_cpu == CPU_ARM && (_model == 0xd03 || _model2 == 0xd03)) _cpuFeatures |= CPU_A53MAC;
+  if (_cpu == CPU_CAVIUM && _variant == 0) _features |= CPU_DMB_ATOMICS;
+  if (_cpu == CPU_ARM && (_model == 0xd03 || _model2 == 0xd03)) _features |= CPU_A53MAC;
   // If an olde style /proc/cpuinfo (cpu_lines == 1) then if _model is an A57 (0xd07)
   // we assume the worst and assume we could be on a big little system and have
   // undisclosed A53 cores which we could be swapped to at any stage
-  if (_cpu == CPU_ARM && cpu_lines == 1 && _model == 0xd07) _cpuFeatures |= CPU_A53MAC;
+  if (_cpu == CPU_ARM && cpu_lines == 1 && _model == 0xd07) _features |= CPU_A53MAC;
 
   sprintf(buf, "0x%02x:0x%x:0x%03x:%d", _cpu, _variant, _model, _revision);
   if (_model2) sprintf(buf+strlen(buf), "(0x%03x)", _model2);
@@ -169,7 +167,7 @@
   if (auxv & HWCAP_SHA1)  strcat(buf, ", sha1");
   if (auxv & HWCAP_SHA2)  strcat(buf, ", sha256");
 
-  _features_str = os::strdup(buf);
+  _features_string = os::strdup(buf);
 
   if (FLAG_IS_DEFAULT(UseCRC32)) {
     UseCRC32 = (auxv & HWCAP_CRC32) != 0;
@@ -182,6 +180,11 @@
     FLAG_SET_DEFAULT(UseAdler32Intrinsics, true);
   }
 
+  if (UseVectorizedMismatchIntrinsic) {
+    warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU.");
+    FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false);
+  }
+
   if (auxv & HWCAP_AES) {
     UseAES = UseAES || FLAG_IS_DEFAULT(UseAES);
     UseAESIntrinsics =
@@ -199,6 +202,11 @@
     }
   }
 
+  if (UseAESCTRIntrinsics) {
+    warning("AES/CTR intrinsics are not available on this CPU");
+    FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false);
+  }
+
   if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) {
     UseCRC32Intrinsics = true;
   }
@@ -267,7 +275,7 @@
   }
 
   if (FLAG_IS_DEFAULT(UseBarriersForVolatile)) {
-    UseBarriersForVolatile = (_cpuFeatures & CPU_DMB_ATOMICS) != 0;
+    UseBarriersForVolatile = (_features & CPU_DMB_ATOMICS) != 0;
   }
 
   if (FLAG_IS_DEFAULT(UsePopCountInstruction)) {
--- a/src/cpu/aarch64/vm/vm_version_aarch64.hpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/aarch64/vm/vm_version_aarch64.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -30,7 +30,8 @@
 #include "runtime/vm_version.hpp"
 
 class VM_Version : public Abstract_VM_Version {
-public:
+  friend class JVMCIVMStructs;
+
 protected:
   static int _cpu;
   static int _model;
@@ -38,9 +39,6 @@
   static int _variant;
   static int _revision;
   static int _stepping;
-  static int _cpuFeatures;     // features returned by the "cpuid" instruction
-                               // 0 if this instruction is not available
-  static const char* _features_str;
 
   static void get_processor_features();
 
@@ -52,7 +50,7 @@
   static void assert_is_initialized() {
   }
 
-  enum {
+  enum Family {
     CPU_ARM       = 'A',
     CPU_BROADCOM  = 'B',
     CPU_CAVIUM    = 'C',
@@ -64,9 +62,9 @@
     CPU_QUALCOM   = 'Q',
     CPU_MARVELL   = 'V',
     CPU_INTEL     = 'i',
-  } cpuFamily;
+  };
 
-  enum {
+  enum Feature_Flag {
     CPU_FP           = (1<<0),
     CPU_ASIMD        = (1<<1),
     CPU_EVTSTRM      = (1<<2),
@@ -77,16 +75,13 @@
     CPU_CRC32        = (1<<7),
     CPU_A53MAC       = (1 << 30),
     CPU_DMB_ATOMICS  = (1 << 31),
-  } cpuFeatureFlags;
+  };
 
-  static const char* cpu_features()           { return _features_str; }
   static int cpu_family()                     { return _cpu; }
   static int cpu_model()                      { return _model; }
   static int cpu_model2()                     { return _model2; }
   static int cpu_variant()                    { return _variant; }
   static int cpu_revision()                   { return _revision; }
-  static int cpu_cpuFeatures()                { return _cpuFeatures; }
-
 };
 
 #endif // CPU_AARCH64_VM_VM_VERSION_AARCH64_HPP
--- a/src/cpu/ppc/vm/abstractInterpreter_ppc.cpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/ppc/vm/abstractInterpreter_ppc.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -141,9 +141,9 @@
 
   intptr_t* locals_base  = (caller->is_interpreted_frame()) ?
     caller->interpreter_frame_esp() + caller_actual_parameters :
-    caller->sp() + method->max_locals() - 1 + (frame::abi_minframe_size / Interpreter::stackElementSize) ;
+    caller->sp() + method->max_locals() - 1 + (frame::abi_minframe_size / Interpreter::stackElementSize);
 
-  intptr_t* monitor_base = caller->sp() - frame::ijava_state_size / Interpreter::stackElementSize ;
+  intptr_t* monitor_base = caller->sp() - frame::ijava_state_size / Interpreter::stackElementSize;
   intptr_t* monitor      = monitor_base - (moncount * frame::interpreter_frame_monitor_size());
   intptr_t* esp_base     = monitor - 1;
   intptr_t* esp          = esp_base - tempcount - popframe_extra_args;
--- a/src/cpu/ppc/vm/assembler_ppc.cpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/ppc/vm/assembler_ppc.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -53,9 +53,6 @@
   return 0x00;                  // illegal instruction 0x00000000
 }
 
-void Assembler::print_instruction(int inst) {
-  Unimplemented();
-}
 
 // Patch instruction `inst' at offset `inst_pos' to refer to
 // `dest_pos' and return the resulting instruction.  We should have
@@ -484,7 +481,7 @@
       if (d != s) { mr(d, s); }
       return 0;
     }
-    if (return_simm16_rest) {
+    if (return_simm16_rest && (d == s)) {
       return xd;
     }
     addi(d, s, xd);
--- a/src/cpu/ppc/vm/assembler_ppc.hpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/ppc/vm/assembler_ppc.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -31,10 +31,37 @@
 // Address is an abstraction used to represent a memory location
 // as used in assembler instructions.
 // PPC instructions grok either baseReg + indexReg or baseReg + disp.
-// So far we do not use this as simplification by this class is low
-// on PPC with its simple addressing mode. Use RegisterOrConstant to
-// represent an offset.
 class Address VALUE_OBJ_CLASS_SPEC {
+ private:
+  Register _base;         // Base register.
+  Register _index;        // Index register.
+  intptr_t _disp;         // Displacement (kept as intptr_t; disp() hands it out as int).
+
+ public:
+  Address(Register b, Register i, address d = 0)  // base + index; d is stored as an integer displacement
+    : _base(b), _index(i), _disp((intptr_t)d) {
+    assert(i == noreg || d == 0, "can't have both");
+  }
+
+  Address(Register b, address d = 0)  // base + displacement given as an address value, no index
+    : _base(b), _index(noreg), _disp((intptr_t)d) {}
+
+  Address(Register b, intptr_t d)  // base + integer displacement, no index
+    : _base(b), _index(noreg), _disp(d) {}
+
+  Address(Register b, RegisterOrConstant roc)  // base + either a constant displacement or an index register
+    : _base(b), _index(noreg), _disp(0) {
+    if (roc.is_constant()) _disp = roc.as_constant(); else _index = roc.as_register();
+  }
+
+  Address()  // no base, no index, zero displacement
+    : _base(noreg), _index(noreg), _disp(0) {}
+
+  // accessors
+  Register base()  const { return _base; }
+  Register index() const { return _index; }
+  int      disp()  const { return (int)_disp; }  // narrows the stored intptr_t to int
+  bool     is_const() const { return _base == noreg && _index == noreg; }  // true when neither register is set
 };
 
 class AddressLiteral VALUE_OBJ_CLASS_SPEC {
@@ -164,10 +191,14 @@
 };
 #endif
 
+
+// The PPC Assembler: Pure assembler doing NO optimizations on the
+// instruction level; i.e., what you write is what you get. The
+// Assembler is generating code into a CodeBuffer.
+
 class Assembler : public AbstractAssembler {
  protected:
   // Displacement routines
-  static void print_instruction(int inst);
   static int  patched_branch(int dest_pos, int inst, int inst_pos);
   static int  branch_destination(int inst, int pos);
 
@@ -839,41 +870,38 @@
 
   enum Predict { pt = 1, pn = 0 }; // pt = predict taken
 
-  // instruction must start at passed address
+  // Instruction must start at passed address.
   static int instr_len(unsigned char *instr) { return BytesPerInstWord; }
 
-  // instruction must be left-justified in argument
-  static int instr_len(unsigned long instr)  { return BytesPerInstWord; }
-
   // longest instructions
   static int instr_maxlen() { return BytesPerInstWord; }
 
   // Test if x is within signed immediate range for nbits.
   static bool is_simm(int x, unsigned int nbits) {
     assert(0 < nbits && nbits < 32, "out of bounds");
-    const int   min      = -( ((int)1) << nbits-1 );
-    const int   maxplus1 =  ( ((int)1) << nbits-1 );
+    const int   min      = -(((int)1) << nbits-1);  // shift count is nbits-1: '-' binds tighter than '<<'
+    const int   maxplus1 =  (((int)1) << nbits-1);  // one past the largest accepted value
     return min <= x && x < maxplus1;
   }
 
   static bool is_simm(jlong x, unsigned int nbits) {
     assert(0 < nbits && nbits < 64, "out of bounds");
-    const jlong min      = -( ((jlong)1) << nbits-1 );
-    const jlong maxplus1 =  ( ((jlong)1) << nbits-1 );
+    const jlong min      = -(((jlong)1) << nbits-1);  // shift count is nbits-1: '-' binds tighter than '<<'
+    const jlong maxplus1 =  (((jlong)1) << nbits-1);  // one past the largest accepted value
     return min <= x && x < maxplus1;
   }
 
-  // Test if x is within unsigned immediate range for nbits
+  // Test if x is within unsigned immediate range for nbits.
   static bool is_uimm(int x, unsigned int nbits) {
     assert(0 < nbits && nbits < 32, "out of bounds");
-    const int   maxplus1 = ( ((int)1) << nbits );
-    return 0 <= x && x < maxplus1;
+    const unsigned int maxplus1 = (((unsigned int)1) << nbits);  // one past the largest accepted value
+    return (unsigned int)x < maxplus1;  // unsigned compare: a negative x converts to a value >= 2^31 and is rejected
   }
 
   static bool is_uimm(jlong x, unsigned int nbits) {
     assert(0 < nbits && nbits < 64, "out of bounds");
-    const jlong maxplus1 =  ( ((jlong)1) << nbits );
-    return 0 <= x && x < maxplus1;
+    const julong maxplus1 = (((julong)1) << nbits);  // one past the largest accepted value
+    return (julong)x < maxplus1;  // unsigned compare: a negative x converts to a value >= 2^63 and is rejected
   }
 
  protected:
@@ -1196,6 +1224,8 @@
   inline void mullw_( Register d, Register a, Register b);
   inline void mulhw(  Register d, Register a, Register b);
   inline void mulhw_( Register d, Register a, Register b);
+  inline void mulhwu( Register d, Register a, Register b);
+  inline void mulhwu_(Register d, Register a, Register b);
   inline void mulhd(  Register d, Register a, Register b);
   inline void mulhd_( Register d, Register a, Register b);
   inline void mulhdu( Register d, Register a, Register b);
@@ -1376,8 +1406,11 @@
   inline void orc(    Register a, Register s, Register b);
   inline void orc_(   Register a, Register s, Register b);
   inline void extsb(  Register a, Register s);
+  inline void extsb_( Register a, Register s);
   inline void extsh(  Register a, Register s);
+  inline void extsh_( Register a, Register s);
   inline void extsw(  Register a, Register s);
+  inline void extsw_( Register a, Register s);
 
   // extended mnemonics
   inline void nop();
@@ -1767,6 +1800,8 @@
   inline void smt_yield();
   inline void smt_mdoio();
   inline void smt_mdoom();
+  // >= Power8
+  inline void smt_miso();
 
   // trap instructions
   inline void twi_0(Register a); // for load with acquire semantics use load+twi_0+isync (trap can't occur)
@@ -2168,6 +2203,7 @@
   inline void load_const(Register d, void* a,           Register tmp = noreg);
   inline void load_const(Register d, Label& L,          Register tmp = noreg);
   inline void load_const(Register d, AddressLiteral& a, Register tmp = noreg);
+  inline void load_const32(Register d, int i); // load signed int (patchable)
 
   // Load a 64 bit constant, optimized, not identifyable.
   // Tmp can be used to increase ILP. Set return_simm16_rest = true to get a
--- a/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Sat Jan 16 13:56:49 2016 +0300
+++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -117,6 +117,8 @@
 inline void Assembler::mullw_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
 inline void Assembler::mulhw(  Register d, Register a, Register b) { emit_int32(MULHW_OPCODE  | rt(d) | ra(a) | rb(b) | rc(0)); }
 inline void Assembler::mulhw_( Register d, Register a, Register b) { emit_int32(MULHW_OPCODE  | rt(d) | ra(a) | rb(b) | rc(1)); }
+inline void Assembler::mulhwu( Register d, Register a, Register b) { emit_int32(MULHWU_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); } // rc(0) form; mulhwu_ is the rc(1) form
+inline void Assembler::mulhwu_(Register d, Register a, Register b) { emit_int32(MULHWU_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); } // same encoding as mulhwu, but with rc(1)
 inline void Assembler::mulhd(  Register d, Register a, Register b) { emit_int32(MULHD_OPCODE  | rt(d) | ra(a) | rb(b) | rc(0)); }
 inline void Assembler::mulhd_( Register d, Register a, Register b) { emit_int32(MULHD_OPCODE  | rt(d) | ra(a) | rb(b) | rc(1)); }
 inline void Assembler::mulhdu( Register d, Register a, Register b) { emit_int32(MULHDU_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); }
@@ -206,8 +208,11 @@
 inline void Assembler::orc(     Register a, Register s, Register b)    { emit_int32(ORC_OPCODE     | rta(a) | rs(s) | rb(b) | rc(0)); }
 inline void Assembler::orc_(    Register a, Register s, Register b)    { emit_int32(ORC_OPCODE     | rta(a) | rs(s) | rb(b) | rc(1)); }
 inline void Assembler::extsb(   Register a, Register s)                { emit_int32(EXTSB_OPCODE   | rta(a) | rs(s) | rc(0)); }
+inline void Assembler::extsb_(  Register a, Register s)                { emit_int32(EXTSB_OPCODE   | rta(a) | rs(s) | rc(1)); } // same encoding as extsb, but with rc(1)
 inline void Assembler::extsh(   Register a, Register s)                { emit_int32(EXTSH_OPCODE   | rta(a) | rs(s) | rc(0)); }
+inline void Assembler::extsh_(  Register a, Register s)                { emit_int32(EXTSH_OPCODE   | rta(a) | rs(s) | rc(1)); } // same encoding as extsh, but with rc(1)
 inline void Assembler::extsw(   Register a, Register s)                { emit_int32(EXTSW_OPCODE   | rta(a) | rs(s) | rc(0)); }
+inline void Assembler::extsw_(  Register a, Register s)                { emit_int32(EXTSW_OPCODE   | rta(a) | rs(s) | rc(1)); } // same encoding as extsw, but with rc(1)
 
 // extended mnemonics
 inline void Assembler::nop()                              { Assembler::ori(R0, R0, 0); }
@@ -609,6 +614,8 @@
 inline void Assembler::smt_yield()            { Assembler::or_unchecked(R27, R27, R27); }
 inline void Assembler::smt_mdoio()            { Assembler::or_unchecked(R29, R29, R29); }
 inline void Assembler::smt_mdoom()            { Assembler::or_unchecked(R30, R30, R30); }
+// >= Power8
+inline void Assembler::smt_miso()             { Assembler::or_unchecked(R26, R26, R26); } // emitted as an 'or' of R26 with itself, like the other smt_* hints above
 
 inline void Assembler::twi_0(Register a)      { twi_unchecked(0, a, 0);}
 
@@ -967,12 +974,15 @@
 
 // Load a 64 bit constant encoded by an AddressLiteral. patchable.
 inline void Assembler::load_const(Register d, AddressLiteral& a, Register tmp) {
-  assert(d != R0, "R0 not allowed");
   // First relocate (we don't change the offset in the RelocationHolder,
   // just pass a.rspec()), then delegate to load_const(Register, long).
   relocate(a.rspec());
   load_const(d, (long)a.value(), tmp);
 }
 
+inline void Assembler::load_const32(Register d, int i) { // Always issues the same two calls (lis, then ori), whatever the value of i.
+  lis(d, i >> 16);       // Upper 16 bits of i.
+  ori(d, d, i & 0xFFFF); // Lower 16 bits of i.
+}
 
 #endif // CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/c1_CodeStubs_ppc.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -0,0 +1,527 @@
+/*
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_CodeStubs.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "nativeInst_ppc.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "utilities/macros.hpp"
+#include "vmreg_ppc.inline.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc/g1/g1SATBCardTableModRefBS.hpp"
+#endif // INCLUDE_ALL_GCS
+
+#define __ ce->masm()->
+
+
+RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index,
+                               bool throw_index_out_of_bounds_exception)
+  : _throw_index_out_of_bounds_exception(throw_index_out_of_bounds_exception)
+  , _index(index) {
+  assert(info != NULL, "must have info");
+  _info = new CodeEmitInfo(info);
+}
+
+void RangeCheckStub::emit_code(LIR_Assembler* ce) { // Calls the predicate-failed trap or one of the two throwing Runtime1 entries.
+  __ bind(_entry);
+
+  if (_info->deoptimize_on_exception()) {
+    address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+    // May be used by optimizations like LoopInvariantCodeMotion or RangeCheckEliminator.
+    DEBUG_ONLY( __ untested("RangeCheckStub: predicate_failed_trap_id"); )
+    //__ load_const_optimized(R0, a);
+    __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a)); // Target = R29_TOC + offset of a to the global TOC.
+    __ mtctr(R0);
+    __ bctrl();
+    ce->add_call_info_here(_info);
+    ce->verify_oop_map(_info);
+    debug_only(__ illtrap());
+    return; // No index is passed on this path.
+  }
+
+  address stub = _throw_index_out_of_bounds_exception ? Runtime1::entry_for(Runtime1::throw_index_exception_id)
+                                                      : Runtime1::entry_for(Runtime1::throw_range_check_failed_id);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
+  __ mtctr(R0); // The call target now lives in CTR, so R0 can be reused for the index below.
+
+  Register index = R0; // The index is passed to the runtime stub in R0.
+  if (_index->is_register()) {
+    __ extsw(index, _index->as_register());
+  } else {
+    __ load_const_optimized(index, _index->as_jint());
+  }
+
+  __ bctrl();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ illtrap()); // Debug builds only; presumably the call above does not return here.
+}
+
+
+PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) {
+  _info = new CodeEmitInfo(info);
+}
+
+void PredicateFailedStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+  //__ load_const_optimized(R0, a);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a));
+  __ mtctr(R0);
+  __ bctrl();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ illtrap());
+}
+
+
+void CounterOverflowStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  // Parameter 1: bci
+  __ load_const_optimized(R0, _bci);
+  __ std(R0, -16, R1_SP);
+
+  // Parameter 2: Method*
+  Metadata *m = _method->as_constant_ptr()->as_metadata();
+  AddressLiteral md = __ constant_metadata_address(m); // Notify OOP recorder (don't need the relocation).
+  __ load_const_optimized(R0, md.value());
+  __ std(R0, -8, R1_SP);
+
+  address a = Runtime1::entry_for(Runtime1::counter_overflow_id);
+  //__ load_const_optimized(R0, a);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a));
+  __ mtctr(R0);
+  __ bctrl();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+
+  __ b(_continuation);
+}
+
+
+void DivByZeroStub::emit_code(LIR_Assembler* ce) {
+  if (_offset != -1) {
+    ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  }
+  __ bind(_entry);
+  address stub = Runtime1::entry_for(Runtime1::throw_div0_exception_id);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
+  __ mtctr(R0);
+  __ bctrl();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ illtrap());
+}
+
+
+void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) {
+  address a;
+  if (_info->deoptimize_on_exception()) {
+    // Deoptimize, do not throw the exception, because it is probably wrong to do it here.
+    a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id);
+  } else {
+    a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id);
+  }
+
+  if (ImplicitNullChecks || TrapBasedNullChecks) {
+    ce->compilation()->implicit_exception_table()->append(_offset, __ offset());
+  }
+  __ bind(_entry);
+  //__ load_const_optimized(R0, a);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(a));
+  __ mtctr(R0);
+  __ bctrl();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  debug_only(__ illtrap());
+}
+
+
+// Implementation of SimpleExceptionStub
+void SimpleExceptionStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  address stub = Runtime1::entry_for(_stub);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
+  if (_obj->is_valid()) { __ mr_if_needed(/*tmp1 in do_CheckCast*/ R4_ARG2, _obj->as_register()); }
+  __ mtctr(R0);
+  __ bctrl();
+  ce->add_call_info_here(_info);
+  debug_only( __ illtrap(); )
+}
+
+
+// Implementation of NewInstanceStub
+NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) {
+  _result = result;
+  _klass = klass;
+  _klass_reg = klass_reg;
+  _info = new CodeEmitInfo(info);
+  assert(stub_id == Runtime1::new_instance_id                 ||
+         stub_id == Runtime1::fast_new_instance_id            ||
+         stub_id == Runtime1::fast_new_instance_init_check_id,
+         "need new_instance id");
+  _stub_id = stub_id;
+}
+
+void NewInstanceStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  address entry = Runtime1::entry_for(_stub_id);
+  //__ load_const_optimized(R0, entry);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry));
+  __ mtctr(R0);
+  __ bctrl();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ b(_continuation);
+}
+
+
+// Implementation of NewTypeArrayStub
+NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _length = length;
+  _result = result;
+  _info = new CodeEmitInfo(info);
+}
+
+void NewTypeArrayStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  address entry = Runtime1::entry_for(Runtime1::new_type_array_id);
+  //__ load_const_optimized(R0, entry);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry));
+  __ mr_if_needed(/*op->tmp1()->as_register()*/ R5_ARG3, _length->as_register()); // already sign-extended
+  __ mtctr(R0);
+  __ bctrl();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ b(_continuation);
+}
+
+
+// Implementation of NewObjectArrayStub
+NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) {
+  _klass_reg = klass_reg;
+  _length = length;
+  _result = result;
+  _info = new CodeEmitInfo(info);
+}
+
+void NewObjectArrayStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+
+  address entry = Runtime1::entry_for(Runtime1::new_object_array_id);
+  //__ load_const_optimized(R0, entry);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry));
+  __ mr_if_needed(/*op->tmp1()->as_register()*/ R5_ARG3, _length->as_register()); // already sign-extended
+  __ mtctr(R0);
+  __ bctrl();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ b(_continuation);
+}
+
+
+// Implementation of MonitorAccessStubs
+MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info)
+  : MonitorAccessStub(obj_reg, lock_reg) {
+  _info = new CodeEmitInfo(info);
+}
+
+void MonitorEnterStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  address stub = Runtime1::entry_for(ce->compilation()->has_fpu_code() ? Runtime1::monitorenter_id : Runtime1::monitorenter_nofpu_id);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
+  __ mr_if_needed(/*scratch_opr()->as_register()*/ R4_ARG2, _obj_reg->as_register());
+  assert(_lock_reg->as_register() == R5_ARG3, "");
+  __ mtctr(R0);
+  __ bctrl();
+  ce->add_call_info_here(_info);
+  ce->verify_oop_map(_info);
+  __ b(_continuation);
+}
+
+void MonitorExitStub::emit_code(LIR_Assembler* ce) {
+  __ bind(_entry);
+  if (_compute_lock) {
+    ce->monitor_address(_monitor_ix, _lock_reg);
+  }
+  address stub = Runtime1::entry_for(ce->compilation()->has_fpu_code() ? Runtime1::monitorexit_id : Runtime1::monitorexit_nofpu_id);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
+  assert(_lock_reg->as_register() == R4_ARG2, "");
+  __ mtctr(R0);
+  __ bctrl();
+  __ b(_continuation);
+}
+
+
+// Implementation of patching:
+// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes).
+// - Replace original code with a call to the stub.
+// At Runtime:
+// - call to stub, jump to runtime
+// - in runtime: preserve all registers (especially objects, i.e., source and destination object)
+// - in runtime: after initializing class, restore original code, reexecute instruction
+
+int PatchingStub::_patch_info_offset = -(5 * BytesPerInstWord); // patch_info_pc minus the pc after the bctrl in emit_code; its call sequence (load_const32, add, mtctr, bctrl) is asserted to match this.
+
+void PatchingStub::align_patch_site(MacroAssembler* ) {
+  // Patch sites on ppc are always properly aligned.
+}
+
+#ifdef ASSERT
+inline void compare_with_patch_site(address template_start, address pc_start, int bytes_to_copy) {
+  address start = template_start;
+  for (int i = 0; i < bytes_to_copy; i++) {
+    address ptr = (address)(pc_start + i);
+    int a_byte = (*ptr) & 0xFF;
+    assert(a_byte == *start++, "should be the same code");
+  }
+}
+#endif
+
+void PatchingStub::emit_code(LIR_Assembler* ce) {
+  // copy original code here
+  assert(NativeGeneralJump::instruction_size <= _bytes_to_copy && _bytes_to_copy <= 0xFF,
+         "not enough room for call");
+  assert((_bytes_to_copy & 0x3) == 0, "must copy a multiple of four bytes");
+
+  Label call_patch;
+
+  int being_initialized_entry = __ offset();
+
+  if (_id == load_klass_id) {
+    // Produce a copy of the load klass instruction for use by the being initialized case.
+    AddressLiteral addrlit((address)NULL, metadata_Relocation::spec(_index));
+    __ load_const(_obj, addrlit, R0);
+    DEBUG_ONLY( compare_with_patch_site(__ code_section()->start() + being_initialized_entry, _pc_start, _bytes_to_copy); )
+  } else if (_id == load_mirror_id || _id == load_appendix_id) {
+    // Produce a copy of the load mirror instruction for use by the being initialized case.
+    AddressLiteral addrlit((address)NULL, oop_Relocation::spec(_index));
+    __ load_const(_obj, addrlit, R0);
+    DEBUG_ONLY( compare_with_patch_site(__ code_section()->start() + being_initialized_entry, _pc_start, _bytes_to_copy); )
+  } else {
+    // Make a copy of the code which is going to be patched.
+    for (int i = 0; i < _bytes_to_copy; i++) {
+      address ptr = (address)(_pc_start + i);
+      int a_byte = (*ptr) & 0xFF;
+      __ emit_int8 (a_byte);
+    }
+  }
+
+  address end_of_patch = __ pc();
+  int bytes_to_skip = 0;
+  if (_id == load_mirror_id) {
+    int offset = __ offset();
+    __ block_comment(" being_initialized check");
+
+    // Static field accesses have special semantics while the class
+    // initializer is being run so we emit a test which can be used to
+    // check that this code is being executed by the initializing
+    // thread.
+    assert(_obj != noreg, "must be a valid register");
+    assert(_index >= 0, "must have oop index");
+    __ mr(R0, _obj); // spill
+    __ ld(_obj, java_lang_Class::klass_offset_in_bytes(), _obj);
+    __ ld(_obj, in_bytes(InstanceKlass::init_thread_offset()), _obj);
+    __ cmpd(CCR0, _obj, R16_thread);
+    __ mr(_obj, R0); // restore
+    __ bne(CCR0, call_patch);
+
+    // Load_klass patches may execute the patched code before it's
+    // copied back into place so we need to jump back into the main
+    // code of the nmethod to continue execution.
+    __ b(_patch_site_continuation);
+
+    // Make sure this extra code gets skipped.
+    bytes_to_skip += __ offset() - offset;
+  }
+
+  // Now emit the patch record telling the runtime how to find the
+  // pieces of the patch.  We only need 3 bytes but it has to be
+  // aligned as an instruction so emit 4 bytes.
+  int sizeof_patch_record = 4;
+  bytes_to_skip += sizeof_patch_record;
+
+  // Emit the offsets needed to find the code to patch.
+  int being_initialized_entry_offset = __ offset() - being_initialized_entry + sizeof_patch_record;
+
+  // Emit the patch record.  We need to emit a full word, so emit an extra empty byte.
+  __ emit_int8(0);
+  __ emit_int8(being_initialized_entry_offset);
+  __ emit_int8(bytes_to_skip);
+  __ emit_int8(_bytes_to_copy);
+  address patch_info_pc = __ pc();
+  assert(patch_info_pc - end_of_patch == bytes_to_skip, "incorrect patch info");
+
+  address entry = __ pc();
+  NativeGeneralJump::insert_unconditional((address)_pc_start, entry);
+  address target = NULL;
+  relocInfo::relocType reloc_type = relocInfo::none;
+  switch (_id) {
+    case access_field_id:  target = Runtime1::entry_for(Runtime1::access_field_patching_id); break;
+    case load_klass_id:    target = Runtime1::entry_for(Runtime1::load_klass_patching_id);
+                           reloc_type = relocInfo::metadata_type; break;
+    case load_mirror_id:   target = Runtime1::entry_for(Runtime1::load_mirror_patching_id);
+                           reloc_type = relocInfo::oop_type; break;
+    case load_appendix_id: target = Runtime1::entry_for(Runtime1::load_appendix_patching_id);
+                           reloc_type = relocInfo::oop_type; break;
+    default: ShouldNotReachHere();
+  }
+  __ bind(call_patch);
+
+  __ block_comment("patch entry point");
+  //__ load_const(R0, target); + mtctr + bctrl must have size -_patch_info_offset
+  __ load_const32(R0, MacroAssembler::offset_to_global_toc(target));
+  __ add(R0, R29_TOC, R0);
+  __ mtctr(R0);
+  __ bctrl();
+  assert(_patch_info_offset == (patch_info_pc - __ pc()), "must not change");
+  ce->add_call_info_here(_info);
+  __ b(_patch_site_entry);
+  if (_id == load_klass_id || _id == load_mirror_id || _id == load_appendix_id) {
+    CodeSection* cs = __ code_section();
+    address pc = (address)_pc_start;
+    RelocIterator iter(cs, pc, pc + 1);
+    relocInfo::change_reloc_info_for_address(&iter, (address) pc, reloc_type, relocInfo::none);
+  }
+}
+
+
+void DeoptimizeStub::emit_code(LIR_Assembler* ce) { // Calls the Runtime1 deoptimize entry with _trap_request in R0.
+  __ bind(_entry);
+  address stub = Runtime1::entry_for(Runtime1::deoptimize_id);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
+  __ mtctr(R0); // The call target now lives in CTR, so R0 can be reused for the argument below.
+
+  __ load_const_optimized(R0, _trap_request); // Pass trap request in R0.
+  __ bctrl();
+  ce->add_call_info_here(_info);
+  debug_only(__ illtrap()); // Debug builds only; presumably the call above does not return here.
+}
+
+
+void ArrayCopyStub::emit_code(LIR_Assembler* ce) {
+  //---------------slow case: call to native-----------------
+  __ bind(_entry);
+  __ mr(R3_ARG1, src()->as_register());
+  __ extsw(R4_ARG2, src_pos()->as_register());
+  __ mr(R5_ARG3, dst()->as_register());
+  __ extsw(R6_ARG4, dst_pos()->as_register());
+  __ extsw(R7_ARG5, length()->as_register());
+
+  ce->emit_static_call_stub();
+
+  bool success = ce->emit_trampoline_stub_for_call(SharedRuntime::get_resolve_static_call_stub());
+  if (!success) { return; }
+
+  __ relocate(relocInfo::static_call_type);
+  // Note: At this point we do not have the address of the trampoline
+  // stub, and the entry point might be too far away for bl, so __ pc()
+  // serves as dummy and the bl will be patched later.
+  __ code()->set_insts_mark();
+  __ bl(__ pc());
+  ce->add_call_info_here(info());
+  ce->verify_oop_map(info());
+
+#ifndef PRODUCT
+  const address counter = (address)&Runtime1::_arraycopy_slowcase_cnt;
+  const Register tmp = R3, tmp2 = R4;
+  int simm16_offs = __ load_const_optimized(tmp, counter, tmp2, true);
+  __ lwz(tmp2, simm16_offs, tmp);
+  __ addi(tmp2, tmp2, 1);
+  __ stw(tmp2, simm16_offs, tmp);
+#endif
+
+  __ b(_continuation);
+}
+
+
+///////////////////////////////////////////////////////////////////////////////////
+#if INCLUDE_ALL_GCS
+
+void G1PreBarrierStub::emit_code(LIR_Assembler* ce) {
+  // At this point we know that marking is in progress.
+  // If do_load() is true then we have to emit the
+  // load of the previous value; otherwise it has already
+  // been loaded into _pre_val.
+
+  __ bind(_entry);
+
+  assert(pre_val()->is_register(), "Precondition.");
+  Register pre_val_reg = pre_val()->as_register();
+
+  if (do_load()) {
+    ce->mem2reg(addr(), pre_val(), T_OBJECT, patch_code(), info(), false /*wide*/, false /*unaligned*/);
+  }
+
+  __ cmpdi(CCR0, pre_val_reg, 0); // Compare the previous value with 0 (null).
+  __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), _continuation); // If equal, skip the slow call.
+
+  address stub = Runtime1::entry_for(Runtime1::Runtime1::g1_pre_barrier_slow_id);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
+  __ std(pre_val_reg, -8, R1_SP); // Pass pre_val on stack, at offset -8 from R1_SP.
+  __ mtctr(R0);
+  __ bctrl();
+  __ b(_continuation); // Resume the main code after the slow call.
+}
+
+void G1PostBarrierStub::emit_code(LIR_Assembler* ce) { // Calls the G1 post-barrier slow entry with the address in R0.
+  __ bind(_entry);
+
+  assert(addr()->is_register(), "Precondition.");
+  assert(new_val()->is_register(), "Precondition.");
+  Register addr_reg = addr()->as_pointer_register();
+  Register new_val_reg = new_val()->as_register();
+
+  __ cmpdi(CCR0, new_val_reg, 0); // Compare the new value with 0 (null).
+  __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::equal), _continuation); // If equal, skip the slow call.
+
+  address stub = Runtime1::entry_for(Runtime1::Runtime1::g1_post_barrier_slow_id);
+  //__ load_const_optimized(R0, stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
+  __ mtctr(R0); // The call target now lives in CTR, so R0 can be reused for the argument below.
+  __ mr(R0, addr_reg); // Pass addr in R0.
+  __ bctrl();
+  __ b(_continuation); // Resume the main code after the slow call.
+}
+
+#endif // INCLUDE_ALL_GCS
+///////////////////////////////////////////////////////////////////////////////////
+
+#undef __
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/c1_Defs_ppc.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_C1_DEFS_PPC_HPP
+#define CPU_PPC_VM_C1_DEFS_PPC_HPP
+
+// Native word offsets from memory address.
+enum {
+#if defined(VM_LITTLE_ENDIAN)
+  pd_lo_word_offset_in_bytes = 0,
+  pd_hi_word_offset_in_bytes = BytesPerInt
+#else
+  pd_lo_word_offset_in_bytes = BytesPerInt,
+  pd_hi_word_offset_in_bytes = 0
+#endif
+};
+
+
+// Explicit rounding operations are not required to implement the strictFP mode.
+enum {
+  pd_strict_fp_requires_explicit_rounding = false
+};
+
+
+// registers
+enum {
+  pd_nof_cpu_regs_frame_map = 32,              // Number of registers used during code emission.
+  pd_nof_caller_save_cpu_regs_frame_map = 27,  // Number of cpu registers killed by calls. (At least R3_ARG1 ... R10_ARG8, but using all like C2.)
+  pd_nof_cpu_regs_reg_alloc = 27,              // Number of registers that are visible to register allocator.
+  pd_nof_cpu_regs_linearscan = 32,             // Number of registers visible to linear scan.
+  pd_first_callee_saved_reg = pd_nof_caller_save_cpu_regs_frame_map, // == 27.
+  pd_last_callee_saved_reg = pd_nof_cpu_regs_reg_alloc - 1,          // == 26, i.e. below the first one: the callee-saved range is empty.
+  pd_first_cpu_reg = 0,
+  pd_last_cpu_reg = pd_nof_cpu_regs_reg_alloc - 1,                   // == 26.
+
+  pd_nof_fpu_regs_frame_map = 32,              // Number of registers used during code emission.
+  pd_nof_caller_save_fpu_regs_frame_map = 32,  // Number of fpu registers killed by calls.
+  pd_nof_fpu_regs_reg_alloc = 32,              // Number of registers that are visible to register allocator.
+  pd_nof_fpu_regs_linearscan = 32,             // Number of registers visible to linear scan.
+  pd_first_fpu_reg = pd_nof_cpu_regs_frame_map,                                  // == 32: fpu numbers start right after the cpu numbers.
+  pd_last_fpu_reg =  pd_nof_cpu_regs_frame_map + pd_nof_fpu_regs_reg_alloc - 1,  // == 63.
+
+  pd_nof_xmm_regs_linearscan = 0,              // No xmm registers: counts are 0 and the range is -1..-1.
+  pd_nof_caller_save_xmm_regs = 0,
+  pd_first_xmm_reg = -1,
+  pd_last_xmm_reg = -1
+};
+
+// For debug info: a float value in a register is saved in double precision by runtime stubs.
+enum {
+  pd_float_saved_as_double = true
+};
+
+#endif // CPU_PPC_VM_C1_DEFS_PPC_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/c1_FpuStackSim_ppc.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_C1_FPUSTACKSIM_PPC_HPP
+#define CPU_PPC_VM_C1_FPUSTACKSIM_PPC_HPP
+
+// No FPU stack on PPC.
+class FpuStackSim;
+
+#endif // CPU_PPC_VM_C1_FPUSTACKSIM_PPC_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/c1_FrameMap_ppc.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_LIR.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "vmreg_ppc.inline.hpp"
+
+
+const int FrameMap::pd_c_runtime_reserved_arg_size = 7;
+
+
+LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool outgoing) {
+  LIR_Opr opr = LIR_OprFact::illegalOpr;
+  VMReg r_1 = reg->first();
+  VMReg r_2 = reg->second();
+  if (r_1->is_stack()) {
+    // Convert stack slot to an SP offset.
+    // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value
+    // so we must add it in here.
+    int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size;
+    opr = LIR_OprFact::address(new LIR_Address(SP_opr, st_off + STACK_BIAS, type));
+  } else if (r_1->is_Register()) {
+    Register reg = r_1->as_Register();
+    //if (outgoing) {
+    //  assert(!reg->is_in(), "should be using I regs");
+    //} else {
+    //  assert(!reg->is_out(), "should be using O regs");
+    //}
+    if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) {
+      opr = as_long_opr(reg);
+    } else if (type == T_OBJECT || type == T_ARRAY) {
+      opr = as_oop_opr(reg);
+    } else {
+      opr = as_opr(reg);
+    }
+  } else if (r_1->is_FloatRegister()) {
+    assert(type == T_DOUBLE || type == T_FLOAT, "wrong type");
+    FloatRegister f = r_1->as_FloatRegister();
+    if (type == T_DOUBLE) {
+      opr = as_double_opr(f);
+    } else {
+      opr = as_float_opr(f);
+    }
+  }
+  return opr;
+}
+
+//               FrameMap
+//--------------------------------------------------------
+
+FloatRegister FrameMap::_fpu_regs [FrameMap::nof_fpu_regs];
+
+LIR_Opr  FrameMap::R0_opr;
+LIR_Opr  FrameMap::R1_opr;
+LIR_Opr  FrameMap::R2_opr;
+LIR_Opr  FrameMap::R3_opr;
+LIR_Opr  FrameMap::R4_opr;
+LIR_Opr  FrameMap::R5_opr;
+LIR_Opr  FrameMap::R6_opr;
+LIR_Opr  FrameMap::R7_opr;
+LIR_Opr  FrameMap::R8_opr;
+LIR_Opr  FrameMap::R9_opr;
+LIR_Opr FrameMap::R10_opr;
+LIR_Opr FrameMap::R11_opr;
+LIR_Opr FrameMap::R12_opr;
+LIR_Opr FrameMap::R13_opr;
+LIR_Opr FrameMap::R14_opr;
+LIR_Opr FrameMap::R15_opr;
+LIR_Opr FrameMap::R16_opr;
+LIR_Opr FrameMap::R17_opr;
+LIR_Opr FrameMap::R18_opr;
+LIR_Opr FrameMap::R19_opr;
+LIR_Opr FrameMap::R20_opr;
+LIR_Opr FrameMap::R21_opr;
+LIR_Opr FrameMap::R22_opr;
+LIR_Opr FrameMap::R23_opr;
+LIR_Opr FrameMap::R24_opr;
+LIR_Opr FrameMap::R25_opr;
+LIR_Opr FrameMap::R26_opr;
+LIR_Opr FrameMap::R27_opr;
+LIR_Opr FrameMap::R28_opr;
+LIR_Opr FrameMap::R29_opr;
+LIR_Opr FrameMap::R30_opr;
+LIR_Opr FrameMap::R31_opr;
+
+LIR_Opr  FrameMap::R0_oop_opr;
+//LIR_Opr  FrameMap::R1_oop_opr;
+LIR_Opr  FrameMap::R2_oop_opr;
+LIR_Opr  FrameMap::R3_oop_opr;
+LIR_Opr  FrameMap::R4_oop_opr;
+LIR_Opr  FrameMap::R5_oop_opr;
+LIR_Opr  FrameMap::R6_oop_opr;
+LIR_Opr  FrameMap::R7_oop_opr;
+LIR_Opr  FrameMap::R8_oop_opr;
+LIR_Opr  FrameMap::R9_oop_opr;
+LIR_Opr FrameMap::R10_oop_opr;
+LIR_Opr FrameMap::R11_oop_opr;
+LIR_Opr FrameMap::R12_oop_opr;
+//LIR_Opr FrameMap::R13_oop_opr;
+LIR_Opr FrameMap::R14_oop_opr;
+LIR_Opr FrameMap::R15_oop_opr;
+//LIR_Opr FrameMap::R16_oop_opr;
+LIR_Opr FrameMap::R17_oop_opr;
+LIR_Opr FrameMap::R18_oop_opr;
+LIR_Opr FrameMap::R19_oop_opr;
+LIR_Opr FrameMap::R20_oop_opr;
+LIR_Opr FrameMap::R21_oop_opr;
+LIR_Opr FrameMap::R22_oop_opr;
+LIR_Opr FrameMap::R23_oop_opr;
+LIR_Opr FrameMap::R24_oop_opr;
+LIR_Opr FrameMap::R25_oop_opr;
+LIR_Opr FrameMap::R26_oop_opr;
+LIR_Opr FrameMap::R27_oop_opr;
+LIR_Opr FrameMap::R28_oop_opr;
+//LIR_Opr FrameMap::R29_oop_opr;
+LIR_Opr FrameMap::R30_oop_opr;
+LIR_Opr FrameMap::R31_oop_opr;
+
+LIR_Opr  FrameMap::R0_metadata_opr;
+//LIR_Opr  FrameMap::R1_metadata_opr;
+LIR_Opr  FrameMap::R2_metadata_opr;
+LIR_Opr  FrameMap::R3_metadata_opr;
+LIR_Opr  FrameMap::R4_metadata_opr;
+LIR_Opr  FrameMap::R5_metadata_opr;
+LIR_Opr  FrameMap::R6_metadata_opr;
+LIR_Opr  FrameMap::R7_metadata_opr;
+LIR_Opr  FrameMap::R8_metadata_opr;
+LIR_Opr  FrameMap::R9_metadata_opr;
+LIR_Opr FrameMap::R10_metadata_opr;
+LIR_Opr FrameMap::R11_metadata_opr;
+LIR_Opr FrameMap::R12_metadata_opr;
+//LIR_Opr FrameMap::R13_metadata_opr;
+LIR_Opr FrameMap::R14_metadata_opr;
+LIR_Opr FrameMap::R15_metadata_opr;
+//LIR_Opr FrameMap::R16_metadata_opr;
+LIR_Opr FrameMap::R17_metadata_opr;
+LIR_Opr FrameMap::R18_metadata_opr;
+LIR_Opr FrameMap::R19_metadata_opr;
+LIR_Opr FrameMap::R20_metadata_opr;
+LIR_Opr FrameMap::R21_metadata_opr;
+LIR_Opr FrameMap::R22_metadata_opr;
+LIR_Opr FrameMap::R23_metadata_opr;
+LIR_Opr FrameMap::R24_metadata_opr;
+LIR_Opr FrameMap::R25_metadata_opr;
+LIR_Opr FrameMap::R26_metadata_opr;
+LIR_Opr FrameMap::R27_metadata_opr;
+LIR_Opr FrameMap::R28_metadata_opr;
+//LIR_Opr FrameMap::R29_metadata_opr;
+LIR_Opr FrameMap::R30_metadata_opr;
+LIR_Opr FrameMap::R31_metadata_opr;
+
+LIR_Opr FrameMap::SP_opr;
+
+LIR_Opr FrameMap::R0_long_opr;
+LIR_Opr FrameMap::R3_long_opr;
+
+LIR_Opr FrameMap::F1_opr;
+LIR_Opr FrameMap::F1_double_opr;
+
+LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, };
+LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, };
+
+FloatRegister FrameMap::nr2floatreg (int rnr) {
+  assert(_init_done, "tables not initialized");
+  debug_only(fpu_range_check(rnr);)
+  return _fpu_regs[rnr];
+}
+
+
+// Returns true if reg could be smashed by a callee.
+bool FrameMap::is_caller_save_register (LIR_Opr reg) {
+  if (reg->is_single_fpu() || reg->is_double_fpu()) { return true; }
+  if (reg->is_double_cpu()) {
+    return is_caller_save_register(reg->as_register_lo()) ||
+           is_caller_save_register(reg->as_register_hi());
+  }
+  return is_caller_save_register(reg->as_register());
+}
+
+
+// Register overload of the caller-save query. The argument is not inspected:
+// the function reports true for every register. The commented-out expression
+// below is an encoding-based test that is currently disabled.
+bool FrameMap::is_caller_save_register (Register r) {
+  // not visible to allocator: R0: scratch, R1: SP
+  // r->encoding() < 2 + nof_caller_save_cpu_regs();
+  return true; // Currently all regs are caller save.
+}
+
+
+// One-time setup of the PPC-specific FrameMap tables and static operands.
+//
+// Steps, in order:
+//  1. Map every CPU register for which reg_needs_save() holds to the lowest
+//     register numbers, then map the remaining CPU registers behind them.
+//  2. Fill the FPU register table.
+//  3. Mark the tables as initialized and create the preallocated operands.
+//  4. Fill the caller-save operand tables.
+//
+// Must be called exactly once; a second call trips the "once" assertion.
+void FrameMap::initialize() {
+  assert(!_init_done, "once");
+
+  int i = 0;
+
+  // Put generally available registers at the beginning (allocated, saved for GC).
+  for (int j = 0; j < nof_cpu_regs; ++j) {
+    Register rj = as_Register(j);
+    if (reg_needs_save(rj)) {
+      map_register(i++, rj);
+    }
+  }
+  assert(i == nof_cpu_regs_reg_alloc, "number of allocated registers");
+
+  // The following registers are not normally available.
+  for (int j = 0; j < nof_cpu_regs; ++j) {
+    Register rj = as_Register(j);
+    if (!reg_needs_save(rj)) {
+      map_register(i++, rj);
+    }
+  }
+  assert(i == nof_cpu_regs, "number of CPU registers");
+
+  for (i = 0; i < nof_fpu_regs; i++) {
+    _fpu_regs[i] = as_FloatRegister(i);
+  }
+
+  // Set before any operand is created.
+  // NOTE(review): presumably as_opr() and friends check _init_done - confirm
+  // before moving this assignment.
+  _init_done = true;
+
+  R0_opr  = as_opr(R0);
+  R1_opr  = as_opr(R1);
+  R2_opr  = as_opr(R2);
+  R3_opr  = as_opr(R3);
+  R4_opr  = as_opr(R4);
+  R5_opr  = as_opr(R5);
+  R6_opr  = as_opr(R6);
+  R7_opr  = as_opr(R7);
+  R8_opr  = as_opr(R8);
+  R9_opr  = as_opr(R9);
+  R10_opr = as_opr(R10);
+  R11_opr = as_opr(R11);
+  R12_opr = as_opr(R12);
+  R13_opr = as_opr(R13);
+  R14_opr = as_opr(R14);
+  R15_opr = as_opr(R15);
+  R16_opr = as_opr(R16);
+  R17_opr = as_opr(R17);
+  R18_opr = as_opr(R18);
+  R19_opr = as_opr(R19);
+  R20_opr = as_opr(R20);
+  R21_opr = as_opr(R21);
+  R22_opr = as_opr(R22);
+  R23_opr = as_opr(R23);
+  R24_opr = as_opr(R24);
+  R25_opr = as_opr(R25);
+  R26_opr = as_opr(R26);
+  R27_opr = as_opr(R27);
+  R28_opr = as_opr(R28);
+  R29_opr = as_opr(R29);
+  R30_opr = as_opr(R30);
+  R31_opr = as_opr(R31);
+
+  // Oop operands: R1, R13, R16 and R29 are deliberately left out.
+  R0_oop_opr  = as_oop_opr(R0);
+  //R1_oop_opr  = as_oop_opr(R1);
+  R2_oop_opr  = as_oop_opr(R2);
+  R3_oop_opr  = as_oop_opr(R3);
+  R4_oop_opr  = as_oop_opr(R4);
+  R5_oop_opr  = as_oop_opr(R5);
+  R6_oop_opr  = as_oop_opr(R6);
+  R7_oop_opr  = as_oop_opr(R7);
+  R8_oop_opr  = as_oop_opr(R8);
+  R9_oop_opr  = as_oop_opr(R9);
+  R10_oop_opr = as_oop_opr(R10);
+  R11_oop_opr = as_oop_opr(R11);
+  R12_oop_opr = as_oop_opr(R12);
+  //R13_oop_opr = as_oop_opr(R13);
+  R14_oop_opr = as_oop_opr(R14);
+  R15_oop_opr = as_oop_opr(R15);
+  //R16_oop_opr = as_oop_opr(R16);
+  R17_oop_opr = as_oop_opr(R17);
+  R18_oop_opr = as_oop_opr(R18);
+  R19_oop_opr = as_oop_opr(R19);
+  R20_oop_opr = as_oop_opr(R20);
+  R21_oop_opr = as_oop_opr(R21);
+  R22_oop_opr = as_oop_opr(R22);
+  R23_oop_opr = as_oop_opr(R23);
+  R24_oop_opr = as_oop_opr(R24);
+  R25_oop_opr = as_oop_opr(R25);
+  R26_oop_opr = as_oop_opr(R26);
+  R27_oop_opr = as_oop_opr(R27);
+  R28_oop_opr = as_oop_opr(R28);
+  //R29_oop_opr = as_oop_opr(R29);
+  R30_oop_opr = as_oop_opr(R30);
+  R31_oop_opr = as_oop_opr(R31);
+
+  // Metadata operands: same exclusions as for the oop operands.
+  R0_metadata_opr  = as_metadata_opr(R0);
+  //R1_metadata_opr  = as_metadata_opr(R1);
+  R2_metadata_opr  = as_metadata_opr(R2);
+  R3_metadata_opr  = as_metadata_opr(R3);
+  R4_metadata_opr  = as_metadata_opr(R4);
+  R5_metadata_opr  = as_metadata_opr(R5);
+  R6_metadata_opr  = as_metadata_opr(R6);
+  R7_metadata_opr  = as_metadata_opr(R7);
+  R8_metadata_opr  = as_metadata_opr(R8);
+  R9_metadata_opr  = as_metadata_opr(R9);
+  R10_metadata_opr = as_metadata_opr(R10);
+  R11_metadata_opr = as_metadata_opr(R11);
+  R12_metadata_opr = as_metadata_opr(R12);
+  //R13_metadata_opr = as_metadata_opr(R13);
+  R14_metadata_opr = as_metadata_opr(R14);
+  R15_metadata_opr = as_metadata_opr(R15);
+  //R16_metadata_opr = as_metadata_opr(R16);
+  R17_metadata_opr = as_metadata_opr(R17);
+  R18_metadata_opr = as_metadata_opr(R18);
+  R19_metadata_opr = as_metadata_opr(R19);
+  R20_metadata_opr = as_metadata_opr(R20);
+  R21_metadata_opr = as_metadata_opr(R21);
+  R22_metadata_opr = as_metadata_opr(R22);
+  R23_metadata_opr = as_metadata_opr(R23);
+  R24_metadata_opr = as_metadata_opr(R24);
+  R25_metadata_opr = as_metadata_opr(R25);
+  R26_metadata_opr = as_metadata_opr(R26);
+  R27_metadata_opr = as_metadata_opr(R27);
+  R28_metadata_opr = as_metadata_opr(R28);
+  //R29_metadata_opr = as_metadata_opr(R29);
+  R30_metadata_opr = as_metadata_opr(R30);
+  R31_metadata_opr = as_metadata_opr(R31);
+
+  SP_opr = as_pointer_opr(R1_SP);
+
+  // A long lives in one register, so both halves name the same register.
+  R0_long_opr = as_long_single_opr(R0);
+  R3_long_opr = as_long_single_opr(R3);
+
+  F1_opr = as_float_opr(F1);
+  F1_double_opr = as_double_opr(F1);
+
+  // All the allocated cpu regs are caller saved.
+  for (int rnr = 0; rnr < max_nof_caller_save_cpu_regs; rnr++) {
+    _caller_save_cpu_regs[rnr] = LIR_OprFact::single_cpu(rnr);
+  }
+
+  // All the fpu regs are caller saved.
+  for (int rnr = 0; rnr < nof_caller_save_fpu_regs; rnr++) {
+    _caller_save_fpu_regs[rnr] = LIR_OprFact::single_fpu(rnr);
+  }
+}
+
+
+// Builds the address of a stack location: base register R1_SP with a
+// displacement of sp_offset (in bytes) plus STACK_BIAS.
+Address FrameMap::make_new_address(ByteSize sp_offset) const {
+  return Address(R1_SP, STACK_BIAS + in_bytes(sp_offset));
+}
+
+
+// Returns the VMReg of the floating point register selected by n
+// (n is passed to as_FloatRegister()).
+VMReg FrameMap::fpu_regname (int n) {
+  return as_FloatRegister(n)->as_VMReg();
+}
+
+
+// Returns the preallocated stack pointer operand (SP_opr).
+LIR_Opr FrameMap::stack_pointer() {
+  return SP_opr;
+}
+
+
+// JSR 292
+// On PPC64, there is no need to save the SP, because neither
+// method handle intrinsics, nor compiled lambda forms modify it.
+// Hence no register is reserved: the illegal operand is returned.
+LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() {
+  return LIR_OprFact::illegalOpr;
+}
+
+
+// Checks that every frame offset can be encoded as a signed 16-bit
+// displacement. The largest offset is the frame size or the location of a
+// stack-passed incoming argument, whichever is greater; STACK_BIAS is added
+// before the range test.
+bool FrameMap::validate_frame() {
+  int largest_offset = in_bytes(framesize_in_bytes());
+
+  // slot walks the Java argument slots; each argument advances it by the
+  // number of slots its type occupies.
+  int slot = 0;
+  for (int arg = 0; arg < _incoming_arguments->length(); arg++) {
+    LIR_Opr incoming = _incoming_arguments->at(arg);
+    if (incoming->is_stack()) {
+      const int location = _argument_locations->at(slot);
+      if (location > largest_offset) {
+        largest_offset = location;
+      }
+    }
+    slot += type2size[incoming->type()];
+  }
+
+  return Assembler::is_simm16(largest_offset + STACK_BIAS);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/c1_FrameMap_ppc.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_C1_FRAMEMAP_PPC_HPP
+#define CPU_PPC_VM_C1_FRAMEMAP_PPC_HPP
+
+ public:
+
+  // Platform-dependent frame layout constants.
+  enum {
+    nof_reg_args = 8,   // Registers R3-R10 are available for parameter passing.
+    // NOTE(review): presumably the lowest SP-relative offset compiled code may
+    // use, i.e. just above the area preserved for callees - confirm.
+    first_available_sp_in_frame = frame::jit_out_preserve_size,
+    frame_pad_in_bytes = 0
+  };
+
+  // Defined out of line.
+  // NOTE(review): presumably the argument area reserved for C runtime calls - confirm.
+  static const int pd_c_runtime_reserved_arg_size;
+
+  // Preallocated operands, one per general purpose register R0-R31.
+  // FrameMap::initialize() assigns each of them through as_opr().
+  static LIR_Opr  R0_opr;
+  static LIR_Opr  R1_opr;
+  static LIR_Opr  R2_opr;
+  static LIR_Opr  R3_opr;
+  static LIR_Opr  R4_opr;
+  static LIR_Opr  R5_opr;
+  static LIR_Opr  R6_opr;
+  static LIR_Opr  R7_opr;
+  static LIR_Opr  R8_opr;
+  static LIR_Opr  R9_opr;
+  static LIR_Opr R10_opr;
+  static LIR_Opr R11_opr;
+  static LIR_Opr R12_opr;
+  static LIR_Opr R13_opr;
+  static LIR_Opr R14_opr;
+  static LIR_Opr R15_opr;
+  static LIR_Opr R16_opr;
+  static LIR_Opr R17_opr;
+  static LIR_Opr R18_opr;
+  static LIR_Opr R19_opr;
+  static LIR_Opr R20_opr;
+  static LIR_Opr R21_opr;
+  static LIR_Opr R22_opr;
+  static LIR_Opr R23_opr;
+  static LIR_Opr R24_opr;
+  static LIR_Opr R25_opr;
+  static LIR_Opr R26_opr;
+  static LIR_Opr R27_opr;
+  static LIR_Opr R28_opr;
+  static LIR_Opr R29_opr;
+  static LIR_Opr R30_opr;
+  static LIR_Opr R31_opr;
+
+  // Oop-typed operands for the general purpose registers. Registers that
+  // never hold an oop are left out; each gap is explained in place.
+  static LIR_Opr  R0_oop_opr;
+  //R1: Stack pointer. Not an oop.
+  static LIR_Opr  R2_oop_opr;
+  static LIR_Opr  R3_oop_opr;
+  static LIR_Opr  R4_oop_opr;
+  static LIR_Opr  R5_oop_opr;
+  static LIR_Opr  R6_oop_opr;
+  static LIR_Opr  R7_oop_opr;
+  static LIR_Opr  R8_oop_opr;
+  static LIR_Opr  R9_oop_opr;
+  static LIR_Opr R10_oop_opr;
+  static LIR_Opr R11_oop_opr;
+  static LIR_Opr R12_oop_opr;
+  //R13: System thread register. Not usable.
+  static LIR_Opr R14_oop_opr;
+  static LIR_Opr R15_oop_opr;
+  //R16: Java thread register. Not an oop.
+  static LIR_Opr R17_oop_opr;
+  static LIR_Opr R18_oop_opr;
+  static LIR_Opr R19_oop_opr;
+  static LIR_Opr R20_oop_opr;
+  static LIR_Opr R21_oop_opr;
+  static LIR_Opr R22_oop_opr;
+  static LIR_Opr R23_oop_opr;
+  static LIR_Opr R24_oop_opr;
+  static LIR_Opr R25_oop_opr;
+  static LIR_Opr R26_oop_opr;
+  static LIR_Opr R27_oop_opr;
+  static LIR_Opr R28_oop_opr;
+  // NOTE(review): R29_oop_opr is declared although R29 is the TOC register and
+  // FrameMap::initialize() never assigns it (that assignment is commented
+  // out); the metadata operands have no R29 entry at all. Presumably this
+  // declaration is a leftover - confirm nothing references it, then remove it.
+  static LIR_Opr R29_oop_opr;
+  //R29: TOC register. Not an oop.
+  static LIR_Opr R30_oop_opr;
+  static LIR_Opr R31_oop_opr;
+
+  // Metadata-typed operands for the general purpose registers. Registers
+  // that never hold metadata are left out; each gap is explained in place.
+  static LIR_Opr  R0_metadata_opr;
+  //R1: Stack pointer. Not metadata.
+  static LIR_Opr  R2_metadata_opr;
+  static LIR_Opr  R3_metadata_opr;
+  static LIR_Opr  R4_metadata_opr;
+  static LIR_Opr  R5_metadata_opr;
+  static LIR_Opr  R6_metadata_opr;
+  static LIR_Opr  R7_metadata_opr;
+  static LIR_Opr  R8_metadata_opr;
+  static LIR_Opr  R9_metadata_opr;
+  static LIR_Opr R10_metadata_opr;
+  static LIR_Opr R11_metadata_opr;
+  static LIR_Opr R12_metadata_opr;
+  //R13: System thread register. Not usable.
+  static LIR_Opr R14_metadata_opr;
+  static LIR_Opr R15_metadata_opr;
+  //R16: Java thread register. Not metadata.
+  static LIR_Opr R17_metadata_opr;
+  static LIR_Opr R18_metadata_opr;
+  static LIR_Opr R19_metadata_opr;
+  static LIR_Opr R20_metadata_opr;
+  static LIR_Opr R21_metadata_opr;
+  static LIR_Opr R22_metadata_opr;
+  static LIR_Opr R23_metadata_opr;
+  static LIR_Opr R24_metadata_opr;
+  static LIR_Opr R25_metadata_opr;
+  static LIR_Opr R26_metadata_opr;
+  static LIR_Opr R27_metadata_opr;
+  static LIR_Opr R28_metadata_opr;
+  //R29: TOC register. Not metadata.
+  static LIR_Opr R30_metadata_opr;
+  static LIR_Opr R31_metadata_opr;
+
+  // Stack pointer as a pointer operand.
+  static LIR_Opr SP_opr;
+
+  // Long (double-word CPU) operands for R0 and R3.
+  static LIR_Opr R0_long_opr;
+  static LIR_Opr R3_long_opr;
+
+  // Float and double operands for floating point register F1.
+  static LIR_Opr F1_opr;
+  static LIR_Opr F1_double_opr;
+
+ private:
+  // FloatRegister for each FPU register number; filled by initialize() and
+  // read by nr2floatreg().
+  static FloatRegister  _fpu_regs [nof_fpu_regs];
+
+  // Builds a double-word CPU operand whose two halves both name register r.
+  static LIR_Opr as_long_single_opr(Register r) {
+    return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r));
+  }
+  // Builds a double-word CPU operand from a register pair: r->successor() is
+  // passed as the first half and r as the second.
+  // NOTE(review): within this header it is referenced only from commented-out
+  // code - confirm whether it is still needed.
+  static LIR_Opr as_long_pair_opr(Register r) {
+    return LIR_OprFact::double_cpu(cpu_reg2rnr(r->successor()), cpu_reg2rnr(r));
+  }
+
+ public:
+
+  // as_long_opr() / as_pointer_opr(): operand for a long or a pointer held in r.
+  // With _LP64 both map to a single-register double-word operand; without
+  // _LP64 both are unimplemented.
+#ifdef _LP64
+  static LIR_Opr as_long_opr(Register r) {
+    return as_long_single_opr(r);
+  }
+  static LIR_Opr as_pointer_opr(Register r) {
+    return as_long_single_opr(r);
+  }
+#else
+  static LIR_Opr as_long_opr(Register r) {
+    Unimplemented(); return 0;
+//    return as_long_pair_opr(r);
+  }
+  static LIR_Opr as_pointer_opr(Register r) {
+    Unimplemented(); return 0;
+//    return as_opr(r);
+  }
+#endif
+  // Single-precision FPU operand for r, identified by r's encoding.
+  static LIR_Opr as_float_opr(FloatRegister r) {
+    return LIR_OprFact::single_fpu(r->encoding());
+  }
+  // Double-precision FPU operand for r, identified by r's encoding.
+  static LIR_Opr as_double_opr(FloatRegister r) {
+    return LIR_OprFact::double_fpu(r->encoding());
+  }
+
+  // FPU register number -> FloatRegister.
+  static FloatRegister nr2floatreg (int rnr);
+
+  // VMReg of the floating point register selected by n.
+  static VMReg fpu_regname (int n);
+
+  // Caller-save queries for an operand and for a plain register.
+  static bool is_caller_save_register(LIR_Opr  reg);
+  static bool is_caller_save_register(Register r);
+
+  // Accessors for the platform-dependent register counts.
+  static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; }
+  static int last_cpu_reg()             { return pd_last_cpu_reg; }
+
+  // Tells whether r has to be saved in the frames (e.g. for GC).
+  // Five registers have a fixed role and are therefore excluded:
+  //  R0: scratch
+  //  R1: sp
+  // R13: system thread id
+  // R16: java thread
+  // R29: global TOC
+  static bool reg_needs_save(Register r) {
+    const bool has_fixed_role = (r == R0) || (r == R1) || (r == R13) || (r == R16) || (r == R29);
+    return !has_fixed_role;
+  }
+
+#endif // CPU_PPC_VM_C1_FRAMEMAP_PPC_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/c1_LIRAssembler_ppc.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -0,0 +1,3133 @@
+/*
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_MacroAssembler.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArrayKlass.hpp"
+#include "ci/ciInstance.hpp"
+#include "gc/shared/collectedHeap.hpp"
+#include "gc/shared/barrierSet.hpp"
+#include "gc/shared/cardTableModRefBS.hpp"
+#include "nativeInst_ppc.hpp"
+#include "oops/objArrayKlass.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+#define __ _masm->
+
+
+// Condition register field that emit_opBranch() tests for conditional branches.
+const ConditionRegister LIR_Assembler::BOOL_RESULT = CCR5;
+
+
+// Not implemented for this platform: calls Unimplemented(); the trailing
+// return only satisfies the signature.
+bool LIR_Assembler::is_small_constant(LIR_Opr opr) {
+  Unimplemented(); return false; // Currently not used on this platform.
+}
+
+
+// Operand holding the receiver: R3 as an oop.
+LIR_Opr LIR_Assembler::receiverOpr() {
+  return FrameMap::R3_oop_opr;
+}
+
+
+// Operand holding the pointer to the OSR buffer: R3 (see osr_entry()).
+LIR_Opr LIR_Assembler::osrBufferPointer() {
+  return FrameMap::R3_opr;
+}
+
+
+// This specifies the stack pointer decrement needed to build the frame.
+// It is the frame map's frame size, converted to a plain byte count.
+int LIR_Assembler::initial_frame_size_in_bytes() const {
+  return in_bytes(frame_map()->framesize_in_bytes());
+}
+
+
+// Inline cache check: the inline cached class is in inline_cache_reg;
+// we fetch the class of the receiver and compare it with the cached class.
+// If they do not match we jump to slow case.
+// The receiver is expected in R3_ARG1 and the cached class in
+// R19_inline_cache_reg. Returns the code offset at which the check starts.
+int LIR_Assembler::check_icache() {
+  int offset = __ offset();
+  __ inline_cache_check(R3_ARG1, R19_inline_cache_reg);
+  return offset;
+}
+
+
+// Emits the on-stack-replacement entry: records the entry offset, builds the
+// compiled frame and copies the monitors from the OSR buffer into it.
+// Locals are not copied here (see the comment in the body).
+void LIR_Assembler::osr_entry() {
+  // On-stack-replacement entry sequence:
+  //
+  //   1. Create a new compiled activation.
+  //   2. Initialize local variables in the compiled activation. The expression
+  //      stack must be empty at the osr_bci; it is not initialized.
+  //   3. Jump to the continuation address in compiled code to resume execution.
+
+  // OSR entry point
+  offsets()->set_value(CodeOffsets::OSR_Entry, code_offset());
+  BlockBegin* osr_entry = compilation()->hir()->osr_entry();
+  ValueStack* entry_state = osr_entry->end()->state();
+  int number_of_locks = entry_state->locks_size();
+
+  // Create a frame for the compiled activation.
+  __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes());
+
+  // OSR buffer is
+  //
+  // locals[nlocals-1..0]
+  // monitors[number_of_locks-1..0]
+  //
+  // Locals is a direct copy of the interpreter frame so in the osr buffer
+  // the first slot in the local array is the last local from the interpreter
+  // and the last slot is local[0] (receiver) from the interpreter.
+  //
+  // Similarly with locks. The first lock slot in the osr buffer is the nth lock
+  // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock
+  // in the interpreter frame (the method lock if a sync method).
+
+  // Initialize monitors in the compiled activation.
+  //   R3: pointer to osr buffer
+  //
+  // All other registers are dead at this point and the locals will be
+  // copied into place by code emitted in the IR.
+
+  Register OSR_buf = osrBufferPointer()->as_register();
+  { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below");
+    // Buffer offset of the entry used for monitor 0: past all locals, at the
+    // last of the number_of_locks two-word monitor entries.
+    int monitor_offset = BytesPerWord * method()->max_locals() +
+      (2 * BytesPerWord) * (number_of_locks - 1);
+    // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in
+    // the OSR buffer using 2 word entries: first the lock and then
+    // the oop.
+    for (int i = 0; i < number_of_locks; i++) {
+      // Monitor i sits i entries below monitor_offset in the buffer.
+      int slot_offset = monitor_offset - ((i * 2) * BytesPerWord);
+#ifdef ASSERT
+      // Verify the interpreter's monitor has a non-null object.
+      {
+        Label L;
+        __ ld(R0, slot_offset + 1*BytesPerWord, OSR_buf);
+        __ cmpdi(CCR0, R0, 0);
+        __ bne(CCR0, L);
+        __ stop("locked object is NULL");
+        __ bind(L);
+      }
+#endif // ASSERT
+      // Copy the lock field into the compiled activation.
+      Address ml = frame_map()->address_for_monitor_lock(i),
+              mo = frame_map()->address_for_monitor_object(i);
+      assert(ml.index() == noreg && mo.index() == noreg, "sanity");
+      __ ld(R0, slot_offset + 0, OSR_buf);
+      __ std(R0, ml.disp(), ml.base());
+      // Copy the object field (second word of the entry).
+      __ ld(R0, slot_offset + 1*BytesPerWord, OSR_buf);
+      __ std(R0, mo.disp(), mo.base());
+    }
+  }
+}
+
+
+// Emits the exception handler stub, which jumps to the Runtime1
+// handle_exception_from_callee entry.
+// Returns the code offset of the handler, or -1 after a bailout when no
+// stub space is left.
+int LIR_Assembler::emit_exception_handler() {
+  // If the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri).
+  __ nop();
+
+  // Generate code for the exception handler.
+  address handler_base = __ start_a_stub(exception_handler_size);
+
+  if (handler_base == NULL) {
+    // Not enough space left for the handler.
+    bailout("exception handler overflow");
+    return -1;
+  }
+
+  int offset = code_offset();
+  address entry_point = CAST_FROM_FN_PTR(address, Runtime1::entry_for(Runtime1::handle_exception_from_callee_id));
+  // Form the entry address as R29_TOC plus its offset from the global TOC,
+  // then jump through CTR.
+  //__ load_const_optimized(R0, entry_point);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(entry_point));
+  __ mtctr(R0);
+  __ bctr();
+
+  guarantee(code_offset() - offset <= exception_handler_size, "overflow");
+  __ end_a_stub();
+
+  return offset;
+}
+
+
+// Emit the code to remove the frame from the stack in the exception
+// unwind path.
+// The exception oop is loaded from the thread into R3, the monitor of a
+// synchronized method is released, and control is passed to the Runtime1
+// unwind_exception stub. Returns the code offset of the handler.
+int LIR_Assembler::emit_unwind_handler() {
+  _masm->block_comment("Unwind handler");
+
+  int offset = code_offset();
+  // The exception is kept in Rexception_save across the unlock code and
+  // restored into Rexception before the jump.
+  bool preserve_exception = method()->is_synchronized() || compilation()->env()->dtrace_method_probes();
+  const Register Rexception = R3 /*LIRGenerator::exceptionOopOpr()*/, Rexception_save = R31;
+
+  // Fetch the exception from TLS and clear out exception related thread state.
+  __ ld(Rexception, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
+  __ li(R0, 0);
+  __ std(R0, in_bytes(JavaThread::exception_oop_offset()), R16_thread);
+  __ std(R0, in_bytes(JavaThread::exception_pc_offset()), R16_thread);
+
+  // Code entering at _unwind_handler_entry skips the loads above.
+  // NOTE(review): presumably it arrives with the exception already in
+  // Rexception - confirm against the users of this label.
+  __ bind(_unwind_handler_entry);
+  __ verify_not_null_oop(Rexception);
+  if (preserve_exception) { __ mr(Rexception_save, Rexception); }
+
+  // Perform needed unlocking
+  MonitorExitStub* stub = NULL;
+  if (method()->is_synchronized()) {
+    monitor_address(0, FrameMap::R4_opr);
+    stub = new MonitorExitStub(FrameMap::R4_opr, true, 0);
+    __ unlock_object(R5, R6, R4, *stub->entry());
+    __ bind(*stub->continuation());
+  }
+
+  if (compilation()->env()->dtrace_method_probes()) {
+    Unimplemented();
+  }
+
+  // Dispatch to the unwind logic.
+  address unwind_stub = Runtime1::entry_for(Runtime1::unwind_exception_id);
+  //__ load_const_optimized(R0, unwind_stub);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(unwind_stub));
+  if (preserve_exception) { __ mr(Rexception, Rexception_save); }
+  __ mtctr(R0);
+  __ bctr();
+
+  // Emit the slow path assembly.
+  if (stub != NULL) {
+    stub->emit_code(this);
+  }
+
+  return offset;
+}
+
+
+// Emits the deoptimization handler stub: a patchable call to the unpack
+// entry of the deopt blob.
+// Returns the code offset of the handler, or -1 after a bailout when no
+// stub space is left.
+int LIR_Assembler::emit_deopt_handler() {
+  // If the last instruction is a call (typically to do a throw which
+  // is coming at the end after block reordering) the return address
+  // must still point into the code area in order to avoid assertion
+  // failures when searching for the corresponding bci => add a nop
+  // (was bug 5/14/1999 - gri).
+  __ nop();
+
+  // Generate code for deopt handler.
+  address handler_base = __ start_a_stub(deopt_handler_size);
+
+  if (handler_base == NULL) {
+    // Not enough space left for the handler.
+    bailout("deopt handler overflow");
+    return -1;
+  }
+
+  int offset = code_offset();
+  __ bl64_patchable(SharedRuntime::deopt_blob()->unpack(), relocInfo::runtime_call_type);
+
+  guarantee(code_offset() - offset <= deopt_handler_size, "overflow");
+  __ end_a_stub();
+
+  return offset;
+}
+
+
+// Loads the object constant o into reg.
+// A NULL object becomes an immediate 0. Any other object is loaded through
+// its oop address literal, with R0 as temporary unless reg is R0 itself, in
+// which case no temporary is passed.
+void LIR_Assembler::jobject2reg(jobject o, Register reg) {
+  if (o != NULL) {
+    AddressLiteral oop_literal = __ constant_oop_address(o);
+    Register tmp = (reg == R0) ? noreg : R0;
+    __ load_const(reg, oop_literal, tmp);
+    return;
+  }
+
+  __ li(reg, 0);
+}
+
+
+// Emits a load of a not-yet-known object constant into reg together with the
+// patching stub that supplies the object later. R0 is used as temporary.
+void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) {
+  // Allocate a new index in table to hold the object once it's been patched.
+  int oop_index = __ oop_recorder()->allocate_oop_index(NULL);
+  PatchingStub* patch = new PatchingStub(_masm, patching_id(info), oop_index);
+
+  // Placeholder load of a NULL address carrying the oop relocation.
+  AddressLiteral addrlit((address)NULL, oop_Relocation::spec(oop_index));
+  __ load_const(reg, addrlit, R0);
+
+  patching_epilog(patch, lir_patch_normal, reg, info);
+}
+
+
+// Loads the metadata constant o into reg as a plain value. R0 serves as
+// temporary unless reg is R0, in which case no temporary is passed.
+void LIR_Assembler::metadata2reg(Metadata* o, Register reg) {
+  AddressLiteral md = __ constant_metadata_address(o); // Notify OOP recorder (don't need the relocation)
+  __ load_const_optimized(reg, md.value(), (reg != R0) ? R0 : noreg);
+}
+
+
+// Emits a load of a not-yet-known klass into reg together with the
+// load_klass patching stub that supplies it later. R0 is used as temporary.
+void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo *info) {
+  // Allocate a new index in table to hold the klass once it's been patched.
+  int index = __ oop_recorder()->allocate_metadata_index(NULL);
+  PatchingStub* patch = new PatchingStub(_masm, PatchingStub::load_klass_id, index);
+
+  // Placeholder load of a NULL address carrying the metadata relocation.
+  AddressLiteral addrlit((address)NULL, metadata_Relocation::spec(index));
+  assert(addrlit.rspec().type() == relocInfo::metadata_type, "must be an metadata reloc");
+  __ load_const(reg, addrlit, R0);
+
+  patching_epilog(patch, lir_patch_normal, reg, info);
+}
+
+
+// Emits integer division (lir_idiv) or remainder (lir_irem) for int and long
+// operands; the operand width follows the kind of the result operand.
+//   in_opr1: dividend (register)
+//   in_opr2: divisor (register or constant)
+//   in_opr3: scratch register, must differ from dividend and divisor
+// Constant divisors 1, -1 and powers of two get dedicated sequences without
+// a divide instruction. For a register divisor the value -1 is tested at run
+// time and handled without a divide instruction.
+void LIR_Assembler::emit_op3(LIR_Op3* op) {
+  const bool is_int = op->result_opr()->is_single_cpu();
+  Register Rdividend = is_int ? op->in_opr1()->as_register() : op->in_opr1()->as_register_lo();
+  Register Rdivisor  = noreg;
+  Register Rscratch  = op->in_opr3()->as_register();
+  Register Rresult   = is_int ? op->result_opr()->as_register() : op->result_opr()->as_register_lo();
+  long divisor = -1;
+
+  // Either Rdivisor (register divisor) or divisor (constant divisor) is set;
+  // Rdivisor == noreg marks the constant case below.
+  if (op->in_opr2()->is_register()) {
+    Rdivisor = is_int ? op->in_opr2()->as_register() : op->in_opr2()->as_register_lo();
+  } else {
+    divisor = is_int ? op->in_opr2()->as_constant_ptr()->as_jint()
+                     : op->in_opr2()->as_constant_ptr()->as_jlong();
+  }
+
+  assert(Rdividend != Rscratch, "");
+  assert(Rdivisor  != Rscratch, "");
+  assert(op->code() == lir_idiv || op->code() == lir_irem, "Must be irem or idiv");
+
+  if (Rdivisor == noreg) {
+    if (divisor == 1) { // stupid, but can happen
+      if (op->code() == lir_idiv) {
+        __ mr_if_needed(Rresult, Rdividend);
+      } else {
+        __ li(Rresult, 0);
+      }
+
+    } else if (is_power_of_2(divisor)) {
+      // Convert division by a power of two into some shifts and logical operations.
+      int log2 = log2_intptr(divisor);
+
+      // Round towards 0.
+      // Rscratch becomes divisor-1 for a negative dividend and 0 otherwise;
+      // adding it to the dividend makes the arithmetic shift below round
+      // towards zero.
+      if (divisor == 2) {
+        // divisor-1 == 1: the sign bit itself is the correction.
+        if (is_int) {
+          __ srwi(Rscratch, Rdividend, 31);
+        } else {
+          __ srdi(Rscratch, Rdividend, 63);
+        }
+      } else {
+        // Replicate the sign bit, then keep only the low log2 bits.
+        if (is_int) {
+          __ srawi(Rscratch, Rdividend, 31);
+        } else {
+          __ sradi(Rscratch, Rdividend, 63);
+        }
+        __ clrldi(Rscratch, Rscratch, 64-log2);
+      }
+      __ add(Rscratch, Rdividend, Rscratch);
+
+      if (op->code() == lir_idiv) {
+        if (is_int) {
+          __ srawi(Rresult, Rscratch, log2);
+        } else {
+          __ sradi(Rresult, Rscratch, log2);
+        }
+      } else { // lir_irem
+        // Clearing the low log2 bits yields quotient * divisor; the remainder
+        // is the dividend minus that value.
+        __ clrrdi(Rscratch, Rscratch, log2);
+        __ sub(Rresult, Rdividend, Rscratch);
+      }
+
+    } else if (divisor == -1) {
+      if (op->code() == lir_idiv) {
+        __ neg(Rresult, Rdividend);
+      } else {
+        __ li(Rresult, 0);
+      }
+
+    } else {
+      // General constant: materialize it in Rscratch and divide.
+      __ load_const_optimized(Rscratch, divisor);
+      if (op->code() == lir_idiv) {
+        if (is_int) {
+          __ divw(Rresult, Rdividend, Rscratch); // Can't divide minint/-1.
+        } else {
+          __ divd(Rresult, Rdividend, Rscratch); // Can't divide minint/-1.
+        }
+      } else {
+        // remainder = dividend - (dividend / divisor) * divisor; R0 holds the
+        // quotient while Rscratch still holds the divisor.
+        assert(Rscratch != R0, "need both");
+        if (is_int) {
+          __ divw(R0, Rdividend, Rscratch); // Can't divide minint/-1.
+          __ mullw(Rscratch, R0, Rscratch);
+        } else {
+          __ divd(R0, Rdividend, Rscratch); // Can't divide minint/-1.
+          __ mulld(Rscratch, R0, Rscratch);
+        }
+        __ sub(Rresult, Rdividend, Rscratch);
+      }
+
+    }
+    return;
+  }
+
+  // Register divisor: test for -1 at run time, because the divide
+  // instructions cannot be used for min / -1.
+  Label regular, done;
+  if (is_int) {
+    __ cmpwi(CCR0, Rdivisor, -1);
+  } else {
+    __ cmpdi(CCR0, Rdivisor, -1);
+  }
+  __ bne(CCR0, regular);
+  if (op->code() == lir_idiv) {
+    // x / -1 == -x
+    __ neg(Rresult, Rdividend);
+    __ b(done);
+    __ bind(regular);
+    if (is_int) {
+      __ divw(Rresult, Rdividend, Rdivisor); // Can't divide minint/-1.
+    } else {
+      __ divd(Rresult, Rdividend, Rdivisor); // Can't divide minint/-1.
+    }
+  } else { // lir_irem
+    // x % -1 == 0
+    __ li(Rresult, 0);
+    __ b(done);
+    __ bind(regular);
+    if (is_int) {
+      __ divw(Rscratch, Rdividend, Rdivisor); // Can't divide minint/-1.
+      __ mullw(Rscratch, Rscratch, Rdivisor);
+    } else {
+      __ divd(Rscratch, Rdividend, Rdivisor); // Can't divide minint/-1.
+      __ mulld(Rscratch, Rscratch, Rdivisor);
+    }
+    __ sub(Rresult, Rdividend, Rscratch);
+  }
+  __ bind(done);
+}
+
+
+// Emits an unconditional branch or a conditional branch on BOOL_RESULT.
+// Each LIR condition is expressed as one condition bit (equal, less or
+// greater) plus a flag saying whether to branch when the bit is set
+// (positive) or clear. Float branches additionally route the unordered
+// outcome to op->ublock().
+void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
+#ifdef ASSERT
+  assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label");
+  if (op->block() != NULL)  _branch_target_blocks.append(op->block());
+  if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock());
+  assert(op->info() == NULL, "shouldn't have CodeEmitInfo");
+#endif
+
+  Label *L = op->label();
+  if (op->cond() == lir_cond_always) {
+    __ b(*L);
+  } else {
+    Label done;
+    bool is_unordered = false;
+    if (op->code() == lir_cond_float_branch) {
+      assert(op->ublock() != NULL, "must have unordered successor");
+      is_unordered = true;
+    } else {
+      assert(op->code() == lir_branch, "just checking");
+    }
+
+    bool positive = false;
+    Assembler::Condition cond = Assembler::equal;
+    // equal / notEqual clear is_unordered, so they get no extra unordered
+    // handling below.
+    switch (op->cond()) {
+      case lir_cond_equal:        positive = true ; cond = Assembler::equal  ; is_unordered = false; break;
+      case lir_cond_notEqual:     positive = false; cond = Assembler::equal  ; is_unordered = false; break;
+      case lir_cond_less:         positive = true ; cond = Assembler::less   ; break;
+      case lir_cond_belowEqual:   assert(op->code() != lir_cond_float_branch, ""); // fallthru
+      case lir_cond_lessEqual:    positive = false; cond = Assembler::greater; break;
+      case lir_cond_greater:      positive = true ; cond = Assembler::greater; break;
+      case lir_cond_aboveEqual:   assert(op->code() != lir_cond_float_branch, ""); // fallthru
+      case lir_cond_greaterEqual: positive = false; cond = Assembler::less   ; break;
+      default:                    ShouldNotReachHere();
+    }
+    int bo = positive ? Assembler::bcondCRbiIs1 : Assembler::bcondCRbiIs0;
+    int bi = Assembler::bi0(BOOL_RESULT, cond);
+    if (is_unordered) {
+      if (positive) {
+        // A "bit set" test is not taken for an unordered result, so branch
+        // explicitly if the unordered successor is the branch target.
+        if (op->ublock() == op->block()) {
+          __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(BOOL_RESULT, Assembler::summary_overflow), *L);
+        }
+      } else {
+        // A "bit clear" test is taken for an unordered result, so skip the
+        // branch if the unordered successor is not the branch target.
+        if (op->ublock() != op->block()) { __ bso(BOOL_RESULT, done); }
+      }
+    }
+    __ bc_far_optimized(bo, bi, *L);
+    __ bind(done);
+  }
+}
+
+
+// Emits the primitive conversion selected by op->bytecode().
+// Integer-to-floating conversions read their input from an FPU register
+// ("via mem" in the comments below). Floating-to-integer conversions write
+// their result to the stack slot named by the result operand.
+void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
+  Bytecodes::Code code = op->bytecode();
+  LIR_Opr src = op->in_opr(),
+          dst = op->result_opr();
+
+  switch(code) {
+    case Bytecodes::_i2l: {
+      __ extsw(dst->as_register_lo(), src->as_register());
+      break;
+    }
+    case Bytecodes::_l2i: {
+      __ mr_if_needed(dst->as_register(), src->as_register_lo()); // high bits are garbage
+      break;
+    }
+    case Bytecodes::_i2b: {
+      __ extsb(dst->as_register(), src->as_register());
+      break;
+    }
+    case Bytecodes::_i2c: {
+      // Zero-extend: keep only the low 16 bits.
+      __ clrldi(dst->as_register(), src->as_register(), 64-16);
+      break;
+    }
+    case Bytecodes::_i2s: {
+      __ extsh(dst->as_register(), src->as_register());
+      break;
+    }
+    case Bytecodes::_i2d:
+    case Bytecodes::_l2d: {
+      __ fcfid(dst->as_double_reg(), src->as_double_reg()); // via mem
+      break;
+    }
+    case Bytecodes::_i2f: {
+      FloatRegister rdst = dst->as_float_reg();
+      FloatRegister rsrc = src->as_double_reg(); // via mem
+      if (VM_Version::has_fcfids()) {
+        __ fcfids(rdst, rsrc);
+      } else {
+        // Without fcfids: convert to double, then round to single.
+        __ fcfid(rdst, rsrc);
+        __ frsp(rdst, rdst);
+      }
+      break;
+    }
+    case Bytecodes::_l2f: { // >= Power7
+      assert(VM_Version::has_fcfids(), "fcfid+frsp needs fixup code to avoid rounding incompatibility");
+      __ fcfids(dst->as_float_reg(), src->as_double_reg()); // via mem
+      break;
+    }
+    case Bytecodes::_f2d: {
+      __ fmr_if_needed(dst->as_double_reg(), src->as_float_reg());
+      break;
+    }
+    case Bytecodes::_d2f: {
+      __ frsp(dst->as_float_reg(), src->as_double_reg());
+      break;
+    }
+    case Bytecodes::_d2i:
+    case Bytecodes::_f2i: {
+      FloatRegister rsrc = (code == Bytecodes::_d2i) ? src->as_double_reg() : src->as_float_reg();
+      Address       addr = frame_map()->address_for_slot(dst->double_stack_ix());
+      Label L;
+      // Result must be 0 if value is NaN; test by comparing value to itself.
+      // The slot is preset to 0 and only overwritten when the value is not NaN.
+      __ fcmpu(CCR0, rsrc, rsrc);
+      __ li(R0, 0); // 0 in case of NAN
+      __ std(R0, addr.disp(), addr.base());
+      __ bso(CCR0, L);
+      __ fctiwz(rsrc, rsrc); // USE_KILL
+      __ stfd(rsrc, addr.disp(), addr.base());
+      __ bind(L);
+      break;
+    }
+    case Bytecodes::_d2l:
+    case Bytecodes::_f2l: {
+      FloatRegister rsrc = (code == Bytecodes::_d2l) ? src->as_double_reg() : src->as_float_reg();
+      Address       addr = frame_map()->address_for_slot(dst->double_stack_ix());
+      Label L;
+      // Result must be 0 if value is NaN; test by comparing value to itself.
+      // The slot is preset to 0 and only overwritten when the value is not NaN.
+      __ fcmpu(CCR0, rsrc, rsrc);
+      __ li(R0, 0); // 0 in case of NAN
+      __ std(R0, addr.disp(), addr.base());
+      __ bso(CCR0, L);
+      __ fctidz(rsrc, rsrc); // USE_KILL
+      __ stfd(rsrc, addr.disp(), addr.base());
+      __ bind(L);
+      break;
+    }
+
+    default: ShouldNotReachHere();
+  }
+}
+
+
+// Hook for aligning a call site before it is emitted; the LIR opcode argument is unused here.
+void LIR_Assembler::align_call(LIR_Code) {
+  // do nothing since all instructions are word aligned on ppc
+}
+
+
+// Emits the trampoline stub for the call instruction that will be emitted at
+// the current code offset. The call target is placed in the constant pool and
+// the stub is given its TOC offset. Returns false (after recording a bailout)
+// if either the constant section or the stub section has no room left.
+bool LIR_Assembler::emit_trampoline_stub_for_call(address target, Register Rtoc) {
+  const int call_offset = __ offset();
+
+  // The entry point goes into the constant pool as a plain (unrelocated) constant.
+  const address target_toc_addr = __ address_constant(target, RelocationHolder::none);
+  if (target_toc_addr == NULL) {
+    bailout("const section overflow");
+    return false;
+  }
+  const int target_toc_offset = __ offset_to_method_toc(target_toc_addr);
+
+  // The stub is tied to the branch-and-link the caller emits at call_offset.
+  const address trampoline = __ emit_trampoline_stub(target_toc_offset, call_offset, Rtoc);
+  if (trampoline == NULL) {
+    bailout("no space for trampoline stub");
+    return false;
+  }
+
+  return true;
+}
+
+
+// Emits a static or optimized-virtual Java call. A trampoline stub is emitted
+// first; the bl itself branches to a placeholder and is patched later.
+void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) {
+  assert(rtype==relocInfo::opt_virtual_call_type || rtype==relocInfo::static_call_type, "unexpected rtype");
+
+  if (!emit_trampoline_stub_for_call(op->addr())) {
+    return; // The helper has already recorded the bailout.
+  }
+
+  __ relocate(rtype);
+  // The trampoline stub's address is not available here and the entry point
+  // may be out of reach for bl, so the current pc is used as a dummy target
+  // until the bl gets patched.
+  __ code()->set_insts_mark();
+  const address dummy_target = __ pc();
+  __ bl(dummy_target);
+  add_call_info(code_offset(), op->info());
+}
+
+
+// Emits an inline-cache call: loads a clear inline cache value into
+// R19_inline_cache_reg from the method TOC, then emits a patchable bl that is
+// backed by a trampoline stub.
+void LIR_Assembler::ic_call(LIR_OpJavaCall* op) {
+  __ calculate_address_from_global_toc(R2_TOC, __ method_toc());
+
+  // The virtual call relocation refers to the inline cache load that follows.
+  const address ic_load_pc = __ pc();
+
+  // Start out with a clear inline cache.
+  AddressLiteral clear_ic((address) Universe::non_oop_word());
+  if (!__ load_const_from_method_toc(R19_inline_cache_reg, clear_ic, R2_TOC)) {
+    bailout("const section overflow");
+    return;
+  }
+
+  // The call goes to a fixup routine, which uses ScopeDesc info to
+  // determine who we intended to call.
+  __ relocate(virtual_call_Relocation::spec(ic_load_pc));
+
+  if (!emit_trampoline_stub_for_call(op->addr(), R2_TOC)) {
+    return; // The helper has already recorded the bailout.
+  }
+
+  // The trampoline stub's address is not available here and the entry point
+  // may be out of reach for bl, so the current pc is used as a dummy target
+  // until the bl gets patched.
+  const address dummy_target = __ pc();
+  __ bl(dummy_target);
+  add_call_info(code_offset(), op->info());
+}
+
+
+// Vtable calls are never emitted through this entry point on this platform.
+void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) {
+  ShouldNotReachHere(); // ic_call is used instead.
+}
+
+
+// Emits an explicit null check of 'addr' that branches to a freshly created
+// ImplicitNullCheckStub, and queues that stub for later emission.
+void LIR_Assembler::explicit_null_check(Register addr, CodeEmitInfo* info) {
+  const int check_offset = code_offset();
+  ImplicitNullCheckStub* const null_stub = new ImplicitNullCheckStub(check_offset, info);
+  __ null_check(addr, null_stub->entry());
+  append_code_stub(null_stub);
+}
+
+
+// Stores 'from_reg' to [base + offset] and returns the code offset of the
+// store instruction.
+// Attention: the caller must encode the oop if needed.
+// Offsets that do not fit a simm16 are materialized in R0 and handled by the
+// register-displacement variant.
+int LIR_Assembler::store(LIR_Opr from_reg, Register base, int offset, BasicType type, bool wide, bool unaligned) {
+  if (!Assembler::is_simm16(offset)) {
+    // Large offset: set it up in R0 and use the indexed form.
+    assert(wide && !from_reg->is_same_register(FrameMap::R0_opr), "large offset only supported in special case");
+    __ load_const_optimized(R0, offset);
+    return store(from_reg, base, R0, type, wide);
+  }
+
+  const int store_offset = code_offset();
+  switch (type) {
+    case T_BOOLEAN: // fall through
+    case T_BYTE:
+      __ stb(from_reg->as_register(), offset, base);
+      break;
+    case T_CHAR: // fall through
+    case T_SHORT:
+      __ sth(from_reg->as_register(), offset, base);
+      break;
+    case T_INT:
+      __ stw(from_reg->as_register(), offset, base);
+      break;
+    case T_LONG:
+      __ std(from_reg->as_register_lo(), offset, base);
+      break;
+    case T_ADDRESS: // fall through
+    case T_METADATA:
+      __ std(from_reg->as_register(), offset, base);
+      break;
+    case T_ARRAY: // fall through
+    case T_OBJECT: {
+      const Register oop_reg = from_reg->as_register();
+      if (UseCompressedOops && !wide) {
+        // The value has already been encoded by the caller.
+        __ stw(oop_reg, offset, base);
+      } else {
+        __ std(oop_reg, offset, base);
+      }
+      __ verify_oop(oop_reg);
+      break;
+    }
+    case T_FLOAT:
+      __ stfs(from_reg->as_float_reg(), offset, base);
+      break;
+    case T_DOUBLE:
+      __ stfd(from_reg->as_double_reg(), offset, base);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+  return store_offset;
+}
+
+
+// Stores 'from_reg' to [base + disp] using the indexed store forms and returns
+// the code offset of the store instruction.
+// Attention: the caller must encode the oop if needed.
+int LIR_Assembler::store(LIR_Opr from_reg, Register base, Register disp, BasicType type, bool wide) {
+  const int store_offset = code_offset();
+  switch (type) {
+    case T_BOOLEAN: // fall through
+    case T_BYTE:
+      __ stbx(from_reg->as_register(), base, disp);
+      break;
+    case T_CHAR: // fall through
+    case T_SHORT:
+      __ sthx(from_reg->as_register(), base, disp);
+      break;
+    case T_INT:
+      __ stwx(from_reg->as_register(), base, disp);
+      break;
+    case T_LONG:
+#ifdef _LP64
+      __ stdx(from_reg->as_register_lo(), base, disp);
+#else
+      Unimplemented();
+#endif
+      break;
+    case T_ADDRESS:
+      __ stdx(from_reg->as_register(), base, disp);
+      break;
+    case T_ARRAY: // fall through
+    case T_OBJECT: {
+      const Register oop_reg = from_reg->as_register();
+      if (UseCompressedOops && !wide) {
+        // The value has already been encoded by the caller.
+        __ stwx(oop_reg, base, disp);
+      } else {
+        __ stdx(oop_reg, base, disp);
+      }
+      __ verify_oop(oop_reg); // kills R0
+      break;
+    }
+    case T_FLOAT:
+      __ stfsx(from_reg->as_float_reg(), base, disp);
+      break;
+    case T_DOUBLE:
+      __ stfdx(from_reg->as_double_reg(), base, disp);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+  return store_offset;
+}
+
+
+// Loads a value of the given type from [base + offset] into 'to_reg' and
+// returns the code offset of the (first) load instruction. Offsets that do not
+// fit a simm16 are materialized in R0 and handled by the register-displacement
+// variant.
+int LIR_Assembler::load(Register base, int offset, LIR_Opr to_reg, BasicType type, bool wide, bool unaligned) {
+  if (!Assembler::is_simm16(offset)) {
+    // Large offset: set it up in R0 and use the indexed form.
+    __ load_const_optimized(R0, offset);
+    return load(base, R0, to_reg, type, wide);
+  }
+
+  const int load_offset = code_offset();
+  switch (type) {
+    case T_BOOLEAN: // fall through
+    case T_BYTE: {
+      // lbz zero-extends, so sign-extend the byte afterwards.
+      const Register dst = to_reg->as_register();
+      __ lbz(dst, offset, base);
+      __ extsb(dst, dst);
+      break;
+    }
+    case T_CHAR:
+      __ lhz(to_reg->as_register(), offset, base);
+      break;
+    case T_SHORT:
+      __ lha(to_reg->as_register(), offset, base);
+      break;
+    case T_INT:
+      __ lwa(to_reg->as_register(), offset, base);
+      break;
+    case T_LONG:
+      __ ld(to_reg->as_register_lo(), offset, base);
+      break;
+    case T_METADATA:
+      __ ld(to_reg->as_register(), offset, base);
+      break;
+    case T_ADDRESS:
+      if (offset == oopDesc::klass_offset_in_bytes() && UseCompressedClassPointers) {
+        // Compressed klass field: load the narrow value and decode it.
+        __ lwz(to_reg->as_register(), offset, base);
+        __ decode_klass_not_null(to_reg->as_register());
+      } else {
+        __ ld(to_reg->as_register(), offset, base);
+      }
+      break;
+    case T_ARRAY: // fall through
+    case T_OBJECT: {
+      const Register oop_reg = to_reg->as_register();
+      if (UseCompressedOops && !wide) {
+        __ lwz(oop_reg, offset, base);
+        __ decode_heap_oop(oop_reg);
+      } else {
+        __ ld(oop_reg, offset, base);
+      }
+      __ verify_oop(oop_reg);
+      break;
+    }
+    case T_FLOAT:
+      __ lfs(to_reg->as_float_reg(), offset, base);
+      break;
+    case T_DOUBLE:
+      __ lfd(to_reg->as_double_reg(), offset, base);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+  return load_offset;
+}
+
+
+// Loads a value of the given type from [base + disp] into 'to_reg' using the
+// indexed load forms and returns the code offset of the (first) load instruction.
+int LIR_Assembler::load(Register base, Register disp, LIR_Opr to_reg, BasicType type, bool wide) {
+  const int load_offset = code_offset();
+  switch (type) {
+    case T_BOOLEAN: // fall through
+    case T_BYTE: {
+      // lbzx zero-extends, so sign-extend the byte afterwards.
+      const Register dst = to_reg->as_register();
+      __ lbzx(dst, base, disp);
+      __ extsb(dst, dst);
+      break;
+    }
+    case T_CHAR:
+      __ lhzx(to_reg->as_register(), base, disp);
+      break;
+    case T_SHORT:
+      __ lhax(to_reg->as_register(), base, disp);
+      break;
+    case T_INT:
+      __ lwax(to_reg->as_register(), base, disp);
+      break;
+    case T_ADDRESS:
+      __ ldx(to_reg->as_register(), base, disp);
+      break;
+    case T_ARRAY: // fall through
+    case T_OBJECT: {
+      const Register oop_reg = to_reg->as_register();
+      if (UseCompressedOops && !wide) {
+        __ lwzx(oop_reg, base, disp);
+        __ decode_heap_oop(oop_reg);
+      } else {
+        __ ldx(oop_reg, base, disp);
+      }
+      __ verify_oop(oop_reg);
+      break;
+    }
+    case T_FLOAT:
+      __ lfsx(to_reg->as_float_reg() , base, disp);
+      break;
+    case T_DOUBLE:
+      __ lfdx(to_reg->as_double_reg(), base, disp);
+      break;
+    case T_LONG:
+#ifdef _LP64
+      __ ldx(to_reg->as_register_lo(), base, disp);
+#else
+      Unimplemented();
+#endif
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+  return load_offset;
+}
+
+
+// Stores the constant 'src' into the stack slot 'dest'. The constant is first
+// materialized in R0 and then written with a store whose width matches the
+// constant's type (stw for int/float, std for everything else).
+void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) {
+  LIR_Const* c = src->as_constant_ptr();
+  Register src_reg = R0;
+  switch (c->type()) {
+    case T_INT:
+    case T_FLOAT: {
+      int value = c->as_jint_bits();
+      __ load_const_optimized(src_reg, value);
+      Address addr = frame_map()->address_for_slot(dest->single_stack_ix());
+      __ stw(src_reg, addr.disp(), addr.base());
+      break;
+    }
+    case T_ADDRESS: {
+      int value = c->as_jint_bits();
+      __ load_const_optimized(src_reg, value);
+      Address addr = frame_map()->address_for_slot(dest->single_stack_ix());
+      __ std(src_reg, addr.disp(), addr.base());
+      break;
+    }
+    case T_OBJECT: {
+      jobject2reg(c->as_jobject(), src_reg);
+      Address addr = frame_map()->address_for_slot(dest->single_stack_ix());
+      __ std(src_reg, addr.disp(), addr.base());
+      break;
+    }
+    case T_LONG:
+    case T_DOUBLE: {
+      // Keep all 64 bits: holding the value in an 'int' would truncate the
+      // constant before the 64-bit store below.
+      jlong value = c->as_jlong_bits();
+      __ load_const_optimized(src_reg, value);
+      Address addr = frame_map()->address_for_double_slot(dest->double_stack_ix());
+      __ std(src_reg, addr.disp(), addr.base());
+      break;
+    }
+    default:
+      Unimplemented();
+  }
+}
+
+
+// Stores the constant 'src' to the memory location 'dest'. The constant is
+// materialized in R0 and then written through one of the store() helpers.
+// When 'info' is given, either an explicit null check on the base register is
+// emitted up front or the store itself is registered as the implicit null check.
+void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) {
+  LIR_Const* const constant = src->as_constant_ptr();
+  LIR_Address* const addr = dest->as_address_ptr();
+  const Register base = addr->base()->as_pointer_register();
+  // Null check for large offsets in LIRGenerator::do_StoreField.
+  const bool needs_explicit_null_check = !ImplicitNullChecks;
+  const bool has_info = (info != NULL);
+
+  if (has_info && needs_explicit_null_check) {
+    explicit_null_check(base, info);
+  }
+
+  // Materialize the constant in R0; 'tmp' is the matching LIR operand.
+  LIR_Opr tmp = LIR_OprFact::illegalOpr;
+  switch (constant->type()) {
+    case T_FLOAT:
+      type = T_INT; // Float constants are stored via their raw int bits.
+      // fall through
+    case T_INT:
+    case T_ADDRESS:
+      tmp = FrameMap::R0_opr;
+      __ load_const_optimized(tmp->as_register(), constant->as_jint_bits());
+      break;
+
+    case T_DOUBLE:
+      type = T_LONG; // Double constants are stored via their raw long bits.
+      // fall through
+    case T_LONG:
+      tmp = FrameMap::R0_long_opr;
+      __ load_const_optimized(tmp->as_register_lo(), constant->as_jlong_bits());
+      break;
+
+    case T_OBJECT:
+      tmp = FrameMap::R0_opr;
+      if (UseCompressedOops && !wide && constant->as_jobject() != NULL) {
+        AddressLiteral oop_addr = __ constant_oop_address(constant->as_jobject());
+        __ lis(R0, oop_addr.value() >> 16); // Don't care about sign extend (will use stw).
+        __ relocate(oop_addr.rspec(), /*compressed format*/ 1);
+        __ ori(R0, R0, oop_addr.value() & 0xffff);
+      } else {
+        jobject2reg(constant->as_jobject(), R0);
+      }
+      break;
+
+    default:
+      Unimplemented();
+  }
+
+  // The address is either reg+reg or reg+disp.
+  int offset = -1;
+  if (addr->index()->is_valid()) {
+    assert(addr->disp() == 0, "must be zero");
+    const Register index = addr->index()->as_pointer_register();
+    offset = store(tmp, base, index, type, wide);
+  } else {
+    assert(Assembler::is_simm16(addr->disp()), "can't handle larger addresses");
+    offset = store(tmp, base, addr->disp(), type, wide, false);
+  }
+
+  if (has_info) {
+    assert(offset != -1, "offset should've been set");
+    if (!needs_explicit_null_check) {
+      add_debug_info_for_null_check(offset, info);
+    }
+  }
+}
+
+
+// Loads the constant 'src' into the register 'dest'. Object and metadata
+// constants may be patchable; floating point constants bound for an FPU
+// register are loaded from the constant section, all other constants are
+// materialized directly.
+void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) {
+  LIR_Const* const constant = src->as_constant_ptr();
+  LIR_Opr to_reg = dest;
+
+  switch (constant->type()) {
+    case T_INT:     // fall through
+    case T_ADDRESS: // Yes, addresses are read via as_jint ...
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ load_const_optimized(dest->as_register(), constant->as_jint(), R0);
+      break;
+
+    case T_LONG:
+      assert(patch_code == lir_patch_none, "no patching handled here");
+      __ load_const_optimized(dest->as_register_lo(), constant->as_jlong(), R0);
+      break;
+
+    case T_OBJECT:
+      if (patch_code != lir_patch_none) {
+        jobject2reg_with_patching(to_reg->as_register(), info);
+      } else {
+        jobject2reg(constant->as_jobject(), to_reg->as_register());
+      }
+      break;
+
+    case T_METADATA:
+      if (patch_code != lir_patch_none) {
+        klass2reg_with_patching(to_reg->as_register(), info);
+      } else {
+        metadata2reg(constant->as_metadata(), to_reg->as_register());
+      }
+      break;
+
+    case T_FLOAT:
+      if (!to_reg->is_single_fpu()) {
+        // Float bits requested in a general purpose register.
+        assert(to_reg->is_single_cpu(), "Must be a cpu register.");
+        __ load_const_optimized(to_reg->as_register(), jint_cast(constant->as_jfloat()), R0);
+      } else {
+        // Place the value in the constant section and load it from there.
+        address const_addr = __ float_constant(constant->as_jfloat());
+        if (const_addr == NULL) {
+          bailout("const section overflow");
+          break;
+        }
+        RelocationHolder rspec = internal_word_Relocation::spec(const_addr);
+        __ relocate(rspec);
+        __ load_const(R0, const_addr);
+        __ lfsx(to_reg->as_float_reg(), R0);
+      }
+      break;
+
+    case T_DOUBLE:
+      if (!to_reg->is_double_fpu()) {
+        // Double bits requested in a general purpose register.
+        assert(to_reg->is_double_cpu(), "Must be a long register.");
+        __ load_const_optimized(to_reg->as_register_lo(), jlong_cast(constant->as_jdouble()), R0);
+      } else {
+        // Place the value in the constant section and load it from there.
+        address const_addr = __ double_constant(constant->as_jdouble());
+        if (const_addr == NULL) {
+          bailout("const section overflow");
+          break;
+        }
+        RelocationHolder rspec = internal_word_Relocation::spec(const_addr);
+        __ relocate(rspec);
+        __ load_const(R0, const_addr);
+        __ lfdx(to_reg->as_double_reg(), R0);
+      }
+      break;
+
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+
+// Not implemented in this file; the default Address is returned only to
+// satisfy the signature.
+Address LIR_Assembler::as_Address(LIR_Address* addr) {
+  Unimplemented();
+  return Address();
+}
+
+
+// Returns the second address component of 'addr': its index register when one
+// is present, otherwise its constant displacement.
+inline RegisterOrConstant index_or_disp(LIR_Address* addr) {
+  const bool has_index = !addr->index()->is_illegal();
+  if (has_index) {
+    return (RegisterOrConstant)(addr->index()->as_pointer_register());
+  }
+  return (RegisterOrConstant)(addr->disp());
+}
+
+
+// Copies a value from stack slot 'src' to stack slot 'dest' through R0.
+// int/float values are copied as 32 bits, all other supported types as 64 bits.
+void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) {
+  const Register scratch = R0;
+  Address from;
+  Address to;
+  bool copy_64_bits = true;
+
+  // Resolve both slot addresses and the copy width for the given type.
+  switch (type) {
+    case T_INT:
+    case T_FLOAT:
+      from = frame_map()->address_for_slot(src->single_stack_ix());
+      to   = frame_map()->address_for_slot(dest->single_stack_ix());
+      copy_64_bits = false;
+      break;
+
+    case T_ADDRESS:
+    case T_OBJECT:
+      from = frame_map()->address_for_slot(src->single_stack_ix());
+      to   = frame_map()->address_for_slot(dest->single_stack_ix());
+      break;
+
+    case T_LONG:
+    case T_DOUBLE:
+      from = frame_map()->address_for_double_slot(src->double_stack_ix());
+      to   = frame_map()->address_for_double_slot(dest->double_stack_ix());
+      break;
+
+    default:
+      ShouldNotReachHere();
+      return; // Nothing is emitted for unsupported types.
+  }
+
+  if (copy_64_bits) {
+    __ ld(scratch, from.disp(), from.base());
+    __ std(scratch, to.disp(), to.base());
+  } else {
+    __ lwz(scratch, from.disp(), from.base());
+    __ stw(scratch, to.disp(), to.base());
+  }
+}
+
+
+// Not implemented in this file; the default Address is returned only to
+// satisfy the signature.
+Address LIR_Assembler::as_Address_hi(LIR_Address* addr) {
+  Unimplemented();
+  return Address();
+}
+
+
+// Not implemented in this file; the default Address is returned only to
+// satisfy the signature.
+Address LIR_Assembler::as_Address_lo(LIR_Address* addr) {
+  Unimplemented();
+  return Address();
+}
+
+
+// Loads a value of the given type from the memory operand 'src_opr' into
+// 'dest'. Handles reg+disp (including displacements that do not fit a simm16
+// and patchable displacements) as well as reg+reg addresses. When 'info' is
+// given, either an explicit null check is emitted up front or the load is
+// registered as the implicit null check.
+void LIR_Assembler::mem2reg(LIR_Opr src_opr, LIR_Opr dest, BasicType type,
+                            LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool unaligned) {
+
+  assert(type != T_METADATA, "load of metadata ptr not supported");
+  LIR_Address* const addr = src_opr->as_address_ptr();
+  LIR_Opr to_reg = dest;
+
+  const Register base_reg = addr->base()->as_pointer_register();
+  const int disp_value = addr->disp();
+  const bool must_patch = (patch_code != lir_patch_none);
+  // null check for large offsets in LIRGenerator::do_LoadField
+  const bool needs_explicit_null_check = !os::zero_page_read_protected() || !ImplicitNullChecks;
+
+  if (info != NULL && needs_explicit_null_check) {
+    explicit_null_check(base_reg, info);
+  }
+
+  if (addr->base()->type() == T_OBJECT) {
+    __ verify_oop(base_reg);
+  }
+
+  PatchingStub* patch_stub = NULL;
+  if (must_patch) {
+    patch_stub = new PatchingStub(_masm, PatchingStub::access_field_id);
+    assert(!to_reg->is_double_cpu() ||
+           patch_code == lir_patch_none ||
+           patch_code == lir_patch_normal, "patching doesn't match register");
+  }
+
+  // Decide which register, if any, carries the second address component.
+  Register index_reg = noreg;
+  if (!addr->index()->is_illegal()) {
+    index_reg = addr->index()->as_pointer_register();
+    assert(disp_value == 0, "can't handle 3 operand addresses");
+  } else if (!Assembler::is_simm16(disp_value)) {
+    if (must_patch) {
+      __ load_const32(R0, 0); // patchable int
+    } else {
+      __ load_const_optimized(R0, disp_value);
+    }
+    index_reg = R0;
+  }
+
+  // Remember the offset of the load. The patching_epilog must be done
+  // before the call to add_debug_info, otherwise the PcDescs don't get
+  // entered in increasing order.
+  int load_offset;
+  if (index_reg != noreg) {
+    assert(!unaligned, "unexpected");
+    load_offset = load(base_reg, index_reg, to_reg, type, wide);
+  } else {
+    assert(Assembler::is_simm16(disp_value), "should have set this up");
+    load_offset = load(base_reg, disp_value, to_reg, type, wide, unaligned);
+  }
+
+  if (patch_stub != NULL) {
+    patching_epilog(patch_stub, patch_code, base_reg, info);
+  }
+  if (info != NULL && !needs_explicit_null_check) {
+    add_debug_info_for_null_check(load_offset, info);
+  }
+}
+
+
+// Loads the stack slot 'src' into the register 'dest'. The load width is taken
+// from dest's own type; the value is always treated as wide (never a
+// compressed oop).
+void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) {
+  Address slot;
+  if (src->is_single_word()) {
+    slot = frame_map()->address_for_slot(src->single_stack_ix());
+  } else if (src->is_double_word())  {
+    slot = frame_map()->address_for_double_slot(src->double_stack_ix());
+  }
+
+  // A slot is unaligned if its unbiased displacement is not a multiple of 8.
+  const int unbiased_disp = slot.disp() - STACK_BIAS;
+  const bool misaligned = (unbiased_disp % 8 != 0);
+  load(slot.base(), slot.disp(), dest, dest->type(), true /*wide*/, misaligned);
+}
+
+
+// Stores the register 'from_reg' into the stack slot 'dest'. The store width
+// is taken from from_reg's own type; the value is always treated as wide
+// (never a compressed oop).
+void LIR_Assembler::reg2stack(LIR_Opr from_reg, LIR_Opr dest, BasicType type, bool pop_fpu_stack) {
+  Address slot;
+  if (dest->is_single_word()) {
+    slot = frame_map()->address_for_slot(dest->single_stack_ix());
+  } else if (dest->is_double_word())  {
+    // NOTE(review): stack2reg uses address_for_double_slot() for double-word
+    // slots while this uses address_for_slot() -- confirm this is intended.
+    slot = frame_map()->address_for_slot(dest->double_stack_ix());
+  }
+
+  // A slot is unaligned if its unbiased displacement is not a multiple of 8.
+  const int unbiased_disp = slot.disp() - STACK_BIAS;
+  const bool misaligned = (unbiased_disp % 8 != 0);
+  store(from_reg, slot.base(), slot.disp(), from_reg->type(), true /*wide*/, misaligned);
+}
+
+
+// Register-to-register move. Both operands must be of the same kind (both
+// floating point or both general purpose); moves are skipped by the
+// *_if_needed helpers when source and destination coincide.
+void LIR_Assembler::reg2reg(LIR_Opr from_reg, LIR_Opr to_reg) {
+  const bool from_is_fp = from_reg->is_float_kind();
+  const bool to_is_fp   = to_reg->is_float_kind();
+
+  if (from_is_fp && to_is_fp) {
+    if (from_reg->is_double_fpu()) {
+      // double -> double
+      assert(to_reg->is_double_fpu(), "should match");
+      __ fmr_if_needed(to_reg->as_double_reg(), from_reg->as_double_reg());
+    } else {
+      // float -> float
+      assert(to_reg->is_single_fpu(), "should match");
+      __ fmr_if_needed(to_reg->as_float_reg(), from_reg->as_float_reg());
+    }
+  } else if (!from_is_fp && !to_is_fp) {
+    if (from_reg->is_double_cpu()) {
+      // Source is a double-word cpu register.
+      __ mr_if_needed(to_reg->as_pointer_register(), from_reg->as_pointer_register());
+    } else if (to_reg->is_double_cpu()) {
+      // Single-word source into a double-word destination.
+      __ mr_if_needed(to_reg->as_register_lo(), from_reg->as_register());
+    } else {
+      // int to int moves
+      __ mr_if_needed(to_reg->as_register(), from_reg->as_register());
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+
+  const BasicType to_type = to_reg->type();
+  if (to_type == T_OBJECT || to_type == T_ARRAY) {
+    __ verify_oop(to_reg->as_register());
+  }
+}
+
+
+// Stores 'from_reg' to the memory operand 'dest'. Handles reg+disp (including
+// displacements that do not fit a simm16 and patchable displacements) and
+// reg+reg addresses. Oops are compressed into R0 first when required; if a
+// large displacement is needed at the same time, R29_TOC temporarily holds the
+// displacement and is re-initialized afterwards.
+void LIR_Assembler::reg2mem(LIR_Opr from_reg, LIR_Opr dest, BasicType type,
+                            LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack,
+                            bool wide, bool unaligned) {
+  assert(type != T_METADATA, "store of metadata ptr not supported");
+  LIR_Address* const addr = dest->as_address_ptr();
+
+  const Register base_reg = addr->base()->as_pointer_register();
+  const int disp_value = addr->disp();
+  const bool has_index = !addr->index()->is_illegal();
+  const bool must_patch = (patch_code != lir_patch_none);
+  const bool is_oop_store = (type == T_ARRAY || type == T_OBJECT);
+  const bool compress_oop = is_oop_store && UseCompressedOops && !wide &&
+                            Universe::narrow_oop_mode() != Universe::UnscaledNarrowOop;
+  const bool load_disp = !has_index && !Assembler::is_simm16(disp_value);
+  // Avoid register conflict, also do null check before killing R29.
+  const bool use_R29 = compress_oop && load_disp;
+  // Null check for large offsets in LIRGenerator::do_StoreField.
+  const bool needs_explicit_null_check = !ImplicitNullChecks || use_R29;
+
+  if (info != NULL && needs_explicit_null_check) {
+    explicit_null_check(base_reg, info);
+  }
+
+  if (addr->base()->is_oop_register()) {
+    __ verify_oop(base_reg);
+  }
+
+  PatchingStub* patch_stub = NULL;
+  if (must_patch) {
+    patch_stub = new PatchingStub(_masm, PatchingStub::access_field_id);
+    assert(!from_reg->is_double_cpu() ||
+           patch_code == lir_patch_none ||
+           patch_code == lir_patch_normal, "patching doesn't match register");
+  }
+
+  // Decide which register, if any, carries the second address component.
+  Register index_reg = noreg;
+  if (has_index) {
+    index_reg = addr->index()->as_pointer_register();
+    assert(disp_value == 0, "can't handle 3 operand addresses");
+  } else if (load_disp) {
+    index_reg = use_R29 ? R29_TOC : R0;
+    if (must_patch) {
+      __ load_const32(index_reg, 0); // patchable int
+    } else {
+      __ load_const_optimized(index_reg, disp_value);
+    }
+  }
+
+  if (compress_oop) {
+    // Store the encoded oop instead of the original register.
+    const Register encoded = __ encode_heap_oop(R0, from_reg->as_register());
+    from_reg = FrameMap::as_opr(encoded);
+  }
+
+  // Remember the offset of the store. The patching_epilog must be done
+  // before the call to add_debug_info_for_null_check, otherwise the PcDescs
+  // don't get entered in increasing order.
+  int store_offset;
+  if (index_reg != noreg) {
+    assert(!unaligned, "unexpected");
+    store_offset = store(from_reg, base_reg, index_reg, type, wide);
+  } else {
+    assert(Assembler::is_simm16(disp_value), "should have set this up");
+    store_offset = store(from_reg, base_reg, disp_value, type, wide, unaligned);
+  }
+
+  if (use_R29) {
+    __ load_const_optimized(R29_TOC, MacroAssembler::global_toc(), R0); // reinit
+  }
+
+  if (patch_stub != NULL) {
+    patching_epilog(patch_stub, patch_code, base_reg, info);
+  }
+
+  if (info != NULL && !needs_explicit_null_check) {
+    add_debug_info_for_null_check(store_offset, info);
+  }
+}
+
+
+// Emits the method return sequence: pop the frame, restore LR from the
+// caller's frame, poll the safepoint page (marked as poll_return) and return.
+void LIR_Assembler::return_op(LIR_Opr result) {
+  const Register return_pc    = R11;
+  const Register polling_page = R12;
+
+  // The stack is popped before the safepoint code.
+  const int frame_size = initial_frame_size_in_bytes();
+  if (!Assembler::is_simm(frame_size, 16)) {
+    __ pop_frame();
+  } else {
+    __ addi(R1_SP, R1_SP, frame_size);
+  }
+
+  if (!LoadPollAddressFromThread) {
+    __ load_const_optimized(polling_page, (long)(address) os::get_polling_page(), R0); // TODO: PPC port: get_standard_polling_page()
+  } else {
+    // TODO: PPC port __ ld(polling_page, in_bytes(JavaThread::poll_address_offset()), R16_thread);
+    Unimplemented();
+  }
+
+  // Fetch the return pc relative to the callers' sp and move it to LR.
+  __ ld(return_pc, _abi(lr), R1_SP);
+  __ mtlr(return_pc);
+
+  // The code position of the load from the safepoint polling page has to be
+  // marked as relocInfo::poll_return_type.
+  __ relocate(relocInfo::poll_return_type);
+  __ load_from_polling_page(polling_page);
+
+  // Return.
+  __ blr();
+}
+
+
+// Emits a safepoint poll using 'tmp' as the polling page register and returns
+// the code offset of the poll instruction.
+int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) {
+
+  if (!LoadPollAddressFromThread) {
+    const Register poll_reg = tmp->as_register();
+    __ load_const_optimized(poll_reg, (intptr_t)os::get_polling_page(), R0); // TODO: PPC port: get_standard_polling_page()
+    if (info != NULL) {
+      add_debug_info_for_branch(info);
+    }
+    const int poll_offset = __ offset();
+    __ relocate(relocInfo::poll_type);
+    __ load_from_polling_page(tmp->as_register());
+    return poll_offset;
+  }
+
+  // Loading the poll address from the thread is not implemented yet.
+  const Register poll_addr = tmp->as_register();
+  // TODO: PPC port __ ld(poll_addr, in_bytes(JavaThread::poll_address_offset()), R16_thread);
+  Unimplemented();
+  __ relocate(relocInfo::poll_type); // XXX
+  guarantee(info != NULL, "Shouldn't be NULL");
+  const int poll_offset = __ offset();
+  add_debug_info_for_branch(info);
+  __ load_from_polling_page(poll_addr);
+  return poll_offset;
+}
+
+
+// Emits the static call stub that belongs to the call instruction at the
+// current pc. The stub loads the TOC, the inline cache value and the call
+// target (both still placeholders) and branches to the target.
+void LIR_Assembler::emit_static_call_stub() {
+  const address call_pc = __ pc();
+  if (__ start_a_stub(max_static_call_stub_size) == NULL) {
+    bailout("static call stub overflow");
+    return;
+  }
+
+  // java_to_interp stubs use R11_scratch1 as scratch register while call
+  // trampoline stubs use R12_scratch2, which allows telling them apart
+  // (see is_NativeCallTrampolineStub_at()).
+  const Register reg_scratch = R11_scratch1;
+
+  // The static stub relocation relates this stub to the call instruction
+  // at call_pc in the instructions code-section.
+  int start = __ offset();
+  __ relocate(static_stub_Relocation::spec(call_pc));
+
+  // Stub code:
+  // - load the TOC
+  // - load the inline cache oop from the constant pool
+  // - load the call target from the constant pool
+  // - call
+  __ calculate_address_from_global_toc(reg_scratch, __ method_toc());
+  AddressLiteral ic = __ allocate_metadata_address((Metadata *)NULL);
+  bool success = __ load_const_from_method_toc(R19_inline_cache_reg, ic, reg_scratch, /*fixed_size*/ true);
+
+  if (!ReoptimizeCallSequences) {
+    AddressLiteral target_placeholder((address)-1);
+    success = success && __ load_const_from_method_toc(reg_scratch, target_placeholder, reg_scratch, /*fixed_size*/ true);
+    __ mtctr(reg_scratch);
+    __ bctr();
+  } else {
+    __ b64_patchable((address)-1, relocInfo::none);
+  }
+
+  if (!success) {
+    bailout("const section overflow");
+    return;
+  }
+
+  assert(__ offset() - start <= max_static_call_stub_size, "stub too big");
+  __ end_a_stub();
+}
+
+
+// Emits the compare of opr1 with opr2 into BOOL_RESULT. Floating point
+// operands use fcmpu; integer and long operands use a signed compare unless
+// the condition is belowEqual/aboveEqual, in which case a logical (unsigned)
+// compare is emitted. Constants that do not fit the immediate field are
+// materialized in R0 first.
+void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) {
+  const bool unsigned_comp = (condition == lir_cond_belowEqual || condition == lir_cond_aboveEqual);
+
+  if (opr1->is_single_fpu()) {
+    __ fcmpu(BOOL_RESULT, opr1->as_float_reg(), opr2->as_float_reg());
+    return;
+  }
+
+  if (opr1->is_double_fpu()) {
+    __ fcmpu(BOOL_RESULT, opr1->as_double_reg(), opr2->as_double_reg());
+    return;
+  }
+
+  if (opr1->is_single_cpu()) {
+    const Register lhs = opr1->as_register();
+
+    if (opr2->is_constant()) {
+      LIR_Const* const rhs_const = opr2->as_constant_ptr();
+      switch (rhs_const->type()) {
+        case T_INT: {
+          const jint con = rhs_const->as_jint();
+          if (unsigned_comp) {
+            if (Assembler::is_uimm(con, 16)) {
+              __ cmplwi(BOOL_RESULT, lhs, con);
+            } else {
+              __ load_const_optimized(R0, con);
+              __ cmplw(BOOL_RESULT, lhs, R0);
+            }
+          } else {
+            if (Assembler::is_simm(con, 16)) {
+              __ cmpwi(BOOL_RESULT, lhs, con);
+            } else {
+              __ load_const_optimized(R0, con);
+              __ cmpw(BOOL_RESULT, lhs, R0);
+            }
+          }
+          break;
+        }
+
+        case T_OBJECT: {
+          // There are only equal/notequal comparisons on objects.
+          assert(condition == lir_cond_equal || condition == lir_cond_notEqual, "oops");
+          const jobject con = rhs_const->as_jobject();
+          if (con != NULL) {
+            jobject2reg(con, R0);
+            __ cmpd(BOOL_RESULT, lhs, R0);
+          } else {
+            __ cmpdi(BOOL_RESULT, lhs, 0);
+          }
+          break;
+        }
+
+        default:
+          ShouldNotReachHere();
+          break;
+      }
+    } else if (opr2->is_address()) {
+      DEBUG_ONLY( Unimplemented(); ) // Seems to be unused at the moment.
+      LIR_Address* const addr = opr2->as_address_ptr();
+      const BasicType type = addr->type();
+      if (type == T_OBJECT) {
+        __ ld(R0, index_or_disp(addr), addr->base()->as_register());
+      } else {
+        __ lwa(R0, index_or_disp(addr), addr->base()->as_register());
+      }
+      __ cmpd(BOOL_RESULT, lhs, R0);
+    } else if (unsigned_comp) {
+      __ cmplw(BOOL_RESULT, lhs, opr2->as_register());
+    } else {
+      __ cmpw(BOOL_RESULT, lhs, opr2->as_register());
+    }
+    return;
+  }
+
+  if (opr1->is_double_cpu()) {
+    const Register lhs = opr1->as_register_lo();
+
+    if (opr2->is_constant()) {
+      const jlong con = opr2->as_constant_ptr()->as_jlong();
+      if (unsigned_comp) {
+        if (Assembler::is_uimm(con, 16)) {
+          __ cmpldi(BOOL_RESULT, lhs, con);
+        } else {
+          __ load_const_optimized(R0, con);
+          __ cmpld(BOOL_RESULT, lhs, R0);
+        }
+      } else {
+        if (Assembler::is_simm(con, 16)) {
+          __ cmpdi(BOOL_RESULT, lhs, con);
+        } else {
+          __ load_const_optimized(R0, con);
+          __ cmpd(BOOL_RESULT, lhs, R0);
+        }
+      }
+    } else if (opr2->is_register()) {
+      if (unsigned_comp) {
+        __ cmpld(BOOL_RESULT, lhs, opr2->as_register_lo());
+      } else {
+        __ cmpd(BOOL_RESULT, lhs, opr2->as_register_lo());
+      }
+    } else {
+      ShouldNotReachHere();
+    }
+    return;
+  }
+
+  if (opr1->is_address()) {
+    DEBUG_ONLY( Unimplemented(); ) // Seems to be unused at the moment.
+    LIR_Address* const addr = opr1->as_address_ptr();
+    const BasicType type = addr->type();
+    assert (opr2->is_constant(), "Checking");
+    if (type == T_OBJECT) {
+      __ ld(R0, index_or_disp(addr), addr->base()->as_register());
+    } else {
+      __ lwa(R0, index_or_disp(addr), addr->base()->as_register());
+    }
+    __ cmpdi(BOOL_RESULT, R0, opr2->as_constant_ptr()->as_jint());
+    return;
+  }
+
+  ShouldNotReachHere();
+}
+
+
+// Three-way compare producing -1, 0 or 1 in 'dst'. Handles float/double
+// compares (lir_cmp_fd2i / lir_ucmp_fd2i) and long compares (lir_cmp_l2i).
+// For floating point operands an unordered result yields -1 for
+// lir_ucmp_fd2i and 1 for lir_cmp_fd2i.
+void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){
+  const Register result = dst->as_register();
+  Label finished;
+
+  const bool is_fp_compare = (code == lir_cmp_fd2i || code == lir_ucmp_fd2i);
+  if (is_fp_compare) {
+    if (left->is_single_fpu()) {
+      __ fcmpu(CCR0, left->as_float_reg(), right->as_float_reg());
+    } else if (left->is_double_fpu()) {
+      __ fcmpu(CCR0, left->as_double_reg(), right->as_double_reg());
+    } else {
+      ShouldNotReachHere();
+    }
+    // Preload the answer for the unordered case and skip the extraction below.
+    const int unordered_result = (code == lir_ucmp_fd2i) ? -1 : 1;
+    __ li(result, unordered_result);
+    __ bso(CCR0, finished);
+  } else if (code == lir_cmp_l2i) {
+    __ cmpd(CCR0, left->as_register_lo(), right->as_register_lo());
+  } else {
+    ShouldNotReachHere();
+  }
+
+  // Turn the CCR0 bits into the integer result.
+  __ mfcr(R0); // set bit 32..33 as follows: <: 0b10, =: 0b00, >: 0b01
+  __ srwi(result, R0, 30);
+  __ srawi(R0, R0, 31);
+  __ orr(result, R0, result); // set result as follows: <: -1, =: 0, >: 1
+  __ bind(finished);
+}
+
+
+// Moves src into the register operand dst, dispatching on where src lives:
+// constant, register or stack slot. Any other operand kind is a fatal error.
+inline void load_to_reg(LIR_Assembler *lasm, LIR_Opr src, LIR_Opr dst) {
+  if (src->is_constant()) {
+    lasm->const2reg(src, dst, lir_patch_none, NULL);
+    return;
+  }
+  if (src->is_register()) {
+    lasm->reg2reg(src, dst);
+    return;
+  }
+  if (src->is_stack()) {
+    lasm->stack2reg(src, dst, dst->type());
+    return;
+  }
+  ShouldNotReachHere();
+}
+
+
+// Conditional move: result = condition ? opr1 : opr2, where the condition is
+// evaluated against the compare outcome held in BOOL_RESULT.
+// Uses isel if the CPU provides it and the result is a CPU register,
+// otherwise falls back to a conditional branch around the second load.
+void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) {
+  // Identical inputs: the condition is irrelevant, a plain move suffices.
+  if (opr1->is_equal(opr2) || opr1->is_same_register(opr2)) {
+    load_to_reg(this, opr1, result);
+    return;
+  }
+
+  // Map the LIR condition onto a condition register bit (cond) and onto
+  // whether that bit has to be set (positive) or cleared for opr1 to win.
+  bool positive = false;
+  Assembler::Condition cond = Assembler::equal;
+  switch (condition) {
+    case lir_cond_equal:
+      positive = true;  cond = Assembler::equal;
+      break;
+    case lir_cond_notEqual:
+      positive = false; cond = Assembler::equal;
+      break;
+    case lir_cond_less:
+      positive = true;  cond = Assembler::less;
+      break;
+    case lir_cond_belowEqual:
+    case lir_cond_lessEqual:
+      positive = false; cond = Assembler::greater;
+      break;
+    case lir_cond_greater:
+      positive = true;  cond = Assembler::greater;
+      break;
+    case lir_cond_aboveEqual:
+    case lir_cond_greaterEqual:
+      positive = false; cond = Assembler::less;
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+
+  // Try to use isel on >=Power7.
+  if (VM_Version::has_isel() && result->is_cpu_register()) {
+    const bool in_reg1 = opr1->is_cpu_register();
+    const bool in_reg2 = opr2->is_cpu_register();
+    const Register result_reg = result->is_single_cpu() ? result->as_register() : result->as_register_lo();
+
+    // An operand which is not in a register yet may be loaded into result_reg.
+    Register first = result_reg;
+    if (in_reg1) {
+      first = opr1->is_single_cpu() ? opr1->as_register() : opr1->as_register_lo();
+    }
+    Register second = result_reg;
+    if (in_reg2) {
+      second = opr2->is_single_cpu() ? opr2->as_register() : opr2->as_register_lo();
+    }
+
+    // isel needs two distinct sources; otherwise use the branch sequence below.
+    if (first != second) {
+      if (!in_reg1) {
+        load_to_reg(this, opr1, result);
+      }
+      if (!in_reg2) {
+        load_to_reg(this, opr2, result);
+      }
+      __ isel(result_reg, BOOL_RESULT, cond, !positive, first, second);
+      return;
+    }
+  }
+
+  // Generic sequence: load opr1, then overwrite it with opr2 unless the
+  // condition holds.
+  load_to_reg(this, opr1, result);
+
+  Label skip;
+  const int bo = positive ? Assembler::bcondCRbiIs1 : Assembler::bcondCRbiIs0;
+  const int bi = Assembler::bi0(BOOL_RESULT, cond);
+  __ bc(bo, bi, skip);
+
+  load_to_reg(this, opr2, result);
+  __ bind(skip);
+}
+
+
+// Emits add/sub/mul (and div for floating point) of left and right into dest.
+// left and dest must be registers. right is either a register or a constant;
+// constants are only supported for integer and long operations and have to
+// fit into a signed 16 bit immediate.
+void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest,
+                             CodeEmitInfo* info, bool pop_fpu_stack) {
+  assert(info == NULL, "unused on this code path");
+  assert(left->is_register(), "wrong items state");
+  assert(dest->is_register(), "wrong items state");
+
+  if (!right->is_register()) {
+    assert (right->is_constant(), "must be constant");
+
+    if (dest->is_single_cpu()) {
+      const Register lreg = left->as_register();
+      const Register res  = dest->as_register();
+      int simm16 = right->as_constant_ptr()->as_jint();
+
+      // A subtraction is emitted as an addition of the negated constant.
+      if (code == lir_sub) {
+        assert(Assembler::is_simm16(-simm16), "cannot encode"); // see do_ArithmeticOp_Int
+        simm16 = -simm16;
+      }
+
+      switch (code) {
+        case lir_sub: // negated above
+        case lir_add:
+          // Adding 0 in place is a nop.
+          if (!(res == lreg && simm16 == 0)) {
+            __ addi(res, lreg, simm16);
+          }
+          break;
+        case lir_mul:
+          // Multiplying by 1 in place is a nop.
+          if (!(res == lreg && simm16 == 1)) {
+            __ mulli(res, lreg, simm16);
+          }
+          break;
+        default:
+          ShouldNotReachHere();
+      }
+    } else {
+      const Register lreg = left->as_pointer_register();
+      const Register res  = dest->as_register_lo();
+      long con = right->as_constant_ptr()->as_jlong();
+      assert(Assembler::is_simm16(con), "must be simm16");
+
+      // A subtraction is emitted as an addition of the negated constant.
+      if (code == lir_sub) {
+        assert(Assembler::is_simm16(-con), "cannot encode");  // see do_ArithmeticOp_Long
+        con = -con;
+      }
+
+      switch (code) {
+        case lir_sub: // negated above
+        case lir_add:
+          if (!(res == lreg && con == 0)) {
+            __ addi(res, lreg, (int)con);
+          }
+          break;
+        case lir_mul:
+          if (!(res == lreg && con == 1)) {
+            __ mulli(res, lreg, (int)con);
+          }
+          break;
+        default:
+          ShouldNotReachHere();
+      }
+    }
+    return;
+  }
+
+  // Register operand.
+  if (dest->is_float_kind()) {
+
+    if (right->is_single_fpu()) {
+      const FloatRegister lreg = left->as_float_reg();
+      const FloatRegister rreg = right->as_float_reg();
+      const FloatRegister res  = dest->as_float_reg();
+      switch (code) {
+        case lir_add:
+          __ fadds(res, lreg, rreg);
+          break;
+        case lir_sub:
+          __ fsubs(res, lreg, rreg);
+          break;
+        case lir_mul:
+        case lir_mul_strictfp:
+          __ fmuls(res, lreg, rreg);
+          break;
+        case lir_div:
+        case lir_div_strictfp:
+          __ fdivs(res, lreg, rreg);
+          break;
+        default:
+          ShouldNotReachHere();
+      }
+    } else {
+      const FloatRegister lreg = left->as_double_reg();
+      const FloatRegister rreg = right->as_double_reg();
+      const FloatRegister res  = dest->as_double_reg();
+      switch (code) {
+        case lir_add:
+          __ fadd(res, lreg, rreg);
+          break;
+        case lir_sub:
+          __ fsub(res, lreg, rreg);
+          break;
+        case lir_mul:
+        case lir_mul_strictfp:
+          __ fmul(res, lreg, rreg);
+          break;
+        case lir_div:
+        case lir_div_strictfp:
+          __ fdiv(res, lreg, rreg);
+          break;
+        default:
+          ShouldNotReachHere();
+      }
+    }
+
+  } else if (dest->is_double_cpu()) {
+
+    const Register dst_lo = dest->as_register_lo();
+    const Register op1_lo = left->as_pointer_register();
+    const Register op2_lo = right->as_pointer_register();
+
+    switch (code) {
+      case lir_add:
+        __ add(dst_lo, op1_lo, op2_lo);
+        break;
+      case lir_sub:
+        __ sub(dst_lo, op1_lo, op2_lo);
+        break;
+      case lir_mul:
+        __ mulld(dst_lo, op1_lo, op2_lo);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+
+  } else {
+    assert (right->is_single_cpu(), "Just Checking");
+
+    const Register lreg = left->as_register();
+    const Register res  = dest->as_register();
+    const Register rreg = right->as_register();
+    switch (code) {
+      case lir_add:
+        __ add(res, lreg, rreg);
+        break;
+      case lir_sub:
+        __ sub(res, lreg, rreg);
+        break;
+      case lir_mul:
+        __ mullw(res, lreg, rreg);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+  }
+}
+
+
+// Not implemented for this platform: reaching this function stops the VM
+// via Unimplemented().
+void LIR_Assembler::fpop() {
+  Unimplemented();
+}
+
+
+// Emits the math intrinsics handled inline: lir_sqrt and lir_abs, both
+// operating on double registers. Every other code is a fatal error.
+void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr thread, LIR_Opr dest, LIR_Op* op) {
+  if (code == lir_sqrt) {
+    __ fsqrt(dest->as_double_reg(), value->as_double_reg());
+  } else if (code == lir_abs) {
+    __ fabs(dest->as_double_reg(), value->as_double_reg());
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+// Emits a bitwise and/or/xor of left and right into dest. right is either a
+// register or a constant; for constants the matching immediate form of the
+// instruction is selected from the bits which are set in the constant.
+void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest) {
+  if (!right->is_constant()) {
+    assert(right->is_register(), "right should be in register");
+
+    if (dest->is_single_cpu()) {
+      const Register Rdst   = dest->as_register();
+      const Register Rleft  = left->as_register();
+      const Register Rright = right->as_register();
+      switch (code) {
+        case lir_logic_and:
+          __ andr(Rdst, Rleft, Rright);
+          break;
+        case lir_logic_or:
+          __ orr (Rdst, Rleft, Rright);
+          break;
+        case lir_logic_xor:
+          __ xorr(Rdst, Rleft, Rright);
+          break;
+        default:
+          ShouldNotReachHere();
+      }
+    } else {
+      // Oops live in single cpu registers, everything else in the lo register.
+      Register Rleft;
+      if (left->is_single_cpu() && left->is_oop_register()) {
+        Rleft = left->as_register();
+      } else {
+        Rleft = left->as_register_lo();
+      }
+      Register Rright;
+      if (right->is_single_cpu() && right->is_oop_register()) {
+        Rright = right->as_register();
+      } else {
+        Rright = right->as_register_lo();
+      }
+
+      switch (code) {
+        case lir_logic_and:
+          __ andr(dest->as_register_lo(), Rleft, Rright);
+          break;
+        case lir_logic_or:
+          __ orr (dest->as_register_lo(), Rleft, Rright);
+          break;
+        case lir_logic_xor:
+          __ xorr(dest->as_register_lo(), Rleft, Rright);
+          break;
+        default:
+          ShouldNotReachHere();
+      }
+    }
+    return;
+  }
+
+  // Constant operand, see do_LogicOp.
+  const bool is_int = dest->is_single_cpu();
+  const long uimm = is_int ? (long)right->as_constant_ptr()->as_jint()
+                           : (long)right->as_constant_ptr()->as_jlong();
+  const Register d = is_int ? dest->as_register() : dest->as_register_lo();
+  const Register l = is_int ? left->as_register() : left->as_register_lo();
+
+  // Constant without its low 16 resp. low 32 bits.
+  const long uimms  = (unsigned long)uimm >> 16;
+  const long uimmss = (unsigned long)uimm >> 32;
+
+  switch (code) {
+    case lir_logic_and:
+      if (uimmss != 0 || (uimms != 0 && (uimm & 0xFFFF) != 0) || is_power_of_2_long(uimm)) {
+        __ andi(d, l, uimm); // special cases
+      } else if (uimms != 0) {
+        __ andis_(d, l, uimms);
+      } else {
+        __ andi_(d, l, uimm);
+      }
+      break;
+
+    case lir_logic_or:
+      if (uimms != 0) {
+        assert((uimm & 0xFFFF) == 0, "sanity");
+        __ oris(d, l, uimms);
+      } else {
+        __ ori(d, l, uimm);
+      }
+      break;
+
+    case lir_logic_xor:
+      if (uimm == -1) {
+        __ nand(d, l, l); // special case
+      } else if (uimms != 0) {
+        assert((uimm & 0xFFFF) == 0, "sanity");
+        __ xoris(d, l, uimms);
+      } else {
+        __ xori(d, l, uimm);
+      }
+      break;
+
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+
+// Returns log2 of the array element size of t, i.e. the shift count which
+// scales an element index to a byte offset. Element sizes other than
+// 1, 2, 4 and 8 bytes are a fatal error.
+int LIR_Assembler::shift_amount(BasicType t) {
+  const int elem_size = type2aelembytes(t);
+  if (elem_size == 1) { return 0; }
+  if (elem_size == 2) { return 1; }
+  if (elem_size == 4) { return 2; }
+  if (elem_size == 8) { return 3; }
+  ShouldNotReachHere();
+  return -1;
+}
+
+
+// Emits an athrow: loads the pc of the throwing instruction into exceptionPC,
+// records the call info for that pc and jumps to the Runtime1 exception
+// handler stub (the nofpu variant if the compilation contains no FPU code).
+void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) {
+  info->add_register_oop(exceptionOop);
+
+  // Reuse the debug info from the safepoint poll for the throw op itself.
+  const address pc_for_athrow = __ pc();
+  const int pc_for_athrow_offset = __ offset();
+  __ calculate_address_from_global_toc(exceptionPC->as_register(), pc_for_athrow, true, true, /*add_relocation*/ true);
+  add_call_info(pc_for_athrow_offset, info); // for exception handler
+
+  const address stub = compilation()->has_fpu_code()
+                         ? Runtime1::entry_for(Runtime1::handle_exception_id)
+                         : Runtime1::entry_for(Runtime1::handle_exception_nofpu_id);
+
+  // Compute the stub address relative to the TOC and jump (no return).
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(stub));
+  __ mtctr(R0);
+  __ bctr();
+}
+
+
+// Emits a branch to the unwind handler of this method. The exception oop
+// has to be passed in R3.
+void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) {
+  // Note: Not used with EnableDebuggingOnDemand.
+  assert(R3 == exceptionOop->as_register(), "should match");
+  __ b(_unwind_handler_entry);
+}
+
+
+// Emits the inline code for a LIR_OpArrayCopy.
+//
+// Without an expected array type the generic arraycopy stub (or, if it was
+// not generated, the C function Runtime1::arraycopy) does all the work and the
+// slow path stub of the op is entered if it reports a negative result.
+//
+// With an expected array type the checks requested by op->flags() (null,
+// negative position/length, range, type) are emitted inline. A failing check
+// branches to the slow path stub; otherwise the arraycopy stub selected for
+// the element type is called directly. For object arrays of unknown
+// compatibility the checkcast arraycopy stub is used if available.
+//
+// After a partial copy the code computes ~R3_RET and uses it to adjust
+// length, src_pos and dst_pos before entering the slow path.
+// NOTE(review): presumably the stubs return -1^K with K = number of elements
+// copied, so ~R3_RET == K -- confirm against the stub generator.
+void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) {
+  Register src = op->src()->as_register();
+  Register dst = op->dst()->as_register();
+  Register src_pos = op->src_pos()->as_register();
+  Register dst_pos = op->dst_pos()->as_register();
+  Register length  = op->length()->as_register();
+  Register tmp = op->tmp()->as_register();
+  Register tmp2 = R0;
+
+  int flags = op->flags();
+  ciArrayKlass* default_type = op->expected_type();
+  BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL;
+  if (basic_type == T_ARRAY) basic_type = T_OBJECT;
+
+  // Set up the arraycopy stub information.
+  ArrayCopyStub* stub = op->stub();
+  const int frame_resize = frame::abi_reg_args_size - sizeof(frame::jit_abi); // C calls need larger frame.
+
+  // Always do stub if no type information is available. It's ok if
+  // the known type isn't loaded since the code sanity checks
+  // in debug mode and the type isn't required when we know the exact type
+  // also check that the type is an array type.
+  if (op->expected_type() == NULL) {
+    assert(src->is_nonvolatile() && src_pos->is_nonvolatile() && dst->is_nonvolatile() && dst_pos->is_nonvolatile() &&
+           length->is_nonvolatile(), "must preserve");
+    // 3 parms are int. Convert to long.
+    __ mr(R3_ARG1, src);
+    __ extsw(R4_ARG2, src_pos);
+    __ mr(R5_ARG3, dst);
+    __ extsw(R6_ARG4, dst_pos);
+    __ extsw(R7_ARG5, length);
+    address copyfunc_addr = StubRoutines::generic_arraycopy();
+
+    if (copyfunc_addr == NULL) { // Use C version if stub was not generated.
+      address entry = CAST_FROM_FN_PTR(address, Runtime1::arraycopy);
+      __ call_c_with_frame_resize(entry, frame_resize);
+    } else {
+#ifndef PRODUCT
+      if (PrintC1Statistics) {
+        address counter = (address)&Runtime1::_generic_arraycopystub_cnt;
+        int simm16_offs = __ load_const_optimized(tmp, counter, tmp2, true);
+        __ lwz(R11_scratch1, simm16_offs, tmp);
+        __ addi(R11_scratch1, R11_scratch1, 1);
+        __ stw(R11_scratch1, simm16_offs, tmp);
+      }
+#endif
+      __ call_c_with_frame_resize(copyfunc_addr, /*stub does not need resized frame*/ 0);
+
+      // tmp = ~R3_RET; adjust the arguments for a possible retry in the slow path.
+      __ nand(tmp, R3_RET, R3_RET);
+      __ subf(length, tmp, length);
+      __ add(src_pos, tmp, src_pos);
+      __ add(dst_pos, tmp, dst_pos);
+    }
+
+    // A negative result sends us to the slow path stub.
+    __ cmpwi(CCR0, R3_RET, 0);
+    __ bc_far_optimized(Assembler::bcondCRbiIs1, __ bi0(CCR0, Assembler::less), *stub->entry());
+    __ bind(*stub->continuation());
+    return;
+  }
+
+  assert(default_type != NULL && default_type->is_array_klass(), "must be true at this point");
+  Label cont, slow, copyfunc;
+
+  bool simple_check_flag_set = flags & (LIR_OpArrayCopy::src_null_check |
+                                        LIR_OpArrayCopy::dst_null_check |
+                                        LIR_OpArrayCopy::src_pos_positive_check |
+                                        LIR_OpArrayCopy::dst_pos_positive_check |
+                                        LIR_OpArrayCopy::length_positive_check);
+
+  // Use only one conditional branch for simple checks.
+  // The failure conditions are accumulated in the eq bit of combined_check.
+  if (simple_check_flag_set) {
+    ConditionRegister combined_check = CCR1, tmp_check = CCR1;
+
+    // Make sure src and dst are non-null.
+    if (flags & LIR_OpArrayCopy::src_null_check) {
+      __ cmpdi(combined_check, src, 0);
+      tmp_check = CCR0;
+    }
+
+    if (flags & LIR_OpArrayCopy::dst_null_check) {
+      __ cmpdi(tmp_check, dst, 0);
+      if (tmp_check != combined_check) {
+        __ cror(combined_check, Assembler::equal, tmp_check, Assembler::equal);
+      }
+      tmp_check = CCR0;
+    }
+
+    // Clear combined_check.eq if not already used.
+    if (tmp_check == combined_check) {
+      __ crandc(combined_check, Assembler::equal, combined_check, Assembler::equal);
+      tmp_check = CCR0;
+    }
+
+    if (flags & LIR_OpArrayCopy::src_pos_positive_check) {
+      // Test src_pos register.
+      __ cmpwi(tmp_check, src_pos, 0);
+      __ cror(combined_check, Assembler::equal, tmp_check, Assembler::less);
+    }
+
+    if (flags & LIR_OpArrayCopy::dst_pos_positive_check) {
+      // Test dst_pos register.
+      __ cmpwi(tmp_check, dst_pos, 0);
+      __ cror(combined_check, Assembler::equal, tmp_check, Assembler::less);
+    }
+
+    if (flags & LIR_OpArrayCopy::length_positive_check) {
+      // Make sure length isn't negative.
+      __ cmpwi(tmp_check, length, 0);
+      __ cror(combined_check, Assembler::equal, tmp_check, Assembler::less);
+    }
+
+    __ beq(combined_check, slow);
+  }
+
+  // Sign extend the int values to 64 bit before they are used in 64 bit
+  // arithmetic below.
+  __ extsw(length, length);
+
+  __ extsw(src_pos, src_pos);
+  if (flags & LIR_OpArrayCopy::src_range_check) {
+    // Slow path if array length <= src_pos + length (unsigned compare).
+    // Note that the case "copy exactly up to the end" takes the slow path, too.
+    __ lwz(tmp2, arrayOopDesc::length_offset_in_bytes(), src);
+    __ add(tmp, length, src_pos);
+    __ cmpld(CCR0, tmp2, tmp);
+    __ ble(CCR0, slow);
+  }
+
+  __ extsw(dst_pos, dst_pos);
+  if (flags & LIR_OpArrayCopy::dst_range_check) {
+    // Same check for the destination array.
+    __ lwz(tmp2, arrayOopDesc::length_offset_in_bytes(), dst);
+    __ add(tmp, length, dst_pos);
+    __ cmpld(CCR0, tmp2, tmp);
+    __ ble(CCR0, slow);
+  }
+
+  int shift = shift_amount(basic_type);
+
+  if (!(flags & LIR_OpArrayCopy::type_check)) {
+    __ b(cont);
+  } else {
+    // We don't know the array types are compatible.
+    if (basic_type != T_OBJECT) {
+      // Simple test for basic type arrays.
+      if (UseCompressedClassPointers) {
+        // We don't need decode because we just need to compare.
+        __ lwz(tmp, oopDesc::klass_offset_in_bytes(), src);
+        __ lwz(tmp2, oopDesc::klass_offset_in_bytes(), dst);
+        __ cmpw(CCR0, tmp, tmp2);
+      } else {
+        __ ld(tmp, oopDesc::klass_offset_in_bytes(), src);
+        __ ld(tmp2, oopDesc::klass_offset_in_bytes(), dst);
+        __ cmpd(CCR0, tmp, tmp2);
+      }
+      __ beq(CCR0, cont);
+    } else {
+      // For object arrays, if src is a sub class of dst then we can
+      // safely do the copy.
+      address copyfunc_addr = StubRoutines::checkcast_arraycopy();
+
+      const Register sub_klass = R5, super_klass = R4; // like CheckCast/InstanceOf
+      assert_different_registers(tmp, tmp2, sub_klass, super_klass);
+
+      __ load_klass(sub_klass, src);
+      __ load_klass(super_klass, dst);
+
+      __ check_klass_subtype_fast_path(sub_klass, super_klass, tmp, tmp2,
+                                       &cont, copyfunc_addr != NULL ? &copyfunc : &slow, NULL);
+
+      // Fast path was inconclusive: call the slow subtype check stub.
+      address slow_stc = Runtime1::entry_for(Runtime1::slow_subtype_check_id);
+      __ calculate_address_from_global_toc(tmp, slow_stc, true, true, false);
+      __ mtctr(tmp);
+      __ bctrl(); // sets CR0
+      __ beq(CCR0, cont);
+
+      if (copyfunc_addr != NULL) { // Use stub if available.
+        __ bind(copyfunc);
+        // Src is not a sub class of dst so we have to do a
+        // per-element check.
+        int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray;
+        if ((flags & mask) != mask) {
+          assert(flags & mask, "one of the two should be known to be an object array");
+
+          // Check that the array which is not known to be an object array
+          // has the layout helper of an object array.
+          if (!(flags & LIR_OpArrayCopy::src_objarray)) {
+            __ load_klass(tmp, src);
+          } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) {
+            __ load_klass(tmp, dst);
+          }
+
+          __ lwz(tmp2, in_bytes(Klass::layout_helper_offset()), tmp);
+
+          jint objArray_lh = Klass::array_layout_helper(T_OBJECT);
+          __ load_const_optimized(tmp, objArray_lh);
+          __ cmpw(CCR0, tmp, tmp2);
+          __ bne(CCR0, slow);
+        }
+
+        // Arguments of the checkcast arraycopy stub.
+        Register src_ptr = R3_ARG1;
+        Register dst_ptr = R4_ARG2;
+        Register len     = R5_ARG3;
+        Register chk_off = R6_ARG4;
+        Register super_k = R7_ARG5;
+
+        __ addi(src_ptr, src, arrayOopDesc::base_offset_in_bytes(basic_type));
+        __ addi(dst_ptr, dst, arrayOopDesc::base_offset_in_bytes(basic_type));
+        if (shift == 0) {
+          __ add(src_ptr, src_pos, src_ptr);
+          __ add(dst_ptr, dst_pos, dst_ptr);
+        } else {
+          __ sldi(tmp, src_pos, shift);
+          __ sldi(tmp2, dst_pos, shift);
+          __ add(src_ptr, tmp, src_ptr);
+          __ add(dst_ptr, tmp2, dst_ptr);
+        }
+
+        __ load_klass(tmp, dst);
+        __ mr(len, length);
+
+        int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset());
+        __ ld(super_k, ek_offset, tmp);
+
+        int sco_offset = in_bytes(Klass::super_check_offset_offset());
+        __ lwz(chk_off, sco_offset, super_k);
+
+        __ call_c_with_frame_resize(copyfunc_addr, /*stub does not need resized frame*/ 0);
+
+#ifndef PRODUCT
+        if (PrintC1Statistics) {
+          Label failed;
+          __ cmpwi(CCR0, R3_RET, 0);
+          __ bne(CCR0, failed);
+          address counter = (address)&Runtime1::_arraycopy_checkcast_cnt;
+          int simm16_offs = __ load_const_optimized(tmp, counter, tmp2, true);
+          __ lwz(R11_scratch1, simm16_offs, tmp);
+          __ addi(R11_scratch1, R11_scratch1, 1);
+          __ stw(R11_scratch1, simm16_offs, tmp);
+          __ bind(failed);
+        }
+#endif
+
+        // tmp = ~R3_RET; a result of 0 means that everything was copied.
+        __ nand(tmp, R3_RET, R3_RET);
+        __ cmpwi(CCR0, R3_RET, 0);
+        __ beq(CCR0, *stub->continuation());
+
+#ifndef PRODUCT
+        if (PrintC1Statistics) {
+          address counter = (address)&Runtime1::_arraycopy_checkcast_attempt_cnt;
+          int simm16_offs = __ load_const_optimized(tmp, counter, tmp2, true);
+          __ lwz(R11_scratch1, simm16_offs, tmp);
+          __ addi(R11_scratch1, R11_scratch1, 1);
+          __ stw(R11_scratch1, simm16_offs, tmp);
+          // The counter update clobbered tmp. Recompute ~R3_RET, it is needed
+          // for the adjustment of length and positions below.
+          __ nand(tmp, R3_RET, R3_RET);
+        }
+#endif
+
+        // Partial copy: adjust the arguments and fall into the slow path.
+        __ subf(length, tmp, length);
+        __ add(src_pos, tmp, src_pos);
+        __ add(dst_pos, tmp, dst_pos);
+      }
+    }
+  }
+  __ bind(slow);
+  __ b(*stub->entry());
+  __ bind(cont);
+
+#ifdef ASSERT
+  if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) {
+    // Sanity check the known type with the incoming class. For the
+    // primitive case the types must match exactly with src.klass and
+    // dst.klass each exactly matching the default type. For the
+    // object array case, if no type check is needed then either the
+    // dst type is exactly the expected type and the src type is a
+    // subtype which we can't check or src is the same array as dst
+    // but not necessarily exactly of type default_type.
+    Label known_ok, halt;
+    metadata2reg(op->expected_type()->constant_encoding(), tmp);
+    if (UseCompressedClassPointers) {
+      // Tmp holds the default type. It currently comes uncompressed after the
+      // load of a constant, so encode it.
+      __ encode_klass_not_null(tmp);
+      // Load the raw value of the dst klass, since we will be comparing
+      // compressed values directly.
+      __ lwz(tmp2, oopDesc::klass_offset_in_bytes(), dst);
+      __ cmpw(CCR0, tmp, tmp2);
+      if (basic_type != T_OBJECT) {
+        __ bne(CCR0, halt);
+        // Load the raw value of the src klass.
+        __ lwz(tmp2, oopDesc::klass_offset_in_bytes(), src);
+        __ cmpw(CCR0, tmp, tmp2);
+        __ beq(CCR0, known_ok);
+      } else {
+        __ beq(CCR0, known_ok);
+        // src and dst are 64 bit oops, so all 64 bits have to be compared
+        // (only the klass pointers are compressed here).
+        __ cmpd(CCR0, src, dst);
+        __ beq(CCR0, known_ok);
+      }
+    } else {
+      __ ld(tmp2, oopDesc::klass_offset_in_bytes(), dst);
+      __ cmpd(CCR0, tmp, tmp2);
+      if (basic_type != T_OBJECT) {
+        __ bne(CCR0, halt);
+        // Load the raw value of the src klass.
+        __ ld(tmp2, oopDesc::klass_offset_in_bytes(), src);
+        __ cmpd(CCR0, tmp, tmp2);
+        __ beq(CCR0, known_ok);
+      } else {
+        __ beq(CCR0, known_ok);
+        __ cmpd(CCR0, src, dst);
+        __ beq(CCR0, known_ok);
+      }
+    }
+    __ bind(halt);
+    __ stop("incorrect type information in arraycopy");
+    __ bind(known_ok);
+  }
+#endif
+
+#ifndef PRODUCT
+  if (PrintC1Statistics) {
+    address counter = Runtime1::arraycopy_count_address(basic_type);
+    int simm16_offs = __ load_const_optimized(tmp, counter, tmp2, true);
+    __ lwz(R11_scratch1, simm16_offs, tmp);
+    __ addi(R11_scratch1, R11_scratch1, 1);
+    __ stw(R11_scratch1, simm16_offs, tmp);
+  }
+#endif
+
+  // All checks passed: compute the element addresses and call the copy stub.
+  Register src_ptr = R3_ARG1;
+  Register dst_ptr = R4_ARG2;
+  Register len     = R5_ARG3;
+
+  __ addi(src_ptr, src, arrayOopDesc::base_offset_in_bytes(basic_type));
+  __ addi(dst_ptr, dst, arrayOopDesc::base_offset_in_bytes(basic_type));
+  if (shift == 0) {
+    __ add(src_ptr, src_pos, src_ptr);
+    __ add(dst_ptr, dst_pos, dst_ptr);
+  } else {
+    __ sldi(tmp, src_pos, shift);
+    __ sldi(tmp2, dst_pos, shift);
+    __ add(src_ptr, tmp, src_ptr);
+    __ add(dst_ptr, tmp2, dst_ptr);
+  }
+
+  bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0;
+  bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0;
+  const char *name;
+  address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false);
+
+  // Arraycopy stubs takes a length in number of elements, so don't scale it.
+  __ mr(len, length);
+  __ call_c_with_frame_resize(entry, /*stub does not need resized frame*/ 0);
+
+  __ bind(*stub->continuation());
+}
+
+
+// Emits a shift of left by the variable amount in count into dest.
+// The count is first masked into tmp: to 5 bits if dest is a single cpu
+// register, to 6 bits otherwise. On LP64 a T_OBJECT operand in a single cpu
+// register is shifted with the 64 bit instructions.
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) {
+  const Register Ramount = tmp->as_register();
+
+  if (!dest->is_single_cpu()) {
+    const Register Rdst = dest->as_register_lo();
+    const Register Rsrc = left->as_register_lo();
+    __ rldicl(Ramount, count->as_register(), 0, 64-6);
+    switch (code) {
+      case lir_shl:
+        __ sld(Rdst, Rsrc, Ramount);
+        break;
+      case lir_shr:
+        __ srad(Rdst, Rsrc, Ramount);
+        break;
+      case lir_ushr:
+        __ srd(Rdst, Rsrc, Ramount);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+    return;
+  }
+
+  const Register Rdst = dest->as_register();
+  const Register Rsrc = left->as_register();
+  __ rldicl(Ramount, count->as_register(), 0, 64-5);
+#ifdef _LP64
+  if (left->type() == T_OBJECT) {
+    switch (code) {
+      case lir_shl:
+        __ sld(Rdst, Rsrc, Ramount);
+        break;
+      case lir_shr:
+        __ srad(Rdst, Rsrc, Ramount);
+        break;
+      case lir_ushr:
+        __ srd(Rdst, Rsrc, Ramount);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+    return;
+  }
+#endif
+  switch (code) {
+    case lir_shl:
+      __ slw(Rdst, Rsrc, Ramount);
+      break;
+    case lir_shr:
+      __ sraw(Rdst, Rsrc, Ramount);
+      break;
+    case lir_ushr:
+      __ srw(Rdst, Rsrc, Ramount);
+      break;
+    default:
+      ShouldNotReachHere();
+  }
+}
+
+
+// Emits a shift of left by the constant count into dest. The count is masked
+// to 5 bits for int and to 6 bits for long (and, on LP64, T_OBJECT) operands.
+// A masked count of 0 degenerates to a register move, if needed at all.
+void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) {
+#ifdef _LP64
+  if (left->type() == T_OBJECT) {
+    const Register Rdst = dest->as_register_lo();
+    const Register Rsrc = left->as_register();
+    count = count & 63;  // Shouldn't shift by more than sizeof(intptr_t).
+    if (count == 0) {
+      __ mr_if_needed(Rdst, Rsrc);
+      return;
+    }
+    switch (code) {
+      case lir_shl:
+        __ sldi(Rdst, Rsrc, count);
+        break;
+      case lir_shr:
+        __ sradi(Rdst, Rsrc, count);
+        break;
+      case lir_ushr:
+        __ srdi(Rdst, Rsrc, count);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+    return;
+  }
+#endif
+
+  if (dest->is_single_cpu()) {
+    const Register Rdst = dest->as_register();
+    const Register Rsrc = left->as_register();
+    count = count & 0x1F; // Java spec
+    if (count == 0) {
+      __ mr_if_needed(Rdst, Rsrc);
+      return;
+    }
+    switch (code) {
+      case lir_shl:
+        __ slwi(Rdst, Rsrc, count);
+        break;
+      case lir_shr:
+        __ srawi(Rdst, Rsrc, count);
+        break;
+      case lir_ushr:
+        __ srwi(Rdst, Rsrc, count);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+    return;
+  }
+
+  if (dest->is_double_cpu()) {
+    const Register Rdst = dest->as_pointer_register();
+    const Register Rsrc = left->as_pointer_register();
+    count = count & 63; // Java spec
+    if (count == 0) {
+      __ mr_if_needed(Rdst, Rsrc);
+      return;
+    }
+    switch (code) {
+      case lir_shl:
+        __ sldi(Rdst, Rsrc, count);
+        break;
+      case lir_shr:
+        __ sradi(Rdst, Rsrc, count);
+        break;
+      case lir_ushr:
+        __ srdi(Rdst, Rsrc, count);
+        break;
+      default:
+        ShouldNotReachHere();
+    }
+    return;
+  }
+
+  ShouldNotReachHere();
+}
+
+
+// Emits the inline allocation of an instance. If the op requests an init
+// check, the klass is null checked (explicitly or implicitly) and the slow
+// path stub is entered unless the klass is fully initialized. The slow path
+// stub also serves as the fallback of the inline allocation.
+void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) {
+  const Register Rklass = op->klass()->as_register();
+  const Register Robj   = op->obj()->as_register();
+  const Register Rtmp1  = op->tmp1()->as_register();
+
+  if (op->init_check()) {
+    const bool use_implicit_check = os::zero_page_read_protected() && ImplicitNullChecks;
+    if (use_implicit_check) {
+      add_debug_info_for_null_check_here(op->stub()->info());
+    } else {
+      explicit_null_check(Rklass, op->stub()->info());
+    }
+    // Load the init state; this load is also the implicit null check.
+    __ lbz(Rtmp1, in_bytes(InstanceKlass::init_state_offset()), Rklass);
+    __ cmpwi(CCR0, Rtmp1, InstanceKlass::fully_initialized);
+    __ bc_far_optimized(Assembler::bcondCRbiIs0, __ bi0(CCR0, Assembler::equal), *op->stub()->entry());
+  }
+
+  __ allocate_object(Robj,
+                     Rtmp1,
+                     op->tmp2()->as_register(),
+                     op->tmp3()->as_register(),
+                     op->header_size(),
+                     op->object_size(),
+                     Rklass,
+                     *op->stub()->entry());
+
+  __ bind(*op->stub()->continuation());
+  __ verify_oop(Robj);
+}
+
+
+// Emits the allocation of an array. The slow path stub is used
+// unconditionally if UseSlowPath is set or if fast allocation is switched
+// off for this kind of array (object resp. type array); otherwise the array
+// is allocated inline with the stub as fallback.
+void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) {
+  // The length is an int: sign extend it before it is used as 64 bit value.
+  LP64_ONLY( __ extsw(op->len()->as_register(), op->len()->as_register()); )
+
+  const bool is_object_array = (op->type() == T_OBJECT || op->type() == T_ARRAY);
+  const bool always_slow = UseSlowPath ||
+                           (!UseFastNewObjectArray &&  is_object_array) ||
+                           (!UseFastNewTypeArray   && !is_object_array);
+
+  if (always_slow) {
+    __ b(*op->stub()->entry());
+  } else {
+    const Register Robj   = op->obj()->as_register();
+    const Register Rlen   = op->len()->as_register();
+    const Register Rklass = op->klass()->as_register();
+    __ allocate_array(Robj,
+                      Rlen,
+                      op->tmp1()->as_register(),
+                      op->tmp2()->as_register(),
+                      op->tmp3()->as_register(),
+                      arrayOopDesc::header_size(op->type()),
+                      type2aelembytes(op->type()),
+                      Rklass,
+                      *op->stub()->entry());
+  }
+  __ bind(*op->stub()->continuation());
+}
+
+
+// Emits the update of the receiver type rows of a profile entry.
+// First pass: if recv matches a recorded receiver, its counter is incremented.
+// Second pass: otherwise recv is stored into the first empty row with an
+// initial count of one increment. In both cases control continues at
+// update_done; if neither applies, the code falls through.
+// All slot offsets are relative to mdo and reduced by mdo_offset_bias.
+void LIR_Assembler::type_profile_helper(Register mdo, int mdo_offset_bias,
+                                        ciMethodData *md, ciProfileData *data,
+                                        Register recv, Register tmp1, Label* update_done) {
+  // See if the receiver is receiver[n].
+  for (uint row = 0; row < VirtualCallData::row_limit(); row++) {
+    const int recv_offs  = md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(row)) - mdo_offset_bias;
+    const int count_offs = md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(row)) - mdo_offset_bias;
+    Label next_test;
+
+    __ ld(tmp1, recv_offs, mdo);
+    __ verify_klass_ptr(tmp1);
+    __ cmpd(CCR0, recv, tmp1);
+    __ bne(CCR0, next_test);
+
+    // Hit: bump the counter of this row.
+    __ ld(tmp1, count_offs, mdo);
+    __ addi(tmp1, tmp1, DataLayout::counter_increment);
+    __ std(tmp1, count_offs, mdo);
+    __ b(*update_done);
+
+    __ bind(next_test);
+  }
+
+  // Didn't find receiver; find next empty slot and fill it in.
+  for (uint row = 0; row < VirtualCallData::row_limit(); row++) {
+    const int recv_offs  = md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(row)) - mdo_offset_bias;
+    const int count_offs = md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(row)) - mdo_offset_bias;
+    Label next_test;
+
+    __ ld(tmp1, recv_offs, mdo);
+    __ cmpdi(CCR0, tmp1, 0);
+    __ bne(CCR0, next_test);
+
+    // Empty row: record the receiver and initialize its counter.
+    __ li(tmp1, DataLayout::counter_increment);
+    __ std(recv, recv_offs, mdo);
+    __ std(tmp1, count_offs, mdo);
+    __ b(*update_done);
+
+    __ bind(next_test);
+  }
+}
+
+
+// Looks up the method data of method and the profile entry for bci and
+// returns them in md and data. If the end of the entry cannot be addressed
+// with a signed 16 bit displacement, mdo_offset_bias is set to the offset of
+// the entry's header so that the caller can bias its base register
+// accordingly; otherwise mdo_offset_bias is left untouched.
+void LIR_Assembler::setup_md_access(ciMethod* method, int bci,
+                                    ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias) {
+  md = method->method_data_or_null();
+  assert(md != NULL, "Sanity");
+  data = md->bci_to_data(bci);
+  assert(data != NULL,       "need data for checkcast");
+  assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check");
+
+  const int header_offs = md->byte_offset_of_slot(data, DataLayout::header_offset());
+  const bool fits_simm16 = Assembler::is_simm16(header_offs + data->size_in_bytes());
+  if (!fits_simm16) {
+    // The offset is large so bias the mdo by the base of the slot so
+    // that the ld can use simm16s to reference the slots of the data.
+    mdo_offset_bias = header_offs;
+  }
+}
+
+
+void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) { // Shared checkcast/instanceof body: branches to *success or *obj_is_null; *failure is bound at the end (fall through).
+  Register obj = op->object()->as_register();
+  Register k_RInfo = op->tmp1()->as_register();
+  Register klass_RInfo = op->tmp2()->as_register();
+  Register Rtmp1 = op->tmp3()->as_register();
+  Register dst = op->result_opr()->as_register();
+  ciKlass* k = op->klass();
+  bool should_profile = op->should_profile();
+  bool move_obj_to_dst = (op->code() == lir_checkcast); // checkcast also delivers obj in dst.
+  // Attention: do_temp(opTypeCheck->_object) is not used, i.e. obj may be same as one of the temps.
+  bool reg_conflict = (obj == k_RInfo || obj == klass_RInfo || obj == Rtmp1);
+  bool restore_obj = move_obj_to_dst && reg_conflict; // On failure the original obj register is refilled from dst.
+
+  __ cmpdi(CCR0, obj, 0); // Null test; CCR0 is consumed by the bne/beq emitted below.
+  if (move_obj_to_dst || reg_conflict) {
+    __ mr_if_needed(dst, obj);
+    if (reg_conflict) { obj = dst; } // Continue with the copy because the temps get overwritten.
+  }
+
+  ciMethodData* md;
+  ciProfileData* data;
+  int mdo_offset_bias = 0;
+  if (should_profile) {
+    ciMethod* method = op->profiled_method();
+    assert(method != NULL, "Should have method");
+    setup_md_access(method, op->profiled_bci(), md, data, mdo_offset_bias);
+
+    Register mdo      = k_RInfo;
+    Register data_val = Rtmp1;
+    Label not_null;
+    __ bne(CCR0, not_null);
+    metadata2reg(md->constant_encoding(), mdo);
+    __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
+    __ lbz(data_val, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias, mdo);
+    __ ori(data_val, data_val, BitData::null_seen_byte_constant()); // Record that a null was seen in the flags byte.
+    __ stb(data_val, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias, mdo);
+    __ b(*obj_is_null);
+    __ bind(not_null);
+  } else {
+    __ beq(CCR0, *obj_is_null);
+  }
+
+  // Load the klass of obj into klass_RInfo.
+  __ load_klass(klass_RInfo, obj);
+
+  if (k->is_loaded()) {
+    metadata2reg(k->constant_encoding(), k_RInfo);
+  } else {
+    klass2reg_with_patching(k_RInfo, op->info_for_patch());
+  }
+
+  Label profile_cast_failure, failure_restore_obj, profile_cast_success;
+  Label *failure_target = should_profile ? &profile_cast_failure : failure; // With profiling, both outcomes first pass through the counter updates below.
+  Label *success_target = should_profile ? &profile_cast_success : success;
+
+  if (op->fast_check()) {
+    assert_different_registers(klass_RInfo, k_RInfo);
+    __ cmpd(CCR0, k_RInfo, klass_RInfo); // Fast check: plain klass equality.
+    if (should_profile) {
+      __ bne(CCR0, *failure_target);
+      // Fall through to success case.
+    } else {
+      __ beq(CCR0, *success);
+      // Fall through to failure case.
+    }
+  } else {
+    bool need_slow_path = true;
+    if (k->is_loaded()) {
+      if ((int) k->super_check_offset() != in_bytes(Klass::secondary_super_cache_offset())) {
+        need_slow_path = false; // The fast path alone decides when the check offset is not the secondary super cache.
+      }
+      // Perform the fast part of the checking logic.
+      __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, R0, (need_slow_path ? success_target : NULL),
+                                       failure_target, NULL, RegisterOrConstant(k->super_check_offset()));
+    } else {
+      // Perform the fast part of the checking logic.
+      __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, R0, success_target, failure_target);
+    }
+    if (!need_slow_path) {
+      if (!should_profile) { __ b(*success); } // With profiling the success code follows directly, so no branch is emitted.
+    } else {
+      // Call out-of-line instance of __ check_klass_subtype_slow_path(...):
+      address entry = Runtime1::entry_for(Runtime1::slow_subtype_check_id);
+      //__ load_const_optimized(Rtmp1, entry, R0);
+      __ calculate_address_from_global_toc(Rtmp1, entry, true, true, false);
+      __ mtctr(Rtmp1);
+      __ bctrl(); // sets CR0
+      if (should_profile) {
+        __ bne(CCR0, *failure_target);
+        // Fall through to success case.
+      } else {
+        __ beq(CCR0, *success);
+        // Fall through to failure case.
+      }
+    }
+  }
+
+  if (should_profile) {
+    Register mdo = k_RInfo, recv = klass_RInfo;
+    assert_different_registers(mdo, recv, Rtmp1);
+    __ bind(profile_cast_success);
+    metadata2reg(md->constant_encoding(), mdo);
+    __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
+    type_profile_helper(mdo, mdo_offset_bias, md, data, recv, Rtmp1, success);
+    __ b(*success); // Reached when type_profile_helper found neither a matching nor a free row.
+
+    // Cast failure case.
+    __ bind(profile_cast_failure);
+    metadata2reg(md->constant_encoding(), mdo);
+    __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
+    __ ld(Rtmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+    __ addi(Rtmp1, Rtmp1, -DataLayout::counter_increment); // A failed check decrements the count.
+    __ std(Rtmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+  }
+
+  __ bind(*failure);
+
+  if (restore_obj) {
+    __ mr(op->object()->as_register(), dst);
+    // Fall through to failure case.
+  }
+}
+
+
+// Emits the code for the three LIR type-check flavours:
+//   lir_store_check - verifies that `value` may be stored into `array`. A null
+//                     value always passes; on failure the op's stub is entered.
+//   lir_checkcast   - uses emit_typecheck_helper; a failed cast enters the op's stub.
+//   lir_instanceof  - uses emit_typecheck_helper and materializes 1 (success) or
+//                     0 (failure or null) in the result register.
+// With profiling enabled the null_seen flag, the receiver rows and the failure
+// count of the profile cell are updated along the way.
+void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) {
+  LIR_Code code = op->code();
+  if (code == lir_store_check) {
+    Register value = op->object()->as_register();
+    Register array = op->array()->as_register();
+    Register k_RInfo = op->tmp1()->as_register();
+    Register klass_RInfo = op->tmp2()->as_register();
+    Register Rtmp1 = op->tmp3()->as_register();
+    bool should_profile = op->should_profile();
+
+    __ verify_oop(value);
+    CodeStub* stub = op->stub();
+    // Check if it needs to be profiled.
+    ciMethodData* md;
+    ciProfileData* data;
+    int mdo_offset_bias = 0;
+    if (should_profile) {
+      ciMethod* method = op->profiled_method();
+      assert(method != NULL, "Should have method");
+      setup_md_access(method, op->profiled_bci(), md, data, mdo_offset_bias);
+    }
+    Label profile_cast_success, failure, done;
+    Label *success_target = should_profile ? &profile_cast_success : &done;
+
+    // Storing null needs no subtype check.
+    __ cmpdi(CCR0, value, 0);
+    if (should_profile) {
+      Label not_null;
+      __ bne(CCR0, not_null);
+      Register mdo      = k_RInfo;
+      Register data_val = Rtmp1;
+      metadata2reg(md->constant_encoding(), mdo);
+      __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
+      // Record that a null was seen in the flags byte of the profile cell.
+      __ lbz(data_val, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias, mdo);
+      __ ori(data_val, data_val, BitData::null_seen_byte_constant());
+      __ stb(data_val, md->byte_offset_of_slot(data, DataLayout::flags_offset()) - mdo_offset_bias, mdo);
+      __ b(done);
+      __ bind(not_null);
+    } else {
+      __ beq(CCR0, done);
+    }
+    if (!os::zero_page_read_protected() || !ImplicitNullChecks) {
+      explicit_null_check(array, op->info_for_exception());
+    } else {
+      add_debug_info_for_null_check_here(op->info_for_exception());
+    }
+    __ load_klass(k_RInfo, array);
+    __ load_klass(klass_RInfo, value);
+
+    // Get instance klass.
+    __ ld(k_RInfo, in_bytes(ObjArrayKlass::element_klass_offset()), k_RInfo);
+    // Perform the fast part of the checking logic.
+    __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, R0, success_target, &failure, NULL);
+
+    // Call out-of-line instance of __ check_klass_subtype_slow_path(...):
+    const address slow_path = Runtime1::entry_for(Runtime1::slow_subtype_check_id);
+    //__ load_const_optimized(R0, slow_path);
+    __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(slow_path));
+    __ mtctr(R0);
+    __ bctrl(); // sets CR0
+    if (!should_profile) {
+      __ beq(CCR0, done);
+      __ bind(failure);
+    } else {
+      __ bne(CCR0, failure);
+      // Fall through to the success case.
+
+      Register mdo  = klass_RInfo, recv = k_RInfo, tmp1 = Rtmp1;
+      assert_different_registers(value, mdo, recv, tmp1);
+      __ bind(profile_cast_success);
+      metadata2reg(md->constant_encoding(), mdo);
+      __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
+      // klass_RInfo is reused as mdo here, so the klass of value is loaded again.
+      __ load_klass(recv, value);
+      type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &done);
+      __ b(done);
+
+      // Cast failure case: decrement the count of the profile cell.
+      __ bind(failure);
+      metadata2reg(md->constant_encoding(), mdo);
+      __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
+      __ ld(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+      __ addi(tmp1, tmp1, -DataLayout::counter_increment);
+      __ std(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+    }
+    __ b(*stub->entry());
+    __ bind(done);
+
+  } else if (code == lir_checkcast) {
+    Label success, failure;
+    // A null object counts as success for checkcast.
+    emit_typecheck_helper(op, &success, /*fallthru*/&failure, &success); // Moves obj to dst.
+    __ b(*op->stub()->entry());
+    __ align(32, 12);
+    __ bind(success);
+  } else if (code == lir_instanceof) {
+    Register dst = op->result_opr()->as_register();
+    Label success, failure, done;
+    // A null object is routed to the failure code, i.e. yields 0.
+    emit_typecheck_helper(op, &success, /*fallthru*/&failure, &failure);
+    __ li(dst, 0);
+    __ b(done);
+    __ align(32, 12);
+    __ bind(success);
+    __ li(dst, 1);
+    __ bind(done);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+
+// Emits a compare-and-swap for lir_cas_int, lir_cas_long and lir_cas_obj.
+// Longs and uncompressed oops use the 64 bit cmpxchgd, ints and compressed oops
+// the 32 bit cmpxchgw. With UseCompressedOops both oop operands are encoded into
+// the op's tmp1/tmp2 registers first. R0 receives the current memory value and
+// the outcome is delivered in BOOL_RESULT.
+void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) {
+  const Register Raddr = op->addr()->as_pointer_register();
+  Register Rexpected = noreg;
+  Register Rupdate   = noreg;
+  bool use_dword = false;
+
+  switch (op->code()) {
+    case lir_cas_long:
+      Rexpected = op->cmp_value()->as_register_lo();
+      Rupdate   = op->new_value()->as_register_lo();
+      use_dword = true;
+      break;
+
+    case lir_cas_int:
+      Rexpected = op->cmp_value()->as_register();
+      Rupdate   = op->new_value()->as_register();
+      break;
+
+    case lir_cas_obj:
+      Rexpected = op->cmp_value()->as_register();
+      Rupdate   = op->new_value()->as_register();
+      if (UseCompressedOops) {
+        const Register Rnarrow_expected = op->tmp1()->as_register();
+        const Register Rnarrow_update   = op->tmp2()->as_register();
+        Rexpected = __ encode_heap_oop(Rnarrow_expected, Rexpected);
+        Rupdate   = __ encode_heap_oop(Rnarrow_update, Rupdate);
+      } else {
+        use_dword = true;
+      }
+      break;
+
+    default:
+      Unimplemented();
+      break;
+  }
+
+  if (!use_dword) {
+    __ cmpxchgw(BOOL_RESULT, /*current_value=*/R0, Rexpected, Rupdate, Raddr,
+                MacroAssembler::MemBarFenceAfter,
+                MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, /*check without ldarx first*/true);
+  } else {
+    __ cmpxchgd(BOOL_RESULT, /*current_value=*/R0, Rexpected, Rupdate, Raddr,
+                MacroAssembler::MemBarFenceAfter,
+                MacroAssembler::cmpxchgx_hint_atomic_update(),
+                noreg, NULL, /*check without ldarx first*/true);
+  }
+}
+
+
+void LIR_Assembler::set_24bit_FPU() {
+  Unimplemented(); // No implementation in this file; any call ends in Unimplemented().
+}
+
+void LIR_Assembler::reset_FPU() {
+  Unimplemented(); // No implementation in this file; any call ends in Unimplemented().
+}
+
+
+void LIR_Assembler::breakpoint() {
+  __ illtrap(); // The breakpoint is emitted as the macro assembler's illtrap().
+}
+
+
+void LIR_Assembler::push(LIR_Opr opr) {
+  Unimplemented(); // opr is ignored; no push is implemented in this file.
+}
+
+void LIR_Assembler::pop(LIR_Opr opr) {
+  Unimplemented(); // opr is ignored; no pop is implemented in this file.
+}
+
+
+// Materializes the address of the lock slot of monitor `monitor_no` (as reported
+// by the frame map) in the register of dst_opr.
+void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst_opr) {
+  const Address lock_slot = frame_map()->address_for_monitor_lock(monitor_no);
+  const Register Rresult = dst_opr->as_register();
+  const Register Rbase = lock_slot.base();
+  const int disp = lock_slot.disp();
+
+  // Rresult = Rbase + disp, i.e. the pointer to the BasicLock.
+  __ add_const_optimized(Rresult, Rbase, disp);
+}
+
+
+// Emits the inline part of a monitor enter (lir_lock) or monitor exit (lir_unlock).
+// With UseFastLocking the macro assembler's lock_object/unlock_object sequence is
+// emitted and receives the entry of the op's stub as its branch target; otherwise
+// the code branches to the stub unconditionally. In every case the stub's
+// continuation label is bound right behind the emitted code.
+void LIR_Assembler::emit_lock(LIR_OpLock* op) {
+  Register obj = op->obj_opr()->as_register();
+  Register hdr = op->hdr_opr()->as_register();
+  Register lock = op->lock_opr()->as_register();
+  CodeStub* const stub = op->stub();
+
+  // Obj may not be an oop.
+  if (op->code() == lir_lock) {
+    if (UseFastLocking) {
+      assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+      // Add debug info for NullPointerException only if one is possible.
+      if (op->info() != NULL) {
+        if (!os::zero_page_read_protected() || !ImplicitNullChecks) {
+          explicit_null_check(obj, op->info());
+        } else {
+          add_debug_info_for_null_check_here(op->info());
+        }
+      }
+      __ lock_object(hdr, obj, lock, op->scratch_opr()->as_register(), *stub->entry());
+    } else {
+      // always do slow locking
+      // note: The slow locking code could be inlined here, however if we use
+      //       slow locking, speed doesn't matter anyway and this solution is
+      //       simpler and requires less duplicated code - additionally, the
+      //       slow locking code is the same in either case which simplifies
+      //       debugging.
+      __ b(*stub->entry());
+    }
+  } else {
+    assert (op->code() == lir_unlock, "Invalid code, expected lir_unlock");
+    if (UseFastLocking) {
+      assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header");
+      __ unlock_object(hdr, obj, lock, *stub->entry());
+    } else {
+      // always do slow unlocking
+      // note: The slow unlocking code could be inlined here, however if we use
+      //       slow unlocking, speed doesn't matter anyway and this solution is
+      //       simpler and requires less duplicated code - additionally, the
+      //       slow unlocking code is the same in either case which simplifies
+      //       debugging.
+      __ b(*stub->entry());
+    }
+  }
+  // The stub resumes execution here.
+  __ bind(*stub->continuation());
+}
+
+
+void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { // Emits the call-site profile update: a receiver row for profiled virtual/interface calls, the plain call count otherwise.
+  ciMethod* method = op->profiled_method();
+  int bci          = op->profiled_bci();
+  ciMethod* callee = op->profiled_callee();
+
+  // Update counter for all call types.
+  ciMethodData* md = method->method_data_or_null();
+  assert(md != NULL, "Sanity");
+  ciProfileData* data = md->bci_to_data(bci);
+  assert(data->is_CounterData(), "need CounterData for calls");
+  assert(op->mdo()->is_single_cpu(),  "mdo must be allocated");
+  Register mdo = op->mdo()->as_register();
+#ifdef _LP64
+  assert(op->tmp1()->is_double_cpu(), "tmp1 must be allocated");
+  Register tmp1 = op->tmp1()->as_register_lo();
+#else
+  assert(op->tmp1()->is_single_cpu(), "tmp1 must be allocated");
+  Register tmp1 = op->tmp1()->as_register();
+#endif
+  metadata2reg(md->constant_encoding(), mdo); // mdo := the MethodData constant.
+  int mdo_offset_bias = 0;
+  if (!Assembler::is_simm16(md->byte_offset_of_slot(data, CounterData::count_offset()) +
+                            data->size_in_bytes())) {
+    // The offset is large so bias the mdo by the base of the slot so
+    // that the ld can use simm16s to reference the slots of the data.
+    mdo_offset_bias = md->byte_offset_of_slot(data, CounterData::count_offset());
+    __ add_const_optimized(mdo, mdo, mdo_offset_bias, R0);
+  }
+
+  Bytecodes::Code bc = method->java_code_at_bci(bci);
+  const bool callee_is_static = callee->is_loaded() && callee->is_static();
+  // Perform additional virtual call profiling for invokevirtual and
+  // invokeinterface bytecodes.
+  if ((bc == Bytecodes::_invokevirtual || bc == Bytecodes::_invokeinterface) &&
+      !callee_is_static &&  // Required for optimized MH invokes.
+      C1ProfileVirtualCalls) {
+    assert(op->recv()->is_single_cpu(), "recv must be allocated");
+    Register recv = op->recv()->as_register();
+    assert_different_registers(mdo, tmp1, recv);
+    assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls");
+    ciKlass* known_klass = op->known_holder();
+    if (C1OptimizeVirtualCallProfiling && known_klass != NULL) {
+      // We know the type that will be seen at this call site; we can
+      // statically update the MethodData* rather than needing to do
+      // dynamic tests on the receiver type.
+
+      // NOTE: we should probably put a lock around this search to
+      // avoid collisions by concurrent compilations.
+      ciVirtualCallData* vc_data = (ciVirtualCallData*) data;
+      uint i;
+      for (i = 0; i < VirtualCallData::row_limit(); i++) { // Searched at compile time; code is emitted for one row only.
+        ciKlass* receiver = vc_data->receiver(i);
+        if (known_klass->equals(receiver)) {
+          __ ld(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
+          __ addi(tmp1, tmp1, DataLayout::counter_increment);
+          __ std(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
+          return; // The known receiver already has a row; nothing more to emit.
+        }
+      }
+
+      // Receiver type not found in profile data; select an empty slot.
+
+      // Note that this is less efficient than it should be because it
+      // always does a write to the receiver part of the
+      // VirtualCallData rather than just the first time.
+      for (i = 0; i < VirtualCallData::row_limit(); i++) {
+        ciKlass* receiver = vc_data->receiver(i);
+        if (receiver == NULL) {
+          metadata2reg(known_klass->constant_encoding(), tmp1);
+          __ std(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i)) - mdo_offset_bias, mdo);
+
+          __ ld(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
+          __ addi(tmp1, tmp1, DataLayout::counter_increment);
+          __ std(tmp1, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i)) - mdo_offset_bias, mdo);
+          return; // The first empty row was claimed; nothing more to emit.
+        }
+      }
+    } else {
+      __ load_klass(recv, recv); // recv now holds the receiver's klass (the oop is overwritten).
+      Label update_done;
+      type_profile_helper(mdo, mdo_offset_bias, md, data, recv, tmp1, &update_done);
+      // Receiver did not match any saved receiver and there is no empty row for it.
+      // Increment total counter to indicate polymorphic case.
+      __ ld(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+      __ addi(tmp1, tmp1, DataLayout::counter_increment);
+      __ std(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+
+      __ bind(update_done);
+    }
+  } else {
+    // Static call (also taken for every other call that is not profiled as a virtual call above).
+    __ ld(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+    __ addi(tmp1, tmp1, DataLayout::counter_increment);
+    __ std(tmp1, md->byte_offset_of_slot(data, CounterData::count_offset()) - mdo_offset_bias, mdo);
+  }
+}
+
+
+void LIR_Assembler::align_backward_branch_target() {
+  __ align(32, 12); // Align the branch target to a 32 byte boundary, inserting up to 3 nops.
+}
+
+
+void LIR_Assembler::emit_delay(LIR_OpDelay* op) {
+  Unimplemented(); // op is ignored; LIR_OpDelay is not handled in this file.
+}
+
+
+// Emits dest = -left for a register operand. Single-word and long integers use
+// neg (the long on its lo register), floats and doubles use fneg.
+void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest) {
+  assert(left->is_register(), "can only handle registers");
+
+  if (left->is_single_cpu()) {
+    __ neg(dest->as_register(), left->as_register());
+    return;
+  }
+
+  if (left->is_single_fpu()) {
+    __ fneg(dest->as_float_reg(), left->as_float_reg());
+    return;
+  }
+
+  if (left->is_double_fpu()) {
+    __ fneg(dest->as_double_reg(), left->as_double_reg());
+    return;
+  }
+
+  // Only a long is left.
+  assert (left->is_double_cpu(), "Must be a long");
+  __ neg(dest->as_register_lo(), left->as_register_lo());
+}
+
+
+void LIR_Assembler::fxch(int i) {
+  Unimplemented(); // i is ignored; no implementation in this file.
+}
+
+void LIR_Assembler::fld(int i) {
+  Unimplemented(); // i is ignored; no implementation in this file.
+}
+
+void LIR_Assembler::ffree(int i) {
+  Unimplemented(); // i is ignored; no implementation in this file.
+}
+
+
+// Emits a call to the runtime target `dest`.
+// The register_finalizer and new_multi_array entries of Runtime1 are stubs (no
+// function descriptor): they are reached through CTR with an address computed
+// relative to the global TOC, and call info is mandatory for them. Every other
+// target is called through call_c_with_frame_resize, with call info recorded
+// only when `info` is present. result, args and tmp are not used here.
+void LIR_Assembler::rt_call(LIR_Opr result, address dest,
+                            const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) {
+  const bool dest_is_stub = dest == Runtime1::entry_for(Runtime1::register_finalizer_id) ||
+                            dest == Runtime1::entry_for(Runtime1::new_multi_array_id   );
+
+  if (!dest_is_stub) {
+    __ call_c_with_frame_resize(dest, /*no resizing*/ 0);
+    if (info != NULL) {
+      add_call_info_here(info);
+    }
+    return;
+  }
+
+  // Stubs: Called via rt_call, but dest is a stub address.
+  //__ load_const_optimized(R0, dest);
+  __ add_const_optimized(R0, R29_TOC, MacroAssembler::offset_to_global_toc(dest));
+  __ mtctr(R0);
+  __ bctrl();
+  assert(info != NULL, "sanity");
+  add_call_info_here(info);
+}
+
+
+void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) {
+  ShouldNotReachHere(); // Not needed on _LP64; all arguments are ignored and reaching this is flagged as an error.
+}
+
+void LIR_Assembler::membar() {
+  __ fence(); // The generic barrier maps to the macro assembler's fence().
+}
+
+void LIR_Assembler::membar_acquire() {
+  __ acquire(); // Delegates to the macro assembler's acquire().
+}
+
+void LIR_Assembler::membar_release() {
+  __ release(); // Delegates to the macro assembler's release().
+}
+
+void LIR_Assembler::membar_loadload() {
+  __ membar(Assembler::LoadLoad); // Barrier restricted to the LoadLoad ordering.
+}
+
+void LIR_Assembler::membar_storestore() {
+  __ membar(Assembler::StoreStore); // Barrier restricted to the StoreStore ordering.
+}
+
+void LIR_Assembler::membar_loadstore() {
+  __ membar(Assembler::LoadStore); // Barrier restricted to the LoadStore ordering.
+}
+
+void LIR_Assembler::membar_storeload() {
+  __ membar(Assembler::StoreLoad); // Barrier restricted to the StoreLoad ordering.
+}
+
+
+// Computes the effective address described by addr_opr into dest.
+// The address is either base + displacement or base + index; scaling is not
+// supported and index and displacement must not be combined.
+void LIR_Assembler::leal(LIR_Opr addr_opr, LIR_Opr dest) {
+  LIR_Address* address = addr_opr->as_address_ptr();
+  assert(address->scale() == LIR_Address::times_1, "no scaling on this platform");
+
+  const Register Rresult = dest->as_pointer_register();
+  const Register Rbase = address->base()->as_pointer_register();
+
+  if (!address->index()->is_illegal()) {
+    // base + index
+    assert(address->disp() == 0, "can't have both: index and disp");
+    __ add(Rresult, address->index()->as_pointer_register(), Rbase);
+  } else {
+    // base + displacement
+    __ add_const_optimized(Rresult, Rbase, address->disp());
+  }
+}
+
+
+void LIR_Assembler::get_thread(LIR_Opr result_reg) {
+  ShouldNotReachHere(); // result_reg is ignored; this hook is not expected to be called.
+}
+
+
+#ifdef ASSERT
+// Emit run-time assertion (only compiled with ASSERT; not implemented here).
+void LIR_Assembler::emit_assert(LIR_OpAssert* op) {
+  Unimplemented(); // op is ignored.
+}
+#endif
+
+
+// Optimizes instruction pairs of `lir` before emission.
+// Currently one pattern is handled: a register-to-register move that directly
+// follows a move between the same two registers in the opposite direction
+// (a -> b, then b -> a) is removed from the list. After a removal the scan
+// continues with the next index, so the op that slid into the freed position is
+// not examined against its new predecessor.
+void LIR_Assembler::peephole(LIR_List* lir) {
+  LIR_OpList* ops = lir->instructions_list();
+
+  for (int idx = 1; idx < ops->length(); idx++) {
+    LIR_Op* cur = ops->at(idx);
+    if (cur->code() != lir_move) {
+      continue;
+    }
+
+    // The candidate must be a register-register move.
+    LIR_Op1* cur_move = static_cast<LIR_Op1*>(cur);
+    LIR_Opr cur_src = cur_move->in_opr();
+    LIR_Opr cur_dst = cur_move->result_opr();
+    if (!cur_src->is_register() || !cur_dst->is_register()) {
+      continue;
+    }
+
+    // Its predecessor must be a move as well.
+    LIR_Op* before = ops->at(idx - 1);
+    if (!before || before->code() != lir_move) {
+      continue;
+    }
+
+    LIR_Op1* before_move = static_cast<LIR_Op1*>(before);
+    LIR_Opr before_src = before_move->in_opr();
+    LIR_Opr before_dst = before_move->result_opr();
+    if (before_src->is_same_register(cur_dst) && cur_src->is_same_register(before_dst)) {
+      // The second move only copies the value back: drop it.
+      ops->remove_at(idx);
+    }
+  }
+}
+
+
+void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp) { // Emits lir_xadd / exchange as a load-reserve, store-conditional retry loop on the address in src; the old value ends up in dest.
+  const Register Rptr = src->as_pointer_register(),
+                 Rtmp = tmp->as_register();
+  Register Rco = noreg;
+  if (UseCompressedOops && data->is_oop()) {
+    Rco = __ encode_heap_oop(Rtmp, data->as_register()); // Compress the new oop once, before the loop.
+  }
+
+  Label Lretry;
+  __ bind(Lretry); // Retry target for the bne emitted after the conditional store.
+
+  if (data->type() == T_INT) {
+    const Register Rold = dest->as_register(),
+                   Rsrc = data->as_register();
+    assert_different_registers(Rptr, Rtmp, Rold, Rsrc);
+    __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
+    if (code == lir_xadd) {
+      __ add(Rtmp, Rsrc, Rold); // New value = old value + addend.
+      __ stwcx_(Rtmp, Rptr);
+    } else {
+      __ stwcx_(Rsrc, Rptr); // Exchange: store the new value unchanged.
+    }
+  } else if (data->is_oop()) {
+    assert(code == lir_xchg, "xadd for oops");
+    const Register Rold = dest->as_register();
+    if (UseCompressedOops) {
+      assert_different_registers(Rptr, Rold, Rco);
+      __ lwarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update()); // Compressed oops are accessed as 32 bit words.
+      __ stwcx_(Rco, Rptr);
+    } else {
+      const Register Robj = data->as_register();
+      assert_different_registers(Rptr, Rold, Robj);
+      __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
+      __ stdcx_(Robj, Rptr);
+    }
+  } else if (data->type() == T_LONG) {
+    const Register Rold = dest->as_register_lo(),
+                   Rsrc = data->as_register_lo();
+    assert_different_registers(Rptr, Rtmp, Rold, Rsrc);
+    __ ldarx(Rold, Rptr, MacroAssembler::cmpxchgx_hint_atomic_update());
+    if (code == lir_xadd) {
+      __ add(Rtmp, Rsrc, Rold);
+      __ stdcx_(Rtmp, Rptr);
+    } else {
+      __ stdcx_(Rsrc, Rptr);
+    }
+  } else {
+    ShouldNotReachHere();
+  }
+
+  if (UseStaticBranchPredictionInCompareAndSwapPPC64) { // Both variants loop back to Lretry; they differ only in the branch prediction hint.
+    __ bne_predict_not_taken(CCR0, Lretry);
+  } else {
+    __ bne(                  CCR0, Lretry);
+  }
+
+  if (UseCompressedOops && data->is_oop()) {
+    __ decode_heap_oop(dest->as_register()); // The old value was loaded in compressed form.
+  }
+}
+
+
+// Emits the update of one type-profile cell (the word addressed by op->mdp())
+// for the object in op->obj().
+// The cell combines a klass pointer with the TypeEntries::null_seen and
+// TypeEntries::type_unknown flag bits. Depending on what the compiler already
+// knows (current_klass, exact_klass, not_null, no_conflict) the emitted code
+//   - sets null_seen when obj is null,
+//   - installs the klass the first time a type is recorded, and
+//   - sets type_unknown once a different klass shows up.
+// R0 and tmp are clobbered (tmp may alias obj). R29_TOC is borrowed as the
+// klass register and reloaded with the global TOC before leaving the update code.
+void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) {
+  Register obj = op->obj()->as_register();
+  Register tmp = op->tmp()->as_pointer_register();
+  LIR_Address* mdo_addr = op->mdp()->as_address_ptr();
+  ciKlass* exact_klass = op->exact_klass();
+  intptr_t current_klass = op->current_klass();
+  bool not_null = op->not_null();
+  bool no_conflict = op->no_conflict();
+
+  Label Lupdate, Ldo_update, Ldone;
+
+  bool do_null = !not_null;
+  bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass;
+  bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set;
+
+  assert(do_null || do_update, "why are we here?");
+  assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?");
+
+  __ verify_oop(obj);
+
+  if (do_null) {
+    if (!TypeEntries::was_null_seen(current_klass)) {
+      // Null not recorded yet: set null_seen in the cell when obj is null.
+      __ cmpdi(CCR0, obj, 0);
+      __ bne(CCR0, Lupdate);
+      __ ld(R0, index_or_disp(mdo_addr), mdo_addr->base()->as_pointer_register());
+      __ ori(R0, R0, TypeEntries::null_seen);
+      if (do_update) {
+        // Share the store at Ldo_update.
+        __ b(Ldo_update);
+      } else {
+        __ std(R0, index_or_disp(mdo_addr), mdo_addr->base()->as_pointer_register());
+      }
+    } else {
+      if (do_update) {
+        // Null is already recorded: a null obj needs no update at all.
+        __ cmpdi(CCR0, obj, 0);
+        __ beq(CCR0, Ldone);
+      }
+    }
+#ifdef ASSERT
+  } else {
+    __ cmpdi(CCR0, obj, 0);
+    __ bne(CCR0, Lupdate);
+    __ stop("unexpect null obj", 0x9652);
+#endif
+  }
+
+  __ bind(Lupdate);
+  if (do_update) {
+    Label Lnext;
+    const Register klass = R29_TOC; // kill and reload
+    bool klass_reg_used = false;
+#ifdef ASSERT
+    if (exact_klass != NULL) {
+      Label ok;
+      klass_reg_used = true;
+      __ load_klass(klass, obj);
+      metadata2reg(exact_klass->constant_encoding(), R0);
+      __ cmpd(CCR0, klass, R0);
+      __ beq(CCR0, ok);
+      __ stop("exact klass and actual klass differ", 0x8564);
+      __ bind(ok);
+    }
+#endif
+
+    if (!no_conflict) {
+      if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) {
+        klass_reg_used = true;
+        if (exact_klass != NULL) {
+          __ ld(tmp, index_or_disp(mdo_addr), mdo_addr->base()->as_pointer_register());
+          metadata2reg(exact_klass->constant_encoding(), klass);
+        } else {
+          __ load_klass(klass, obj);
+          __ ld(tmp, index_or_disp(mdo_addr), mdo_addr->base()->as_pointer_register()); // may kill obj
+        }
+
+        // Like InterpreterMacroAssembler::profile_obj_type
+        __ clrrdi(R0, tmp, exact_log2(-TypeEntries::type_klass_mask));
+        // Basically same as andi(R0, tmp, TypeEntries::type_klass_mask);
+        __ cmpd(CCR1, R0, klass);
+        // Klass seen before, nothing to do (regardless of unknown bit).
+        //beq(CCR1, do_nothing);
+
+        // Test the unknown bit of the profile cell (tmp), as the other branches
+        // below do. The klass register is combined with the flag bits via orr,
+        // i.e. it carries no flag bits itself, so testing it instead would turn
+        // the "already unknown" early-out into a no-op.
+        __ andi_(R0, tmp, TypeEntries::type_unknown);
+        // Already unknown. Nothing to do anymore.
+        //bne(CCR0, do_nothing);
+        __ crorc(CCR0, Assembler::equal, CCR1, Assembler::equal); // cr0 eq = cr1 eq or cr0 ne
+        __ beq(CCR0, Lnext);
+
+        if (TypeEntries::is_type_none(current_klass)) {
+          __ clrrdi_(R0, tmp, exact_log2(-TypeEntries::type_mask));
+          __ orr(R0, klass, tmp); // Combine klass and null_seen bit (only used if (tmp & type_mask)==0).
+          __ beq(CCR0, Ldo_update); // First time here. Set profile type.
+        }
+
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only");
+
+        __ ld(tmp, index_or_disp(mdo_addr), mdo_addr->base()->as_pointer_register());
+        __ andi_(R0, tmp, TypeEntries::type_unknown);
+        // Already unknown. Nothing to do anymore.
+        __ bne(CCR0, Lnext);
+      }
+
+      // Different than before. Cannot keep accurate profile.
+      __ ori(R0, tmp, TypeEntries::type_unknown);
+    } else {
+      // There's a single possible klass at this profile point
+      assert(exact_klass != NULL, "should be");
+      __ ld(tmp, index_or_disp(mdo_addr), mdo_addr->base()->as_pointer_register());
+
+      if (TypeEntries::is_type_none(current_klass)) {
+        klass_reg_used = true;
+        metadata2reg(exact_klass->constant_encoding(), klass);
+
+        __ clrrdi(R0, tmp, exact_log2(-TypeEntries::type_klass_mask));
+        // Basically same as andi(R0, tmp, TypeEntries::type_klass_mask);
+        __ cmpd(CCR1, R0, klass);
+        // Klass seen before, nothing to do (regardless of unknown bit).
+        __ beq(CCR1, Lnext);
+#ifdef ASSERT
+        {
+          Label ok;
+          __ clrrdi_(R0, tmp, exact_log2(-TypeEntries::type_mask));
+          __ beq(CCR0, ok); // First time here.
+
+          __ stop("unexpected profiling mismatch", 0x7865);
+          __ bind(ok);
+        }
+#endif
+        // First time here. Set profile type.
+        __ orr(R0, klass, tmp); // Combine klass and null_seen bit (only used if (tmp & type_mask)==0).
+      } else {
+        assert(ciTypeEntries::valid_ciklass(current_klass) != NULL &&
+               ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent");
+
+        // Already unknown. Nothing to do anymore.
+        __ andi_(R0, tmp, TypeEntries::type_unknown);
+        __ bne(CCR0, Lnext);
+
+        // Different than before. Cannot keep accurate profile.
+        __ ori(R0, tmp, TypeEntries::type_unknown);
+      }
+    }
+
+    // R0 holds the new cell contents on every path that reaches this store.
+    __ bind(Ldo_update);
+    __ std(R0, index_or_disp(mdo_addr), mdo_addr->base()->as_pointer_register());
+
+    __ bind(Lnext);
+    if (klass_reg_used) { __ load_const_optimized(R29_TOC, MacroAssembler::global_toc(), R0); } // reinit
+  }
+  __ bind(Ldone);
+}
+
+
+// Emits the single-byte CRC32 update: the crc register is complemented, updated
+// with the byte in val through the stub routines' CRC table, and the complement
+// of the outcome is written to the result register. The result register also
+// serves as the table pointer, so all three registers must differ; crc and R0
+// are clobbered.
+void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) {
+  assert(op->crc()->is_single_cpu(), "crc must be register");
+  assert(op->val()->is_single_cpu(), "byte value must be register");
+  assert(op->result_opr()->is_single_cpu(), "result must be register");
+
+  const Register Rcrc    = op->crc()->as_register();
+  const Register Rbyte   = op->val()->as_register();
+  const Register Rresult = op->result_opr()->as_register();
+  assert_different_registers(Rbyte, Rcrc, Rresult);
+
+  // Rresult doubles as the pointer to the CRC table until the final nand.
+  __ load_const_optimized(Rresult, StubRoutines::crc_table_addr(), R0);
+  __ nand(Rcrc, Rcrc, Rcrc);       // ~crc
+  __ update_byte_crc32(Rcrc, Rbyte, Rresult);
+  __ nand(Rresult, Rcrc, Rcrc);    // ~crc
+}
+
+#undef __
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/c1_LIRAssembler_ppc.hpp	Sat Jan 16 12:04:47 2016 +0100
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2000, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_C1_LIRASSEMBLER_PPC_HPP
+#define CPU_PPC_VM_C1_LIRASSEMBLER_PPC_HPP
+
+ private:
+
+  //////////////////////////////////////////////////////////////////////////////
+  // PPC64 load/store emission
+  //
+  // The PPC ld/st instructions cannot accommodate displacements > 16 bits long.
+  // The following "pseudo" instructions (load/store) make it easier to
+  // use the indexed addressing mode by allowing 32 bit displacements:
+  //
+
+  void explicit_null_check(Register addr, CodeEmitInfo* info);
+
+  int store(LIR_Opr from_reg, Register base, int offset, BasicType type, bool wide, bool unaligned); // base + constant offset
+  int store(LIR_Opr from_reg, Register base, Register disp, BasicType type, bool wide);              // base + index register
+
+  int load(Register base, int offset, LIR_Opr to_reg, BasicType type, bool wide, bool unaligned);    // base + constant offset
+  int load(Register base, Register disp, LIR_Opr to_reg, BasicType type, bool wide);                 // base + index register
+
+  int shift_amount(BasicType t);
+
+  // Record the type of the receiver in ReceiverTypeData.
+  void type_profile_helper(Register mdo, int mdo_offset_bias,
+                           ciMethodData *md, ciProfileData *data,
+                           Register recv, Register tmp1, Label* update_done);
+  // Setup pointers to MDO, MDO slot, also compute offset bias to access the slot.
+  void setup_md_access(ciMethod* method, int bci,
+                       ciMethodData*& md, ciProfileData*& data, int& mdo_offset_bias); // md, data and mdo_offset_bias are outputs.
+ public:
+  static const ConditionRegister BOOL_RESULT;
+
+  // Emit trampoline stub for call. Call bailout() if failed. Return true on success.
+  bool emit_trampoline_stub_for_call(address target, Register Rtoc = noreg);
+
+enum { // Size limits for stubs and handlers, built from the MacroAssembler size constants.
+  max_static_call_stub_size = 4 * BytesPerInstWord + MacroAssembler::b64_patchable_size,
+  call_stub_size = max_static_call_stub_size + MacroAssembler::trampoline_stub_size, // or smaller
+  exception_handler_size = MacroAssembler::b64_patchable_size, // or smaller
+  deopt_handler_size = MacroAssembler::bl64_patchable_size
+};
+
+#endif // CPU_PPC_VM_C1_LIRASSEMBLER_PPC_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/c1_LIRGenerator_ppc.cpp	Sat Jan 16 12:04:47 2016 +0100
@@ -0,0 +1,1429 @@
+/*
+ * Copyright (c) 2005, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2015 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "c1/c1_Compilation.hpp"
+#include "c1/c1_FrameMap.hpp"
+#include "c1/c1_Instruction.hpp"
+#include "c1/c1_LIRAssembler.hpp"
+#include "c1/c1_LIRGenerator.hpp"
+#include "c1/c1_Runtime1.hpp"
+#include "c1/c1_ValueStack.hpp"
+#include "ci/ciArray.hpp"
+#include "ci/ciObjArrayKlass.hpp"
+#include "ci/ciTypeArrayKlass.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_ppc.inline.hpp"
+
+#ifdef ASSERT
+#define __ gen()->lir(__FILE__, __LINE__)->  // With ASSERT, lir() is also given the source position of the emitting line.
+#else
+#define __ gen()->lir()->  // Shorthand for appending an operation to the generator's LIR list.
+#endif
+
+void LIRItem::load_byte_item() {
+  // Byte loads use the same registers as other loads, so this simply defers to load_item().
+  load_item();
+}
+
+
+void LIRItem::load_nonconstant() {
+  // A value that cannot be inlined as a constant is loaded like any other item.
+  if (!_gen->can_inline_as_constant(value())) {
+    load_item();
+    return;
+  }
+
+  // Inlinable: keep an existing constant operand, otherwise derive one from the value's type.
+  LIR_Opr opr = value()->operand();
+  _result = opr->is_constant() ? opr : LIR_OprFact::value_type(value()->type());
+}
+
+
+inline void load_int_as_long(LIR_List *ll, LIRItem &li, LIR_Opr dst) {
+  // Appends to ll the operation that brings the int item li into dst as a long.
+  const bool in_register = li.value()->operand()->is_register();
+  if (!in_register) {
+    // Constants or memory get loaded with sign extend on this platform.
+    ll->move(li.result(), dst);
+    return;
+  }
+  ll->convert(Bytecodes::_i2l, li.result(), FrameMap::as_long_opr(dst->as_register())); // Convert.
+}
+
+
+//--------------------------------------------------------------
+//               LIRGenerator
+//--------------------------------------------------------------
+
+LIR_Opr LIRGenerator::exceptionOopOpr()              { return FrameMap::R3_oop_opr; } // Exception oop lives in R3.
+LIR_Opr LIRGenerator::exceptionPcOpr()               { return FrameMap::R4_opr; }     // Exception pc lives in R4.
+LIR_Opr LIRGenerator::syncLockOpr()                  { return FrameMap::R5_opr; }     // Need temp effect for MonitorEnterStub.
+LIR_Opr LIRGenerator::syncTempOpr()                  { return FrameMap::R4_oop_opr; } // Need temp effect for MonitorEnterStub.
+LIR_Opr LIRGenerator::getThreadTemp()                { return LIR_OprFact::illegalOpr; } // not needed
+
+LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) {
+  // int, object and long results use R3; float and double results use F1.
+  LIR_Opr result_reg = LIR_OprFact::illegalOpr;
+  const ValueTag tag = type->tag();
+  if      (tag == intTag)    { result_reg = FrameMap::R3_opr;        }
+  else if (tag == objectTag) { result_reg = FrameMap::R3_oop_opr;    }
+  else if (tag == longTag)   { result_reg = FrameMap::R3_long_opr;   }
+  else if (tag == floatTag)  { result_reg = FrameMap::F1_opr;        }
+  else if (tag == doubleTag) { result_reg = FrameMap::F1_double_opr; }
+  else { // addressTag and every other tag: no result register exists.
+    ShouldNotReachHere();
+    return LIR_OprFact::illegalOpr;
+  }
+  assert(result_reg->type_field() == as_OprType(as_BasicType(type)), "type mismatch");
+  return result_reg;
+}
+
+LIR_Opr LIRGenerator::rlock_callee_saved(BasicType type) { // Must not be called in this port.
+  ShouldNotReachHere();
+  return LIR_OprFact::illegalOpr; // Dummy return value.
+}
+
+// Allocates the register for a byte value: an ordinary T_INT register is used, the 'type' argument is ignored.
+LIR_Opr LIRGenerator::rlock_byte(BasicType type) {
+  return new_register(T_INT);
+}
+
+
+//--------- loading items into registers --------------------------------
+
+// PPC cannot inline all constants.
+bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const {
+  // Accepted: int and long constants that fit a signed 16 bit immediate, and the null object.
+  ValueType* vt = v->type();
+
+  if (vt->as_IntConstant() != NULL)    return Assembler::is_simm16(vt->as_IntConstant()->value());
+  if (vt->as_LongConstant() != NULL)   return Assembler::is_simm16(vt->as_LongConstant()->value());
+  if (vt->as_ObjectConstant() != NULL) return vt->as_ObjectConstant()->value()->is_null_object();
+
+  // Every other kind of value is rejected.
+  return false;
+}
+
+
+// Only simm16 constants can be inlined; the rule is the one implemented by can_store_as_constant().
+bool LIRGenerator::can_inline_as_constant(Value i) const {
+  return can_store_as_constant(i, as_BasicType(i->type()));
+}
+
+
+bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const {
+  // LIR constant variant: simm16 int/long values and the NULL object can be inlined.
+  switch (c->type()) {
+    case T_INT:
+      return Assembler::is_simm16(c->as_jint());
+    case T_LONG:
+      return Assembler::is_simm16(c->as_jlong());
+    case T_OBJECT:
+      return c->as_jobject() == NULL;
+    default: return false;
+  }
+}
+
+// Returns a newly allocated T_INT register to be used as the safepoint poll register.
+LIR_Opr LIRGenerator::safepoint_poll_register() {
+  return new_register(T_INT);
+}
+
+// Builds a LIR_Address for base + (index << shift) + disp, emitting LIR for whatever cannot be encoded directly.
+LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index,
+                                            int shift, int disp, BasicType type) {
+  assert(base->is_register(), "must be");
+
+  // Accumulate fixed displacements.
+  if (index->is_constant()) {
+    disp += index->as_constant_ptr()->as_jint() << shift;
+    index = LIR_OprFact::illegalOpr; // The constant index now lives in disp.
+  }
+
+  if (index->is_register()) {
+    // Apply the shift and accumulate the displacement.
+    if (shift > 0) {
+      LIR_Opr tmp = new_pointer_register();
+      __ shift_left(index, shift, tmp);
+      index = tmp;
+    }
+    if (disp != 0) {
+      LIR_Opr tmp = new_pointer_register();
+      if (Assembler::is_simm16(disp)) { // Displacement can be added as an immediate.
+        __ add(index, LIR_OprFact::intptrConst(disp), tmp);
+        index = tmp;
+      } else { // Too large for an immediate: load it into tmp first.
+        __ move(LIR_OprFact::intptrConst(disp), tmp);
+        __ add(tmp, index, tmp);
+        index = tmp;
+      }
+      disp = 0; // Fully folded into index.
+    }
+  } else if (!Assembler::is_simm16(disp)) {
+    // Index is illegal so replace it with the displacement loaded into a register.
+    index = new_pointer_register();
+    __ move(LIR_OprFact::intptrConst(disp), index);
+    disp = 0;
+  }
+
+  // At this point we either have base + index or base + displacement.
+  if (disp == 0) {
+    return new LIR_Address(base, index, type);
+  } else {
+    assert(Assembler::is_simm16(disp), "must be");
+    return new LIR_Address(base, disp, type);
+  }
+}
+
+// Computes the address of element index_opr of array_opr (element type 'type'), emitting the LIR needed to form it.
+LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr,
+                                              BasicType type, bool needs_card_mark) {
+  int elem_size = type2aelembytes(type);
+  int shift = exact_log2(elem_size);
+
+  LIR_Opr base_opr;
+  int offset = arrayOopDesc::base_offset_in_bytes(type); // Offset of the first element within the array object.
+
+  if (index_opr->is_constant()) {
+    int i = index_opr->as_constant_ptr()->as_jint();
+    int array_offset = i * elem_size;
+    if (Assembler::is_simm16(array_offset + offset)) { // Whole offset fits an immediate: address is array + offset.
+      base_opr = array_opr;
+      offset = array_offset + offset;
+    } else { // Otherwise add the element offset to the array into a new base; the header offset stays in 'offset'.
+      base_opr = new_pointer_register();
+      if (Assembler::is_simm16(array_offset)) {
+        __ add(array_opr, LIR_OprFact::intptrConst(array_offset), base_opr);
+      } else {
+        __ move(LIR_OprFact::intptrConst(array_offset), base_opr);
+        __ add(base_opr, array_opr, base_opr);
+      }
+    }
+  } else {
+#ifdef _LP64
+    if (index_opr->type() == T_INT) { // Widen an int index to long before it is used in address arithmetic.
+      LIR_Opr tmp = new_register(T_LONG);
+      __ convert(Bytecodes::_i2l, index_opr, tmp);
+      index_opr = tmp;
+    }
+#endif
+
+    base_opr = new_pointer_register();
+    assert (index_opr->is_register(), "Must be register");
+    if (shift > 0) { // base_opr = array + (index << shift)
+      __ shift_left(index_opr, shift, base_opr);
+      __ add(base_opr, array_opr, base_opr);
+    } else { // Element size 1: base_opr = array + index
+      __ add(index_opr, array_opr, base_opr);
+    }
+  }
+  if (needs_card_mark) { // Form the complete element address in a register and return an address without displacement.
+    LIR_Opr ptr = new_pointer_register();
+    __ add(base_opr, LIR_OprFact::intptrConst(offset), ptr);
+    return new LIR_Address(ptr, type);
+  } else {
+    return new LIR_Address(base_opr, offset, type);
+  }
+}
+
+
+LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
+  // Returns x as an operand of the given type (T_INT or T_LONG only): a constant operand
+  // if x fits a signed 16 bit immediate, otherwise a new register that x is moved into.
+  LIR_Opr constant = NULL;
+  switch (type) {
+    case T_LONG: constant = LIR_OprFact::longConst(x); break;
+    case T_INT:  constant = LIR_OprFact::intConst(x);  break;
+    default:     ShouldNotReachHere();
+  }
+  if (Assembler::is_simm16(x)) {
+    return constant;
+  }
+  LIR_Opr reg = new_register(type);
+  __ move(constant, reg);
+  return reg;
+}
+
+// Adds 'step' to the counter of the given type located at the absolute address 'counter'.
+void LIRGenerator::increment_counter(address counter, BasicType type, int step) {
+  LIR_Opr pointer = new_pointer_register();
+  __ move(LIR_OprFact::intptrConst(counter), pointer); // Load the counter's address into a register.
+  LIR_Address* addr = new LIR_Address(pointer, type);
+  increment_counter(addr, step);
+}
+
+// Adds 'step' to the counter at 'addr' with a plain load / add / store sequence.
+void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
+  LIR_Opr temp = new_register(addr->type());
+  __ move(addr, temp);                                    // Load the current value.
+  __ add(temp, load_immediate(step, addr->type()), temp); // Step is an immediate or a register, as load_immediate() decides.
+  __ move(temp, addr);                                    // Store the new value back.
+}
+
+// Loads the T_INT value at base + disp into R0 and compares it with the constant c.
+void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
+  LIR_Opr tmp = FrameMap::R0_opr; // R0 holds the loaded value.
+  __ load(new LIR_Address(base, disp, T_INT), tmp, info);
+  __ cmp(condition, tmp, c);
+}
+
+// Compares reg with the value of the given type at base + disp (constant displacement); the value is loaded into R0.
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base,
+                               int disp, BasicType type, CodeEmitInfo* info) {
+  LIR_Opr tmp = FrameMap::R0_opr; // R0 holds the loaded value.
+  __ load(new LIR_Address(base, disp, type), tmp, info);
+  __ cmp(condition, reg, tmp);
+}
+
+// Compares reg with the value of the given type at base + disp (displacement given as operand); the value is loaded into R0.
+void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base,
+                               LIR_Opr disp, BasicType type, CodeEmitInfo* info) {
+  LIR_Opr tmp = FrameMap::R0_opr; // R0 holds the loaded value.
+  __ load(new LIR_Address(base, disp, type), tmp, info);
+  __ cmp(condition, reg, tmp);
+}
+
+// Tries to compute left * c with a shift plus one add/sub; returns false if c is not 2^n - 1 or 2^n + 1. 'tmp' is unused.
+bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) {
+  assert(left != result, "should be different registers");
+  if (is_power_of_2(c + 1)) { // c == 2^n - 1: result = (left << n) - left
+    __ shift_left(left, log2_intptr(c + 1), result);
+    __ sub(result, left, result);
+    return true;
+  } else if (is_power_of_2(c - 1)) { // c == 2^n + 1: result = (left << n) + left
+    __ shift_left(left, log2_intptr(c - 1), result);
+    __ add(result, left, result);
+    return true;
+  }
+  return false;
+}
+
+
+void LIRGenerator::store_stack_parameter(LIR_Opr item, ByteSize offset_from_sp) {
+  // Stores item into the stack slot at SP + offset_from_sp.
+  const int sp_offset = in_bytes(offset_from_sp);
+  const BasicType t = item->type();
+  LIR_Address* slot = new LIR_Address(FrameMap::SP_opr, sp_offset, t);
+  // long/double values whose unbiased offset is not a multiple of 8 use the unaligned move.
+  const bool is_64bit = (t == T_LONG || t == T_DOUBLE);
+  if (is_64bit && (sp_offset - STACK_BIAS) % 8 != 0) { __ unaligned_move(item, slot); }
+  else                                               { __ move(item, slot); }
+}
+
+
+//----------------------------------------------------------------------
+//             visitor functions
+//----------------------------------------------------------------------
+// Generates LIR for an array element store: optional range and store checks, then the store, with GC barriers for oop elements.
+void LIRGenerator::do_StoreIndexed(StoreIndexed* x) {
+  assert(x->is_pinned(),"");
+  bool needs_range_check = x->compute_needs_range_check();
+  bool use_length = x->length() != NULL;
+  bool obj_store = x->elt_type() == T_ARRAY || x->elt_type() == T_OBJECT;
+  bool needs_store_check = obj_store && (x->value()->as_Constant() == NULL ||
+                                         !get_jobject_constant(x->value())->is_null_object() ||
+                                         x->should_profile()); // I.e. skipped only for an unprofiled store of a constant null.
+
+  LIRItem array(x->array(), this);
+  LIRItem index(x->index(), this);
+  LIRItem value(x->value(), this);
+  LIRItem length(this);
+
+  array.load_item();
+  index.load_nonconstant();
+
+  if (use_length && needs_range_check) {
+    length.set_instruction(x->length());
+    length.load_item();
+  }
+  if (needs_store_check) { // The store check needs the value in a register.
+    value.load_item();
+  } else {
+    value.load_for_store(x->elt_type());
+  }
+
+  set_no_result(x);
+
+  // The CodeEmitInfo must be duplicated for each different
+  // LIR-instruction because spilling can occur anywhere between two
+  // instructions and so the debug information must be different.
+  CodeEmitInfo* range_check_info = state_for(x);
+  CodeEmitInfo* null_check_info = NULL;
+  if (x->needs_null_check()) {
+    null_check_info = new CodeEmitInfo(range_check_info);
+  }
+
+  // Emit array address setup early so it schedules better.
+  LIR_Address* array_addr = emit_array_address(array.result(), index.result(), x->elt_type(), obj_store);
+
+  if (GenerateRangeChecks && needs_range_check) {
+    if (use_length) { // Length is already available as a value: compare it with the index directly.
+      __ cmp(lir_cond_belowEqual, length.result(), index.result());
+      __ branch(lir_cond_belowEqual, T_INT, new RangeCheckStub(range_check_info, index.result()));
+    } else {
+      array_range_check(array.result(), index.result(), null_check_info, range_check_info);
+      // Range_check also does the null check.
+      null_check_info = NULL;
+    }
+  }
+
+  if (GenerateArrayStoreCheck && needs_store_check) {
+    // Following registers are used by slow_subtype_check:
+    LIR_Opr tmp1 = FrameMap::R4_opr; // super_klass
+    LIR_Opr tmp2 = FrameMap::R5_opr; // sub_klass
+    LIR_Opr tmp3 = FrameMap::R6_opr; // temp
+
+    CodeEmitInfo* store_check_info = new CodeEmitInfo(range_check_info);
+    __ store_check(value.result(), array.result(), tmp1, tmp2, tmp3,
+                   store_check_info, x->profiled_method(), x->profiled_bci());
+  }
+
+  if (obj_store) {
+    // Needs GC write barriers.
+    pre_barrier(LIR_OprFact::address(array_addr), LIR_OprFact::illegalOpr /* pre_val */,
+                true /* do_load */, false /* patch */, NULL);
+  }
+  __ move(value.result(), array_addr, null_check_info); // The store itself; it carries the null check info if still pending.
+  if (obj_store) {
+    // Precise card mark.
+    post_barrier(LIR_OprFact::address(array_addr), value.result());
+  }
+}
+
+// Generates LIR for monitorenter on x->obj(), using the fixed registers R4, R5 and R6 as lock/temp operands.
+void LIRGenerator::do_MonitorEnter(MonitorEnter* x) {
+  assert(x->is_pinned(),"");
+  LIRItem obj(x->obj(), this);
+  obj.load_item();
+
+  set_no_result(x);
+
+  // We use R4+R5 in order to get a temp effect. These regs are used in slow path (MonitorEnterStub).
+  LIR_Opr lock    = FrameMap::R5_opr;
+  LIR_Opr scratch = FrameMap::R4_opr;
+  LIR_Opr hdr     = FrameMap::R6_opr;
+
+  CodeEmitInfo* info_for_exception = NULL; // Only created when a null check is required.
+  if (x->needs_null_check()) {
+    info_for_exception = state_for(x);
+  }
+
+  // This CodeEmitInfo must not have the xhandlers because here the
+  // object is already locked (xhandlers expects object to be unlocked).
+  CodeEmitInfo* info = state_for(x, x->state(), true);
+  monitor_enter(obj.result(), lock, hdr, scratch, x->monitor_no(), info_for_exception, info);
+}
+
+// Generates LIR for monitorexit of monitor x->monitor_no(), using the fixed registers R4, R5 and R6.
+void LIRGenerator::do_MonitorExit(MonitorExit* x) {
+  assert(x->is_pinned(),"");
+  LIRItem obj(x->obj(), this);
+  obj.dont_load_item(); // The object operand is not loaded here; monitor_exit() gets obj_temp instead.
+
+  set_no_result(x);
+  LIR_Opr lock     = FrameMap::R5_opr;
+  LIR_Opr hdr      = FrameMap::R4_opr; // Used for slow path (MonitorExitStub).
+  LIR_Opr obj_temp = FrameMap::R6_opr;
+  monitor_exit(obj_temp, lock, hdr, LIR_OprFact::illegalOpr, x->monitor_no());
+}
+
+
+// _ineg, _lneg, _fneg, _dneg: negates the operand, which is always loaded into a register first.
+void LIRGenerator::do_NegateOp(NegateOp* x) {
+  LIRItem value(x->x(), this);
+  value.load_item();
+  LIR_Opr reg = rlock_result(x);
+  __ negate(value.result(), reg);
+}
+
+
+// for  _fadd, _fmul, _fsub, _fdiv, _frem
+//      _dadd, _dmul, _dsub, _ddiv, _drem (add/mul/sub/div are emitted as LIR, the remainders call SharedRuntime)
+void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) {
+  switch (x->op()) {
+  case Bytecodes::_fadd:
+  case Bytecodes::_fmul:
+  case Bytecodes::_fsub:
+  case Bytecodes::_fdiv:
+  case Bytecodes::_dadd:
+  case Bytecodes::_dmul:
+  case Bytecodes::_dsub:
+  case Bytecodes::_ddiv: {
+    LIRItem left(x->x(), this);
+    LIRItem right(x->y(), this);
+    left.load_item(); // Both operands must be in registers.
+    right.load_item();
+    rlock_result(x);
+    arithmetic_op_fpu(x->op(), x->operand(), left.result(), right.result(), x->is_strictfp());
+  }
+  break;
+
+  case Bytecodes::_frem:
+  case Bytecodes::_drem: {
+    address entry = NULL; // Runtime function that computes the remainder.
+    switch (x->op()) {
+    case Bytecodes::_frem:
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem);
+      break;
+    case Bytecodes::_drem:
+      entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+    LIR_Opr result = call_runtime(x->x(), x->y(), entry, x->type(), NULL);
+    set_result(x, result);
+  }
+  break;
+
+  default: ShouldNotReachHere();
+  }
+}
+
+
+// for  _ladd, _lmul, _lsub, _ldiv, _lrem: generates LIR for x->x() op x->y(); div/rem get a division-by-zero check.
+void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) {
+  bool is_div_rem = x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem;
+
+  LIRItem right(x->y(), this);
+  // Missing test if instr is commutative and if we should swap.
+  if (right.value()->type()->as_LongConstant() &&
+      (x->op() == Bytecodes::_lsub && right.value()->type()->as_LongConstant()->value() == -(1 << 15)) ) {
+    // Sub is implemented by addi and can't support min_simm16 (-(1 << 15)) as constant: its negation is no simm16.
+    right.load_item();
+  } else {
+    right.load_nonconstant();
+  }
+  assert(right.is_constant() || right.is_register(), "wrong state of right");
+
+  if (is_div_rem) {
+    LIR_Opr divisor = right.result();
+    if (divisor->is_register()) { // Divisor only known at run time: compare with zero and branch to the stub.
+      CodeEmitInfo* null_check_info = state_for(x);
+      __ cmp(lir_cond_equal, divisor, LIR_OprFact::longConst(0));
+      __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(null_check_info));
+    } else {
+      jlong const_divisor = divisor->as_constant_ptr()->as_jlong();
+      if (const_divisor == 0) { // Constant zero divisor: jump to the stub unconditionally.
+        CodeEmitInfo* null_check_info = state_for(x);
+        __ jump(new DivByZeroStub(null_check_info));
+        rlock_result(x);
+        __ move(LIR_OprFact::longConst(0), x->operand()); // dummy
+        return;
+      }
+      if (x->op() == Bytecodes::_lrem && !is_power_of_2(const_divisor) && const_divisor != -1) {
+        // Remainder computation would need additional tmp != R0.
+        right.load_item();
+      }
+    }
+  }
+
+  LIRItem left(x->x(), this);
+  left.load_item();
+  rlock_result(x);
+  if (is_div_rem) {
+    CodeEmitInfo* info = NULL; // Null check already done above.
+    LIR_Opr tmp = FrameMap::R0_opr;
+    if (x->op() == Bytecodes::_lrem) { // The irem/idiv LIR ops are used for long operands as well.
+      __ irem(left.result(), right.result(), x->operand(), tmp, info);
+    } else if (x->op() == Bytecodes::_ldiv) {
+      __ idiv(left.result(), right.result(), x->operand(), tmp, info);
+    }
+  } else {
+    arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL);
+  }
+}
+
+
+// for: _iadd, _imul, _isub, _idiv, _irem: generates LIR for x->x() op x->y(); div/rem get a division-by-zero check.
+void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) {
+  bool is_div_rem = x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem;
+
+  LIRItem right(x->y(), this);
+  // Missing test if instr is commutative and if we should swap.
+  if (right.value()->type()->as_IntConstant() &&
+      (x->op() == Bytecodes::_isub && right.value()->type()->as_IntConstant()->value() == -(1 << 15)) ) {
+    // Sub is implemented by addi and can't support min_simm16 (-(1 << 15)) as constant: its negation is no simm16.
+    right.load_item();
+  } else {
+    right.load_nonconstant();
+  }
+  assert(right.is_constant() || right.is_register(), "wrong state of right");
+
+  if (is_div_rem) {
+    LIR_Opr divisor = right.result();
+    if (divisor->is_register()) { // Divisor only known at run time: compare with zero and branch to the stub.
+      CodeEmitInfo* null_check_info = state_for(x);
+      __ cmp(lir_cond_equal, divisor, LIR_OprFact::intConst(0));
+      __ branch(lir_cond_equal, T_INT, new DivByZeroStub(null_check_info));
+    } else {
+      jint const_divisor = divisor->as_constant_ptr()->as_jint();
+      if (const_divisor == 0) { // Constant zero divisor: jump to the stub unconditionally.
+        CodeEmitInfo* null_check_info = state_for(x);
+        __ jump(new DivByZeroStub(null_check_info));
+        rlock_result(x);
+        __ move(LIR_OprFact::intConst(0), x->operand()); // dummy
+        return;
+      }
+      if (x->op() == Bytecodes::_irem && !is_power_of_2(const_divisor) && const_divisor != -1) {
+        // Remainder computation would need additional tmp != R0.
+        right.load_item();
+      }
+    }
+  }
+
+  LIRItem left(x->x(), this);
+  left.load_item();
+  rlock_result(x);
+  if (is_div_rem) {
+    CodeEmitInfo* info = NULL; // Null check already done above.
+    LIR_Opr tmp = FrameMap::R0_opr;
+    if (x->op() == Bytecodes::_irem) {
+      __ irem(left.result(), right.result(), x->operand(), tmp, info);
+    } else if (x->op() == Bytecodes::_idiv) {
+      __ idiv(left.result(), right.result(), x->operand(), tmp, info);
+    }
+  } else { // add/mul/sub: R0 is passed as temp.
+    arithmetic_op_int(x->op(), x->operand(), left.result(), right.result(), FrameMap::R0_opr);
+  }
+}
+
+// Dispatches an arithmetic operation to the FPU, long or int variant according to the tag of its result type.
+void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) {
+  ValueTag tag = x->type()->tag();
+  assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters");
+  switch (tag) {
+    case floatTag:
+    case doubleTag: do_ArithmeticOp_FPU(x);  return;
+    case longTag:   do_ArithmeticOp_Long(x); return;
+    case intTag:    do_ArithmeticOp_Int(x);  return;
+  }
+  ShouldNotReachHere(); // Any other tag is unexpected here.
+}
+
+
+// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr
+void LIRGenerator::do_ShiftOp(ShiftOp* x) {
+  // Only the shifted value is forced into a register; the count is used as it is.
+  LIRItem shifted(x->x(), this);
+  LIRItem amount(x->y(), this);
+  shifted.load_item();
+  LIR_Opr dst = rlock_result(x);
+
+  // A count held in a register gets R0 as temp; otherwise no temp is passed.
+  LIR_Opr count_tmp = amount.result()->is_register() ? FrameMap::R0_opr
+                                                     : LIR_OprFact::illegalOpr;
+
+  shift_op(x->op(), dst, shifted.result(), amount.result(), count_tmp);
+}
+
+// Returns true if the constant described by 'type' can be used directly as operand of logic bytecode bc (see do_LogicOp).
+inline bool can_handle_logic_op_as_uimm(ValueType *type, Bytecodes::Code bc) {
+  jlong int_or_long_const; // Int constants are widened to jlong so that one set of tests covers both.
+  if (type->as_IntConstant()) {
+    int_or_long_const = type->as_IntConstant()->value();
+  } else if (type->as_LongConstant()) {
+    int_or_long_const = type->as_LongConstant()->value();
+  } else if (type->as_ObjectConstant()) {
+    return type->as_ObjectConstant()->value()->is_null_object();
+  } else {
+    return false;
+  }
+
+  if (Assembler::is_uimm(int_or_long_const, 16)) return true; // Fits an unsigned 16 bit immediate.
+  if ((int_or_long_const & 0xFFFF) == 0 &&
+      Assembler::is_uimm((jlong)((julong)int_or_long_const >> 16), 16)) return true; // Unsigned 16 bit value shifted left by 16.
+
+  // see Assembler::andi
+  if (bc == Bytecodes::_iand &&
+      (is_power_of_2_long(int_or_long_const+1) ||
+       is_power_of_2_long(int_or_long_const) ||
+       is_power_of_2_long(-int_or_long_const))) return true;
+  if (bc == Bytecodes::_land &&
+      (is_power_of_2_long(int_or_long_const+1) ||
+       (Assembler::is_uimm(int_or_long_const, 32) && is_power_of_2_long(int_or_long_const)) ||
+       (int_or_long_const != min_jlong && is_power_of_2_long(-int_or_long_const)))) return true; // min_jlong is excluded before negating.
+
+  // special case: xor -1
+  if ((bc == Bytecodes::_ixor || bc == Bytecodes::_lxor) &&
+      int_or_long_const == -1) return true;
+  return false;
+}
+
+
+// _iand, _land, _ior, _lor, _ixor, _lxor: left is always in a register, right may stay a constant.
+void LIRGenerator::do_LogicOp(LogicOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+
+  left.load_item();
+
+  Value rval = right.value();
+  LIR_Opr r = rval->operand();
+  ValueType *type = rval->type();
+  // Logic instructions use unsigned immediate values.
+  if (can_handle_logic_op_as_uimm(type, x->op())) {
+    if (!r->is_constant()) { // No constant operand yet: create one from the type and record it on the value.
+      r = LIR_OprFact::value_type(type);
+      rval->set_operand(r);
+    }
+    right.set_result(r);
+  } else {
+    right.load_item();
+  }
+
+  LIR_Opr reg = rlock_result(x);
+
+  logic_op(x->op(), reg, left.result(), right.result());
+}
+
+
+// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg: compares two values and produces the comparison result in a register.
+void LIRGenerator::do_CompareOp(CompareOp* x) {
+  LIRItem left(x->x(), this);
+  LIRItem right(x->y(), this);
+  left.load_item();
+  right.load_item();
+  LIR_Opr reg = rlock_result(x);
+  if (x->x()->type()->is_float_kind()) {
+    Bytecodes::Code code = x->op();
+    // The flag is true for the "l" variants; NOTE(review): presumably it selects the result for unordered operands - confirm.
+    __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl));
+  } else if (x->x()->type()->tag() == longTag) {
+    __ lcmp2int(left.result(), right.result(), reg);
+  } else {
+    Unimplemented();
+  }
+}
+
+// Compare-and-swap intrinsic for object, int and long fields; arguments are (object, offset, compare value, new value).
+void LIRGenerator::do_CompareAndSwap(Intrinsic* x, ValueType* type) {
+  assert(x->number_of_arguments() == 4, "wrong type");
+  LIRItem obj   (x->argument_at(0), this);  // object
+  LIRItem offset(x->argument_at(1), this);  // offset of field
+  LIRItem cmp   (x->argument_at(2), this);  // Value to compare with field.
+  LIRItem val   (x->argument_at(3), this);  // Replace field with val if matches cmp.
+
+  LIR_Opr t1 = LIR_OprFact::illegalOpr;
+  LIR_Opr t2 = LIR_OprFact::illegalOpr;
+  LIR_Opr addr = new_pointer_register();
+
+  // Get address of field.
+  obj.load_item();
+  offset.load_item();
+  cmp.load_item();
+  val.load_item();
+
+  __ add(obj.result(), offset.result(), addr);
+
+  // Volatile load may be followed by Unsafe CAS.
+  if (support_IRIW_for_not_multiple_copy_atomic_cpu) {
+    __ membar(); // To be safe. Unsafe semantics are unclear.
+  } else {
+    __ membar_release();
+  }
+
+  if (type == objectType) {  // Write-barrier needed for Object fields.
+    // Only cmp value can get overwritten, no do_load required.
+    pre_barrier(LIR_OprFact::illegalOpr /* addr */, cmp.result() /* pre_val */,
+                false /* do_load */, false /* patch */, NULL);
+  }
+
+  if (type == objectType) {
+    if (UseCompressedOops) { // Temps are only allocated when compressed oops are in use.
+      t1 = new_register(T_OBJECT);
+      t2 = new_register(T_OBJECT);
+    }
+    __ cas_obj(addr, cmp.result(), val.result(), t1, t2);
+  } else if (type == intType) {
+    __ cas_int(addr, cmp.result(), val.result(), t1, t2);
+  } else if (type == longType) {
+    __ cas_long(addr, cmp.result(), val.result(), t1, t2);
+  } else {
+    ShouldNotReachHere();
+  }
+  // Generate conditional move of boolean result: 1 if the condition is "equal", 0 otherwise.
+  LIR_Opr result = rlock_result(x);
+  __ cmove(lir_cond_equal, LIR_OprFact::intConst(1), LIR_OprFact::intConst(0),
+           result, as_BasicType(type));
+  if (type == objectType) {  // Write-barrier needed for Object fields.
+    // Precise card mark since could either be object or array.
+    post_barrier(addr, val.result());
+  }
+}
+
+// Math intrinsics: dabs (and dsqrt if the CPU has fsqrt) are emitted as LIR, all others become SharedRuntime calls.
+void LIRGenerator::do_MathIntrinsic(Intrinsic* x) {
+  switch (x->id()) {
+    case vmIntrinsics::_dabs: {
+      assert(x->number_of_arguments() == 1, "wrong type");
+      LIRItem value(x->argument_at(0), this);
+      value.load_item();
+      LIR_Opr dst = rlock_result(x);
+      __ abs(value.result(), dst, LIR_OprFact::illegalOpr);
+      break;
+    }
+    case vmIntrinsics::_dsqrt: {
+      if (VM_Version::has_fsqrt()) {
+        assert(x->number_of_arguments() == 1, "wrong type");
+        LIRItem value(x->argument_at(0), this);
+        value.load_item();
+        LIR_Opr dst = rlock_result(x);
+        __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr);
+        break;
+      } // else fallthru
+    }
+    case vmIntrinsics::_dlog10: // fall through
+    case vmIntrinsics::_dlog: // fall through
+    case vmIntrinsics::_dsin: // fall through
+    case vmIntrinsics::_dtan: // fall through
+    case vmIntrinsics::_dcos: // fall through
+    case vmIntrinsics::_dexp: {
+      assert(x->number_of_arguments() == 1, "wrong type");
+
+      address runtime_entry = NULL; // SharedRuntime function matching the intrinsic id.
+      switch (x->id()) {
+      case vmIntrinsics::_dsqrt: // Reached via the fallthru above when fsqrt is not available.
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsqrt);
+        break;
+      case vmIntrinsics::_dsin:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dsin);
+        break;
+      case vmIntrinsics::_dcos:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dcos);
+        break;
+      case vmIntrinsics::_dtan:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dtan);
+        break;
+      case vmIntrinsics::_dlog:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog);
+        break;
+      case vmIntrinsics::_dlog10:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10);
+        break;
+      case vmIntrinsics::_dexp:
+        runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dexp);
+        break;
+      default:
+        ShouldNotReachHere();
+      }
+
+      LIR_Opr result = call_runtime(x->argument_at(0), runtime_entry, x->type(), NULL);
+      set_result(x, result);
+      break;
+    }
+    case vmIntrinsics::_dpow: { // Two-argument intrinsic, always a runtime call.
+      assert(x->number_of_arguments() == 2, "wrong type");
+      address runtime_entry = CAST_FROM_FN_PTR(address, SharedRuntime::dpow);
+      LIR_Opr result = call_runtime(x->argument_at(0), x->argument_at(1), runtime_entry, x->type(), NULL);
+      set_result(x, result);
+      break;
+    }
+  }
+}
+
+// Arraycopy intrinsic with arguments (src, src_pos, dst, dst_pos, length); emits a single arraycopy LIR operation.
+void LIRGenerator::do_ArrayCopy(Intrinsic* x) {
+  assert(x->number_of_arguments() == 5, "wrong type");
+
+  // Make all state_for calls early since they can emit code.
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  LIRItem src     (x->argument_at(0), this);
+  LIRItem src_pos (x->argument_at(1), this);
+  LIRItem dst     (x->argument_at(2), this);
+  LIRItem dst_pos (x->argument_at(3), this);
+  LIRItem length  (x->argument_at(4), this);
+
+  // Load all values in callee_save_registers (C calling convention),
+  // as this makes the parameter passing to the fast case simpler.
+  src.load_item_force     (FrameMap::R14_oop_opr);
+  src_pos.load_item_force (FrameMap::R15_opr);
+  dst.load_item_force     (FrameMap::R17_oop_opr);
+  dst_pos.load_item_force (FrameMap::R18_opr);
+  length.load_item_force  (FrameMap::R19_opr);
+  LIR_Opr tmp =            FrameMap::R20_opr;
+
+  int flags;                   // Filled in by arraycopy_helper().
+  ciArrayKlass* expected_type; // Filled in by arraycopy_helper().
+  arraycopy_helper(x, &flags, &expected_type);
+
+  __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(),
+               length.result(), tmp,
+               expected_type, flags, info);
+  set_no_result(x);
+}
+
+
+// Emits LIR for the conversion bytecodes _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f,
+// _i2b, _i2c, _i2s. Dispatches on x->op(); any other op ends in ShouldNotReachHere().
+void LIRGenerator::do_Convert(Convert* x) {
+  switch (x->op()) {
+
+    // Integer -> floating point: the source is routed through force_to_spill() before the convert.
+    case Bytecodes::_l2f: {
+      if (!VM_Version::has_fcfids()) { // fcfids is >= Power7 only
+        // fcfid+frsp needs fixup code to avoid rounding incompatibility.
+        address entry = CAST_FROM_FN_PTR(address, SharedRuntime::l2f); // So call the runtime routine instead of converting inline.
+        LIR_Opr result = call_runtime(x->value(), entry, x->type(), NULL);
+        set_result(x, result);
+        break;
+      } // else fall through and share the _l2d code below (x->op() is still _l2f there)
+    }
+    case Bytecodes::_l2d: {
+      LIRItem value(x->value(), this);
+      LIR_Opr reg = rlock_result(x);
+      value.load_item();
+      LIR_Opr tmp = force_to_spill(value.result(), T_DOUBLE);
+      __ convert(x->op(), tmp, reg);
+      break;
+    }
+    case Bytecodes::_i2f:
+    case Bytecodes::_i2d: {
+      LIRItem value(x->value(), this);
+      LIR_Opr reg = rlock_result(x);
+      value.load_item();
+      // Convert i2l first, then spill the long and convert it like the long cases above.
+      LIR_Opr tmp1 = new_register(T_LONG);
+      __ convert(Bytecodes::_i2l, value.result(), tmp1);
+      LIR_Opr tmp2 = force_to_spill(tmp1, T_DOUBLE);
+      __ convert(x->op(), tmp2, reg);
+      break;
+    }
+
+    // Floating point -> integer: the convert's destination vreg is flagged must_start_in_memory.
+    case Bytecodes::_f2l:
+    case Bytecodes::_d2l: {
+      LIRItem value(x->value(), this);
+      LIR_Opr reg = rlock_result(x);
+      value.set_destroys_register(); // USE_KILL
+      value.load_item();
+      set_vreg_flag(reg, must_start_in_memory);
+      __ convert(x->op(), value.result(), reg);
+      break;
+    }
+    case Bytecodes::_f2i:
+    case Bytecodes::_d2i: {
+      LIRItem value(x->value(), this);
+      LIR_Opr reg = rlock_result(x);
+      value.set_destroys_register(); // USE_KILL
+      value.load_item();
+      // The convert writes into a T_LONG temp (flagged must_start_in_memory); l2i then narrows it into reg.
+      LIR_Opr tmp1 = new_register(T_LONG);
+      set_vreg_flag(tmp1, must_start_in_memory);
+      __ convert(x->op(), value.result(), tmp1);
+      __ convert(Bytecodes::_l2i, tmp1, reg);
+      break;
+    }
+
+    // Within same category: just register conversions.
+    case Bytecodes::_i2b:
+    case Bytecodes::_i2c:
+    case Bytecodes::_i2s:
+    case Bytecodes::_i2l:
+    case Bytecodes::_l2i:
+    case Bytecodes::_f2d:
+    case Bytecodes::_d2f: {
+      LIRItem value(x->value(), this);
+      LIR_Opr reg = rlock_result(x);
+      value.load_item();
+      __ convert(x->op(), value.result(), reg);
+      break;
+    }
+
+    default: ShouldNotReachHere();
+  }
+}
+
+
+void LIRGenerator::do_NewInstance(NewInstance* x) { // Emits LIR that allocates an instance of x->klass() and moves it into x's result operand.
+  // This instruction can be deoptimized in the slow path.
+  const LIR_Opr reg = result_register_for(x->type());
+#ifndef PRODUCT
+  if (PrintNotLoaded && !x->klass()->is_loaded()) {
+    tty->print_cr("   ###class not loaded at new bci %d", x->printable_bci());
+  }
+#endif
+  CodeEmitInfo* info = state_for(x, x->state());
+  LIR_Opr klass_reg = FrameMap::R4_metadata_opr; // Used by slow path (NewInstanceStub).
+  LIR_Opr tmp1 = FrameMap::R5_oop_opr; // tmp1..tmp4 are fixed registers handed to new_instance as temps.
+  LIR_Opr tmp2 = FrameMap::R6_oop_opr;
+  LIR_Opr tmp3 = FrameMap::R7_oop_opr;
+  LIR_Opr tmp4 = FrameMap::R8_oop_opr;
+  new_instance(reg, x->klass(), x->is_unresolved(), tmp1, tmp2, tmp3, tmp4, klass_reg, info);
+
+  // Must prevent reordering of stores for object initialization
+  // with stores that publish the new object.
+  __ membar_storestore();
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result); // Copy from the fixed result register into the operand locked for x.
+}
+
+
+void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { // Emits LIR that allocates an array of x->elt_type() elements with length x->length().
+  // Evaluate state_for early since it may emit code.
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  LIRItem length(x->length(), this);
+  length.load_item();
+
+  LIR_Opr reg = result_register_for(x->type());
+  LIR_Opr klass_reg = FrameMap::R4_metadata_opr; // Used by slow path (NewTypeArrayStub).
+  // We use R5 in order to get a temp effect. This reg is used in slow path (NewTypeArrayStub).
+  LIR_Opr tmp1 = FrameMap::R5_oop_opr;
+  LIR_Opr tmp2 = FrameMap::R6_oop_opr;
+  LIR_Opr tmp3 = FrameMap::R7_oop_opr;
+  LIR_Opr tmp4 = FrameMap::R8_oop_opr;
+  LIR_Opr len = length.result();
+  BasicType elem_type = x->elt_type();
+
+  __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); // Put the type-array klass for elem_type into klass_reg.
+
+  CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); // The stub gets the klass, length, result register and debug info.
+  __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path);
+
+  // Must prevent reordering of stores for object initialization
+  // with stores that publish the new object.
+  __ membar_storestore();
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result); // Copy from the fixed result register into the operand locked for x.
+}
+
+
+void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { // Emits LIR that allocates an object array with element klass x->klass() and length x->length().
+  // Evaluate state_for early since it may emit code.
+  CodeEmitInfo* info = state_for(x, x->state());
+  // In case of patching (i.e., object class is not yet loaded),
+  // we need to reexecute the instruction and therefore provide
+  // the state before the parameters have been consumed.
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || PatchALot) {
+    patching_info = state_for(x, x->state_before());
+  }
+
+  LIRItem length(x->length(), this);
+  length.load_item();
+
+  const LIR_Opr reg = result_register_for(x->type());
+  LIR_Opr klass_reg = FrameMap::R4_metadata_opr; // Used by slow path (NewObjectArrayStub).
+  // We use R5 in order to get a temp effect. This reg is used in slow path (NewObjectArrayStub).
+  LIR_Opr tmp1 = FrameMap::R5_oop_opr;
+  LIR_Opr tmp2 = FrameMap::R6_oop_opr;
+  LIR_Opr tmp3 = FrameMap::R7_oop_opr;
+  LIR_Opr tmp4 = FrameMap::R8_oop_opr;
+  LIR_Opr len = length.result();
+
+  CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info);
+  ciMetadata* obj = ciObjArrayKlass::make(x->klass()); // Array klass whose elements are of x->klass().
+  if (obj == ciEnv::unloaded_ciobjarrayklass()) {
+    BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error");
+  }
+  klass2reg_with_patching(klass_reg, obj, patching_info); // patching_info stays NULL unless it was set above.
+  __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path);
+
+  // Must prevent reordering of stores for object initialization
+  // with stores that publish the new object.
+  __ membar_storestore();
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result); // Copy from the fixed result register into the operand locked for x.
+}
+
+
+void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { // Emits a call to the Runtime1 new_multi_array entry with (klass, rank, pointer to dimensions).
+  Values* dims = x->dims();
+  int i = dims->length();
+  LIRItemList* items = new LIRItemList(dims->length(), NULL);
+  while (i-- > 0) { // Walk the dimensions from last to first, wrapping each one in a LIRItem at the same index.
+    LIRItem* size = new LIRItem(dims->at(i), this);
+    items->at_put(i, size);
+  }
+
+  // Evaluate state_for early since it may emit code.
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || PatchALot) {
+    patching_info = state_for(x, x->state_before());
+
+    // Cannot re-use same xhandlers for multiple CodeEmitInfos, so
+    // clone all handlers (NOTE: Usually this is handled transparently
+    // by the CodeEmitInfo cloning logic in CodeStub constructors but
+    // is done explicitly here because a stub isn't being used).
+    x->set_exception_handlers(new XHandlers(x->exception_handlers()));
+  }
+  CodeEmitInfo* info = state_for(x, x->state());
+
+  i = dims->length();
+  while (i-- > 0) { // Second pass, again last to first: store every dimension into its stack slot.
+    LIRItem* size = items->at(i);
+    size->load_nonconstant();
+    // FrameMap::_reserved_argument_area_size includes the dimensions
+    // varargs, because it's initialized to hir()->max_stack() when the
+    // FrameMap is created.
+    store_stack_parameter(size->result(), in_ByteSize(i*sizeof(jint) + FrameMap::first_available_sp_in_frame)); // Dimension i lands at offset first_available_sp_in_frame + i*sizeof(jint).
+  }
+
+  const LIR_Opr klass_reg = FrameMap::R4_metadata_opr; // Used by slow path.
+  klass2reg_with_patching(klass_reg, x->klass(), patching_info);
+
+  LIR_Opr rank = FrameMap::R5_opr; // Used by slow path.
+  __ move(LIR_OprFact::intConst(x->rank()), rank);
+
+  LIR_Opr varargs = FrameMap::as_pointer_opr(R6); // Used by slow path.
+  __ leal(LIR_OprFact::address(new LIR_Address(FrameMap::SP_opr, FrameMap::first_available_sp_in_frame, T_INT)),
+          varargs); // varargs = SP + first_available_sp_in_frame, i.e. the address of the slot written for dimension 0.
+
+  // Note: This instruction can be deoptimized in the slow path.
+  LIR_OprList* args = new LIR_OprList(3);
+  args->append(klass_reg);
+  args->append(rank);
+  args->append(varargs);
+  const LIR_Opr reg = result_register_for(x->type());
+  __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id),
+                  LIR_OprFact::illegalOpr,
+                  reg, args, info);
+
+  // Must prevent reordering of stores for object initialization
+  // with stores that publish the new object.
+  __ membar_storestore();
+  LIR_Opr result = rlock_result(x);
+  __ move(reg, result); // Copy from the fixed result register into the operand locked for x.
+}
+
+
+void LIRGenerator::do_BlockBegin(BlockBegin* x) {
+  // Nothing to do for now: the body is intentionally empty, so no LIR is emitted for a BlockBegin here.
+}
+
+
+void LIRGenerator::do_CheckCast(CheckCast* x) {
+  LIRItem obj(x->obj(), this);
+  CodeEmitInfo* patching_info = NULL;
+  if (!x->klass()->is_loaded() || (PatchALot && !x->is_incompatible_class_change_check())) {
+    // Must do this before locking the destination register as
+    // an oop register, and before the obj is loaded (so x->obj()->item()
+    // is valid for creating a debug info location).
+    patching_info = state_for(x, x->state_before());
+  }
+  obj.load_item();
+  LIR_Opr out_reg = rlock_result(x);
+  CodeStub* stub;
+  CodeEmitInfo* info_for_exception = state_for(x);
+
+  if (x->is_incompatible_class_change_check()) {
+    assert(patching_info == NULL, "can't patch this");
+    stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id,
+                                   LIR_OprFact::illegalOpr, info_for_exception);
+  } else {
+    stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception);
+  }
+  // Following registers are used by slow_subtype_check:
+  LIR_Opr tmp1 = FrameMap::R4_oop_opr; // super_klass
+  LIR_Opr tmp2 = FrameMap::R5_oop_opr; // sub_klass