changeset 7880:647c8b619d80

8068054: AARCH64: Assembler interpreter, shared runtime
Summary: add src/cpu/aarch64/vm/* interpreter, shared runtime files
Reviewed-by: kvn, roland, coleenp, twisti
author aph
date Tue, 20 Jan 2015 11:34:17 -0800
parents 73d7851fa9f2
children d498aba2c736
files src/cpu/aarch64/vm/aarch64Test.cpp src/cpu/aarch64/vm/aarch64_call.cpp src/cpu/aarch64/vm/aarch64_linkage.S src/cpu/aarch64/vm/assembler_aarch64.cpp src/cpu/aarch64/vm/assembler_aarch64.hpp src/cpu/aarch64/vm/assembler_aarch64.inline.hpp src/cpu/aarch64/vm/bytecodeInterpreter_aarch64.cpp src/cpu/aarch64/vm/bytecodeInterpreter_aarch64.hpp src/cpu/aarch64/vm/bytecodeInterpreter_aarch64.inline.hpp src/cpu/aarch64/vm/bytecodes_aarch64.cpp src/cpu/aarch64/vm/bytecodes_aarch64.hpp src/cpu/aarch64/vm/bytes_aarch64.hpp src/cpu/aarch64/vm/codeBuffer_aarch64.hpp src/cpu/aarch64/vm/compiledIC_aarch64.cpp src/cpu/aarch64/vm/copy_aarch64.hpp src/cpu/aarch64/vm/cppInterpreterGenerator_aarch64.hpp src/cpu/aarch64/vm/cpustate_aarch64.hpp src/cpu/aarch64/vm/debug_aarch64.cpp src/cpu/aarch64/vm/decode_aarch64.hpp src/cpu/aarch64/vm/depChecker_aarch64.cpp src/cpu/aarch64/vm/depChecker_aarch64.hpp src/cpu/aarch64/vm/disassembler_aarch64.hpp src/cpu/aarch64/vm/frame_aarch64.cpp src/cpu/aarch64/vm/frame_aarch64.hpp src/cpu/aarch64/vm/frame_aarch64.inline.hpp src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp src/cpu/aarch64/vm/globals_aarch64.hpp src/cpu/aarch64/vm/icBuffer_aarch64.cpp src/cpu/aarch64/vm/icache_aarch64.cpp src/cpu/aarch64/vm/icache_aarch64.hpp src/cpu/aarch64/vm/immediate_aarch64.cpp src/cpu/aarch64/vm/immediate_aarch64.hpp src/cpu/aarch64/vm/interp_masm_aarch64.cpp src/cpu/aarch64/vm/interp_masm_aarch64.hpp src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp src/cpu/aarch64/vm/interpreterRT_aarch64.cpp src/cpu/aarch64/vm/interpreterRT_aarch64.hpp src/cpu/aarch64/vm/interpreter_aarch64.cpp src/cpu/aarch64/vm/interpreter_aarch64.hpp src/cpu/aarch64/vm/javaFrameAnchor_aarch64.hpp src/cpu/aarch64/vm/jniFastGetField_aarch64.cpp src/cpu/aarch64/vm/jniTypes_aarch64.hpp src/cpu/aarch64/vm/jni_aarch64.h src/cpu/aarch64/vm/macroAssembler_aarch64.cpp src/cpu/aarch64/vm/macroAssembler_aarch64.hpp src/cpu/aarch64/vm/macroAssembler_aarch64.inline.hpp src/cpu/aarch64/vm/metaspaceShared_aarch64.cpp src/cpu/aarch64/vm/methodHandles_aarch64.cpp src/cpu/aarch64/vm/methodHandles_aarch64.hpp src/cpu/aarch64/vm/nativeInst_aarch64.cpp src/cpu/aarch64/vm/nativeInst_aarch64.hpp src/cpu/aarch64/vm/registerMap_aarch64.hpp src/cpu/aarch64/vm/register_aarch64.cpp src/cpu/aarch64/vm/register_aarch64.hpp src/cpu/aarch64/vm/register_definitions_aarch64.cpp src/cpu/aarch64/vm/relocInfo_aarch64.cpp src/cpu/aarch64/vm/relocInfo_aarch64.hpp src/cpu/aarch64/vm/runtime_aarch64.cpp src/cpu/aarch64/vm/sharedRuntime_aarch64.cpp src/cpu/aarch64/vm/stubGenerator_aarch64.cpp src/cpu/aarch64/vm/stubRoutines_aarch64.cpp src/cpu/aarch64/vm/stubRoutines_aarch64.hpp src/cpu/aarch64/vm/templateInterpreterGenerator_aarch64.hpp src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp src/cpu/aarch64/vm/templateInterpreter_aarch64.hpp src/cpu/aarch64/vm/templateTable_aarch64.cpp src/cpu/aarch64/vm/templateTable_aarch64.hpp src/cpu/aarch64/vm/vmStructs_aarch64.hpp src/cpu/aarch64/vm/vm_version_aarch64.cpp src/cpu/aarch64/vm/vm_version_aarch64.hpp src/cpu/aarch64/vm/vmreg_aarch64.cpp src/cpu/aarch64/vm/vmreg_aarch64.hpp src/cpu/aarch64/vm/vmreg_aarch64.inline.hpp src/cpu/aarch64/vm/vtableStubs_aarch64.cpp
diffstat 74 files changed, 31814 insertions(+), 0 deletions(-)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/aarch64Test.cpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include <stdlib.h>
+
+#include "precompiled.hpp"
+#include "code/codeBlob.hpp"
+#include "asm/macroAssembler.hpp"
+
+// hook routine called during JVM bootstrap to test AArch64 assembler
+
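+// n.b. entry is defined in assembler_aarch64.cpp, also added by this
+// changeset; it assembles the smoke-test instruction sequence into the
+// supplied code buffer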
+extern "C" void entry(CodeBuffer*);
+
+void aarch64TestHook()
+{
+  BufferBlob* b = BufferBlob::create("aarch64Test", 500000);
+  CodeBuffer code(b);
+  MacroAssembler _masm(&code);
+  entry(&code);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/aarch64_call.cpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,200 @@
+/*
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifdef BUILTIN_SIM
+
+#include <stdio.h>
+#include <sys/types.h>
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "../../../../../../simulator/cpustate.hpp"
+#include "../../../../../../simulator/simulator.hpp"
+
+/*
+ * a routine to initialise and enter ARM simulator execution when
+ * calling into ARM code from x86 code.
+ *
+ * we maintain a simulator per-thread and provide it with 8 Mb of
+ * stack space
+ */
+#define SIM_STACK_SIZE (1024 * 1024) // in units of u_int64_t
+
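+// return the stack pointer value for the current thread's simulator
+// stack; the x86 prolog in aarch64_linkage.S switches %rsp to this
+// value before calling setup_arm_sim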
+extern "C" u_int64_t get_alt_stack()
+{
+  return AArch64Simulator::altStack();
+}
+
+extern "C" void setup_arm_sim(void *sp, u_int64_t calltype)
+{
+  // n.b. this function runs on the simulator stack so as to avoid
+  // simulator frames appearing in between VM x86 and ARM frames. note
+  // that argument sp points to the old (VM) stack from which the
+  // call into the sim was made. The stack switch and entry into this
+  // routine is handled by x86 prolog code planted in the head of the
+  // ARM code buffer which the sim is about to start executing (see
+  // aarch64_linkage.S).
+  //
+  // The first ARM instruction in the buffer is identified by fnptr
+  // stored at the top of the old stack. x86 register contents precede
+  // fnptr. preceding that are the fp and return address of the VM
+  // caller into ARM code. any extra, non-register arguments passed to
+  // the linkage routine precede the fp (this is as per any normal x86
+  // call with extra args).
+  //
+  // note that the sim creates Java frames on the Java stack just
+  // above sp (i.e. directly above fnptr). it sets the sim FP register
+  // to the pushed fp for the caller, effectively eliding the register
+  // data saved by the linkage routine.
+  //
+  // x86 register call arguments are loaded from the stack into ARM
+  // call registers. if extra arguments occur preceding the x86
+  // caller's fp then they are copied either into extra ARM registers
+  // (ARM has 8 rather than 6 gp call registers) or up the stack
+  // beyond the saved x86 registers so that they immediately precede
+  // the ARM frame where the ARM calling convention expects them to
+  // be.
+  //
+  // n.b. the number of register/stack values passed to the ARM code
+  // is determined by calltype
+  //
+  // +--------+
+  // | fnptr  |  <--- argument sp points here
+  // +--------+  |
+  // | rax    |  | return slot if we need to return a value
+  // +--------+  |
+  // | rdi    |  increasing
+  // +--------+  address
+  // | rsi    |  |
+  // +--------+  V
+  // | rdx    |
+  // +--------+
+  // | rcx    |
+  // +--------+
+  // | r8     |
+  // +--------+
+  // | r9     |
+  // +--------+
+  // | xmm0   |
+  // +--------+
+  // | xmm1   |
+  // +--------+
+  // | xmm2   |
+  // +--------+
+  // | xmm3   |
+  // +--------+
+  // | xmm4   |
+  // +--------+
+  // | xmm5   |
+  // +--------+
+  // | xmm6   |
+  // +--------+
+  // | xmm7   |
+  // +--------+
+  // | fp     |
+  // +--------+
+  // | caller |
+  // | ret ip |
+  // +--------+
+  // | arg0   | <-- any extra call args start here
+  // +--------+     offset = 18 * wordSize
+  // | . . .  |     (i.e. 1 * calladdr + 1 * rax  + 6 * gp call regs
+  //                      + 8 * fp call regs + 2 * frame words)
+  //
+  // we use a unique sim/stack per thread
+  const int cursor2_offset = 18;
+  const int fp_offset = 16;
+  u_int64_t *cursor = (u_int64_t *)sp;
+  u_int64_t *cursor2 = ((u_int64_t *)sp) + cursor2_offset;
+  u_int64_t *fp = ((u_int64_t *)sp) + fp_offset;
+  int gp_arg_count = calltype & 0xf;
+  int fp_arg_count = (calltype >> 4) & 0xf;
+  int return_type = (calltype >> 8) & 0x3;
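+  // n.b. for illustration, given the decoding above, a call passing 2
+  // gp args and 1 fp arg and returning a double would be described by
+  //   calltype == 2 | (1 << 4) | (MacroAssembler::ret_type_double << 8)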
+  AArch64Simulator *sim = AArch64Simulator::get_current(UseSimulatorCache, DisableBCCheck);
+  // save previous cpu state in case this is a recursive entry
+  CPUState saveState = sim->getCPUState();
+  // set up initial sim pc, sp and fp registers
+  sim->init(*cursor++, (u_int64_t)sp, (u_int64_t)fp);
+  u_int64_t *return_slot = cursor++;
+
+  // if we need to pass the sim extra args on the stack then bump
+  // the stack pointer now
+  u_int64_t *cursor3 = (u_int64_t *)sim->getCPUState().xreg(SP, 1);
+  if (gp_arg_count > 8) {
+    cursor3 -= gp_arg_count - 8;
+  }
+  if (fp_arg_count > 8) {
+    cursor3 -= fp_arg_count - 8;
+  }
+  sim->getCPUState().xreg(SP, 1) = (u_int64_t)(cursor3++);
+
+  for (int i = 0; i < gp_arg_count; i++) {
+    if (i < 6) {
+      // copy saved register to sim register
+      GReg reg = (GReg)i;
+      sim->getCPUState().xreg(reg, 0) = *cursor++;
+    } else if (i < 8) {
+      // copy extra int arg to sim register
+      GReg reg = (GReg)i;
+      sim->getCPUState().xreg(reg, 0) = *cursor2++;
+    } else {
+      // copy extra gp arg to sim stack
+      *cursor3++ = *cursor2++;
+    }
+  }
+  for (int i = 0; i < fp_arg_count; i++) {
+    if (i < 8) {
+      // copy saved register to sim register
+      GReg reg = (GReg)i;
+      sim->getCPUState().xreg(reg, 0) = *cursor++;
+    } else {
+      // copy extra arg to sim stack
+      *cursor3++ = *cursor2++;
+    }
+  }
+  AArch64Simulator::status_t return_status = sim->run();
+  if (return_status != AArch64Simulator::STATUS_RETURN) {
+    sim->simPrint0();
+    fatal("invalid status returned from simulator.run()\n");
+  }
+  switch (return_type) {
+  case MacroAssembler::ret_type_void:
+  default:
+    break;
+  case MacroAssembler::ret_type_integral:
+    // this overwrites the saved r0
+    *return_slot = sim->getCPUState().xreg(R0, 0);
+    break;
+  case MacroAssembler::ret_type_float:
+    *(float *)return_slot = sim->getCPUState().sreg(V0);
+    break;
+  case MacroAssembler::ret_type_double:
+    *(double *)return_slot = sim->getCPUState().dreg(V0);
+    break;
+  }
+  // restore incoming cpu state
+  sim->getCPUState() = saveState;
+}
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/aarch64_linkage.S	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,167 @@
+#
+# Copyright (c) 2012, Red Hat. All rights reserved.
+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+#
+# This code is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License version 2 only, as
+# published by the Free Software Foundation.
+#
+# This code is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+# version 2 for more details (a copy is included in the LICENSE file that
+# accompanied this code).
+#
+# You should have received a copy of the GNU General Public License version
+# 2 along with this work; if not, write to the Free Software Foundation,
+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+# or visit www.oracle.com if you need additional information or have any
+# questions.
+
+# Routines used to enable x86 VM C++ code to invoke JIT-compiled ARM code
+# -- either Java methods or generated stubs -- and to allow JIT-compiled
+# ARM code to invoke x86 VM C++ code
+#
+# the code for aarch64_stub_prolog below can be copied into the start
+# of the ARM code buffer and patched with a link to the
+# C++ routine which starts execution on the simulator. the ARM
+# code can be generated immediately following the copied code.
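+#
+# a minimal sketch (assuming a C++ code buffer pointer `buf`; this is
+# illustrative, not part of this changeset) of the expected usage:
+#
+#   extern "C" u_int64_t aarch64_stub_prolog_size();
+#   memcpy(buf, (void *)aarch64_stub_prolog, aarch64_stub_prolog_size());
+#   // ... patch the calltype word, then emit ARM code after the copy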
+
+#ifdef BUILTIN_SIM
+
+	.data
+        .globl setup_arm_sim
+	.type  setup_arm_sim,@function
+        .globl get_alt_stack
+	.type  get_alt_stack,@function
+        .globl aarch64_stub_prolog
+        .p2align  4
+aarch64_stub_prolog:
+	// entry point
+4:	lea 1f(%rip), %r11
+	mov (%r11), %r10
+	mov (%r10), %r10
+	jmp *%r10
+	.p2align 4
+1:
+	.set entry_offset, . - 1b
+	.quad aarch64_prolog_ptr
+	// 64 bit int used to identify called fn arg/return types
+	.set calltype_offset, . - 1b
+	.quad 0
+	// arm JIT code follows the stub
+	.set arm_code_offset, . - 1b
+	.size aarch64_stub_prolog, .-aarch64_stub_prolog
+aarch64_stub_prolog_end:
+
+	.text
+aarch64_prolog_ptr:
+	.quad aarch64_prolog
+
+        .globl aarch64_prolog
+aarch64_prolog:
+	.cfi_startproc
+	pushq	%rbp
+	.cfi_def_cfa_offset 16
+	.cfi_offset 6, -16
+	movq	%rsp, %rbp
+	.cfi_def_cfa_register 6
+	// save all registers used to pass args
+	sub $8, %rsp
+	movd %xmm7, (%rsp)
+	sub $8, %rsp
+	movd %xmm6, (%rsp)
+	sub $8, %rsp
+	movd %xmm5, (%rsp)
+	sub $8, %rsp
+	movd %xmm4, (%rsp)
+	sub $8, %rsp
+	movd %xmm3, (%rsp)
+	sub $8, %rsp
+	movd %xmm2, (%rsp)
+	sub $8, %rsp
+	movd %xmm1, (%rsp)
+	sub $8, %rsp
+	movd %xmm0, (%rsp)
+	push %r9
+	push %r8
+	push %rcx
+	push %rdx
+	push %rsi
+	push %rdi
+	// save rax -- this stack slot will be rewritten with a
+	// return value if needed
+	push %rax
+	// temporarily save r11 while we find the other stack
+	push %r11
+	// retrieve alt stack
+	call get_alt_stack@PLT
+	pop %r11
+	// push start of arm code
+	lea (arm_code_offset)(%r11), %rsi
+	push %rsi
+	// load call type code in arg reg 1
+	mov (calltype_offset)(%r11), %rsi
+	// load current stack pointer in arg reg 0
+	mov %rsp, %rdi
+	// switch to alt stack
+	mov %rax, %rsp
+	// save previous stack pointer on new stack
+	push %rdi
+	// 16-align the new stack pointer
+	push %rdi
+	// call sim setup routine
+	call setup_arm_sim@PLT
+	// switch back to old stack
+	pop %rsp
+	// pop start of arm code
+	pop %rdi
+	// pop rax -- either restores old value or installs return value
+	pop %rax
+	// pop arg registers
+	pop %rdi
+	pop %rsi
+	pop %rdx
+	pop %rcx
+	pop %r8
+	pop %r9
+	movd (%rsp), %xmm0
+	add $8, %rsp
+	movd (%rsp), %xmm1
+	add $8, %rsp
+	movd (%rsp), %xmm2
+	add $8, %rsp
+	movd (%rsp), %xmm3
+	add $8, %rsp
+	movd (%rsp), %xmm4
+	add $8, %rsp
+	movd (%rsp), %xmm5
+	add $8, %rsp
+	movd (%rsp), %xmm6
+	add $8, %rsp
+	movd (%rsp), %xmm7
+	add $8, %rsp
+	leave
+	.cfi_def_cfa 7, 8
+	ret
+	.cfi_endproc
+
+
+        .p2align  4
+get_pc:
+	// get return pc in rdi and then push it back
+	pop %rdi
+	push %rdi
+	ret
+
+	.p2align 4
+	.long
+	.globl aarch64_stub_prolog_size
+	.type  aarch64_stub_prolog_size,@function
+aarch64_stub_prolog_size:
+	leaq  aarch64_stub_prolog_end - aarch64_stub_prolog, %rax
+	ret
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.cpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,1526 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include <stdio.h>
+#include <sys/types.h>
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "interpreter/interpreter.hpp"
+
+#ifndef PRODUCT
+const unsigned long Assembler::asm_bp = 0x00007fffee09ac88;
+#endif
+
+#include "compiler/disassembler.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/sharedRuntime.hpp"
+
+// for the moment we reuse the logical/floating point immediate encode
+// and decode functions provided by the simulator. when we move to
+// real hardware we will need to pull that code into here
+
+#include "immediate_aarch64.hpp"
+
+extern "C" void entry(CodeBuffer *cb);
+
+#define __ _masm.
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) /* nothing */
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#endif
+
+#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":")
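+// usage note: `__ BIND(L)` binds label L and, in non-product builds,
+// also records the label name as a block comment in the code buffer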
+
+static float unpack(unsigned value);
+
+void entry(CodeBuffer *cb) {
+
+  // {
+  //   for (int i = 0; i < 256; i+=16)
+  //     {
+  //    printf("\"%20.20g\", ", unpack(i));
+  //    printf("\"%20.20g\", ", unpack(i+1));
+  //     }
+  //   printf("\n");
+  // }
+
+  Assembler _masm(cb);
+  address entry = __ pc();
+
+  // Smoke test for assembler
+
+#ifdef ASSERT
+// BEGIN  Generated code -- do not edit
+// Generated by aarch64-asmtest.py
+    Label back, forth;
+    __ bind(back);
+
+// ArithOp
+    __ add(r19, r22, r7, Assembler::LSL, 28);          //       add     x19, x22, x7, LSL #28
+    __ sub(r16, r11, r10, Assembler::LSR, 13);         //       sub     x16, x11, x10, LSR #13
+    __ adds(r27, r13, r28, Assembler::ASR, 2);         //       adds    x27, x13, x28, ASR #2
+    __ subs(r20, r28, r26, Assembler::ASR, 41);        //       subs    x20, x28, x26, ASR #41
+    __ addw(r8, r19, r19, Assembler::ASR, 19);         //       add     w8, w19, w19, ASR #19
+    __ subw(r4, r9, r10, Assembler::LSL, 14);          //       sub     w4, w9, w10, LSL #14
+    __ addsw(r8, r11, r30, Assembler::LSL, 13);        //       adds    w8, w11, w30, LSL #13
+    __ subsw(r0, r25, r19, Assembler::LSL, 9);         //       subs    w0, w25, w19, LSL #9
+    __ andr(r20, r0, r21, Assembler::LSL, 19);         //       and     x20, x0, x21, LSL #19
+    __ orr(r21, r14, r20, Assembler::LSL, 17);         //       orr     x21, x14, x20, LSL #17
+    __ eor(r25, r28, r1, Assembler::LSL, 51);          //       eor     x25, x28, x1, LSL #51
+    __ ands(r10, r27, r11, Assembler::ASR, 15);        //       ands    x10, x27, x11, ASR #15
+    __ andw(r25, r5, r12, Assembler::ASR, 23);         //       and     w25, w5, w12, ASR #23
+    __ orrw(r18, r14, r10, Assembler::LSR, 4);         //       orr     w18, w14, w10, LSR #4
+    __ eorw(r4, r21, r5, Assembler::ASR, 22);          //       eor     w4, w21, w5, ASR #22
+    __ andsw(r21, r0, r5, Assembler::ASR, 29);         //       ands    w21, w0, w5, ASR #29
+    __ bic(r26, r30, r6, Assembler::ASR, 37);          //       bic     x26, x30, x6, ASR #37
+    __ orn(r3, r1, r13, Assembler::LSR, 29);           //       orn     x3, x1, x13, LSR #29
+    __ eon(r0, r28, r9, Assembler::LSL, 47);           //       eon     x0, x28, x9, LSL #47
+    __ bics(r29, r5, r28, Assembler::LSL, 46);         //       bics    x29, x5, x28, LSL #46
+    __ bicw(r9, r18, r7, Assembler::LSR, 20);          //       bic     w9, w18, w7, LSR #20
+    __ ornw(r26, r13, r25, Assembler::ASR, 24);        //       orn     w26, w13, w25, ASR #24
+    __ eonw(r25, r4, r19, Assembler::LSL, 6);          //       eon     w25, w4, w19, LSL #6
+    __ bicsw(r5, r26, r4, Assembler::LSR, 24);         //       bics    w5, w26, w4, LSR #24
+
+// AddSubImmOp
+    __ addw(r7, r19, 340u);                            //       add     w7, w19, #340
+    __ addsw(r8, r0, 401u);                            //       adds    w8, w0, #401
+    __ subw(r29, r20, 163u);                           //       sub     w29, w20, #163
+    __ subsw(r8, r23, 759u);                           //       subs    w8, w23, #759
+    __ add(r1, r12, 523u);                             //       add     x1, x12, #523
+    __ adds(r2, r11, 426u);                            //       adds    x2, x11, #426
+    __ sub(r14, r29, 716u);                            //       sub     x14, x29, #716
+    __ subs(r11, r5, 582u);                            //       subs    x11, x5, #582
+
+// LogicalImmOp
+    __ andw(r23, r22, 32768ul);                        //       and     w23, w22, #0x8000
+    __ orrw(r4, r10, 4042322160ul);                    //       orr     w4, w10, #0xf0f0f0f0
+    __ eorw(r0, r24, 4042322160ul);                    //       eor     w0, w24, #0xf0f0f0f0
+    __ andsw(r19, r29, 2139127680ul);                  //       ands    w19, w29, #0x7f807f80
+    __ andr(r5, r10, 4503599627354112ul);              //       and     x5, x10, #0xfffffffffc000
+    __ orr(r12, r30, 18445618178097414144ul);          //       orr     x12, x30, #0xfffc0000fffc0000
+    __ eor(r30, r5, 262128ul);                         //       eor     x30, x5, #0x3fff0
+    __ ands(r26, r23, 4194300ul);                      //       ands    x26, x23, #0x3ffffc
+
+// AbsOp
+    __ b(__ pc());                                     //       b       .
+    __ b(back);                                        //       b       back
+    __ b(forth);                                       //       b       forth
+    __ bl(__ pc());                                    //       bl      .
+    __ bl(back);                                       //       bl      back
+    __ bl(forth);                                      //       bl      forth
+
+// RegAndAbsOp
+    __ cbzw(r12, __ pc());                             //       cbz     w12, .
+    __ cbzw(r12, back);                                //       cbz     w12, back
+    __ cbzw(r12, forth);                               //       cbz     w12, forth
+    __ cbnzw(r20, __ pc());                            //       cbnz    w20, .
+    __ cbnzw(r20, back);                               //       cbnz    w20, back
+    __ cbnzw(r20, forth);                              //       cbnz    w20, forth
+    __ cbz(r12, __ pc());                              //       cbz     x12, .
+    __ cbz(r12, back);                                 //       cbz     x12, back
+    __ cbz(r12, forth);                                //       cbz     x12, forth
+    __ cbnz(r24, __ pc());                             //       cbnz    x24, .
+    __ cbnz(r24, back);                                //       cbnz    x24, back
+    __ cbnz(r24, forth);                               //       cbnz    x24, forth
+    __ adr(r6, __ pc());                               //       adr     x6, .
+    __ adr(r6, back);                                  //       adr     x6, back
+    __ adr(r6, forth);                                 //       adr     x6, forth
+    __ _adrp(r21, __ pc());                             //      adrp    x21, .
+
+// RegImmAbsOp
+    __ tbz(r1, 1, __ pc());                            //       tbz     x1, #1, .
+    __ tbz(r1, 1, back);                               //       tbz     x1, #1, back
+    __ tbz(r1, 1, forth);                              //       tbz     x1, #1, forth
+    __ tbnz(r8, 9, __ pc());                           //       tbnz    x8, #9, .
+    __ tbnz(r8, 9, back);                              //       tbnz    x8, #9, back
+    __ tbnz(r8, 9, forth);                             //       tbnz    x8, #9, forth
+
+// MoveWideImmOp
+    __ movnw(r12, 23175, 0);                           //       movn    w12, #23175, lsl 0
+    __ movzw(r11, 20476, 16);                          //       movz    w11, #20476, lsl 16
+    __ movkw(r21, 3716, 0);                            //       movk    w21, #3716, lsl 0
+    __ movn(r29, 28661, 48);                           //       movn    x29, #28661, lsl 48
+    __ movz(r3, 6927, 0);                              //       movz    x3, #6927, lsl 0
+    __ movk(r22, 9828, 16);                            //       movk    x22, #9828, lsl 16
+
+// BitfieldOp
+    __ sbfm(r12, r8, 6, 22);                           //       sbfm    x12, x8, #6, #22
+    __ bfmw(r19, r25, 25, 19);                         //       bfm     w19, w25, #25, #19
+    __ ubfmw(r9, r12, 29, 15);                         //       ubfm    w9, w12, #29, #15
+    __ sbfm(r28, r25, 16, 16);                         //       sbfm    x28, x25, #16, #16
+    __ bfm(r12, r5, 4, 25);                            //       bfm     x12, x5, #4, #25
+    __ ubfm(r0, r10, 6, 8);                            //       ubfm    x0, x10, #6, #8
+
+// ExtractOp
+    __ extrw(r4, r13, r26, 24);                        //       extr    w4, w13, w26, #24
+    __ extr(r23, r30, r24, 31);                        //       extr    x23, x30, x24, #31
+
+// CondBranchOp
+    __ br(Assembler::EQ, __ pc());                     //       b.EQ    .
+    __ br(Assembler::EQ, back);                        //       b.EQ    back
+    __ br(Assembler::EQ, forth);                       //       b.EQ    forth
+    __ br(Assembler::NE, __ pc());                     //       b.NE    .
+    __ br(Assembler::NE, back);                        //       b.NE    back
+    __ br(Assembler::NE, forth);                       //       b.NE    forth
+    __ br(Assembler::HS, __ pc());                     //       b.HS    .
+    __ br(Assembler::HS, back);                        //       b.HS    back
+    __ br(Assembler::HS, forth);                       //       b.HS    forth
+    __ br(Assembler::CS, __ pc());                     //       b.CS    .
+    __ br(Assembler::CS, back);                        //       b.CS    back
+    __ br(Assembler::CS, forth);                       //       b.CS    forth
+    __ br(Assembler::LO, __ pc());                     //       b.LO    .
+    __ br(Assembler::LO, back);                        //       b.LO    back
+    __ br(Assembler::LO, forth);                       //       b.LO    forth
+    __ br(Assembler::CC, __ pc());                     //       b.CC    .
+    __ br(Assembler::CC, back);                        //       b.CC    back
+    __ br(Assembler::CC, forth);                       //       b.CC    forth
+    __ br(Assembler::MI, __ pc());                     //       b.MI    .
+    __ br(Assembler::MI, back);                        //       b.MI    back
+    __ br(Assembler::MI, forth);                       //       b.MI    forth
+    __ br(Assembler::PL, __ pc());                     //       b.PL    .
+    __ br(Assembler::PL, back);                        //       b.PL    back
+    __ br(Assembler::PL, forth);                       //       b.PL    forth
+    __ br(Assembler::VS, __ pc());                     //       b.VS    .
+    __ br(Assembler::VS, back);                        //       b.VS    back
+    __ br(Assembler::VS, forth);                       //       b.VS    forth
+    __ br(Assembler::VC, __ pc());                     //       b.VC    .
+    __ br(Assembler::VC, back);                        //       b.VC    back
+    __ br(Assembler::VC, forth);                       //       b.VC    forth
+    __ br(Assembler::HI, __ pc());                     //       b.HI    .
+    __ br(Assembler::HI, back);                        //       b.HI    back
+    __ br(Assembler::HI, forth);                       //       b.HI    forth
+    __ br(Assembler::LS, __ pc());                     //       b.LS    .
+    __ br(Assembler::LS, back);                        //       b.LS    back
+    __ br(Assembler::LS, forth);                       //       b.LS    forth
+    __ br(Assembler::GE, __ pc());                     //       b.GE    .
+    __ br(Assembler::GE, back);                        //       b.GE    back
+    __ br(Assembler::GE, forth);                       //       b.GE    forth
+    __ br(Assembler::LT, __ pc());                     //       b.LT    .
+    __ br(Assembler::LT, back);                        //       b.LT    back
+    __ br(Assembler::LT, forth);                       //       b.LT    forth
+    __ br(Assembler::GT, __ pc());                     //       b.GT    .
+    __ br(Assembler::GT, back);                        //       b.GT    back
+    __ br(Assembler::GT, forth);                       //       b.GT    forth
+    __ br(Assembler::LE, __ pc());                     //       b.LE    .
+    __ br(Assembler::LE, back);                        //       b.LE    back
+    __ br(Assembler::LE, forth);                       //       b.LE    forth
+    __ br(Assembler::AL, __ pc());                     //       b.AL    .
+    __ br(Assembler::AL, back);                        //       b.AL    back
+    __ br(Assembler::AL, forth);                       //       b.AL    forth
+    __ br(Assembler::NV, __ pc());                     //       b.NV    .
+    __ br(Assembler::NV, back);                        //       b.NV    back
+    __ br(Assembler::NV, forth);                       //       b.NV    forth
+
+// ImmOp
+    __ svc(12729);                                     //       svc     #12729
+    __ hvc(6788);                                      //       hvc     #6788
+    __ smc(1535);                                      //       smc     #1535
+    __ brk(16766);                                     //       brk     #16766
+    __ hlt(9753);                                      //       hlt     #9753
+
+// Op
+    __ nop();                                          //       nop
+    __ eret();                                         //       eret
+    __ drps();                                         //       drps
+    __ isb();                                          //       isb
+
+// SystemOp
+    __ dsb(Assembler::SY);                             //       dsb     SY
+    __ dmb(Assembler::ISHST);                          //       dmb     ISHST
+
+// OneRegOp
+    __ br(r2);                                         //       br      x2
+    __ blr(r5);                                        //       blr     x5
+
+// LoadStoreExclusiveOp
+    __ stxr(r20, r21, r2);                             //       stxr    w20, x21, [x2]
+    __ stlxr(r7, r29, r7);                             //       stlxr   w7, x29, [x7]
+    __ ldxr(r5, r16);                                  //       ldxr    x5, [x16]
+    __ ldaxr(r27, r29);                                //       ldaxr   x27, [x29]
+    __ stlr(r0, r29);                                  //       stlr    x0, [x29]
+    __ ldar(r21, r28);                                 //       ldar    x21, [x28]
+
+// LoadStoreExclusiveOp
+    __ stxrw(r24, r24, r7);                            //       stxr    w24, w24, [x7]
+    __ stlxrw(r21, r26, r28);                          //       stlxr   w21, w26, [x28]
+    __ ldxrw(r21, r6);                                 //       ldxr    w21, [x6]
+    __ ldaxrw(r15, r30);                               //       ldaxr   w15, [x30]
+    __ stlrw(r19, r3);                                 //       stlr    w19, [x3]
+    __ ldarw(r22, r2);                                 //       ldar    w22, [x2]
+
+// LoadStoreExclusiveOp
+    __ stxrh(r18, r15, r0);                            //       stxrh   w18, w15, [x0]
+    __ stlxrh(r11, r5, r28);                           //       stlxrh  w11, w5, [x28]
+    __ ldxrh(r29, r6);                                 //       ldxrh   w29, [x6]
+    __ ldaxrh(r18, r7);                                //       ldaxrh  w18, [x7]
+    __ stlrh(r25, r28);                                //       stlrh   w25, [x28]
+    __ ldarh(r2, r19);                                 //       ldarh   w2, [x19]
+
+// LoadStoreExclusiveOp
+    __ stxrb(r10, r30, r1);                            //       stxrb   w10, w30, [x1]
+    __ stlxrb(r20, r21, r22);                          //       stlxrb  w20, w21, [x22]
+    __ ldxrb(r25, r2);                                 //       ldxrb   w25, [x2]
+    __ ldaxrb(r24, r5);                                //       ldaxrb  w24, [x5]
+    __ stlrb(r16, r3);                                 //       stlrb   w16, [x3]
+    __ ldarb(r22, r29);                                //       ldarb   w22, [x29]
+
+// LoadStoreExclusiveOp
+    __ ldxp(r8, r2, r19);                              //       ldxp    x8, x2, [x19]
+    __ ldaxp(r7, r19, r14);                            //       ldaxp   x7, x19, [x14]
+    __ stxp(r8, r27, r28, r5);                         //       stxp    w8, x27, x28, [x5]
+    __ stlxp(r6, r8, r14, r6);                         //       stlxp   w6, x8, x14, [x6]
+
+// LoadStoreExclusiveOp
+    __ ldxpw(r25, r4, r22);                            //       ldxp    w25, w4, [x22]
+    __ ldaxpw(r14, r14, r15);                          //       ldaxp   w14, w14, [x15]
+    __ stxpw(r20, r26, r8, r10);                       //       stxp    w20, w26, w8, [x10]
+    __ stlxpw(r23, r18, r18, r18);                     //       stlxp   w23, w18, w18, [x18]
+
+// base_plus_unscaled_offset
+// LoadStoreOp
+    __ str(r30, Address(r11, 99));                     //       str     x30, [x11, 99]
+    __ strw(r23, Address(r25, -77));                   //       str     w23, [x25, -77]
+    __ strb(r2, Address(r14, 3));                      //       strb    w2, [x14, 3]
+    __ strh(r9, Address(r10, 5));                      //       strh    w9, [x10, 5]
+    __ ldr(r20, Address(r15, 57));                     //       ldr     x20, [x15, 57]
+    __ ldrw(r12, Address(r16, -78));                   //       ldr     w12, [x16, -78]
+    __ ldrb(r22, Address(r26, -3));                    //       ldrb    w22, [x26, -3]
+    __ ldrh(r30, Address(r19, -47));                   //       ldrh    w30, [x19, -47]
+    __ ldrsb(r9, Address(r10, -12));                   //       ldrsb   x9, [x10, -12]
+    __ ldrsh(r28, Address(r17, 14));                   //       ldrsh   x28, [x17, 14]
+    __ ldrshw(r3, Address(r5, 10));                    //       ldrsh   w3, [x5, 10]
+    __ ldrsw(r17, Address(r17, -91));                  //       ldrsw   x17, [x17, -91]
+    __ ldrd(v2, Address(r20, -17));                    //       ldr     d2, [x20, -17]
+    __ ldrs(v22, Address(r7, -10));                    //       ldr     s22, [x7, -10]
+    __ strd(v30, Address(r18, -223));                  //       str     d30, [x18, -223]
+    __ strs(v13, Address(r22, 21));                    //       str     s13, [x22, 21]
+
+// pre
+// LoadStoreOp
+    __ str(r9, Address(__ pre(r18, -112)));            //       str     x9, [x18, -112]!
+    __ strw(r29, Address(__ pre(r23, 11)));            //       str     w29, [x23, 11]!
+    __ strb(r18, Address(__ pre(r12, -1)));            //       strb    w18, [x12, -1]!
+    __ strh(r16, Address(__ pre(r20, -23)));           //       strh    w16, [x20, -23]!
+    __ ldr(r3, Address(__ pre(r29, 9)));               //       ldr     x3, [x29, 9]!
+    __ ldrw(r25, Address(__ pre(r3, 19)));             //       ldr     w25, [x3, 19]!
+    __ ldrb(r1, Address(__ pre(r29, -1)));             //       ldrb    w1, [x29, -1]!
+    __ ldrh(r8, Address(__ pre(r29, -57)));            //       ldrh    w8, [x29, -57]!
+    __ ldrsb(r5, Address(__ pre(r14, -13)));           //       ldrsb   x5, [x14, -13]!
+    __ ldrsh(r10, Address(__ pre(r27, 1)));            //       ldrsh   x10, [x27, 1]!
+    __ ldrshw(r11, Address(__ pre(r10, 25)));          //       ldrsh   w11, [x10, 25]!
+    __ ldrsw(r4, Address(__ pre(r22, -92)));           //       ldrsw   x4, [x22, -92]!
+    __ ldrd(v11, Address(__ pre(r23, 8)));             //       ldr     d11, [x23, 8]!
+    __ ldrs(v25, Address(__ pre(r19, 54)));            //       ldr     s25, [x19, 54]!
+    __ strd(v1, Address(__ pre(r7, -174)));            //       str     d1, [x7, -174]!
+    __ strs(v8, Address(__ pre(r25, 54)));             //       str     s8, [x25, 54]!
+
+// post
+// LoadStoreOp
+    __ str(r5, Address(__ post(r11, 37)));             //       str     x5, [x11], 37
+    __ strw(r24, Address(__ post(r15, 19)));           //       str     w24, [x15], 19
+    __ strb(r15, Address(__ post(r26, -1)));           //       strb    w15, [x26], -1
+    __ strh(r18, Address(__ post(r18, -6)));           //       strh    w18, [x18], -6
+    __ ldr(r7, Address(__ post(r2, -230)));            //       ldr     x7, [x2], -230
+    __ ldrw(r27, Address(__ post(r11, -27)));          //       ldr     w27, [x11], -27
+    __ ldrb(r18, Address(__ post(r3, -25)));           //       ldrb    w18, [x3], -25
+    __ ldrh(r10, Address(__ post(r24, -32)));          //       ldrh    w10, [x24], -32
+    __ ldrsb(r22, Address(__ post(r10, 4)));           //       ldrsb   x22, [x10], 4
+    __ ldrsh(r17, Address(__ post(r12, 25)));          //       ldrsh   x17, [x12], 25
+    __ ldrshw(r8, Address(__ post(r7, -62)));          //       ldrsh   w8, [x7], -62
+    __ ldrsw(r23, Address(__ post(r22, -51)));         //       ldrsw   x23, [x22], -51
+    __ ldrd(v24, Address(__ post(r25, 48)));           //       ldr     d24, [x25], 48
+    __ ldrs(v21, Address(__ post(r12, -10)));          //       ldr     s21, [x12], -10
+    __ strd(v18, Address(__ post(r13, -222)));         //       str     d18, [x13], -222
+    __ strs(v16, Address(__ post(r1, -41)));           //       str     s16, [x1], -41
+
+// base_plus_reg
+// LoadStoreOp
+    __ str(r2, Address(r22, r15, Address::sxtw(0)));   //       str     x2, [x22, w15, sxtw #0]
+    __ strw(r2, Address(r16, r29, Address::lsl(0)));   //       str     w2, [x16, x29, lsl #0]
+    __ strb(r20, Address(r18, r14, Address::uxtw(0))); //       strb    w20, [x18, w14, uxtw #0]
+    __ strh(r6, Address(r19, r20, Address::sxtx(1)));  //       strh    w6, [x19, x20, sxtx #1]
+    __ ldr(r14, Address(r29, r14, Address::sxtw(0)));  //       ldr     x14, [x29, w14, sxtw #0]
+    __ ldrw(r16, Address(r20, r12, Address::sxtw(2))); //       ldr     w16, [x20, w12, sxtw #2]
+    __ ldrb(r9, Address(r12, r0, Address::sxtw(0)));   //       ldrb    w9, [x12, w0, sxtw #0]
+    __ ldrh(r12, Address(r17, r3, Address::lsl(1)));   //       ldrh    w12, [x17, x3, lsl #1]
+    __ ldrsb(r2, Address(r17, r3, Address::sxtx(0)));  //       ldrsb   x2, [x17, x3, sxtx #0]
+    __ ldrsh(r7, Address(r1, r17, Address::uxtw(1)));  //       ldrsh   x7, [x1, w17, uxtw #1]
+    __ ldrshw(r25, Address(r15, r18, Address::sxtw(1))); //     ldrsh   w25, [x15, w18, sxtw #1]
+    __ ldrsw(r23, Address(r21, r12, Address::lsl(0))); //       ldrsw   x23, [x21, x12, lsl #0]
+    __ ldrd(v5, Address(r13, r8, Address::lsl(3)));    //       ldr     d5, [x13, x8, lsl #3]
+    __ ldrs(v3, Address(r10, r22, Address::lsl(2)));   //       ldr     s3, [x10, x22, lsl #2]
+    __ strd(v14, Address(r2, r27, Address::sxtw(0)));  //       str     d14, [x2, w27, sxtw #0]
+    __ strs(v20, Address(r6, r25, Address::lsl(0)));   //       str     s20, [x6, x25, lsl #0]
+
+// base_plus_scaled_offset
+// LoadStoreOp
+    __ str(r30, Address(r7, 16256));                   //       str     x30, [x7, 16256]
+    __ strw(r15, Address(r8, 7588));                   //       str     w15, [x8, 7588]
+    __ strb(r11, Address(r0, 1866));                   //       strb    w11, [x0, 1866]
+    __ strh(r3, Address(r17, 3734));                   //       strh    w3, [x17, 3734]
+    __ ldr(r2, Address(r7, 14224));                    //       ldr     x2, [x7, 14224]
+    __ ldrw(r5, Address(r9, 7396));                    //       ldr     w5, [x9, 7396]
+    __ ldrb(r28, Address(r9, 1721));                   //       ldrb    w28, [x9, 1721]
+    __ ldrh(r2, Address(r20, 3656));                   //       ldrh    w2, [x20, 3656]
+    __ ldrsb(r22, Address(r14, 1887));                 //       ldrsb   x22, [x14, 1887]
+    __ ldrsh(r8, Address(r0, 4080));                   //       ldrsh   x8, [x0, 4080]
+    __ ldrshw(r0, Address(r30, 3916));                 //       ldrsh   w0, [x30, 3916]
+    __ ldrsw(r24, Address(r19, 6828));                 //       ldrsw   x24, [x19, 6828]
+    __ ldrd(v24, Address(r12, 13032));                 //       ldr     d24, [x12, 13032]
+    __ ldrs(v8, Address(r8, 7452));                    //       ldr     s8, [x8, 7452]
+    __ strd(v10, Address(r15, 15992));                 //       str     d10, [x15, 15992]
+    __ strs(v26, Address(r19, 6688));                  //       str     s26, [x19, 6688]
+
+// pcrel
+// LoadStoreOp
+    __ ldr(r10, forth);                                //       ldr     x10, forth
+    __ ldrw(r3, __ pc());                              //       ldr     w3, .
+
+// LoadStoreOp
+    __ prfm(Address(r23, 9));                          //       prfm    PLDL1KEEP, [x23, 9]
+
+// LoadStoreOp
+    __ prfm(back);                                     //       prfm    PLDL1KEEP, back
+
+// LoadStoreOp
+    __ prfm(Address(r3, r8, Address::uxtw(0)));        //       prfm    PLDL1KEEP, [x3, w8, uxtw #0]
+
+// LoadStoreOp
+    __ prfm(Address(r11, 15080));                      //       prfm    PLDL1KEEP, [x11, 15080]
+
+// AddSubCarryOp
+    __ adcw(r13, r9, r28);                             //       adc     w13, w9, w28
+    __ adcsw(r27, r19, r28);                           //       adcs    w27, w19, w28
+    __ sbcw(r19, r18, r6);                             //       sbc     w19, w18, w6
+    __ sbcsw(r14, r20, r3);                            //       sbcs    w14, w20, w3
+    __ adc(r16, r14, r8);                              //       adc     x16, x14, x8
+    __ adcs(r0, r29, r8);                              //       adcs    x0, x29, x8
+    __ sbc(r8, r24, r20);                              //       sbc     x8, x24, x20
+    __ sbcs(r12, r28, r0);                             //       sbcs    x12, x28, x0
+
+// AddSubExtendedOp
+    __ addw(r23, r6, r16, ext::uxtb, 4);               //       add     w23, w6, w16, uxtb #4
+    __ addsw(r25, r25, r23, ext::sxth, 2);             //       adds    w25, w25, w23, sxth #2
+    __ sub(r26, r22, r4, ext::uxtx, 1);                //       sub     x26, x22, x4, uxtx #1
+    __ subsw(r17, r29, r19, ext::sxtx, 3);             //       subs    w17, w29, w19, sxtx #3
+    __ add(r11, r30, r21, ext::uxtb, 3);               //       add     x11, x30, x21, uxtb #3
+    __ adds(r16, r19, r0, ext::sxtb, 2);               //       adds    x16, x19, x0, sxtb #2
+    __ sub(r11, r9, r25, ext::sxtx, 1);                //       sub     x11, x9, x25, sxtx #1
+    __ subs(r17, r20, r12, ext::sxtb, 4);              //       subs    x17, x20, x12, sxtb #4
+
+// ConditionalCompareOp
+    __ ccmnw(r13, r11, 3u, Assembler::LE);             //       ccmn    w13, w11, #3, LE
+    __ ccmpw(r13, r12, 2u, Assembler::HI);             //       ccmp    w13, w12, #2, HI
+    __ ccmn(r3, r2, 12u, Assembler::NE);               //       ccmn    x3, x2, #12, NE
+    __ ccmp(r7, r21, 3u, Assembler::VS);               //       ccmp    x7, x21, #3, VS
+
+// ConditionalCompareImmedOp
+    __ ccmnw(r2, 14, 4, Assembler::CC);                //       ccmn    w2, #14, #4, CC
+    __ ccmpw(r17, 17, 6, Assembler::PL);               //       ccmp    w17, #17, #6, PL
+    __ ccmn(r10, 12, 0, Assembler::CS);                //       ccmn    x10, #12, #0, CS
+    __ ccmp(r21, 18, 14, Assembler::GE);               //       ccmp    x21, #18, #14, GE
+
+// ConditionalSelectOp
+    __ cselw(r21, r13, r12, Assembler::GT);            //       csel    w21, w13, w12, GT
+    __ csincw(r10, r27, r15, Assembler::LS);           //       csinc   w10, w27, w15, LS
+    __ csinvw(r0, r13, r9, Assembler::HI);             //       csinv   w0, w13, w9, HI
+    __ csnegw(r18, r4, r26, Assembler::VS);            //       csneg   w18, w4, w26, VS
+    __ csel(r12, r29, r7, Assembler::LS);              //       csel    x12, x29, x7, LS
+    __ csinc(r6, r7, r20, Assembler::VC);              //       csinc   x6, x7, x20, VC
+    __ csinv(r22, r21, r3, Assembler::LE);             //       csinv   x22, x21, x3, LE
+    __ csneg(r19, r12, r27, Assembler::LS);            //       csneg   x19, x12, x27, LS
+
+// TwoRegOp
+    __ rbitw(r0, r16);                                 //       rbit    w0, w16
+    __ rev16w(r17, r23);                               //       rev16   w17, w23
+    __ revw(r17, r14);                                 //       rev     w17, w14
+    __ clzw(r24, r30);                                 //       clz     w24, w30
+    __ clsw(r24, r22);                                 //       cls     w24, w22
+    __ rbit(r3, r17);                                  //       rbit    x3, x17
+    __ rev16(r12, r13);                                //       rev16   x12, x13
+    __ rev32(r9, r22);                                 //       rev32   x9, x22
+    __ rev(r0, r0);                                    //       rev     x0, x0
+    __ clz(r5, r16);                                   //       clz     x5, x16
+    __ cls(r25, r22);                                  //       cls     x25, x22
+
+// ThreeRegOp
+    __ udivw(r29, r4, r0);                             //       udiv    w29, w4, w0
+    __ sdivw(r0, r29, r29);                            //       sdiv    w0, w29, w29
+    __ lslvw(r5, r17, r21);                            //       lslv    w5, w17, w21
+    __ lsrvw(r9, r9, r18);                             //       lsrv    w9, w9, w18
+    __ asrvw(r1, r27, r8);                             //       asrv    w1, w27, w8
+    __ rorvw(r18, r20, r13);                           //       rorv    w18, w20, w13
+    __ udiv(r8, r25, r12);                             //       udiv    x8, x25, x12
+    __ sdiv(r7, r5, r28);                              //       sdiv    x7, x5, x28
+    __ lslv(r5, r17, r27);                             //       lslv    x5, x17, x27
+    __ lsrv(r23, r26, r20);                            //       lsrv    x23, x26, x20
+    __ asrv(r28, r8, r28);                             //       asrv    x28, x8, x28
+    __ rorv(r3, r29, r4);                              //       rorv    x3, x29, x4
+
+// FourRegMulOp
+    __ maddw(r17, r14, r26, r21);                      //       madd    w17, w14, w26, w21
+    __ msubw(r1, r30, r11, r11);                       //       msub    w1, w30, w11, w11
+    __ madd(r1, r17, r6, r28);                         //       madd    x1, x17, x6, x28
+    __ msub(r30, r6, r30, r8);                         //       msub    x30, x6, x30, x8
+    __ smaddl(r21, r6, r14, r8);                       //       smaddl  x21, w6, w14, x8
+    __ smsubl(r10, r10, r24, r19);                     //       smsubl  x10, w10, w24, x19
+    __ umaddl(r20, r18, r14, r24);                     //       umaddl  x20, w18, w14, x24
+    __ umsubl(r18, r2, r5, r5);                        //       umsubl  x18, w2, w5, x5
+
+// ThreeRegFloatOp
+    __ fmuls(v8, v18, v13);                            //       fmul    s8, s18, s13
+    __ fdivs(v2, v14, v28);                            //       fdiv    s2, s14, s28
+    __ fadds(v15, v12, v28);                           //       fadd    s15, s12, s28
+    __ fsubs(v0, v12, v1);                             //       fsub    s0, s12, s1
+    __ fmuls(v15, v29, v4);                            //       fmul    s15, s29, s4
+    __ fmuld(v12, v1, v23);                            //       fmul    d12, d1, d23
+    __ fdivd(v27, v8, v18);                            //       fdiv    d27, d8, d18
+    __ faddd(v23, v20, v11);                           //       fadd    d23, d20, d11
+    __ fsubd(v8, v12, v18);                            //       fsub    d8, d12, d18
+    __ fmuld(v26, v24, v23);                           //       fmul    d26, d24, d23
+
+// FourRegFloatOp
+    __ fmadds(v21, v23, v13, v25);                     //       fmadd   s21, s23, s13, s25
+    __ fmsubs(v22, v10, v1, v14);                      //       fmsub   s22, s10, s1, s14
+    __ fnmadds(v14, v20, v2, v30);                     //       fnmadd  s14, s20, s2, s30
+    __ fnmadds(v7, v29, v22, v22);                     //       fnmadd  s7, s29, s22, s22
+    __ fmaddd(v13, v5, v15, v5);                       //       fmadd   d13, d5, d15, d5
+    __ fmsubd(v14, v12, v5, v10);                      //       fmsub   d14, d12, d5, d10
+    __ fnmaddd(v10, v19, v0, v1);                      //       fnmadd  d10, d19, d0, d1
+    __ fnmaddd(v20, v2, v2, v0);                       //       fnmadd  d20, d2, d2, d0
+
+// TwoRegFloatOp
+    __ fmovs(v25, v9);                                 //       fmov    s25, s9
+    __ fabss(v20, v4);                                 //       fabs    s20, s4
+    __ fnegs(v3, v27);                                 //       fneg    s3, s27
+    __ fsqrts(v1, v2);                                 //       fsqrt   s1, s2
+    __ fcvts(v30, v0);                                 //       fcvt    d30, s0
+    __ fmovd(v12, v4);                                 //       fmov    d12, d4
+    __ fabsd(v1, v27);                                 //       fabs    d1, d27
+    __ fnegd(v8, v22);                                 //       fneg    d8, d22
+    __ fsqrtd(v11, v11);                               //       fsqrt   d11, d11
+    __ fcvtd(v22, v28);                                //       fcvt    s22, d28
+
+// FloatConvertOp
+    __ fcvtzsw(r28, v22);                              //       fcvtzs  w28, s22
+    __ fcvtzs(r20, v27);                               //       fcvtzs  x20, s27
+    __ fcvtzdw(r14, v0);                               //       fcvtzs  w14, d0
+    __ fcvtzd(r26, v11);                               //       fcvtzs  x26, d11
+    __ scvtfws(v28, r22);                              //       scvtf   s28, w22
+    __ scvtfs(v16, r10);                               //       scvtf   s16, x10
+    __ scvtfwd(v8, r21);                               //       scvtf   d8, w21
+    __ scvtfd(v21, r28);                               //       scvtf   d21, x28
+    __ fmovs(r24, v24);                                //       fmov    w24, s24
+    __ fmovd(r8, v19);                                 //       fmov    x8, d19
+    __ fmovs(v8, r12);                                 //       fmov    s8, w12
+    __ fmovd(v6, r7);                                  //       fmov    d6, x7
+
+// TwoRegFloatOp
+    __ fcmps(v30, v16);                                //       fcmp    s30, s16
+    __ fcmpd(v25, v11);                                //       fcmp    d25, d11
+    __ fcmps(v11, 0.0);                                //       fcmp    s11, #0.0
+    __ fcmpd(v11, 0.0);                                //       fcmp    d11, #0.0
+
+// LoadStorePairOp
+    __ stpw(r29, r12, Address(r17, 128));              //       stp     w29, w12, [x17, #128]
+    __ ldpw(r22, r18, Address(r14, -96));              //       ldp     w22, w18, [x14, #-96]
+    __ ldpsw(r11, r16, Address(r1, 64));               //       ldpsw   x11, x16, [x1, #64]
+    __ stp(r0, r11, Address(r26, 112));                //       stp     x0, x11, [x26, #112]
+    __ ldp(r7, r1, Address(r26, 16));                  //       ldp     x7, x1, [x26, #16]
+
+// LoadStorePairOp
+    __ stpw(r10, r7, Address(__ pre(r24, 0)));         //       stp     w10, w7, [x24, #0]!
+    __ ldpw(r7, r28, Address(__ pre(r24, -256)));      //       ldp     w7, w28, [x24, #-256]!
+    __ ldpsw(r25, r28, Address(__ pre(r21, -240)));    //       ldpsw   x25, x28, [x21, #-240]!
+    __ stp(r20, r18, Address(__ pre(r14, -16)));       //       stp     x20, x18, [x14, #-16]!
+    __ ldp(r8, r10, Address(__ pre(r13, 80)));         //       ldp     x8, x10, [x13, #80]!
+
+// LoadStorePairOp
+    __ stpw(r26, r24, Address(__ post(r2, -128)));     //       stp     w26, w24, [x2], #-128
+    __ ldpw(r2, r25, Address(__ post(r21, -192)));     //       ldp     w2, w25, [x21], #-192
+    __ ldpsw(r17, r2, Address(__ post(r21, -144)));    //       ldpsw   x17, x2, [x21], #-144
+    __ stp(r12, r10, Address(__ post(r11, 96)));       //       stp     x12, x10, [x11], #96
+    __ ldp(r24, r6, Address(__ post(r17, -32)));       //       ldp     x24, x6, [x17], #-32
+
+// LoadStorePairOp
+    __ stnpw(r3, r30, Address(r14, -224));             //       stnp    w3, w30, [x14, #-224]
+    __ ldnpw(r15, r20, Address(r26, -144));            //       ldnp    w15, w20, [x26, #-144]
+    __ stnp(r22, r25, Address(r12, -128));             //       stnp    x22, x25, [x12, #-128]
+    __ ldnp(r27, r22, Address(r17, -176));             //       ldnp    x27, x22, [x17, #-176]
+
+// FloatImmediateOp
+    __ fmovd(v0, 2.0);                                 //       fmov d0, #2.0
+    __ fmovd(v0, 2.125);                               //       fmov d0, #2.125
+    __ fmovd(v0, 4.0);                                 //       fmov d0, #4.0
+    __ fmovd(v0, 4.25);                                //       fmov d0, #4.25
+    __ fmovd(v0, 8.0);                                 //       fmov d0, #8.0
+    __ fmovd(v0, 8.5);                                 //       fmov d0, #8.5
+    __ fmovd(v0, 16.0);                                //       fmov d0, #16.0
+    __ fmovd(v0, 17.0);                                //       fmov d0, #17.0
+    __ fmovd(v0, 0.125);                               //       fmov d0, #0.125
+    __ fmovd(v0, 0.1328125);                           //       fmov d0, #0.1328125
+    __ fmovd(v0, 0.25);                                //       fmov d0, #0.25
+    __ fmovd(v0, 0.265625);                            //       fmov d0, #0.265625
+    __ fmovd(v0, 0.5);                                 //       fmov d0, #0.5
+    __ fmovd(v0, 0.53125);                             //       fmov d0, #0.53125
+    __ fmovd(v0, 1.0);                                 //       fmov d0, #1.0
+    __ fmovd(v0, 1.0625);                              //       fmov d0, #1.0625
+    __ fmovd(v0, -2.0);                                //       fmov d0, #-2.0
+    __ fmovd(v0, -2.125);                              //       fmov d0, #-2.125
+    __ fmovd(v0, -4.0);                                //       fmov d0, #-4.0
+    __ fmovd(v0, -4.25);                               //       fmov d0, #-4.25
+    __ fmovd(v0, -8.0);                                //       fmov d0, #-8.0
+    __ fmovd(v0, -8.5);                                //       fmov d0, #-8.5
+    __ fmovd(v0, -16.0);                               //       fmov d0, #-16.0
+    __ fmovd(v0, -17.0);                               //       fmov d0, #-17.0
+    __ fmovd(v0, -0.125);                              //       fmov d0, #-0.125
+    __ fmovd(v0, -0.1328125);                          //       fmov d0, #-0.1328125
+    __ fmovd(v0, -0.25);                               //       fmov d0, #-0.25
+    __ fmovd(v0, -0.265625);                           //       fmov d0, #-0.265625
+    __ fmovd(v0, -0.5);                                //       fmov d0, #-0.5
+    __ fmovd(v0, -0.53125);                            //       fmov d0, #-0.53125
+    __ fmovd(v0, -1.0);                                //       fmov d0, #-1.0
+    __ fmovd(v0, -1.0625);                             //       fmov d0, #-1.0625
+
+    __ bind(forth);
+
+/*
+aarch64ops.o:     file format elf64-littleaarch64
+
+
+Disassembly of section .text:
+
+0000000000000000 <back>:
+   0:   8b0772d3        add     x19, x22, x7, lsl #28
+   4:   cb4a3570        sub     x16, x11, x10, lsr #13
+   8:   ab9c09bb        adds    x27, x13, x28, asr #2
+   c:   eb9aa794        subs    x20, x28, x26, asr #41
+  10:   0b934e68        add     w8, w19, w19, asr #19
+  14:   4b0a3924        sub     w4, w9, w10, lsl #14
+  18:   2b1e3568        adds    w8, w11, w30, lsl #13
+  1c:   6b132720        subs    w0, w25, w19, lsl #9
+  20:   8a154c14        and     x20, x0, x21, lsl #19
+  24:   aa1445d5        orr     x21, x14, x20, lsl #17
+  28:   ca01cf99        eor     x25, x28, x1, lsl #51
+  2c:   ea8b3f6a        ands    x10, x27, x11, asr #15
+  30:   0a8c5cb9        and     w25, w5, w12, asr #23
+  34:   2a4a11d2        orr     w18, w14, w10, lsr #4
+  38:   4a855aa4        eor     w4, w21, w5, asr #22
+  3c:   6a857415        ands    w21, w0, w5, asr #29
+  40:   8aa697da        bic     x26, x30, x6, asr #37
+  44:   aa6d7423        orn     x3, x1, x13, lsr #29
+  48:   ca29bf80        eon     x0, x28, x9, lsl #47
+  4c:   ea3cb8bd        bics    x29, x5, x28, lsl #46
+  50:   0a675249        bic     w9, w18, w7, lsr #20
+  54:   2ab961ba        orn     w26, w13, w25, asr #24
+  58:   4a331899        eon     w25, w4, w19, lsl #6
+  5c:   6a646345        bics    w5, w26, w4, lsr #24
+  60:   11055267        add     w7, w19, #0x154
+  64:   31064408        adds    w8, w0, #0x191
+  68:   51028e9d        sub     w29, w20, #0xa3
+  6c:   710bdee8        subs    w8, w23, #0x2f7
+  70:   91082d81        add     x1, x12, #0x20b
+  74:   b106a962        adds    x2, x11, #0x1aa
+  78:   d10b33ae        sub     x14, x29, #0x2cc
+  7c:   f10918ab        subs    x11, x5, #0x246
+  80:   121102d7        and     w23, w22, #0x8000
+  84:   3204cd44        orr     w4, w10, #0xf0f0f0f0
+  88:   5204cf00        eor     w0, w24, #0xf0f0f0f0
+  8c:   72099fb3        ands    w19, w29, #0x7f807f80
+  90:   92729545        and     x5, x10, #0xfffffffffc000
+  94:   b20e37cc        orr     x12, x30, #0xfffc0000fffc0000
+  98:   d27c34be        eor     x30, x5, #0x3fff0
+  9c:   f27e4efa        ands    x26, x23, #0x3ffffc
+  a0:   14000000        b       a0 <back+0xa0>
+  a4:   17ffffd7        b       0 <back>
+  a8:   1400017f        b       6a4 <forth>
+  ac:   94000000        bl      ac <back+0xac>
+  b0:   97ffffd4        bl      0 <back>
+  b4:   9400017c        bl      6a4 <forth>
+  b8:   3400000c        cbz     w12, b8 <back+0xb8>
+  bc:   34fffa2c        cbz     w12, 0 <back>
+  c0:   34002f2c        cbz     w12, 6a4 <forth>
+  c4:   35000014        cbnz    w20, c4 <back+0xc4>
+  c8:   35fff9d4        cbnz    w20, 0 <back>
+  cc:   35002ed4        cbnz    w20, 6a4 <forth>
+  d0:   b400000c        cbz     x12, d0 <back+0xd0>
+  d4:   b4fff96c        cbz     x12, 0 <back>
+  d8:   b4002e6c        cbz     x12, 6a4 <forth>
+  dc:   b5000018        cbnz    x24, dc <back+0xdc>
+  e0:   b5fff918        cbnz    x24, 0 <back>
+  e4:   b5002e18        cbnz    x24, 6a4 <forth>
+  e8:   10000006        adr     x6, e8 <back+0xe8>
+  ec:   10fff8a6        adr     x6, 0 <back>
+  f0:   10002da6        adr     x6, 6a4 <forth>
+  f4:   90000015        adrp    x21, 0 <back>
+  f8:   36080001        tbz     w1, #1, f8 <back+0xf8>
+  fc:   360ff821        tbz     w1, #1, 0 <back>
+ 100:   36082d21        tbz     w1, #1, 6a4 <forth>
+ 104:   37480008        tbnz    w8, #9, 104 <back+0x104>
+ 108:   374ff7c8        tbnz    w8, #9, 0 <back>
+ 10c:   37482cc8        tbnz    w8, #9, 6a4 <forth>
+ 110:   128b50ec        movn    w12, #0x5a87
+ 114:   52a9ff8b        movz    w11, #0x4ffc, lsl #16
+ 118:   7281d095        movk    w21, #0xe84
+ 11c:   92edfebd        movn    x29, #0x6ff5, lsl #48
+ 120:   d28361e3        movz    x3, #0x1b0f
+ 124:   f2a4cc96        movk    x22, #0x2664, lsl #16
+ 128:   9346590c        sbfx    x12, x8, #6, #17
+ 12c:   33194f33        bfi     w19, w25, #7, #20
+ 130:   531d3d89        ubfiz   w9, w12, #3, #16
+ 134:   9350433c        sbfx    x28, x25, #16, #1
+ 138:   b34464ac        bfxil   x12, x5, #4, #22
+ 13c:   d3462140        ubfx    x0, x10, #6, #3
+ 140:   139a61a4        extr    w4, w13, w26, #24
+ 144:   93d87fd7        extr    x23, x30, x24, #31
+ 148:   54000000        b.eq    148 <back+0x148>
+ 14c:   54fff5a0        b.eq    0 <back>
+ 150:   54002aa0        b.eq    6a4 <forth>
+ 154:   54000001        b.ne    154 <back+0x154>
+ 158:   54fff541        b.ne    0 <back>
+ 15c:   54002a41        b.ne    6a4 <forth>
+ 160:   54000002        b.cs    160 <back+0x160>
+ 164:   54fff4e2        b.cs    0 <back>
+ 168:   540029e2        b.cs    6a4 <forth>
+ 16c:   54000002        b.cs    16c <back+0x16c>
+ 170:   54fff482        b.cs    0 <back>
+ 174:   54002982        b.cs    6a4 <forth>
+ 178:   54000003        b.cc    178 <back+0x178>
+ 17c:   54fff423        b.cc    0 <back>
+ 180:   54002923        b.cc    6a4 <forth>
+ 184:   54000003        b.cc    184 <back+0x184>
+ 188:   54fff3c3        b.cc    0 <back>
+ 18c:   540028c3        b.cc    6a4 <forth>
+ 190:   54000004        b.mi    190 <back+0x190>
+ 194:   54fff364        b.mi    0 <back>
+ 198:   54002864        b.mi    6a4 <forth>
+ 19c:   54000005        b.pl    19c <back+0x19c>
+ 1a0:   54fff305        b.pl    0 <back>
+ 1a4:   54002805        b.pl    6a4 <forth>
+ 1a8:   54000006        b.vs    1a8 <back+0x1a8>
+ 1ac:   54fff2a6        b.vs    0 <back>
+ 1b0:   540027a6        b.vs    6a4 <forth>
+ 1b4:   54000007        b.vc    1b4 <back+0x1b4>
+ 1b8:   54fff247        b.vc    0 <back>
+ 1bc:   54002747        b.vc    6a4 <forth>
+ 1c0:   54000008        b.hi    1c0 <back+0x1c0>
+ 1c4:   54fff1e8        b.hi    0 <back>
+ 1c8:   540026e8        b.hi    6a4 <forth>
+ 1cc:   54000009        b.ls    1cc <back+0x1cc>
+ 1d0:   54fff189        b.ls    0 <back>
+ 1d4:   54002689        b.ls    6a4 <forth>
+ 1d8:   5400000a        b.ge    1d8 <back+0x1d8>
+ 1dc:   54fff12a        b.ge    0 <back>
+ 1e0:   5400262a        b.ge    6a4 <forth>
+ 1e4:   5400000b        b.lt    1e4 <back+0x1e4>
+ 1e8:   54fff0cb        b.lt    0 <back>
+ 1ec:   540025cb        b.lt    6a4 <forth>
+ 1f0:   5400000c        b.gt    1f0 <back+0x1f0>
+ 1f4:   54fff06c        b.gt    0 <back>
+ 1f8:   5400256c        b.gt    6a4 <forth>
+ 1fc:   5400000d        b.le    1fc <back+0x1fc>
+ 200:   54fff00d        b.le    0 <back>
+ 204:   5400250d        b.le    6a4 <forth>
+ 208:   5400000e        b.al    208 <back+0x208>
+ 20c:   54ffefae        b.al    0 <back>
+ 210:   540024ae        b.al    6a4 <forth>
+ 214:   5400000f        b.nv    214 <back+0x214>
+ 218:   54ffef4f        b.nv    0 <back>
+ 21c:   5400244f        b.nv    6a4 <forth>
+ 220:   d4063721        svc     #0x31b9
+ 224:   d4035082        hvc     #0x1a84
+ 228:   d400bfe3        smc     #0x5ff
+ 22c:   d4282fc0        brk     #0x417e
+ 230:   d444c320        hlt     #0x2619
+ 234:   d503201f        nop
+ 238:   d69f03e0        eret
+ 23c:   d6bf03e0        drps
+ 240:   d5033fdf        isb
+ 244:   d5033f9f        dsb     sy
+ 248:   d5033abf        dmb     ishst
+ 24c:   d61f0040        br      x2
+ 250:   d63f00a0        blr     x5
+ 254:   c8147c55        stxr    w20, x21, [x2]
+ 258:   c807fcfd        stlxr   w7, x29, [x7]
+ 25c:   c85f7e05        ldxr    x5, [x16]
+ 260:   c85fffbb        ldaxr   x27, [x29]
+ 264:   c89fffa0        stlr    x0, [x29]
+ 268:   c8dfff95        ldar    x21, [x28]
+ 26c:   88187cf8        stxr    w24, w24, [x7]
+ 270:   8815ff9a        stlxr   w21, w26, [x28]
+ 274:   885f7cd5        ldxr    w21, [x6]
+ 278:   885fffcf        ldaxr   w15, [x30]
+ 27c:   889ffc73        stlr    w19, [x3]
+ 280:   88dffc56        ldar    w22, [x2]
+ 284:   48127c0f        stxrh   w18, w15, [x0]
+ 288:   480bff85        stlxrh  w11, w5, [x28]
+ 28c:   485f7cdd        ldxrh   w29, [x6]
+ 290:   485ffcf2        ldaxrh  w18, [x7]
+ 294:   489fff99        stlrh   w25, [x28]
+ 298:   48dffe62        ldarh   w2, [x19]
+ 29c:   080a7c3e        stxrb   w10, w30, [x1]
+ 2a0:   0814fed5        stlxrb  w20, w21, [x22]
+ 2a4:   085f7c59        ldxrb   w25, [x2]
+ 2a8:   085ffcb8        ldaxrb  w24, [x5]
+ 2ac:   089ffc70        stlrb   w16, [x3]
+ 2b0:   08dfffb6        ldarb   w22, [x29]
+ 2b4:   c87f0a68        ldxp    x8, x2, [x19]
+ 2b8:   c87fcdc7        ldaxp   x7, x19, [x14]
+ 2bc:   c82870bb        stxp    w8, x27, x28, [x5]
+ 2c0:   c826b8c8        stlxp   w6, x8, x14, [x6]
+ 2c4:   887f12d9        ldxp    w25, w4, [x22]
+ 2c8:   887fb9ee        ldaxp   w14, w14, [x15]
+ 2cc:   8834215a        stxp    w20, w26, w8, [x10]
+ 2d0:   8837ca52        stlxp   w23, w18, w18, [x18]
+ 2d4:   f806317e        str     x30, [x11,#99]
+ 2d8:   b81b3337        str     w23, [x25,#-77]
+ 2dc:   39000dc2        strb    w2, [x14,#3]
+ 2e0:   78005149        strh    w9, [x10,#5]
+ 2e4:   f84391f4        ldr     x20, [x15,#57]
+ 2e8:   b85b220c        ldr     w12, [x16,#-78]
+ 2ec:   385fd356        ldrb    w22, [x26,#-3]
+ 2f0:   785d127e        ldrh    w30, [x19,#-47]
+ 2f4:   389f4149        ldrsb   x9, [x10,#-12]
+ 2f8:   79801e3c        ldrsh   x28, [x17,#14]
+ 2fc:   79c014a3        ldrsh   w3, [x5,#10]
+ 300:   b89a5231        ldrsw   x17, [x17,#-91]
+ 304:   fc5ef282        ldr     d2, [x20,#-17]
+ 308:   bc5f60f6        ldr     s22, [x7,#-10]
+ 30c:   fc12125e        str     d30, [x18,#-223]
+ 310:   bc0152cd        str     s13, [x22,#21]
+ 314:   f8190e49        str     x9, [x18,#-112]!
+ 318:   b800befd        str     w29, [x23,#11]!
+ 31c:   381ffd92        strb    w18, [x12,#-1]!
+ 320:   781e9e90        strh    w16, [x20,#-23]!
+ 324:   f8409fa3        ldr     x3, [x29,#9]!
+ 328:   b8413c79        ldr     w25, [x3,#19]!
+ 32c:   385fffa1        ldrb    w1, [x29,#-1]!
+ 330:   785c7fa8        ldrh    w8, [x29,#-57]!
+ 334:   389f3dc5        ldrsb   x5, [x14,#-13]!
+ 338:   78801f6a        ldrsh   x10, [x27,#1]!
+ 33c:   78c19d4b        ldrsh   w11, [x10,#25]!
+ 340:   b89a4ec4        ldrsw   x4, [x22,#-92]!
+ 344:   fc408eeb        ldr     d11, [x23,#8]!
+ 348:   bc436e79        ldr     s25, [x19,#54]!
+ 34c:   fc152ce1        str     d1, [x7,#-174]!
+ 350:   bc036f28        str     s8, [x25,#54]!
+ 354:   f8025565        str     x5, [x11],#37
+ 358:   b80135f8        str     w24, [x15],#19
+ 35c:   381ff74f        strb    w15, [x26],#-1
+ 360:   781fa652        strh    w18, [x18],#-6
+ 364:   f851a447        ldr     x7, [x2],#-230
+ 368:   b85e557b        ldr     w27, [x11],#-27
+ 36c:   385e7472        ldrb    w18, [x3],#-25
+ 370:   785e070a        ldrh    w10, [x24],#-32
+ 374:   38804556        ldrsb   x22, [x10],#4
+ 378:   78819591        ldrsh   x17, [x12],#25
+ 37c:   78dc24e8        ldrsh   w8, [x7],#-62
+ 380:   b89cd6d7        ldrsw   x23, [x22],#-51
+ 384:   fc430738        ldr     d24, [x25],#48
+ 388:   bc5f6595        ldr     s21, [x12],#-10
+ 38c:   fc1225b2        str     d18, [x13],#-222
+ 390:   bc1d7430        str     s16, [x1],#-41
+ 394:   f82fcac2        str     x2, [x22,w15,sxtw]
+ 398:   b83d6a02        str     w2, [x16,x29]
+ 39c:   382e5a54        strb    w20, [x18,w14,uxtw #0]
+ 3a0:   7834fa66        strh    w6, [x19,x20,sxtx #1]
+ 3a4:   f86ecbae        ldr     x14, [x29,w14,sxtw]
+ 3a8:   b86cda90        ldr     w16, [x20,w12,sxtw #2]
+ 3ac:   3860d989        ldrb    w9, [x12,w0,sxtw #0]
+ 3b0:   78637a2c        ldrh    w12, [x17,x3,lsl #1]
+ 3b4:   38a3fa22        ldrsb   x2, [x17,x3,sxtx #0]
+ 3b8:   78b15827        ldrsh   x7, [x1,w17,uxtw #1]
+ 3bc:   78f2d9f9        ldrsh   w25, [x15,w18,sxtw #1]
+ 3c0:   b8ac6ab7        ldrsw   x23, [x21,x12]
+ 3c4:   fc6879a5        ldr     d5, [x13,x8,lsl #3]
+ 3c8:   bc767943        ldr     s3, [x10,x22,lsl #2]
+ 3cc:   fc3bc84e        str     d14, [x2,w27,sxtw]
+ 3d0:   bc3968d4        str     s20, [x6,x25]
+ 3d4:   f91fc0fe        str     x30, [x7,#16256]
+ 3d8:   b91da50f        str     w15, [x8,#7588]
+ 3dc:   391d280b        strb    w11, [x0,#1866]
+ 3e0:   791d2e23        strh    w3, [x17,#3734]
+ 3e4:   f95bc8e2        ldr     x2, [x7,#14224]
+ 3e8:   b95ce525        ldr     w5, [x9,#7396]
+ 3ec:   395ae53c        ldrb    w28, [x9,#1721]
+ 3f0:   795c9282        ldrh    w2, [x20,#3656]
+ 3f4:   399d7dd6        ldrsb   x22, [x14,#1887]
+ 3f8:   799fe008        ldrsh   x8, [x0,#4080]
+ 3fc:   79de9bc0        ldrsh   w0, [x30,#3916]
+ 400:   b99aae78        ldrsw   x24, [x19,#6828]
+ 404:   fd597598        ldr     d24, [x12,#13032]
+ 408:   bd5d1d08        ldr     s8, [x8,#7452]
+ 40c:   fd1f3dea        str     d10, [x15,#15992]
+ 410:   bd1a227a        str     s26, [x19,#6688]
+ 414:   5800148a        ldr     x10, 6a4 <forth>
+ 418:   18000003        ldr     w3, 418 <back+0x418>
+ 41c:   f88092e0        prfm    pldl1keep, [x23,#9]
+ 420:   d8ffdf00        prfm    pldl1keep, 0 <back>
+ 424:   f8a84860        prfm    pldl1keep, [x3,w8,uxtw]
+ 428:   f99d7560        prfm    pldl1keep, [x11,#15080]
+ 42c:   1a1c012d        adc     w13, w9, w28
+ 430:   3a1c027b        adcs    w27, w19, w28
+ 434:   5a060253        sbc     w19, w18, w6
+ 438:   7a03028e        sbcs    w14, w20, w3
+ 43c:   9a0801d0        adc     x16, x14, x8
+ 440:   ba0803a0        adcs    x0, x29, x8
+ 444:   da140308        sbc     x8, x24, x20
+ 448:   fa00038c        sbcs    x12, x28, x0
+ 44c:   0b3010d7        add     w23, w6, w16, uxtb #4
+ 450:   2b37ab39        adds    w25, w25, w23, sxth #2
+ 454:   cb2466da        sub     x26, x22, x4, uxtx #1
+ 458:   6b33efb1        subs    w17, w29, w19, sxtx #3
+ 45c:   8b350fcb        add     x11, x30, w21, uxtb #3
+ 460:   ab208a70        adds    x16, x19, w0, sxtb #2
+ 464:   cb39e52b        sub     x11, x9, x25, sxtx #1
+ 468:   eb2c9291        subs    x17, x20, w12, sxtb #4
+ 46c:   3a4bd1a3        ccmn    w13, w11, #0x3, le
+ 470:   7a4c81a2        ccmp    w13, w12, #0x2, hi
+ 474:   ba42106c        ccmn    x3, x2, #0xc, ne
+ 478:   fa5560e3        ccmp    x7, x21, #0x3, vs
+ 47c:   3a4e3844        ccmn    w2, #0xe, #0x4, cc
+ 480:   7a515a26        ccmp    w17, #0x11, #0x6, pl
+ 484:   ba4c2940        ccmn    x10, #0xc, #0x0, cs
+ 488:   fa52aaae        ccmp    x21, #0x12, #0xe, ge
+ 48c:   1a8cc1b5        csel    w21, w13, w12, gt
+ 490:   1a8f976a        csinc   w10, w27, w15, ls
+ 494:   5a8981a0        csinv   w0, w13, w9, hi
+ 498:   5a9a6492        csneg   w18, w4, w26, vs
+ 49c:   9a8793ac        csel    x12, x29, x7, ls
+ 4a0:   9a9474e6        csinc   x6, x7, x20, vc
+ 4a4:   da83d2b6        csinv   x22, x21, x3, le
+ 4a8:   da9b9593        csneg   x19, x12, x27, ls
+ 4ac:   5ac00200        rbit    w0, w16
+ 4b0:   5ac006f1        rev16   w17, w23
+ 4b4:   5ac009d1        rev     w17, w14
+ 4b8:   5ac013d8        clz     w24, w30
+ 4bc:   5ac016d8        cls     w24, w22
+ 4c0:   dac00223        rbit    x3, x17
+ 4c4:   dac005ac        rev16   x12, x13
+ 4c8:   dac00ac9        rev32   x9, x22
+ 4cc:   dac00c00        rev     x0, x0
+ 4d0:   dac01205        clz     x5, x16
+ 4d4:   dac016d9        cls     x25, x22
+ 4d8:   1ac0089d        udiv    w29, w4, w0
+ 4dc:   1add0fa0        sdiv    w0, w29, w29
+ 4e0:   1ad52225        lsl     w5, w17, w21
+ 4e4:   1ad22529        lsr     w9, w9, w18
+ 4e8:   1ac82b61        asr     w1, w27, w8
+ 4ec:   1acd2e92        ror     w18, w20, w13
+ 4f0:   9acc0b28        udiv    x8, x25, x12
+ 4f4:   9adc0ca7        sdiv    x7, x5, x28
+ 4f8:   9adb2225        lsl     x5, x17, x27
+ 4fc:   9ad42757        lsr     x23, x26, x20
+ 500:   9adc291c        asr     x28, x8, x28
+ 504:   9ac42fa3        ror     x3, x29, x4
+ 508:   1b1a55d1        madd    w17, w14, w26, w21
+ 50c:   1b0bafc1        msub    w1, w30, w11, w11
+ 510:   9b067221        madd    x1, x17, x6, x28
+ 514:   9b1ea0de        msub    x30, x6, x30, x8
+ 518:   9b2e20d5        smaddl  x21, w6, w14, x8
+ 51c:   9b38cd4a        smsubl  x10, w10, w24, x19
+ 520:   9bae6254        umaddl  x20, w18, w14, x24
+ 524:   9ba59452        umsubl  x18, w2, w5, x5
+ 528:   1e2d0a48        fmul    s8, s18, s13
+ 52c:   1e3c19c2        fdiv    s2, s14, s28
+ 530:   1e3c298f        fadd    s15, s12, s28
+ 534:   1e213980        fsub    s0, s12, s1
+ 538:   1e240baf        fmul    s15, s29, s4
+ 53c:   1e77082c        fmul    d12, d1, d23
+ 540:   1e72191b        fdiv    d27, d8, d18
+ 544:   1e6b2a97        fadd    d23, d20, d11
+ 548:   1e723988        fsub    d8, d12, d18
+ 54c:   1e770b1a        fmul    d26, d24, d23
+ 550:   1f0d66f5        fmadd   s21, s23, s13, s25
+ 554:   1f01b956        fmsub   s22, s10, s1, s14
+ 558:   1f227a8e        fnmadd  s14, s20, s2, s30
+ 55c:   1f365ba7        fnmadd  s7, s29, s22, s22
+ 560:   1f4f14ad        fmadd   d13, d5, d15, d5
+ 564:   1f45a98e        fmsub   d14, d12, d5, d10
+ 568:   1f60066a        fnmadd  d10, d19, d0, d1
+ 56c:   1f620054        fnmadd  d20, d2, d2, d0
+ 570:   1e204139        fmov    s25, s9
+ 574:   1e20c094        fabs    s20, s4
+ 578:   1e214363        fneg    s3, s27
+ 57c:   1e21c041        fsqrt   s1, s2
+ 580:   1e22c01e        fcvt    d30, s0
+ 584:   1e60408c        fmov    d12, d4
+ 588:   1e60c361        fabs    d1, d27
+ 58c:   1e6142c8        fneg    d8, d22
+ 590:   1e61c16b        fsqrt   d11, d11
+ 594:   1e624396        fcvt    s22, d28
+ 598:   1e3802dc        fcvtzs  w28, s22
+ 59c:   9e380374        fcvtzs  x20, s27
+ 5a0:   1e78000e        fcvtzs  w14, d0
+ 5a4:   9e78017a        fcvtzs  x26, d11
+ 5a8:   1e2202dc        scvtf   s28, w22
+ 5ac:   9e220150        scvtf   s16, x10
+ 5b0:   1e6202a8        scvtf   d8, w21
+ 5b4:   9e620395        scvtf   d21, x28
+ 5b8:   1e260318        fmov    w24, s24
+ 5bc:   9e660268        fmov    x8, d19
+ 5c0:   1e270188        fmov    s8, w12
+ 5c4:   9e6700e6        fmov    d6, x7
+ 5c8:   1e3023c0        fcmp    s30, s16
+ 5cc:   1e6b2320        fcmp    d25, d11
+ 5d0:   1e202168        fcmp    s11, #0.0
+ 5d4:   1e602168        fcmp    d11, #0.0
+ 5d8:   2910323d        stp     w29, w12, [x17,#128]
+ 5dc:   297449d6        ldp     w22, w18, [x14,#-96]
+ 5e0:   6948402b        ldpsw   x11, x16, [x1,#64]
+ 5e4:   a9072f40        stp     x0, x11, [x26,#112]
+ 5e8:   a9410747        ldp     x7, x1, [x26,#16]
+ 5ec:   29801f0a        stp     w10, w7, [x24,#0]!
+ 5f0:   29e07307        ldp     w7, w28, [x24,#-256]!
+ 5f4:   69e272b9        ldpsw   x25, x28, [x21,#-240]!
+ 5f8:   a9bf49d4        stp     x20, x18, [x14,#-16]!
+ 5fc:   a9c529a8        ldp     x8, x10, [x13,#80]!
+ 600:   28b0605a        stp     w26, w24, [x2],#-128
+ 604:   28e866a2        ldp     w2, w25, [x21],#-192
+ 608:   68ee0ab1        ldpsw   x17, x2, [x21],#-144
+ 60c:   a886296c        stp     x12, x10, [x11],#96
+ 610:   a8fe1a38        ldp     x24, x6, [x17],#-32
+ 614:   282479c3        stnp    w3, w30, [x14,#-224]
+ 618:   286e534f        ldnp    w15, w20, [x26,#-144]
+ 61c:   a8386596        stnp    x22, x25, [x12,#-128]
+ 620:   a8755a3b        ldnp    x27, x22, [x17,#-176]
+ 624:   1e601000        fmov    d0, #2.000000000000000000e+00
+ 628:   1e603000        fmov    d0, #2.125000000000000000e+00
+ 62c:   1e621000        fmov    d0, #4.000000000000000000e+00
+ 630:   1e623000        fmov    d0, #4.250000000000000000e+00
+ 634:   1e641000        fmov    d0, #8.000000000000000000e+00
+ 638:   1e643000        fmov    d0, #8.500000000000000000e+00
+ 63c:   1e661000        fmov    d0, #1.600000000000000000e+01
+ 640:   1e663000        fmov    d0, #1.700000000000000000e+01
+ 644:   1e681000        fmov    d0, #1.250000000000000000e-01
+ 648:   1e683000        fmov    d0, #1.328125000000000000e-01
+ 64c:   1e6a1000        fmov    d0, #2.500000000000000000e-01
+ 650:   1e6a3000        fmov    d0, #2.656250000000000000e-01
+ 654:   1e6c1000        fmov    d0, #5.000000000000000000e-01
+ 658:   1e6c3000        fmov    d0, #5.312500000000000000e-01
+ 65c:   1e6e1000        fmov    d0, #1.000000000000000000e+00
+ 660:   1e6e3000        fmov    d0, #1.062500000000000000e+00
+ 664:   1e701000        fmov    d0, #-2.000000000000000000e+00
+ 668:   1e703000        fmov    d0, #-2.125000000000000000e+00
+ 66c:   1e721000        fmov    d0, #-4.000000000000000000e+00
+ 670:   1e723000        fmov    d0, #-4.250000000000000000e+00
+ 674:   1e741000        fmov    d0, #-8.000000000000000000e+00
+ 678:   1e743000        fmov    d0, #-8.500000000000000000e+00
+ 67c:   1e761000        fmov    d0, #-1.600000000000000000e+01
+ 680:   1e763000        fmov    d0, #-1.700000000000000000e+01
+ 684:   1e781000        fmov    d0, #-1.250000000000000000e-01
+ 688:   1e783000        fmov    d0, #-1.328125000000000000e-01
+ 68c:   1e7a1000        fmov    d0, #-2.500000000000000000e-01
+ 690:   1e7a3000        fmov    d0, #-2.656250000000000000e-01
+ 694:   1e7c1000        fmov    d0, #-5.000000000000000000e-01
+ 698:   1e7c3000        fmov    d0, #-5.312500000000000000e-01
+ 69c:   1e7e1000        fmov    d0, #-1.000000000000000000e+00
+ 6a0:   1e7e3000        fmov    d0, #-1.062500000000000000e+00
+ */
+
+  static const unsigned int insns[] =
+  {
+    0x8b0772d3,     0xcb4a3570,     0xab9c09bb,     0xeb9aa794,
+    0x0b934e68,     0x4b0a3924,     0x2b1e3568,     0x6b132720,
+    0x8a154c14,     0xaa1445d5,     0xca01cf99,     0xea8b3f6a,
+    0x0a8c5cb9,     0x2a4a11d2,     0x4a855aa4,     0x6a857415,
+    0x8aa697da,     0xaa6d7423,     0xca29bf80,     0xea3cb8bd,
+    0x0a675249,     0x2ab961ba,     0x4a331899,     0x6a646345,
+    0x11055267,     0x31064408,     0x51028e9d,     0x710bdee8,
+    0x91082d81,     0xb106a962,     0xd10b33ae,     0xf10918ab,
+    0x121102d7,     0x3204cd44,     0x5204cf00,     0x72099fb3,
+    0x92729545,     0xb20e37cc,     0xd27c34be,     0xf27e4efa,
+    0x14000000,     0x17ffffd7,     0x1400017f,     0x94000000,
+    0x97ffffd4,     0x9400017c,     0x3400000c,     0x34fffa2c,
+    0x34002f2c,     0x35000014,     0x35fff9d4,     0x35002ed4,
+    0xb400000c,     0xb4fff96c,     0xb4002e6c,     0xb5000018,
+    0xb5fff918,     0xb5002e18,     0x10000006,     0x10fff8a6,
+    0x10002da6,     0x90000015,     0x36080001,     0x360ff821,
+    0x36082d21,     0x37480008,     0x374ff7c8,     0x37482cc8,
+    0x128b50ec,     0x52a9ff8b,     0x7281d095,     0x92edfebd,
+    0xd28361e3,     0xf2a4cc96,     0x9346590c,     0x33194f33,
+    0x531d3d89,     0x9350433c,     0xb34464ac,     0xd3462140,
+    0x139a61a4,     0x93d87fd7,     0x54000000,     0x54fff5a0,
+    0x54002aa0,     0x54000001,     0x54fff541,     0x54002a41,
+    0x54000002,     0x54fff4e2,     0x540029e2,     0x54000002,
+    0x54fff482,     0x54002982,     0x54000003,     0x54fff423,
+    0x54002923,     0x54000003,     0x54fff3c3,     0x540028c3,
+    0x54000004,     0x54fff364,     0x54002864,     0x54000005,
+    0x54fff305,     0x54002805,     0x54000006,     0x54fff2a6,
+    0x540027a6,     0x54000007,     0x54fff247,     0x54002747,
+    0x54000008,     0x54fff1e8,     0x540026e8,     0x54000009,
+    0x54fff189,     0x54002689,     0x5400000a,     0x54fff12a,
+    0x5400262a,     0x5400000b,     0x54fff0cb,     0x540025cb,
+    0x5400000c,     0x54fff06c,     0x5400256c,     0x5400000d,
+    0x54fff00d,     0x5400250d,     0x5400000e,     0x54ffefae,
+    0x540024ae,     0x5400000f,     0x54ffef4f,     0x5400244f,
+    0xd4063721,     0xd4035082,     0xd400bfe3,     0xd4282fc0,
+    0xd444c320,     0xd503201f,     0xd69f03e0,     0xd6bf03e0,
+    0xd5033fdf,     0xd5033f9f,     0xd5033abf,     0xd61f0040,
+    0xd63f00a0,     0xc8147c55,     0xc807fcfd,     0xc85f7e05,
+    0xc85fffbb,     0xc89fffa0,     0xc8dfff95,     0x88187cf8,
+    0x8815ff9a,     0x885f7cd5,     0x885fffcf,     0x889ffc73,
+    0x88dffc56,     0x48127c0f,     0x480bff85,     0x485f7cdd,
+    0x485ffcf2,     0x489fff99,     0x48dffe62,     0x080a7c3e,
+    0x0814fed5,     0x085f7c59,     0x085ffcb8,     0x089ffc70,
+    0x08dfffb6,     0xc87f0a68,     0xc87fcdc7,     0xc82870bb,
+    0xc826b8c8,     0x887f12d9,     0x887fb9ee,     0x8834215a,
+    0x8837ca52,     0xf806317e,     0xb81b3337,     0x39000dc2,
+    0x78005149,     0xf84391f4,     0xb85b220c,     0x385fd356,
+    0x785d127e,     0x389f4149,     0x79801e3c,     0x79c014a3,
+    0xb89a5231,     0xfc5ef282,     0xbc5f60f6,     0xfc12125e,
+    0xbc0152cd,     0xf8190e49,     0xb800befd,     0x381ffd92,
+    0x781e9e90,     0xf8409fa3,     0xb8413c79,     0x385fffa1,
+    0x785c7fa8,     0x389f3dc5,     0x78801f6a,     0x78c19d4b,
+    0xb89a4ec4,     0xfc408eeb,     0xbc436e79,     0xfc152ce1,
+    0xbc036f28,     0xf8025565,     0xb80135f8,     0x381ff74f,
+    0x781fa652,     0xf851a447,     0xb85e557b,     0x385e7472,
+    0x785e070a,     0x38804556,     0x78819591,     0x78dc24e8,
+    0xb89cd6d7,     0xfc430738,     0xbc5f6595,     0xfc1225b2,
+    0xbc1d7430,     0xf82fcac2,     0xb83d6a02,     0x382e5a54,
+    0x7834fa66,     0xf86ecbae,     0xb86cda90,     0x3860d989,
+    0x78637a2c,     0x38a3fa22,     0x78b15827,     0x78f2d9f9,
+    0xb8ac6ab7,     0xfc6879a5,     0xbc767943,     0xfc3bc84e,
+    0xbc3968d4,     0xf91fc0fe,     0xb91da50f,     0x391d280b,
+    0x791d2e23,     0xf95bc8e2,     0xb95ce525,     0x395ae53c,
+    0x795c9282,     0x399d7dd6,     0x799fe008,     0x79de9bc0,
+    0xb99aae78,     0xfd597598,     0xbd5d1d08,     0xfd1f3dea,
+    0xbd1a227a,     0x5800148a,     0x18000003,     0xf88092e0,
+    0xd8ffdf00,     0xf8a84860,     0xf99d7560,     0x1a1c012d,
+    0x3a1c027b,     0x5a060253,     0x7a03028e,     0x9a0801d0,
+    0xba0803a0,     0xda140308,     0xfa00038c,     0x0b3010d7,
+    0x2b37ab39,     0xcb2466da,     0x6b33efb1,     0x8b350fcb,
+    0xab208a70,     0xcb39e52b,     0xeb2c9291,     0x3a4bd1a3,
+    0x7a4c81a2,     0xba42106c,     0xfa5560e3,     0x3a4e3844,
+    0x7a515a26,     0xba4c2940,     0xfa52aaae,     0x1a8cc1b5,
+    0x1a8f976a,     0x5a8981a0,     0x5a9a6492,     0x9a8793ac,
+    0x9a9474e6,     0xda83d2b6,     0xda9b9593,     0x5ac00200,
+    0x5ac006f1,     0x5ac009d1,     0x5ac013d8,     0x5ac016d8,
+    0xdac00223,     0xdac005ac,     0xdac00ac9,     0xdac00c00,
+    0xdac01205,     0xdac016d9,     0x1ac0089d,     0x1add0fa0,
+    0x1ad52225,     0x1ad22529,     0x1ac82b61,     0x1acd2e92,
+    0x9acc0b28,     0x9adc0ca7,     0x9adb2225,     0x9ad42757,
+    0x9adc291c,     0x9ac42fa3,     0x1b1a55d1,     0x1b0bafc1,
+    0x9b067221,     0x9b1ea0de,     0x9b2e20d5,     0x9b38cd4a,
+    0x9bae6254,     0x9ba59452,     0x1e2d0a48,     0x1e3c19c2,
+    0x1e3c298f,     0x1e213980,     0x1e240baf,     0x1e77082c,
+    0x1e72191b,     0x1e6b2a97,     0x1e723988,     0x1e770b1a,
+    0x1f0d66f5,     0x1f01b956,     0x1f227a8e,     0x1f365ba7,
+    0x1f4f14ad,     0x1f45a98e,     0x1f60066a,     0x1f620054,
+    0x1e204139,     0x1e20c094,     0x1e214363,     0x1e21c041,
+    0x1e22c01e,     0x1e60408c,     0x1e60c361,     0x1e6142c8,
+    0x1e61c16b,     0x1e624396,     0x1e3802dc,     0x9e380374,
+    0x1e78000e,     0x9e78017a,     0x1e2202dc,     0x9e220150,
+    0x1e6202a8,     0x9e620395,     0x1e260318,     0x9e660268,
+    0x1e270188,     0x9e6700e6,     0x1e3023c0,     0x1e6b2320,
+    0x1e202168,     0x1e602168,     0x2910323d,     0x297449d6,
+    0x6948402b,     0xa9072f40,     0xa9410747,     0x29801f0a,
+    0x29e07307,     0x69e272b9,     0xa9bf49d4,     0xa9c529a8,
+    0x28b0605a,     0x28e866a2,     0x68ee0ab1,     0xa886296c,
+    0xa8fe1a38,     0x282479c3,     0x286e534f,     0xa8386596,
+    0xa8755a3b,     0x1e601000,     0x1e603000,     0x1e621000,
+    0x1e623000,     0x1e641000,     0x1e643000,     0x1e661000,
+    0x1e663000,     0x1e681000,     0x1e683000,     0x1e6a1000,
+    0x1e6a3000,     0x1e6c1000,     0x1e6c3000,     0x1e6e1000,
+    0x1e6e3000,     0x1e701000,     0x1e703000,     0x1e721000,
+    0x1e723000,     0x1e741000,     0x1e743000,     0x1e761000,
+    0x1e763000,     0x1e781000,     0x1e783000,     0x1e7a1000,
+    0x1e7a3000,     0x1e7c1000,     0x1e7c3000,     0x1e7e1000,
+    0x1e7e3000,
+  };
+// END  Generated code -- do not edit
+
+  {
+    bool ok = true;
+    unsigned int *insns1 = (unsigned int *)entry;
+    for (unsigned int i = 0; i < sizeof insns / sizeof insns[0]; i++) {
+      if (insns[i] != insns1[i]) {
+        ok = false;
+        printf("Ours:\n");
+        Disassembler::decode((address)&insns1[i], (address)&insns1[i+1]);
+        printf("Theirs:\n");
+        Disassembler::decode((address)&insns[i], (address)&insns[i+1]);
+        printf("\n");
+      }
+    }
+    assert(ok, "Assembler smoke test failed");
+  }
+
+#ifndef PRODUCT
+
+  address PC = __ pc();
+  __ ld1(v0, __ T16B, Address(r16)); // No offset
+  __ ld1(v0, __ T16B, __ post(r16, 0)); // Post-index
+  __ ld1(v0, __ T16B, Address(r16, r17)); // Register offset
+
+#endif // PRODUCT
+#endif // ASSERT
+}
+
+#undef __
+
+void Assembler::emit_data64(jlong data,
+                            relocInfo::relocType rtype,
+                            int format) {
+  if (rtype == relocInfo::none) {
+    emit_int64(data);
+  } else {
+    emit_data64(data, Relocation::spec_simple(rtype), format);
+  }
+}
+
+void Assembler::emit_data64(jlong data,
+                            RelocationHolder const& rspec,
+                            int format) {
+
+  assert(inst_mark() != NULL, "must be inside InstructionMark");
+  // Do not use AbstractAssembler::relocate, which is not intended for
+  // embedded words.  Instead, relocate to the enclosing instruction.
+  code_section()->relocate(inst_mark(), rspec, format);
+  emit_int64(data);
+}
+
+extern "C" {
+  void das(uint64_t start, int len) {
+    ResourceMark rm;
+    len <<= 2;
+    if (len < 0)
+      Disassembler::decode((address)start + len, (address)start);
+    else
+      Disassembler::decode((address)start, (address)start + len);
+  }
+
+  JNIEXPORT void das1(unsigned long insn) {
+    das(insn, 1);
+  }
+}
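+// The two helpers above are debugging aids intended to be called by
+// hand from a native debugger, e.g. (hypothetical gdb session):
+//
+//   (gdb) call das((uint64_t)$pc, 4)    // decode the next 4 instructions
+//   (gdb) call das((uint64_t)$pc, -4)   // decode the 4 instructions before pc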
+
+#define gas_assert(ARG1) assert(ARG1, #ARG1)
+
+#define __ as->
+
+void Address::lea(MacroAssembler *as, Register r) const {
+  Relocation* reloc = _rspec.reloc();
+  relocInfo::relocType rtype = (relocInfo::relocType) reloc->type();
+
+  switch(_mode) {
+  case base_plus_offset: {
+    if (_offset == 0 && _base == r) // it's a nop
+      break;
+    if (_offset > 0)
+      __ add(r, _base, _offset);
+    else
+      __ sub(r, _base, -_offset);
+    break;
+  }
+  case base_plus_offset_reg: {
+    __ add(r, _base, _index, _ext.op(), MAX2(_ext.shift(), 0));
+    break;
+  }
+  case literal: {
+    if (rtype == relocInfo::none)
+      __ mov(r, target());
+    else
+      __ movptr(r, (uint64_t)target());
+    break;
+  }
+  default:
+    ShouldNotReachHere();
+  }
+}
+
+void Assembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) {
+  ShouldNotReachHere();
+}
+
+#undef __
+
+#define starti Instruction_aarch64 do_not_use(this); set_current(&do_not_use)
+
+  void Assembler::adr(Register Rd, address adr) {
+    long offset = adr - pc();
+    int offset_lo = offset & 3;
+    offset >>= 2;
+    starti;
+    f(0, 31), f(offset_lo, 30, 29), f(0b10000, 28, 24), sf(offset, 23, 5);
+    rf(Rd, 0);
+  }
+
+  void Assembler::_adrp(Register Rd, address adr) {
+    uint64_t pc_page = (uint64_t)pc() >> 12;
+    uint64_t adr_page = (uint64_t)adr >> 12;
+    long offset = adr_page - pc_page;
+    int offset_lo = offset & 3;
+    offset >>= 2;
+    starti;
+    f(1, 31), f(offset_lo, 30, 29), f(0b10000, 28, 24), sf(offset, 23, 5);
+    rf(Rd, 0);
+  }
+
+#undef starti
+
+Address::Address(address target, relocInfo::relocType rtype) : _mode(literal) {
+  _is_lval = false;
+  _target = target;
+  switch (rtype) {
+  case relocInfo::oop_type:
+  case relocInfo::metadata_type:
+    // Oops are a special case. Normally they would be their own section
+    // but in cases like icBuffer they are literals in the code stream that
+    // we don't have a section for. We use none so that we get a literal address
+    // which is always patchable.
+    break;
+  case relocInfo::external_word_type:
+    _rspec = external_word_Relocation::spec(target);
+    break;
+  case relocInfo::internal_word_type:
+    _rspec = internal_word_Relocation::spec(target);
+    break;
+  case relocInfo::opt_virtual_call_type:
+    _rspec = opt_virtual_call_Relocation::spec();
+    break;
+  case relocInfo::static_call_type:
+    _rspec = static_call_Relocation::spec();
+    break;
+  case relocInfo::runtime_call_type:
+    _rspec = runtime_call_Relocation::spec();
+    break;
+  case relocInfo::poll_type:
+  case relocInfo::poll_return_type:
+    _rspec = Relocation::spec_simple(rtype);
+    break;
+  case relocInfo::none:
+    _rspec = RelocationHolder::none;
+    break;
+  default:
+    ShouldNotReachHere();
+    break;
+  }
+}
+
+void Assembler::b(const Address &dest) {
+  code_section()->relocate(pc(), dest.rspec());
+  b(dest.target());
+}
+
+void Assembler::bl(const Address &dest) {
+  code_section()->relocate(pc(), dest.rspec());
+  bl(dest.target());
+}
+
+void Assembler::adr(Register r, const Address &dest) {
+  code_section()->relocate(pc(), dest.rspec());
+  adr(r, dest.target());
+}
+
+void Assembler::br(Condition cc, Label &L) {
+  if (L.is_bound()) {
+    br(cc, target(L));
+  } else {
+    L.add_patch_at(code(), locator());
+    br(cc, pc());
+  }
+}
+
+void Assembler::wrap_label(Label &L,
+                                 Assembler::uncond_branch_insn insn) {
+  if (L.is_bound()) {
+    (this->*insn)(target(L));
+  } else {
+    L.add_patch_at(code(), locator());
+    (this->*insn)(pc());
+  }
+}
+
+void Assembler::wrap_label(Register r, Label &L,
+                                 compare_and_branch_insn insn) {
+  if (L.is_bound()) {
+    (this->*insn)(r, target(L));
+  } else {
+    L.add_patch_at(code(), locator());
+    (this->*insn)(r, pc());
+  }
+}
+
+void Assembler::wrap_label(Register r, int bitpos, Label &L,
+                                 test_and_branch_insn insn) {
+  if (L.is_bound()) {
+    (this->*insn)(r, bitpos, target(L));
+  } else {
+    L.add_patch_at(code(), locator());
+    (this->*insn)(r, bitpos, pc());
+  }
+}
+
+void Assembler::wrap_label(Label &L, prfop op, prefetch_insn insn) {
+  if (L.is_bound()) {
+    (this->*insn)(target(L), op);
+  } else {
+    L.add_patch_at(code(), locator());
+    (this->*insn)(pc(), op);
+  }
+}
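+// Note on the wrap_label helpers above: when the target label is not
+// yet bound, the branch is emitted as a branch-to-self (dest == pc())
+// and its locator is queued with add_patch_at(); binding the label
+// later back-patches the offset field.  For example (illustrative):
+//
+//   Label done;
+//   __ cbz(r0, done);   // emitted as "cbz x0, ." and queued for patching
+//   // ...
+//   __ bind(done);      // patches the cbz offset to reach this point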
+
+// An "all-purpose" add/subtract immediate, per ARM documentation:
+// A "programmer-friendly" assembler may accept a negative immediate
+// between -(2^24 -1) and -1 inclusive, causing it to convert a
+// requested ADD operation to a SUB, or vice versa, and then encode
+// the absolute value of the immediate as for uimm24.
+void Assembler::add_sub_immediate(Register Rd, Register Rn, unsigned uimm, int op,
+                                  int negated_op) {
+  bool sets_flags = op & 1;   // low-order bit of op is the S (set-flags) bit
+  union {
+    unsigned u;
+    int imm;
+  };
+  u = uimm;
+  bool shift = false;
+  bool neg = imm < 0;
+  if (neg) {
+    imm = -imm;
+    op = negated_op;
+  }
+  assert(Rd != sp || imm % 16 == 0, "misaligned stack");
+  if (imm >= (1 << 11)
+      && ((imm >> 12) << 12 == imm)) {
+    imm >>= 12;
+    shift = true;
+  }
+  f(op, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10);
+
+  // add/subtract immediate ops with the S bit set treat r31 as zr;
+  // with S unset they use sp.
+  if (sets_flags)
+    zrf(Rd, 0);
+  else
+    srf(Rd, 0);
+
+  srf(Rn, 5);
+}
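+// For example (illustrative): a request such as add(r0, r1, -16)
+// arrives here with uimm == (unsigned)-16; the negation above flips it
+// to the subtract opcode with imm == 16, so what is emitted is
+// "sub x0, x1, #16".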
+
+bool Assembler::operand_valid_for_add_sub_immediate(long imm) {
+  unsigned long uimm = uabs(imm);
+  if (uimm < (1 << 12))
+    return true;
+  if (uimm < (1 << 24)
+      && ((uimm >> 12) << 12 == uimm)) {
+    return true;
+  }
+  return false;
+}
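+// Examples (illustrative): 0xfff encodes directly, 0xfff000 encodes
+// using the 12-bit shifted form, but 0xfff001 fits neither form and
+// must first be materialized in a scratch register.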
+
+bool Assembler::operand_valid_for_logical_immediate(bool is32, uint64_t imm) {
+  return encode_logical_immediate(is32, imm) != 0xffffffff;
+}
+
+static uint64_t doubleTo64Bits(jdouble d) {
+  union {
+    jdouble double_value;
+    uint64_t double_bits;
+  };
+
+  double_value = d;
+  return double_bits;
+}
+
+bool Assembler::operand_valid_for_float_immediate(double imm) {
+  // If imm is all zero bits we can use ZR as the source of a
+  // floating-point value.
+  if (doubleTo64Bits(imm) == 0)
+    return true;
+
+  // Otherwise try to encode imm then convert the encoded value back
+  // and make sure it's the exact same bit pattern.
+  unsigned result = encoding_for_fp_immediate(imm);
+  return doubleTo64Bits(imm) == fp_immediate_for_encoding(result, true);
+}
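+// The encodable set is the standard AArch64 FMOV (immediate) one:
+// values of the form +/-(n/16) * 2^e with 16 <= n <= 31 and -3 <= e <= 4.
+// Hence, as in the generated test above, 2.0, 0.1328125 and -17.0 all
+// encode, while a value such as 3.14 does not and has to be loaded from
+// a constant pool instead.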
+
+int AbstractAssembler::code_fill_byte() {
+  return 0;
+}
+
+// n.b. this is implemented in subclass MacroAssembler
+void Assembler::bang_stack_with_offset(int offset) { Unimplemented(); }
+
+
+// these are the functions provided by the simulator which are used to
+// encode and decode logical immediates and floating point immediates
+//
+//   u_int64_t logical_immediate_for_encoding(u_int32_t encoding);
+//
+//   u_int32_t encoding_for_logical_immediate(u_int64_t immediate);
+//
+//   u_int64_t fp_immediate_for_encoding(u_int32_t imm8, int is_dp);
+//
+//   u_int32_t encoding_for_fp_immediate(float immediate);
+//
+// we currently import these from the simulator library but the
+// definitions will need to be moved here when we switch to real
+// hardware.
+
+// and now the routines called by the assembler which encapsulate the
+// above encode and decode functions
+
+uint32_t
+asm_util::encode_logical_immediate(bool is32, uint64_t imm)
+{
+  if (is32) {
+    /* Allow all zeros or all ones in top 32-bits, so that
+       constant expressions like ~1 are permitted. */
+    if (imm >> 32 != 0 && imm >> 32 != 0xffffffff)
+      return 0xffffffff;
+    /* Replicate the 32 lower bits to the 32 upper bits.  */
+    imm &= 0xffffffff;
+    imm |= imm << 32;
+  }
+
+  return encoding_for_logical_immediate(imm);
+}
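+// For example (illustrative): 0xf0f0f0f0 is a valid 32-bit logical
+// immediate (a replicated 8-bit element containing a rotated run of
+// four ones), so it encodes and appears in the generated test as
+// "orr w4, w10, #0xf0f0f0f0"; a pattern like 0x12345678 has no such
+// run form and yields the 0xffffffff failure value.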
+
+static float unpack(unsigned value); // defined below
+
+unsigned Assembler::pack(double value) {
+  float val = (float)value;
+  unsigned result = encoding_for_fp_immediate(val);
+  guarantee(unpack(result) == value,
+            "Invalid floating-point immediate operand");
+  return result;
+}
+
+// Packed operands for Floating-point Move (immediate)
+
+static float unpack(unsigned value) {
+  union {
+    unsigned ival;
+    float val;
+  };
+  ival = fp_immediate_for_encoding(value, 0);
+  return val;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,2340 @@
+/*
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_ASSEMBLER_AARCH64_HPP
+#define CPU_AARCH64_VM_ASSEMBLER_AARCH64_HPP
+
+#include "asm/register.hpp"
+
+// definitions of various symbolic names for machine registers
+
+// First, intercalls between C and Java, which use 8 general registers
+// and 8 floating-point registers.
+
+// We also have to copy between x86 and ARM registers, but that is a
+// secondary complication: not all code employing the C call convention
+// executes as x86 code (we generate some of it ourselves).
+
+class Argument VALUE_OBJ_CLASS_SPEC {
+ public:
+  enum {
+    n_int_register_parameters_c   = 8,  // r0, r1, ... r7 (c_rarg0, c_rarg1, ...)
+    n_float_register_parameters_c = 8,  // v0, v1, ... v7 (c_farg0, c_farg1, ... )
+
+    n_int_register_parameters_j   = 8, // r1, ... r7, r0 (j_rarg0, j_rarg1, ...)
+    n_float_register_parameters_j = 8  // v0, v1, ... v7 (j_farg0, j_farg1, ...)
+  };
+};
+
+REGISTER_DECLARATION(Register, c_rarg0, r0);
+REGISTER_DECLARATION(Register, c_rarg1, r1);
+REGISTER_DECLARATION(Register, c_rarg2, r2);
+REGISTER_DECLARATION(Register, c_rarg3, r3);
+REGISTER_DECLARATION(Register, c_rarg4, r4);
+REGISTER_DECLARATION(Register, c_rarg5, r5);
+REGISTER_DECLARATION(Register, c_rarg6, r6);
+REGISTER_DECLARATION(Register, c_rarg7, r7);
+
+REGISTER_DECLARATION(FloatRegister, c_farg0, v0);
+REGISTER_DECLARATION(FloatRegister, c_farg1, v1);
+REGISTER_DECLARATION(FloatRegister, c_farg2, v2);
+REGISTER_DECLARATION(FloatRegister, c_farg3, v3);
+REGISTER_DECLARATION(FloatRegister, c_farg4, v4);
+REGISTER_DECLARATION(FloatRegister, c_farg5, v5);
+REGISTER_DECLARATION(FloatRegister, c_farg6, v6);
+REGISTER_DECLARATION(FloatRegister, c_farg7, v7);
+
+// Symbolically name the register arguments used by the Java calling convention.
+// We have control over the convention for java so we can do what we please.
+// What pleases us is to offset the java calling convention so that when
+// we call a suitable jni method the arguments are lined up and we don't
+// have to do much shuffling. A suitable jni method is non-static and has a
+// small number of arguments.
+//
+//  |--------------------------------------------------------------------|
+//  | c_rarg0  c_rarg1  c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7  |
+//  |--------------------------------------------------------------------|
+//  | r0       r1       r2      r3      r4      r5      r6      r7       |
+//  |--------------------------------------------------------------------|
+//  | j_rarg7  j_rarg0  j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6  |
+//  |--------------------------------------------------------------------|
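+// For example (illustrative): for a non-static native method
+// obj.foo(int x), the Java convention above puts the receiver in
+// j_rarg0 == c_rarg1 and x in j_rarg1 == c_rarg2, which is exactly
+// where the JNI call (JNIEnv*, jobject, jint) expects them; the wrapper
+// only has to materialize the JNIEnv* in c_rarg0.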
+
+
+REGISTER_DECLARATION(Register, j_rarg0, c_rarg1);
+REGISTER_DECLARATION(Register, j_rarg1, c_rarg2);
+REGISTER_DECLARATION(Register, j_rarg2, c_rarg3);
+REGISTER_DECLARATION(Register, j_rarg3, c_rarg4);
+REGISTER_DECLARATION(Register, j_rarg4, c_rarg5);
+REGISTER_DECLARATION(Register, j_rarg5, c_rarg6);
+REGISTER_DECLARATION(Register, j_rarg6, c_rarg7);
+REGISTER_DECLARATION(Register, j_rarg7, c_rarg0);
+
+// Java floating args are passed as per C
+
+REGISTER_DECLARATION(FloatRegister, j_farg0, v0);
+REGISTER_DECLARATION(FloatRegister, j_farg1, v1);
+REGISTER_DECLARATION(FloatRegister, j_farg2, v2);
+REGISTER_DECLARATION(FloatRegister, j_farg3, v3);
+REGISTER_DECLARATION(FloatRegister, j_farg4, v4);
+REGISTER_DECLARATION(FloatRegister, j_farg5, v5);
+REGISTER_DECLARATION(FloatRegister, j_farg6, v6);
+REGISTER_DECLARATION(FloatRegister, j_farg7, v7);
+
+// registers used to hold VM data either temporarily within a method
+// or across method calls
+
+// volatile (caller-save) registers
+
+// r8 is used for indirect result location return
+// we use it and r9 as scratch registers
+REGISTER_DECLARATION(Register, rscratch1, r8);
+REGISTER_DECLARATION(Register, rscratch2, r9);
+
+// current method -- must be in a call-clobbered register
+REGISTER_DECLARATION(Register, rmethod,   r12);
+
+// non-volatile (callee-save) registers are r16-29
+// of which the following are dedicated global state
+
+// link register
+REGISTER_DECLARATION(Register, lr,        r30);
+// frame pointer
+REGISTER_DECLARATION(Register, rfp,       r29);
+// current thread
+REGISTER_DECLARATION(Register, rthread,   r28);
+// base of heap
+REGISTER_DECLARATION(Register, rheapbase, r27);
+// constant pool cache
+REGISTER_DECLARATION(Register, rcpool,    r26);
+// monitors allocated on stack
+REGISTER_DECLARATION(Register, rmonitors, r25);
+// locals on stack
+REGISTER_DECLARATION(Register, rlocals,   r24);
+// bytecode pointer
+REGISTER_DECLARATION(Register, rbcp,      r22);
+// dispatch table base
+REGISTER_DECLARATION(Register, rdispatch, r21);
+// Java stack pointer
+REGISTER_DECLARATION(Register, esp,       r20);
+
+// TODO : x86 uses rbp to save SP in method handle code
+// we may need to do the same with fp
+// JSR 292 fixed register usages:
+//REGISTER_DECLARATION(Register, r_mh_SP_save, r29);
+
+#define assert_cond(ARG1) assert(ARG1, #ARG1)
+
+namespace asm_util {
+  uint32_t encode_logical_immediate(bool is32, uint64_t imm);
+};
+
+using namespace asm_util;
+
+
+class Assembler;
+
+class Instruction_aarch64 {
+  unsigned insn;
+#ifdef ASSERT
+  unsigned bits;
+#endif
+  Assembler *assem;
+
+public:
+
+  Instruction_aarch64(class Assembler *as) {
+#ifdef ASSERT
+    bits = 0;
+#endif
+    insn = 0;
+    assem = as;
+  }
+
+  inline ~Instruction_aarch64();
+
+  unsigned &get_insn() { return insn; }
+#ifdef ASSERT
+  unsigned &get_bits() { return bits; }
+#endif
+
+  static inline int32_t extend(unsigned val, int hi = 31, int lo = 0) {
+    union {
+      unsigned u;
+      int n;
+    };
+
+    u = val << (31 - hi);
+    n = n >> (31 - hi + lo);
+    return n;
+  }
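+  // For example (illustrative): extend(0b101, 2) treats the value as a
+  // 3-bit two's-complement field and returns -3.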
+
+  static inline uint32_t extract(uint32_t val, int msb, int lsb) {
+    int nbits = msb - lsb + 1;
+    assert_cond(msb >= lsb);
+    uint32_t mask = (1U << nbits) - 1;
+    uint32_t result = val >> lsb;
+    result &= mask;
+    return result;
+  }
+
+  static inline int32_t sextract(uint32_t val, int msb, int lsb) {
+    uint32_t uval = extract(val, msb, lsb);
+    return extend(uval, msb - lsb);
+  }
+
+  static void patch(address a, int msb, int lsb, unsigned long val) {
+    int nbits = msb - lsb + 1;
+    guarantee(val < (1U << nbits), "Field too big for insn");
+    assert_cond(msb >= lsb);
+    unsigned mask = (1U << nbits) - 1;
+    val <<= lsb;
+    mask <<= lsb;
+    unsigned target = *(unsigned *)a;
+    target &= ~mask;
+    target |= val;
+    *(unsigned *)a = target;
+  }
+
+  static void spatch(address a, int msb, int lsb, long val) {
+    int nbits = msb - lsb + 1;
+    long chk = val >> (nbits - 1);
+    guarantee (chk == -1 || chk == 0, "Field too big for insn");
+    unsigned uval = val;
+    unsigned mask = (1U << nbits) - 1;
+    uval &= mask;
+    uval <<= lsb;
+    mask <<= lsb;
+    unsigned target = *(unsigned *)a;
+    target &= ~mask;
+    target |= uval;
+    *(unsigned *)a = target;
+  }
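+  // For example (illustrative): to retarget the 19-bit signed word
+  // offset of a conditional branch instruction at address a (the imm19
+  // field of B.cond occupies bits 23..5), one would use
+  //   spatch(a, 23, 5, (dest - a) >> 2);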
+
+  void f(unsigned val, int msb, int lsb) {
+    int nbits = msb - lsb + 1;
+    guarantee(val < (1U << nbits), "Field too big for insn");
+    assert_cond(msb >= lsb);
+    unsigned mask = (1U << nbits) - 1;
+    val <<= lsb;
+    mask <<= lsb;
+    insn |= val;
+    assert_cond((bits & mask) == 0);
+#ifdef ASSERT
+    bits |= mask;
+#endif
+  }
+
+  void f(unsigned val, int bit) {
+    f(val, bit, bit);
+  }
+
+  void sf(long val, int msb, int lsb) {
+    int nbits = msb - lsb + 1;
+    long chk = val >> (nbits - 1);
+    guarantee (chk == -1 || chk == 0, "Field too big for insn");
+    unsigned uval = val;
+    unsigned mask = (1U << nbits) - 1;
+    uval &= mask;
+    f(uval, lsb + nbits - 1, lsb);
+  }
+
+  void rf(Register r, int lsb) {
+    f(r->encoding_nocheck(), lsb + 4, lsb);
+  }
+
+  // reg|ZR
+  void zrf(Register r, int lsb) {
+    f(r->encoding_nocheck() - (r == zr), lsb + 4, lsb);
+  }
+
+  // reg|SP
+  void srf(Register r, int lsb) {
+    f(r == sp ? 31 : r->encoding_nocheck(), lsb + 4, lsb);
+  }
+
+  void rf(FloatRegister r, int lsb) {
+    f(r->encoding_nocheck(), lsb + 4, lsb);
+  }
+
+  unsigned get(int msb = 31, int lsb = 0) {
+    int nbits = msb - lsb + 1;
+    unsigned mask = ((1U << nbits) - 1) << lsb;
+    assert_cond((bits & mask) == mask);
+    return (insn & mask) >> lsb;
+  }
+
+  void fixed(unsigned value, unsigned mask) {
+    assert_cond ((mask & bits) == 0);
+#ifdef ASSERT
+    bits |= mask;
+#endif
+    insn |= value;
+  }
+};
+
+#define starti Instruction_aarch64 do_not_use(this); set_current(&do_not_use)
+
+class PrePost {
+  int _offset;
+  Register _r;
+public:
+  PrePost(Register reg, int o) : _r(reg), _offset(o) { }
+  int offset() { return _offset; }
+  Register reg() { return _r; }
+};
+
+class Pre : public PrePost {
+public:
+  Pre(Register reg, int o) : PrePost(reg, o) { }
+};
+class Post : public PrePost {
+public:
+  Post(Register reg, int o) : PrePost(reg, o) { }
+};
+
+namespace ext
+{
+  enum operation { uxtb, uxth, uxtw, uxtx, sxtb, sxth, sxtw, sxtx };
+};
+
+// abs methods which cannot overflow and so are well-defined across
+// the entire domain of integer types.
+static inline unsigned int uabs(unsigned int n) {
+  union {
+    unsigned int result;
+    int value;
+  };
+  result = n;
+  if (value < 0) result = -result;
+  return result;
+}
+static inline unsigned long uabs(unsigned long n) {
+  union {
+    unsigned long result;
+    long value;
+  };
+  result = n;
+  if (value < 0) result = -result;
+  return result;
+}
+static inline unsigned long uabs(long n) { return uabs((unsigned long)n); }
+static inline unsigned long uabs(int n) { return uabs((unsigned int)n); }
+
+// Addressing modes
+class Address VALUE_OBJ_CLASS_SPEC {
+ public:
+
+  enum mode { no_mode, base_plus_offset, pre, post, pcrel,
+              base_plus_offset_reg, literal };
+
+  // Shift and extend for base reg + reg offset addressing
+  class extend {
+    int _option, _shift;
+    ext::operation _op;
+  public:
+    extend() { }
+    extend(int s, int o, ext::operation op) : _option(o), _shift(s), _op(op) { }
+    int option() const { return _option; }
+    int shift() const { return _shift; }
+    ext::operation op() const { return _op; }
+  };
+  class uxtw : public extend {
+  public:
+    uxtw(int shift = -1): extend(shift, 0b010, ext::uxtw) { }
+  };
+  class lsl : public extend {
+  public:
+    lsl(int shift = -1): extend(shift, 0b011, ext::uxtx) { }
+  };
+  class sxtw : public extend {
+  public:
+    sxtw(int shift = -1): extend(shift, 0b110, ext::sxtw) { }
+  };
+  class sxtx : public extend {
+  public:
+    sxtx(int shift = -1): extend(shift, 0b111, ext::sxtx) { }
+  };
+
+ private:
+  Register _base;
+  Register _index;
+  long _offset;
+  enum mode _mode;
+  extend _ext;
+
+  RelocationHolder _rspec;
+
+  // Typically, when we use AddressLiterals we want their rval.
+  // However, in some situations we want the lval (effective address) of
+  // the item.  We provide a special factory for making those lvals.
+  bool _is_lval;
+
+  // If the target is far we'll need to load the ea of this to a
+  // register to reach it. Otherwise if near we can do PC-relative
+  // addressing.
+  address          _target;
+
+ public:
+  Address()
+    : _mode(no_mode) { }
+  Address(Register r)
+    : _mode(base_plus_offset), _base(r), _offset(0), _index(noreg), _target(0) { }
+  Address(Register r, int o)
+    : _mode(base_plus_offset), _base(r), _offset(o), _index(noreg), _target(0) { }
+  Address(Register r, long o)
+    : _mode(base_plus_offset), _base(r), _offset(o), _index(noreg), _target(0) { }
+  Address(Register r, unsigned long o)
+    : _mode(base_plus_offset), _base(r), _offset(o), _index(noreg), _target(0) { }
+#ifdef ASSERT
+  Address(Register r, ByteSize disp)
+    : _mode(base_plus_offset), _base(r), _offset(in_bytes(disp)),
+      _index(noreg), _target(0) { }
+#endif
+  Address(Register r, Register r1, extend ext = lsl())
+    : _mode(base_plus_offset_reg), _base(r), _index(r1),
+    _ext(ext), _offset(0), _target(0) { }
+  Address(Pre p)
+    : _mode(pre), _base(p.reg()), _offset(p.offset()), _target(0) { }
+  Address(Post p)
+    : _mode(post), _base(p.reg()), _offset(p.offset()), _target(0) { }
+  Address(address target, RelocationHolder const& rspec)
+    : _mode(literal),
+      _rspec(rspec),
+      _is_lval(false),
+      _target(target)  { }
+  Address(address target, relocInfo::relocType rtype = relocInfo::external_word_type);
+  Address(Register base, RegisterOrConstant index, extend ext = lsl())
+    : _base (base),
+      _ext(ext), _offset(0), _target(0) {
+    if (index.is_register()) {
+      _mode = base_plus_offset_reg;
+      _index = index.as_register();
+    } else {
+      guarantee(ext.option() == ext::uxtx, "should be");
+      assert(index.is_constant(), "should be");
+      _mode = base_plus_offset;
+      _offset = index.as_constant() << ext.shift();
+    }
+  }
+
+  Register base() const {
+    guarantee((_mode == base_plus_offset || _mode == base_plus_offset_reg
+               || _mode == post),
+              "wrong mode");
+    return _base;
+  }
+  long offset() const {
+    return _offset;
+  }
+  Register index() const {
+    return _index;
+  }
+  mode getMode() const {
+    return _mode;
+  }
+  bool uses(Register reg) const { return _base == reg || _index == reg; }
+  address target() const { return _target; }
+  const RelocationHolder& rspec() const { return _rspec; }
+
+  void encode(Instruction_aarch64 *i) const {
+    i->f(0b111, 29, 27);
+    i->srf(_base, 5);
+
+    switch(_mode) {
+    case base_plus_offset:
+      {
+        unsigned size = i->get(31, 30);
+        unsigned mask = (1 << size) - 1;
+        if (_offset < 0 || _offset & mask)
+          {
+            i->f(0b00, 25, 24);
+            i->f(0, 21), i->f(0b00, 11, 10);
+            i->sf(_offset, 20, 12);
+          } else {
+            i->f(0b01, 25, 24);
+            i->f(_offset >> size, 21, 10);
+          }
+      }
+      break;
+
+    case base_plus_offset_reg:
+      {
+        i->f(0b00, 25, 24);
+        i->f(1, 21);
+        i->rf(_index, 16);
+        i->f(_ext.option(), 15, 13);
+        unsigned size = i->get(31, 30);
+        if (size == 0) // It's a byte
+          i->f(_ext.shift() >= 0, 12);
+        else {
+          if (_ext.shift() > 0)
+            assert(_ext.shift() == (int)size, "bad shift");
+          i->f(_ext.shift() > 0, 12);
+        }
+        i->f(0b10, 11, 10);
+      }
+      break;
+
+    case pre:
+      i->f(0b00, 25, 24);
+      i->f(0, 21), i->f(0b11, 11, 10);
+      i->sf(_offset, 20, 12);
+      break;
+
+    case post:
+      i->f(0b00, 25, 24);
+      i->f(0, 21), i->f(0b01, 11, 10);
+      i->sf(_offset, 20, 12);
+      break;
+
+    default:
+      ShouldNotReachHere();
+    }
+  }
+
+  void encode_pair(Instruction_aarch64 *i) const {
+    switch(_mode) {
+    case base_plus_offset:
+      i->f(0b010, 25, 23);
+      break;
+    case pre:
+      i->f(0b011, 25, 23);
+      break;
+    case post:
+      i->f(0b001, 25, 23);
+      break;
+    default:
+      ShouldNotReachHere();
+    }
+
+    unsigned size; // log2 of the operand size, in 32-bit words
+
+    if (i->get(26, 26)) { // float
+      switch(i->get(31, 30)) {
+      case 0b10:
+        size = 2; break;
+      case 0b01:
+        size = 1; break;
+      case 0b00:
+        size = 0; break;
+      default:
+        ShouldNotReachHere();
+      }
+    } else {
+      size = i->get(31, 31);
+    }
+
+    size = 4 << size;
+    guarantee(_offset % size == 0, "bad offset");
+    i->sf(_offset / size, 21, 15);
+    i->srf(_base, 5);
+  }
+
+  void encode_nontemporal_pair(Instruction_aarch64 *i) const {
+    // Only base + offset is allowed
+    i->f(0b000, 25, 23);
+    unsigned size = i->get(31, 31);
+    size = 4 << size;
+    guarantee(_offset % size == 0, "bad offset");
+    i->sf(_offset / size, 21, 15);
+    i->srf(_base, 5);
+    guarantee(_mode == Address::base_plus_offset,
+              "Bad addressing mode for non-temporal op");
+  }
+
+  void lea(MacroAssembler *, Register) const;
+
+  static bool offset_ok_for_immed(long offset, int shift = 0) {
+    unsigned mask = (1 << shift) - 1;
+    if (offset < 0 || offset & mask) {
+      return (uabs(offset) < (1 << (20 - 12))); // Unscaled offset
+    } else {
+      return ((offset >> shift) < (1 << (21 - 10 + 1))); // Scaled, unsigned offset
+    }
+  }
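+  // Examples (illustrative), for an 8-byte access (shift == 3):
+  //   offset 32760 is accepted in the scaled, unsigned form (32760/8 < 4096),
+  //   offset -8 is accepted in the unscaled 9-bit form (|-8| < 256),
+  //   offset 32768 fits neither and the address must be formed explicitly.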
+};
+
+// Convenience classes
+class RuntimeAddress: public Address {
+
+  public:
+
+  RuntimeAddress(address target) : Address(target, relocInfo::runtime_call_type) {}
+
+};
+
+class OopAddress: public Address {
+
+  public:
+
+  OopAddress(address target) : Address(target, relocInfo::oop_type){}
+
+};
+
+class ExternalAddress: public Address {
+ private:
+  static relocInfo::relocType reloc_for_target(address target) {
+    // Sometimes ExternalAddress is used for values which aren't
+    // exactly addresses, like the card table base.
+    // external_word_type can't be used for values in the first page
+    // so just skip the reloc in that case.
+    return external_word_Relocation::can_be_relocated(target) ? relocInfo::external_word_type : relocInfo::none;
+  }
+
+ public:
+
+  ExternalAddress(address target) : Address(target, reloc_for_target(target)) {}
+
+};
+
+class InternalAddress: public Address {
+
+  public:
+
+  InternalAddress(address target) : Address(target, relocInfo::internal_word_type) {}
+};
+
+const int FPUStateSizeInWords = 32 * 2;
+typedef enum {
+  PLDL1KEEP = 0b00000, PLDL1STRM, PLDL2KEEP, PLDL2STRM, PLDL3KEEP, PLDL3STRM,
+  PSTL1KEEP = 0b10000, PSTL1STRM, PSTL2KEEP, PSTL2STRM, PSTL3KEEP, PSTL3STRM,
+  PLIL1KEEP = 0b01000, PLIL1STRM, PLIL2KEEP, PLIL2STRM, PLIL3KEEP, PLIL3STRM
+} prfop;
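+
+// A prfop packs <type:2><target:2><policy:1>: for example PSTL2KEEP
+// (0b10010) is a prefetch for store, targeting the L2 cache, with the
+// "keep" (retain in cache) policy.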
+
+class Assembler : public AbstractAssembler {
+
+#ifndef PRODUCT
+  static const unsigned long asm_bp;
+
+  void emit_long(jint x) {
+    if ((unsigned long)pc() == asm_bp)
+      asm volatile ("nop");
+    AbstractAssembler::emit_int32(x);
+  }
+#else
+  void emit_long(jint x) {
+    AbstractAssembler::emit_int32(x);
+  }
+#endif
+
+public:
+
+  enum { instruction_size = 4 };
+
+  Address adjust(Register base, int offset, bool preIncrement) {
+    if (preIncrement)
+      return Address(Pre(base, offset));
+    else
+      return Address(Post(base, offset));
+  }
+
+  Address pre(Register base, int offset) {
+    return adjust(base, offset, true);
+  }
+
+  Address post (Register base, int offset) {
+    return adjust(base, offset, false);
+  }
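+
+  // For illustration, assuming the usual "__" assembler shorthand:
+  //   __ str(r0, __ pre(sp, -16));  // push r0: sp -= 16, then store
+  //   __ ldr(r0, __ post(sp, 16));  // pop r0: load, then sp += 16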
+
+  Instruction_aarch64* current;
+
+  void set_current(Instruction_aarch64* i) { current = i; }
+
+  void f(unsigned val, int msb, int lsb) {
+    current->f(val, msb, lsb);
+  }
+  void f(unsigned val, int msb) {
+    current->f(val, msb, msb);
+  }
+  void sf(long val, int msb, int lsb) {
+    current->sf(val, msb, lsb);
+  }
+  void rf(Register reg, int lsb) {
+    current->rf(reg, lsb);
+  }
+  void srf(Register reg, int lsb) {
+    current->srf(reg, lsb);
+  }
+  void zrf(Register reg, int lsb) {
+    current->zrf(reg, lsb);
+  }
+  void rf(FloatRegister reg, int lsb) {
+    current->rf(reg, lsb);
+  }
+  void fixed(unsigned value, unsigned mask) {
+    current->fixed(value, mask);
+  }
+
+  void emit() {
+    emit_long(current->get_insn());
+    assert_cond(current->get_bits() == 0xffffffff);
+    current = NULL;
+  }
+
+  typedef void (Assembler::* uncond_branch_insn)(address dest);
+  typedef void (Assembler::* compare_and_branch_insn)(Register Rt, address dest);
+  typedef void (Assembler::* test_and_branch_insn)(Register Rt, int bitpos, address dest);
+  typedef void (Assembler::* prefetch_insn)(address target, prfop);
+
+  void wrap_label(Label &L, uncond_branch_insn insn);
+  void wrap_label(Register r, Label &L, compare_and_branch_insn insn);
+  void wrap_label(Register r, int bitpos, Label &L, test_and_branch_insn insn);
+  void wrap_label(Label &L, prfop, prefetch_insn insn);
+
+  // PC-rel. addressing
+
+  void adr(Register Rd, address dest);
+  void _adrp(Register Rd, address dest);
+
+  void adr(Register Rd, const Address &dest);
+  void _adrp(Register Rd, const Address &dest);
+
+  void adr(Register Rd, Label &L) {
+    wrap_label(Rd, L, &Assembler::adr);
+  }
+  void _adrp(Register Rd, Label &L) {
+    wrap_label(Rd, L, &Assembler::_adrp);
+  }
+
+  void adrp(Register Rd, const Address &dest, unsigned long &offset);
+
+#undef INSN
+
+  void add_sub_immediate(Register Rd, Register Rn, unsigned uimm, int op,
+                         int negated_op);
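+
+  // add_sub_immediate (defined in the .cpp file) is expected to fall
+  // back to the negated opcode with a negated immediate when imm is
+  // not directly encodable as a 12-bit, optionally shifted, value;
+  // e.g. sub(r0, r1, imm) may then come out as an ADD of -imm.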
+
+  // Add/subtract (immediate)
+#define INSN(NAME, decode, negated)                                     \
+  void NAME(Register Rd, Register Rn, unsigned imm, unsigned shift) {   \
+    starti;                                                             \
+    f(decode, 31, 29), f(0b10001, 28, 24), f(shift, 23, 22), f(imm, 21, 10); \
+    zrf(Rd, 0), srf(Rn, 5);                                             \
+  }                                                                     \
+                                                                        \
+  void NAME(Register Rd, Register Rn, unsigned imm) {                   \
+    starti;                                                             \
+    add_sub_immediate(Rd, Rn, imm, decode, negated);                    \
+  }
+
+  INSN(addsw, 0b001, 0b011);
+  INSN(subsw, 0b011, 0b001);
+  INSN(adds,  0b101, 0b111);
+  INSN(subs,  0b111, 0b101);
+
+#undef INSN
+
+#define INSN(NAME, decode, negated)                     \
+  void NAME(Register Rd, Register Rn, unsigned imm) {   \
+    starti;                                             \
+    add_sub_immediate(Rd, Rn, imm, decode, negated);    \
+  }
+
+  INSN(addw, 0b000, 0b010);
+  INSN(subw, 0b010, 0b000);
+  INSN(add,  0b100, 0b110);
+  INSN(sub,  0b110, 0b100);
+
+#undef INSN
+
+ // Logical (immediate)
+#define INSN(NAME, decode, is32)                                \
+  void NAME(Register Rd, Register Rn, uint64_t imm) {           \
+    starti;                                                     \
+    uint32_t val = encode_logical_immediate(is32, imm);         \
+    f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10);     \
+    srf(Rd, 0), zrf(Rn, 5);                                     \
+  }
+
+  INSN(andw, 0b000, true);
+  INSN(orrw, 0b001, true);
+  INSN(eorw, 0b010, true);
+  INSN(andr,  0b100, false);
+  INSN(orr,  0b101, false);
+  INSN(eor,  0b110, false);
+
+#undef INSN
+
+#define INSN(NAME, decode, is32)                                \
+  void NAME(Register Rd, Register Rn, uint64_t imm) {           \
+    starti;                                                     \
+    uint32_t val = encode_logical_immediate(is32, imm);         \
+    f(decode, 31, 29), f(0b100100, 28, 23), f(val, 22, 10);     \
+    zrf(Rd, 0), zrf(Rn, 5);                                     \
+  }
+
+  INSN(ands, 0b111, false);
+  INSN(andsw, 0b011, true);
+
+#undef INSN
+
+  // Move wide (immediate)
+#define INSN(NAME, opcode)                                              \
+  void NAME(Register Rd, unsigned imm, unsigned shift = 0) {            \
+    assert_cond((shift/16)*16 == shift);                                \
+    starti;                                                             \
+    f(opcode, 31, 29), f(0b100101, 28, 23), f(shift/16, 22, 21),        \
+      f(imm, 20, 5);                                                    \
+    rf(Rd, 0);                                                          \
+  }
+
+  INSN(movnw, 0b000);
+  INSN(movzw, 0b010);
+  INSN(movkw, 0b011);
+  INSN(movn, 0b100);
+  INSN(movz, 0b110);
+  INSN(movk, 0b111);
+
+#undef INSN
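+
+  // A 64-bit constant can be built piecewise; for example:
+  //   movz(r0, 0x1234);        // r0 = 0x0000000000001234
+  //   movk(r0, 0x5678, 16);    // r0 = 0x0000000056781234
+  // movz zeroes the other halfwords, movk preserves them.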
+
+  // Bitfield
+#define INSN(NAME, opcode)                                              \
+  void NAME(Register Rd, Register Rn, unsigned immr, unsigned imms) {   \
+    starti;                                                             \
+    f(opcode, 31, 22), f(immr, 21, 16), f(imms, 15, 10);                \
+    rf(Rn, 5), rf(Rd, 0);                                               \
+  }
+
+  INSN(sbfmw, 0b0001001100);
+  INSN(bfmw,  0b0011001100);
+  INSN(ubfmw, 0b0101001100);
+  INSN(sbfm,  0b1001001101);
+  INSN(bfm,   0b1011001101);
+  INSN(ubfm,  0b1101001101);
+
+#undef INSN
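+
+  // The usual shift and extend operations are aliases of these; for
+  // example LSR x0, x1, #4 is ubfm(r0, r1, 4, 63) and SXTB w0, w1 is
+  // sbfmw(r0, r1, 0, 7).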
+
+  // Extract
+#define INSN(NAME, opcode)                                              \
+  void NAME(Register Rd, Register Rn, Register Rm, unsigned imms) {     \
+    starti;                                                             \
+    f(opcode, 31, 21), f(imms, 15, 10);                                 \
+    rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);                                   \
+  }
+
+  INSN(extrw, 0b00010011100);
+  INSN(extr,  0b10010011110);
+
+#undef INSN
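+
+  // ROR (immediate) is an alias of EXTR with both sources the same,
+  // e.g. ROR x0, x1, #n is extr(r0, r1, r1, n).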
+
+  // The maximum range of a branch is fixed for the AArch64
+  // architecture.  In debug mode we shrink it in order to test
+  // trampolines, but not so small that branches in the interpreter
+  // are out of range.
+  static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M);
+
+  static bool reachable_from_branch_at(address branch, address target) {
+    return uabs(target - branch) < branch_range;
+  }
+
+  // Unconditional branch (immediate)
+#define INSN(NAME, opcode)                                              \
+  void NAME(address dest) {                                             \
+    starti;                                                             \
+    long offset = (dest - pc()) >> 2;                                   \
+    DEBUG_ONLY(assert(reachable_from_branch_at(pc(), dest), "branch target out of range")); \
+    f(opcode, 31), f(0b00101, 30, 26), sf(offset, 25, 0);               \
+  }                                                                     \
+  void NAME(Label &L) {                                                 \
+    wrap_label(L, &Assembler::NAME);                                    \
+  }                                                                     \
+  void NAME(const Address &dest);
+
+  INSN(b, 0);
+  INSN(bl, 1);
+
+#undef INSN
+
+  // Compare & branch (immediate)
+#define INSN(NAME, opcode)                              \
+  void NAME(Register Rt, address dest) {                \
+    long offset = (dest - pc()) >> 2;                   \
+    starti;                                             \
+    f(opcode, 31, 24), sf(offset, 23, 5), rf(Rt, 0);    \
+  }                                                     \
+  void NAME(Register Rt, Label &L) {                    \
+    wrap_label(Rt, L, &Assembler::NAME);                \
+  }
+
+  INSN(cbzw,  0b00110100);
+  INSN(cbnzw, 0b00110101);
+  INSN(cbz,   0b10110100);
+  INSN(cbnz,  0b10110101);
+
+#undef INSN
+
+  // Test & branch (immediate)
+#define INSN(NAME, opcode)                                              \
+  void NAME(Register Rt, int bitpos, address dest) {                    \
+    long offset = (dest - pc()) >> 2;                                   \
+    int b5 = bitpos >> 5;                                               \
+    bitpos &= 0x1f;                                                     \
+    starti;                                                             \
+    f(b5, 31), f(opcode, 30, 24), f(bitpos, 23, 19), sf(offset, 18, 5); \
+    rf(Rt, 0);                                                          \
+  }                                                                     \
+  void NAME(Register Rt, int bitpos, Label &L) {                        \
+    wrap_label(Rt, bitpos, L, &Assembler::NAME);                        \
+  }
+
+  INSN(tbz,  0b0110110);
+  INSN(tbnz, 0b0110111);
+
+#undef INSN
+
+  // Conditional branch (immediate)
+  enum Condition
+    {EQ, NE, HS, CS=HS, LO, CC=LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV};
+
+  void br(Condition cond, address dest) {
+    long offset = (dest - pc()) >> 2;
+    starti;
+    f(0b0101010, 31, 25), f(0, 24), sf(offset, 23, 5), f(0, 4), f(cond, 3, 0);
+  }
+
+#define INSN(NAME, cond)                        \
+  void NAME(address dest) {                     \
+    br(cond, dest);                             \
+  }
+
+  INSN(beq, EQ);
+  INSN(bne, NE);
+  INSN(bhs, HS);
+  INSN(bcs, CS);
+  INSN(blo, LO);
+  INSN(bcc, CC);
+  INSN(bmi, MI);
+  INSN(bpl, PL);
+  INSN(bvs, VS);
+  INSN(bvc, VC);
+  INSN(bhi, HI);
+  INSN(bls, LS);
+  INSN(bge, GE);
+  INSN(blt, LT);
+  INSN(bgt, GT);
+  INSN(ble, LE);
+  INSN(bal, AL);
+  INSN(bnv, NV);
+
+  void br(Condition cc, Label &L);
+
+#undef INSN
+
+  // Exception generation
+  void generate_exception(int opc, int op2, int LL, unsigned imm) {
+    starti;
+    f(0b11010100, 31, 24);
+    f(opc, 23, 21), f(imm, 20, 5), f(op2, 4, 2), f(LL, 1, 0);
+  }
+
+#define INSN(NAME, opc, op2, LL)                \
+  void NAME(unsigned imm) {                     \
+    generate_exception(opc, op2, LL, imm);      \
+  }
+
+  INSN(svc, 0b000, 0, 0b01);
+  INSN(hvc, 0b000, 0, 0b10);
+  INSN(smc, 0b000, 0, 0b11);
+  INSN(brk, 0b001, 0, 0b00);
+  INSN(hlt, 0b010, 0, 0b00);
+  INSN(dcps1, 0b101, 0, 0b01);
+  INSN(dcps2, 0b101, 0, 0b10);
+  INSN(dcps3, 0b101, 0, 0b11);
+
+#undef INSN
+
+  // System
+  void system(int op0, int op1, int CRn, int CRm, int op2,
+              Register rt = (Register)0b11111)
+  {
+    starti;
+    f(0b11010101000, 31, 21);
+    f(op0, 20, 19);
+    f(op1, 18, 16);
+    f(CRn, 15, 12);
+    f(CRm, 11, 8);
+    f(op2, 7, 5);
+    rf(rt, 0);
+  }
+
+  void hint(int imm) {
+    system(0b00, 0b011, 0b0010, imm, 0b000);
+  }
+
+  void nop() {
+    hint(0);
+  }
+  // We only provide mrs and msr for the special-purpose system
+  // registers where op1 (instr[20:19]) == 11, and (currently) only
+  // use them for FPSR.  N.B. msr has L (instr[21]) == 0; mrs has L == 1.
+
+  void msr(int op1, int CRn, int CRm, int op2, Register rt) {
+    starti;
+    f(0b1101010100011, 31, 19);
+    f(op1, 18, 16);
+    f(CRn, 15, 12);
+    f(CRm, 11, 8);
+    f(op2, 7, 5);
+    // writing zr is ok
+    zrf(rt, 0);
+  }
+
+  void mrs(int op1, int CRn, int CRm, int op2, Register rt) {
+    starti;
+    f(0b1101010100111, 31, 19);
+    f(op1, 18, 16);
+    f(CRn, 15, 12);
+    f(CRm, 11, 8);
+    f(op2, 7, 5);
+    // reading to zr is a mistake
+    rf(rt, 0);
+  }
+
+  enum barrier {OSHLD = 0b0001, OSHST, OSH, NSHLD=0b0101, NSHST, NSH,
+                ISHLD = 0b1001, ISHST, ISH, LD=0b1101, ST, SY};
+
+  void dsb(barrier imm) {
+    system(0b00, 0b011, 0b0011, imm, 0b100);
+  }
+
+  void dmb(barrier imm) {
+    system(0b00, 0b011, 0b0011, imm, 0b101);
+  }
+
+  void isb() {
+    system(0b00, 0b011, 0b0011, SY, 0b110);
+  }
+
+  void dc(Register Rt) {
+    system(0b01, 0b011, 0b0111, 0b1011, 0b001, Rt);
+  }
+
+  void ic(Register Rt) {
+    system(0b01, 0b011, 0b0111, 0b0101, 0b001, Rt);
+  }
+
+  // A more convenient access to dmb for our purposes
+  enum Membar_mask_bits {
+    // We can use ISH for a barrier because the ARM ARM says "This
+    // architecture assumes that all Processing Elements that use the
+    // same operating system or hypervisor are in the same Inner
+    // Shareable shareability domain."
+    StoreStore = ISHST,
+    LoadStore  = ISHLD,
+    LoadLoad   = ISHLD,
+    StoreLoad  = ISH,
+    AnyAny     = ISH
+  };
+
+  void membar(Membar_mask_bits order_constraint) {
+    dmb(Assembler::barrier(order_constraint));
+  }
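+
+  // For example, membar(StoreStore) emits DMB ISHST, while the
+  // bidirectional constraints (StoreLoad, AnyAny) emit a full DMB ISH.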
+
+  // Unconditional branch (register)
+  void branch_reg(Register R, int opc) {
+    starti;
+    f(0b1101011, 31, 25);
+    f(opc, 24, 21);
+    f(0b11111000000, 20, 10);
+    rf(R, 5);
+    f(0b00000, 4, 0);
+  }
+
+#define INSN(NAME, opc)                         \
+  void NAME(Register R) {                       \
+    branch_reg(R, opc);                         \
+  }
+
+  INSN(br, 0b0000);
+  INSN(blr, 0b0001);
+  INSN(ret, 0b0010);
+
+  void ret(void *p); // This forces a compile-time error for ret(0)
+
+#undef INSN
+
+#define INSN(NAME, opc)                         \
+  void NAME() {                               \
+    branch_reg((Register)0b11111, opc);         \
+  }
+
+  INSN(eret, 0b0100);
+  INSN(drps, 0b0101);
+
+#undef INSN
+
+  // Load/store exclusive
+  enum operand_size { byte, halfword, word, xword };
+
+  void load_store_exclusive(Register Rs, Register Rt1, Register Rt2,
+    Register Rn, enum operand_size sz, int op, int o0) {
+    starti;
+    f(sz, 31, 30), f(0b001000, 29, 24), f(op, 23, 21);
+    rf(Rs, 16), f(o0, 15), rf(Rt2, 10), rf(Rn, 5), rf(Rt1, 0);
+  }
+
+#define INSN4(NAME, sz, op, o0) /* Four registers */                    \
+  void NAME(Register Rs, Register Rt1, Register Rt2, Register Rn) {     \
+    load_store_exclusive(Rs, Rt1, Rt2, Rn, sz, op, o0);                 \
+  }
+
+#define INSN3(NAME, sz, op, o0) /* Three registers */                   \
+  void NAME(Register Rs, Register Rt, Register Rn) {                    \
+    load_store_exclusive(Rs, Rt, (Register)0b11111, Rn, sz, op, o0);    \
+  }
+
+#define INSN2(NAME, sz, op, o0) /* Two registers */                     \
+  void NAME(Register Rt, Register Rn) {                                 \
+    load_store_exclusive((Register)0b11111, Rt, (Register)0b11111,      \
+                         Rn, sz, op, o0);                               \
+  }
+
+#define INSN_FOO(NAME, sz, op, o0) /* Three registers, encoded differently */ \
+  void NAME(Register Rt1, Register Rt2, Register Rn) {                  \
+    load_store_exclusive((Register)0b11111, Rt1, Rt2, Rn, sz, op, o0);  \
+  }
+
+  // bytes
+  INSN3(stxrb, byte, 0b000, 0);
+  INSN3(stlxrb, byte, 0b000, 1);
+  INSN2(ldxrb, byte, 0b010, 0);
+  INSN2(ldaxrb, byte, 0b010, 1);
+  INSN2(stlrb, byte, 0b100, 1);
+  INSN2(ldarb, byte, 0b110, 1);
+
+  // halfwords
+  INSN3(stxrh, halfword, 0b000, 0);
+  INSN3(stlxrh, halfword, 0b000, 1);
+  INSN2(ldxrh, halfword, 0b010, 0);
+  INSN2(ldaxrh, halfword, 0b010, 1);
+  INSN2(stlrh, halfword, 0b100, 1);
+  INSN2(ldarh, halfword, 0b110, 1);
+
+  // words
+  INSN3(stxrw, word, 0b000, 0);
+  INSN3(stlxrw, word, 0b000, 1);
+  INSN4(stxpw, word, 0b001, 0);
+  INSN4(stlxpw, word, 0b001, 1);
+  INSN2(ldxrw, word, 0b010, 0);
+  INSN2(ldaxrw, word, 0b010, 1);
+  INSN_FOO(ldxpw, word, 0b011, 0);
+  INSN_FOO(ldaxpw, word, 0b011, 1);
+  INSN2(stlrw, word, 0b100, 1);
+  INSN2(ldarw, word, 0b110, 1);
+
+  // xwords
+  INSN3(stxr, xword, 0b000, 0);
+  INSN3(stlxr, xword, 0b000, 1);
+  INSN4(stxp, xword, 0b001, 0);
+  INSN4(stlxp, xword, 0b001, 1);
+  INSN2(ldxr, xword, 0b010, 0);
+  INSN2(ldaxr, xword, 0b010, 1);
+  INSN_FOO(ldxp, xword, 0b011, 0);
+  INSN_FOO(ldaxp, xword, 0b011, 1);
+  INSN2(stlr, xword, 0b100, 1);
+  INSN2(ldar, xword, 0b110, 1);
+
+#undef INSN2
+#undef INSN3
+#undef INSN4
+#undef INSN_FOO
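+
+  // Sketch of a compare-and-swap loop built from these primitives
+  // (illustrative only; cmp is the usual MacroAssembler subs-to-zr
+  // convenience, and the register names are arbitrary):
+  //   bind(retry);
+  //   ldaxr(old, addr);            // load-acquire exclusive
+  //   cmp(old, expected);
+  //   br(NE, fail);
+  //   stlxr(scratch, newv, addr);  // store-release exclusive
+  //   cbnzw(scratch, retry);       // lost the reservation: try again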
+
+  // Load register (literal)
+#define INSN(NAME, opc, V)                                              \
+  void NAME(Register Rt, address dest) {                                \
+    long offset = (dest - pc()) >> 2;                                   \
+    starti;                                                             \
+    f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
+      sf(offset, 23, 5);                                                \
+    rf(Rt, 0);                                                          \
+  }                                                                     \
+  void NAME(Register Rt, address dest, relocInfo::relocType rtype) {    \
+    InstructionMark im(this);                                           \
+    guarantee(rtype == relocInfo::internal_word_type,                   \
+              "only internal_word_type relocs make sense here");        \
+    code_section()->relocate(inst_mark(), InternalAddress(dest).rspec()); \
+    NAME(Rt, dest);                                                     \
+  }                                                                     \
+  void NAME(Register Rt, Label &L) {                                    \
+    wrap_label(Rt, L, &Assembler::NAME);                                \
+  }
+
+  INSN(ldrw, 0b00, 0);
+  INSN(ldr, 0b01, 0);
+  INSN(ldrsw, 0b10, 0);
+
+#undef INSN
+
+#define INSN(NAME, opc, V)                                              \
+  void NAME(FloatRegister Rt, address dest) {                           \
+    long offset = (dest - pc()) >> 2;                                   \
+    starti;                                                             \
+    f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
+      sf(offset, 23, 5);                                                \
+    rf((Register)Rt, 0);                                                \
+  }
+
+  INSN(ldrs, 0b00, 1);
+  INSN(ldrd, 0b01, 1);
+  INSN(ldrq, 0b10, 1);
+
+#undef INSN
+
+#define INSN(NAME, opc, V)                                              \
+  void NAME(address dest, prfop op = PLDL1KEEP) {                       \
+    long offset = (dest - pc()) >> 2;                                   \
+    starti;                                                             \
+    f(opc, 31, 30), f(0b011, 29, 27), f(V, 26), f(0b00, 25, 24),        \
+      sf(offset, 23, 5);                                                \
+    f(op, 4, 0);                                                        \
+  }                                                                     \
+  void NAME(Label &L, prfop op = PLDL1KEEP) {                           \
+    wrap_label(L, op, &Assembler::NAME);                                \
+  }
+
+  INSN(prfm, 0b11, 0);
+
+#undef INSN
+
+  // Load/store
+  void ld_st1(int opc, int p1, int V, int L,
+              Register Rt1, Register Rt2, Address adr, bool no_allocate) {
+    starti;
+    f(opc, 31, 30), f(p1, 29, 27), f(V, 26), f(L, 22);
+    zrf(Rt2, 10), zrf(Rt1, 0);
+    if (no_allocate) {
+      adr.encode_nontemporal_pair(current);
+    } else {
+      adr.encode_pair(current);
+    }
+  }
+
+  // Load/store register pair (offset)
+#define INSN(NAME, size, p1, V, L, no_allocate)         \
+  void NAME(Register Rt1, Register Rt2, Address adr) {  \
+    ld_st1(size, p1, V, L, Rt1, Rt2, adr, no_allocate); \
+   }
+
+  INSN(stpw, 0b00, 0b101, 0, 0, false);
+  INSN(ldpw, 0b00, 0b101, 0, 1, false);
+  INSN(ldpsw, 0b01, 0b101, 0, 1, false);
+  INSN(stp, 0b10, 0b101, 0, 0, false);
+  INSN(ldp, 0b10, 0b101, 0, 1, false);
+
+  // Load/store no-allocate pair (offset)
+  INSN(stnpw, 0b00, 0b101, 0, 0, true);
+  INSN(ldnpw, 0b00, 0b101, 0, 1, true);
+  INSN(stnp, 0b10, 0b101, 0, 0, true);
+  INSN(ldnp, 0b10, 0b101, 0, 1, true);
+
+#undef INSN
+
+#define INSN(NAME, size, p1, V, L, no_allocate)                         \
+  void NAME(FloatRegister Rt1, FloatRegister Rt2, Address adr) {        \
+    ld_st1(size, p1, V, L, (Register)Rt1, (Register)Rt2, adr, no_allocate); \
+   }
+
+  INSN(stps, 0b00, 0b101, 1, 0, false);
+  INSN(ldps, 0b00, 0b101, 1, 1, false);
+  INSN(stpd, 0b01, 0b101, 1, 0, false);
+  INSN(ldpd, 0b01, 0b101, 1, 1, false);
+  INSN(stpq, 0b10, 0b101, 1, 0, false);
+  INSN(ldpq, 0b10, 0b101, 1, 1, false);
+
+#undef INSN
+
+  // Load/store register (all modes)
+  void ld_st2(Register Rt, const Address &adr, int size, int op, int V = 0) {
+    starti;
+
+    f(V, 26); // general reg?
+    zrf(Rt, 0);
+
+    // Encoding for literal loads is done here (rather than pushed
+    // down into Address::encode) because the encoding of this
+    // instruction is too different from all of the other forms to
+    // make it worth sharing.
+    if (adr.getMode() == Address::literal) {
+      assert(size == 0b10 || size == 0b11, "bad operand size in ldr");
+      assert(op == 0b01, "literal form can only be used with loads");
+      f(size & 0b01, 31, 30), f(0b011, 29, 27), f(0b00, 25, 24);
+      long offset = (adr.target() - pc()) >> 2;
+      sf(offset, 23, 5);
+      code_section()->relocate(pc(), adr.rspec());
+      return;
+    }
+
+    f(size, 31, 30);
+    f(op, 23, 22); // str
+    adr.encode(current);
+  }
+
+#define INSN(NAME, size, op)                            \
+  void NAME(Register Rt, const Address &adr) {          \
+    ld_st2(Rt, adr, size, op);                          \
+  }                                                     \
+
+  INSN(str, 0b11, 0b00);
+  INSN(strw, 0b10, 0b00);
+  INSN(strb, 0b00, 0b00);
+  INSN(strh, 0b01, 0b00);
+
+  INSN(ldr, 0b11, 0b01);
+  INSN(ldrw, 0b10, 0b01);
+  INSN(ldrb, 0b00, 0b01);
+  INSN(ldrh, 0b01, 0b01);
+
+  INSN(ldrsb, 0b00, 0b10);
+  INSN(ldrsbw, 0b00, 0b11);
+  INSN(ldrsh, 0b01, 0b10);
+  INSN(ldrshw, 0b01, 0b11);
+  INSN(ldrsw, 0b10, 0b10);
+
+#undef INSN
+
+#define INSN(NAME, size, op)                                    \
+  void NAME(const Address &adr, prfop pfop = PLDL1KEEP) {       \
+    ld_st2((Register)pfop, adr, size, op);                      \
+  }
+
+  INSN(prfm, 0b11, 0b10); // FIXME: PRFM should not be used with
+                          // writeback modes, but the assembler
+                          // doesn't enforce that.
+
+#undef INSN
+
+#define INSN(NAME, size, op)                            \
+  void NAME(FloatRegister Rt, const Address &adr) {     \
+    ld_st2((Register)Rt, adr, size, op, 1);             \
+  }
+
+  INSN(strd, 0b11, 0b00);
+  INSN(strs, 0b10, 0b00);
+  INSN(ldrd, 0b11, 0b01);
+  INSN(ldrs, 0b10, 0b01);
+  INSN(strq, 0b00, 0b10);
+  INSN(ldrq, 0b00, 0b11);
+
+#undef INSN
+
+  enum shift_kind { LSL, LSR, ASR, ROR };
+
+  void op_shifted_reg(unsigned decode,
+                      enum shift_kind kind, unsigned shift,
+                      unsigned size, unsigned op) {
+    f(size, 31);
+    f(op, 30, 29);
+    f(decode, 28, 24);
+    f(shift, 15, 10);
+    f(kind, 23, 22);
+  }
+
+  // Logical (shifted register)
+#define INSN(NAME, size, op, N)                                 \
+  void NAME(Register Rd, Register Rn, Register Rm,              \
+            enum shift_kind kind = LSL, unsigned shift = 0) {   \
+    starti;                                                     \
+    f(N, 21);                                                   \
+    zrf(Rm, 16), zrf(Rn, 5), zrf(Rd, 0);                        \
+    op_shifted_reg(0b01010, kind, shift, size, op);             \
+  }
+
+  INSN(andr, 1, 0b00, 0);
+  INSN(orr, 1, 0b01, 0);
+  INSN(eor, 1, 0b10, 0);
+  INSN(ands, 1, 0b11, 0);
+  INSN(andw, 0, 0b00, 0);
+  INSN(orrw, 0, 0b01, 0);
+  INSN(eorw, 0, 0b10, 0);
+  INSN(andsw, 0, 0b11, 0);
+
+  INSN(bic, 1, 0b00, 1);
+  INSN(orn, 1, 0b01, 1);
+  INSN(eon, 1, 0b10, 1);
+  INSN(bics, 1, 0b11, 1);
+  INSN(bicw, 0, 0b00, 1);
+  INSN(ornw, 0, 0b01, 1);
+  INSN(eonw, 0, 0b10, 1);
+  INSN(bicsw, 0, 0b11, 1);
+
+#undef INSN
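+
+  // Note that register moves come out of this group: MOV Xd, Xm is an
+  // alias of ORR Xd, XZR, Xm, i.e. orr(rd, zr, rm).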
+
+  // Add/subtract (shifted register)
+#define INSN(NAME, size, op)                            \
+  void NAME(Register Rd, Register Rn, Register Rm,      \
+            enum shift_kind kind, unsigned shift = 0) { \
+    starti;                                             \
+    f(0, 21);                                           \
+    assert_cond(kind != ROR);                           \
+    zrf(Rd, 0), zrf(Rn, 5), zrf(Rm, 16);                \
+    op_shifted_reg(0b01011, kind, shift, size, op);     \
+  }
+
+  INSN(add, 1, 0b00);
+  INSN(sub, 1, 0b10);
+  INSN(addw, 0, 0b00);
+  INSN(subw, 0, 0b10);
+
+  INSN(adds, 1, 0b01);
+  INSN(subs, 1, 0b11);
+  INSN(addsw, 0, 0b01);
+  INSN(subsw, 0, 0b11);
+
+#undef INSN
+
+  // Add/subtract (extended register)
+#define INSN(NAME, op)                                                  \
+  void NAME(Register Rd, Register Rn, Register Rm,                      \
+           ext::operation option, int amount = 0) {                     \
+    starti;                                                             \
+    zrf(Rm, 16), srf(Rn, 5), srf(Rd, 0);                                \
+    add_sub_extended_reg(op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \
+  }
+
+  void add_sub_extended_reg(unsigned op, unsigned decode,
+    Register Rd, Register Rn, Register Rm,
+    unsigned opt, ext::operation option, unsigned imm) {
+    guarantee(imm <= 4, "shift amount must be <= 4");
+    f(op, 31, 29), f(decode, 28, 24), f(opt, 23, 22), f(1, 21);
+    f(option, 15, 13), f(imm, 12, 10);
+  }
+
+  INSN(addw, 0b000);
+  INSN(subw, 0b010);
+  INSN(add, 0b100);
+  INSN(sub, 0b110);
+
+#undef INSN
+
+#define INSN(NAME, op)                                                  \
+  void NAME(Register Rd, Register Rn, Register Rm,                      \
+           ext::operation option, int amount = 0) {                     \
+    starti;                                                             \
+    zrf(Rm, 16), srf(Rn, 5), zrf(Rd, 0);                                \
+    add_sub_extended_reg(op, 0b01011, Rd, Rn, Rm, 0b00, option, amount); \
+  }
+
+  INSN(addsw, 0b001);
+  INSN(subsw, 0b011);
+  INSN(adds, 0b101);
+  INSN(subs, 0b111);
+
+#undef INSN
+
+  // Aliases for short forms of add and sub
+#define INSN(NAME)                                      \
+  void NAME(Register Rd, Register Rn, Register Rm) {    \
+    if (Rd == sp || Rn == sp)                           \
+      NAME(Rd, Rn, Rm, ext::uxtx);                      \
+    else                                                \
+      NAME(Rd, Rn, Rm, LSL);                            \
+  }
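+
+  // The extended-register form is used when sp is involved because
+  // the shifted-register encoding treats register 31 as zr, while
+  // the extended-register encoding treats it as sp.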
+
+  INSN(addw);
+  INSN(subw);
+  INSN(add);
+  INSN(sub);
+
+  INSN(addsw);
+  INSN(subsw);
+  INSN(adds);
+  INSN(subs);
+
+#undef INSN
+
+  // Add/subtract (with carry)
+  void add_sub_carry(unsigned op, Register Rd, Register Rn, Register Rm) {
+    starti;
+    f(op, 31, 29);
+    f(0b11010000, 28, 21);
+    f(0b000000, 15, 10);
+    rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);
+  }
+
+  #define INSN(NAME, op)                                \
+    void NAME(Register Rd, Register Rn, Register Rm) {  \
+      add_sub_carry(op, Rd, Rn, Rm);                    \
+    }
+
+  INSN(adcw, 0b000);
+  INSN(adcsw, 0b001);
+  INSN(sbcw, 0b010);
+  INSN(sbcsw, 0b011);
+  INSN(adc, 0b100);
+  INSN(adcs, 0b101);
+  INSN(sbc, 0b110);
+  INSN(sbcs, 0b111);
+
+#undef INSN
+
+  // Conditional compare (both kinds)
+  void conditional_compare(unsigned op, int o2, int o3,
+                           Register Rn, unsigned imm5, unsigned nzcv,
+                           unsigned cond) {
+    f(op, 31, 29);
+    f(0b11010010, 28, 21);
+    f(cond, 15, 12);
+    f(o2, 10);
+    f(o3, 4);
+    f(nzcv, 3, 0);
+    f(imm5, 20, 16), rf(Rn, 5);
+  }
+
+#define INSN(NAME, op)                                                  \
+  void NAME(Register Rn, Register Rm, int imm, Condition cond) {        \
+    starti;                                                             \
+    f(0, 11);                                                           \
+    conditional_compare(op, 0, 0, Rn, (uintptr_t)Rm, imm, cond);        \
+  }                                                                     \
+                                                                        \
+  void NAME(Register Rn, int imm5, int imm, Condition cond) {   \
+    starti;                                                             \
+    f(1, 11);                                                           \
+    conditional_compare(op, 0, 0, Rn, imm5, imm, cond);                 \
+  }
+
+  INSN(ccmnw, 0b001);
+  INSN(ccmpw, 0b011);
+  INSN(ccmn, 0b101);
+  INSN(ccmp, 0b111);
+
+#undef INSN
+
+  // Conditional select
+  void conditional_select(unsigned op, unsigned op2,
+                          Register Rd, Register Rn, Register Rm,
+                          unsigned cond) {
+    starti;
+    f(op, 31, 29);
+    f(0b11010100, 28, 21);
+    f(cond, 15, 12);
+    f(op2, 11, 10);
+    zrf(Rm, 16), zrf(Rn, 5), rf(Rd, 0);
+  }
+
+#define INSN(NAME, op, op2)                                             \
+  void NAME(Register Rd, Register Rn, Register Rm, Condition cond) { \
+    conditional_select(op, op2, Rd, Rn, Rm, cond);                      \
+  }
+
+  INSN(cselw, 0b000, 0b00);
+  INSN(csincw, 0b000, 0b01);
+  INSN(csinvw, 0b010, 0b00);
+  INSN(csnegw, 0b010, 0b01);
+  INSN(csel, 0b100, 0b00);
+  INSN(csinc, 0b100, 0b01);
+  INSN(csinv, 0b110, 0b00);
+  INSN(csneg, 0b110, 0b01);
+
+#undef INSN
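+
+  // The conditional set/increment/negate pseudo-instructions are
+  // aliases of these; e.g. CSET Xd, cond is csinc(rd, zr, zr, ~cond)
+  // and CNEG Xd, Xn, cond is csneg(rd, rn, rn, ~cond), with ~ the
+  // condition-invert operator defined at the end of this file.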
+
+  // Data processing
+  void data_processing(unsigned op29, unsigned opcode,
+                       Register Rd, Register Rn) {
+    f(op29, 31, 29), f(0b11010110, 28, 21);
+    f(opcode, 15, 10);
+    rf(Rn, 5), rf(Rd, 0);
+  }
+
+  // (1 source)
+#define INSN(NAME, op29, opcode2, opcode)       \
+  void NAME(Register Rd, Register Rn) {         \
+    starti;                                     \
+    f(opcode2, 20, 16);                         \
+    data_processing(op29, opcode, Rd, Rn);      \
+  }
+
+  INSN(rbitw,  0b010, 0b00000, 0b00000);
+  INSN(rev16w, 0b010, 0b00000, 0b00001);
+  INSN(revw,   0b010, 0b00000, 0b00010);
+  INSN(clzw,   0b010, 0b00000, 0b00100);
+  INSN(clsw,   0b010, 0b00000, 0b00101);
+
+  INSN(rbit,   0b110, 0b00000, 0b00000);
+  INSN(rev16,  0b110, 0b00000, 0b00001);
+  INSN(rev32,  0b110, 0b00000, 0b00010);
+  INSN(rev,    0b110, 0b00000, 0b00011);
+  INSN(clz,    0b110, 0b00000, 0b00100);
+  INSN(cls,    0b110, 0b00000, 0b00101);
+
+#undef INSN
+
+  // (2 sources)
+#define INSN(NAME, op29, opcode)                        \
+  void NAME(Register Rd, Register Rn, Register Rm) {    \
+    starti;                                             \
+    rf(Rm, 16);                                         \
+    data_processing(op29, opcode, Rd, Rn);              \
+  }
+
+  INSN(udivw, 0b000, 0b000010);
+  INSN(sdivw, 0b000, 0b000011);
+  INSN(lslvw, 0b000, 0b001000);
+  INSN(lsrvw, 0b000, 0b001001);
+  INSN(asrvw, 0b000, 0b001010);
+  INSN(rorvw, 0b000, 0b001011);
+
+  INSN(udiv, 0b100, 0b000010);
+  INSN(sdiv, 0b100, 0b000011);
+  INSN(lslv, 0b100, 0b001000);
+  INSN(lsrv, 0b100, 0b001001);
+  INSN(asrv, 0b100, 0b001010);
+  INSN(rorv, 0b100, 0b001011);
+
+#undef INSN
+
+  // (3 sources)
+  void data_processing(unsigned op54, unsigned op31, unsigned o0,
+                       Register Rd, Register Rn, Register Rm,
+                       Register Ra) {
+    starti;
+    f(op54, 31, 29), f(0b11011, 28, 24);
+    f(op31, 23, 21), f(o0, 15);
+    zrf(Rm, 16), zrf(Ra, 10), zrf(Rn, 5), zrf(Rd, 0);
+  }
+
+#define INSN(NAME, op54, op31, o0)                                      \
+  void NAME(Register Rd, Register Rn, Register Rm, Register Ra) {       \
+    data_processing(op54, op31, o0, Rd, Rn, Rm, Ra);                    \
+  }
+
+  INSN(maddw, 0b000, 0b000, 0);
+  INSN(msubw, 0b000, 0b000, 1);
+  INSN(madd, 0b100, 0b000, 0);
+  INSN(msub, 0b100, 0b000, 1);
+  INSN(smaddl, 0b100, 0b001, 0);
+  INSN(smsubl, 0b100, 0b001, 1);
+  INSN(umaddl, 0b100, 0b101, 0);
+  INSN(umsubl, 0b100, 0b101, 1);
+
+#undef INSN
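+
+  // MUL Xd, Xn, Xm is an alias of MADD with a zero addend, i.e.
+  // madd(rd, rn, rm, zr).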
+
+#define INSN(NAME, op54, op31, o0)                      \
+  void NAME(Register Rd, Register Rn, Register Rm) {    \
+    data_processing(op54, op31, o0, Rd, Rn, Rm, (Register)31);  \
+  }
+
+  INSN(smulh, 0b100, 0b010, 0);
+  INSN(umulh, 0b100, 0b110, 0);
+
+#undef INSN
+
+  // Floating-point data-processing (1 source)
+  void data_processing(unsigned op31, unsigned type, unsigned opcode,
+                       FloatRegister Vd, FloatRegister Vn) {
+    starti;
+    f(op31, 31, 29);
+    f(0b11110, 28, 24);
+    f(type, 23, 22), f(1, 21), f(opcode, 20, 15), f(0b10000, 14, 10);
+    rf(Vn, 5), rf(Vd, 0);
+  }
+
+#define INSN(NAME, op31, type, opcode)                  \
+  void NAME(FloatRegister Vd, FloatRegister Vn) {       \
+    data_processing(op31, type, opcode, Vd, Vn);        \
+  }
+
+private:
+  INSN(i_fmovs, 0b000, 0b00, 0b000000);
+public:
+  INSN(fabss, 0b000, 0b00, 0b000001);
+  INSN(fnegs, 0b000, 0b00, 0b000010);
+  INSN(fsqrts, 0b000, 0b00, 0b000011);
+  INSN(fcvts, 0b000, 0b00, 0b000101);   // Single-precision to double-precision
+
+private:
+  INSN(i_fmovd, 0b000, 0b01, 0b000000);
+public:
+  INSN(fabsd, 0b000, 0b01, 0b000001);
+  INSN(fnegd, 0b000, 0b01, 0b000010);
+  INSN(fsqrtd, 0b000, 0b01, 0b000011);
+  INSN(fcvtd, 0b000, 0b01, 0b000100);   // Double-precision to single-precision
+
+  void fmovd(FloatRegister Vd, FloatRegister Vn) {
+    assert(Vd != Vn, "should be");
+    i_fmovd(Vd, Vn);
+  }
+
+  void fmovs(FloatRegister Vd, FloatRegister Vn) {
+    assert(Vd != Vn, "should be");
+    i_fmovs(Vd, Vn);
+  }
+
+#undef INSN
+
+  // Floating-point data-processing (2 source)
+  void data_processing(unsigned op31, unsigned type, unsigned opcode,
+                       FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {
+    starti;
+    f(op31, 31, 29);
+    f(0b11110, 28, 24);
+    f(type, 23, 22), f(1, 21), f(opcode, 15, 12), f(0b10, 11, 10);
+    rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
+  }
+
+#define INSN(NAME, op31, type, opcode)                  \
+  void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm) {     \
+    data_processing(op31, type, opcode, Vd, Vn, Vm);    \
+  }
+
+  INSN(fmuls, 0b000, 0b00, 0b0000);
+  INSN(fdivs, 0b000, 0b00, 0b0001);
+  INSN(fadds, 0b000, 0b00, 0b0010);
+  INSN(fsubs, 0b000, 0b00, 0b0011);
+  INSN(fnmuls, 0b000, 0b00, 0b1000);
+
+  INSN(fmuld, 0b000, 0b01, 0b0000);
+  INSN(fdivd, 0b000, 0b01, 0b0001);
+  INSN(faddd, 0b000, 0b01, 0b0010);
+  INSN(fsubd, 0b000, 0b01, 0b0011);
+  INSN(fnmuld, 0b000, 0b01, 0b1000);
+
+#undef INSN
+
+   // Floating-point data-processing (3 source)
+  void data_processing(unsigned op31, unsigned type, unsigned o1, unsigned o0,
+                       FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,
+                       FloatRegister Va) {
+    starti;
+    f(op31, 31, 29);
+    f(0b11111, 28, 24);
+    f(type, 23, 22), f(o1, 21), f(o0, 15);
+    rf(Vm, 16), rf(Va, 10), rf(Vn, 5), rf(Vd, 0);
+  }
+
+#define INSN(NAME, op31, type, o1, o0)                                  \
+  void NAME(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,       \
+            FloatRegister Va) {                                         \
+    data_processing(op31, type, o1, o0, Vd, Vn, Vm, Va);                \
+  }
+
+  INSN(fmadds, 0b000, 0b00, 0, 0);
+  INSN(fmsubs, 0b000, 0b00, 0, 1);
+  INSN(fnmadds, 0b000, 0b00, 1, 0);
+  INSN(fnmsubs, 0b000, 0b00, 1, 1);
+
+  INSN(fmaddd, 0b000, 0b01, 0, 0);
+  INSN(fmsubd, 0b000, 0b01, 0, 1);
+  INSN(fnmaddd, 0b000, 0b01, 1, 0);
+  INSN(fnmsub, 0b000, 0b01, 1, 1);
+
+#undef INSN
+
+   // Floating-point conditional select
+  void fp_conditional_select(unsigned op31, unsigned type,
+                             unsigned op1, unsigned op2,
+                             Condition cond, FloatRegister Vd,
+                             FloatRegister Vn, FloatRegister Vm) {
+    starti;
+    f(op31, 31, 29);
+    f(0b11110, 28, 24);
+    f(type, 23, 22);
+    f(op1, 21, 21);
+    f(op2, 11, 10);
+    f(cond, 15, 12);
+    rf(Vm, 16), rf(Vn, 5), rf(Vd, 0);
+  }
+
+#define INSN(NAME, op31, type, op1, op2)                                \
+  void NAME(FloatRegister Vd, FloatRegister Vn,                         \
+            FloatRegister Vm, Condition cond) {                         \
+    fp_conditional_select(op31, type, op1, op2, cond, Vd, Vn, Vm);      \
+  }
+
+  INSN(fcsels, 0b000, 0b00, 0b1, 0b11);
+  INSN(fcseld, 0b000, 0b01, 0b1, 0b11);
+
+#undef INSN
+
+   // Floating-point<->integer conversions
+  void float_int_convert(unsigned op31, unsigned type,
+                         unsigned rmode, unsigned opcode,
+                         Register Rd, Register Rn) {
+    starti;
+    f(op31, 31, 29);
+    f(0b11110, 28, 24);
+    f(type, 23, 22), f(1, 21), f(rmode, 20, 19);
+    f(opcode, 18, 16), f(0b000000, 15, 10);
+    zrf(Rn, 5), zrf(Rd, 0);
+  }
+
+#define INSN(NAME, op31, type, rmode, opcode)                           \
+  void NAME(Register Rd, FloatRegister Vn) {                            \
+    float_int_convert(op31, type, rmode, opcode, Rd, (Register)Vn);     \
+  }
+
+  INSN(fcvtzsw, 0b000, 0b00, 0b11, 0b000);
+  INSN(fcvtzs,  0b100, 0b00, 0b11, 0b000);
+  INSN(fcvtzdw, 0b000, 0b01, 0b11, 0b000);
+  INSN(fcvtzd,  0b100, 0b01, 0b11, 0b000);
+
+  INSN(fmovs, 0b000, 0b00, 0b00, 0b110);
+  INSN(fmovd, 0b100, 0b01, 0b00, 0b110);
+
+  // INSN(fmovhid, 0b100, 0b10, 0b01, 0b110);
+
+#undef INSN
+
+#define INSN(NAME, op31, type, rmode, opcode)                           \
+  void NAME(FloatRegister Vd, Register Rn) {                            \
+    float_int_convert(op31, type, rmode, opcode, (Register)Vd, Rn);     \
+  }
+
+  INSN(fmovs, 0b000, 0b00, 0b00, 0b111);
+  INSN(fmovd, 0b100, 0b01, 0b00, 0b111);
+
+  INSN(scvtfws, 0b000, 0b00, 0b00, 0b010);
+  INSN(scvtfs,  0b100, 0b00, 0b00, 0b010);
+  INSN(scvtfwd, 0b000, 0b01, 0b00, 0b010);
+  INSN(scvtfd,  0b100, 0b01, 0b00, 0b010);
+
+  // INSN(fmovhid, 0b100, 0b10, 0b01, 0b111);
+
+#undef INSN
+
+  // Floating-point compare
+  void float_compare(unsigned op31, unsigned type,
+                     unsigned op, unsigned op2,
+                     FloatRegister Vn, FloatRegister Vm = (FloatRegister)0) {
+    starti;
+    f(op31, 31, 29);
+    f(0b11110, 28, 24);
+    f(type, 23, 22), f(1, 21);
+    f(op, 15, 14), f(0b1000, 13, 10), f(op2, 4, 0);
+    rf(Vn, 5), rf(Vm, 16);
+  }
+
+
+#define INSN(NAME, op31, type, op, op2)                 \
+  void NAME(FloatRegister Vn, FloatRegister Vm) {       \
+    float_compare(op31, type, op, op2, Vn, Vm);         \
+  }
+
+#define INSN1(NAME, op31, type, op, op2)        \
+  void NAME(FloatRegister Vn, double d) {       \
+    assert_cond(d == 0.0);                      \
+    float_compare(op31, type, op, op2, Vn);     \
+  }
+
+  INSN(fcmps, 0b000, 0b00, 0b00, 0b00000);
+  INSN1(fcmps, 0b000, 0b00, 0b00, 0b01000);
+  // INSN(fcmpes, 0b000, 0b00, 0b00, 0b10000);
+  // INSN1(fcmpes, 0b000, 0b00, 0b00, 0b11000);
+
+  INSN(fcmpd, 0b000,   0b01, 0b00, 0b00000);
+  INSN1(fcmpd, 0b000,  0b01, 0b00, 0b01000);
+  // INSN(fcmped, 0b000,  0b01, 0b00, 0b10000);
+  // INSN1(fcmped, 0b000, 0b01, 0b00, 0b11000);
+
+#undef INSN
+#undef INSN1
+
+  // Floating-point Move (immediate)
+private:
+  unsigned pack(double value);
+
+  void fmov_imm(FloatRegister Vn, double value, unsigned size) {
+    starti;
+    f(0b00011110, 31, 24), f(size, 23, 22), f(1, 21);
+    f(pack(value), 20, 13), f(0b10000000, 12, 5);
+    rf(Vn, 0);
+  }
+
+public:
+
+  void fmovs(FloatRegister Vn, double value) {
+    if (value)
+      fmov_imm(Vn, value, 0b00);
+    else
+      fmovs(Vn, zr);
+  }
+  void fmovd(FloatRegister Vn, double value) {
+    if (value)
+      fmov_imm(Vn, value, 0b01);
+    else
+      fmovd(Vn, zr);
+  }
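+
+  // 0.0 cannot be encoded as an 8-bit floating-point immediate, so
+  // the zero case above is handled by moving zr into the register.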
+
+/* SIMD extensions
+ *
+ * We just use FloatRegister in the following. They are exactly the same
+ * as SIMD registers.
+ */
+ public:
+
+  enum SIMD_Arrangement {
+       T8B, T16B, T4H, T8H, T2S, T4S, T1D, T2D
+  };
+
+  enum SIMD_RegVariant {
+       S32, D64, Q128
+  };
+
+
+ private:
+
+  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn, int op1, int op2) {
+    starti;
+    f(0, 31), f((int)T & 1, 30);
+    f(op1, 29, 21), f(0, 20, 16), f(op2, 15, 12);
+    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
+  }
+  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
+             int imm, int op1, int op2) {
+    starti;
+    f(0, 31), f((int)T & 1, 30);
+    f(op1 | 0b100, 29, 21), f(0b11111, 20, 16), f(op2, 15, 12);
+    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
+  }
+  void ld_st(FloatRegister Vt, SIMD_Arrangement T, Register Xn,
+             Register Xm, int op1, int op2) {
+    starti;
+    f(0, 31), f((int)T & 1, 30);
+    f(op1 | 0b100, 29, 21), rf(Xm, 16), f(op2, 15, 12);
+    f((int)T >> 1, 11, 10), rf(Xn, 5), rf(Vt, 0);
+  }
+
+ void ld_st(FloatRegister Vt, SIMD_Arrangement T, Address a, int op1, int op2) {
+   switch (a.getMode()) {
+   case Address::base_plus_offset:
+     guarantee(a.offset() == 0, "no offset allowed here");
+     ld_st(Vt, T, a.base(), op1, op2);
+     break;
+   case Address::post:
+     ld_st(Vt, T, a.base(), a.offset(), op1, op2);
+     break;
+   case Address::base_plus_offset_reg:
+     ld_st(Vt, T, a.base(), a.index(), op1, op2);
+     break;
+   default:
+     ShouldNotReachHere();
+   }
+ }
+
+ public:
+
+#define INSN1(NAME, op1, op2)                                   \
+  void NAME(FloatRegister Vt, SIMD_Arrangement T, const Address &a) {   \
+   ld_st(Vt, T, a, op1, op2);                                           \
+ }
+
+#define INSN2(NAME, op1, op2)                                           \
+  void NAME(FloatRegister Vt, FloatRegister Vt2, SIMD_Arrangement T, const Address &a) { \
+    assert(Vt->successor() == Vt2, "Registers must be ordered");        \
+    ld_st(Vt, T, a, op1, op2);                                          \
+  }
+
+#define INSN3(NAME, op1, op2)                                           \
+  void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,     \
+            SIMD_Arrangement T, const Address &a) {                     \
+    assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3,           \
+           "Registers must be ordered");                                \
+    ld_st(Vt, T, a, op1, op2);                                          \
+  }
+
+#define INSN4(NAME, op1, op2)                                           \
+  void NAME(FloatRegister Vt, FloatRegister Vt2, FloatRegister Vt3,     \
+            FloatRegister Vt4, SIMD_Arrangement T, const Address &a) {  \
+    assert(Vt->successor() == Vt2 && Vt2->successor() == Vt3 &&         \
+           Vt3->successor() == Vt4, "Registers must be ordered");       \
+    ld_st(Vt, T, a, op1, op2);                                          \
+  }
+
+  INSN1(ld1,  0b001100010, 0b0111);
+  INSN2(ld1,  0b001100010, 0b1010);
+  INSN3(ld1,  0b001100010, 0b0110);
+  INSN4(ld1,  0b001100010, 0b0010);
+
+  INSN2(ld2,  0b001100010, 0b1000);
+  INSN3(ld3,  0b001100010, 0b0100);
+  INSN4(ld4,  0b001100010, 0b0000);
+
+  INSN1(st1,  0b001100000, 0b0111);
+  INSN2(st1,  0b001100000, 0b1010);
+  INSN3(st1,  0b001100000, 0b0110);
+  INSN4(st1,  0b001100000, 0b0010);
+
+  INSN2(st2,  0b001100000, 0b1000);
+  INSN3(st3,  0b001100000, 0b0100);
+  INSN4(st4,  0b001100000, 0b0000);
+
+  INSN1(ld1r, 0b001101010, 0b1100);
+  INSN2(ld2r, 0b001101011, 0b1100);
+  INSN3(ld3r, 0b001101010, 0b1110);
+  INSN4(ld4r, 0b001101011, 0b1110);
+
+#undef INSN1
+#undef INSN2
+#undef INSN3
+#undef INSN4
+
+#define INSN(NAME, opc)                                                                 \
+  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
+    starti;                                                                             \
+    assert(T == T8B || T == T16B, "must be T8B or T16B");                               \
+    f(0, 31), f((int)T & 1, 30), f(opc, 29, 21);                                        \
+    rf(Vm, 16), f(0b000111, 15, 10), rf(Vn, 5), rf(Vd, 0);                              \
+  }
+
+  INSN(eor, 0b101110001);
+  INSN(orr, 0b001110101);
+  INSN(andr, 0b001110001);
+  INSN(bic, 0b001110011);
+  INSN(bif, 0b101110111);
+  INSN(bit, 0b101110101);
+  INSN(bsl, 0b101110011);
+  INSN(orn, 0b001110111);
+
+#undef INSN
+
+#define INSN(NAME, opc)                                                                 \
+  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
+    starti;                                                                             \
+    f(0, 31), f((int)T & 1, 30), f(opc, 29), f(0b01110, 28, 24);                        \
+    f((int)T >> 1, 23, 22), f(1, 21), rf(Vm, 16), f(0b100001, 15, 10);                  \
+    rf(Vn, 5), rf(Vd, 0);                                                               \
+  }
+
+  INSN(addv, 0);
+  INSN(subv, 1);
+
+#undef INSN
+
+#define INSN(NAME, opc)                                                                 \
+  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \
+    starti;                                                                             \
+    assert(T == T4S, "arrangement must be T4S");                                        \
+    f(0b01011110000, 31, 21), rf(Vm, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0);         \
+  }
+
+  INSN(sha1c,     0b000000);
+  INSN(sha1m,     0b001000);
+  INSN(sha1p,     0b000100);
+  INSN(sha1su0,   0b001100);
+  INSN(sha256h2,  0b010100);
+  INSN(sha256h,   0b010000);
+  INSN(sha256su1, 0b011000);
+
+#undef INSN
+
+#define INSN(NAME, opc)                                                                 \
+  void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn) {                   \
+    starti;                                                                             \
+    assert(T == T4S, "arrangement must be T4S");                                        \
+    f(0b0101111000101000, 31, 16), f(opc, 15, 10), rf(Vn, 5), rf(Vd, 0);                \
+  }
+
+  INSN(sha1h,     0b000010);
+  INSN(sha1su1,   0b000110);
+  INSN(sha256su0, 0b001010);
+
+#undef INSN
+
+#define INSN(NAME, opc)                           \
+  void NAME(FloatRegister Vd, FloatRegister Vn) { \
+    starti;                                       \
+    f(opc, 31, 10), rf(Vn, 5), rf(Vd, 0);         \
+  }
+
+  INSN(aese, 0b0100111000101000010010);
+  INSN(aesd, 0b0100111000101000010110);
+  INSN(aesmc, 0b0100111000101000011010);
+  INSN(aesimc, 0b0100111000101000011110);
+
+#undef INSN
+
+  void shl(FloatRegister Vd, FloatRegister Vn, SIMD_Arrangement T, int shift){
+    starti;
+    /* The encodings for the immh:immb fields (bits 22:16) are
+     *   0001 xxx       8B/16B, shift = xxx
+     *   001x xxx       4H/8H,  shift = xxxx
+     *   01xx xxx       2S/4S,  shift = xxxxx
+     *   1xxx xxx       1D/2D,  shift = xxxxxx (1D is RESERVED)
+     */
+    assert((1 << ((T>>1)+3)) > shift, "Invalid shift value");
+    f(0, 31), f(T & 1, 30), f(0b0011110, 29, 23), f((1 << ((T>>1)+3))|shift, 22, 16);
+    f(0b010101, 15, 10), rf(Vn, 5), rf(Vd, 0);
+  }
+
+  void ushll(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, SIMD_Arrangement Tb, int shift) {
+    starti;
+    /* The encodings for the immh:immb fields (bits 22:16) are
+     *   0001 xxx       8H, 8B/16B, shift = xxx
+     *   001x xxx       4S, 4H/8H,  shift = xxxx
+     *   01xx xxx       2D, 2S/4S,  shift = xxxxx
+     *   1xxx xxx       RESERVED
+     */
+    assert((Tb >> 1) + 1 == (Ta >> 1), "Incompatible arrangement");
+    assert((1 << ((Tb>>1)+3)) > shift, "Invalid shift value");
+    f(0, 31), f(Tb & 1, 30), f(0b1011110, 29, 23), f((1 << ((Tb>>1)+3))|shift, 22, 16);
+    f(0b101001, 15, 10), rf(Vn, 5), rf(Vd, 0);
+  }
+  void ushll2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn,  SIMD_Arrangement Tb, int shift) {
+    ushll(Vd, Ta, Vn, Tb, shift);
+  }
+
+  void uzp1(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,  SIMD_Arrangement T, int op = 0){
+    starti;
+    f(0, 31), f((T & 0x1), 30), f(0b001110, 29, 24), f((T >> 1), 23, 22), f(0, 21);
+    rf(Vm, 16), f(0, 15), f(op, 14), f(0b0110, 13, 10), rf(Vn, 5), rf(Vd, 0);
+  }
+  void uzp2(FloatRegister Vd, FloatRegister Vn, FloatRegister Vm,  SIMD_Arrangement T){
+    uzp1(Vd, Vn, Vm, T, 1);
+  }
+
+  // Move from general purpose register
+  //   mov  Vd.T[index], Rn
+  void mov(FloatRegister Vd, SIMD_Arrangement T, int index, Register Xn) {
+    starti;
+    f(0b01001110000, 31, 21), f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
+    f(0b000111, 15, 10), rf(Xn, 5), rf(Vd, 0);
+  }
+
+  // Move to general purpose register
+  //   mov  Rd, Vn.T[index]
+  void mov(Register Xd, FloatRegister Vn, SIMD_Arrangement T, int index) {
+    starti;
+    f(0, 31), f((T >= T1D) ? 1:0, 30), f(0b001110000, 29, 21);
+    f(((1 << (T >> 1)) | (index << ((T >> 1) + 1))), 20, 16);
+    f(0b001111, 15, 10), rf(Vn, 5), rf(Xd, 0);
+  }
+
+  // We do not handle the 1Q arrangement.
+  void pmull(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
+    starti;
+    assert(Ta == T8H && (Tb == T8B || Tb == T16B), "Invalid Size specifier");
+    f(0, 31), f(Tb & 1, 30), f(0b001110001, 29, 21), rf(Vm, 16), f(0b111000, 15, 10);
+    rf(Vn, 5), rf(Vd, 0);
+  }
+  void pmull2(FloatRegister Vd, SIMD_Arrangement Ta, FloatRegister Vn, FloatRegister Vm, SIMD_Arrangement Tb) {
+    pmull(Vd, Ta, Vn, Vm, Tb);
+  }
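+
+  // pmull2 differs from pmull only in taking its operands from the
+  // high halves of Vn/Vm; the shared encoding above captures this via
+  // the Q bit (Tb & 1, i.e. Tb == T16B).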
+
+  void uqxtn(FloatRegister Vd, SIMD_Arrangement Tb, FloatRegister Vn, SIMD_Arrangement Ta) {
+    starti;
+    int size_b = (int)Tb >> 1;
+    int size_a = (int)Ta >> 1;
+    assert(size_b < 3 && size_b == size_a - 1, "Invalid size specifier");
+    f(0, 31), f(Tb & 1, 30), f(0b101110, 29, 24), f(size_b, 23, 22);
+    f(0b100001010010, 21, 10), rf(Vn, 5), rf(Vd, 0);
+  }
+
+  void rev32(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn)
+  {
+    starti;
+    assert(T <= T8H, "must be one of T8B, T16B, T4H, T8H");
+    f(0, 31), f((int)T & 1, 30), f(0b101110, 29, 24);
+    f(T <= T16B ? 0b00 : 0b01, 23, 22), f(0b100000000010, 21, 10);
+    rf(Vn, 5), rf(Vd, 0);
+  }
+
+  // CRC32 instructions
+#define INSN(NAME, sf, sz)                                                \
+  void NAME(Register Rd, Register Rn, Register Rm) {                      \
+    starti;                                                               \
+    f(sf, 31), f(0b0011010110, 30, 21), f(0b0100, 15, 12), f(sz, 11, 10); \
+    rf(Rm, 16), rf(Rn, 5), rf(Rd, 0);                                     \
+  }
+
+  INSN(crc32b, 0, 0b00);
+  INSN(crc32h, 0, 0b01);
+  INSN(crc32w, 0, 0b10);
+  INSN(crc32x, 1, 0b11);
+
+#undef INSN
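+
+  // For example, crc32w(r0, r0, r1) folds the four bytes in w1 into
+  // the running checksum in w0 using the CRC-32 polynomial 0x04C11DB7
+  // (the crc32c variants with polynomial 0x1EDC6F41 are not defined
+  // here).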
+
+
+/* Simulator extensions to the ISA
+
+   haltsim
+
+   takes no arguments; causes the simulator to enter a debug break and
+   then return from the simulator run() call with STATUS_HALT. The
+   linking code will call fatal() when it sees STATUS_HALT.
+
+   blrt Xn, Wm
+   blrt Xn, #gpargs, #fpargs, #type
+   Xn holds the 64 bit x86 branch_address
+   call format is encoded either as immediate data in the call
+   or in register Wm. In the latter case
+     Wm[13..6] = #gpargs,
+     Wm[5..2] = #fpargs,
+     Wm[1,0] = #type
+
+   calls the x86 code address 'branch_address' supplied in Xn, passing
+   arguments taken from the general and floating point registers according
+   to the supplied counts 'gpargs' and 'fpargs'. may return a result in r0
+   or v0 according to the return type 'type', where
+
+   address branch_address;
+   uimm4 gpargs;
+   uimm4 fpargs;
+   enum ReturnType type;
+
+   enum ReturnType
+     {
+       void_ret = 0,
+       int_ret = 1,
+       long_ret = 1,
+       obj_ret = 1, // i.e. same as long
+       float_ret = 2,
+       double_ret = 3
+     }
+
+   notify
+
+   notifies the simulator of a transfer of control. instr[14:0]
+   identifies the type of change of control.
+
+   0 ==> initial entry to a method.
+
+   1 ==> return into a method from a submethod call.
+
+   2 ==> exit out of Java method code.
+
+   3 ==> start execution for a new bytecode.
+
+   in cases 1 and 2 the simulator is expected to use a JVM callback to
+   identify the name of the specific method being executed. in case 3
+   the simulator is expected to use a JVM callback to identify the
+   bytecode index.
+
+   Instruction encodings
+   ---------------------
+
+   These are encoded in the space with instr[28:25] = 00 which is
+   unallocated. Encodings are
+
+                     10987654321098765432109876543210
+   PSEUDO_HALT   = 0b11100000000000000000000000000000
+   PSEUDO_BLRT   = 0b11000000000000000_______________
+   PSEUDO_BLRTR  = 0b1100000000000000100000__________
+   PSEUDO_NOTIFY = 0b10100000000000000_______________
+
+   instr[31,29] = op1 : 111 ==> HALT, 110 ==> BLRT/BLRTR, 101 ==> NOTIFY
+
+   for BLRT
+     instr[14,11] = #gpargs, instr[10,7] = #fpargs
+     instr[6,5] = #type, instr[4,0] = Rn
+   for BLRTR
+     instr[9,5] = Rm, instr[4,0] = Rn
+   for NOTIFY
+     instr[14:0] = type : 0 ==> entry, 1 ==> reentry, 2 ==> exit, 3 ==> bcstart
+*/
+
+  enum NotifyType { method_entry, method_reentry, method_exit, bytecode_start };
+
+  virtual void notify(int type) {
+    if (UseBuiltinSim) {
+      starti;
+      //  109
+      f(0b101, 31, 29);
+      //  87654321098765
+      f(0b00000000000000, 28, 15);
+      f(type, 14, 0);
+    }
+  }
+
+  void blrt(Register Rn, int gpargs, int fpargs, int type) {
+    if (UseBuiltinSim) {
+      starti;
+      f(0b110, 31, 29);
+      f(0b00, 28, 25);
+      //  4321098765
+      f(0b0000000000, 24, 15);
+      f(gpargs, 14, 11);
+      f(fpargs, 10, 7);
+      f(type, 6, 5);
+      rf(Rn, 0);
+    } else {
+      blr(Rn);
+    }
+  }
+
+  void blrt(Register Rn, Register Rm) {
+    if (UseBuiltinSim) {
+      starti;
+      f(0b110, 31, 29);
+      f(0b00, 28, 25);
+      //  4321098765
+      f(0b0000000001, 24, 15);
+      //  43210
+      f(0b00000, 14, 10);
+      rf(Rm, 5);
+      rf(Rn, 0);
+    } else {
+      blr(Rn);
+    }
+  }
+
+  void haltsim() {
+    starti;
+    f(0b111, 31, 29);
+    f(0b00, 28, 27);
+    //  654321098765432109876543210
+    f(0b000000000000000000000000000, 26, 0);
+  }
+
+  Assembler(CodeBuffer* code) : AbstractAssembler(code) {
+  }
+
+  virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr,
+                                                Register tmp,
+                                                int offset) {
+    ShouldNotCallThis();
+    return RegisterOrConstant();
+  }
+
+  // Stack overflow checking
+  virtual void bang_stack_with_offset(int offset);
+
+  static bool operand_valid_for_logical_immediate(bool is32, uint64_t imm);
+  static bool operand_valid_for_add_sub_immediate(long imm);
+  static bool operand_valid_for_float_immediate(double imm);
+
+  void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0);
+  void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0);
+};
+
+inline Assembler::Membar_mask_bits operator|(Assembler::Membar_mask_bits a,
+                                             Assembler::Membar_mask_bits b) {
+  return Assembler::Membar_mask_bits(unsigned(a)|unsigned(b));
+}
+
+Instruction_aarch64::~Instruction_aarch64() {
+  assem->emit();
+}
+
+#undef starti
+
+// Invert a condition
+inline const Assembler::Condition operator~(const Assembler::Condition cond) {
+  return Assembler::Condition(int(cond) ^ 1);
+}
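
This works because the AArch64 condition encodings come in complementary
pairs differing only in bit 0 (EQ = 0b0000 / NE = 0b0001, GE = 0b1010 /
LT = 0b1011, and so on), so XOR with 1 yields the inverse test. The one
caveat is AL (0b1110) / NV (0b1111), which both mean "always". A quick
sanity check, assuming the Condition enum earlier in this header uses
the architectural values:

    assert((~Assembler::EQ) == Assembler::NE);
    assert((~Assembler::GE) == Assembler::LT);
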
+
+class BiasedLockingCounters;
+
+extern "C" void das(uint64_t start, int len);
+
+#endif // CPU_AARCH64_VM_ASSEMBLER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/assembler_aarch64.inline.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_ASSEMBLER_AARCH64_INLINE_HPP
+#define CPU_AARCH64_VM_ASSEMBLER_AARCH64_INLINE_HPP
+
+#include "asm/assembler.inline.hpp"
+#include "asm/codeBuffer.hpp"
+#include "code/codeCache.hpp"
+
+#endif // CPU_AARCH64_VM_ASSEMBLER_AARCH64_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/bytecodeInterpreter_aarch64.cpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "interpreter/bytecodeInterpreter.hpp"
+#include "interpreter/bytecodeInterpreter.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "oops/methodData.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+#include "interp_masm_aarch64.hpp"
+
+#ifdef CC_INTERP
+
+#endif // CC_INTERP (all)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/bytecodeInterpreter_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_HPP
+#define CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_HPP
+
+// Platform specific for C++ based Interpreter
+
+private:
+
+    interpreterState _self_link;          /*  Previous interpreter state  */ /* sometimes points to self??? */
+    address   _result_handler;            /* temp for saving native result handler */
+    intptr_t* _sender_sp;                 /* sender's sp before stack (locals) extension */
+
+    address   _extra_junk1;               /* temp to save on recompiles */
+    address   _extra_junk2;               /* temp to save on recompiles */
+    address   _extra_junk3;               /* temp to save on recompiles */
+    // address dummy_for_native2;         /* a native frame result handler would be here... */
+    // address dummy_for_native1;         /* native result type stored here in a interpreter native frame */
+    address   _extra_junk4;               /* temp to save on recompiles */
+    address   _extra_junk5;               /* temp to save on recompiles */
+    address   _extra_junk6;               /* temp to save on recompiles */
+public:
+                                                         // we have an interpreter frame...
+inline intptr_t* sender_sp() {
+  return _sender_sp;
+}
+
+// The interpreter always has the frame anchor fully setup so we don't
+// have to do anything going to vm from the interpreter. On return
+// we do have to clear the flags in case they were modified, to
+// maintain the stack walking invariants.
+//
+#define SET_LAST_JAVA_FRAME()
+
+#define RESET_LAST_JAVA_FRAME()
+
+/*
+ * Macros for accessing the stack.
+ */
+#undef STACK_INT
+#undef STACK_FLOAT
+#undef STACK_ADDR
+#undef STACK_OBJECT
+#undef STACK_DOUBLE
+#undef STACK_LONG
+
+// JavaStack Implementation
+
+#define GET_STACK_SLOT(offset)    (*((intptr_t*) &topOfStack[-(offset)]))
+#define STACK_SLOT(offset)    ((address) &topOfStack[-(offset)])
+#define STACK_ADDR(offset)    (*((address *) &topOfStack[-(offset)]))
+#define STACK_INT(offset)     (*((jint*) &topOfStack[-(offset)]))
+#define STACK_FLOAT(offset)   (*((jfloat *) &topOfStack[-(offset)]))
+#define STACK_OBJECT(offset)  (*((oop *) &topOfStack[-(offset)]))
+#define STACK_DOUBLE(offset)  (((VMJavaVal64*) &topOfStack[-(offset)])->d)
+#define STACK_LONG(offset)    (((VMJavaVal64 *) &topOfStack[-(offset)])->l)
+
+#define SET_STACK_SLOT(value, offset)   (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value))
+#define SET_STACK_ADDR(value, offset)   (*((address *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_INT(value, offset)    (*((jint *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_FLOAT(value, offset)  (*((jfloat *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value))
+#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d =  \
+                                                 ((VMJavaVal64*)(addr))->d)
+#define SET_STACK_LONG(value, offset)   (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value))
+#define SET_STACK_LONG_FROM_ADDR(addr, offset)   (((VMJavaVal64*)&topOfStack[-(offset)])->l =  \
+                                                 ((VMJavaVal64*)(addr))->l)
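
To make the indexing convention concrete: offsets are negated against
topOfStack, which in the shared C++ interpreter points just below the
live top-of-stack slot. A rough sketch of an iadd-style handler using
these macros (assuming the usual topOfStack variable from the shared
bytecodeInterpreter.cpp):

    jint v1 = STACK_INT(-2);              // second operand from the top
    jint v2 = STACK_INT(-1);              // top of stack
    SET_STACK_INT(VMintAdd(v1, v2), -2);  // result replaces the lower slot
    // the dispatch loop then pops one slot and advances the bytecode pointer
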
+// JavaLocals implementation
+
+#define LOCALS_SLOT(offset)    ((intptr_t*)&locals[-(offset)])
+#define LOCALS_ADDR(offset)    ((address)locals[-(offset)])
+#define LOCALS_INT(offset)     ((jint)(locals[-(offset)]))
+#define LOCALS_FLOAT(offset)   (*((jfloat*)&locals[-(offset)]))
+#define LOCALS_OBJECT(offset)  ((oop)locals[-(offset)])
+#define LOCALS_DOUBLE(offset)  (((VMJavaVal64*)&locals[-((offset) + 1)])->d)
+#define LOCALS_LONG(offset)    (((VMJavaVal64*)&locals[-((offset) + 1)])->l)
+#define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)]))
+#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)]))
+
+#define SET_LOCALS_SLOT(value, offset)    (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value))
+#define SET_LOCALS_ADDR(value, offset)    (*((address *)&locals[-(offset)]) = (value))
+#define SET_LOCALS_INT(value, offset)     (*((jint *)&locals[-(offset)]) = (value))
+#define SET_LOCALS_FLOAT(value, offset)   (*((jfloat *)&locals[-(offset)]) = (value))
+#define SET_LOCALS_OBJECT(value, offset)  (*((oop *)&locals[-(offset)]) = (value))
+#define SET_LOCALS_DOUBLE(value, offset)  (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value))
+#define SET_LOCALS_LONG(value, offset)    (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value))
+#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \
+                                                  ((VMJavaVal64*)(addr))->d)
+#define SET_LOCALS_LONG_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->l = \
+                                                ((VMJavaVal64*)(addr))->l)
+
+#endif // CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/bytecodeInterpreter_aarch64.inline.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_INLINE_HPP
+#define CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_INLINE_HPP
+
+// Inline interpreter functions for AArch64
+
+inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; }
+inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; }
+inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; }
+inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; }
+inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return fmod(op1, op2); }
+
+inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; }
+
+inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) {
+  return ( op1 < op2 ? -1 :
+               op1 > op2 ? 1 :
+                   op1 == op2 ? 0 :
+                       (direction == -1 || direction == 1) ? direction : 0);
+
+}
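
The direction argument encodes the NaN behaviour that distinguishes the
two float-compare bytecodes: per the JVM spec, fcmpl pushes -1 when
either operand is NaN and fcmpg pushes 1. For example:

    assert(BytecodeInterpreter::VMfloatCompare(NAN, 0.0f, -1) == -1);  // fcmpl
    assert(BytecodeInterpreter::VMfloatCompare(NAN, 0.0f,  1) ==  1);  // fcmpg
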
+
+inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) {
+  // copy as two 32-bit halves (carried over from the x86 version, which could not do unaligned 64-bit copies)
+  to[0] = from[0]; to[1] = from[1];
+}
+
+// The long operations depend on compiler support for 64-bit "long long" arithmetic
+
+inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) {
+  return op1 + op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) {
+  return op1 & op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) {
+  // QQQ what about check and throw...
+  return op1 / op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) {
+  return op1 * op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) {
+  return op1 | op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) {
+  return op1 - op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) {
+  return op1 ^ op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) {
+  return op1 % op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) {
+  // The 0x3F mask matches the JVM spec: only the low six bits of a long shift count are used
+  return ((unsigned long long) op1) >> (op2 & 0x3F);
+}
+
+inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) {
+  return op1 >> (op2 & 0x3F);
+}
+
+inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) {
+  return op1 << (op2 & 0x3F);
+}
+
+inline jlong BytecodeInterpreter::VMlongNeg(jlong op) {
+  return -op;
+}
+
+inline jlong BytecodeInterpreter::VMlongNot(jlong op) {
+  return ~op;
+}
+
+inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) {
+  return (op < 0);  // strictly "less than zero", as the name says
+}
+
+inline int32_t BytecodeInterpreter::VMlongGez(jlong op) {
+  return (op >= 0);
+}
+
+inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) {
+  return (op == 0);
+}
+
+inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) {
+  return (op1 == op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) {
+  return (op1 != op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) {
+  return (op1 >= op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) {
+  return (op1 <= op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) {
+  return (op1 < op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) {
+  return (op1 > op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) {
+  return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0);
+}
+
+// Long conversions
+
+inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) {
+  return (jdouble) val;
+}
+
+inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) {
+  return (jfloat) val;
+}
+
+inline jint BytecodeInterpreter::VMlong2Int(jlong val) {
+  return (jint) val;
+}
+
+// Double Arithmetic
+
+inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) {
+  return op1 + op2;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) {
+  // Divide by zero... QQQ
+  return op1 / op2;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) {
+  return op1 * op2;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) {
+  return -op;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) {
+  return fmod(op1, op2);
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) {
+  return op1 - op2;
+}
+
+inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) {
+  return ( op1 < op2 ? -1 :
+               op1 > op2 ? 1 :
+                   op1 == op2 ? 0 :
+                       (direction == -1 || direction == 1) ? direction : 0);
+}
+
+// Double Conversions
+
+inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) {
+  return (jfloat) val;
+}
+
+// Float Conversions
+
+inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) {
+  return (jdouble) op;
+}
+
+// Integer Arithmetic
+
+inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) {
+  return op1 + op2;
+}
+
+inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) {
+  return op1 & op2;
+}
+
+inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) {
+  /* it's possible we could catch this special case implicitly */
+  if ((juint)op1 == 0x80000000 && op2 == -1) return op1;
+  else return op1 / op2;
+}
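
The guarded case is the single overflow in two's-complement division:
per the JVM spec, idiv of Integer.MIN_VALUE by -1 yields MIN_VALUE
rather than trapping, so:

    assert(BytecodeInterpreter::VMintDiv((jint)0x80000000, -1) == (jint)0x80000000);
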
+
+inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) {
+  return op1 * op2;
+}
+
+inline jint BytecodeInterpreter::VMintNeg(jint op) {
+  return -op;
+}
+
+inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) {
+  return op1 | op2;
+}
+
+inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) {
+  /* it's possible we could catch this special case implicitly */
+  if ((juint)op1 == 0x80000000 && op2 == -1) return 0;
+  else return op1 % op2;
+}
+
+inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) {
+  return op1 << (op2 & 0x1f);  // JVM spec: only the low five bits of the count are used
+}
+
+inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) {
+  return op1 >> (op2 & 0x1f);
+}
+
+inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) {
+  return op1 - op2;
+}
+
+inline jint BytecodeInterpreter::VMintUshr(jint op1, jint op2) {
+  return ((juint) op1) >> (op2 & 0x1f);
+}
+
+inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) {
+  return op1 ^ op2;
+}
+
+inline jdouble BytecodeInterpreter::VMint2Double(jint val) {
+  return (jdouble) val;
+}
+
+inline jfloat BytecodeInterpreter::VMint2Float(jint val) {
+  return (jfloat) val;
+}
+
+inline jlong BytecodeInterpreter::VMint2Long(jint val) {
+  return (jlong) val;
+}
+
+inline jchar BytecodeInterpreter::VMint2Char(jint val) {
+  return (jchar) val;
+}
+
+inline jshort BytecodeInterpreter::VMint2Short(jint val) {
+  return (jshort) val;
+}
+
+inline jbyte BytecodeInterpreter::VMint2Byte(jint val) {
+  return (jbyte) val;
+}
+
+#endif // CPU_AARCH64_VM_BYTECODEINTERPRETER_AARCH64_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/bytecodes_aarch64.cpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,29 @@
+/*
+ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/bytecodes.hpp"
+
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/bytecodes_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_BYTECODES_AARCH64_HPP
+#define CPU_AARCH64_VM_BYTECODES_AARCH64_HPP
+
+// No aarch64 specific bytecodes
+
+#endif // CPU_AARCH64_VM_BYTECODES_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/bytes_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_BYTES_AARCH64_HPP
+#define CPU_AARCH64_VM_BYTES_AARCH64_HPP
+
+#include "memory/allocation.hpp"
+
+class Bytes: AllStatic {
+ public:
+  // Returns true if the byte ordering used by Java is different from the native byte ordering
+  // of the underlying machine. For example, this is true for little-endian machines such as
+  // Intel x86 and this AArch64 port, but false for Solaris on Sparc.
+  static inline bool is_Java_byte_ordering_different() { return true; }
+
+
+  // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering
+  // (no special code is needed since AArch64 CPUs can access unaligned data)
+  static inline u2   get_native_u2(address p)         { return *(u2*)p; }
+  static inline u4   get_native_u4(address p)         { return *(u4*)p; }
+  static inline u8   get_native_u8(address p)         { return *(u8*)p; }
+
+  static inline void put_native_u2(address p, u2 x)   { *(u2*)p = x; }
+  static inline void put_native_u4(address p, u4 x)   { *(u4*)p = x; }
+  static inline void put_native_u8(address p, u8 x)   { *(u8*)p = x; }
+
+
+  // Efficient reading and writing of unaligned unsigned data in Java
+  // byte ordering (i.e. big-endian ordering). Byte-order reversal is
+  // needed since this AArch64 port runs little-endian.
+  static inline u2   get_Java_u2(address p)           { return swap_u2(get_native_u2(p)); }
+  static inline u4   get_Java_u4(address p)           { return swap_u4(get_native_u4(p)); }
+  static inline u8   get_Java_u8(address p)           { return swap_u8(get_native_u8(p)); }
+
+  static inline void put_Java_u2(address p, u2 x)     { put_native_u2(p, swap_u2(x)); }
+  static inline void put_Java_u4(address p, u4 x)     { put_native_u4(p, swap_u4(x)); }
+  static inline void put_Java_u8(address p, u8 x)     { put_native_u8(p, swap_u8(x)); }
+
+
+  // Efficient swapping of byte ordering
+  static inline u2   swap_u2(u2 x);                   // compiler-dependent implementation
+  static inline u4   swap_u4(u4 x);                   // compiler-dependent implementation
+  static inline u8   swap_u8(u8 x);
+};
+
+
+// The following header contains the implementations of swap_u2, swap_u4 and swap_u8
+
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "bytes_linux_aarch64.inline.hpp"
+#endif
+
+#endif // CPU_AARCH64_VM_BYTES_AARCH64_HPP
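
Since this port is little-endian, the Java accessors reduce to a native
load plus a byte swap. For instance, reading a classfile u2 whose bytes
in memory are { 0x12, 0x34 } (example values of mine):

    u1 buf[2] = { 0x12, 0x34 };
    // the native little-endian load sees 0x3412; swap_u2 restores 0x1234
    assert(Bytes::get_Java_u2((address)buf) == 0x1234);
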
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/codeBuffer_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_CODEBUFFER_AARCH64_HPP
+#define CPU_AARCH64_VM_CODEBUFFER_AARCH64_HPP
+
+private:
+  void pd_initialize() {}
+
+public:
+  void flush_bundle(bool start_new_bundle) {}
+
+#endif // CPU_AARCH64_VM_CODEBUFFER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/compiledIC_aarch64.cpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/compiledIC.hpp"
+#include "code/icBuffer.hpp"
+#include "code/nmethod.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/safepoint.hpp"
+
+// Release the CompiledICHolder* associated with this call site if there is one.
+void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
+  // This call site might have become stale so inspect it carefully.
+  NativeCall* call = nativeCall_at(call_site->addr());
+  if (is_icholder_entry(call->destination())) {
+    NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
+    InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
+  }
+}
+
+bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
+  // This call site might have become stale so inspect it carefully.
+  NativeCall* call = nativeCall_at(call_site->addr());
+  return is_icholder_entry(call->destination());
+}
+
+// ----------------------------------------------------------------------------
+
+#define __ _masm.
+void CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
+  // Stub is fixed up when the corresponding call is converted from
+  // calling compiled code to calling interpreted code.
+  // mov rmethod, 0
+  // jmp -4 # to self
+
+  address mark = cbuf.insts_mark();  // Get mark within main instrs section.
+
+  // Note that the code buffer's insts_mark is always relative to insts.
+  // That's why we must use the macroassembler to generate a stub.
+  MacroAssembler _masm(&cbuf);
+
+  address base = __ start_a_stub(to_interp_stub_size()*2);
+
+  int offset = __ offset();
+  if (base == NULL)  return;  // CodeBuffer::expand failed
+  // static stub relocation stores the instruction address of the call
+  __ relocate(static_stub_Relocation::spec(mark));
+  // static stub relocation also tags the Method* in the code-stream.
+  __ mov_metadata(rmethod, (Metadata*)NULL);
+  __ movptr(rscratch1, 0);
+  __ br(rscratch1);
+
+  assert((__ offset() - offset) <= (int)to_interp_stub_size(), "stub too big");
+  __ end_a_stub();
+}
+#undef __
+
+int CompiledStaticCall::to_interp_stub_size() {
+  return 7 * NativeInstruction::instruction_size;
+}
+
+// Relocation entries for call stub, compiled java to interpreter.
+int CompiledStaticCall::reloc_to_interp_stub() {
+  return 4; // 3 in emit_to_interp_stub + 1 in emit_call
+}
+
+void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) {
+  address stub = find_stub();
+  guarantee(stub != NULL, "stub not found");
+
+  if (TraceICs) {
+    ResourceMark rm;
+    tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s",
+                  p2i(instruction_address()),
+                  callee->name_and_sig_as_C_string());
+  }
+
+  // Creation also verifies the object.
+  NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
+#ifndef PRODUCT
+  NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address());
+
+  assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(),
+         "a) MT-unsafe modification of inline cache");
+  assert(method_holder->data() == 0 || jump->jump_destination() == entry,
+         "b) MT-unsafe modification of inline cache");
+#endif
+  // Update stub.
+  method_holder->set_data((intptr_t)callee());
+  NativeGeneralJump::insert_unconditional(method_holder->next_instruction_address(), entry);
+  ICache::invalidate_range(stub, to_interp_stub_size());
+  // Update jump to call.
+  set_destination_mt_safe(stub);
+}
+
+void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) {
+  assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call");
+  // Reset stub.
+  address stub = static_stub->addr();
+  assert(stub != NULL, "stub not found");
+  // Creation also verifies the object.
+  NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
+  method_holder->set_data(0);
+}
+
+//-----------------------------------------------------------------------------
+// Non-product mode code
+#ifndef PRODUCT
+
+void CompiledStaticCall::verify() {
+  // Verify call.
+  NativeCall::verify();
+  if (os::is_MP()) {
+    verify_alignment();
+  }
+
+  // Verify stub.
+  address stub = find_stub();
+  assert(stub != NULL, "no stub found for static call");
+  // Creation also verifies the object.
+  NativeMovConstReg* method_holder = nativeMovConstReg_at(stub);
+  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
+
+  // Verify state.
+  assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check");
+}
+
+#endif // !PRODUCT
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/copy_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_COPY_AARCH64_HPP
+#define CPU_AARCH64_VM_COPY_AARCH64_HPP
+
+// Inline functions for memory copy and fill.
+
+// Contains inline asm implementations
+#ifdef TARGET_OS_ARCH_linux_aarch64
+# include "copy_linux_aarch64.inline.hpp"
+#endif
+
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+  julong* to = (julong*) tohw;
+  julong  v  = ((julong) value << 32) | value;
+  while (count-- > 0) {
+    *to++ = v;
+  }
+}
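
The fill routine widens the 32-bit pattern so that each 64-bit store
covers one HeapWord holding two copies of the value; for example (value
mine):

    julong v = ((julong)0xDEADBEEFu << 32) | 0xDEADBEEFu;
    assert(v == 0xDEADBEEFDEADBEEFULL);
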
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+  pd_fill_to_words(tohw, count, value);
+}
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+  (void)memset(to, value, count);
+}
+
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+  pd_fill_to_words(tohw, count, 0);
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+  (void)memset(to, 0, count);
+}
+
+#endif // CPU_AARCH64_VM_COPY_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/cppInterpreterGenerator_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_CPPINTERPRETERGENERATOR_AARCH64_HPP
+#define CPU_AARCH64_VM_CPPINTERPRETERGENERATOR_AARCH64_HPP
+
+ protected:
+
+  void generate_more_monitors();
+  void generate_deopt_handling();
+
+#endif // CPU_AARCH64_VM_CPPINTERPRETERGENERATOR_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/cpustate_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,595 @@
+/*
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef _CPU_STATE_H
+#define _CPU_STATE_H
+
+#include <sys/types.h>
+
+/*
+ * symbolic names used to identify general registers which also match
+ * the registers indices in machine code
+ *
+ * We have 32 general registers which can be read/written as 32 bit or
+ * 64 bit sources/sinks and are appropriately referred to as Wn or Xn
+ * in the assembly code.  Some instructions mix these access modes
+ * (e.g. ADD X0, X1, W2) so the implementation of the instruction
+ * needs to *know* which type of read or write access is required.
+ */
+enum GReg {
+  R0,
+  R1,
+  R2,
+  R3,
+  R4,
+  R5,
+  R6,
+  R7,
+  R8,
+  R9,
+  R10,
+  R11,
+  R12,
+  R13,
+  R14,
+  R15,
+  R16,
+  R17,
+  R18,
+  R19,
+  R20,
+  R21,
+  R22,
+  R23,
+  R24,
+  R25,
+  R26,
+  R27,
+  R28,
+  R29,
+  R30,
+  R31,
+  // and now the aliases
+  RSCRATCH1=R8,
+  RSCRATCH2=R9,
+  RMETHOD=R12,
+  RESP=R20,
+  RDISPATCH=R21,
+  RBCP=R22,
+  RLOCALS=R24,
+  RMONITORS=R25,
+  RCPOOL=R26,
+  RHEAPBASE=R27,
+  RTHREAD=R28,
+  FP = R29,
+  LR = R30,
+  SP = R31,
+  ZR = R31
+};
+
+/*
+ * symbolic names used to refer to floating point registers which also
+ * match the registers indices in machine code
+ *
+ * We have 32 FP registers which can be read/written as 8, 16, 32, 64
+ * and 128 bit sources/sinks and are appropriately referred to as Bn,
+ * Hn, Sn, Dn and Qn in the assembly code. Some instructions mix these
+ * access modes (e.g. FCVT S0, D0) so the implementation of the
+ * instruction needs to *know* which type of read or write access is
+ * required.
+ */
+
+enum VReg {
+  V0,
+  V1,
+  V2,
+  V3,
+  V4,
+  V5,
+  V6,
+  V7,
+  V8,
+  V9,
+  V10,
+  V11,
+  V12,
+  V13,
+  V14,
+  V15,
+  V16,
+  V17,
+  V18,
+  V19,
+  V20,
+  V21,
+  V22,
+  V23,
+  V24,
+  V25,
+  V26,
+  V27,
+  V28,
+  V29,
+  V30,
+  V31
+};
+
+/**
+ * all the different integer bit patterns for the components of a
+ * general register are overlaid here using a union so as to allow all
+ * reading and writing of the desired bits.
+ *
+ * n.b. the ARM spec says that when you write a 32 bit register you
+ * are supposed to write the low 32 bits and zero the high 32
+ * bits. But we don't actually have to care about this because Java
+ * will only ever consume the 32 bit value as a 64 bit quantity after
+ * an explicit extend.
+ */
+union GRegisterValue
+{
+  int8_t s8;
+  int16_t s16;
+  int32_t s32;
+  int64_t s64;
+  u_int8_t u8;
+  u_int16_t u16;
+  u_int32_t u32;
+  u_int64_t u64;
+};
+
+class GRegister
+{
+public:
+  GRegisterValue value;
+};
+
+/*
+ * float registers provide for storage of a single, double or quad
+ * word format float in the same register. single floats are not
+ * paired within each double register as per 32 bit arm. instead each
+ * 128 bit register Vn embeds the bits for Sn, and Dn in the lower
+ * quarter and half, respectively, of the bits for Qn.
+ *
+ * The upper bits can also be accessed as single or double floats by
+ * the float vector operations using indexing e.g. V1.D[1], V1.S[3]
+ * etc and, for SIMD operations using a horrible index range notation.
+ *
+ * The spec also talks about accessing float registers as half words
+ * and bytes with Hn and Bn providing access to the low 16 and 8 bits
+ * of Vn but it is not really clear what these bits represent. We can
+ * probably ignore this for Java anyway. However, we do need to access
+ * the raw bits at 32 and 64 bit resolution to load to/from integer
+ * registers.
+ */
+
+union FRegisterValue
+{
+  float s;
+  double d;
+  long double q;
+  // eventually we will need to be able to access the data as a vector
+  // the integral array elements allow us to access the bits in s, d,
+  // q, vs and vd at an appropriate level of granularity
+  u_int8_t vb[16];
+  u_int16_t vh[8];
+  u_int32_t vw[4];
+  u_int64_t vx[2];
+  float vs[4];
+  double vd[2];
+};
+
+class FRegister
+{
+public:
+  FRegisterValue value;
+};
+
+/*
+ * CPSR register -- this does not exist as a directly accessible
+ * register but we need to store the flags so we can implement
+ * flag-setting and flag-testing operations
+ *
+ * we can possibly use injected x86 asm to report the outcome of flag
+ * setting operations. if so we will need to grab the flags
+ * immediately after the operation in order to ensure we don't lose
+ * them because of the actions of the simulator. so we still need
+ * somewhere to store the condition codes.
+ */
+
+class CPSRRegister
+{
+public:
+  u_int32_t value;
+
+/*
+ * condition register bit select values
+ *
+ * the order of bits here is important because some of
+ * the flag setting conditional instructions employ a
+ * bit field to populate the flags when a false condition
+ * bypasses execution of the operation and we want to
+ * be able to assign the flags register using the
+ * supplied value.
+ */
+
+  enum CPSRIdx {
+    V_IDX,
+    C_IDX,
+    Z_IDX,
+    N_IDX
+  };
+
+  enum CPSRMask {
+    V = 1 << V_IDX,
+    C = 1 << C_IDX,
+    Z = 1 << Z_IDX,
+    N = 1 << N_IDX
+  };
+
+  static const int CPSR_ALL_FLAGS = (V | C | Z | N);
+};
+
+// auxiliary function to assemble the relevant bits from
+// the x86 EFLAGS register into an ARM CPSR value
+
+#define X86_V_IDX 11
+#define X86_C_IDX 0
+#define X86_Z_IDX 6
+#define X86_N_IDX 7
+
+#define X86_V (1 << X86_V_IDX)
+#define X86_C (1 << X86_C_IDX)
+#define X86_Z (1 << X86_Z_IDX)
+#define X86_N (1 << X86_N_IDX)
+
+inline u_int32_t convertX86Flags(u_int32_t x86flags)
+{
+  u_int32_t flags;
+  // set N flag
+  flags = ((x86flags & X86_N) >> X86_N_IDX);
+  // shift then or in Z flag
+  flags <<= 1;
+  flags |= ((x86flags & X86_Z) >> X86_Z_IDX);
+  // shift then or in C flag
+  flags <<= 1;
+  flags |= ((x86flags & X86_C) >> X86_C_IDX);
+  // shift then or in V flag
+  flags <<= 1;
+  flags |= ((x86flags & X86_V) >> X86_V_IDX);
+
+  return flags;
+}
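
A worked example (mine): an x86 comparison that sets ZF and CF, i.e.
EFLAGS bits 6 and 0, maps to the ARM nibble N:Z:C:V = 0b0110, which is
Z | C in CPSRMask terms:

    u_int32_t eflags = (1 << 6) | (1 << 0);  // ZF | CF
    assert(convertX86Flags(eflags) == (CPSRRegister::Z | CPSRRegister::C));
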
+
+inline u_int32_t convertX86FlagsFP(u_int32_t x86flags)
+{
+  // x86 flags set by fcomi(x,y) are ZF:PF:CF
+  // (yes, that's PF for parity, WTF?)
+  // where
+  // 0) 0:0:0 means x > y
+  // 1) 0:0:1 means x < y
+  // 2) 1:0:0 means x = y
+  // 3) 1:1:1 means x and y are unordered
+  // note that we don't have to check PF so
+  // we really have a simple 2-bit case switch
+  // the corresponding ARM64 flags settings
+  //  in hi->lo bit order are
+  // 0) --C-
+  // 1) N---
+  // 2) -ZC-
+  // 3) --CV
+
+  static u_int32_t armFlags[] = {
+      0b0010,
+      0b1000,
+      0b0110,
+      0b0011
+  };
+  // pick out the ZF and CF bits
+  u_int32_t zc = ((x86flags & X86_Z) >> X86_Z_IDX);
+  zc <<= 1;
+  zc |= ((x86flags & X86_C) >> X86_C_IDX);
+
+  return armFlags[zc];
+}
+
+/*
+ * FPSR register -- floating point status register
+
+ * this register includes IDC, IXC, UFC, OFC, DZC, IOC and QC bits,
+ * and the floating point N, Z, C, V bits but the latter are unused in
+ * aarch64 mode. the sim ignores QC for now.
+ *
+ * bit positions are as per the ARMv7 FPSCR register
+ *
+ * IDC :  7 ==> Input Denormal (cumulative exception bit)
+ * IXC :  4 ==> Inexact
+ * UFC :  3 ==> Underflow
+ * OFC :  2 ==> Overflow
+ * DZC :  1 ==> Division by Zero
+ * IOC :  0 ==> Invalid Operation
+ */
+
+class FPSRRegister
+{
+public:
+  u_int32_t value;
+  // indices for bits in the FPSR register value
+  enum FPSRIdx {
+    IO_IDX = 0,
+    DZ_IDX = 1,
+    OF_IDX = 2,
+    UF_IDX = 3,
+    IX_IDX = 4,
+    ID_IDX = 7
+  };
+  // corresponding bits as numeric values
+  enum FPSRMask {
+    IO = (1 << IO_IDX),
+    DZ = (1 << DZ_IDX),
+    OF = (1 << OF_IDX),
+    UF = (1 << UF_IDX),
+    IX = (1 << IX_IDX),
+    ID = (1 << ID_IDX)
+  };
+  static const int FPSR_ALL_FPSRS = (IO | DZ | OF | UF | IX | ID);
+};
+
+// debugger support
+
+enum PrintFormat
+{
+  FMT_DECIMAL,
+  FMT_HEX,
+  FMT_SINGLE,
+  FMT_DOUBLE,
+  FMT_QUAD,
+  FMT_MULTI
+};
+
+/*
+ * model of the registers and other state associated with the cpu
+ */
+class CPUState
+{
+  friend class AArch64Simulator;
+private:
+  // this is the PC of the instruction being executed
+  u_int64_t pc;
+  // this is the PC of the instruction to be executed next
+  // it is defaulted to pc + 4 at instruction decode but
+  // execute may reset it
+
+  u_int64_t nextpc;
+  GRegister gr[33];             // extra register at index 32 is used
+                                // to hold zero value
+  FRegister fr[32];
+  CPSRRegister cpsr;
+  FPSRRegister fpsr;
+
+public:
+
+  CPUState() {
+    gr[20].value.u64 = 0;  // establish initial condition for
+                           // checkAssertions()
+    trace_counter = 0;
+  }
+
+  // General Register access macros
+
+  // only xreg or xregs can be used as an lvalue in order to update a
+  // register. this ensures that the top part of a register is always
+  // assigned when it is written by the sim.
+
+  inline u_int64_t &xreg(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.u64;
+    } else {
+      return gr[reg].value.u64;
+    }
+  }
+
+  inline int64_t &xregs(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.s64;
+    } else {
+      return gr[reg].value.s64;
+    }
+  }
+
+  inline u_int32_t wreg(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.u32;
+    } else {
+      return gr[reg].value.u32;
+    }
+  }
+
+  inline int32_t wregs(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.s32;
+    } else {
+      return gr[reg].value.s32;
+    }
+  }
+
+  inline u_int32_t hreg(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.u16;
+    } else {
+      return gr[reg].value.u16;
+    }
+  }
+
+  inline int32_t hregs(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.s16;
+    } else {
+      return gr[reg].value.s16;
+    }
+  }
+
+  inline u_int32_t breg(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.u8;
+    } else {
+      return gr[reg].value.u8;
+    }
+  }
+
+  inline int32_t bregs(GReg reg, int r31_is_sp) {
+    if (reg == R31 && !r31_is_sp) {
+      return gr[32].value.s8;
+    } else {
+      return gr[reg].value.s8;
+    }
+  }
+
+  // FP Register access macros
+
+  // all non-vector accessors return a reference so we can both read
+  // and assign
+
+  inline float &sreg(VReg reg) {
+    return fr[reg].value.s;
+  }
+
+  inline double &dreg(VReg reg) {
+    return fr[reg].value.d;
+  }
+
+  inline long double &qreg(VReg reg) {
+    return fr[reg].value.q;
+  }
+
+  // all vector register accessors return a pointer
+
+  inline float *vsreg(VReg reg) {
+    return &fr[reg].value.vs[0];
+  }
+
+  inline double *vdreg(VReg reg) {
+    return &fr[reg].value.vd[0];
+  }
+
+  inline u_int8_t *vbreg(VReg reg) {
+    return &fr[reg].value.vb[0];
+  }
+
+  inline u_int16_t *vhreg(VReg reg) {
+    return &fr[reg].value.vh[0];
+  }
+
+  inline u_int32_t *vwreg(VReg reg) {
+    return &fr[reg].value.vw[0];
+  }
+
+  inline u_int64_t *vxreg(VReg reg) {
+    return &fr[reg].value.vx[0];
+  }
+
+  union GRegisterValue prev_sp, prev_fp;
+
+  static const int trace_size = 256;
+  u_int64_t trace_buffer[trace_size];
+  int trace_counter;
+
+  bool checkAssertions()
+  {
+    // Make sure that SP is 16-byte aligned.
+    // We would also like to check that ESP stays above SP (ignoring
+    // ESP while it is still null, i.e. not yet used), but only the
+    // alignment check is implemented below.
+    if (gr[31].value.u64 & 0x0f) {
+      asm volatile("nop");  // no-op: a convenient place for a breakpoint
+      return false;
+    }
+    return true;
+  }
+
+  // pc register accessors
+
+  // this method can be used to fetch the current PC
+  u_int64_t getPC();
+  // instead of setting the current PC directly you can
+  // first set the next PC (either absolute or PC-relative)
+  // and later copy the next PC into the current PC
+  // this supports a default increment by 4 at instruction
+  // fetch with an optional reset by control instructions
+  u_int64_t getNextPC();
+  void setNextPC(u_int64_t next);
+  void offsetNextPC(int64_t offset);
+  // install nextpc as current pc
+  void updatePC();
+
+  // this method can be used to save the next PC to LR
+  // just before installing a branch PC
+  inline void saveLR() { gr[LR].value.u64 = nextpc; }
+
+  // cpsr register accessors
+  u_int32_t getCPSRRegister();
+  void setCPSRRegister(u_int32_t flags);
+  // read a specific subset of the flags as a bit pattern
+  // mask should be composed using elements of enum CPSRRegister::CPSRMask
+  u_int32_t getCPSRBits(u_int32_t mask);
+  // assign a specific subset of the flags as a bit pattern
+  // mask and value should be composed using elements of enum CPSRRegister::CPSRMask
+  void setCPSRBits(u_int32_t mask, u_int32_t value);
+  // test the value of a single flag returned as 1 or 0
+  u_int32_t testCPSR(CPSRRegister::CPSRIdx idx);
+  // set a single flag
+  void setCPSR(CPSRRegister::CPSRIdx idx);
+  // clear a single flag
+  void clearCPSR(CPSRRegister::CPSRIdx idx);
+  // utility method to set ARM CPSR flags from an x86 bit mask generated by integer arithmetic
+  void setCPSRRegisterFromX86(u_int64_t x86Flags);
+  // utility method to set ARM CPSR flags from an x86 bit mask generated by floating compare
+  void setCPSRRegisterFromX86FP(u_int64_t x86Flags);
+
+  // fpsr register accessors
+  u_int32_t getFPSRRegister();
+  void setFPSRRegister(u_int32_t flags);
+  // read a specific subset of the fpsr bits as a bit pattern
+  // mask should be composed using elements of enum FPSRRegister::FPSRMask
+  u_int32_t getFPSRBits(u_int32_t mask);
+  // assign a specific subset of the flags as a bit pattern
+  // mask and value should be composed using elements of enum FPSRRegister::FPSRMask
+  void setFPSRBits(u_int32_t mask, u_int32_t value);
+  // test the value of a single flag returned as 1 or 0
+  u_int32_t testFPSR(FPSRRegister::FPSRIdx idx);
+  // set a single flag
+  void setFPSR(FPSRRegister::FPSRIdx idx);
+  // clear a single flag
+  void clearFPSR(FPSRRegister::FPSRIdx idx);
+
+  // debugger support
+  void printPC(int pending, const char *trailing = "\n");
+  void printInstr(u_int32_t instr, void (*dasm)(u_int64_t), const char *trailing = "\n");
+  void printGReg(GReg reg, PrintFormat format = FMT_HEX, const char *trailing = "\n");
+  void printVReg(VReg reg, PrintFormat format = FMT_HEX, const char *trailing = "\n");
+  void printCPSR(const char *trailing = "\n");
+  void printFPSR(const char *trailing = "\n");
+  void dumpState();
+};
+
+#endif // ifndef _CPU_STATE_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/debug_aarch64.cpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "code/codeCache.hpp"
+#include "code/nmethod.hpp"
+#include "runtime/frame.hpp"
+#include "runtime/init.hpp"
+#include "runtime/os.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/top.hpp"
+
+void pd_ps(frame f) {}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/decode_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,412 @@
+/*
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef _DECODE_H
+#define _DECODE_H
+
+#include <sys/types.h>
+#include "cpustate_aarch64.hpp"
+
+// bitfield immediate expansion helper
+
+extern int expandLogicalImmediate(u_int32_t immN, u_int32_t immr,
+                                    u_int32_t imms, u_int64_t &bimm);
+
+
+/*
+ * codes used in conditional instructions
+ *
+ * these are passed to conditional operations to identify which
+ * condition to test for
+ */
+enum CondCode {
+  EQ = 0b0000, // meaning Z == 1
+  NE = 0b0001, // meaning Z == 0
+  HS = 0b0010, // meaning C == 1
+  CS = HS,
+  LO = 0b0011, // meaning C == 0
+  CC = LO,
+  MI = 0b0100, // meaning N == 1
+  PL = 0b0101, // meaning N == 0
+  VS = 0b0110, // meaning V == 1
+  VC = 0b0111, // meaning V == 0
+  HI = 0b1000, // meaning C == 1 && Z == 0
+  LS = 0b1001, // meaning !(C == 1 && Z == 0)
+  GE = 0b1010, // meaning N == V
+  LT = 0b1011, // meaning N != V
+  GT = 0b1100, // meaning Z == 0 && N == V
+  LE = 0b1101, // meaning !(Z == 0 && N == V)
+  AL = 0b1110, // meaning ANY
+  NV = 0b1111  // ditto
+};
+
+/*
+ * certain addressing modes for load require pre or post writeback of
+ * the computed address to a base register
+ */
+enum WriteBack {
+  Post = 0,
+  Pre = 1
+};
+
+/*
+ * certain addressing modes for load require an offset to
+ * be optionally scaled so the decode needs to pass that
+ * through to the execute routine
+ */
+enum Scaling {
+  Unscaled = 0,
+  Scaled = 1
+};
+
+/*
+ * when we do have to scale we do so by shifting using
+ * log2(bytes in data element) as the shift count.
+ * so we don't have to scale offsets when loading
+ * bytes.
+ */
+enum ScaleShift {
+  ScaleShift16 = 1,
+  ScaleShift32 = 2,
+  ScaleShift64 = 3,
+  ScaleShift128 = 4
+};
+
+/*
+ * one of the addressing modes for load requires a 32-bit register
+ * value to be either zero- or sign-extended; for these instructions
+ * UXTW or SXTW should be passed
+ *
+ * arithmetic register data processing operations can optionally
+ * extend a portion of the second register value; for these
+ * instructions the value supplied must identify the portion of the
+ * register which is to be zero- or sign-extended
+ */
+enum Extension {
+  UXTB = 0,
+  UXTH = 1,
+  UXTW = 2,
+  UXTX = 3,
+  SXTB = 4,
+  SXTH = 5,
+  SXTW = 6,
+  SXTX = 7
+};
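+
+// e.g. (illustration only) add x0, x1, w2, sxtw #1 passes SXTW: the
+// 32-bit value in w2 is sign-extended (and here shifted left by 1)
+// before being added to x1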
+
+/*
+ * arithmetic and logical register data processing operations
+ * optionally perform a shift on the second register value
+ */
+enum Shift {
+  LSL = 0,
+  LSR = 1,
+  ASR = 2,
+  ROR = 3
+};
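+
+// e.g. (illustration only) orr x0, x1, x2, lsl #4 passes LSL with a
+// shift amount of 4 applied to the second register value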
+
+/*
+ * bit twiddling helpers for instruction decode
+ */
+
+// 32 bit mask with bits [hi,...,lo] set
+
+static inline u_int32_t mask32(int hi = 31, int lo = 0)
+{
+  int nbits = (hi + 1) - lo;
+  return ((1 << nbits) - 1) << lo;
+}
+
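+// 64 bit mask with bits [hi,...,lo] set
+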
+static inline u_int64_t mask64(int hi = 63, int lo = 0)
+{
+  int nbits = (hi + 1) - lo;
+  return ((1L << nbits) - 1) << lo;
+}
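+
+// a couple of worked values (illustration only):
+//   mask32(7, 4)  == 0xf0
+//   mask64(11, 8) == 0xf00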
+
+// pick bits [hi,...,lo] from val
+static inline u_int32_t pick32(u_int32_t val, int hi = 31, int lo = 0)
+{
+  return (val & mask32(hi, lo));
+}
+
+// pick bits [hi,...,lo] from val
+static inline u_int64_t pick64(u_int64_t val, int hi = 63, int lo = 0)
+{
+  return (val & mask64(hi, lo));
+}
+
+// pick bits [hi,...,lo] from val and shift to [(hi - (lo - newlo)), newlo]
+static inline u_int32_t pickshift32(u_int32_t val, int hi = 31,
+                                    int lo = 0, int newlo = 0)
+{
+  u_int32_t bits = pick32(val, hi, lo);
+  if (lo < newlo) {
+    return (bits << (newlo - lo));
+  } else {
+    return (bits >> (lo - newlo));
+  }
+}
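+// e.g. (illustration only) pickshift32(0x30000000, 29, 28, 1) picks
+// bits [29,28] == 0b11 and re-bases them at bit 1, yielding 0b110
+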
+// mask [hi,lo] and shift down to start at bit 0
+static inline u_int32_t pickbits32(u_int32_t val, int hi = 31, int lo = 0)
+{
+  return (pick32(val, hi, lo) >> lo);
+}
+
+// mask [hi,lo] and shift down to start at bit 0
+static inline u_int64_t pickbits64(u_int64_t val, int hi = 63, int lo = 0)
+{
+  return (pick64(val, hi, lo) >> lo);
+}
+
+/*
+ * decode registers, immediates and constants of various types
+ */
+
+static inline GReg greg(u_int32_t val, int lo)
+{
+  return (GReg)pickbits32(val, lo + 4, lo);
+}
+
+static inline VReg vreg(u_int32_t val, int lo)
+{
+  return (VReg)pickbits32(val, lo + 4, lo);
+}
+
+static inline u_int32_t uimm(u_int32_t val, int hi, int lo)
+{
+  return pickbits32(val, hi, lo);
+}
+
+static inline int32_t simm(u_int32_t val, int hi = 31, int lo = 0) {
+  union {
+    u_int32_t u;
+    int32_t n;
+  };
+
+  u = val << (31 - hi);
+  n = n >> (31 - hi + lo);
+  return n;
+}
+
+static inline int64_t simm(u_int64_t val, int hi = 63, int lo = 0) {
+  union {
+    u_int64_t u;
+    int64_t n;
+  };
+
+  u = val << (63 - hi);
+  n = n >> (63 - hi + lo);
+  return n;
+}
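+
+// e.g. (illustration only) simm((u_int32_t)0xe00, 11, 9) picks the
+// 3-bit field 0b111 and sign extends it, returning -1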
+
+static inline Shift shift(u_int32_t val, int lo)
+{
+  return (Shift)pickbits32(val, lo+1, lo);
+}
+
+static inline Extension extension(u_int32_t val, int lo)
+{
+  return (Extension)pickbits32(val, lo+2, lo);
+}
+
+static inline Scaling scaling(u_int32_t val, int lo)
+{
+  return (Scaling)pickbits32(val, lo, lo);
+}
+
+static inline WriteBack writeback(u_int32_t val, int lo)
+{
+  return (WriteBack)pickbits32(val, lo, lo);
+}
+
+static inline CondCode condcode(u_int32_t val, int lo)
+{
+  return (CondCode)pickbits32(val, lo+3, lo);
+}
+
+/*
+ * operation decode
+ */
+// bits [28,25] are the primary dispatch vector
+
+static inline u_int32_t dispatchGroup(u_int32_t val)
+{
+  return pickshift32(val, 28, 25, 0);
+}
+
+/*
+ * the 16 possible values for bits [28,25] identified by tags which
+ * map them to the 5 main instruction groups LDST, DPREG, ADVSIMD,
+ * BREXSYS and DPIMM.
+ *
+ * An extra group PSEUDO is included in one of the unallocated ranges
+ * for simulator-specific pseudo-instructions.
+ */
+enum DispatchGroup {
+  GROUP_PSEUDO_0000,
+  GROUP_UNALLOC_0001,
+  GROUP_UNALLOC_0010,
+  GROUP_UNALLOC_0011,
+  GROUP_LDST_0100,
+  GROUP_DPREG_0101,
+  GROUP_LDST_0110,
+  GROUP_ADVSIMD_0111,
+  GROUP_DPIMM_1000,
+  GROUP_DPIMM_1001,
+  GROUP_BREXSYS_1010,
+  GROUP_BREXSYS_1011,
+  GROUP_LDST_1100,
+  GROUP_DPREG_1101,
+  GROUP_LDST_1110,
+  GROUP_ADVSIMD_1111
+};
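+
+// e.g. (illustration only) the encoding 0x91000400 (add x0, x0, #1)
+// has bits [28,25] == 0b1000, so dispatchGroup selects GROUP_DPIMM_1000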
+
+// bits [31, 29] of a Pseudo are the secondary dispatch vector
+
+static inline u_int32_t dispatchPseudo(u_int32_t val)
+{
+  return pickshift32(val, 31, 29, 0);
+}
+
+/*
+ * the 8 possible values for bits [31,29] in a Pseudo Instruction.
+ * Bits [28,25] are always 0000.
+ */
+
+enum DispatchPseudo {
+  PSEUDO_UNALLOC_000, // unallocated
+  PSEUDO_UNALLOC_001, // ditto
+  PSEUDO_UNALLOC_010, // ditto
+  PSEUDO_UNALLOC_011, // ditto
+  PSEUDO_UNALLOC_100, // ditto
+  PSEUDO_UNALLOC_101, // ditto
+  PSEUDO_CALLOUT_110, // CALLOUT -- bits [24,0] identify call/ret sig
+  PSEUDO_HALT_111     // HALT -- bits [24, 0] identify halt code
+};
+
+// bits [25, 23] of a DPImm are the secondary dispatch vector
+
+static inline u_int32_t dispatchDPImm(u_int32_t instr)
+{
+  return pickshift32(instr, 25, 23, 0);
+}
+
+/*
+ * the 8 possible values for bits [25,23] in a Data Processing Immediate
+ * Instruction. Bits [28,25] are always 100_.
+ */
+
+enum DispatchDPImm {
+  DPIMM_PCADR_000,   // PC-rel-addressing
+  DPIMM_PCADR_001,   // ditto
+  DPIMM_ADDSUB_010,  // Add/Subtract (immediate)
+  DPIMM_ADDSUB_011,  // ditto
+  DPIMM_LOG_100,     // Logical (immediate)
+  DPIMM_MOV_101,     // Move Wide (immediate)
+  DPIMM_BITF_110,    // Bitfield
+  DPIMM_EXTR_111     // Extract
+};
+
+// bits [29,28:26] of a LS are the secondary dispatch vector
+
+static inline u_int32_t dispatchLS(u_int32_t instr)
+{
+  return (pickshift32(instr, 29, 28, 1) |
+          pickshift32(instr, 26, 26, 0));
+}
+
+/*
+ * the 8 possible values for bits [29,28:26] in a Load/Store
+ * Instruction. Bits [28,25] are always _1_0
+ */
+
+enum DispatchLS {
+  LS_EXCL_000,    // Load/store exclusive (includes some unallocated)
+  LS_ADVSIMD_001, // AdvSIMD load/store (various -- includes some unallocated)
+  LS_LIT_010,     // Load register literal (includes some unallocated)
+  LS_LIT_011,     // ditto
+  LS_PAIR_100,    // Load/store register pair (various)
+  LS_PAIR_101,    // ditto
+  LS_OTHER_110,   // other load/store formats
+  LS_OTHER_111    // ditto
+};
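+
+// e.g. (illustration only) stp x29, x30, [sp, #-16]! (encoding
+// 0xa9bf7bfd) has bits [29,28] == 0b10 and bit [26] == 0, so
+// dispatchLS yields 0b100, i.e. LS_PAIR_100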
+
+// bits [28:24:21] of a DPReg are the secondary dispatch vector
+
+static inline u_int32_t dispatchDPReg(u_int32_t instr)
+{
+  return (pickshift32(instr, 28, 28, 2) |
+          pickshift32(instr, 24, 24, 1) |
+          pickshift32(instr, 21, 21, 0));
+}
+
+/*
+ * the 8 possible values for bits [28:24:21] in a Data Processing
+ * Register Instruction. Bits [28,25] are always _101
+ */
+
+enum DispatchDPReg {
+  DPREG_LOG_000,     // Logical (shifted register)
+  DPREG_LOG_001,     // ditto
+  DPREG_ADDSHF_010,  // Add/subtract (shifted register)
+  DPREG_ADDEXT_011,  // Add/subtract (extended register)
+  DPREG_ADDCOND_100, // Add/subtract (with carry) AND
+                     // Cond compare/select AND
+                     // Data Processing (1/2 source)
+  DPREG_UNALLOC_101, // Unallocated
+  DPREG_3SRC_110,    // Data Processing (3 source)
+  DPREG_3SRC_111     // Data Processing (3 source)
+};
+
+// bits [31,29] of a BrExSys are the secondary dispatch vector
+
+static inline u_int32_t dispatchBrExSys(u_int32_t instr)
+{
+  return pickbits32(instr, 31, 29);
+}
+
+/*
+ * the 8 possible values for bits [31,29] in a Branch/Exception/System
+ * Instruction. Bits [28,25] are always 101_
+ */
+
+enum DispatchBr {
+  BR_IMM_000,     // Unconditional branch (immediate)
+  BR_IMMCMP_001,  // Compare & branch (immediate) AND
+                  // Test & branch (immediate)
+  BR_IMMCOND_010, // Conditional branch (immediate) AND Unallocated
+  BR_UNALLOC_011, // Unallocated
+  BR_IMM_100,     // Unconditional branch (immediate)
+  BR_IMMCMP_101,  // Compare & branch (immediate) AND
+                  // Test & branch (immediate)
+  BR_REG_110,     // Unconditional branch (register) AND System AND
+                  // Excn gen AND Unallocated
+  BR_UNALLOC_111  // Unallocated
+};
+
+/*
+ * TODO still need to provide secondary decode and dispatch for
+ * AdvSIMD Insructions with instr[28,25] = 0111 or 1111
+ */
+
+#endif // CPU_AARCH64_VM_DECODE_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/depChecker_aarch64.cpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,30 @@
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "compiler/disassembler.hpp"
+#include "depChecker_aarch64.hpp"
+
+// Nothing to do on aarch64
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/depChecker_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_DEPCHECKER_AARCH64_HPP
+#define CPU_AARCH64_VM_DEPCHECKER_AARCH64_HPP
+
+// Nothing to do on aarch64
+
+#endif // CPU_AARCH64_VM_DEPCHECKER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/disassembler_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_DISASSEMBLER_AARCH64_HPP
+#define CPU_AARCH64_VM_DISASSEMBLER_AARCH64_HPP
+
+  static int pd_instruction_alignment() {
+    return 1;
+  }
+
+  static const char* pd_cpu_opts() {
+    return "";
+  }
+
+#endif // CPU_AARCH64_VM_DISASSEMBLER_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/frame_aarch64.cpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,832 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/markOop.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/monitorChunk.hpp"
+#include "runtime/os.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_aarch64.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#include "runtime/vframeArray.hpp"
+#endif
+
+#ifdef ASSERT
+void RegisterMap::check_location_valid() {
+}
+#endif
+
+
+// Profiling/safepoint support
+
+bool frame::safe_for_sender(JavaThread *thread) {
+  address   sp = (address)_sp;
+  address   fp = (address)_fp;
+  address   unextended_sp = (address)_unextended_sp;
+
+  // consider stack guards when trying to determine "safe" stack pointers
+  static size_t stack_guard_size = os::uses_stack_guard_pages() ? (StackYellowPages + StackRedPages) * os::vm_page_size() : 0;
+  size_t usable_stack_size = thread->stack_size() - stack_guard_size;
+
+  // sp must be within the usable part of the stack (not in guards)
+  bool sp_safe = (sp < thread->stack_base()) &&
+                 (sp >= thread->stack_base() - usable_stack_size);
+
+
+  if (!sp_safe) {
+    return false;
+  }
+
+  // unextended sp must be within the stack and above or equal sp
+  bool unextended_sp_safe = (unextended_sp < thread->stack_base()) &&
+                            (unextended_sp >= sp);
+
+  if (!unextended_sp_safe) {
+    return false;
+  }
+
+  // an fp must be within the stack and above (but not equal to) sp
+  // the second check, on fp + return_addr_offset, handles the case where fp is -1
+  bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base())));
+
+  // We know sp/unextended_sp are safe; only fp is questionable here
+
+  // If the current frame is known to the code cache then we can attempt to
+  // construct the sender and do some validation of it. This goes a long way
+  // toward eliminating issues when we get into frame construction code
+
+  if (_cb != NULL ) {
+
+    // First check if the frame is complete and the tester is reliable.
+    // Unfortunately we can only check frame completeness for runtime stubs
+    // and nmethods; other generic buffer blobs are more problematic so we
+    // just assume they are ok. Adapter blobs never have a complete frame
+    // and are never ok.
+
+    if (!_cb->is_frame_complete_at(_pc)) {
+      if (_cb->is_nmethod() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) {
+        return false;
+      }
+    }
+
+    // Could just be some random pointer within the codeBlob
+    if (!_cb->code_contains(_pc)) {
+      return false;
+    }
+
+    // Entry frame checks
+    if (is_entry_frame()) {
+      // an entry frame must have a valid fp.
+
+      if (!fp_safe) return false;
+
+      // Validate the JavaCallWrapper an entry frame must have
+
+      address jcw = (address)entry_frame_call_wrapper();
+
+      bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > fp);
+
+      return jcw_safe;
+
+    }
+
+    intptr_t* sender_sp = NULL;
+    address   sender_pc = NULL;
+
+    if (is_interpreted_frame()) {
+      // fp must be safe
+      if (!fp_safe) {
+        return false;
+      }
+
+      sender_pc = (address) this->fp()[return_addr_offset];
+      sender_sp = (intptr_t*) addr_at(sender_sp_offset);
+
+    } else {
+      // must be some sort of compiled/runtime frame
+      // fp does not have to be safe (although it could be checked for c1?)
+
+      // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc
+      if (_cb->frame_size() <= 0) {
+        return false;
+      }
+
+      sender_sp = _unextended_sp + _cb->frame_size();
+      sender_pc = (address) *(sender_sp-1);
+    }
+
+
+    // If the potential sender is the interpreter then we can do some more checking
+    if (Interpreter::contains(sender_pc)) {
+
+      // fp is always saved in a recognizable place in any code we generate. However
+      // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved fp
+      // is really a frame pointer.
+
+      intptr_t *saved_fp = (intptr_t*)*(sender_sp - frame::sender_sp_offset);
+      bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp);
+
+      if (!saved_fp_safe) {
+        return false;
+      }
+
+      // construct the potential sender
+
+      frame sender(sender_sp, saved_fp, sender_pc);
+
+      return sender.is_interpreted_frame_valid(thread);
+
+    }
+
+    // We must always be able to find a recognizable pc
+    CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc);
+    if (sender_pc == NULL ||  sender_blob == NULL) {
+      return false;
+    }
+
+    // Could be a zombie method
+    if (sender_blob->is_zombie() || sender_blob->is_unloaded()) {
+      return false;
+    }
+
+    // Could just be some random pointer within the codeBlob
+    if (!sender_blob->code_contains(sender_pc)) {
+      return false;
+    }
+
+    // We should never be able to see an adapter if the current frame is something from the code cache
+    if (sender_blob->is_adapter_blob()) {
+      return false;
+    }
+
+    // Could be the call_stub
+    if (StubRoutines::returns_to_call_stub(sender_pc)) {
+      intptr_t *saved_fp = (intptr_t*)*(sender_sp - frame::sender_sp_offset);
+      bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp);
+
+      if (!saved_fp_safe) {
+        return false;
+      }
+
+      // construct the potential sender
+
+      frame sender(sender_sp, saved_fp, sender_pc);
+
+      // Validate the JavaCallWrapper an entry frame must have
+      address jcw = (address)sender.entry_frame_call_wrapper();
+
+      bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp());
+
+      return jcw_safe;
+    }
+
+    if (sender_blob->is_nmethod()) {
+        nmethod* nm = sender_blob->as_nmethod_or_null();
+        if (nm != NULL) {
+            if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc)) {
+                return false;
+            }
+        }
+    }
+
+    // If the frame size is 0 (or less) something is bad, because every nmethod
+    // has a non-zero frame size: the return address counts against the
+    // callee's frame.
+
+    if (sender_blob->frame_size() <= 0) {
+      assert(!sender_blob->is_nmethod(), "should count return address at least");
+      return false;
+    }
+
+    // We should never be able to see anything here except an nmethod. If something in the
+    // code cache (current frame) is called by an entity within the code cache that entity
+    // should not be anything but the call stub (already covered), the interpreter (already covered)
+    // or an nmethod.
+
+    if (!sender_blob->is_nmethod()) {
+        return false;
+    }
+
+    // Could put some more validation for the potential non-interpreted sender
+    // frame we'd create by calling sender if I could think of any. Wait for next crash in forte...
+
+    // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb
+
+    // We've validated the potential sender that would be created
+    return true;
+  }
+
+  // Must be native-compiled frame. Since the sender will try to use fp to
+  // find linkages it must be safe
+
+  if (!fp_safe) {
+    return false;
+  }
+
+  // Will the pc we fetch be non-zero (zero is what we'd find at the oldest frame)?
+
+  if ( (address) this->fp()[return_addr_offset] == NULL) return false;
+
+
+  // could try and do some more potential verification of native frame if we could think of some...
+
+  return true;
+
+}
+
+void frame::patch_pc(Thread* thread, address pc) {
+  address* pc_addr = &(((address*) sp())[-1]);
+  if (TracePcPatching) {
+    tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]",
+                  p2i(pc_addr), p2i(*pc_addr), p2i(pc));
+  }
+  // Either the return address is the original one or we are going to
+  // patch in the same address that's already there.
+  assert(_pc == *pc_addr || pc == *pc_addr, "must be");
+  *pc_addr = pc;
+  _cb = CodeCache::find_blob(pc);
+  address original_pc = nmethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    assert(original_pc == _pc, "expected original PC to be stored before patching");
+    _deopt_state = is_deoptimized;
+    // leave _pc as is
+  } else {
+    _deopt_state = not_deoptimized;
+    _pc = pc;
+  }
+}
+
+bool frame::is_interpreted_frame() const  {
+  return Interpreter::contains(pc());
+}
+
+int frame::frame_size(RegisterMap* map) const {
+  frame sender = this->sender(map);
+  return sender.sp() - sp();
+}
+
+intptr_t* frame::entry_frame_argument_at(int offset) const {
+  // convert offset to index to deal with tsi
+  int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize);
+  // Entry frame's arguments are always in relation to unextended_sp()
+  return &unextended_sp()[index];
+}
+
+// sender_sp
+#ifdef CC_INTERP
+intptr_t* frame::interpreter_frame_sender_sp() const {
+  assert(is_interpreted_frame(), "interpreted frame expected");
+  // QQQ why does this specialized method exist if frame::sender_sp() does the same thing?
+  // seems odd and if we always know interpreted vs. non then sender_sp() is really
+  // doing too much work.
+  return get_interpreterState()->sender_sp();
+}
+
+// monitor elements
+
+BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
+  return get_interpreterState()->monitor_base();
+}
+
+BasicObjectLock* frame::interpreter_frame_monitor_end() const {
+  return (BasicObjectLock*) get_interpreterState()->stack_base();
+}
+
+#else // CC_INTERP
+
+intptr_t* frame::interpreter_frame_sender_sp() const {
+  assert(is_interpreted_frame(), "interpreted frame expected");
+  return (intptr_t*) at(interpreter_frame_sender_sp_offset);
+}
+
+void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) {
+  assert(is_interpreted_frame(), "interpreted frame expected");
+  ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp);
+}
+
+
+// monitor elements
+
+BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
+  return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset);
+}
+
+BasicObjectLock* frame::interpreter_frame_monitor_end() const {
+  BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset);
+  // make sure the pointer points inside the frame
+  assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer");
+  assert((intptr_t*) result < fp(),  "monitor end should be strictly below the frame pointer");
+  return result;
+}
+
+void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) {
+  *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value;
+}
+
+// Used by template based interpreter deoptimization
+void frame::interpreter_frame_set_last_sp(intptr_t* sp) {
+    *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp;
+}
+#endif // CC_INTERP
+
+frame frame::sender_for_entry_frame(RegisterMap* map) const {
+  assert(map != NULL, "map must be set");
+  // Java frame called from C; skip all C frames and return top C
+  // frame of that chunk as the sender
+  JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor();
+  assert(!entry_frame_is_first(), "next Java fp must be non zero");
+  assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack");
+  map->clear();
+  assert(map->include_argument_oops(), "should be set by clear");
+  if (jfa->last_Java_pc() != NULL ) {
+    frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc());
+    return fr;
+  }
+  frame fr(jfa->last_Java_sp(), jfa->last_Java_fp());
+  return fr;
+}
+
+//------------------------------------------------------------------------------
+// frame::verify_deopt_original_pc
+//
+// Verifies the calculated original PC of a deoptimization PC for the
+// given unextended SP.  The unextended SP might also be the saved SP
+// for MethodHandle call sites.
+#ifdef ASSERT
+void frame::verify_deopt_original_pc(nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return) {
+  frame fr;
+
+  // This is ugly but it's better than to change {get,set}_original_pc
+  // to take an SP value as argument.  And it's only a debugging
+  // method anyway.
+  fr._unextended_sp = unextended_sp;
+
+  address original_pc = nm->get_original_pc(&fr);
+  assert(nm->insts_contains(original_pc), "original PC must be in nmethod");
+  assert(nm->is_method_handle_return(original_pc) == is_method_handle_return, "must be");
+}
+#endif
+
+//------------------------------------------------------------------------------
+// frame::adjust_unextended_sp
+void frame::adjust_unextended_sp() {
+  // If we are returning to a compiled MethodHandle call site, the
+  // saved_fp will in fact be a saved value of the unextended SP.  The
+  // simplest way to tell whether we are returning to such a call site
+  // is as follows:
+
+  nmethod* sender_nm = (_cb == NULL) ? NULL : _cb->as_nmethod_or_null();
+  if (sender_nm != NULL) {
+    // If the sender PC is a deoptimization point, get the original
+    // PC.  For MethodHandle call site the unextended_sp is stored in
+    // saved_fp.
+    if (sender_nm->is_deopt_mh_entry(_pc)) {
+      DEBUG_ONLY(verify_deopt_mh_original_pc(sender_nm, _fp));
+      _unextended_sp = _fp;
+    }
+    else if (sender_nm->is_deopt_entry(_pc)) {
+      DEBUG_ONLY(verify_deopt_original_pc(sender_nm, _unextended_sp));
+    }
+    else if (sender_nm->is_method_handle_return(_pc)) {
+      _unextended_sp = _fp;
+    }
+  }
+}
+
+//------------------------------------------------------------------------------
+// frame::update_map_with_saved_link
+void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) {
+  // The interpreter and compiler(s) always save fp in a known
+  // location on entry. We must record where that location is
+  // so that if fp was live on callout from c2 we can find
+  // the saved copy no matter what it called.
+
+  // Since the interpreter always saves fp if we record where it is then
+  // we don't have to always save fp on entry and exit to c2 compiled
+  // code, on entry will be enough.
+  map->set_location(rfp->as_VMReg(), (address) link_addr);
+  // this is weird: "H" ought to be at a higher address, however the
+  // oopMaps seem to have the "H" regs at the same address as the
+  // vanilla register.
+  // XXXX make this go away
+  if (true) {
+    map->set_location(rfp->as_VMReg()->next(), (address) link_addr);
+  }
+}
+
+
+//------------------------------------------------------------------------------
+// frame::sender_for_interpreter_frame
+frame frame::sender_for_interpreter_frame(RegisterMap* map) const {
+  // SP is the raw SP from the sender after adapter or interpreter
+  // extension.
+  intptr_t* sender_sp = this->sender_sp();
+
+  // This is the sp before any possible extension (adapter/locals).
+  intptr_t* unextended_sp = interpreter_frame_sender_sp();
+
+#ifdef COMPILER2
+  if (map->update_map()) {
+    update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset));
+  }
+#endif // COMPILER2
+
+  return frame(sender_sp, unextended_sp, link(), sender_pc());
+}
+
+
+//------------------------------------------------------------------------------
+// frame::sender_for_compiled_frame
+frame frame::sender_for_compiled_frame(RegisterMap* map) const {
+  // we cannot rely upon the last fp having been saved to the thread
+  // in C2 code but it will have been pushed onto the stack. so we
+  // have to find it relative to the unextended sp
+
+  assert(_cb->frame_size() >= 0, "must have non-negative frame size");
+  intptr_t* l_sender_sp = unextended_sp() + _cb->frame_size();
+  intptr_t* unextended_sp = l_sender_sp;
+
+  // the return address is always the word on the stack just below the sender's sp
+  address sender_pc = (address) *(l_sender_sp-1);
+
+  intptr_t** saved_fp_addr = (intptr_t**) (l_sender_sp - frame::sender_sp_offset);
+
+  // assert (sender_sp() == l_sender_sp, "should be");
+  // assert (*saved_fp_addr == link(), "should be");
+
+  if (map->update_map()) {
+    // Tell GC to use argument oopmaps for some runtime stubs that need it.
+    // For C1, the runtime stub might not have oop maps, so set this flag
+    // outside of update_register_map.
+    map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
+    if (_cb->oop_maps() != NULL) {
+      OopMapSet::update_register_map(this, map);
+    }
+
+    // Since the prolog does the save and restore of FP there is no
+    // oopmap for it so we must fill in its location as if there was
+    // an oopmap entry since if our caller was compiled code there
+    // could be live jvm state in it.
+    update_map_with_saved_link(map, saved_fp_addr);
+  }
+
+  return frame(l_sender_sp, unextended_sp, *saved_fp_addr, sender_pc);
+}
+
+//------------------------------------------------------------------------------
+// frame::sender
+frame frame::sender(RegisterMap* map) const {
+  // Default is we don't have to follow them. The sender_for_xxx will
+  // update it accordingly
+  map->set_include_argument_oops(false);
+
+  if (is_entry_frame())
+    return sender_for_entry_frame(map);
+  if (is_interpreted_frame())
+    return sender_for_interpreter_frame(map);
+  assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
+
+  // This test looks odd: why is it not is_compiled_frame() ?  That's
+  // because stubs also have OOP maps.
+  if (_cb != NULL) {
+    return sender_for_compiled_frame(map);
+  }
+
+  // Must be native-compiled frame, i.e. the marshaling code for native
+  // methods that exists in the core system.
+  return frame(sender_sp(), link(), sender_pc());
+}
+
+bool frame::interpreter_frame_equals_unpacked_fp(intptr_t* fp) {
+  assert(is_interpreted_frame(), "must be interpreter frame");
+  Method* method = interpreter_frame_method();
+  // When unpacking an optimized frame the frame pointer is
+  // adjusted with:
+  int diff = (method->max_locals() - method->size_of_parameters()) *
+             Interpreter::stackElementWords;
+  return _fp == (fp - diff);
+}
+
+bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
+// QQQ
+#ifdef CC_INTERP
+#else
+  assert(is_interpreted_frame(), "Not an interpreted frame");
+  // These are reasonable sanity checks
+  if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) {
+    return false;
+  }
+  if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) {
+    return false;
+  }
+  if (fp() + interpreter_frame_initial_sp_offset < sp()) {
+    return false;
+  }
+  // These are hacks to keep us out of trouble.
+  // The problem with these is that they mask other problems
+  if (fp() <= sp()) {        // this attempts to deal with unsigned comparison above
+    return false;
+  }
+
+  // do some validation of frame elements
+
+  // first the method
+
+  Method* m = *interpreter_frame_method_addr();
+
+  // validate the method we'd find in this potential sender
+  if (!m->is_valid_method()) return false;
+
+  // stack frames shouldn't be much larger than max_stack elements
+
+  if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) {
+    return false;
+  }
+
+  // validate bci/bcx
+
+  address  bcp    = interpreter_frame_bcp();
+  if (m->validate_bci_from_bcp(bcp) < 0) {
+    return false;
+  }
+
+  // validate constantPoolCache*
+  ConstantPoolCache* cp = *interpreter_frame_cache_addr();
+  if (cp == NULL || !cp->is_metaspace_object()) return false;
+
+  // validate locals
+
+  address locals =  (address) *interpreter_frame_locals_addr();
+
+  if (locals > thread->stack_base() || locals < (address) fp()) return false;
+
+  // We'd have to be pretty unlucky to be misled at this point
+
+#endif // CC_INTERP
+  return true;
+}
+
+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) {
+#ifdef CC_INTERP
+  // Needed for JVMTI. The result should always be in the
+  // interpreterState object
+  interpreterState istate = get_interpreterState();
+#endif // CC_INTERP
+  assert(is_interpreted_frame(), "interpreted frame expected");
+  Method* method = interpreter_frame_method();
+  BasicType type = method->result_type();
+
+  intptr_t* tos_addr;
+  if (method->is_native()) {
+    // TODO : ensure AARCH64 does the same as Intel here i.e. push v0 then r0
+    // Prior to calling into the runtime to report the method_exit the possible
+    // return value is pushed to the native stack. If the result is a jfloat/jdouble
+    // then ST0 is saved before EAX/EDX. See the note in generate_native_result
+    tos_addr = (intptr_t*)sp();
+    if (type == T_FLOAT || type == T_DOUBLE) {
+      // This is times two because we do a push(ltos) after pushing XMM0
+      // and that takes two interpreter stack slots.
+      tos_addr += 2 * Interpreter::stackElementWords;
+    }
+  } else {
+    tos_addr = (intptr_t*)interpreter_frame_tos_address();
+  }
+
+  switch (type) {
+    case T_OBJECT  :
+    case T_ARRAY   : {
+      oop obj;
+      if (method->is_native()) {
+#ifdef CC_INTERP
+        obj = istate->_oop_temp;
+#else
+        obj = cast_to_oop(at(interpreter_frame_oop_temp_offset));
+#endif // CC_INTERP
+      } else {
+        oop* obj_p = (oop*)tos_addr;
+        obj = (obj_p == NULL) ? (oop)NULL : *obj_p;
+      }
+      assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
+      *oop_result = obj;
+      break;
+    }
+    case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break;
+    case T_BYTE    : value_result->b = *(jbyte*)tos_addr; break;
+    case T_CHAR    : value_result->c = *(jchar*)tos_addr; break;
+    case T_SHORT   : value_result->s = *(jshort*)tos_addr; break;
+    case T_INT     : value_result->i = *(jint*)tos_addr; break;
+    case T_LONG    : value_result->j = *(jlong*)tos_addr; break;
+    case T_FLOAT   : value_result->f = *(jfloat*)tos_addr; break;
+    case T_DOUBLE  : value_result->d = *(jdouble*)tos_addr; break;
+    case T_VOID    : /* Nothing to do */ break;
+    default        : ShouldNotReachHere();
+  }
+
+  return type;
+}
+
+
+intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
+  int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize);
+  return &interpreter_frame_tos_address()[index];
+}
+
+#ifndef PRODUCT
+
+#define DESCRIBE_FP_OFFSET(name) \
+  values.describe(frame_no, fp() + frame::name##_offset, #name)
+
+void frame::describe_pd(FrameValues& values, int frame_no) {
+  if (is_interpreted_frame()) {
+    DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_method);
+    DESCRIBE_FP_OFFSET(interpreter_frame_mdp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_cache);
+    DESCRIBE_FP_OFFSET(interpreter_frame_locals);
+    DESCRIBE_FP_OFFSET(interpreter_frame_bcp);
+    DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp);
+  }
+}
+#endif
+
+intptr_t *frame::initial_deoptimization_info() {
+  // Not used on aarch64, but we must return something.
+  return NULL;
+}
+
+intptr_t* frame::real_fp() const {
+  if (_cb != NULL) {
+    // use the frame size if valid
+    int size = _cb->frame_size();
+    if (size > 0) {
+      return unextended_sp() + size;
+    }
+  }
+  // else rely on fp()
+  assert(! is_compiled_frame(), "unknown compiled frame size");
+  return fp();
+}
+
+#undef DESCRIBE_FP_OFFSET
+
+#define DESCRIBE_FP_OFFSET(name)                                        \
+  {                                                                     \
+    unsigned long *p = (unsigned long *)fp;                             \
+    printf("0x%016lx 0x%016lx %s\n", (unsigned long)(p + frame::name##_offset), \
+           p[frame::name##_offset], #name);                             \
+  }
+
+static __thread unsigned long nextfp;
+static __thread unsigned long nextpc;
+static __thread unsigned long nextsp;
+static __thread RegisterMap *reg_map;
+
+static void printbc(Method *m, intptr_t bcx) {
+  const char *name;
+  char buf[16];
+  if (m->validate_bci_from_bcp((address)bcx) < 0
+      || !m->contains((address)bcx)) {
+    name = "???";
+    snprintf(buf, sizeof buf, "(bad)");
+  } else {
+    int bci = m->bci_from((address)bcx);
+    snprintf(buf, sizeof buf, "%d", bci);
+    name = Bytecodes::name(m->code_at(bci));
+  }
+  ResourceMark rm;
+  printf("%s : %s ==> %s\n", m->name_and_sig_as_C_string(), buf, name);
+}
+
+void internal_pf(unsigned long sp, unsigned long fp, unsigned long pc, unsigned long bcx) {
+  if (! fp)
+    return;
+
+  DESCRIBE_FP_OFFSET(return_addr);
+  DESCRIBE_FP_OFFSET(link);
+  DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp);
+  DESCRIBE_FP_OFFSET(interpreter_frame_last_sp);
+  DESCRIBE_FP_OFFSET(interpreter_frame_method);
+  DESCRIBE_FP_OFFSET(interpreter_frame_mdp);
+  DESCRIBE_FP_OFFSET(interpreter_frame_cache);
+  DESCRIBE_FP_OFFSET(interpreter_frame_locals);
+  DESCRIBE_FP_OFFSET(interpreter_frame_bcp);
+  DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp);
+  unsigned long *p = (unsigned long *)fp;
+
+  // We want to see all frames, native and Java.  For compiled and
+  // interpreted frames we have special information that allows us to
+  // unwind them; for everything else we assume that the native frame
+  // pointer chain is intact.
+  frame this_frame((intptr_t*)sp, (intptr_t*)fp, (address)pc);
+  if (this_frame.is_compiled_frame() ||
+      this_frame.is_interpreted_frame()) {
+    frame sender = this_frame.sender(reg_map);
+    nextfp = (unsigned long)sender.fp();
+    nextpc = (unsigned long)sender.pc();
+    nextsp = (unsigned long)sender.unextended_sp();
+  } else {
+    nextfp = p[frame::link_offset];
+    nextpc = p[frame::return_addr_offset];
+    nextsp = (unsigned long)&p[frame::sender_sp_offset];
+  }
+
+  if (bcx == -1ul)
+    bcx = p[frame::interpreter_frame_bcp_offset];
+
+  if (Interpreter::contains((address)pc)) {
+    Method* m = (Method*)p[frame::interpreter_frame_method_offset];
+    if (m && m->is_method()) {
+      printbc(m, bcx);
+    } else {
+      printf("not a Method\n");
+    }
+  } else {
+    CodeBlob *cb = CodeCache::find_blob((address)pc);
+    if (cb != NULL) {
+      if (cb->is_nmethod()) {
+        ResourceMark rm;
+        nmethod* nm = (nmethod*)cb;
+        printf("nmethod %s\n", nm->method()->name_and_sig_as_C_string());
+      } else if (cb->name()) {
+        printf("CodeBlob %s\n", cb->name());
+      }
+    }
+  }
+}
+
+extern "C" void npf() {
+  CodeBlob *cb = CodeCache::find_blob((address)nextpc);
+  // C2 does not always chain the frame pointers when it can, instead
+  // preferring to use fixed offsets from SP, so a simple leave() does
+  // not work.  Instead, it adds the frame size to SP then pops FP and
+  // LR.  We have to do the same thing to get a good call chain.
+  if (cb && cb->frame_size())
+    nextfp = nextsp + wordSize * (cb->frame_size() - 2);
+  internal_pf (nextsp, nextfp, nextpc, -1);
+}
+
+extern "C" void pf(unsigned long sp, unsigned long fp, unsigned long pc,
+                   unsigned long bcx, unsigned long thread) {
+  RegisterMap map((JavaThread*)thread, false);
+  if (!reg_map) {
+    reg_map = (RegisterMap*)os::malloc(sizeof map, mtNone);
+  }
+  memcpy(reg_map, &map, sizeof map);
+  {
+    CodeBlob *cb = CodeCache::find_blob((address)pc);
+    if (cb && cb->frame_size())
+      fp = sp + wordSize * (cb->frame_size() - 2);
+  }
+  internal_pf(sp, fp, pc, bcx);
+}
+
+// support for printing out where we are in a Java method
+// needs to be passed current fp and bcp register values
+// prints method name, bc index and bytecode name
+extern "C" void pm(unsigned long fp, unsigned long bcx) {
+  DESCRIBE_FP_OFFSET(interpreter_frame_method);
+  unsigned long *p = (unsigned long *)fp;
+  Method* m = (Method*)p[frame::interpreter_frame_method_offset];
+  printbc(m, bcx);
+}
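+
+// a possible invocation from a debugger (illustration only; assumes
+// this port's register assignments, e.g. x29 == fp and x22 == rbcp):
+//   (gdb) call pm((unsigned long)$x29, (unsigned long)$x22)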
+
+#ifndef PRODUCT
+// This is a generic constructor which is only used by pns() in debug.cpp.
+frame::frame(void* sp, void* fp, void* pc) {
+  init((intptr_t*)sp, (intptr_t*)fp, (address)pc);
+}
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/frame_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,216 @@
+/*
+ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_FRAME_AARCH64_HPP
+#define CPU_AARCH64_VM_FRAME_AARCH64_HPP
+
+#include "runtime/synchronizer.hpp"
+#include "utilities/top.hpp"
+
+// A frame represents a physical stack frame (an activation).  Frames can be
+// C or Java frames, and the Java frames can be interpreted or compiled.
+// In contrast, vframes represent source-level activations, so that one physical frame
+// can correspond to multiple source level frames because of inlining.
+// A frame is comprised of {pc, fp, sp}
+// ------------------------------ Asm interpreter ----------------------------------------
+// Layout of asm interpreter frame:
+//    [expression stack      ] * <- sp
+
+//    [monitors[0]           ]   \
+//     ...                        | monitor block size = k
+//    [monitors[k-1]         ]   /
+//    [frame initial esp     ] ( == &monitors[0], initially here)       initial_sp_offset
+//    [byte code index/pointer]                  = bcx()                bcx_offset
+
+//    [pointer to locals     ]                   = locals()             locals_offset
+//    [constant pool cache   ]                   = cache()              cache_offset
+
+//    [methodData            ]                   = mdp()                mdx_offset
+//    [methodOop             ]                   = method()             method_offset
+
+//    [last esp              ]                   = last_sp()            last_sp_offset
+//    [old stack pointer     ]                     (sender_sp)          sender_sp_offset
+
+//    [old frame pointer     ]   <- fp           = link()
+//    [return pc             ]
+
+//    [last sp               ]
+//    [oop temp              ]                     (only for native calls)
+
+//    [locals and parameters ]
+//                               <- sender sp
+// ------------------------------ Asm interpreter ----------------------------------------
+
+// ------------------------------ C++ interpreter ----------------------------------------
+//
+// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run)
+//
+//                             <- SP (current esp/rsp)
+//    [local variables         ] BytecodeInterpreter::run local variables
+//    ...                        BytecodeInterpreter::run local variables
+//    [local variables         ] BytecodeInterpreter::run local variables
+//    [old frame pointer       ]   fp [ BytecodeInterpreter::run's ebp/rbp ]
+//    [return pc               ]  (return to frame manager)
+//    [interpreter_state*      ]  (arg to BytecodeInterpreter::run)   --------------
+//    [expression stack        ] <- last_Java_sp                           |
+//    [...                     ] * <- interpreter_state.stack              |
+//    [expression stack        ] * <- interpreter_state.stack_base         |
+//    [monitors                ]   \                                       |
+//     ...                          | monitor block size                   |
+//    [monitors                ]   / <- interpreter_state.monitor_base     |
+//    [struct interpretState   ] <-----------------------------------------|
+//    [return pc               ] (return to callee of frame manager [1])
+//    [locals and parameters   ]
+//                               <- sender sp
+
+// [1] When the c++ interpreter calls a new method it returns to the frame
+//     manager which allocates a new frame on the stack. In that case there
+//     is no real callee of this newly allocated frame. The frame manager is
+//     aware of the additional frame(s) and will pop them as nested calls
+//     complete. However, to make it look good in the debugger the frame
+//     manager actually installs a dummy pc pointing to RecursiveInterpreterActivation
+//     with a fake interpreter_state* parameter to make it easy to debug
+//     nested calls.
+
+// Note that contrary to the layout for the assembly interpreter the
+// expression stack allocated for the C++ interpreter is full sized.
+// However this is not as bad as it seems as the interpreter frame_manager
+// will truncate the unused space on successive method calls.
+//
+// ------------------------------ C++ interpreter ----------------------------------------
+
+ public:
+  enum {
+    pc_return_offset                                 =  0,
+    // All frames
+    link_offset                                      =  0,
+    return_addr_offset                               =  1,
+    sender_sp_offset                                 =  2,
+
+#ifndef CC_INTERP
+
+    // Interpreter frames
+    interpreter_frame_oop_temp_offset                =  3, // for native calls only
+
+    interpreter_frame_sender_sp_offset               = -1,
+    // outgoing sp before a call to an invoked method
+    interpreter_frame_last_sp_offset                 = interpreter_frame_sender_sp_offset - 1,
+    interpreter_frame_method_offset                  = interpreter_frame_last_sp_offset - 1,
+    interpreter_frame_mdp_offset                     = interpreter_frame_method_offset - 1,
+    interpreter_frame_cache_offset                   = interpreter_frame_mdp_offset - 1,
+    interpreter_frame_locals_offset                  = interpreter_frame_cache_offset - 1,
+    interpreter_frame_bcp_offset                     = interpreter_frame_locals_offset - 1,
+    interpreter_frame_initial_sp_offset              = interpreter_frame_bcp_offset - 1,
+
+    interpreter_frame_monitor_block_top_offset       = interpreter_frame_initial_sp_offset,
+    interpreter_frame_monitor_block_bottom_offset    = interpreter_frame_initial_sp_offset,
+
+#endif // CC_INTERP
+
+    // Entry frames
+    // n.b. these values are determined by the layout defined in
+    // stubGenerator for the Java call stub
+    entry_frame_after_call_words                     = 27,
+    entry_frame_call_wrapper_offset                  = -8,
+
+    // we don't need a save area
+    arg_reg_save_area_bytes                          =  0,
+
+    // TODO - check that this is still correct
+    // Native frames
+
+    native_frame_initial_param_offset                =  2
+
+  };
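+
+  // worked example (illustration only): with the offsets above the
+  // method slot of an interpreter frame sits 3 words below fp, so
+  //   Method* m = (Method*)ptr_at(interpreter_frame_method_offset); // reads fp[-3]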
+
+  intptr_t ptr_at(int offset) const {
+    return *ptr_at_addr(offset);
+  }
+
+  void ptr_at_put(int offset, intptr_t value) {
+    *ptr_at_addr(offset) = value;
+  }
+
+ private:
+  // an additional field beyond _sp and _pc:
+  intptr_t*   _fp; // frame pointer
+  // The interpreter and adapters will extend the frame of the caller.
+  // Since oopMaps are based on the sp of the caller before extension
+  // we need to know that value. However in order to compute the address
+  // of the return address we need the real "raw" sp. Since sparc already
+  // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's
+  // original sp we use that convention.
+
+  intptr_t*     _unextended_sp;
+  void adjust_unextended_sp();
+
+  intptr_t* ptr_at_addr(int offset) const {
+    return (intptr_t*) addr_at(offset);
+  }
+
+#ifdef ASSERT
+  // Used in frame::sender_for_{interpreter,compiled}_frame
+  static void verify_deopt_original_pc(   nmethod* nm, intptr_t* unextended_sp, bool is_method_handle_return = false);
+  static void verify_deopt_mh_original_pc(nmethod* nm, intptr_t* unextended_sp) {
+    verify_deopt_original_pc(nm, unextended_sp, true);
+  }
+#endif
+
+ public:
+  // Constructors
+
+  frame(intptr_t* sp, intptr_t* fp, address pc);
+
+  frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc);
+
+  frame(intptr_t* sp, intptr_t* fp);
+
+  void init(intptr_t* sp, intptr_t* fp, address pc);
+
+  // accessors for the instance variables
+  // Note: not necessarily the real 'frame pointer' (see real_fp)
+  intptr_t*   fp() const { return _fp; }
+
+  inline address* sender_pc_addr() const;
+
+  // return address of param, zero origin index.
+  inline address* native_param_addr(int idx) const;
+
+  // expression stack tos if we are nested in a java call
+  intptr_t* interpreter_frame_last_sp() const;
+
+  // helper to update a map with the callee-saved fp (rfp on AArch64)
+  static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr);
+
+#ifndef CC_INTERP
+  // deoptimization support
+  void interpreter_frame_set_last_sp(intptr_t* sp);
+#endif // CC_INTERP
+
+#ifdef CC_INTERP
+  inline interpreterState get_interpreterState() const;
+#endif // CC_INTERP
+
+#endif // CPU_AARCH64_VM_FRAME_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/frame_aarch64.inline.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,348 @@
+/*
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_FRAME_AARCH64_INLINE_HPP
+#define CPU_AARCH64_VM_FRAME_AARCH64_INLINE_HPP
+
+#include "code/codeCache.hpp"
+#include "code/vmreg.inline.hpp"
+
+// Inline functions for AArch64 frames:
+
+// Constructors:
+
+inline frame::frame() {
+  _pc = NULL;
+  _sp = NULL;
+  _unextended_sp = NULL;
+  _fp = NULL;
+  _cb = NULL;
+  _deopt_state = unknown;
+}
+
+static int spin;
+
+inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) {
+  intptr_t a = intptr_t(sp);
+  intptr_t b = intptr_t(fp);
+#ifndef PRODUCT
+  if (fp)
+    if (sp > fp || (fp - sp > 0x100000))
+      for(;;)
+        asm("nop");
+#endif
+  _sp = sp;
+  _unextended_sp = sp;
+  _fp = fp;
+  _pc = pc;
+  assert(pc != NULL, "no pc?");
+  _cb = CodeCache::find_blob(pc);
+  adjust_unextended_sp();
+
+  address original_pc = nmethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    _pc = original_pc;
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) {
+  init(sp, fp, pc);
+}
+
+inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) {
+  intptr_t a = intptr_t(sp);
+  intptr_t b = intptr_t(fp);
+#ifndef PRODUCT
+  if (fp)
+    if (sp > fp || (fp - sp > 0x100000))
+      for(;;)
+        asm("nop");
+#endif
+  _sp = sp;
+  _unextended_sp = unextended_sp;
+  _fp = fp;
+  _pc = pc;
+  assert(pc != NULL, "no pc?");
+  _cb = CodeCache::find_blob(pc);
+  adjust_unextended_sp();
+
+  address original_pc = nmethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    _pc = original_pc;
+    assert(((nmethod*)_cb)->insts_contains(_pc), "original PC must be in nmethod");
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+inline frame::frame(intptr_t* sp, intptr_t* fp) {
+  intptr_t a = intptr_t(sp);
+  intptr_t b = intptr_t(fp);
+#ifndef PRODUCT
+  if (fp)
+    if (sp > fp || (fp - sp > 0x100000))
+      for(;;)
+        asm("nop");
+#endif
+  _sp = sp;
+  _unextended_sp = sp;
+  _fp = fp;
+  _pc = (address)(sp[-1]);
+
+  // Here's a sticky one. This constructor can be called via AsyncGetCallTrace
+  // when last_Java_sp is non-null but the pc fetched is junk. If we are truly
+  // unlucky the junk value could point to a zombied method and we'll die on the
+  // find_blob call. This is also why we can have no asserts on the validity
+  // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler
+  // -> pd_last_frame should use a specialized version of pd_last_frame which could
+  // call a specialized frame constructor instead of this one.
+  // Then we could use the assert below. However this assert is of somewhat dubious
+  // value.
+  // assert(_pc != NULL, "no pc?");
+
+  _cb = CodeCache::find_blob(_pc);
+  adjust_unextended_sp();
+
+  address original_pc = nmethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    _pc = original_pc;
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+}
+
+// Accessors
+
+inline bool frame::equal(frame other) const {
+  bool ret =  sp() == other.sp()
+              && unextended_sp() == other.unextended_sp()
+              && fp() == other.fp()
+              && pc() == other.pc();
+  assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction");
+  return ret;
+}
+
+// Return unique id for this frame. The id must have a value where we can distinguish
+// identity and younger/older relationship. NULL represents an invalid (incomparable)
+// frame.
+inline intptr_t* frame::id(void) const { return unextended_sp(); }
+
+// Relationals on frames, based on frame ids
+// Return true if the frame is younger (more recent activation) than the frame represented by id
+inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id");
+                                                    return this->id() < id ; }
+
+// Return true if the frame is older (less recent activation) than the frame represented by id
+inline bool frame::is_older(intptr_t* id) const   { assert(this->id() != NULL && id != NULL, "NULL frame id");
+                                                    return this->id() > id ; }
+
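+// Note: the stack grows towards lower addresses, so a younger (more
+// recent) activation has a numerically smaller unextended_sp; hence
+// '<' above means younger and '>' means older.
+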
+
+
+inline intptr_t* frame::link() const              { return (intptr_t*) *(intptr_t **)addr_at(link_offset); }
+inline void      frame::set_link(intptr_t* addr)  { *(intptr_t **)addr_at(link_offset) = addr; }
+
+
+inline intptr_t* frame::unextended_sp() const     { return _unextended_sp; }
+
+// Return address:
+
+inline address* frame::sender_pc_addr()      const { return (address*) addr_at( return_addr_offset); }
+inline address  frame::sender_pc()           const { return *sender_pc_addr(); }
+
+// return address of param, zero origin index.
+inline address* frame::native_param_addr(int idx) const { return (address*) addr_at( native_frame_initial_param_offset+idx); }
+
+#ifdef CC_INTERP
+
+inline interpreterState frame::get_interpreterState() const {
+  return ((interpreterState)addr_at( -((int)sizeof(BytecodeInterpreter))/wordSize ));
+}
+
+inline intptr_t*    frame::sender_sp()        const {
+  // Hmm this seems awfully expensive QQQ, is this really called with interpreted frames?
+  if (is_interpreted_frame()) {
+    assert(false, "should never happen");
+    return get_interpreterState()->sender_sp();
+  } else {
+    return            addr_at(sender_sp_offset);
+  }
+}
+
+inline intptr_t** frame::interpreter_frame_locals_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return &(get_interpreterState()->_locals);
+}
+
+inline intptr_t* frame::interpreter_frame_bcx_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return (intptr_t*) &(get_interpreterState()->_bcp);
+}
+
+
+// Constant pool cache
+
+inline constantPoolCacheOop* frame::interpreter_frame_cache_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return &(get_interpreterState()->_constants);
+}
+
+// Method
+
+inline methodOop* frame::interpreter_frame_method_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return &(get_interpreterState()->_method);
+}
+
+inline intptr_t* frame::interpreter_frame_mdx_addr() const {
+  assert(is_interpreted_frame(), "must be interpreted");
+  return (intptr_t*) &(get_interpreterState()->_mdx);
+}
+
+// top of expression stack
+inline intptr_t* frame::interpreter_frame_tos_address() const {
+  assert(is_interpreted_frame(), "wrong frame type");
+  return get_interpreterState()->_stack + 1;
+}
+
+#else /* asm interpreter */
+inline intptr_t*    frame::sender_sp()        const { return            addr_at(   sender_sp_offset); }
+
+inline intptr_t** frame::interpreter_frame_locals_addr() const {
+  return (intptr_t**)addr_at(interpreter_frame_locals_offset);
+}
+
+inline intptr_t* frame::interpreter_frame_last_sp() const {
+  return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset);
+}
+
+inline intptr_t* frame::interpreter_frame_bcp_addr() const {
+  return (intptr_t*)addr_at(interpreter_frame_bcp_offset);
+}
+
+inline intptr_t* frame::interpreter_frame_mdp_addr() const {
+  return (intptr_t*)addr_at(interpreter_frame_mdp_offset);
+}
+
+
+// Constant pool cache
+
+inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const {
+  return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset);
+}
+
+// Method
+
+inline Method** frame::interpreter_frame_method_addr() const {
+  return (Method**)addr_at(interpreter_frame_method_offset);
+}
+
+// top of expression stack
+inline intptr_t* frame::interpreter_frame_tos_address() const {
+  intptr_t* last_sp = interpreter_frame_last_sp();
+  if (last_sp == NULL) {
+    return sp();
+  } else {
+    // sp() may have been extended or shrunk by an adapter.  At least
+    // check that we don't fall behind the legal region.
+    // For top deoptimized frame last_sp == interpreter_frame_monitor_end.
+    assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos");
+    return last_sp;
+  }
+}
+
+inline oop* frame::interpreter_frame_temp_oop_addr() const {
+  return (oop *)(fp() + interpreter_frame_oop_temp_offset);
+}
+
+#endif /* CC_INTERP */
+
+inline int frame::pd_oop_map_offset_adjustment() const {
+  return 0;
+}
+
+inline int frame::interpreter_frame_monitor_size() {
+  return BasicObjectLock::size();
+}
+
+
+// expression stack
+// (the max_stack arguments are used by the GC; see class FrameClosure)
+
+inline intptr_t* frame::interpreter_frame_expression_stack() const {
+  intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end();
+  return monitor_end-1;
+}
+
+
+inline jint frame::interpreter_frame_expression_stack_direction() { return -1; }
+
+
+// Entry frames
+
+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const {
+ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset);
+}
+
+
+// Compiled frames
+
+inline int frame::local_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) {
+  return (nof_args - local_index + (local_index < nof_args ? 1: -1));
+}
+
+inline int frame::monitor_offset_for_compiler(int local_index, int nof_args, int max_nof_locals, int max_nof_monitors) {
+  return local_offset_for_compiler(local_index, nof_args, max_nof_locals, max_nof_monitors);
+}
+
+inline int frame::min_local_offset_for_compiler(int nof_args, int max_nof_locals, int max_nof_monitors) {
+  return (nof_args - (max_nof_locals + max_nof_monitors*2) - 1);
+}
+
+inline bool frame::volatile_across_calls(Register reg) {
+  return true;
+}
+
+
+
+inline oop frame::saved_oop_result(RegisterMap* map) const {
+  oop* result_adr = (oop *)map->location(r0->as_VMReg());
+  guarantee(result_adr != NULL, "bad register save location");
+
+  return (*result_adr);
+}
+
+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) {
+  oop* result_adr = (oop *)map->location(r0->as_VMReg());
+  guarantee(result_adr != NULL, "bad register save location");
+
+  *result_adr = obj;
+}
+
+#endif // CPU_AARCH64_VM_FRAME_AARCH64_INLINE_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/globalDefinitions_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_GLOBALDEFINITIONS_AARCH64_HPP
+#define CPU_AARCH64_VM_GLOBALDEFINITIONS_AARCH64_HPP
+
+const int StackAlignmentInBytes  = 16;
+
+// Indicates whether the C calling conventions require that
+// 32-bit integer argument values are properly extended to 64 bits.
+// If set, SharedRuntime::c_calling_convention() must adapt
+// signatures accordingly.
+const bool CCallingConventionRequiresIntsAsLongs = true;
+
+#define SUPPORTS_NATIVE_CX8
+
+// The maximum B/BL offset range on AArch64 is 128MB.
+#undef CODE_CACHE_DEFAULT_LIMIT
+#define CODE_CACHE_DEFAULT_LIMIT (128*M)
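+// (B and BL encode a signed 26-bit word offset: +/- 2^25 words * 4
+// bytes = +/- 128MB.  Keeping the code cache within this limit lets
+// any nmethod reach any other with a single branch.)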
+
+// According to the ARMv8 ARM, "Concurrent modification and execution
+// of instructions can lead to the resulting instruction performing
+// any behavior that can be achieved by executing any sequence of
+// instructions that can be executed from the same Exception level,
+// except where the instruction before modification and the
+// instruction after modification is a B, BL, NOP, BKPT, SVC, HVC, or
+// SMC instruction."
+//
+// This makes the games we play when patching difficult, so when we
+// come across an access that needs patching we deoptimize.  There are
+// ways we can avoid this, but these would slow down C1-compiled code
+// in the default case.  We could revisit this decision if we get any
+// evidence that it's worth doing.
+#define DEOPTIMIZE_WHEN_PATCHING
+
+#endif // CPU_AARCH64_VM_GLOBALDEFINITIONS_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/globals_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,129 @@
+/*
+ * Copyright (c) 2000, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_GLOBALS_AARCH64_HPP
+#define CPU_AARCH64_VM_GLOBALS_AARCH64_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+
+define_pd_global(bool, ConvertSleepToYield,      true);
+define_pd_global(bool, ShareVtableStubs,         true);
+define_pd_global(bool, CountInterpCalls,         true);
+define_pd_global(bool, NeedsDeoptSuspend,        false); // only register window machines need this
+
+define_pd_global(bool, ImplicitNullChecks,       true);  // Generate code for implicit null checks
+define_pd_global(bool, TrapBasedNullChecks,  false);
+define_pd_global(bool, UncommonNullCast,         true);  // Uncommon-trap NULLs passed to check cast
+
+// See 4827828 for this change. There is no globals_core_i486.hpp. I can't
+// assign a different value for C2 without touching a number of files. Use
+// #ifdef to minimize the change as it's late in Mantis. -- FIXME.
+// c1 doesn't have this problem because the fix to 4858033 assures us
+// that the vep is aligned at CodeEntryAlignment whereas c2 only aligns
+// the uep, and the vep doesn't get real alignment but just slops on by,
+// only assured that the entry instruction meets the 5 byte size requirement.
+#ifdef COMPILER2
+define_pd_global(intx, CodeEntryAlignment,       64);
+#else
+define_pd_global(intx, CodeEntryAlignment,       16);
+#endif // COMPILER2
+define_pd_global(intx, OptoLoopAlignment,        16);
+define_pd_global(intx, InlineFrequencyCount,     100);
+
+define_pd_global(intx, StackYellowPages, 2);
+define_pd_global(intx, StackRedPages, 1);
+
+define_pd_global(intx, StackShadowPages, 4 DEBUG_ONLY(+5));
+
+define_pd_global(intx, PreInflateSpin,           10);
+
+define_pd_global(bool, RewriteBytecodes,     true);
+define_pd_global(bool, RewriteFrequentPairs, false);
+
+define_pd_global(bool, UseMembar,            true);
+
+// GC Ergo Flags
+define_pd_global(uintx, CMSYoungGenPerWorker, 64*M);  // default max size of CMS young gen, per GC worker thread
+
+define_pd_global(uintx, TypeProfileLevel, 111);
+
+// avoid biased locking while we are bootstrapping the aarch64 build
+define_pd_global(bool, UseBiasedLocking, false);
+
+#if defined(COMPILER1) || defined(COMPILER2)
+define_pd_global(intx, InlineSmallCode,          1000);
+#endif
+
+#ifdef BUILTIN_SIM
+#define UseBuiltinSim           true
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
+                                                                        \
+  product(bool, NotifySimulator, UseBuiltinSim,                         \
+         "tell the AArch64 sim where we are in method code")            \
+                                                                        \
+  product(bool, UseSimulatorCache, false,                               \
+         "tell sim to cache memory updates until exclusive op occurs")  \
+                                                                        \
+  product(bool, DisableBCCheck, true,                                   \
+          "tell sim not to invoke bccheck callback")                    \
+                                                                        \
+  product(bool, NearCpool, true,                                        \
+         "constant pool is close to instructions")                      \
+                                                                        \
+  notproduct(bool, UseAcqRelForVolatileFields, false,                   \
+             "Use acquire and release insns for volatile fields")       \
+                                                                        \
+  product(bool, UseCRC32, false,                                        \
+          "Use CRC32 instructions for CRC32 computation")               \
+
+// Don't attempt to use Neon on builtin sim until builtin sim supports it
+#define UseCRC32 false
+
+#else
+#define UseBuiltinSim           false
+#define NotifySimulator         false
+#define UseSimulatorCache       false
+#define DisableBCCheck          true
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \
+                                                                        \
+  product(bool, NearCpool, true,                                        \
+         "constant pool is close to instructions")                      \
+                                                                        \
+  notproduct(bool, UseAcqRelForVolatileFields, false,                   \
+             "Use acquire and release insns for volatile fields")       \
+  product(bool, UseNeon, false,                                         \
+          "Use Neon for CRC32 computation")                             \
+  product(bool, UseCRC32, false,                                        \
+          "Use CRC32 instructions for CRC32 computation")               \
+  product(bool, TraceTraps, false, "Trace all traps handled by the signal handler")
+
+#endif
+
+
+#endif // CPU_AARCH64_VM_GLOBALS_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/icBuffer_aarch64.cpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/icBuffer.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+#include "interpreter/bytecodes.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_aarch64.hpp"
+#include "oops/oop.inline.hpp"
+#include "oops/oop.inline2.hpp"
+
+int InlineCacheBuffer::ic_stub_code_size() {
+  return (MacroAssembler::far_branches() ? 6 : 4) * NativeInstruction::instruction_size;
+}
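+
+// A sketch of the stub that assemble_ic_buffer_code lays out below,
+// in the near-branch case:
+//
+//   ldr  rscratch2, <value>   // load cached value      (1 instruction)
+//   b    <entry_point>        // jump to target         (1 instruction)
+//   <value>                   // aligned 64-bit word    (2 instruction slots)
+//
+// i.e. 4 * instruction_size bytes; the far-branch form of the jump
+// needs two extra instructions, hence the 6 above.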
+
+#define __ masm->
+
+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {
+  ResourceMark rm;
+  CodeBuffer      code(code_begin, ic_stub_code_size());
+  MacroAssembler* masm            = new MacroAssembler(&code);
+  // note: even though the code contains an embedded value, we do not need reloc info
+  // because
+  // (1) the value is old (i.e., doesn't matter for scavenges)
+  // (2) these ICStubs are removed *before* a GC happens, so the roots disappear
+  // assert(cached_value == NULL || cached_oop->is_perm(), "must be perm oop");
+
+  address start = __ pc();
+  Label l;
+  __ ldr(rscratch2, l);
+  __ far_jump(ExternalAddress(entry_point));
+  __ align(wordSize);
+  __ bind(l);
+  __ emit_int64((int64_t)cached_value);
+  // Strictly we only need to invalidate the first two instructions, but we flush the whole ic stub for simplicity
+  ICache::invalidate_range(code_begin, InlineCacheBuffer::ic_stub_code_size());
+  assert(__ pc() - start == ic_stub_code_size(), "must be");
+}
+
+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
+  NativeMovConstReg* move = nativeMovConstReg_at(code_begin);   // creation also verifies the object
+  NativeJump* jump = nativeJump_at(code_begin + 4);
+  return jump->jump_destination();
+}
+
+
+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
+  // The word containing the cached value is at the end of this IC buffer
+  uintptr_t *p = (uintptr_t *)(code_begin + ic_stub_code_size() - wordSize);
+  void* o = (void*)*p;
+  return o;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/icache_aarch64.cpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.hpp"
+#include "runtime/icache.hpp"
+
+extern void aarch64TestHook();
+
+void ICacheStubGenerator::generate_icache_flush(
+                ICache::flush_icache_stub_t* flush_icache_stub) {
+  // Give anyone who calls this a surprise
+  *flush_icache_stub = (ICache::flush_icache_stub_t)NULL;
+}
+
+void ICache::initialize() {
+  aarch64TestHook();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/icache_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_AARCH64_VM_ICACHE_AARCH64_HPP
+#define CPU_AARCH64_VM_ICACHE_AARCH64_HPP
+
+// Interface for updating the instruction cache.  Whenever the VM
+// modifies code, part of the processor instruction cache potentially
+// has to be flushed.
+
+class ICache : public AbstractICache {
+ public:
+  static void initialize();
+  static void invalidate_word(address addr) {
+    __clear_cache((char *)addr, (char *)(addr + 3));
+  }
+  static void invalidate_range(address start, int nbytes) {
+    __clear_cache((char *)start, (char *)(start + nbytes));
+  }
+};
+
+#endif // CPU_AARCH64_VM_ICACHE_AARCH64_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/immediate_aarch64.cpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,315 @@
+/*
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include <stdlib.h>
+#include "decode_aarch64.hpp"
+#include "immediate_aarch64.hpp"
+
+// there are at most 2^13 possible logical immediate encodings
+// however, some combinations of immr and imms are invalid
+static const unsigned  LI_TABLE_SIZE = (1 << 13);
+
+static int li_table_entry_count;
+
+// for forward lookup we just use a direct array lookup
+// and assume that the client has supplied a valid encoding
+// table[encoding] = immediate
+static u_int64_t LITable[LI_TABLE_SIZE];
+
+// for reverse lookup we need a sparse map so we store a table of
+// immediate and encoding pairs sorted by immediate value
+
+struct li_pair {
+  u_int64_t immediate;
+  u_int32_t encoding;
+};
+
+static struct li_pair InverseLITable[LI_TABLE_SIZE];
+
+// comparator to sort entries in the inverse table
+int compare_immediate_pair(const void *i1, const void *i2)
+{
+  struct li_pair *li1 = (struct li_pair *)i1;
+  struct li_pair *li2 = (struct li_pair *)i2;
+  if (li1->immediate < li2->immediate) {
+    return -1;
+  }
+  if (li1->immediate > li2->immediate) {
+    return 1;
+  }
+  return 0;
+}
+
+// helper functions used by expandLogicalImmediate
+
+// for i = 1, ..., N: result<i-1> = 1; all other bits are zero
+static inline u_int64_t ones(int N)
+{
+  return (N == 64 ? (u_int64_t)-1UL : ((1UL << N) - 1));
+}
+
+// result<0> = val<N>
+static inline u_int64_t pickbit(u_int64_t val, int N)
+{
+  return pickbits64(val, N, N);
+}
+
+
+// SPEC bits(M*N) Replicate(bits(M) x, integer N);
+// this is just an educated guess
+
+u_int64_t replicate(u_int64_t bits, int nbits, int count)
+{
+  u_int64_t result = 0;
+  // nbits may be 64 in which case we want mask to be -1
+  u_int64_t mask = ones(nbits);
+  for (int i = 0; i < count ; i++) {
+    result <<= nbits;
+    result |= (bits & mask);
+  }
+  return result;
+}
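+
+// e.g. replicate(0b101, 3, 2) == 0b101101 and replicate(1, 1, 8) == 0xff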
+
+// this function writes the supplied bimm reference and returns a
+// boolean to indicate success (1) or failure (0) because an illegal
+// encoding must be treated as an UNALLOC instruction
+
+// construct a 64 bit immediate value for a logical immediate operation
+int expandLogicalImmediate(u_int32_t immN, u_int32_t immr,
+                            u_int32_t imms, u_int64_t &bimm)
+{
+  int len;                  // ought to be <= 6
+  u_int32_t levels;         // 6 bits
+  u_int32_t tmask_and;      // 6 bits
+  u_int32_t wmask_and;      // 6 bits
+  u_int32_t tmask_or;       // 6 bits
+  u_int32_t wmask_or;       // 6 bits
+  u_int64_t imm64;          // 64 bits
+  u_int64_t tmask, wmask;   // 64 bits
+  u_int32_t S, R, diff;     // 6 bits?
+
+  if (immN == 1) {
+    len = 6; // looks like 7 given the spec above but this cannot be!
+  } else {
+    len = 0;
+    u_int32_t val = (~imms & 0x3f);
+    for (int i = 5; i > 0; i--) {
+      if (val & (1 << i)) {
+        len = i;
+        break;
+      }
+    }
+    if (len < 1) {
+      return 0;
+    }
+    // for valid inputs leading 1s in immr must be less than leading
+    // zeros in imms
+    int len2 = 0;                   // ought to be < len
+    u_int32_t val2 = (~immr & 0x3f);
+    for (int i = 5; i > 0; i--) {
+      if (!(val2 & (1 << i))) {
+        len2 = i;
+        break;
+      }
+    }
+    if (len2 >= len) {
+      return 0;
+    }
+  }
+
+  levels = (1 << len) - 1;
+
+  if ((imms & levels) == levels) {
+    return 0;
+  }
+
+  S = imms & levels;
+  R = immr & levels;
+
+  // 6 bit arithmetic!
+  diff = S - R;
+  tmask_and = (diff | ~levels) & 0x3f;
+  tmask_or = (diff & levels) & 0x3f;
+  tmask = 0xffffffffffffffffULL;
+
+  for (int i = 0; i < 6; i++) {
+    int nbits = 1 << i;
+    u_int64_t and_bit = pickbit(tmask_and, i);
+    u_int64_t or_bit = pickbit(tmask_or, i);
+    u_int64_t and_bits_sub = replicate(and_bit, 1, nbits);
+    u_int64_t or_bits_sub = replicate(or_bit, 1, nbits);
+    u_int64_t and_bits_top = (and_bits_sub << nbits) | ones(nbits);
+    u_int64_t or_bits_top = (0 << nbits) | or_bits_sub;
+
+    tmask = ((tmask
+              & (replicate(and_bits_top, 2 * nbits, 32 / nbits)))
+             | replicate(or_bits_top, 2 * nbits, 32 / nbits));
+  }
+
+  wmask_and = (immr | ~levels) & 0x3f;
+  wmask_or = (immr & levels) & 0x3f;
+
+  wmask = 0;
+
+  for (int i = 0; i < 6; i++) {
+    int nbits = 1 << i;
+    u_int64_t and_bit = pickbit(wmask_and, i);
+    u_int64_t or_bit = pickbit(wmask_or, i);
+    u_int64_t and_bits_sub = replicate(and_bit, 1, nbits);
+    u_int64_t or_bits_sub = replicate(or_bit, 1, nbits);
+    u_int64_t and_bits_top = (ones(nbits) << nbits) | and_bits_sub;
+    u_int64_t or_bits_top = (or_bits_sub << nbits) | 0;
+
+    wmask = ((wmask
+              & (replicate(and_bits_top, 2 * nbits, 32 / nbits)))
+             | replicate(or_bits_top, 2 * nbits, 32 / nbits));
+  }
+
+  if (diff & (1U << 6)) {
+    imm64 = tmask & wmask;
+  } else {
+    imm64 = tmask | wmask;
+  }
+
+
+  bimm = imm64;
+  return 1;
+}
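+
+// worked example: the 13 bit encoding N=0, immr=0, imms=0b111100
+// (index 0x03c) selects a 2 bit element with a single set bit and no
+// rotation, so
+//
+//   u_int64_t bimm;
+//   expandLogicalImmediate(0, 0, 0x3c, bimm);  // returns 1
+//   // bimm == 0x5555555555555555 (0b01 replicated 32 times)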
+
+// constructor to initialise the lookup tables
+
+static void initLITables() __attribute__ ((constructor));
+static void initLITables()
+{
+  li_table_entry_count = 0;
+  for (unsigned index = 0; index < LI_TABLE_SIZE; index++) {
+    u_int32_t N = uimm(index, 12, 12);
+    u_int32_t immr = uimm(index, 11, 6);
+    u_int32_t imms = uimm(index, 5, 0);
+    if (expandLogicalImmediate(N, immr, imms, LITable[index])) {
+      InverseLITable[li_table_entry_count].immediate = LITable[index];
+      InverseLITable[li_table_entry_count].encoding = index;
+      li_table_entry_count++;
+    }
+  }
+  // now sort the inverse table
+  qsort(InverseLITable, li_table_entry_count,
+        sizeof(InverseLITable[0]), compare_immediate_pair);
+}
+
+// public APIs provided for logical immediate lookup and reverse lookup
+
+u_int64_t logical_immediate_for_encoding(u_int32_t encoding)
+{
+  return LITable[encoding];
+}
+
+u_int32_t encoding_for_logical_immediate(u_int64_t immediate)
+{
+  struct li_pair pair;
+  struct li_pair *result;
+
+  pair.immediate = immediate;
+
+  result = (struct li_pair *)
+    bsearch(&pair, InverseLITable, li_table_entry_count,
+            sizeof(InverseLITable[0]), compare_immediate_pair);
+
+  if (result) {
+    return result->encoding;
+  }
+
+  return 0xffffffff;
+}
+
+// floating point immediates are encoded in 8 bits
+// fpimm[7] = sign bit
+// fpimm[6:4] = signed exponent
+// fpimm[3:0] = fraction (assuming leading 1)
+// i.e. F = s * 1.f * 2^(e - b)
+
+u_int64_t fp_immediate_for_encoding(u_int32_t imm8, int is_dp)
+{
+  union {
+    float fpval;
+    double dpval;
+    u_int64_t val;
+  };
+
+  u_int32_t s, e, f;
+  s = (imm8 >> 7 ) & 0x1;
+  e = (imm8 >> 4) & 0x7;
+  f = imm8 & 0xf;
+  // the fp value is s * n/16 * 2^e where n is 16+f
+  fpval = (16.0 + f) / 16.0;
+  // n.b. exponent is signed
+  if (e < 4) {
+    int epos = e;
+    for (int i = 0; i <= epos; i++) {
+      fpval *= 2.0;
+    }
+  } else {
+    int eneg = 7 - e;
+    for (int i = 0; i < eneg; i++) {
+      fpval /= 2.0;
+    }
+  }
+
+  if (s) {
+    fpval = -fpval;
+  }
+  if (is_dp) {
+    dpval = (double)fpval;
+  }
+  return val;
+}
+
+u_int32_t encoding_for_fp_immediate(float immediate)
+{
+  // given a float which is of the form
+  //
+  //     s * n/16 * 2^r
+  //
+  // where n is 16+f and imm1:s, imm4:f, simm3:r
+  // return the imm8 result [s:r:f]
+  //
+
+  union {
+    float fpval;
+    u_int32_t val;
+  };
+  fpval = immediate;
+  u_int32_t s, r, f, res;
+  // sign bit is 31
+  s = (val >> 31) & 0x1;
+  // exponent is bits 30-23 but we only want the bottom 3 bits
+  // strictly we ought to check that bits 30-25 are
+  // either all 1s or all 0s
+  r = (val >> 23) & 0x7;
+  // fraction is bits 22-0
+  f = (val >> 19) & 0xf;
+  res = (s << 7) | (r << 4) | f;
+  return res;
+}
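+
+// round-trip sketch: 2.0f has IEEE bits 0x40000000, so s=0, r=0, f=0
+// and encoding_for_fp_immediate(2.0f) == 0x00; conversely
+// fp_immediate_for_encoding(0, 0) starts from 16/16 = 1.0 and doubles
+// once (e=0 < 4), giving 2.0 back.  1.0f likewise encodes as 0x70.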
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/immediate_aarch64.hpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef _IMMEDIATE_H
+#define _IMMEDIATE_H
+
+#include <sys/types.h>
+
+/*
+ * functions to map backwards and forwards between logical or floating
+ * point immediates and their corresponding encodings. the mapping
+ * from encoding to immediate is required by the simulator. the reverse
+ * mapping is required by the OpenJDK assembler.
+ *
+ * a logical immediate value supplied to or returned from a map lookup
+ * is always 64 bits. this is sufficient for looking up 32 bit
+ * immediates or their encodings since a 32 bit immediate has the same
+ * encoding as the 64 bit immediate produced by concatenating the
+ * immediate with itself.
+ *
+ * a logical immediate encoding is 13 bits N:immr:imms (3 fields of
+ * widths 1:6:6 -- see the arm spec). they appear as bits [22:10] of a
+ * logical immediate instruction. encodings are supplied and returned
+ * as 32 bit values. if a given 13 bit immediate has no corresponding
+ * encoding then a map lookup will return 0xffffffff.
+ */
+
+u_int64_t logical_immediate_for_encoding(u_int32_t encoding);
+u_int32_t encoding_for_logical_immediate(u_int64_t immediate);
+u_int64_t fp_immediate_for_encoding(u_int32_t imm8, int is_dp);
+u_int32_t encoding_for_fp_immediate(float immediate);
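+
+/*
+ * minimal usage sketch, assuming the lookup tables have been built:
+ *
+ *   u_int32_t enc = encoding_for_logical_immediate(0x5555555555555555ULL);
+ *   // enc != 0xffffffff and the mapping round-trips:
+ *   // logical_immediate_for_encoding(enc) == 0x5555555555555555ULL
+ */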
+
+#endif // _IMMEDIATE_H
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/aarch64/vm/interp_masm_aarch64.cpp	Tue Jan 20 11:34:17 2015 -0800
@@ -0,0 +1,1682 @@
+/*
+ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interp_masm_aarch64.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/markOop.hpp"
+#include "oops/methodData.hpp"
+#include "oops/method.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiRedefineClassesTrace.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/basicLock.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/thread.inline.hpp"
+
+
+// Implementation of InterpreterMacroAssembler
+
+#ifndef CC_INTERP
+
+void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) {
+  if (JvmtiExport::can_pop_frame()) {
+    Label L;
+    // Initiate popframe handling only if it is not already being
+    // processed.  If the flag has the popframe_processing bit set, it
+    // means that this code is called *during* popframe handling - we
+    // don't want to reenter.
+    // This method is only called just after the call into the vm in
+    // call_VM_base, so the arg registers are available.
+    ldrw(rscratch1, Address(rthread, JavaThread::popframe_condition_offset()));
+    tstw(rscratch1, JavaThread::popframe_pending_bit);
+    br(Assembler::EQ, L);
+    tstw(rscratch1, JavaThread::popframe_processing_bit);
+    br(Assembler::NE, L);
+    // Call Interpreter::remove_activation_preserving_args_entry() to get the
+    // address of the same-named entrypoint in the generated interpreter code.
+    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry));
+    br(r0);
+    bind(L);
+  }
+}
+
+
+void InterpreterMacroAssembler::load_earlyret_value(TosState state) {
+  ldr(r2, Address(rthread, JavaThread::jvmti_thread_state_offset()));
+  const Address tos_addr(r2, JvmtiThreadState::earlyret_tos_offset());
+  const Address oop_addr(r2, JvmtiThreadState::earlyret_oop_offset());
+  const Address val_addr(r2, JvmtiThreadState::earlyret_value_offset());
+  switch (state) {
+    case atos: ldr(r0, oop_addr);
+               str(zr, oop_addr);
+               verify_oop(r0, state);               break;
+    case ltos: ldr(r0, val_addr);                   break;
+    case btos:                                   // fall through
+    case ctos:                                   // fall through
+    case stos:                                   // fall through
+    case itos: ldrw(r0, val_addr);                  break;
+    case ftos: ldrs(v0, val_addr);                  break;
+    case dtos: ldrd(v0, val_addr);                  break;
+    case vtos: /* nothing to do */                  break;
+    default  : ShouldNotReachHere();
+  }
+  // Clean up tos value in the thread object
+  movw(rscratch1, (int) ilgl);
+  strw(rscratch1, tos_addr);
+  strw(zr, val_addr);
+}
+
+
+void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) {
+  if (JvmtiExport::can_force_early_return()) {
+    Label L;
+    ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset()));
+    cbz(rscratch1, L); // if (thread->jvmti_thread_state() == NULL) exit;
+
+    // Initiate earlyret handling only if it is not already being processed.
+    // If the flag has the earlyret_processing bit set, it means that this code
+    // is called *during* earlyret handling - we don't want to reenter.
+    ldrw(rscratch1, Address(rscratch1, JvmtiThreadState::earlyret_state_offset()));
+    cmpw(rscratch1, JvmtiThreadState::earlyret_pending);
+    br(Assembler::NE, L);
+
+    // Call Interpreter::remove_activation_early_entry() to get the address of the
+    // same-named entrypoint in the generated interpreter code.
+    ldr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset()));
+    ldrw(rscratch1, Address(rscratch1, JvmtiThreadState::earlyret_tos_offset()));
+    call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), rscratch1);
+    br(r0);
+    bind(L);
+  }
+}
+
+void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(
+  Register reg,
+  int bcp_offset) {
+  assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode");
+  ldrh(reg, Address(rbcp, bcp_offset));
+  rev16(reg, reg);
+}
+
+void InterpreterMacroAssembler::get_dispatch() {
+  unsigned long offset;
+  adrp(rdispatch, ExternalAddress((address)Interpreter::dispatch_table()), offset);
+  lea(rdispatch, Address(rdispatch, offset));
+}
+
+void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index,
+                                                       int bcp_offset,
+                                                       size_t index_size) {
+  assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+  if (index_size == sizeof(u2)) {
+    load_unsigned_short(index, Address(rbcp, bcp_offset));
+  } else if (index_size == sizeof(u4)) {
+    // assert(EnableInvokeDynamic, "giant index used only for JSR 292");
+    ldrw(index, Address(rbcp, bcp_offset));
+    // Check if the secondary index definition is still ~x, otherwise
+    // we have to change the following assembler code to calculate the
+    // plain index.
+    assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line");
+    eonw(index, index, zr);  // convert to plain index
+  } else if (index_size == sizeof(u1)) {
+    load_unsigned_byte(index, Address(rbcp, bcp_offset));
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+// Return
+// Rindex: index into constant pool
+// Rcache: address of cache entry - ConstantPoolCache::base_offset()
+//
+// A caller must add ConstantPoolCache::base_offset() to Rcache to get
+// the true address of the cache entry.
+//
+void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache,
+                                                           Register index,
+                                                           int bcp_offset,
+                                                           size_t index_size) {
+  assert_different_registers(cache, index);
+  assert_different_registers(cache, rcpool);
+  get_cache_index_at_bcp(index, bcp_offset, index_size);
+  assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
+  // convert from field index to ConstantPoolCacheEntry
+  // aarch64 already has the cache in rcpool so there is no need to
+  // install it in cache. instead we pre-add the indexed offset to
+  // rcpool and return it in cache. All clients of this method need to
+  // be modified accordingly.
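+  // the LSL of 5 scales index by sizeof(ConstantPoolCacheEntry)
+  // == 4 * wordSize == 32 bytes, matching the assert above.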
+  add(cache, rcpool, index, Assembler::LSL, 5);
+}
+
+
+void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache,
+                                                                        Register index,
+                                                                        Register bytecode,
+                                                                        int byte_no,
+                                                                        int bcp_offset,
+                                                                        size_t index_size) {
+  get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size);
+  // We use a 32-bit load here since the layout of 64-bit words on
+  // little-endian machines allows us to do that.
+  // n.b. unlike x86, cache already includes the index offset
+  ldrw(bytecode, Address(cache,
+                         ConstantPoolCache::base_offset()
+                         + ConstantPoolCacheEntry::indices_offset()));
+  const int shift_count = (1 + byte_no) * BitsPerByte;
+  ubfx(bytecode, bytecode, shift_count, BitsPerByte);
+}
+
+void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache,
+                                                               Register tmp,
+                                                               int bcp_offset,
+                                                               size_t index_size) {
+  assert(cache != tmp, "must use different register");
+  get_cache_index_at_bcp(tmp, bcp_offset, index_size);
+  assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below");
+  // convert from field index to ConstantPoolCacheEntry index
+  // and from word offset to byte offset
+  assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line");
+  ldr(cache, Address(rfp, frame::interpreter_frame_cache_offset * wordSize));
+  // skip past the header
+  add(cache, cache, in_bytes(ConstantPoolCache::base_offset()));
+  add(cache, cache, tmp, Assembler::LSL, 2 + LogBytesPerWord);  // construct pointer to cache entry
+}
+
+void InterpreterMacroAssembler::get_method_counters(Register method,
+                                                    Register mcs, Label& skip) {
+  Label has_counters;
+  ldr(mcs, Address(method, Method::method_counters_offset()));
+  cbnz(mcs, has_counters);
+  call_VM(noreg, CAST_FROM_FN_PTR(address,
+          InterpreterRuntime::build_method_counters), method);
+  ldr(mcs, Address(method, Method::method_counters_offset()));
+  cbz(mcs, skip); // No MethodCounters allocated, OutOfMemory
+  bind(has_counters);
+}
+
+// Load object from cpool->resolved_references(index)
+void InterpreterMacroAssembler::load_resolved_reference_at_index(
+                                           Register result, Register index) {
+  assert_different_registers(result, index);
+  // convert from field index to resolved_references() index and from
+  // word index to byte offset. Since this is a java object, it can be compressed
+  Register tmp = index;  // reuse
+  lslw(tmp, tmp, LogBytesPerHeapOop);
+
+  get_constant_pool(result);
+  // load pointer for resolved_references[] objArray
+  ldr(result, Address(result, ConstantPool::resolved_references_offset_in_bytes()));
+  // JNIHandles::resolve(obj);
+  ldr(result, Address(result, 0));
+  // Add in the index
+  add(result, result, tmp);
+  load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)));
+}
+
+// Generate a subtype check: branch to ok_is_subtype if sub_klass is a
+// subtype of super_klass.
+//
+// Args:
+//      r0: superklass
+//      Rsub_klass: subklass
+//
+// Kills:
+//      r2, r5
+void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass,
+                                                  Label& ok_is_subtype) {
+  assert(Rsub_klass != r0, "r0 holds superklass");
+  assert(Rsub_klass != r2, "r2 holds 2ndary super array length");
+  assert(Rsub_klass != r5, "r5 holds 2ndary super array scan ptr");
+
+  // Profile the not-null value's klass.
+  profile_typecheck(r2, Rsub_klass, r5); // blows r2, reloads r5
+
+  // Do the check.
+  check_klass_subtype(Rsub_klass, r0, r2, ok_is_subtype); // blows r2
+
+  // Profile the failure of the check.
+  profile_typecheck_failed(r2); // blows r2
+}
+
+// Java Expression Stack
+
+void InterpreterMacroAssembler::pop_ptr(Register r) {
+  ldr(r, post(esp, wordSize));
+}
+
+void InterpreterMacroAssembler::pop_i(Register r) {
+  ldrw(r, post(esp, wordSize));
+}
+
+void InterpreterMacroAssembler::pop_l(Register r) {
+  ldr(r, post(esp, 2 * Interpreter::stackElementSize));
+}
+
+void InterpreterMacroAssembler::push_ptr(Register r) {
+  str(r, pre(esp, -wordSize));
+}
+
+void InterpreterMacroAssembler::push_i(Register r) {
+  str(r, pre(esp, -wordSize));
+}
+
+void InterpreterMacroAssembler::push_l(Register r) {
+  str(r, pre(esp, 2 * -wordSize));
+}
+
+void InterpreterMacroAssembler::pop_f(FloatRegister r) {
+  ldrs(r, post(esp, wordSize));
+}
+
+void InterpreterMacroAssembler::pop_d(FloatRegister r) {
+  ldrd(r, post(esp, 2 * Interpreter::stackElementSize));
+}
+
+void InterpreterMacroAssembler::push_f(FloatRegister r) {
+  strs(r, pre(esp, -wordSize));
+}
+
+void InterpreterMacroAssembler::push_d(FloatRegister r) {
+  strd(r, pre(esp, 2* -wordSize));
+}
+
+void InterpreterMacroAssembler::pop(TosState state) {
+  switch (state) {
+  case atos: pop_ptr();                 break;
+  case btos:
+  case ctos:
+  case stos:
+  case itos: pop_i();                   break;
+  case ltos: pop_l();                   break;
+  case ftos: pop_f();                   break;
+  case dtos: pop_d();                   break;
+  case vtos: /* nothing to do */        break;
+  default:   ShouldNotReachHere();
+  }
+  verify_oop(r0, state);
+}
+
+void InterpreterMacroAssembler::push(TosState state) {
+  verify_oop(r0, state);
+  switch (state) {
+  case atos: push_ptr();                break;
+  case btos:
+  case ctos:
+  case stos:
+  case itos: push_i();                  break;
+  case ltos: push_l();                  break;
+  case ftos: push_f();                  break;
+  case dtos: push_d();                  break;
+  case vtos: /* nothing to do */        break;
+  default  : ShouldNotReachHere();
+  }
+}
+
+// Helpers for swap and dup
+void InterpreterMacroAssembler::load_ptr(int n, Register val) {
+  ldr(val, Address(esp, Interpreter::expr_offset_in_bytes(n)));
+}
+
+void InterpreterMacroAssembler::store_ptr(int n, Register val) {
+  str(val, Address(esp, Interpreter::expr_offset_in_bytes(n)));
+}
+
+
+void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {
+  // set sender sp
+  mov(r13, sp);
+  // record last_sp
+  str(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize));
+}
+
+// Jump to from_interpreted entry of a call unless single stepping is possible
+// in this thread in which case we must call the i2i entry
+void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) {
+  prepare_to_jump_from_interpreted();
+
+  if (JvmtiExport::can_post_interpreter_events()) {
+    Label run_compiled_code;
+    // JVMTI events, such as single-stepping, are implemented partly by avoiding running
+    // compiled code in threads for which the event is enabled.  Check here for
+    // interp_only_mode if these events CAN be enabled.
+    // interp_only is an int; on little endian it is sufficient to test the low byte only.
+    // Is a cmpl faster?
+    ldr(rscratch1, Address(rthread, JavaThread::interp_only_mode_offset()));
+    cbz(rscratch1, run_compiled_code);
+    ldr(rscratch1, Address(method, Method::interpreter_entry_offset()));
+    br(rscratch1);
+    bind(run_compiled_code);
+  }
+
+  ldr(rscratch1, Address(method, Method::from_interpreted_offset()));
+  br(rscratch1);
+}
+
+// The following two routines provide a hook so that an implementation
+// can schedule the dispatch in two parts.  aarch64 does not do this.
+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) {
+}
+
+void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) {
+    dispatch_next(state, step);
+}
+
+void InterpreterMacroAssembler::dispatch_base(TosState state,
+                                              address* table,
+                                              bool verifyoop) {
+  if (VerifyActivationFrameSize) {
+    Unimplemented();
+  }
+  if (verifyoop) {
+    verify_oop(r0, state);
+  }
+  if (table == Interpreter::dispatch_table(state)) {
+    addw(rscratch2, rscratch1, Interpreter::distance_from_dispatch_table(state));
+    ldr(rscratch2, Address(rdispatch, rscratch2, Address::uxtw(3)));
+  } else {
+    mov(rscratch2, (address)table);
+    ldr(rscratch2, Address(rscratch2, rscratch1, Address::uxtw(3)));
+  }
+  br(rscratch2);
+}
+
+void InterpreterMacroAssembler::dispatch_only(TosState state) {
+  dispatch_base(state, Interpreter::dispatch_table(state));
+}
+
+void InterpreterMacroAssembler::dispatch_only_normal(TosState state) {
+  dispatch_base(state, Interpreter::normal_table(state));
+}
+
+void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) {
+  dispatch_base(state, Interpreter::normal_table(state), false);
+}
+
+
+void InterpreterMacroAssembler::dispatch_next(TosState state, int step) {
+  // load next bytecode
+  ldrb(rscratch1, Address(pre(rbcp, step)));
+  dispatch_base(state, Interpreter::dispatch_table(state));
+}
+
+void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) {
+  // load current bytecode
+  ldrb(rscratch1, Address(rbcp, 0));
+  dispatch_base(state, table);
+}
+
+// remove activation
+//
+// Unlock the receiver if this is a synchronized method.
+// Unlock any Java monitors from synchronized blocks.
+// Remove the activation from the stack.
+//
+// If there are locked Java monitors
+//    If throw_monitor_exception
+//       throws IllegalMonitorStateException
+//    Else if install_monitor_exception
+//       installs IllegalMonitorStateException
+//    Else
+//       no error processing
+void InterpreterMacroAssembler::remove_activation(
+        TosState state,
+        bool throw_monitor_exception,
+        bool install_monitor_exception,
+        bool notify_jvmdi) {
+  // Note: Registers r3 and v0 may be in use for the
+  // result check if this is a synchronized method
+  Label unlocked, unlock, no_unlock;
+
+  // get the value of _do_not_unlock_if_synchronized into r3
+  const Address do_not_unlock_if_synchronized(rthread,
+    in_bytes(JavaThread::do_not_unlock_if_synchronized_offset()));
+  ldrb(r3, do_not_unlock_if_synchronized);
+  strb(zr, do_not_unlock_if_synchronized); // reset the flag
+
+  // get method access flags
+  ldr(r1, Address(rfp, frame::interpreter_frame_method_offset * wordSize));
+  ldr(r2, Address(r1, Method::access_flags_offset()));
+  tst(r2, JVM_ACC_SYNCHRONIZED);
+  br(Assembler::EQ, unlocked);
+
+  // Don't unlock anything if the _do_not_unlock_if_synchronized flag
+  // is set.
+  cbnz(r3, no_unlock);
+
+  // unlock monitor
+  push(state); // save result
+
+  // BasicObjectLock will be first in list, since this is a
+  // synchronized method. However, need to check that the object has
+  // not been unlocked by an explicit monitorexit bytecode.
+  const Address monitor(rfp, frame::interpreter_frame_initial_sp_offset *
+                        wordSize - (int) sizeof(BasicObjectLock));
+  // We use c_rarg1 so that if we go slow path it will be the correct
+  // register for unlock_object to pass to VM directly
+  lea(c_rarg1, monitor); // address of first monitor
+
+  ldr(r0, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+  cbnz(r0, unlock);
+
+  pop(state);
+  if (throw_monitor_exception) {
+    // Entry already unlocked, need to throw exception
+    call_VM(noreg, CAST_FROM_FN_PTR(address,
+                   InterpreterRuntime::throw_illegal_monitor_state_exception));
+    should_not_reach_here();
+  } else {
+    // Monitor already unlocked during a stack unroll. If requested,
+    // install an illegal_monitor_state_exception.  Continue with
+    // stack unrolling.
+    if (install_monitor_exception) {
+      call_VM(noreg, CAST_FROM_FN_PTR(address,
+                     InterpreterRuntime::new_illegal_monitor_state_exception));
+    }
+    b(unlocked);
+  }
+
+  bind(unlock);
+  unlock_object(c_rarg1);
+  pop(state);
+
+  // Check for block-structured locking (i.e., that all locked
+  // objects have been unlocked)
+  bind(unlocked);
+
+  // r0: Might contain return value
+
+  // Check that all monitors are unlocked
+  {
+    Label loop, exception, entry, restart;
+    const int entry_size = frame::interpreter_frame_monitor_size() * wordSize;
+    const Address monitor_block_top(
+        rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize);
+    const Address monitor_block_bot(
+        rfp, frame::interpreter_frame_initial_sp_offset * wordSize);
+
+    bind(restart);
+    // We use c_rarg1 so that if we go slow path it will be the correct
+    // register for unlock_object to pass to VM directly
+    ldr(c_rarg1, monitor_block_top); // points to current entry, starting
+                                     // with top-most entry
+    lea(r19, monitor_block_bot);  // points to word before bottom of
+                                  // monitor block
+    b(entry);
+
+    // Entry already locked, need to throw exception
+    bind(exception);
+
+    if (throw_monitor_exception) {
+      // Throw exception
+      MacroAssembler::call_VM(noreg,
+                              CAST_FROM_FN_PTR(address, InterpreterRuntime::
+                                   throw_illegal_monitor_state_exception));
+      should_not_reach_here();
+    } else {
+      // Stack unrolling. Unlock object and install illegal_monitor_exception.
+      // Unlock does not block, so don't have to worry about the frame.
+      // We don't have to preserve c_rarg1 since we are going to throw an exception.
+
+      push(state);
+      unlock_object(c_rarg1);
+      pop(state);
+
+      if (install_monitor_exception) {
+        call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                        InterpreterRuntime::
+                                        new_illegal_monitor_state_exception));
+      }
+
+      b(restart);
+    }
+
+    bind(loop);
+    // check if current entry is used
+    ldr(rscratch1, Address(c_rarg1, BasicObjectLock::obj_offset_in_bytes()));
+    cbnz(rscratch1, exception);
+
+    add(c_rarg1, c_rarg1, entry_size); // otherwise advance to next entry
+    bind(entry);
+    cmp(c_rarg1, r19); // check if bottom reached
+    br(Assembler::NE, loop); // if not at bottom then check this entry
+  }
+
+  bind(no_unlock);
+
+  // jvmti support
+  if (notify_jvmdi) {
+    notify_method_exit(state, NotifyJVMTI);    // preserve TOSCA
+  } else {
+    notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA
+  }
+
+  // remove activation
+  // get sender esp
+  ldr(esp,
+      Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize));
+  // remove frame anchor
+  leave();
+  // If we're returning to interpreted code we will shortly be
+  // adjusting SP to allow some space for ESP.  If we're returning to
+  // compiled code the saved sender SP was saved in sender_sp, so this
+  // restores it.
+  andr(sp, esp, -16);
+}
+
+#endif // CC_INTERP
+
+// Lock object
+//
+// Args:
+//      c_rarg1: BasicObjectLock to be used for locking
+//
+// Kills:
+//      r0
+//      c_rarg0, c_rarg1, c_rarg2, c_rarg3, .. (param regs)
+//      rscratch1, rscratch2 (scratch regs)
+void InterpreterMacroAssembler::lock_object(Register lock_reg)
+{
+  assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1");
+  if (UseHeavyMonitors) {
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+            lock_reg);
+  } else {
+    Label done;
+
+    const Register swap_reg = r0;
+    const Register obj_reg = c_rarg3; // Will contain the oop
+
+    const int obj_offset = BasicObjectLock::obj_offset_in_bytes();
+    const int lock_offset = BasicObjectLock::lock_offset_in_bytes();
+    const int mark_offset = lock_offset +
+                            BasicLock::displaced_header_offset_in_bytes();
+
+    Label slow_case;
+
+    // Load object pointer into obj_reg %c_rarg3
+    ldr(obj_reg, Address(lock_reg, obj_offset));
+
+    if (UseBiasedLocking) {
+      biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch2, false, done, &slow_case);
+    }
+
+    // Load (object->mark() | 1) into swap_reg
+    ldr(rscratch1, Address(obj_reg, 0));
+    orr(swap_reg, rscratch1, 1);
+
+    // Save (object->mark() | 1) into BasicLock's displaced header
+    str(swap_reg, Address(lock_reg, mark_offset));
+
+    assert(lock_offset == 0,
+           "displached header must be first word in BasicObjectLock");
+
+    Label fail;
+    if (PrintBiasedLockingStatistics) {
+      Label fast;
+      cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, fast, &fail);
+      bind(fast);
+      atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
+                  rscratch2, rscratch1);
+      b(done);
+      bind(fail);
+    } else {
+      cmpxchgptr(swap_reg, lock_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
+    }
+
+    // Test if the oopMark is an obvious stack pointer, i.e.,
+    //  1) (mark & 7) == 0, and
+    //  2) sp <= mark < sp + os::vm_page_size()
+    //
+    // These 3 tests can be done by evaluating the following
+    // expression: ((mark - sp) & (7 - os::vm_page_size())),
+    // assuming both the stack pointer and the page size have their
+    // least significant 3 bits clear.
+    // NOTE: the oopMark is in swap_reg %r0 as the result of cmpxchg
+    // NOTE2: aarch64 cannot use sp as the second source operand of
+    // sub, so take a copy of it first
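+    // For example, with an assumed 4096-byte page the mask is
+    // 7 - 4096 == 0x...fffff007, so ((mark - sp) & mask) == 0 exactly
+    // when mark is 8-byte aligned and 0 <= mark - sp < 4096, i.e. the
+    // mark is a pointer into this thread's stack (the recursive case).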
+    mov(rscratch1, sp);
+    sub(swap_reg, swap_reg, rscratch1);
+    ands(swap_reg, swap_reg, (unsigned long)(7 - os::vm_page_size()));
+
+    // Save the test result; for the recursive case, the result is zero
+    str(swap_reg, Address(lock_reg, mark_offset));
+
+    if (PrintBiasedLockingStatistics) {
+      br(Assembler::NE, slow_case);
+      atomic_incw(Address((address)BiasedLocking::fast_path_entry_count_addr()),
+                  rscratch2, rscratch1);
+    }
+    br(Assembler::EQ, done);
+
+    bind(slow_case);
+
+    // Call the runtime routine for slow case
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+            lock_reg);
+
+    bind(done);
+  }
+}
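+
+// In outline, the fast path generated above is (a sketch only, eliding
+// biased locking and the PrintBiasedLockingStatistics counters):
+//
+//   mark = obj->mark() | 1;
+//   lock->set_displaced_header(mark);
+//   if (cmpxchg(&obj->mark(), mark, lock) == mark)  goto done;  // locked
+//   if (mark is a pointer into our stack page)                  // recursive
+//     { lock->set_displaced_header(0); goto done; }
+//   InterpreterRuntime::monitorenter(lock);                     // slow path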
+
+
+// Unlocks an object. Used in monitorexit bytecode and
+// remove_activation.  Throws an IllegalMonitorException if object is
+// not locked by current thread.
+//
+// Args:
+//      c_rarg1: BasicObjectLock for lock
+//
+// Kills:
+//      r0
+//      c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs)
+//      rscratch1, rscratch2 (scratch regs)
+void InterpreterMacroAssembler::unlock_object(Register lock_reg)
+{
+  assert(lock_reg == c_rarg1, "The argument is only for looks. It must be c_rarg1");
+
+  if (UseHeavyMonitors) {
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
+            lock_reg);
+  } else {
+    Label done;
+
+    const Register swap_reg   = r0;
+    const Register header_reg = c_rarg2;  // Will contain the old oopMark
+    const Register obj_reg    = c_rarg3;  // Will contain the oop
+
+    save_bcp(); // Save in case of exception
+
+    // Convert from BasicObjectLock structure to object and BasicLock
+    // structure. Store the BasicLock address into %r0
+    lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes()));
+
+    // Load oop into obj_reg(%c_rarg3)
+    ldr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
+
+    // Free entry
+    str(zr, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes()));
+
+    if (UseBiasedLocking) {
+      biased_locking_exit(obj_reg, header_reg, done);
+    }
+
+    // Load the old header from BasicLock structure
+    ldr(header_reg, Address(swap_reg,
+                            BasicLock::displaced_header_offset_in_bytes()));
+
+    // Test for recursion
+    cbz(header_reg, done);
+
+    // Atomic swap back the old header
+    cmpxchgptr(swap_reg, header_reg, obj_reg, rscratch1, done, /*fallthrough*/NULL);
+
+    // Call the runtime routine for slow case.
+    str(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj
+    call_VM(noreg,
+            CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
+            lock_reg);
+
+    bind(done);
+
+    restore_bcp();
+  }
+}
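+
+// In outline, the fast path generated above is (a sketch only, eliding
+// biased locking):
+//
+//   lock->set_obj(NULL);                              // free the entry
+//   header = lock->displaced_header();
+//   if (header == NULL)  goto done;                   // recursive unlock
+//   if (cmpxchg(&obj->mark(), lock, header) == lock)  goto done;
+//   lock->set_obj(obj);                               // restore for slow path
+//   InterpreterRuntime::monitorexit(lock);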
+
+#ifndef CC_INTERP
+
+void InterpreterMacroAssembler::test_method_data_pointer(Register mdp,
+                                                         Label& zero_continue) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  ldr(mdp, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize));
+  cbz(mdp, zero_continue);
+}
+
+// Set the method data pointer for the current bcp.
+void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  Label set_mdp;
+  stp(r0, r1, Address(pre(sp, -2 * wordSize)));
+
+  // Test MDO to avoid the call if it is NULL.
+  ldr(r0, Address(rmethod, in_bytes(Method::method_data_offset())));
+  cbz(r0, set_mdp);
+  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), rmethod, rbcp);
+  // r0: mdi
+  // mdo is guaranteed to be non-zero here; we checked for it before the call.
+  ldr(r1, Address(rmethod, in_bytes(Method::method_data_offset())));
+  lea(r1, Address(r1, in_bytes(MethodData::data_offset())));
+  add(r0, r1, r0);
+  str(r0, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize));
+  bind(set_mdp);
+  ldp(r0, r1, Address(post(sp, 2 * wordSize)));
+}
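+
+// In effect (sketch): mdp = mdo + MethodData::data_offset()
+//                           + InterpreterRuntime::bcp_to_di(method, bcp),
+// written into the frame's interpreter_frame_mdp slot.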
+
+void InterpreterMacroAssembler::verify_method_data_pointer() {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+#ifdef ASSERT
+  Label verify_continue;
+  stp(r0, r1, Address(pre(sp, -2 * wordSize)));
+  stp(r2, r3, Address(pre(sp, -2 * wordSize)));
+  test_method_data_pointer(r3, verify_continue); // If mdp is zero, continue
+  get_method(r1);
+
+  // If the mdp is valid, it will point to a DataLayout header which is
+  // consistent with the bcp.  The converse is highly probable also.
+  ldrsh(r2, Address(r3, in_bytes(DataLayout::bci_offset())));
+  ldr(rscratch1, Address(r1, Method::const_offset()));
+  add(r2, r2, rscratch1, Assembler::LSL);
+  lea(r2, Address(r2, ConstMethod::codes_offset()));
+  cmp(r2, rbcp);
+  br(Assembler::EQ, verify_continue);
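+  // i.e. the fast check above verified that
+  //   bcp == method->constMethod() + ConstMethod::codes_offset() + data->bci()
+  // before we fall back to the InterpreterRuntime::verify_mdp call below.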
+  // r1: method
+  // rbcp: bcp (rbcp == r22)
+  // r3: mdp
+  call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp),
+               r1, rbcp, r3);
+  bind(verify_continue);
+  ldp(r2, r3, Address(post(sp, 2 * wordSize)));
+  ldp(r0, r1, Address(post(sp, 2 * wordSize)));
+#endif // ASSERT
+}
+
+
+void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in,
+                                                int constant,
+                                                Register value) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  Address data(mdp_in, constant);
+  str(value, data);
+}
+
+
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
+                                                      int constant,
+                                                      bool decrement) {
+  increment_mdp_data_at(mdp_in, noreg, constant, decrement);
+}
+
+void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in,
+                                                      Register reg,
+                                                      int constant,
+                                                      bool decrement) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  // %%% this does 64-bit counters; at best it is wasting space,
+  // at worst it is a rare bug when counters overflow
+
+  assert_different_registers(rscratch2, rscratch1, mdp_in, reg);
+
+  Address addr1(mdp_in, constant);
+  Address addr2(rscratch2, reg, Address::lsl(0));
+  Address &addr = addr1;
+  if (reg != noreg) {
+    lea(rscratch2, addr1);
+    addr = addr2;
+  }
+
+  if (decrement) {
+    // Decrement the register.  Set condition codes.
+    // Intel does this
+    // addptr(data, (int32_t) -DataLayout::counter_increment);
+    // If the decrement causes the counter to overflow, stay negative
+    // Label L;
+    // jcc(Assembler::negative, L);
+    // addptr(data, (int32_t) DataLayout::counter_increment);
+    // so we do this
+    ldr(rscratch1, addr);
+    subs(rscratch1, rscratch1, (unsigned)DataLayout::counter_increment);
+    Label L;
+    br(Assembler::LO, L);       // skip store if counter underflow
+    str(rscratch1, addr);
+    bind(L);
+  } else {
+    assert(DataLayout::counter_increment == 1,
+           "flow-free idiom only works with 1");
+    // Intel does this
+    // Increment the register.  Set carry flag.
+    // addptr(data, DataLayout::counter_increment);
+    // If the increment causes the counter to overflow, pull back by 1.
+    // sbbptr(data, (int32_t)0);
+    // so we do this
+    ldr(rscratch1, addr);
+    adds(rscratch1, rscratch1, DataLayout::counter_increment);
+    Label L;
+    br(Assembler::CS, L);       // skip store if counter overflow
+    str(rscratch1, addr);
+    bind(L);
+  }
+}
+
+void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in,
+                                                int flag_byte_constant) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  int header_offset = in_bytes(DataLayout::header_offset());
+  int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant);
+  // Set the flag
+  ldr(rscratch1, Address(mdp_in, header_offset));
+  orr(rscratch1, rscratch1, header_bits);
+  str(rscratch1, Address(mdp_in, header_offset));
+}
+
+
+void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in,
+                                                 int offset,
+                                                 Register value,
+                                                 Register test_value_out,
+                                                 Label& not_equal_continue) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  if (test_value_out == noreg) {
+    ldr(rscratch1, Address(mdp_in, offset));
+    cmp(value, rscratch1);
+  } else {
+    // Put the test value into a register, so caller can use it:
+    ldr(test_value_out, Address(mdp_in, offset));
+    cmp(value, test_value_out);
+  }
+  br(Assembler::NE, not_equal_continue);
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
+                                                     int offset_of_disp) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  ldr(rscratch1, Address(mdp_in, offset_of_disp));
+  add(mdp_in, mdp_in, rscratch1, LSL);
+  str(mdp_in, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize));
+}
+
+
+void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in,
+                                                     Register reg,
+                                                     int offset_of_disp) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  lea(rscratch1, Address(mdp_in, offset_of_disp));
+  ldr(rscratch1, Address(rscratch1, reg, Address::lsl(0)));
+  add(mdp_in, mdp_in, rscratch1, LSL);
+  str(mdp_in, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize));
+}
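+
+// Both update_mdp_by_offset overloads implement, in effect,
+//   mdp += *(intptr_t*)(mdp + offset_of_disp [+ reg]);
+// and write the new mdp back into the frame's mdp slot.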
+
+
+void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in,
+                                                       int constant) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  add(mdp_in, mdp_in, (unsigned)constant);
+  str(mdp_in, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize));
+}
+
+
+void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) {
+  assert(ProfileInterpreter, "must be profiling interpreter");
+  // save/restore across call_VM
+  stp(zr, return_bci, Address(pre(sp, -2 * wordSize)));
+  call_VM(noreg,
+          CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret),
+          return_bci);
+  ldp(zr, return_bci, Address(post(sp, 2 * wordSize)));
+}
+
+
+void InterpreterMacroAssembler::profile_taken_branch(Register mdp,
+                                                     Register bumped_count) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    // Otherwise, assign to mdp
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are taking a branch.  Increment the taken count.
+    // We inline increment_mdp_data_at to return bumped_count in a register
+    //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset()));
+    Address data(mdp, in_bytes(JumpData::taken_offset()));
+    ldr(bumped_count, data);
+    assert(DataLayout::counter_increment == 1,
+            "flow-free idiom only works with 1");
+    // Intel does this to catch overflow
+    // addptr(bumped_count, DataLayout::counter_increment);
+    // sbbptr(bumped_count, 0);
+    // so we do this
+    adds(bumped_count, bumped_count, DataLayout::counter_increment);
+    Label L;
+    br(Assembler::CS, L);       // skip store if counter overflow
+    str(bumped_count, data);
+    bind(L);
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset()));
+    bind(profile_continue);
+  }
+}
+
+
+void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are taking a branch.  Increment the not taken count.
+    increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset()));
+
+    // The method data pointer needs to be updated to correspond to
+    // the next bytecode
+    update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size()));
+    bind(profile_continue);
+  }
+}
+
+
+void InterpreterMacroAssembler::profile_call(Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are making a call.  Increment the count.
+    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size()));
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_final_call(Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // We are making a call.  Increment the count.
+    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_constant(mdp,
+                           in_bytes(VirtualCallData::
+                                    virtual_call_data_size()));
+    bind(profile_continue);
+  }
+}
+
+
+void InterpreterMacroAssembler::profile_virtual_call(Register receiver,
+                                                     Register mdp,
+                                                     Register reg2,
+                                                     bool receiver_can_be_null) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    Label skip_receiver_profile;
+    if (receiver_can_be_null) {
+      Label not_null;
+      cbnz(receiver, not_null);
+      // We are making a call.  Increment the count for null receiver.
+      increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+      b(skip_receiver_profile);
+      bind(not_null);
+    }
+
+    // Record the receiver type.
+    record_klass_in_profile(receiver, mdp, reg2, true);
+    bind(skip_receiver_profile);
+
+    // The method data pointer needs to be updated to reflect the new target.
+    update_mdp_by_constant(mdp,
+                           in_bytes(VirtualCallData::
+                                    virtual_call_data_size()));
+    bind(profile_continue);
+  }
+}
+
+// This routine creates a state machine for updating the multi-row
+// type profile at a virtual call site (or other type-sensitive bytecode).
+// The machine visits each row (of receiver/count) until the receiver type
+// is found, or until it runs out of rows.  At the same time, it remembers
+// the location of the first empty row.  (An empty row records null for its
+// receiver, and can be allocated for a newly-observed receiver type.)
+// Because there are two degrees of freedom in the state, a simple linear
+// search will not work; it must be a decision tree.  Hence this helper
+// function is recursive, to generate the required tree structured code.
+// It's the interpreter, so we are trading off code space for speed.
+// See below for example code.
+void InterpreterMacroAssembler::record_klass_in_profile_helper(
+                                        Register receiver, Register mdp,
+                                        Register reg2, int start_row,
+                                        Label& done, bool is_virtual_call) {
+  if (TypeProfileWidth == 0) {
+    if (is_virtual_call) {
+      increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+    }
+    return;
+  }
+
+  int last_row = VirtualCallData::row_limit() - 1;
+  assert(start_row <= last_row, "must be work left to do");
+  // Test this row for both the receiver and for null.
+  // Take any of three different outcomes:
+  //   1. found receiver => increment count and goto done
+  //   2. found null => keep looking for case 1, maybe allocate this cell
+  //   3. found something else => keep looking for cases 1 and 2
+  // Case 3 is handled by a recursive call.
+  for (int row = start_row; row <= last_row; row++) {
+    Label next_test;
+    bool test_for_null_also = (row == start_row);
+
+    // See if the receiver is receiver[n].
+    int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row));
+    test_mdp_data_at(mdp, recvr_offset, receiver,
+                     (test_for_null_also ? reg2 : noreg),
+                     next_test);
+    // (Reg2 now contains the receiver from the CallData.)
+
+    // The receiver is receiver[n].  Increment count[n].
+    int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row));
+    increment_mdp_data_at(mdp, count_offset);
+    b(done);
+    bind(next_test);
+
+    if (test_for_null_also) {
+      Label found_null;
+      // Failed the equality check on receiver[n]...  Test for null.
+      if (start_row == last_row) {
+        // The only thing left to do is handle the null case.
+        if (is_virtual_call) {
+          cbz(reg2, found_null);
+          // Receiver did not match any saved receiver and there is no empty row for it.
+          // Increment total counter to indicate polymorphic case.
+          increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+          b(done);
+          bind(found_null);
+        } else {
+          cbz(reg2, done);
+        }
+        break;
+      }
+      // Since null is rare, make it be the branch-taken case.
+      cbz(reg2, found_null);
+
+      // Put all the "Case 3" tests here.
+      record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call);
+
+      // Found a null.  Keep searching for a matching receiver,
+      // but remember that this is an empty (unused) slot.
+      bind(found_null);
+    }
+  }
+
+  // In the fall-through case, we found no matching receiver, but we
+  // observed that receiver[start_row] is NULL.
+
+  // Fill in the receiver field and increment the count.
+  int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row));
+  set_mdp_data_at(mdp, recvr_offset, receiver);
+  int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row));
+  mov(reg2, DataLayout::counter_increment);
+  set_mdp_data_at(mdp, count_offset, reg2);
+  if (start_row > 0) {
+    b(done);
+  }
+}
+
+// Example state machine code for three profile rows:
+//   // main copy of decision tree, rooted at row[0]
+//   if (row[0].rec == rec) { row[0].incr(); goto done; }
+//   if (row[0].rec != NULL) {
+//     // inner copy of decision tree, rooted at row[1]
+//     if (row[1].rec == rec) { row[1].incr(); goto done; }
+//     if (row[1].rec != NULL) {
+//       // degenerate decision tree, rooted at row[2]
+//       if (row[2].rec == rec) { row[2].incr(); goto done; }
+//       if (row[2].rec != NULL) { count.incr(); goto done; } // overflow
+//       row[2].init(rec); goto done;
+//     } else {
+//       // remember row[1] is empty
+//       if (row[2].rec == rec) { row[2].incr(); goto done; }
+//       row[1].init(rec); goto done;
+//     }
+//   } else {
+//     // remember row[0] is empty
+//     if (row[1].rec == rec) { row[1].incr(); goto done; }
+//     if (row[2].rec == rec) { row[2].incr(); goto done; }
+//     row[0].init(rec); goto done;
+//   }
+//   done:
+
+void InterpreterMacroAssembler::record_klass_in_profile(Register receiver,
+                                                        Register mdp, Register reg2,
+                                                        bool is_virtual_call) {
+  assert(ProfileInterpreter, "must be profiling");
+  Label done;
+
+  record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call);
+
+  bind (done);
+}
+
+void InterpreterMacroAssembler::profile_ret(Register return_bci,
+                                            Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+    uint row;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    // Update the total ret count.
+    increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset()));
+
+    for (row = 0; row < RetData::row_limit(); row++) {
+      Label next_test;
+
+      // See if return_bci is equal to bci[n]:
+      test_mdp_data_at(mdp,
+                       in_bytes(RetData::bci_offset(row)),
+                       return_bci, noreg,
+                       next_test);
+
+      // return_bci is equal to bci[n].  Increment the count.
+      increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row)));
+
+      // The method data pointer needs to be updated to reflect the new target.
+      update_mdp_by_offset(mdp,
+                           in_bytes(RetData::bci_displacement_offset(row)));
+      b(profile_continue);
+      bind(next_test);
+    }
+
+    update_mdp_for_ret(return_bci);
+
+    bind(profile_continue);
+  }
+}
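+
+// In effect (sketch), profile_ret generates:
+//
+//   count++;                                        // total ret count
+//   for (row = 0; row < RetData::row_limit(); row++)
+//     if (bci(row) == return_bci)
+//       { bci_count(row)++; mdp += bci_displacement(row); goto done; }
+//   InterpreterRuntime::update_mdp_for_ret(return_bci);  // slow path
+//   done: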
+
+void InterpreterMacroAssembler::profile_null_seen(Register mdp) {
+  if (ProfileInterpreter) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    set_mdp_flag_at(mdp, BitData::null_seen_byte_constant());
+
+    // The method data pointer needs to be updated.
+    int mdp_delta = in_bytes(BitData::bit_data_size());
+    if (TypeProfileCasts) {
+      mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size());
+    }
+    update_mdp_by_constant(mdp, mdp_delta);
+
+    bind(profile_continue);
+  }
+}
+
+void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) {
+  if (ProfileInterpreter && TypeProfileCasts) {
+    Label profile_continue;
+
+    // If no method data exists, go to profile_continue.
+    test_method_data_pointer(mdp, profile_continue);
+
+    int count_offset = in_bytes(CounterData::count_offset());
+    // Back up the address, since we have already bumped the mdp.
+    count_offset -= in_bytes(VirtualCallData::virtual_call_data_size());
+
+    // *Decrement* the counter.  We expect to see zero or small