view ppc_patches/0009_linux_ppc_files.patch @ 5903:ac7b3be2fdb5

Fix stack overflow issue: code branched to zero Generate throw_StackOverflowError stub before generating the native entry.
author goetz
date Wed, 18 Dec 2013 12:23:05 +0100
parents 9677ba28c6d8
children
line wrap: on
line source
# HG changeset patch
# User goetz
# Date 1374503506 -7200
# Node ID b2d1cd9c3ed62d4f851bcb9e3ba54a64edaf30f1
# Parent ca3dacaf9041f7deda09f95d86988421f98f40f5
8019972: PPC64 (part 9): platform files for interpreter only VM.
Summary: With this change the HotSpot core build works on Linux/PPC64. The VM succesfully executes simple test programs.

diff -r ca3dacaf9041 src/cpu/ppc/vm/assembler_ppc.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/assembler_ppc.cpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,699 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/cardTableModRefBS.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#endif // INCLUDE_ALL_GCS
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) // nothing
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#endif
+
+int AbstractAssembler::code_fill_byte() {
+  return 0x00;                  // illegal instruction 0x00000000
+}
+
+void Assembler::print_instruction(int inst) {
+  Unimplemented();
+}
+
+// Patch instruction `inst' at offset `inst_pos' to refer to
+// `dest_pos' and return the resulting instruction.  We should have
+// pcs, not offsets, but since all is relative, it will work out fine.
+int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) {
+  int m = 0; // mask for displacement field
+  int v = 0; // new value for displacement field
+
+  switch (inv_op_ppc(inst)) {
+  case b_op:  m = li(-1); v = li(disp(dest_pos, inst_pos)); break;
+  case bc_op: m = bd(-1); v = bd(disp(dest_pos, inst_pos)); break;
+    default: ShouldNotReachHere();
+  }
+  return inst & ~m | v;
+}
+
+// Return the offset, relative to _code_begin, of the destination of
+// the branch inst at offset pos.
+int Assembler::branch_destination(int inst, int pos) {
+  int r = 0;
+  switch (inv_op_ppc(inst)) {
+    case b_op:  r = bxx_destination_offset(inst, pos); break;
+    case bc_op: r = inv_bd_field(inst, pos); break;
+    default: ShouldNotReachHere();
+  }
+  return r;
+}
+
+// Low-level andi-one-instruction-macro.
+void Assembler::andi(Register a, Register s, const int ui16) {
+  assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate");
+  if (is_power_of_2_long(((jlong) ui16)+1)) {
+    // pow2minus1
+    clrldi(a, s, 64-log2_long((((jlong) ui16)+1)));
+  } else if (is_power_of_2_long((jlong) ui16)) {
+    // pow2
+    rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16));
+  } else if (is_power_of_2_long((jlong)-ui16)) {
+    // negpow2
+    clrrdi(a, s, log2_long((jlong)-ui16));
+  } else {
+    andi_(a, s, ui16);
+  }
+}
+
+// RegisterOrConstant version.
+void Assembler::ld(Register d, RegisterOrConstant roc, Register s1) {
+  if (roc.is_constant()) {
+    if (s1 == noreg) {
+      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
+      Assembler::ld(d, simm16_rest, d);
+    } else if (is_simm(roc.as_constant(), 16)) {
+      Assembler::ld(d, roc.as_constant(), s1);
+    } else {
+      load_const_optimized(d, roc.as_constant());
+      Assembler::ldx(d, d, s1);
+    }
+  } else {
+    if (s1 == noreg)
+      Assembler::ld(d, 0, roc.as_register());
+    else
+      Assembler::ldx(d, roc.as_register(), s1);
+  }
+}
+
+void Assembler::lwa(Register d, RegisterOrConstant roc, Register s1) {
+  if (roc.is_constant()) {
+    if (s1 == noreg) {
+      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
+      Assembler::lwa(d, simm16_rest, d);
+    } else if (is_simm(roc.as_constant(), 16)) {
+      Assembler::lwa(d, roc.as_constant(), s1);
+    } else {
+      load_const_optimized(d, roc.as_constant());
+      Assembler::lwax(d, d, s1);
+    }
+  } else {
+    if (s1 == noreg)
+      Assembler::lwa(d, 0, roc.as_register());
+    else
+      Assembler::lwax(d, roc.as_register(), s1);
+  }
+}
+
+void Assembler::lwz(Register d, RegisterOrConstant roc, Register s1) {
+  if (roc.is_constant()) {
+    if (s1 == noreg) {
+      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
+      Assembler::lwz(d, simm16_rest, d);
+    } else if (is_simm(roc.as_constant(), 16)) {
+      Assembler::lwz(d, roc.as_constant(), s1);
+    } else {
+      load_const_optimized(d, roc.as_constant());
+      Assembler::lwzx(d, d, s1);
+    }
+  } else {
+    if (s1 == noreg)
+      Assembler::lwz(d, 0, roc.as_register());
+    else
+      Assembler::lwzx(d, roc.as_register(), s1);
+  }
+}
+
+void Assembler::lha(Register d, RegisterOrConstant roc, Register s1) {
+  if (roc.is_constant()) {
+    if (s1 == noreg) {
+      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
+      Assembler::lha(d, simm16_rest, d);
+    } else if (is_simm(roc.as_constant(), 16)) {
+      Assembler::lha(d, roc.as_constant(), s1);
+    } else {
+      load_const_optimized(d, roc.as_constant());
+      Assembler::lhax(d, d, s1);
+    }
+  } else {
+    if (s1 == noreg)
+      Assembler::lha(d, 0, roc.as_register());
+    else
+      Assembler::lhax(d, roc.as_register(), s1);
+  }
+}
+
+void Assembler::lhz(Register d, RegisterOrConstant roc, Register s1) {
+  if (roc.is_constant()) {
+    if (s1 == noreg) {
+      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
+      Assembler::lhz(d, simm16_rest, d);
+    } else if (is_simm(roc.as_constant(), 16)) {
+      Assembler::lhz(d, roc.as_constant(), s1);
+    } else {
+      load_const_optimized(d, roc.as_constant());
+      Assembler::lhzx(d, d, s1);
+    }
+  } else {
+    if (s1 == noreg)
+      Assembler::lhz(d, 0, roc.as_register());
+    else
+      Assembler::lhzx(d, roc.as_register(), s1);
+  }
+}
+
+void Assembler::lbz(Register d, RegisterOrConstant roc, Register s1) {
+  if (roc.is_constant()) {
+    if (s1 == noreg) {
+      int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true);
+      Assembler::lbz(d, simm16_rest, d);
+    } else if (is_simm(roc.as_constant(), 16)) {
+      Assembler::lbz(d, roc.as_constant(), s1);
+    } else {
+      load_const_optimized(d, roc.as_constant());
+      Assembler::lbzx(d, d, s1);
+    }
+  } else {
+    if (s1 == noreg)
+      Assembler::lbz(d, 0, roc.as_register());
+    else
+      Assembler::lbzx(d, roc.as_register(), s1);
+  }
+}
+
+void Assembler::std(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
+  if (roc.is_constant()) {
+    if (s1 == noreg) {
+      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
+      int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
+      Assembler::std(d, simm16_rest, tmp);
+    } else if (is_simm(roc.as_constant(), 16)) {
+      Assembler::std(d, roc.as_constant(), s1);
+    } else {
+      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
+      load_const_optimized(tmp, roc.as_constant());
+      Assembler::stdx(d, tmp, s1);
+    }
+  } else {
+    if (s1 == noreg)
+      Assembler::std(d, 0, roc.as_register());
+    else
+      Assembler::stdx(d, roc.as_register(), s1);
+  }
+}
+
+void Assembler::stw(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
+  if (roc.is_constant()) {
+    if (s1 == noreg) {
+      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
+      int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
+      Assembler::stw(d, simm16_rest, tmp);
+    } else if (is_simm(roc.as_constant(), 16)) {
+      Assembler::stw(d, roc.as_constant(), s1);
+    } else {
+      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
+      load_const_optimized(tmp, roc.as_constant());
+      Assembler::stwx(d, tmp, s1);
+    }
+  } else {
+    if (s1 == noreg)
+      Assembler::stw(d, 0, roc.as_register());
+    else
+      Assembler::stwx(d, roc.as_register(), s1);
+  }
+}
+
+void Assembler::sth(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
+  if (roc.is_constant()) {
+    if (s1 == noreg) {
+      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
+      int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
+      Assembler::sth(d, simm16_rest, tmp);
+    } else if (is_simm(roc.as_constant(), 16)) {
+      Assembler::sth(d, roc.as_constant(), s1);
+    } else {
+      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
+      load_const_optimized(tmp, roc.as_constant());
+      Assembler::sthx(d, tmp, s1);
+    }
+  } else {
+    if (s1 == noreg)
+      Assembler::sth(d, 0, roc.as_register());
+    else
+      Assembler::sthx(d, roc.as_register(), s1);
+  }
+}
+
+void Assembler::stb(Register d, RegisterOrConstant roc, Register s1, Register tmp) {
+  if (roc.is_constant()) {
+    if (s1 == noreg) {
+      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
+      int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true);
+      Assembler::stb(d, simm16_rest, tmp);
+    } else if (is_simm(roc.as_constant(), 16)) {
+      Assembler::stb(d, roc.as_constant(), s1);
+    } else {
+      guarantee(tmp != noreg, "Need tmp reg to encode large constants");
+      load_const_optimized(tmp, roc.as_constant());
+      Assembler::stbx(d, tmp, s1);
+    }
+  } else {
+    if (s1 == noreg)
+      Assembler::stb(d, 0, roc.as_register());
+    else
+      Assembler::stbx(d, roc.as_register(), s1);
+  }
+}
+
+void Assembler::add(Register d, RegisterOrConstant roc, Register s1) {
+  if (roc.is_constant()) {
+    intptr_t c = roc.as_constant();
+    assert(is_simm(c, 16), "too big");
+    addi(d, s1, (int)c);
+  }
+  else add(d, roc.as_register(), s1);
+}
+
+void Assembler::subf(Register d, RegisterOrConstant roc, Register s1) {
+  if (roc.is_constant()) {
+    intptr_t c = roc.as_constant();
+    assert(is_simm(-c, 16), "too big");
+    addi(d, s1, (int)-c);
+  }
+  else subf(d, roc.as_register(), s1);
+}
+
+void Assembler::cmpd(ConditionRegister d, RegisterOrConstant roc, Register s1) {
+  if (roc.is_constant()) {
+    intptr_t c = roc.as_constant();
+    assert(is_simm(c, 16), "too big");
+    cmpdi(d, s1, (int)c);
+  }
+  else cmpd(d, roc.as_register(), s1);
+}
+
+// Load a 64 bit constant. Patchable.
+void Assembler::load_const(Register d, long x, Register tmp) {
+  // 64-bit value: x = xa xb xc xd
+  int xa = (x >> 48) & 0xffff;
+  int xb = (x >> 32) & 0xffff;
+  int xc = (x >> 16) & 0xffff;
+  int xd = (x >>  0) & 0xffff;
+  if (tmp == noreg) {
+    Assembler::lis( d, (int)(short)xa);
+    Assembler::ori( d, d, (unsigned int)xb);
+    Assembler::sldi(d, d, 32);
+    Assembler::oris(d, d, (unsigned int)xc);
+    Assembler::ori( d, d, (unsigned int)xd);
+  } else {
+    // exploit instruction level parallelism if we have a tmp register
+    assert_different_registers(d, tmp);
+    Assembler::lis(tmp, (int)(short)xa);
+    Assembler::lis(d, (int)(short)xc);
+    Assembler::ori(tmp, tmp, (unsigned int)xb);
+    Assembler::ori(d, d, (unsigned int)xd);
+    Assembler::insrdi(d, tmp, 32, 0);
+  }
+}
+
+// Load a 64 bit constant, optimized, not identifyable.
+// Tmp can be used to increase ILP. Set return_simm16_rest=true to get a
+// 16 bit immediate offset.
+int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) {
+  // Avoid accidentally trying to use R0 for indexed addressing.
+  assert(d != R0, "R0 not allowed");
+  assert_different_registers(d, tmp);
+
+  short xa, xb, xc, xd; // Four 16-bit chunks of const.
+  long rem = x;         // Remaining part of const.
+
+  xd = rem & 0xFFFF;    // Lowest 16-bit chunk.
+  rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensation for sign extend.
+
+  if (rem == 0) { // opt 1: simm16
+    li(d, xd);
+    return 0;
+  }
+
+  xc = rem & 0xFFFF; // Next 16-bit chunk.
+  rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend.
+
+  if (rem == 0) { // opt 2: simm32
+    lis(d, xc);
+  } else { // High 32 bits needed.
+
+    if (tmp != noreg) { // opt 3: We have a temp reg.
+      // No carry propagation between xc and higher chunks here (use logical instructions).
+      xa = (x >> 48) & 0xffff;
+      xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0.
+      bool load_xa = (xa != 0) || (xb < 0);
+      bool return_xd = false;
+
+      if (load_xa) lis(tmp, xa);
+      if (xc) lis(d, xc);
+      if (load_xa) {
+        if (xb) ori(tmp, tmp, xb); // No addi, we support tmp == R0.
+      } else {
+        li(tmp, xb); // non-negative
+      }
+      if (xc) {
+        if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi.
+        else if (xd) { addi(d, d, xd); }
+      } else {
+        li(d, xd);
+      }
+      insrdi(d, tmp, 32, 0);
+      return return_xd ? xd : 0; // non-negative
+    }
+
+    xb = rem & 0xFFFF; // Next 16-bit chunk.
+    rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend.
+
+    xa = rem & 0xFFFF; // Highest 16-bit chunk.
+
+    // opt 4: avoid adding 0
+    if (xa) { // Highest 16-bit needed?
+      lis(d, xa);
+      if (xb) addi(d, d, xb);
+    } else {
+      li(d, xb);
+    }
+    sldi(d, d, 32);
+    if (xc) addis(d, d, xc);
+  }
+
+  // opt 5: Return offset to be inserted into following instruction.
+  if (return_simm16_rest) return xd;
+
+  if (xd) addi(d, d, xd);
+  return 0;
+}
+
+#ifndef PRODUCT
+// Test of ppc assembler.
+void Assembler::test_asm() {
+  // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
+  addi(   R0,  R1,  10);
+  addis(  R5,  R2,  11);
+  addic_( R3,  R31, 42);
+  subfic( R21, R12, 2112);
+  add(    R3,  R2,  R1);
+  add_(   R11, R22, R30);
+  subf(   R7,  R6,  R5);
+  subf_(  R8,  R9,  R4);
+  addc(   R11, R12, R13);
+  addc_(  R14, R14, R14);
+  subfc(  R15, R16, R17);
+  subfc_( R18, R20, R19);
+  adde(   R20, R22, R24);
+  adde_(  R29, R27, R26);
+  subfe(  R28, R1,  R0);
+  subfe_( R21, R11, R29);
+  neg(    R21, R22);
+  neg_(   R13, R23);
+  mulli(  R0,  R11, -31);
+  mulld(  R1,  R18, R21);
+  mulld_( R2,  R17, R22);
+  mullw(  R3,  R16, R23);
+  mullw_( R4,  R15, R24);
+  divd(   R5,  R14, R25);
+  divd_(  R6,  R13, R26);
+  divw(   R7,  R12, R27);
+  divw_(  R8,  R11, R28);
+
+  li(     R3, -4711);
+
+  // PPC 1, section 3.3.9, Fixed-Point Compare Instructions
+  cmpi(   CCR7,  0, R27, 4711);
+  cmp(    CCR0, 1, R14, R11);
+  cmpli(  CCR5,  1, R17, 45);
+  cmpl(   CCR3, 0, R9,  R10);
+
+  cmpwi(  CCR7,  R27, 4711);
+  cmpw(   CCR0, R14, R11);
+  cmplwi( CCR5,  R17, 45);
+  cmplw(  CCR3, R9,  R10);
+
+  cmpdi(  CCR7,  R27, 4711);
+  cmpd(   CCR0, R14, R11);
+  cmpldi( CCR5,  R17, 45);
+  cmpld(  CCR3, R9,  R10);
+
+  // PPC 1, section 3.3.11, Fixed-Point Logical Instructions
+  andi_(  R4,  R5,  0xff);
+  andis_( R12, R13, 0x7b51);
+  ori(    R1,  R4,  13);
+  oris(   R3,  R5,  177);
+  xori(   R7,  R6,  51);
+  xoris(  R29, R0,  1);
+  andr(   R17, R21, R16);
+  and_(   R3,  R5,  R15);
+  orr(    R2,  R1,  R9);
+  or_(    R17, R15, R11);
+  xorr(   R19, R18, R10);
+  xor_(   R31, R21, R11);
+  nand(   R5,  R7,  R3);
+  nand_(  R3,  R1,  R0);
+  nor(    R2,  R3,  R5);
+  nor_(   R3,  R6,  R8);
+  andc(   R25, R12, R11);
+  andc_(  R24, R22, R21);
+  orc(    R20, R10, R12);
+  orc_(   R22, R2,  R13);
+
+  nop();
+
+  // PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions
+  sld(    R5,  R6,  R8);
+  sld_(   R3,  R5,  R9);
+  slw(    R2,  R1,  R10);
+  slw_(   R6,  R26, R16);
+  srd(    R16, R24, R8);
+  srd_(   R21, R14, R7);
+  srw(    R22, R25, R29);
+  srw_(   R5,  R18, R17);
+  srad(   R7,  R11, R0);
+  srad_(  R9,  R13, R1);
+  sraw(   R7,  R15, R2);
+  sraw_(  R4,  R17, R3);
+  sldi(   R3,  R18, 63);
+  sldi_(  R2,  R20, 30);
+  slwi(   R1,  R21, 30);
+  slwi_(  R7,  R23, 8);
+  srdi(   R0,  R19, 2);
+  srdi_(  R12, R24, 5);
+  srwi(   R13, R27, 6);
+  srwi_(  R14, R29, 7);
+  sradi(  R15, R30, 9);
+  sradi_( R16, R31, 19);
+  srawi(  R17, R31, 15);
+  srawi_( R18, R31, 12);
+
+  clrrdi( R3, R30, 5);
+  clrldi( R9, R10, 11);
+
+  rldicr( R19, R20, 13, 15);
+  rldicr_(R20, R20, 16, 14);
+  rldicl( R21, R21, 30, 33);
+  rldicl_(R22, R1,  20, 25);
+  rlwinm( R23, R2,  25, 10, 11);
+  rlwinm_(R24, R3,  12, 13, 14);
+
+  // PPC 1, section 3.3.2 Fixed-Point Load Instructions
+  lwzx(   R3,  R5, R7);
+  lwz(    R11,  0, R1);
+  lwzu(   R31, -4, R11);
+
+  lwax(   R3,  R5, R7);
+  lwa(    R31, -4, R11);
+  lhzx(   R3,  R5, R7);
+  lhz(    R31, -4, R11);
+  lhzu(   R31, -4, R11);
+
+
+  lhax(   R3,  R5, R7);
+  lha(    R31, -4, R11);
+  lhau(   R11,  0, R1);
+
+  lbzx(   R3,  R5, R7);
+  lbz(    R31, -4, R11);
+  lbzu(   R11,  0, R1);
+
+  ld(     R31, -4, R11);
+  ldx(    R3,  R5, R7);
+  ldu(    R31, -4, R11);
+
+  //  PPC 1, section 3.3.3 Fixed-Point Store Instructions
+  stwx(   R3,  R5, R7);
+  stw(    R31, -4, R11);
+  stwu(   R11,  0, R1);
+
+  sthx(   R3,  R5, R7 );
+  sth(    R31, -4, R11);
+  sthu(   R31, -4, R11);
+
+  stbx(   R3,  R5, R7);
+  stb(    R31, -4, R11);
+  stbu(   R31, -4, R11);
+
+  std(    R31, -4, R11);
+  stdx(   R3,  R5, R7);
+  stdu(   R31, -4, R11);
+
+ // PPC 1, section 3.3.13 Move To/From System Register Instructions
+  mtlr(   R3);
+  mflr(   R3);
+  mtctr(  R3);
+  mfctr(  R3);
+  mtcrf(  0xff, R15);
+  mtcr(   R15);
+  mtcrf(  0x03, R15);
+  mtcr(   R15);
+  mfcr(   R15);
+
+ // PPC 1, section 2.4.1 Branch Instructions
+  Label lbl1, lbl2, lbl3;
+  bind(lbl1);
+
+  b(pc());
+  b(pc() - 8);
+  b(lbl1);
+  b(lbl2);
+  b(lbl3);
+
+  bl(pc() - 8);
+  bl(lbl1);
+  bl(lbl2);
+
+  bcl(4, 10, pc() - 8);
+  bcl(4, 10, lbl1);
+  bcl(4, 10, lbl2);
+
+  bclr( 4, 6, 0);
+  bclrl(4, 6, 0);
+
+  bind(lbl2);
+
+  bcctr( 4, 6, 0);
+  bcctrl(4, 6, 0);
+
+  blt(CCR0, lbl2);
+  bgt(CCR1, lbl2);
+  beq(CCR2, lbl2);
+  bso(CCR3, lbl2);
+  bge(CCR4, lbl2);
+  ble(CCR5, lbl2);
+  bne(CCR6, lbl2);
+  bns(CCR7, lbl2);
+
+  bltl(CCR0, lbl2);
+  bgtl(CCR1, lbl2);
+  beql(CCR2, lbl2);
+  bsol(CCR3, lbl2);
+  bgel(CCR4, lbl2);
+  blel(CCR5, lbl2);
+  bnel(CCR6, lbl2);
+  bnsl(CCR7, lbl2);
+  blr();
+
+  sync();
+  icbi( R1, R2);
+  dcbst(R2, R3);
+
+  // FLOATING POINT instructions ppc.
+  // PPC 1, section 4.6.2 Floating-Point Load Instructions
+  lfs( F1, -11, R3);
+  lfsu(F2, 123, R4);
+  lfsx(F3, R5,  R6);
+  lfd( F4, 456, R7);
+  lfdu(F5, 789, R8);
+  lfdx(F6, R10, R11);
+
+  // PPC 1, section 4.6.3 Floating-Point Store Instructions
+  stfs(  F7,  876, R12);
+  stfsu( F8,  543, R13);
+  stfsx( F9,  R14, R15);
+  stfd(  F10, 210, R16);
+  stfdu( F11, 111, R17);
+  stfdx( F12, R18, R19);
+
+  // PPC 1, section 4.6.4 Floating-Point Move Instructions
+  fmr(   F13, F14);
+  fmr_(  F14, F15);
+  fneg(  F16, F17);
+  fneg_( F18, F19);
+  fabs(  F20, F21);
+  fabs_( F22, F23);
+  fnabs( F24, F25);
+  fnabs_(F26, F27);
+
+  // PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic
+  // Instructions
+  fadd(  F28, F29, F30);
+  fadd_( F31, F0,  F1);
+  fadds( F2,  F3,  F4);
+  fadds_(F5,  F6,  F7);
+  fsub(  F8,  F9,  F10);
+  fsub_( F11, F12, F13);
+  fsubs( F14, F15, F16);
+  fsubs_(F17, F18, F19);
+  fmul(  F20, F21, F22);
+  fmul_( F23, F24, F25);
+  fmuls( F26, F27, F28);
+  fmuls_(F29, F30, F31);
+  fdiv(  F0,  F1,  F2);
+  fdiv_( F3,  F4,  F5);
+  fdivs( F6,  F7,  F8);
+  fdivs_(F9,  F10, F11);
+
+  // PPC 1, section 4.6.6 Floating-Point Rounding and Conversion
+  // Instructions
+  frsp(  F12, F13);
+  fctid( F14, F15);
+  fctidz(F16, F17);
+  fctiw( F18, F19);
+  fctiwz(F20, F21);
+  fcfid( F22, F23);
+
+  // PPC 1, section 4.6.7 Floating-Point Compare Instructions
+  fcmpu( CCR7, F24, F25);
+
+  tty->print_cr("\ntest_asm disassembly (0x%lx 0x%lx):", code()->insts_begin(), code()->insts_end());
+  code()->decode();
+}
+#endif // !PRODUCT
diff -r ca3dacaf9041 src/cpu/ppc/vm/assembler_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/assembler_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,1963 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_ASSEMBLER_PPC_HPP
+#define CPU_PPC_VM_ASSEMBLER_PPC_HPP
+
+#include "asm/register.hpp"
+
+// Address is an abstraction used to represent a memory location
+// as used in assembler instructions.
+// PPC instructions grok either baseReg + indexReg or baseReg + disp.
+// So far we do not use this as simplification by this class is low
+// on PPC with its simple addressing mode. Use RegisterOrConstant to
+// represent an offset.
+class Address VALUE_OBJ_CLASS_SPEC {
+};
+
+class AddressLiteral VALUE_OBJ_CLASS_SPEC {
+ private:
+  address          _address;
+  RelocationHolder _rspec;
+
+  RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) {
+    switch (rtype) {
+    case relocInfo::external_word_type:
+      return external_word_Relocation::spec(addr);
+    case relocInfo::internal_word_type:
+      return internal_word_Relocation::spec(addr);
+    case relocInfo::opt_virtual_call_type:
+      return opt_virtual_call_Relocation::spec();
+    case relocInfo::static_call_type:
+      return static_call_Relocation::spec();
+    case relocInfo::runtime_call_type:
+      return runtime_call_Relocation::spec();
+    case relocInfo::none:
+      return RelocationHolder();
+    default:
+      ShouldNotReachHere();
+      return RelocationHolder();
+    }
+  }
+
+ protected:
+  // creation
+  AddressLiteral() : _address(NULL), _rspec(NULL) {}
+
+ public:
+  AddressLiteral(address addr, RelocationHolder const& rspec)
+    : _address(addr),
+      _rspec(rspec) {}
+
+  AddressLiteral(address addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  AddressLiteral(oop* addr, relocInfo::relocType rtype = relocInfo::none)
+    : _address((address) addr),
+      _rspec(rspec_from_rtype(rtype, (address) addr)) {}
+
+  intptr_t value() const { return (intptr_t) _address; }
+
+  const RelocationHolder& rspec() const { return _rspec; }
+};
+
+// Argument is an abstraction used to represent an outgoing
+// actual argument or an incoming formal parameter, whether
+// it resides in memory or in a register, in a manner consistent
+// with the PPC Application Binary Interface, or ABI. This is
+// often referred to as the native or C calling convention.
+
+class Argument VALUE_OBJ_CLASS_SPEC {
+ private:
+  int _number;  // The number of the argument.
+ public:
+  enum {
+    // Only 8 registers may contain integer parameters.
+    n_register_parameters = 8,
+    // Can have up to 8 floating registers.
+    n_float_register_parameters = 8
+  };
+  // creation
+  Argument(int number) : _number(number) {}
+
+  int  number() const { return _number; }
+
+  // Locating register-based arguments:
+  bool is_register() const { return _number < n_register_parameters; }
+
+  Register as_register() const {
+    assert(is_register(), "must be a register argument");
+    return as_Register(number() + R3_ARG1->encoding());
+  }
+};
+
+// A ppc64 function descriptor.
+struct FunctionDescriptor VALUE_OBJ_CLASS_SPEC {
+ private:
+  address _entry;
+  address _toc;
+  address _env;
+
+ public:
+  inline address entry() const { return _entry; }
+  inline address toc()   const { return _toc; }
+  inline address env()   const { return _env; }
+
+  inline void set_entry(address entry) { _entry = entry; }
+  inline void set_toc(  address toc)   { _toc   = toc; }
+  inline void set_env(  address env)   { _env   = env; }
+
+  inline static ByteSize entry_offset() { return byte_offset_of(FunctionDescriptor, _entry); }
+  inline static ByteSize toc_offset()   { return byte_offset_of(FunctionDescriptor, _toc); }
+  inline static ByteSize env_offset()   { return byte_offset_of(FunctionDescriptor, _env); }
+
+  // Friend functions can be called without loading toc and env.
+  enum {
+    friend_toc = 0xcafe,
+    friend_env = 0xc0de
+  };
+
+  inline bool is_friend_function() const {
+    return (toc() == (address) friend_toc) && (env() == (address) friend_env);
+  }
+
+  // Constructor for stack-allocated instances.
+  FunctionDescriptor() {
+    _entry = (address) 0xbad;
+    _toc   = (address) 0xbad;
+    _env   = (address) 0xbad;
+  }
+};
+
+class Assembler : public AbstractAssembler {
+ protected:
+  // Displacement routines
+  static void print_instruction(int inst);
+  static int  patched_branch(int dest_pos, int inst, int inst_pos);
+  static int  branch_destination(int inst, int pos);
+
+  friend class AbstractAssembler;
+
+  // Code patchers need various routines like inv_wdisp()
+  friend class NativeInstruction;
+  friend class NativeGeneralJump;
+  friend class Relocation;
+
+ public:
+
+  enum shifts {
+    XO_21_29_SHIFT = 2,
+    XO_21_30_SHIFT = 1,
+    XO_27_29_SHIFT = 2,
+    XO_30_31_SHIFT = 0,
+    SPR_5_9_SHIFT  = 11u, // SPR_5_9 field in bits 11 -- 15
+    SPR_0_4_SHIFT  = 16u, // SPR_0_4 field in bits 16 -- 20
+    RS_SHIFT       = 21u, // RS field in bits 21 -- 25
+    OPCODE_SHIFT   = 26u, // opcode in bits 26 -- 31
+  };
+
+  enum opcdxos_masks {
+    XL_FORM_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1),
+    ADDI_OPCODE_MASK    = (63u << OPCODE_SHIFT),
+    ADDIS_OPCODE_MASK   = (63u << OPCODE_SHIFT),
+    BXX_OPCODE_MASK     = (63u << OPCODE_SHIFT),
+    BCXX_OPCODE_MASK    = (63u << OPCODE_SHIFT),
+    // trap instructions
+    TDI_OPCODE_MASK     = (63u << OPCODE_SHIFT),
+    TWI_OPCODE_MASK     = (63u << OPCODE_SHIFT),
+    TD_OPCODE_MASK      = (63u << OPCODE_SHIFT) | (1023u << 1),
+    TW_OPCODE_MASK      = (63u << OPCODE_SHIFT) | (1023u << 1),
+    LD_OPCODE_MASK      = (63u << OPCODE_SHIFT) | (3u << XO_30_31_SHIFT), // DS-FORM
+    STD_OPCODE_MASK     = LD_OPCODE_MASK,
+    STDU_OPCODE_MASK    = STD_OPCODE_MASK,
+    STDX_OPCODE_MASK    = (63u << OPCODE_SHIFT) | (1023u << 1),
+    STDUX_OPCODE_MASK   = STDX_OPCODE_MASK,
+    STW_OPCODE_MASK     = (63u << OPCODE_SHIFT),
+    STWU_OPCODE_MASK    = STW_OPCODE_MASK,
+    STWX_OPCODE_MASK    = (63u << OPCODE_SHIFT) | (1023u << 1),
+    STWUX_OPCODE_MASK   = STWX_OPCODE_MASK,
+    MTCTR_OPCODE_MASK   = ~(31u << RS_SHIFT),
+    ORI_OPCODE_MASK     = (63u << OPCODE_SHIFT),
+    ORIS_OPCODE_MASK    = (63u << OPCODE_SHIFT),
+    RLDICR_OPCODE_MASK  = (63u << OPCODE_SHIFT) | (7u << XO_27_29_SHIFT)
+  };
+
+  enum opcdxos {
+    ADD_OPCODE    = (31u << OPCODE_SHIFT | 266u << 1),
+    ADDC_OPCODE   = (31u << OPCODE_SHIFT |  10u << 1),
+    ADDI_OPCODE   = (14u << OPCODE_SHIFT),
+    ADDIS_OPCODE  = (15u << OPCODE_SHIFT),
+    ADDIC__OPCODE = (13u << OPCODE_SHIFT),
+    ADDE_OPCODE   = (31u << OPCODE_SHIFT | 138u << 1),
+    SUBF_OPCODE   = (31u << OPCODE_SHIFT |  40u << 1),
+    SUBFC_OPCODE  = (31u << OPCODE_SHIFT |   8u << 1),
+    SUBFE_OPCODE  = (31u << OPCODE_SHIFT | 136u << 1),
+    SUBFIC_OPCODE = (8u  << OPCODE_SHIFT),
+    SUBFZE_OPCODE = (31u << OPCODE_SHIFT | 200u << 1),
+    DIVW_OPCODE   = (31u << OPCODE_SHIFT | 491u << 1),
+    MULLW_OPCODE  = (31u << OPCODE_SHIFT | 235u << 1),
+    MULHW_OPCODE  = (31u << OPCODE_SHIFT |  75u << 1),
+    MULHWU_OPCODE = (31u << OPCODE_SHIFT |  11u << 1),
+    MULLI_OPCODE  = (7u  << OPCODE_SHIFT),
+    AND_OPCODE    = (31u << OPCODE_SHIFT |  28u << 1),
+    ANDI_OPCODE   = (28u << OPCODE_SHIFT),
+    ANDIS_OPCODE  = (29u << OPCODE_SHIFT),
+    ANDC_OPCODE   = (31u << OPCODE_SHIFT |  60u << 1),
+    ORC_OPCODE    = (31u << OPCODE_SHIFT | 412u << 1),
+    OR_OPCODE     = (31u << OPCODE_SHIFT | 444u << 1),
+    ORI_OPCODE    = (24u << OPCODE_SHIFT),
+    ORIS_OPCODE   = (25u << OPCODE_SHIFT),
+    XOR_OPCODE    = (31u << OPCODE_SHIFT | 316u << 1),
+    XORI_OPCODE   = (26u << OPCODE_SHIFT),
+    XORIS_OPCODE  = (27u << OPCODE_SHIFT),
+
+    NEG_OPCODE    = (31u << OPCODE_SHIFT | 104u << 1),
+
+    RLWINM_OPCODE = (21u << OPCODE_SHIFT),
+    CLRRWI_OPCODE = RLWINM_OPCODE,
+    CLRLWI_OPCODE = RLWINM_OPCODE,
+
+    RLWIMI_OPCODE = (20u << OPCODE_SHIFT),
+
+    SLW_OPCODE    = (31u << OPCODE_SHIFT |  24u << 1),
+    SLWI_OPCODE   = RLWINM_OPCODE,
+    SRW_OPCODE    = (31u << OPCODE_SHIFT | 536u << 1),
+    SRWI_OPCODE   = RLWINM_OPCODE,
+    SRAW_OPCODE   = (31u << OPCODE_SHIFT | 792u << 1),
+    SRAWI_OPCODE  = (31u << OPCODE_SHIFT | 824u << 1),
+
+    CMP_OPCODE    = (31u << OPCODE_SHIFT |   0u << 1),
+    CMPI_OPCODE   = (11u << OPCODE_SHIFT),
+    CMPL_OPCODE   = (31u << OPCODE_SHIFT |  32u << 1),
+    CMPLI_OPCODE  = (10u << OPCODE_SHIFT),
+
+    ISEL_OPCODE   = (31u << OPCODE_SHIFT |  15u << 1),
+
+    MTLR_OPCODE   = (31u << OPCODE_SHIFT | 467u << 1 | 8 << SPR_0_4_SHIFT),
+    MFLR_OPCODE   = (31u << OPCODE_SHIFT | 339u << 1 | 8 << SPR_0_4_SHIFT),
+
+    MTCRF_OPCODE  = (31u << OPCODE_SHIFT | 144u << 1),
+    MFCR_OPCODE   = (31u << OPCODE_SHIFT | 19u << 1),
+    MCRF_OPCODE   = (19u << OPCODE_SHIFT | 0u << 1),
+
+    // condition register logic instructions
+    CRAND_OPCODE  = (19u << OPCODE_SHIFT | 257u << 1),
+    CRNAND_OPCODE = (19u << OPCODE_SHIFT | 225u << 1),
+    CROR_OPCODE   = (19u << OPCODE_SHIFT | 449u << 1),
+    CRXOR_OPCODE  = (19u << OPCODE_SHIFT | 193u << 1),
+    CRNOR_OPCODE  = (19u << OPCODE_SHIFT |  33u << 1),
+    CREQV_OPCODE  = (19u << OPCODE_SHIFT | 289u << 1),
+    CRANDC_OPCODE = (19u << OPCODE_SHIFT | 129u << 1),
+    CRORC_OPCODE  = (19u << OPCODE_SHIFT | 417u << 1),
+
+    BCLR_OPCODE   = (19u << OPCODE_SHIFT | 16u << 1),
+    BXX_OPCODE      = (18u << OPCODE_SHIFT),
+    BCXX_OPCODE     = (16u << OPCODE_SHIFT),
+
+    // CTR-related opcodes
+    BCCTR_OPCODE  = (19u << OPCODE_SHIFT | 528u << 1),
+    MTCTR_OPCODE  = (31u << OPCODE_SHIFT | 467u << 1 | 9 << SPR_0_4_SHIFT),
+    MFCTR_OPCODE  = (31u << OPCODE_SHIFT | 339u << 1 | 9 << SPR_0_4_SHIFT),
+
+
+    LWZ_OPCODE   = (32u << OPCODE_SHIFT),
+    LWZX_OPCODE  = (31u << OPCODE_SHIFT |  23u << 1),
+    LWZU_OPCODE  = (33u << OPCODE_SHIFT),
+
+    LHA_OPCODE   = (42u << OPCODE_SHIFT),
+    LHAX_OPCODE  = (31u << OPCODE_SHIFT | 343u << 1),
+    LHAU_OPCODE  = (43u << OPCODE_SHIFT),
+
+    LHZ_OPCODE   = (40u << OPCODE_SHIFT),
+    LHZX_OPCODE  = (31u << OPCODE_SHIFT | 279u << 1),
+    LHZU_OPCODE  = (41u << OPCODE_SHIFT),
+
+    LBZ_OPCODE   = (34u << OPCODE_SHIFT),
+    LBZX_OPCODE  = (31u << OPCODE_SHIFT |  87u << 1),
+    LBZU_OPCODE  = (35u << OPCODE_SHIFT),
+
+    STW_OPCODE   = (36u << OPCODE_SHIFT),
+    STWX_OPCODE  = (31u << OPCODE_SHIFT | 151u << 1),
+    STWU_OPCODE  = (37u << OPCODE_SHIFT),
+    STWUX_OPCODE = (31u << OPCODE_SHIFT | 183u << 1),
+
+    STH_OPCODE   = (44u << OPCODE_SHIFT),
+    STHX_OPCODE  = (31u << OPCODE_SHIFT | 407u << 1),
+    STHU_OPCODE  = (45u << OPCODE_SHIFT),
+
+    STB_OPCODE   = (38u << OPCODE_SHIFT),
+    STBX_OPCODE  = (31u << OPCODE_SHIFT | 215u << 1),
+    STBU_OPCODE  = (39u << OPCODE_SHIFT),
+
+    EXTSB_OPCODE = (31u << OPCODE_SHIFT | 954u << 1),
+    EXTSH_OPCODE = (31u << OPCODE_SHIFT | 922u << 1),
+    EXTSW_OPCODE = (31u << OPCODE_SHIFT | 986u << 1),               // X-FORM
+
+    // 32 bit opcode encodings
+
+    LWA_OPCODE    = (58u << OPCODE_SHIFT |   2u << XO_30_31_SHIFT), // DS-FORM
+    LWAX_OPCODE   = (31u << OPCODE_SHIFT | 341u << XO_21_30_SHIFT), // X-FORM
+
+    CNTLZW_OPCODE = (31u << OPCODE_SHIFT |  26u << XO_21_30_SHIFT), // X-FORM
+
+    // 64 bit opcode encodings
+
+    LD_OPCODE     = (58u << OPCODE_SHIFT |   0u << XO_30_31_SHIFT), // DS-FORM
+    LDU_OPCODE    = (58u << OPCODE_SHIFT |   1u << XO_30_31_SHIFT), // DS-FORM
+    LDX_OPCODE    = (31u << OPCODE_SHIFT |  21u << XO_21_30_SHIFT), // X-FORM
+
+    STD_OPCODE    = (62u << OPCODE_SHIFT |   0u << XO_30_31_SHIFT), // DS-FORM
+    STDU_OPCODE   = (62u << OPCODE_SHIFT |   1u << XO_30_31_SHIFT), // DS-FORM
+    STDUX_OPCODE  = (31u << OPCODE_SHIFT | 181u << 1),                  // X-FORM
+    STDX_OPCODE   = (31u << OPCODE_SHIFT | 149u << XO_21_30_SHIFT), // X-FORM
+
+    RLDICR_OPCODE = (30u << OPCODE_SHIFT |   1u << XO_27_29_SHIFT), // MD-FORM
+    RLDICL_OPCODE = (30u << OPCODE_SHIFT |   0u << XO_27_29_SHIFT), // MD-FORM
+    RLDIC_OPCODE  = (30u << OPCODE_SHIFT |   2u << XO_27_29_SHIFT), // MD-FORM
+    RLDIMI_OPCODE = (30u << OPCODE_SHIFT |   3u << XO_27_29_SHIFT), // MD-FORM
+
+    SRADI_OPCODE  = (31u << OPCODE_SHIFT | 413u << XO_21_29_SHIFT), // XS-FORM
+
+    SLD_OPCODE    = (31u << OPCODE_SHIFT |  27u << 1),              // X-FORM
+    SRD_OPCODE    = (31u << OPCODE_SHIFT | 539u << 1),              // X-FORM
+    SRAD_OPCODE   = (31u << OPCODE_SHIFT | 794u << 1),              // X-FORM
+
+    MULLD_OPCODE  = (31u << OPCODE_SHIFT | 233u << 1),              // XO-FORM
+    MULHD_OPCODE  = (31u << OPCODE_SHIFT |  73u << 1),              // XO-FORM
+    MULHDU_OPCODE = (31u << OPCODE_SHIFT |   9u << 1),              // XO-FORM
+    DIVD_OPCODE   = (31u << OPCODE_SHIFT | 489u << 1),              // XO-FORM
+
+    CNTLZD_OPCODE = (31u << OPCODE_SHIFT |  58u << XO_21_30_SHIFT), // X-FORM
+    NAND_OPCODE   = (31u << OPCODE_SHIFT | 476u << XO_21_30_SHIFT), // X-FORM
+    NOR_OPCODE    = (31u << OPCODE_SHIFT | 124u << XO_21_30_SHIFT), // X-FORM
+
+
+    // opcodes only used for floating arithmetic
+    FADD_OPCODE   = (63u << OPCODE_SHIFT |  21u << 1),
+    FADDS_OPCODE  = (59u << OPCODE_SHIFT |  21u << 1),
+    FCMPU_OPCODE  = (63u << OPCODE_SHIFT |  00u << 1),
+    FDIV_OPCODE   = (63u << OPCODE_SHIFT |  18u << 1),
+    FDIVS_OPCODE  = (59u << OPCODE_SHIFT |  18u << 1),
+    FMR_OPCODE    = (63u << OPCODE_SHIFT |  72u << 1),
+    // These are special Power6 opcodes, reused for "lfdepx" and "stfdepx"
+    // on Power7.  Do not use.
+    // MFFGPR_OPCODE  = (31u << OPCODE_SHIFT | 607u << 1),
+    // MFTGPR_OPCODE  = (31u << OPCODE_SHIFT | 735u << 1),
+    CMPB_OPCODE    = (31u << OPCODE_SHIFT |  508  << 1),
+    POPCNTB_OPCODE = (31u << OPCODE_SHIFT |  122  << 1),
+    POPCNTW_OPCODE = (31u << OPCODE_SHIFT |  378  << 1),
+    POPCNTD_OPCODE = (31u << OPCODE_SHIFT |  506  << 1),
+    FABS_OPCODE    = (63u << OPCODE_SHIFT |  264u << 1),
+    FNABS_OPCODE   = (63u << OPCODE_SHIFT |  136u << 1),
+    FMUL_OPCODE    = (63u << OPCODE_SHIFT |   25u << 1),
+    FMULS_OPCODE   = (59u << OPCODE_SHIFT |   25u << 1),
+    FNEG_OPCODE    = (63u << OPCODE_SHIFT |   40u << 1),
+    FSUB_OPCODE    = (63u << OPCODE_SHIFT |   20u << 1),
+    FSUBS_OPCODE   = (59u << OPCODE_SHIFT |   20u << 1),
+
+    // PPC64-internal FPU conversion opcodes
+    FCFID_OPCODE   = (63u << OPCODE_SHIFT |  846u << 1),
+    FCFIDS_OPCODE  = (59u << OPCODE_SHIFT |  846u << 1),
+    FCTID_OPCODE   = (63u << OPCODE_SHIFT |  814u << 1),
+    FCTIDZ_OPCODE  = (63u << OPCODE_SHIFT |  815u << 1),
+    FCTIW_OPCODE   = (63u << OPCODE_SHIFT |   14u << 1),
+    FCTIWZ_OPCODE  = (63u << OPCODE_SHIFT |   15u << 1),
+    FRSP_OPCODE    = (63u << OPCODE_SHIFT |   12u << 1),
+
+    // WARNING: using fmadd results in a non-compliant vm. Some floating
+    // point tck tests will fail.
+    FMADD_OPCODE   = (59u << OPCODE_SHIFT |   29u << 1),
+    DMADD_OPCODE   = (63u << OPCODE_SHIFT |   29u << 1),
+    FMSUB_OPCODE   = (59u << OPCODE_SHIFT |   28u << 1),
+    DMSUB_OPCODE   = (63u << OPCODE_SHIFT |   28u << 1),
+    FNMADD_OPCODE  = (59u << OPCODE_SHIFT |   31u << 1),
+    DNMADD_OPCODE  = (63u << OPCODE_SHIFT |   31u << 1),
+    FNMSUB_OPCODE  = (59u << OPCODE_SHIFT |   30u << 1),
+    DNMSUB_OPCODE  = (63u << OPCODE_SHIFT |   30u << 1),
+
+    LFD_OPCODE     = (50u << OPCODE_SHIFT |   00u << 1),
+    LFDU_OPCODE    = (51u << OPCODE_SHIFT |   00u << 1),
+    LFDX_OPCODE    = (31u << OPCODE_SHIFT |  599u << 1),
+    LFS_OPCODE     = (48u << OPCODE_SHIFT |   00u << 1),
+    LFSU_OPCODE    = (49u << OPCODE_SHIFT |   00u << 1),
+    LFSX_OPCODE    = (31u << OPCODE_SHIFT |  535u << 1),
+
+    STFD_OPCODE    = (54u << OPCODE_SHIFT |   00u << 1),
+    STFDU_OPCODE   = (55u << OPCODE_SHIFT |   00u << 1),
+    STFDX_OPCODE   = (31u << OPCODE_SHIFT |  727u << 1),
+    STFS_OPCODE    = (52u << OPCODE_SHIFT |   00u << 1),
+    STFSU_OPCODE   = (53u << OPCODE_SHIFT |   00u << 1),
+    STFSX_OPCODE   = (31u << OPCODE_SHIFT |  663u << 1),
+
+    FSQRT_OPCODE   = (63u << OPCODE_SHIFT |   22u << 1),            // A-FORM
+    FSQRTS_OPCODE  = (59u << OPCODE_SHIFT |   22u << 1),            // A-FORM
+
+    // Vector instruction support for >= Power6
+    // Vector Storage Access
+    LVEBX_OPCODE   = (31u << OPCODE_SHIFT |    7u << 1),
+    LVEHX_OPCODE   = (31u << OPCODE_SHIFT |   39u << 1),
+    LVEWX_OPCODE   = (31u << OPCODE_SHIFT |   71u << 1),
+    LVX_OPCODE     = (31u << OPCODE_SHIFT |  103u << 1),
+    LVXL_OPCODE    = (31u << OPCODE_SHIFT |  359u << 1),
+    STVEBX_OPCODE  = (31u << OPCODE_SHIFT |  135u << 1),
+    STVEHX_OPCODE  = (31u << OPCODE_SHIFT |  167u << 1),
+    STVEWX_OPCODE  = (31u << OPCODE_SHIFT |  199u << 1),
+    STVX_OPCODE    = (31u << OPCODE_SHIFT |  231u << 1),
+    STVXL_OPCODE   = (31u << OPCODE_SHIFT |  487u << 1),
+    LVSL_OPCODE    = (31u << OPCODE_SHIFT |    6u << 1),
+    LVSR_OPCODE    = (31u << OPCODE_SHIFT |   38u << 1),
+
+    // Vector Permute and Formatting
+    VPKPX_OPCODE   = (4u  << OPCODE_SHIFT |  782u     ),
+    VPKSHSS_OPCODE = (4u  << OPCODE_SHIFT |  398u     ),
+    VPKSWSS_OPCODE = (4u  << OPCODE_SHIFT |  462u     ),
+    VPKSHUS_OPCODE = (4u  << OPCODE_SHIFT |  270u     ),
+    VPKSWUS_OPCODE = (4u  << OPCODE_SHIFT |  334u     ),
+    VPKUHUM_OPCODE = (4u  << OPCODE_SHIFT |   14u     ),
+    VPKUWUM_OPCODE = (4u  << OPCODE_SHIFT |   78u     ),
+    VPKUHUS_OPCODE = (4u  << OPCODE_SHIFT |  142u     ),
+    VPKUWUS_OPCODE = (4u  << OPCODE_SHIFT |  206u     ),
+    VUPKHPX_OPCODE = (4u  << OPCODE_SHIFT |  846u     ),
+    VUPKHSB_OPCODE = (4u  << OPCODE_SHIFT |  526u     ),
+    VUPKHSH_OPCODE = (4u  << OPCODE_SHIFT |  590u     ),
+    VUPKLPX_OPCODE = (4u  << OPCODE_SHIFT |  974u     ),
+    VUPKLSB_OPCODE = (4u  << OPCODE_SHIFT |  654u     ),
+    VUPKLSH_OPCODE = (4u  << OPCODE_SHIFT |  718u     ),
+
+    VMRGHB_OPCODE  = (4u  << OPCODE_SHIFT |   12u     ),
+    VMRGHW_OPCODE  = (4u  << OPCODE_SHIFT |  140u     ),
+    VMRGHH_OPCODE  = (4u  << OPCODE_SHIFT |   76u     ),
+    VMRGLB_OPCODE  = (4u  << OPCODE_SHIFT |  268u     ),
+    VMRGLW_OPCODE  = (4u  << OPCODE_SHIFT |  396u     ),
+    VMRGLH_OPCODE  = (4u  << OPCODE_SHIFT |  332u     ),
+
+    VSPLT_OPCODE   = (4u  << OPCODE_SHIFT |  524u     ),
+    VSPLTH_OPCODE  = (4u  << OPCODE_SHIFT |  588u     ),
+    VSPLTW_OPCODE  = (4u  << OPCODE_SHIFT |  652u     ),
+    VSPLTISB_OPCODE= (4u  << OPCODE_SHIFT |  780u     ),
+    VSPLTISH_OPCODE= (4u  << OPCODE_SHIFT |  844u     ),
+    VSPLTISW_OPCODE= (4u  << OPCODE_SHIFT |  908u     ),
+
+    VPERM_OPCODE   = (4u  << OPCODE_SHIFT |   43u     ),
+    VSEL_OPCODE    = (4u  << OPCODE_SHIFT |   42u     ),
+
+    VSL_OPCODE     = (4u  << OPCODE_SHIFT |  452u     ),
+    VSLDOI_OPCODE  = (4u  << OPCODE_SHIFT |   44u     ),
+    VSLO_OPCODE    = (4u  << OPCODE_SHIFT | 1036u     ),
+    VSR_OPCODE     = (4u  << OPCODE_SHIFT |  708u     ),
+    VSRO_OPCODE    = (4u  << OPCODE_SHIFT | 1100u     ),
+
+    // Vector Integer
+    VADDCUW_OPCODE = (4u  << OPCODE_SHIFT |  384u     ),
+    VADDSHS_OPCODE = (4u  << OPCODE_SHIFT |  832u     ),
+    VADDSBS_OPCODE = (4u  << OPCODE_SHIFT |  768u     ),
+    VADDSWS_OPCODE = (4u  << OPCODE_SHIFT |  896u     ),
+    VADDUBM_OPCODE = (4u  << OPCODE_SHIFT |    0u     ),
+    VADDUWM_OPCODE = (4u  << OPCODE_SHIFT |  128u     ),
+    VADDUHM_OPCODE = (4u  << OPCODE_SHIFT |   64u     ),
+    VADDUBS_OPCODE = (4u  << OPCODE_SHIFT |  512u     ),
+    VADDUWS_OPCODE = (4u  << OPCODE_SHIFT |  640u     ),
+    VADDUHS_OPCODE = (4u  << OPCODE_SHIFT |  576u     ),
+    VSUBCUW_OPCODE = (4u  << OPCODE_SHIFT | 1408u     ),
+    VSUBSHS_OPCODE = (4u  << OPCODE_SHIFT | 1856u     ),
+    VSUBSBS_OPCODE = (4u  << OPCODE_SHIFT | 1792u     ),
+    VSUBSWS_OPCODE = (4u  << OPCODE_SHIFT | 1920u     ),
+    VSUBUBM_OPCODE = (4u  << OPCODE_SHIFT | 1024u     ),
+    VSUBUWM_OPCODE = (4u  << OPCODE_SHIFT | 1152u     ),
+    VSUBUHM_OPCODE = (4u  << OPCODE_SHIFT | 1088u     ),
+    VSUBUBS_OPCODE = (4u  << OPCODE_SHIFT | 1536u     ),
+    VSUBUWS_OPCODE = (4u  << OPCODE_SHIFT | 1664u     ),
+    VSUBUHS_OPCODE = (4u  << OPCODE_SHIFT | 1600u     ),
+
+    VMULESB_OPCODE = (4u  << OPCODE_SHIFT |  776u     ),
+    VMULEUB_OPCODE = (4u  << OPCODE_SHIFT |  520u     ),
+    VMULESH_OPCODE = (4u  << OPCODE_SHIFT |  840u     ),
+    VMULEUH_OPCODE = (4u  << OPCODE_SHIFT |  584u     ),
+    VMULOSB_OPCODE = (4u  << OPCODE_SHIFT |  264u     ),
+    VMULOUB_OPCODE = (4u  << OPCODE_SHIFT |    8u     ),
+    VMULOSH_OPCODE = (4u  << OPCODE_SHIFT |  328u     ),
+    VMULOUH_OPCODE = (4u  << OPCODE_SHIFT |   72u     ),
+    VMHADDSHS_OPCODE=(4u  << OPCODE_SHIFT |   32u     ),
+    VMHRADDSHS_OPCODE=(4u << OPCODE_SHIFT |   33u     ),
+    VMLADDUHM_OPCODE=(4u  << OPCODE_SHIFT |   34u     ),
+    VMSUBUHM_OPCODE= (4u  << OPCODE_SHIFT |   36u     ),
+    VMSUMMBM_OPCODE= (4u  << OPCODE_SHIFT |   37u     ),
+    VMSUMSHM_OPCODE= (4u  << OPCODE_SHIFT |   40u     ),
+    VMSUMSHS_OPCODE= (4u  << OPCODE_SHIFT |   41u     ),
+    VMSUMUHM_OPCODE= (4u  << OPCODE_SHIFT |   38u     ),
+    VMSUMUHS_OPCODE= (4u  << OPCODE_SHIFT |   39u     ),
+
+    VSUMSWS_OPCODE = (4u  << OPCODE_SHIFT | 1928u     ),
+    VSUM2SWS_OPCODE= (4u  << OPCODE_SHIFT | 1672u     ),
+    VSUM4SBS_OPCODE= (4u  << OPCODE_SHIFT | 1800u     ),
+    VSUM4UBS_OPCODE= (4u  << OPCODE_SHIFT | 1544u     ),
+    VSUM4SHS_OPCODE= (4u  << OPCODE_SHIFT | 1608u     ),
+
+    VAVGSB_OPCODE  = (4u  << OPCODE_SHIFT | 1282u     ),
+    VAVGSW_OPCODE  = (4u  << OPCODE_SHIFT | 1410u     ),
+    VAVGSH_OPCODE  = (4u  << OPCODE_SHIFT | 1346u     ),
+    VAVGUB_OPCODE  = (4u  << OPCODE_SHIFT | 1026u     ),
+    VAVGUW_OPCODE  = (4u  << OPCODE_SHIFT | 1154u     ),
+    VAVGUH_OPCODE  = (4u  << OPCODE_SHIFT | 1090u     ),
+
+    VMAXSB_OPCODE  = (4u  << OPCODE_SHIFT |  258u     ),
+    VMAXSW_OPCODE  = (4u  << OPCODE_SHIFT |  386u     ),
+    VMAXSH_OPCODE  = (4u  << OPCODE_SHIFT |  322u     ),
+    VMAXUB_OPCODE  = (4u  << OPCODE_SHIFT |    2u     ),
+    VMAXUW_OPCODE  = (4u  << OPCODE_SHIFT |  130u     ),
+    VMAXUH_OPCODE  = (4u  << OPCODE_SHIFT |   66u     ),
+    VMINSB_OPCODE  = (4u  << OPCODE_SHIFT |  770u     ),
+    VMINSW_OPCODE  = (4u  << OPCODE_SHIFT |  898u     ),
+    VMINSH_OPCODE  = (4u  << OPCODE_SHIFT |  834u     ),
+    VMINUB_OPCODE  = (4u  << OPCODE_SHIFT |  514u     ),
+    VMINUW_OPCODE  = (4u  << OPCODE_SHIFT |  642u     ),
+    VMINUH_OPCODE  = (4u  << OPCODE_SHIFT |  578u     ),
+
+    VCMPEQUB_OPCODE= (4u  << OPCODE_SHIFT |    6u     ),
+    VCMPEQUH_OPCODE= (4u  << OPCODE_SHIFT |   70u     ),
+    VCMPEQUW_OPCODE= (4u  << OPCODE_SHIFT |  134u     ),
+    VCMPGTSH_OPCODE= (4u  << OPCODE_SHIFT |  838u     ),
+    VCMPGTSB_OPCODE= (4u  << OPCODE_SHIFT |  774u     ),
+    VCMPGTSW_OPCODE= (4u  << OPCODE_SHIFT |  902u     ),
+    VCMPGTUB_OPCODE= (4u  << OPCODE_SHIFT |  518u     ),
+    VCMPGTUH_OPCODE= (4u  << OPCODE_SHIFT |  582u     ),
+    VCMPGTUW_OPCODE= (4u  << OPCODE_SHIFT |  646u     ),
+
+    VAND_OPCODE    = (4u  << OPCODE_SHIFT | 1028u     ),
+    VANDC_OPCODE   = (4u  << OPCODE_SHIFT | 1092u     ),
+    VNOR_OPCODE    = (4u  << OPCODE_SHIFT | 1284u     ),
+    VOR_OPCODE     = (4u  << OPCODE_SHIFT | 1156u     ),
+    VXOR_OPCODE    = (4u  << OPCODE_SHIFT | 1220u     ),
+    VRLB_OPCODE    = (4u  << OPCODE_SHIFT |    4u     ),
+    VRLW_OPCODE    = (4u  << OPCODE_SHIFT |  132u     ),
+    VRLH_OPCODE    = (4u  << OPCODE_SHIFT |   68u     ),
+    VSLB_OPCODE    = (4u  << OPCODE_SHIFT |  260u     ),
+    VSKW_OPCODE    = (4u  << OPCODE_SHIFT |  388u     ),
+    VSLH_OPCODE    = (4u  << OPCODE_SHIFT |  324u     ),
+    VSRB_OPCODE    = (4u  << OPCODE_SHIFT |  516u     ),
+    VSRW_OPCODE    = (4u  << OPCODE_SHIFT |  644u     ),
+    VSRH_OPCODE    = (4u  << OPCODE_SHIFT |  580u     ),
+    VSRAB_OPCODE   = (4u  << OPCODE_SHIFT |  772u     ),
+    VSRAW_OPCODE   = (4u  << OPCODE_SHIFT |  900u     ),
+    VSRAH_OPCODE   = (4u  << OPCODE_SHIFT |  836u     ),
+
+    // Vector Floating-Point
+    // not implemented yet
+
+    // Vector Status and Control
+    MTVSCR_OPCODE  = (4u  << OPCODE_SHIFT | 1604u     ),
+    MFVSCR_OPCODE  = (4u  << OPCODE_SHIFT | 1540u     ),
+
+    // Icache and dcache related instructions
+    DCBA_OPCODE    = (31u << OPCODE_SHIFT |  758u << 1),
+    DCBZ_OPCODE    = (31u << OPCODE_SHIFT | 1014u << 1),
+    DCBST_OPCODE   = (31u << OPCODE_SHIFT |   54u << 1),
+    DCBF_OPCODE    = (31u << OPCODE_SHIFT |   86u << 1),
+
+    DCBT_OPCODE    = (31u << OPCODE_SHIFT |  278u << 1),
+    DCBTST_OPCODE  = (31u << OPCODE_SHIFT |  246u << 1),
+    ICBI_OPCODE    = (31u << OPCODE_SHIFT |  982u << 1),
+
+    // Instruction synchronization
+    ISYNC_OPCODE   = (19u << OPCODE_SHIFT |  150u << 1),
+    // Memory barriers
+    SYNC_OPCODE    = (31u << OPCODE_SHIFT |  598u << 1),
+    EIEIO_OPCODE   = (31u << OPCODE_SHIFT |  854u << 1),
+
+    // Trap instructions
+    TDI_OPCODE     = (2u  << OPCODE_SHIFT),
+    TWI_OPCODE     = (3u  << OPCODE_SHIFT),
+    TD_OPCODE      = (31u << OPCODE_SHIFT |   68u << 1),
+    TW_OPCODE      = (31u << OPCODE_SHIFT |    4u << 1),
+
+    // Atomics.
+    LWARX_OPCODE   = (31u << OPCODE_SHIFT |   20u << 1),
+    LDARX_OPCODE   = (31u << OPCODE_SHIFT |   84u << 1),
+    STWCX_OPCODE   = (31u << OPCODE_SHIFT |  150u << 1),
+    STDCX_OPCODE   = (31u << OPCODE_SHIFT |  214u << 1)
+
+  };
+
+  // Trap instructions TO bits
+  enum trap_to_bits {
+    // single bits
+    traptoLessThanSigned      = 1 << 4, // 0, left end
+    traptoGreaterThanSigned   = 1 << 3,
+    traptoEqual               = 1 << 2,
+    traptoLessThanUnsigned    = 1 << 1,
+    traptoGreaterThanUnsigned = 1 << 0, // 4, right end
+
+    // compound ones
+    traptoUnconditional       = (traptoLessThanSigned |
+                                 traptoGreaterThanSigned |
+                                 traptoEqual |
+                                 traptoLessThanUnsigned |
+                                 traptoGreaterThanUnsigned)
+  };
+
+  // Branch hints BH field
+  enum branch_hint_bh {
+    // bclr cases:
+    bhintbhBCLRisReturn            = 0,
+    bhintbhBCLRisNotReturnButSame  = 1,
+    bhintbhBCLRisNotPredictable    = 3,
+
+    // bcctr cases:
+    bhintbhBCCTRisNotReturnButSame = 0,
+    bhintbhBCCTRisNotPredictable   = 3
+  };
+
+  // Branch prediction hints AT field
+  enum branch_hint_at {
+    bhintatNoHint     = 0,  // at=00
+    bhintatIsNotTaken = 2,  // at=10
+    bhintatIsTaken    = 3   // at=11
+  };
+
+  // Branch prediction hints
+  enum branch_hint_concept {
+    // Use the same encoding as branch_hint_at to simply code.
+    bhintNoHint       = bhintatNoHint,
+    bhintIsNotTaken   = bhintatIsNotTaken,
+    bhintIsTaken      = bhintatIsTaken
+  };
+
+  // Used in BO field of branch instruction.
+  enum branch_condition {
+    bcondCRbiIs0      =  4, // bo=001at
+    bcondCRbiIs1      = 12, // bo=011at
+    bcondAlways       = 20  // bo=10100
+  };
+
+  // Branch condition with combined prediction hints.
+  enum branch_condition_with_hint {
+    bcondCRbiIs0_bhintNoHint     = bcondCRbiIs0 | bhintatNoHint,
+    bcondCRbiIs0_bhintIsNotTaken = bcondCRbiIs0 | bhintatIsNotTaken,
+    bcondCRbiIs0_bhintIsTaken    = bcondCRbiIs0 | bhintatIsTaken,
+    bcondCRbiIs1_bhintNoHint     = bcondCRbiIs1 | bhintatNoHint,
+    bcondCRbiIs1_bhintIsNotTaken = bcondCRbiIs1 | bhintatIsNotTaken,
+    bcondCRbiIs1_bhintIsTaken    = bcondCRbiIs1 | bhintatIsTaken,
+  };
+
+  // Branch prediction hints.
+  inline static int add_bhint_to_boint(const int bhint, const int boint) {
+    switch (boint) {
+      case bcondCRbiIs0:
+      case bcondCRbiIs1:
+        // branch_hint and branch_hint_at have same encodings
+        assert(   (int)bhintNoHint     == (int)bhintatNoHint
+               && (int)bhintIsNotTaken == (int)bhintatIsNotTaken
+               && (int)bhintIsTaken    == (int)bhintatIsTaken,
+               "wrong encodings");
+        assert((bhint & 0x03) == bhint, "wrong encodings");
+        return (boint & ~0x03) | bhint;
+      case bcondAlways:
+        // no branch_hint
+        return boint;
+      default:
+        ShouldNotReachHere();
+        return 0;
+    }
+  }
+
+  // Extract bcond from boint.
+  inline static int inv_boint_bcond(const int boint) {
+    int r_bcond = boint & ~0x03;
+    assert(r_bcond == bcondCRbiIs0 ||
+           r_bcond == bcondCRbiIs1 ||
+           r_bcond == bcondAlways,
+           "bad branch condition");
+    return r_bcond;
+  }
+
+  // Extract bhint from boint.
+  inline static int inv_boint_bhint(const int boint) {
+    int r_bhint = boint & 0x03;
+    assert(r_bhint == bhintatNoHint ||
+           r_bhint == bhintatIsNotTaken ||
+           r_bhint == bhintatIsTaken,
+           "bad branch hint");
+    return r_bhint;
+  }
+
+  // Calculate opposite of given bcond.
+  inline static int opposite_bcond(const int bcond) {
+    switch (bcond) {
+      case bcondCRbiIs0:
+        return bcondCRbiIs1;
+      case bcondCRbiIs1:
+        return bcondCRbiIs0;
+      default:
+        ShouldNotReachHere();
+        return 0;
+    }
+  }
+
+  // Calculate opposite of given bhint.
+  inline static int opposite_bhint(const int bhint) {
+    switch (bhint) {
+      case bhintatNoHint:
+        return bhintatNoHint;
+      case bhintatIsNotTaken:
+        return bhintatIsTaken;
+      case bhintatIsTaken:
+        return bhintatIsNotTaken;
+      default:
+        ShouldNotReachHere();
+        return 0;
+    }
+  }
+
+  // PPC branch instructions
+  enum ppcops {
+    b_op    = 18,
+    bc_op   = 16,
+    bcr_op  = 19
+  };
+
+  enum Condition {
+    negative         = 0,
+    less             = 0,
+    positive         = 1,
+    greater          = 1,
+    zero             = 2,
+    equal            = 2,
+    summary_overflow = 3,
+  };
+
+ public:
+  // Helper functions for groups of instructions
+
+  enum Predict { pt = 1, pn = 0 }; // pt = predict taken
+
+  enum Membar_mask_bits { // page 184, v9
+    StoreStore = 1 << 3,
+    LoadStore  = 1 << 2,
+    StoreLoad  = 1 << 1,
+    LoadLoad   = 1 << 0,
+
+    Sync       = 1 << 6,
+    MemIssue   = 1 << 5,
+    Lookaside  = 1 << 4
+  };
+
+  // instruction must start at passed address
+  static int instr_len(unsigned char *instr) { return BytesPerInstWord; }
+
+  // instruction must be left-justified in argument
+  static int instr_len(unsigned long instr)  { return BytesPerInstWord; }
+
+  // longest instructions
+  static int instr_maxlen() { return BytesPerInstWord; }
+
+  // Test if x is within signed immediate range for nbits.
+  static bool is_simm(int x, unsigned int nbits) {
+    assert(0 < nbits && nbits < 32, "out of bounds");
+    const int   min      = -( ((int)1) << nbits-1 );
+    const int   maxplus1 =  ( ((int)1) << nbits-1 );
+    return min <= x && x < maxplus1;
+  }
+
+  static bool is_simm(jlong x, unsigned int nbits) {
+    assert(0 < nbits && nbits < 64, "out of bounds");
+    const jlong min      = -( ((jlong)1) << nbits-1 );
+    const jlong maxplus1 =  ( ((jlong)1) << nbits-1 );
+    return min <= x && x < maxplus1;
+  }
+
+  // Test if x is within unsigned immediate range for nbits
+  static bool is_uimm(int x, unsigned int nbits) {
+    assert(0 < nbits && nbits < 32, "out of bounds");
+    const int   maxplus1 = ( ((int)1) << nbits );
+    return 0 <= x && x < maxplus1;
+  }
+
+  static bool is_uimm(jlong x, unsigned int nbits) {
+    assert(0 < nbits && nbits < 64, "out of bounds");
+    const jlong maxplus1 =  ( ((jlong)1) << nbits );
+    return 0 <= x && x < maxplus1;
+  }
+
+ protected:
+  // helpers
+
+  // X is supposed to fit in a field "nbits" wide
+  // and be sign-extended. Check the range.
+  static void assert_signed_range(intptr_t x, int nbits) {
+    assert(nbits == 32 || (-(1 << nbits-1) <= x && x < (1 << nbits-1)),
+           "value out of range");
+  }
+
+  static void assert_signed_word_disp_range(intptr_t x, int nbits) {
+    assert((x & 3) == 0, "not word aligned");
+    assert_signed_range(x, nbits + 2);
+  }
+
+  static void assert_unsigned_const(int x, int nbits) {
+    assert(juint(x) < juint(1 << nbits), "unsigned constant out of range");
+  }
+
+  static int fmask(juint hi_bit, juint lo_bit) {
+    assert(hi_bit >= lo_bit && hi_bit < 32, "bad bits");
+    return (1 << ( hi_bit-lo_bit + 1 )) - 1;
+  }
+
+  // inverse of u_field
+  static int inv_u_field(int x, int hi_bit, int lo_bit) {
+    juint r = juint(x) >> lo_bit;
+    r &= fmask(hi_bit, lo_bit);
+    return int(r);
+  }
+
+  // signed version: extract from field and sign-extend
+  static int inv_s_field_ppc(int x, int hi_bit, int lo_bit) {
+    x = x << (31-hi_bit);
+    x = x >> (31-hi_bit+lo_bit);
+    return x;
+  }
+
+  static int u_field(int x, int hi_bit, int lo_bit) {
+    assert((x & ~fmask(hi_bit, lo_bit)) == 0, "value out of range");
+    int r = x << lo_bit;
+    assert(inv_u_field(r, hi_bit, lo_bit) == x, "just checking");
+    return r;
+  }
+
+  // Same as u_field for signed values
+  static int s_field(int x, int hi_bit, int lo_bit) {
+    int nbits = hi_bit - lo_bit + 1;
+    assert(nbits == 32 || (-(1 << nbits-1) <= x && x < (1 << nbits-1)),
+      "value out of range");
+    x &= fmask(hi_bit, lo_bit);
+    int r = x << lo_bit;
+    return r;
+  }
+
+  // inv_op for ppc instructions
+  static int inv_op_ppc(int x) { return inv_u_field(x, 31, 26); }
+
+  // Determine target address from li, bd field of branch instruction.
+  static intptr_t inv_li_field(int x) {
+    intptr_t r = inv_s_field_ppc(x, 25, 2);
+    r = (r << 2);
+    return r;
+  }
+  static intptr_t inv_bd_field(int x, intptr_t pos) {
+    intptr_t r = inv_s_field_ppc(x, 15, 2);
+    r = (r << 2) + pos;
+    return r;
+  }
+
+  #define inv_opp_u_field(x, hi_bit, lo_bit) inv_u_field(x, 31-(lo_bit), 31-(hi_bit))
+  #define inv_opp_s_field(x, hi_bit, lo_bit) inv_s_field_ppc(x, 31-(lo_bit), 31-(hi_bit))
+  // Extract instruction fields from instruction words.
+ public:
+  static int inv_ra_field(int x) { return inv_opp_u_field(x, 15, 11); }
+  static int inv_rb_field(int x) { return inv_opp_u_field(x, 20, 16); }
+  static int inv_rt_field(int x) { return inv_opp_u_field(x, 10,  6); }
+  static int inv_rs_field(int x) { return inv_opp_u_field(x, 10,  6); }
+  // Ds uses opp_s_field(x, 31, 16), but lowest 2 bits must be 0.
+  // Inv_ds_field uses range (x, 29, 16) but shifts by 2 to ensure that lowest bits are 0.
+  static int inv_ds_field(int x) { return inv_opp_s_field(x, 29, 16) << 2; }
+  static int inv_d1_field(int x) { return inv_opp_s_field(x, 31, 16); }
+  static int inv_si_field(int x) { return inv_opp_s_field(x, 31, 16); }
+  static int inv_to_field(int x) { return inv_opp_u_field(x, 10, 6);  }
+  static int inv_lk_field(int x) { return inv_opp_u_field(x, 31, 31); }
+  static int inv_bo_field(int x) { return inv_opp_u_field(x, 10,  6); }
+  static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); }
+
+  #define opp_u_field(x, hi_bit, lo_bit) u_field(x, 31-(lo_bit), 31-(hi_bit))
+  #define opp_s_field(x, hi_bit, lo_bit) s_field(x, 31-(lo_bit), 31-(hi_bit))
+
+  // instruction fields
+  static int aa(       int         x)  { return  opp_u_field(x,             30, 30); }
+  static int ba(       int         x)  { return  opp_u_field(x,             15, 11); }
+  static int bb(       int         x)  { return  opp_u_field(x,             20, 16); }
+  static int bc(       int         x)  { return  opp_u_field(x,             25, 21); }
+  static int bd(       int         x)  { return  opp_s_field(x,             29, 16); }
+  static int bf( ConditionRegister cr) { return  bf(cr->encoding()); }
+  static int bf(       int         x)  { return  opp_u_field(x,              8,  6); }
+  static int bfa(ConditionRegister cr) { return  bfa(cr->encoding()); }
+  static int bfa(      int         x)  { return  opp_u_field(x,             13, 11); }
+  static int bh(       int         x)  { return  opp_u_field(x,             20, 19); }
+  static int bi(       int         x)  { return  opp_u_field(x,             15, 11); }
+  static int bi0(ConditionRegister cr, Condition c) { return (cr->encoding() << 2) | c; }
+  static int bo(       int         x)  { return  opp_u_field(x,             10,  6); }
+  static int bt(       int         x)  { return  opp_u_field(x,             10,  6); }
+  static int d1(       int         x)  { return  opp_s_field(x,             31, 16); }
+  static int ds(       int         x)  { assert((x & 0x3) == 0, "unaligned offset"); return opp_s_field(x, 31, 16); }
+  static int eh(       int         x)  { return  opp_u_field(x,             31, 31); }
+  static int flm(      int         x)  { return  opp_u_field(x,             14,  7); }
+  static int fra(    FloatRegister r)  { return  fra(r->encoding());}
+  static int frb(    FloatRegister r)  { return  frb(r->encoding());}
+  static int frc(    FloatRegister r)  { return  frc(r->encoding());}
+  static int frs(    FloatRegister r)  { return  frs(r->encoding());}
+  static int frt(    FloatRegister r)  { return  frt(r->encoding());}
+  static int fra(      int         x)  { return  opp_u_field(x,             15, 11); }
+  static int frb(      int         x)  { return  opp_u_field(x,             20, 16); }
+  static int frc(      int         x)  { return  opp_u_field(x,             25, 21); }
+  static int frs(      int         x)  { return  opp_u_field(x,             10,  6); }
+  static int frt(      int         x)  { return  opp_u_field(x,             10,  6); }
+  static int fxm(      int         x)  { return  opp_u_field(x,             19, 12); }
+  static int l10(      int         x)  { return  opp_u_field(x,             10, 10); }
+  static int l15(      int         x)  { return  opp_u_field(x,             15, 15); }
+  static int l910(     int         x)  { return  opp_u_field(x,             10,  9); }
+  static int lev(      int         x)  { return  opp_u_field(x,             26, 20); }
+  static int li(       int         x)  { return  opp_s_field(x,             29,  6); }
+  static int lk(       int         x)  { return  opp_u_field(x,             31, 31); }
+  static int mb2125(   int         x)  { return  opp_u_field(x,             25, 21); }
+  static int me2630(   int         x)  { return  opp_u_field(x,             30, 26); }
+  static int mb2126(   int         x)  { return  opp_u_field(((x & 0x1f) << 1) | ((x & 0x20) >> 5), 26, 21); }
+  static int me2126(   int         x)  { return  mb2126(x); }
+  static int nb(       int         x)  { return  opp_u_field(x,             20, 16); }
+  //static int opcd(   int         x)  { return  opp_u_field(x,              5,  0); } // is contained in our opcodes
+  static int oe(       int         x)  { return  opp_u_field(x,             21, 21); }
+  static int ra(       Register    r)  { return  ra(r->encoding()); }
+  static int ra(       int         x)  { return  opp_u_field(x,             15, 11); }
+  static int rb(       Register    r)  { return  rb(r->encoding()); }
+  static int rb(       int         x)  { return  opp_u_field(x,             20, 16); }
+  static int rc(       int         x)  { return  opp_u_field(x,             31, 31); }
+  static int rs(       Register    r)  { return  rs(r->encoding()); }
+  static int rs(       int         x)  { return  opp_u_field(x,             10,  6); }
+  // we don't want to use R0 in memory accesses, because it has value `0' then
+  static int ra0mem(   Register    r)  { assert(r != R0, "cannot use register R0 in memory access"); return ra(r); }
+  static int ra0mem(   int         x)  { assert(x != 0,  "cannot use register 0 in memory access");  return ra(x); }
+
+  // register r is target
+  static int rt(       Register    r)  { return rs(r); }
+  static int rt(       int         x)  { return rs(x); }
+  static int rta(      Register    r)  { return ra(r); }
+  static int rta0mem(  Register    r)  { rta(r); return ra0mem(r); }
+
+  static int sh1620(   int         x)  { return  opp_u_field(x,             20, 16); }
+  static int sh30(     int         x)  { return  opp_u_field(x,             30, 30); }
+  static int sh162030( int         x)  { return  sh1620(x & 0x1f) | sh30((x & 0x20) >> 5); }
+  static int si(       int         x)  { return  opp_s_field(x,             31, 16); }
+  static int spr(      int         x)  { return  opp_u_field(x,             20, 11); }
+  static int sr(       int         x)  { return  opp_u_field(x,             15, 12); }
+  static int tbr(      int         x)  { return  opp_u_field(x,             20, 11); }
+  static int th(       int         x)  { return  opp_u_field(x,             10,  7); }
+  static int thct(     int         x)  { assert((x&8)==0, "must be valid cache specification");  return th(x); }
+  static int thds(     int         x)  { assert((x&8)==8, "must be valid stream specification"); return th(x); }
+  static int to(       int         x)  { return  opp_u_field(x,             10,  6); }
+  static int u(        int         x)  { return  opp_u_field(x,             19, 16); }
+  static int ui(       int         x)  { return  opp_u_field(x,             31, 16); }
+
+  // support vector instructions for >= Power6
+  static int vra(      int         x)  { return  opp_u_field(x,             15, 11); }
+  static int vrb(      int         x)  { return  opp_u_field(x,             20, 16); }
+  static int vrc(      int         x)  { return  opp_u_field(x,             25, 21); }
+  static int vrs(      int         x)  { return  opp_u_field(x,             10,  6); }
+  static int vrt(      int         x)  { return  opp_u_field(x,             10,  6); }
+
+  static int vra(   VectorRegister r)  { return  vra(r->encoding());}
+  static int vrb(   VectorRegister r)  { return  vrb(r->encoding());}
+  static int vrc(   VectorRegister r)  { return  vrc(r->encoding());}
+  static int vrs(   VectorRegister r)  { return  vrs(r->encoding());}
+  static int vrt(   VectorRegister r)  { return  vrt(r->encoding());}
+
+  static int vsplt_uim( int        x)  { return  opp_u_field(x,             15, 12); } // for vsplt* instructions
+  static int vsplti_sim(int        x)  { return  opp_u_field(x,             15, 11); } // for vsplti* instructions
+  static int vsldoi_shb(int        x)  { return  opp_u_field(x,             25, 22); } // for vsldoi instruction
+  static int vcmp_rc(   int        x)  { return  opp_u_field(x,             21, 21); } // for vcmp* instructions
+
+  //static int xo1(     int        x)  { return  opp_u_field(x,             29, 21); }// is contained in our opcodes
+  //static int xo2(     int        x)  { return  opp_u_field(x,             30, 21); }// is contained in our opcodes
+  //static int xo3(     int        x)  { return  opp_u_field(x,             30, 22); }// is contained in our opcodes
+  //static int xo4(     int        x)  { return  opp_u_field(x,             30, 26); }// is contained in our opcodes
+  //static int xo5(     int        x)  { return  opp_u_field(x,             29, 27); }// is contained in our opcodes
+  //static int xo6(     int        x)  { return  opp_u_field(x,             30, 27); }// is contained in our opcodes
+  //static int xo7(     int        x)  { return  opp_u_field(x,             31, 30); }// is contained in our opcodes
+
+ protected:
+  // Compute relative address for branch.
+  static intptr_t disp(intptr_t x, intptr_t off) {
+    int xx = x - off;
+    xx = xx >> 2;
+    return xx;
+  }
+
+ public:
+  // signed immediate, in low bits, nbits long
+  static int simm(int x, int nbits) {
+    assert_signed_range(x, nbits);
+    return x & ((1 << nbits) - 1);
+  }
+
+  // unsigned immediate, in low bits, nbits long
+  static int uimm(int x, int nbits) {
+    assert_unsigned_const(x, nbits);
+    return x & ((1 << nbits) - 1);
+  }
+
+  static void set_imm(int* instr, short s) {
+    short* p = ((short *)instr) + 1;
+    *p = s;
+  }
+
+  static int get_imm(address a, int instruction_number) {
+    short imm;
+    short *p =((short *)a)+2*instruction_number+1;
+    imm = *p;
+    return (int)imm;
+  }
+
+  static inline int hi16_signed(  int x) { return (int)(int16_t)(x >> 16); }
+  static inline int lo16_unsigned(int x) { return x & 0xffff; }
+
+ protected:
+
+  // Extract the top 32 bits in a 64 bit word.
+  static int32_t hi32(int64_t x) {
+    int32_t r = int32_t((uint64_t)x >> 32);
+    return r;
+  }
+
+ public:
+
+  static inline unsigned int align_addr(unsigned int addr, unsigned int a) {
+    return ((addr + (a - 1)) & ~(a - 1));
+  }
+
+  static inline bool is_aligned(unsigned int addr, unsigned int a) {
+    return (0 == addr % a);
+  }
+
+  void flush() {
+    AbstractAssembler::flush();
+  }
+
+  inline void emit_int32(int);  // shadows AbstractAssembler::emit_int32
+  inline void emit_data(int);
+  inline void emit_data(int, RelocationHolder const&);
+  inline void emit_data(int, relocInfo::relocType rtype);
+
+  // Emit an address.
+  inline address emit_addr(const address addr = NULL);
+
+  // Emit a function descriptor with the specified entry point, TOC,
+  // and ENV. If the entry point is NULL, the descriptor will point
+  // just past the descriptor.
+  // Use values from friend functions as defaults.
+  inline address emit_fd(address entry = NULL,
+                         address toc = (address) FunctionDescriptor::friend_toc,
+                         address env = (address) FunctionDescriptor::friend_env);
+
+  /////////////////////////////////////////////////////////////////////////////////////
+  // PPC instructions
+  /////////////////////////////////////////////////////////////////////////////////////
+
+  // Memory instructions use r0 as hard coded 0, e.g. to simulate loading
+  // immediates. The normal instruction encoders enforce that r0 is not
+  // passed to them. Use either extended mnemonics encoders or the special ra0
+  // versions.
+
+  // Issue an illegal instruction.
+  inline void illtrap();
+  static inline bool is_illtrap(int x);
+
+  // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
+  inline void addi( Register d, Register a, int si16);
+  inline void addis(Register d, Register a, int si16);
+ private:
+  inline void addi_r0ok( Register d, Register a, int si16);
+  inline void addis_r0ok(Register d, Register a, int si16);
+ public:
+  inline void addic_( Register d, Register a, int si16);
+  inline void subfic( Register d, Register a, int si16);
+  inline void add(    Register d, Register a, Register b);
+  inline void add_(   Register d, Register a, Register b);
+  inline void subf(   Register d, Register a, Register b);
+  inline void sub(    Register d, Register a, Register b);
+  inline void subf_(  Register d, Register a, Register b);
+  inline void addc(   Register d, Register a, Register b);
+  inline void addc_(  Register d, Register a, Register b);
+  inline void subfc(  Register d, Register a, Register b);
+  inline void subfc_( Register d, Register a, Register b);
+  inline void adde(   Register d, Register a, Register b);
+  inline void adde_(  Register d, Register a, Register b);
+  inline void subfe(  Register d, Register a, Register b);
+  inline void subfe_( Register d, Register a, Register b);
+  inline void neg(    Register d, Register a);
+  inline void neg_(   Register d, Register a);
+  inline void mulli(  Register d, Register a, int si16);
+  inline void mulld(  Register d, Register a, Register b);
+  inline void mulld_( Register d, Register a, Register b);
+  inline void mullw(  Register d, Register a, Register b);
+  inline void mullw_( Register d, Register a, Register b);
+  inline void mulhw(  Register d, Register a, Register b);
+  inline void mulhw_( Register d, Register a, Register b);
+  inline void mulhd(  Register d, Register a, Register b);
+  inline void mulhd_( Register d, Register a, Register b);
+  inline void mulhdu( Register d, Register a, Register b);
+  inline void mulhdu_(Register d, Register a, Register b);
+  inline void divd(   Register d, Register a, Register b);
+  inline void divd_(  Register d, Register a, Register b);
+  inline void divw(   Register d, Register a, Register b);
+  inline void divw_(  Register d, Register a, Register b);
+
+  // extended mnemonics
+  inline void li(   Register d, int si16);
+  inline void lis(  Register d, int si16);
+  inline void addir(Register d, int si16, Register a);
+
+  static bool is_addi(int x) {
+     return ADDI_OPCODE == (x & ADDI_OPCODE_MASK);
+  }
+  static bool is_addis(int x) {
+     return ADDIS_OPCODE == (x & ADDIS_OPCODE_MASK);
+  }
+  static bool is_bxx(int x) {
+     return BXX_OPCODE == (x & BXX_OPCODE_MASK);
+  }
+  static bool is_b(int x) {
+     return BXX_OPCODE == (x & BXX_OPCODE_MASK) && inv_lk_field(x) == 0;
+  }
+  static bool is_bl(int x) {
+     return BXX_OPCODE == (x & BXX_OPCODE_MASK) && inv_lk_field(x) == 1;
+  }
+  static bool is_bcxx(int x) {
+     return BCXX_OPCODE == (x & BCXX_OPCODE_MASK);
+  }
+  static bool is_bxx_or_bcxx(int x) {
+     return is_bxx(x) || is_bcxx(x);
+  }
+  static bool is_bctrl(int x) {
+     return x == 0x4e800421;
+  }
+  static bool is_bctr(int x) {
+     return x == 0x4e800420;
+  }
+  static bool is_bclr(int x) {
+     return BCLR_OPCODE == (x & XL_FORM_OPCODE_MASK);
+  }
+  static bool is_li(int x) {
+     return is_addi(x) && inv_ra_field(x)==0;
+  }
+  static bool is_lis(int x) {
+     return is_addis(x) && inv_ra_field(x)==0;
+  }
+  static bool is_mtctr(int x) {
+     return MTCTR_OPCODE == (x & MTCTR_OPCODE_MASK);
+  }
+  static bool is_ld(int x) {
+     return LD_OPCODE == (x & LD_OPCODE_MASK);
+  }
+  static bool is_std(int x) {
+     return STD_OPCODE == (x & STD_OPCODE_MASK);
+  }
+  static bool is_stdu(int x) {
+     return STDU_OPCODE == (x & STDU_OPCODE_MASK);
+  }
+  static bool is_stdx(int x) {
+     return STDX_OPCODE == (x & STDX_OPCODE_MASK);
+  }
+  static bool is_stdux(int x) {
+     return STDUX_OPCODE == (x & STDUX_OPCODE_MASK);
+  }
+  static bool is_stwx(int x) {
+     return STWX_OPCODE == (x & STWX_OPCODE_MASK);
+  }
+  static bool is_stwux(int x) {
+     return STWUX_OPCODE == (x & STWUX_OPCODE_MASK);
+  }
+  static bool is_stw(int x) {
+     return STW_OPCODE == (x & STW_OPCODE_MASK);
+  }
+  static bool is_stwu(int x) {
+     return STWU_OPCODE == (x & STWU_OPCODE_MASK);
+  }
+  static bool is_ori(int x) {
+     return ORI_OPCODE == (x & ORI_OPCODE_MASK);
+  };
+  static bool is_oris(int x) {
+     return ORIS_OPCODE == (x & ORIS_OPCODE_MASK);
+  };
+  static bool is_rldicr(int x) {
+     return (RLDICR_OPCODE == (x & RLDICR_OPCODE_MASK));
+  };
+  static bool is_nop(int x) {
+    return x == 0x60000000;
+  }
+  // endgroup opcode for Power6
+  static bool is_endgroup(int x) {
+    return is_ori(x) && inv_ra_field(x)==1 && inv_rs_field(x)==1 && inv_d1_field(x)==0;
+  }
+
+
+ private:
+  // PPC 1, section 3.3.9, Fixed-Point Compare Instructions
+  inline void cmpi( ConditionRegister bf, int l, Register a, int si16);
+  inline void cmp(  ConditionRegister bf, int l, Register a, Register b);
+  inline void cmpli(ConditionRegister bf, int l, Register a, int ui16);
+  inline void cmpl( ConditionRegister bf, int l, Register a, Register b);
+
+ public:
+  // extended mnemonics of Compare Instructions
+  inline void cmpwi( ConditionRegister crx, Register a, int si16);
+  inline void cmpdi( ConditionRegister crx, Register a, int si16);
+  inline void cmpw(  ConditionRegister crx, Register a, Register b);
+  inline void cmpd(  ConditionRegister crx, Register a, Register b);
+  inline void cmplwi(ConditionRegister crx, Register a, int ui16);
+  inline void cmpldi(ConditionRegister crx, Register a, int ui16);
+  inline void cmplw( ConditionRegister crx, Register a, Register b);
+  inline void cmpld( ConditionRegister crx, Register a, Register b);
+
+  inline void isel(   Register d, Register a, Register b, int bc);
+
+  // PPC 1, section 3.3.11, Fixed-Point Logical Instructions
+         void andi(   Register a, Register s, int ui16);    // optimized version
+  inline void andi_(  Register a, Register s, int ui16);
+  inline void andis_( Register a, Register s, int ui16);
+  inline void ori(    Register a, Register s, int ui16);
+  inline void oris(   Register a, Register s, int ui16);
+  inline void xori(   Register a, Register s, int ui16);
+  inline void xoris(  Register a, Register s, int ui16);
+  inline void andr(   Register a, Register s, Register b);  // suffixed by 'r' as 'and' is C++ keyword
+  inline void and_(   Register a, Register s, Register b);
+  // Turn or0(rx,rx,rx) into a nop and avoid that we accidently emit a
+  // SMT-priority change instruction (see SMT instructions below).
+  inline void or_unchecked(Register a, Register s, Register b);
+  inline void orr(    Register a, Register s, Register b);  // suffixed by 'r' as 'or' is C++ keyword
+  inline void or_(    Register a, Register s, Register b);
+  inline void xorr(   Register a, Register s, Register b);  // suffixed by 'r' as 'xor' is C++ keyword
+  inline void xor_(   Register a, Register s, Register b);
+  inline void nand(   Register a, Register s, Register b);
+  inline void nand_(  Register a, Register s, Register b);
+  inline void nor(    Register a, Register s, Register b);
+  inline void nor_(   Register a, Register s, Register b);
+  inline void andc(   Register a, Register s, Register b);
+  inline void andc_(  Register a, Register s, Register b);
+  inline void orc(    Register a, Register s, Register b);
+  inline void orc_(   Register a, Register s, Register b);
+  inline void extsb(  Register a, Register s);
+  inline void extsh(  Register a, Register s);
+  inline void extsw(  Register a, Register s);
+
+  // extended mnemonics
+  inline void nop();
+  // NOP for FP and BR units (different versions to allow them to be in one group)
+  inline void fpnop0();
+  inline void fpnop1();
+  inline void brnop0();
+  inline void brnop1();
+  inline void brnop2();
+
+  inline void mr(      Register d, Register s);
+  inline void ori_opt( Register d, int ui16);
+  inline void oris_opt(Register d, int ui16);
+
+  // endgroup opcode for Power6
+  inline void endgroup();
+
+  // count instructions
+  inline void cntlzw(  Register a, Register s);
+  inline void cntlzw_( Register a, Register s);
+  inline void cntlzd(  Register a, Register s);
+  inline void cntlzd_( Register a, Register s);
+
+  // PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions
+  inline void sld(     Register a, Register s, Register b);
+  inline void sld_(    Register a, Register s, Register b);
+  inline void slw(     Register a, Register s, Register b);
+  inline void slw_(    Register a, Register s, Register b);
+  inline void srd(     Register a, Register s, Register b);
+  inline void srd_(    Register a, Register s, Register b);
+  inline void srw(     Register a, Register s, Register b);
+  inline void srw_(    Register a, Register s, Register b);
+  inline void srad(    Register a, Register s, Register b);
+  inline void srad_(   Register a, Register s, Register b);
+  inline void sraw(    Register a, Register s, Register b);
+  inline void sraw_(   Register a, Register s, Register b);
+  inline void sradi(   Register a, Register s, int sh6);
+  inline void sradi_(  Register a, Register s, int sh6);
+  inline void srawi(   Register a, Register s, int sh5);
+  inline void srawi_(  Register a, Register s, int sh5);
+
+  // extended mnemonics for Shift Instructions
+  inline void sldi(    Register a, Register s, int sh6);
+  inline void sldi_(   Register a, Register s, int sh6);
+  inline void slwi(    Register a, Register s, int sh5);
+  inline void slwi_(   Register a, Register s, int sh5);
+  inline void srdi(    Register a, Register s, int sh6);
+  inline void srdi_(   Register a, Register s, int sh6);
+  inline void srwi(    Register a, Register s, int sh5);
+  inline void srwi_(   Register a, Register s, int sh5);
+
+  inline void clrrdi(  Register a, Register s, int ui6);
+  inline void clrrdi_( Register a, Register s, int ui6);
+  inline void clrldi(  Register a, Register s, int ui6);
+  inline void clrldi_( Register a, Register s, int ui6);
+  inline void clrlsldi(Register a, Register s, int clrl6, int shl6);
+  inline void clrlsldi_(Register a, Register s, int clrl6, int shl6);
+  inline void extrdi(  Register a, Register s, int n, int b);
+  // testbit with condition register
+  inline void testbitdi(ConditionRegister cr, Register a, Register s, int ui6);
+
+  // rotate instructions
+  inline void rotldi(  Register a, Register s, int n);
+  inline void rotrdi(  Register a, Register s, int n);
+  inline void rotlwi(  Register a, Register s, int n);
+  inline void rotrwi(  Register a, Register s, int n);
+
+  // Rotate Instructions
+  inline void rldic(   Register a, Register s, int sh6, int mb6);
+  inline void rldic_(  Register a, Register s, int sh6, int mb6);
+  inline void rldicr(  Register a, Register s, int sh6, int mb6);
+  inline void rldicr_( Register a, Register s, int sh6, int mb6);
+  inline void rldicl(  Register a, Register s, int sh6, int mb6);
+  inline void rldicl_( Register a, Register s, int sh6, int mb6);
+  inline void rlwinm(  Register a, Register s, int sh5, int mb5, int me5);
+  inline void rlwinm_( Register a, Register s, int sh5, int mb5, int me5);
+  inline void rldimi(  Register a, Register s, int sh6, int mb6);
+  inline void rldimi_( Register a, Register s, int sh6, int mb6);
+  inline void rlwimi(  Register a, Register s, int sh5, int mb5, int me5);
+  inline void insrdi(  Register a, Register s, int n,   int b);
+  inline void insrwi(  Register a, Register s, int n,   int b);
+
+  // PPC 1, section 3.3.2 Fixed-Point Load Instructions
+  // 4 bytes
+  inline void lwzx( Register d, Register s1, Register s2);
+  inline void lwz(  Register d, int si16,    Register s1);
+  inline void lwzu( Register d, int si16,    Register s1);
+
+  // 4 bytes
+  inline void lwax( Register d, Register s1, Register s2);
+  inline void lwa(  Register d, int si16,    Register s1);
+
+  // 2 bytes
+  inline void lhzx( Register d, Register s1, Register s2);
+  inline void lhz(  Register d, int si16,    Register s1);
+  inline void lhzu( Register d, int si16,    Register s1);
+
+  // 2 bytes
+  inline void lhax( Register d, Register s1, Register s2);
+  inline void lha(  Register d, int si16,    Register s1);
+  inline void lhau( Register d, int si16,    Register s1);
+
+  // 1 byte
+  inline void lbzx( Register d, Register s1, Register s2);
+  inline void lbz(  Register d, int si16,    Register s1);
+  inline void lbzu( Register d, int si16,    Register s1);
+
+  // 8 bytes
+  inline void ldx(  Register d, Register s1, Register s2);
+  inline void ld(   Register d, int si16,    Register s1);
+  inline void ldu(  Register d, int si16,    Register s1);
+
+  //  PPC 1, section 3.3.3 Fixed-Point Store Instructions
+  inline void stwx( Register d, Register s1, Register s2);
+  inline void stw(  Register d, int si16,    Register s1);
+  inline void stwu( Register d, int si16,    Register s1);
+
+  inline void sthx( Register d, Register s1, Register s2);
+  inline void sth(  Register d, int si16,    Register s1);
+  inline void sthu( Register d, int si16,    Register s1);
+
+  inline void stbx( Register d, Register s1, Register s2);
+  inline void stb(  Register d, int si16,    Register s1);
+  inline void stbu( Register d, int si16,    Register s1);
+
+  inline void stdx( Register d, Register s1, Register s2);
+  inline void std(  Register d, int si16,    Register s1);
+  inline void stdu( Register d, int si16,    Register s1);
+  inline void stdux(Register s, Register a,  Register b);
+
+  // PPC 1, section 3.3.13 Move To/From System Register Instructions
+  inline void mtlr( Register s1);
+  inline void mflr( Register d);
+  inline void mtctr(Register s1);
+  inline void mfctr(Register d);
+  inline void mtcrf(int fxm, Register s);
+  inline void mfcr( Register d);
+  inline void mcrf( ConditionRegister crd, ConditionRegister cra);
+  inline void mtcr( Register s);
+
+  // PPC 1, section 2.4.1 Branch Instructions
+  inline void b(  address a, relocInfo::relocType rt = relocInfo::none);
+  inline void b(  Label& L);
+  inline void bl( address a, relocInfo::relocType rt = relocInfo::none);
+  inline void bl( Label& L);
+  inline void bc( int boint, int biint, address a, relocInfo::relocType rt = relocInfo::none);
+  inline void bc( int boint, int biint, Label& L);
+  inline void bcl(int boint, int biint, address a, relocInfo::relocType rt = relocInfo::none);
+  inline void bcl(int boint, int biint, Label& L);
+
+  inline void bclr(  int boint, int biint, int bhint, relocInfo::relocType rt = relocInfo::none);
+  inline void bclrl( int boint, int biint, int bhint, relocInfo::relocType rt = relocInfo::none);
+  inline void bcctr( int boint, int biint, int bhint = bhintbhBCCTRisNotReturnButSame,
+                         relocInfo::relocType rt = relocInfo::none);
+  inline void bcctrl(int boint, int biint, int bhint = bhintbhBCLRisReturn,
+                         relocInfo::relocType rt = relocInfo::none);
+
+  // helper function for b, bcxx
+  inline bool is_within_range_of_b(address a, address pc);
+  inline bool is_within_range_of_bcxx(address a, address pc);
+
+  // get the destination of a bxx branch (b, bl, ba, bla)
+  static inline address  bxx_destination(address baddr);
+  static inline address  bxx_destination(int instr, address pc);
+  static inline intptr_t bxx_destination_offset(int instr, intptr_t bxx_pos);
+
+  // extended mnemonics for branch instructions
+  inline void blt(ConditionRegister crx, Label& L);
+  inline void bgt(ConditionRegister crx, Label& L);
+  inline void beq(ConditionRegister crx, Label& L);
+  inline void bso(ConditionRegister crx, Label& L);
+  inline void bge(ConditionRegister crx, Label& L);
+  inline void ble(ConditionRegister crx, Label& L);
+  inline void bne(ConditionRegister crx, Label& L);
+  inline void bns(ConditionRegister crx, Label& L);
+
+  // Branch instructions with static prediction hints.
+  inline void blt_predict_taken(    ConditionRegister crx, Label& L);
+  inline void bgt_predict_taken(    ConditionRegister crx, Label& L);
+  inline void beq_predict_taken(    ConditionRegister crx, Label& L);
+  inline void bso_predict_taken(    ConditionRegister crx, Label& L);
+  inline void bge_predict_taken(    ConditionRegister crx, Label& L);
+  inline void ble_predict_taken(    ConditionRegister crx, Label& L);
+  inline void bne_predict_taken(    ConditionRegister crx, Label& L);
+  inline void bns_predict_taken(    ConditionRegister crx, Label& L);
+  inline void blt_predict_not_taken(ConditionRegister crx, Label& L);
+  inline void bgt_predict_not_taken(ConditionRegister crx, Label& L);
+  inline void beq_predict_not_taken(ConditionRegister crx, Label& L);
+  inline void bso_predict_not_taken(ConditionRegister crx, Label& L);
+  inline void bge_predict_not_taken(ConditionRegister crx, Label& L);
+  inline void ble_predict_not_taken(ConditionRegister crx, Label& L);
+  inline void bne_predict_not_taken(ConditionRegister crx, Label& L);
+  inline void bns_predict_not_taken(ConditionRegister crx, Label& L);
+
+  // for use in conjunction with testbitdi:
+  inline void btrue( ConditionRegister crx, Label& L);
+  inline void bfalse(ConditionRegister crx, Label& L);
+
+  inline void bltl(ConditionRegister crx, Label& L);
+  inline void bgtl(ConditionRegister crx, Label& L);
+  inline void beql(ConditionRegister crx, Label& L);
+  inline void bsol(ConditionRegister crx, Label& L);
+  inline void bgel(ConditionRegister crx, Label& L);
+  inline void blel(ConditionRegister crx, Label& L);
+  inline void bnel(ConditionRegister crx, Label& L);
+  inline void bnsl(ConditionRegister crx, Label& L);
+
+  // extended mnemonics for Branch Instructions via LR
+  // We use `blr' for returns.
+  inline void blr(relocInfo::relocType rt = relocInfo::none);
+
+  // extended mnemonics for Branch Instructions with CTR
+  // bdnz means `decrement CTR and jump to L if CTR is not zero'
+  inline void bdnz(Label& L);
+  // Decrement and branch if result is zero.
+  inline void bdz(Label& L);
+  // we use `bctr[l]' for jumps/calls in function descriptor glue
+  // code, e.g. calls to runtime functions
+  inline void bctr( relocInfo::relocType rt = relocInfo::none);
+  inline void bctrl(relocInfo::relocType rt = relocInfo::none);
+  // conditional jumps/branches via CTR
+  inline void beqctr( ConditionRegister crx, relocInfo::relocType rt = relocInfo::none);
+  inline void beqctrl(ConditionRegister crx, relocInfo::relocType rt = relocInfo::none);
+  inline void bnectr( ConditionRegister crx, relocInfo::relocType rt = relocInfo::none);
+  inline void bnectrl(ConditionRegister crx, relocInfo::relocType rt = relocInfo::none);
+
+  // condition register logic instructions
+  inline void crand( int d, int s1, int s2);
+  inline void crnand(int d, int s1, int s2);
+  inline void cror(  int d, int s1, int s2);
+  inline void crxor( int d, int s1, int s2);
+  inline void crnor( int d, int s1, int s2);
+  inline void creqv( int d, int s1, int s2);
+  inline void crandc(int d, int s1, int s2);
+  inline void crorc( int d, int s1, int s2);
+
+  // icache and dcache related instructions
+  inline void icbi(  Register s1, Register s2);
+  //inline void dcba(Register s1, Register s2); // Instruction for embedded processor only.
+  inline void dcbz(  Register s1, Register s2);
+  inline void dcbst( Register s1, Register s2);
+  inline void dcbf(  Register s1, Register s2);
+
+  enum ct_cache_specification {
+    ct_primary_cache   = 0,
+    ct_secondary_cache = 2
+  };
+  // dcache read hint
+  inline void dcbt(    Register s1, Register s2);
+  inline void dcbtct(  Register s1, Register s2, int ct);
+  inline void dcbtds(  Register s1, Register s2, int ds);
+  // dcache write hint
+  inline void dcbtst(  Register s1, Register s2);
+  inline void dcbtstct(Register s1, Register s2, int ct);
+
+  //  machine barrier instructions:
+  //
+  //  - sync    two-way memory barrier, aka fence
+  //  - lwsync  orders  Store|Store,
+  //                     Load|Store,
+  //                     Load|Load,
+  //            but not Store|Load
+  //  - eieio   orders memory accesses for device memory (only)
+  //  - isync   invalidates speculatively executed instructions
+  //            From the Power ISA 2.06 documentation:
+  //             "[...] an isync instruction prevents the execution of
+  //            instructions following the isync until instructions
+  //            preceding the isync have completed, [...]"
+  //            From IBM's AIX assembler reference:
+  //             "The isync [...] instructions causes the processor to
+  //            refetch any instructions that might have been fetched
+  //            prior to the isync instruction. The instruction isync
+  //            causes the processor to wait for all previous instructions
+  //            to complete. Then any instructions already fetched are
+  //            discarded and instruction processing continues in the
+  //            environment established by the previous instructions."
+  //
+  //  semantic barrier instructions:
+  //  (as defined in orderAccess.hpp)
+  //
+  //  - release  orders Store|Store,       (maps to lwsync)
+  //                     Load|Store
+  //  - acquire  orders  Load|Store,       (maps to lwsync)
+  //                     Load|Load
+  //  - fence    orders Store|Store,       (maps to sync)
+  //                     Load|Store,
+  //                     Load|Load,
+  //                    Store|Load
+  //
+ private:
+  inline void sync(int l);
+ public:
+  inline void sync();
+  inline void lwsync();
+  inline void ptesync();
+  inline void eieio();
+  inline void isync();
+
+  inline void release();
+  inline void acquire();
+  inline void fence();
+
+  // atomics
+  inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
+  inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0);
+  inline bool lxarx_hint_exclusive_access();
+  inline void lwarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
+  inline void ldarx(  Register d, Register a, Register b, bool hint_exclusive_access = false);
+  inline void stwcx_( Register s, Register a, Register b);
+  inline void stdcx_( Register s, Register a, Register b);
+
+  // Instructions for adjusting thread priority for simultaneous
+  // multithreading (SMT) on Power5.
+ private:
+  inline void smt_prio_very_low();
+  inline void smt_prio_medium_high();
+  inline void smt_prio_high();
+
+ public:
+  inline void smt_prio_low();
+  inline void smt_prio_medium_low();
+  inline void smt_prio_medium();
+
+  // trap instructions
+  inline void twi_0(Register a); // for load with acquire semantics use load+twi_0+isync (trap can't occur)
+  // NOT FOR DIRECT USE!!
+ protected:
+  inline void tdi_unchecked(int tobits, Register a, int si16);
+  inline void twi_unchecked(int tobits, Register a, int si16);
+  inline void tdi(          int tobits, Register a, int si16);   // asserts UseSIGTRAP
+  inline void twi(          int tobits, Register a, int si16);   // asserts UseSIGTRAP
+  inline void td(           int tobits, Register a, Register b); // asserts UseSIGTRAP
+  inline void tw(           int tobits, Register a, Register b); // asserts UseSIGTRAP
+
+  static bool is_tdi(int x, int tobits, int ra, int si16) {
+     return (TDI_OPCODE == (x & TDI_OPCODE_MASK))
+         && (tobits == inv_to_field(x))
+         && (ra == -1/*any reg*/ || ra == inv_ra_field(x))
+         && (si16 == inv_si_field(x));
+  }
+
+  static bool is_twi(int x, int tobits, int ra, int si16) {
+     return (TWI_OPCODE == (x & TWI_OPCODE_MASK))
+         && (tobits == inv_to_field(x))
+         && (ra == -1/*any reg*/ || ra == inv_ra_field(x))
+         && (si16 == inv_si_field(x));
+  }
+
+  static bool is_twi(int x, int tobits, int ra) {
+     return (TWI_OPCODE == (x & TWI_OPCODE_MASK))
+         && (tobits == inv_to_field(x))
+         && (ra == -1/*any reg*/ || ra == inv_ra_field(x));
+  }
+
+  static bool is_td(int x, int tobits, int ra, int rb) {
+     return (TD_OPCODE == (x & TD_OPCODE_MASK))
+         && (tobits == inv_to_field(x))
+         && (ra == -1/*any reg*/ || ra == inv_ra_field(x))
+         && (rb == -1/*any reg*/ || rb == inv_rb_field(x));
+  }
+
+  static bool is_tw(int x, int tobits, int ra, int rb) {
+     return (TW_OPCODE == (x & TW_OPCODE_MASK))
+         && (tobits == inv_to_field(x))
+         && (ra == -1/*any reg*/ || ra == inv_ra_field(x))
+         && (rb == -1/*any reg*/ || rb == inv_rb_field(x));
+  }
+
+ public:
+  // PPC floating point instructions
+  // PPC 1, section 4.6.2 Floating-Point Load Instructions
+  inline void lfs(  FloatRegister d, int si16,   Register a);
+  inline void lfsu( FloatRegister d, int si16,   Register a);
+  inline void lfsx( FloatRegister d, Register a, Register b);
+  inline void lfd(  FloatRegister d, int si16,   Register a);
+  inline void lfdu( FloatRegister d, int si16,   Register a);
+  inline void lfdx( FloatRegister d, Register a, Register b);
+
+  // PPC 1, section 4.6.3 Floating-Point Store Instructions
+  inline void stfs(  FloatRegister s, int si16,   Register a);
+  inline void stfsu( FloatRegister s, int si16,   Register a);
+  inline void stfsx( FloatRegister s, Register a, Register b);
+  inline void stfd(  FloatRegister s, int si16,   Register a);
+  inline void stfdu( FloatRegister s, int si16,   Register a);
+  inline void stfdx( FloatRegister s, Register a, Register b);
+
+  // PPC 1, section 4.6.4 Floating-Point Move Instructions
+  inline void fmr(  FloatRegister d, FloatRegister b);
+  inline void fmr_( FloatRegister d, FloatRegister b);
+
+  //  inline void mffgpr( FloatRegister d, Register b);
+  //  inline void mftgpr( Register d, FloatRegister b);
+  inline void cmpb(   Register a, Register s, Register b);
+  inline void popcntb(Register a, Register s);
+  inline void popcntw(Register a, Register s);
+  inline void popcntd(Register a, Register s);
+
+  inline void fneg(  FloatRegister d, FloatRegister b);
+  inline void fneg_( FloatRegister d, FloatRegister b);
+  inline void fabs(  FloatRegister d, FloatRegister b);
+  inline void fabs_( FloatRegister d, FloatRegister b);
+  inline void fnabs( FloatRegister d, FloatRegister b);
+  inline void fnabs_(FloatRegister d, FloatRegister b);
+
+  // PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic Instructions
+  inline void fadd(  FloatRegister d, FloatRegister a, FloatRegister b);
+  inline void fadd_( FloatRegister d, FloatRegister a, FloatRegister b);
+  inline void fadds( FloatRegister d, FloatRegister a, FloatRegister b);
+  inline void fadds_(FloatRegister d, FloatRegister a, FloatRegister b);
+  inline void fsub(  FloatRegister d, FloatRegister a, FloatRegister b);
+  inline void fsub_( FloatRegister d, FloatRegister a, FloatRegister b);
+  inline void fsubs( FloatRegister d, FloatRegister a, FloatRegister b);
+  inline void fsubs_(FloatRegister d, FloatRegister a, FloatRegister b);
+  inline void fmul(  FloatRegister d, FloatRegister a, FloatRegister c);
+  inline void fmul_( FloatRegister d, FloatRegister a, FloatRegister c);
+  inline void fmuls( FloatRegister d, FloatRegister a, FloatRegister c);
+  inline void fmuls_(FloatRegister d, FloatRegister a, FloatRegister c);
+  inline void fdiv(  FloatRegister d, FloatRegister a, FloatRegister b);
+  inline void fdiv_( FloatRegister d, FloatRegister a, FloatRegister b);
+  inline void fdivs( FloatRegister d, FloatRegister a, FloatRegister b);
+  inline void fdivs_(FloatRegister d, FloatRegister a, FloatRegister b);
+
+  // PPC 1, section 4.6.6 Floating-Point Rounding and Conversion Instructions
+  inline void frsp(  FloatRegister d, FloatRegister b);
+  inline void fctid( FloatRegister d, FloatRegister b);
+  inline void fctidz(FloatRegister d, FloatRegister b);
+  inline void fctiw( FloatRegister d, FloatRegister b);
+  inline void fctiwz(FloatRegister d, FloatRegister b);
+  inline void fcfid( FloatRegister d, FloatRegister b);
+  inline void fcfids(FloatRegister d, FloatRegister b);
+
+  // PPC 1, section 4.6.7 Floating-Point Compare Instructions
+  inline void fcmpu( ConditionRegister crx, FloatRegister a, FloatRegister b);
+
+  inline void fsqrt( FloatRegister d, FloatRegister b);
+  inline void fsqrts(FloatRegister d, FloatRegister b);
+
+  // Vector instructions for >= Power6.
+  inline void lvebx(    VectorRegister d, Register s1, Register s2);
+  inline void lvehx(    VectorRegister d, Register s1, Register s2);
+  inline void lvewx(    VectorRegister d, Register s1, Register s2);
+  inline void lvx(      VectorRegister d, Register s1, Register s2);
+  inline void lvxl(     VectorRegister d, Register s1, Register s2);
+  inline void stvebx(   VectorRegister d, Register s1, Register s2);
+  inline void stvehx(   VectorRegister d, Register s1, Register s2);
+  inline void stvewx(   VectorRegister d, Register s1, Register s2);
+  inline void stvx(     VectorRegister d, Register s1, Register s2);
+  inline void stvxl(    VectorRegister d, Register s1, Register s2);
+  inline void lvsl(     VectorRegister d, Register s1, Register s2);
+  inline void lvsr(     VectorRegister d, Register s1, Register s2);
+  inline void vpkpx(    VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vpkshss(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vpkswss(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vpkshus(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vpkswus(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vpkuhum(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vpkuwum(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vpkuhus(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vpkuwus(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vupkhpx(  VectorRegister d, VectorRegister b);
+  inline void vupkhsb(  VectorRegister d, VectorRegister b);
+  inline void vupkhsh(  VectorRegister d, VectorRegister b);
+  inline void vupklpx(  VectorRegister d, VectorRegister b);
+  inline void vupklsb(  VectorRegister d, VectorRegister b);
+  inline void vupklsh(  VectorRegister d, VectorRegister b);
+  inline void vmrghb(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmrghw(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmrghh(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmrglb(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmrglw(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmrglh(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsplt(    VectorRegister d, int ui4,          VectorRegister b);
+  inline void vsplth(   VectorRegister d, int ui3,          VectorRegister b);
+  inline void vspltw(   VectorRegister d, int ui2,          VectorRegister b);
+  inline void vspltisb( VectorRegister d, int si5);
+  inline void vspltish( VectorRegister d, int si5);
+  inline void vspltisw( VectorRegister d, int si5);
+  inline void vperm(    VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
+  inline void vsel(     VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
+  inline void vsl(      VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsldoi(   VectorRegister d, VectorRegister a, VectorRegister b, int si4);
+  inline void vslo(     VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsr(      VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsro(     VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vaddcuw(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vaddshs(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vaddsbs(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vaddsws(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vaddubm(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vadduwm(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vadduhm(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vaddubs(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vadduws(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vadduhs(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsubcuw(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsubshs(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsubsbs(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsubsws(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsububm(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsubuwm(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsubuhm(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsububs(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsubuws(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsubuhs(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmulesb(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmuleub(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmulesh(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmuleuh(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmulosb(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmuloub(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmulosh(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmulouh(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmhaddshs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
+  inline void vmhraddshs(VectorRegister d,VectorRegister a, VectorRegister b, VectorRegister c);
+  inline void vmladduhm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
+  inline void vmsubuhm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
+  inline void vmsummbm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
+  inline void vmsumshm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
+  inline void vmsumshs( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
+  inline void vmsumuhm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
+  inline void vmsumuhs( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c);
+  inline void vsumsws(  VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsum2sws( VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsum4sbs( VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsum4ubs( VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsum4shs( VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vavgsb(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vavgsw(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vavgsh(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vavgub(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vavguw(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vavguh(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmaxsb(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmaxsw(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmaxsh(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmaxub(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmaxuw(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vmaxuh(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vminsb(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vminsw(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vminsh(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vminub(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vminuw(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vminuh(   VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpequb( VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpequh( VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpequw( VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpgtsh( VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpgtsb( VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpgtsw( VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpgtub( VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpgtuh( VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpgtuw( VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpequb_(VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpequh_(VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpequw_(VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpgtsh_(VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpgtsb_(VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpgtsw_(VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpgtub_(VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpgtuh_(VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vcmpgtuw_(VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vand(     VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vandc(    VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vnor(     VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vor(      VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vxor(     VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vrlb(     VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vrlw(     VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vrlh(     VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vslb(     VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vskw(     VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vslh(     VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsrb(     VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsrw(     VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsrh(     VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsrab(    VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsraw(    VectorRegister d, VectorRegister a, VectorRegister b);
+  inline void vsrah(    VectorRegister d, VectorRegister a, VectorRegister b);
+  // Vector Floating-Point not implemented yet
+  inline void mtvscr(   VectorRegister b);
+  inline void mfvscr(   VectorRegister d);
+
+  // The following encoders use r0 as second operand. These instructions
+  // read r0 as '0'.
+  inline void lwzx( Register d, Register s2);
+  inline void lwz(  Register d, int si16);
+  inline void lwax( Register d, Register s2);
+  inline void lwa(  Register d, int si16);
+  inline void lhzx( Register d, Register s2);
+  inline void lhz(  Register d, int si16);
+  inline void lhax( Register d, Register s2);
+  inline void lha(  Register d, int si16);
+  inline void lbzx( Register d, Register s2);
+  inline void lbz(  Register d, int si16);
+  inline void ldx(  Register d, Register s2);
+  inline void ld(   Register d, int si16);
+  inline void stwx( Register d, Register s2);
+  inline void stw(  Register d, int si16);
+  inline void sthx( Register d, Register s2);
+  inline void sth(  Register d, int si16);
+  inline void stbx( Register d, Register s2);
+  inline void stb(  Register d, int si16);
+  inline void stdx( Register d, Register s2);
+  inline void std(  Register d, int si16);
+
+  // PPC 2, section 3.2.1 Instruction Cache Instructions
+  inline void icbi(    Register s2);
+  // PPC 2, section 3.2.2 Data Cache Instructions
+  //inlinevoid dcba(   Register s2); // Instruction for embedded processor only.
+  inline void dcbz(    Register s2);
+  inline void dcbst(   Register s2);
+  inline void dcbf(    Register s2);
+  // dcache read hint
+  inline void dcbt(    Register s2);
+  inline void dcbtct(  Register s2, int ct);
+  inline void dcbtds(  Register s2, int ds);
+  // dcache write hint
+  inline void dcbtst(  Register s2);
+  inline void dcbtstct(Register s2, int ct);
+
+  // Atomics: use ra0mem to disallow R0 as base.
+  inline void lwarx_unchecked(Register d, Register b, int eh1);
+  inline void ldarx_unchecked(Register d, Register b, int eh1);
+  inline void lwarx( Register d, Register b, bool hint_exclusive_access);
+  inline void ldarx( Register d, Register b, bool hint_exclusive_access);
+  inline void stwcx_(Register s, Register b);
+  inline void stdcx_(Register s, Register b);
+  inline void lfs(   FloatRegister d, int si16);
+  inline void lfsx(  FloatRegister d, Register b);
+  inline void lfd(   FloatRegister d, int si16);
+  inline void lfdx(  FloatRegister d, Register b);
+  inline void stfs(  FloatRegister s, int si16);
+  inline void stfsx( FloatRegister s, Register b);
+  inline void stfd(  FloatRegister s, int si16);
+  inline void stfdx( FloatRegister s, Register b);
+  inline void lvebx( VectorRegister d, Register s2);
+  inline void lvehx( VectorRegister d, Register s2);
+  inline void lvewx( VectorRegister d, Register s2);
+  inline void lvx(   VectorRegister d, Register s2);
+  inline void lvxl(  VectorRegister d, Register s2);
+  inline void stvebx(VectorRegister d, Register s2);
+  inline void stvehx(VectorRegister d, Register s2);
+  inline void stvewx(VectorRegister d, Register s2);
+  inline void stvx(  VectorRegister d, Register s2);
+  inline void stvxl( VectorRegister d, Register s2);
+  inline void lvsl(  VectorRegister d, Register s2);
+  inline void lvsr(  VectorRegister d, Register s2);
+
+  // RegisterOrConstant versions.
+  // These emitters choose between the versions using two registers and
+  // those with register and immediate, depending on the content of roc.
+  // If the constant is not encodable as immediate, instructions to
+  // load the constant are emitted beforehand. Store instructions need a
+  // tmp reg if the constant is not encodable as immediate.
+  // Size unpredictable.
+  void ld(  Register d, RegisterOrConstant roc, Register s1 = noreg);
+  void lwa( Register d, RegisterOrConstant roc, Register s1 = noreg);
+  void lwz( Register d, RegisterOrConstant roc, Register s1 = noreg);
+  void lha( Register d, RegisterOrConstant roc, Register s1 = noreg);
+  void lhz( Register d, RegisterOrConstant roc, Register s1 = noreg);
+  void lbz( Register d, RegisterOrConstant roc, Register s1 = noreg);
+  void std( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg);
+  void stw( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg);
+  void sth( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg);
+  void stb( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg);
+  void add( Register d, RegisterOrConstant roc, Register s1);
+  void subf(Register d, RegisterOrConstant roc, Register s1);
+  void cmpd(ConditionRegister d, RegisterOrConstant roc, Register s1);
+
+
+  // Emit several instructions to load a 64 bit constant. This issues a fixed
+  // instruction pattern so that the constant can be patched later on.
+  enum {
+    load_const_size = 5 * BytesPerInstWord
+  };
+         void load_const(Register d, long a,            Register tmp = noreg);
+  inline void load_const(Register d, void* a,           Register tmp = noreg);
+  inline void load_const(Register d, Label& L,          Register tmp = noreg);
+  inline void load_const(Register d, AddressLiteral& a, Register tmp = noreg);
+
+  // Load a 64 bit constant, optimized, not identifyable.
+  // Tmp can be used to increase ILP. Set return_simm16_rest=true to get a
+  // 16 bit immediate offset. This is useful if the offset can be encoded in
+  // a succeeding instruction.
+         int load_const_optimized(Register d, long a,  Register tmp = noreg, bool return_simm16_rest = false);
+  inline int load_const_optimized(Register d, void* a, Register tmp = noreg, bool return_simm16_rest = false) {
+    return load_const_optimized(d, (long)(unsigned long)a, tmp, return_simm16_rest);
+  }
+
+  // Creation
+  Assembler(CodeBuffer* code) : AbstractAssembler(code) {
+#ifdef CHECK_DELAY
+    delay_state = no_delay;
+#endif
+  }
+
+  // Testing
+#ifndef PRODUCT
+  void test_asm();
+#endif
+};
+
+
+#endif // CPU_PPC_VM_ASSEMBLER_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/assembler_ppc.inline.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,792 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP
+#define CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP
+
+#include "asm/assembler.inline.hpp"
+#include "asm/codeBuffer.hpp"
+#include "code/codeCache.hpp"
+
+inline void Assembler::emit_int32(int x) {
+  AbstractAssembler::emit_int32(x);
+}
+
+inline void Assembler::emit_data(int x) {
+  emit_int32(x);
+}
+
+inline void Assembler::emit_data(int x, relocInfo::relocType rtype) {
+  relocate(rtype);
+  emit_int32(x);
+}
+
+inline void Assembler::emit_data(int x, RelocationHolder const& rspec) {
+  relocate(rspec);
+  emit_int32(x);
+}
+
+// Emit an address
+inline address Assembler::emit_addr(const address addr) {
+  address start = pc();
+  emit_address(addr);
+  return start;
+}
+
+// Emit a function descriptor with the specified entry point, TOC, and
+// ENV. If the entry point is NULL, the descriptor will point just
+// past the descriptor.
+inline address Assembler::emit_fd(address entry, address toc, address env) {
+  FunctionDescriptor* fd = (FunctionDescriptor*)pc();
+
+  assert(sizeof(FunctionDescriptor) == 3*sizeof(address), "function descriptor size");
+
+  (void)emit_addr();
+  (void)emit_addr();
+  (void)emit_addr();
+
+  fd->set_entry(entry == NULL ? pc() : entry);
+  fd->set_toc(toc);
+  fd->set_env(env);
+
+  return (address)fd;
+}
+
+// Issue an illegal instruction. 0 is guaranteed to be an illegal instruction.
+inline void Assembler::illtrap() { Assembler::emit_int32(0); }
+inline bool Assembler::is_illtrap(int x) { return x == 0; }
+
+// PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions
+inline void Assembler::addi(   Register d, Register a, int si16)   { assert(a != R0, "r0 not allowed"); addi_r0ok( d, a, si16); }
+inline void Assembler::addis(  Register d, Register a, int si16)   { assert(a != R0, "r0 not allowed"); addis_r0ok(d, a, si16); }
+inline void Assembler::addi_r0ok(Register d,Register a,int si16)   { emit_int32(ADDI_OPCODE   | rt(d) | ra(a) | simm(si16, 16)); }
+inline void Assembler::addis_r0ok(Register d,Register a,int si16)  { emit_int32(ADDIS_OPCODE  | rt(d) | ra(a) | simm(si16, 16)); }
+inline void Assembler::addic_( Register d, Register a, int si16)   { emit_int32(ADDIC__OPCODE | rt(d) | ra(a) | simm(si16, 16)); }
+inline void Assembler::subfic( Register d, Register a, int si16)   { emit_int32(SUBFIC_OPCODE | rt(d) | ra(a) | simm(si16, 16)); }
+inline void Assembler::add(    Register d, Register a, Register b) { emit_int32(ADD_OPCODE    | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
+inline void Assembler::add_(   Register d, Register a, Register b) { emit_int32(ADD_OPCODE    | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+inline void Assembler::subf(   Register d, Register a, Register b) { emit_int32(SUBF_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
+inline void Assembler::sub(    Register d, Register a, Register b) { subf(d, b, a); }
+inline void Assembler::subf_(  Register d, Register a, Register b) { emit_int32(SUBF_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+inline void Assembler::addc(   Register d, Register a, Register b) { emit_int32(ADDC_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
+inline void Assembler::addc_(  Register d, Register a, Register b) { emit_int32(ADDC_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+inline void Assembler::subfc(  Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
+inline void Assembler::subfc_( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+inline void Assembler::adde(   Register d, Register a, Register b) { emit_int32(ADDE_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
+inline void Assembler::adde_(  Register d, Register a, Register b) { emit_int32(ADDE_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+inline void Assembler::subfe(  Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
+inline void Assembler::subfe_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+inline void Assembler::neg(    Register d, Register a)             { emit_int32(NEG_OPCODE    | rt(d) | ra(a) | oe(0) | rc(0)); }
+inline void Assembler::neg_(   Register d, Register a)             { emit_int32(NEG_OPCODE    | rt(d) | ra(a) | oe(0) | rc(1)); }
+inline void Assembler::mulli(  Register d, Register a, int si16)   { emit_int32(MULLI_OPCODE  | rt(d) | ra(a) | simm(si16, 16)); }
+inline void Assembler::mulld(  Register d, Register a, Register b) { emit_int32(MULLD_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
+inline void Assembler::mulld_( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+inline void Assembler::mullw(  Register d, Register a, Register b) { emit_int32(MULLW_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
+inline void Assembler::mullw_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE  | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+inline void Assembler::mulhw(  Register d, Register a, Register b) { emit_int32(MULHW_OPCODE  | rt(d) | ra(a) | rb(b) | rc(0)); }
+inline void Assembler::mulhw_( Register d, Register a, Register b) { emit_int32(MULHW_OPCODE  | rt(d) | ra(a) | rb(b) | rc(1)); }
+inline void Assembler::mulhd(  Register d, Register a, Register b) { emit_int32(MULHD_OPCODE  | rt(d) | ra(a) | rb(b) | rc(0)); }
+inline void Assembler::mulhd_( Register d, Register a, Register b) { emit_int32(MULHD_OPCODE  | rt(d) | ra(a) | rb(b) | rc(1)); }
+inline void Assembler::mulhdu( Register d, Register a, Register b) { emit_int32(MULHDU_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); }
+inline void Assembler::mulhdu_(Register d, Register a, Register b) { emit_int32(MULHDU_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); }
+inline void Assembler::divd(   Register d, Register a, Register b) { emit_int32(DIVD_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
+inline void Assembler::divd_(  Register d, Register a, Register b) { emit_int32(DIVD_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+inline void Assembler::divw(   Register d, Register a, Register b) { emit_int32(DIVW_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); }
+inline void Assembler::divw_(  Register d, Register a, Register b) { emit_int32(DIVW_OPCODE   | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); }
+
+// extended mnemonics
+inline void Assembler::li(   Register d, int si16)             { Assembler::addi_r0ok( d, R0, si16); }
+inline void Assembler::lis(  Register d, int si16)             { Assembler::addis_r0ok(d, R0, si16); }
+inline void Assembler::addir(Register d, int si16, Register a) { Assembler::addi(d, a, si16); }
+
+// PPC 1, section 3.3.9, Fixed-Point Compare Instructions
+inline void Assembler::cmpi(  ConditionRegister f, int l, Register a, int si16)   { emit_int32( CMPI_OPCODE  | bf(f) | l10(l) | ra(a) | simm(si16,16)); }
+inline void Assembler::cmp(   ConditionRegister f, int l, Register a, Register b) { emit_int32( CMP_OPCODE   | bf(f) | l10(l) | ra(a) | rb(b)); }
+inline void Assembler::cmpli( ConditionRegister f, int l, Register a, int ui16)   { emit_int32( CMPLI_OPCODE | bf(f) | l10(l) | ra(a) | uimm(ui16,16)); }
+inline void Assembler::cmpl(  ConditionRegister f, int l, Register a, Register b) { emit_int32( CMPL_OPCODE  | bf(f) | l10(l) | ra(a) | rb(b)); }
+
+// extended mnemonics of Compare Instructions
+inline void Assembler::cmpwi( ConditionRegister crx, Register a, int si16)   { Assembler::cmpi( crx, 0, a, si16); }
+inline void Assembler::cmpdi( ConditionRegister crx, Register a, int si16)   { Assembler::cmpi( crx, 1, a, si16); }
+inline void Assembler::cmpw(  ConditionRegister crx, Register a, Register b) { Assembler::cmp(  crx, 0, a, b); }
+inline void Assembler::cmpd(  ConditionRegister crx, Register a, Register b) { Assembler::cmp(  crx, 1, a, b); }
+inline void Assembler::cmplwi(ConditionRegister crx, Register a, int ui16)   { Assembler::cmpli(crx, 0, a, ui16); }
+inline void Assembler::cmpldi(ConditionRegister crx, Register a, int ui16)   { Assembler::cmpli(crx, 1, a, ui16); }
+inline void Assembler::cmplw( ConditionRegister crx, Register a, Register b) { Assembler::cmpl( crx, 0, a, b); }
+inline void Assembler::cmpld( ConditionRegister crx, Register a, Register b) { Assembler::cmpl( crx, 1, a, b); }
+
+inline void Assembler::isel(Register d, Register a, Register b, int c) { emit_int32(ISEL_OPCODE    | rt(d)  | ra(a) | rb(b) | bc(c)); }
+
+// PPC 1, section 3.3.11, Fixed-Point Logical Instructions
+inline void Assembler::andi_(   Register a, Register s, int ui16)      { emit_int32(ANDI_OPCODE    | rta(a) | rs(s) | uimm(ui16, 16)); }
+inline void Assembler::andis_(  Register a, Register s, int ui16)      { emit_int32(ANDIS_OPCODE   | rta(a) | rs(s) | uimm(ui16, 16)); }
+inline void Assembler::ori(     Register a, Register s, int ui16)      { emit_int32(ORI_OPCODE     | rta(a) | rs(s) | uimm(ui16, 16)); }
+inline void Assembler::oris(    Register a, Register s, int ui16)      { emit_int32(ORIS_OPCODE    | rta(a) | rs(s) | uimm(ui16, 16)); }
+inline void Assembler::xori(    Register a, Register s, int ui16)      { emit_int32(XORI_OPCODE    | rta(a) | rs(s) | uimm(ui16, 16)); }
+inline void Assembler::xoris(   Register a, Register s, int ui16)      { emit_int32(XORIS_OPCODE   | rta(a) | rs(s) | uimm(ui16, 16)); }
+inline void Assembler::andr(    Register a, Register s, Register b)    { emit_int32(AND_OPCODE     | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::and_(    Register a, Register s, Register b)    { emit_int32(AND_OPCODE     | rta(a) | rs(s) | rb(b) | rc(1)); }
+
+inline void Assembler::or_unchecked(Register a, Register s, Register b){ emit_int32(OR_OPCODE      | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::orr(     Register a, Register s, Register b)    { if (a==s && s==b) { Assembler::nop(); } else { Assembler::or_unchecked(a,s,b); } }
+inline void Assembler::or_(     Register a, Register s, Register b)    { emit_int32(OR_OPCODE      | rta(a) | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::xorr(    Register a, Register s, Register b)    { emit_int32(XOR_OPCODE     | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::xor_(    Register a, Register s, Register b)    { emit_int32(XOR_OPCODE     | rta(a) | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::nand(    Register a, Register s, Register b)    { emit_int32(NAND_OPCODE    | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::nand_(   Register a, Register s, Register b)    { emit_int32(NAND_OPCODE    | rta(a) | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::nor(     Register a, Register s, Register b)    { emit_int32(NOR_OPCODE     | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::nor_(    Register a, Register s, Register b)    { emit_int32(NOR_OPCODE     | rta(a) | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::andc(    Register a, Register s, Register b)    { emit_int32(ANDC_OPCODE    | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::andc_(   Register a, Register s, Register b)    { emit_int32(ANDC_OPCODE    | rta(a) | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::orc(     Register a, Register s, Register b)    { emit_int32(ORC_OPCODE     | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::orc_(    Register a, Register s, Register b)    { emit_int32(ORC_OPCODE     | rta(a) | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::extsb(   Register a, Register s)                { emit_int32(EXTSB_OPCODE   | rta(a) | rs(s) | rc(0)); }
+inline void Assembler::extsh(   Register a, Register s)                { emit_int32(EXTSH_OPCODE   | rta(a) | rs(s) | rc(0)); }
+inline void Assembler::extsw(   Register a, Register s)                { emit_int32(EXTSW_OPCODE   | rta(a) | rs(s) | rc(0)); }
+
+// extended mnemonics
+inline void Assembler::nop()                              { Assembler::ori(R0, R0, 0); }
+// NOP for FP and BR units (different versions to allow them to be in one group)
+inline void Assembler::fpnop0()                           { Assembler::fmr(F30, F30); }
+inline void Assembler::fpnop1()                           { Assembler::fmr(F31, F31); }
+inline void Assembler::brnop0()                           { Assembler::mcrf(CCR2, CCR2); }
+inline void Assembler::brnop1()                           { Assembler::mcrf(CCR3, CCR3); }
+inline void Assembler::brnop2()                           { Assembler::mcrf(CCR4,  CCR4); }
+
+inline void Assembler::mr(      Register d, Register s)   { Assembler::orr(d, s, s); }
+inline void Assembler::ori_opt( Register d, int ui16)     { if (ui16!=0) Assembler::ori( d, d, ui16); }
+inline void Assembler::oris_opt(Register d, int ui16)     { if (ui16!=0) Assembler::oris(d, d, ui16); }
+
+inline void Assembler::endgroup()                         { Assembler::ori(R1, R1, 0); }
+
+// count instructions
+inline void Assembler::cntlzw(  Register a, Register s)              { emit_int32(CNTLZW_OPCODE | rta(a) | rs(s) | rc(0)); }
+inline void Assembler::cntlzw_( Register a, Register s)              { emit_int32(CNTLZW_OPCODE | rta(a) | rs(s) | rc(1)); }
+inline void Assembler::cntlzd(  Register a, Register s)              { emit_int32(CNTLZD_OPCODE | rta(a) | rs(s) | rc(0)); }
+inline void Assembler::cntlzd_( Register a, Register s)              { emit_int32(CNTLZD_OPCODE | rta(a) | rs(s) | rc(1)); }
+
+// PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions
+inline void Assembler::sld(     Register a, Register s, Register b)  { emit_int32(SLD_OPCODE    | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::sld_(    Register a, Register s, Register b)  { emit_int32(SLD_OPCODE    | rta(a) | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::slw(     Register a, Register s, Register b)  { emit_int32(SLW_OPCODE    | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::slw_(    Register a, Register s, Register b)  { emit_int32(SLW_OPCODE    | rta(a) | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::srd(     Register a, Register s, Register b)  { emit_int32(SRD_OPCODE    | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::srd_(    Register a, Register s, Register b)  { emit_int32(SRD_OPCODE    | rta(a) | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::srw(     Register a, Register s, Register b)  { emit_int32(SRW_OPCODE    | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::srw_(    Register a, Register s, Register b)  { emit_int32(SRW_OPCODE    | rta(a) | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::srad(    Register a, Register s, Register b)  { emit_int32(SRAD_OPCODE   | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::srad_(   Register a, Register s, Register b)  { emit_int32(SRAD_OPCODE   | rta(a) | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::sraw(    Register a, Register s, Register b)  { emit_int32(SRAW_OPCODE   | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::sraw_(   Register a, Register s, Register b)  { emit_int32(SRAW_OPCODE   | rta(a) | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::sradi(   Register a, Register s, int sh6)     { emit_int32(SRADI_OPCODE  | rta(a) | rs(s) | sh162030(sh6) | rc(0)); }
+inline void Assembler::sradi_(  Register a, Register s, int sh6)     { emit_int32(SRADI_OPCODE  | rta(a) | rs(s) | sh162030(sh6) | rc(1)); }
+inline void Assembler::srawi(   Register a, Register s, int sh5)     { emit_int32(SRAWI_OPCODE  | rta(a) | rs(s) | sh1620(sh5) | rc(0)); }
+inline void Assembler::srawi_(  Register a, Register s, int sh5)     { emit_int32(SRAWI_OPCODE  | rta(a) | rs(s) | sh1620(sh5) | rc(1)); }
+
+// extended mnemonics for Shift Instructions
+inline void Assembler::sldi(    Register a, Register s, int sh6)     { Assembler::rldicr(a, s, sh6, 63-sh6); }
+inline void Assembler::sldi_(   Register a, Register s, int sh6)     { Assembler::rldicr_(a, s, sh6, 63-sh6); }
+inline void Assembler::slwi(    Register a, Register s, int sh5)     { Assembler::rlwinm(a, s, sh5, 0, 31-sh5); }
+inline void Assembler::slwi_(   Register a, Register s, int sh5)     { Assembler::rlwinm_(a, s, sh5, 0, 31-sh5); }
+inline void Assembler::srdi(    Register a, Register s, int sh6)     { Assembler::rldicl(a, s, 64-sh6, sh6); }
+inline void Assembler::srdi_(   Register a, Register s, int sh6)     { Assembler::rldicl_(a, s, 64-sh6, sh6); }
+inline void Assembler::srwi(    Register a, Register s, int sh5)     { Assembler::rlwinm(a, s, 32-sh5, sh5, 31); }
+inline void Assembler::srwi_(   Register a, Register s, int sh5)     { Assembler::rlwinm_(a, s, 32-sh5, sh5, 31); }
+
+inline void Assembler::clrrdi(  Register a, Register s, int ui6)     { Assembler::rldicr(a, s, 0, 63-ui6); }
+inline void Assembler::clrrdi_( Register a, Register s, int ui6)     { Assembler::rldicr_(a, s, 0, 63-ui6); }
+inline void Assembler::clrldi(  Register a, Register s, int ui6)     { Assembler::rldicl(a, s, 0, ui6); }
+inline void Assembler::clrldi_( Register a, Register s, int ui6)     { Assembler::rldicl_(a, s, 0, ui6); }
+inline void Assembler::clrlsldi( Register a, Register s, int clrl6, int shl6) { Assembler::rldic( a, s, shl6, clrl6-shl6); }
+inline void Assembler::clrlsldi_(Register a, Register s, int clrl6, int shl6) { Assembler::rldic_(a, s, shl6, clrl6-shl6); }
+inline void Assembler::extrdi(  Register a, Register s, int n, int b){ Assembler::rldicl(a, s, b+n, 64-n); }
+// testbit with condition register.
+inline void Assembler::testbitdi(ConditionRegister cr, Register a, Register s, int ui6) {
+  Assembler::rldicr(a, s, 63-ui6, 0);
+  Assembler::cmpdi(cr, a, 0);
+}
+
+// rotate instructions
+inline void Assembler::rotldi( Register a, Register s, int n) { Assembler::rldicl(a, s, n, 0); }
+inline void Assembler::rotrdi( Register a, Register s, int n) { Assembler::rldicl(a, s, 64-n, 0); }
+inline void Assembler::rotlwi( Register a, Register s, int n) { Assembler::rlwinm(a, s, n, 0, 31); }
+inline void Assembler::rotrwi( Register a, Register s, int n) { Assembler::rlwinm(a, s, 32-n, 0, 31); }
+
+inline void Assembler::rldic(   Register a, Register s, int sh6, int mb6)         { emit_int32(RLDIC_OPCODE  | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(0)); }
+inline void Assembler::rldic_(  Register a, Register s, int sh6, int mb6)         { emit_int32(RLDIC_OPCODE  | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(1)); }
+inline void Assembler::rldicr(  Register a, Register s, int sh6, int mb6)         { emit_int32(RLDICR_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(0)); }
+inline void Assembler::rldicr_( Register a, Register s, int sh6, int mb6)         { emit_int32(RLDICR_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(1)); }
+inline void Assembler::rldicl(  Register a, Register s, int sh6, int me6)         { emit_int32(RLDICL_OPCODE | rta(a) | rs(s) | sh162030(sh6) | me2126(me6) | rc(0)); }
+inline void Assembler::rldicl_( Register a, Register s, int sh6, int me6)         { emit_int32(RLDICL_OPCODE | rta(a) | rs(s) | sh162030(sh6) | me2126(me6) | rc(1)); }
+inline void Assembler::rlwinm(  Register a, Register s, int sh5, int mb5, int me5){ emit_int32(RLWINM_OPCODE | rta(a) | rs(s) | sh1620(sh5) | mb2125(mb5) | me2630(me5) | rc(0)); }
+inline void Assembler::rlwinm_( Register a, Register s, int sh5, int mb5, int me5){ emit_int32(RLWINM_OPCODE | rta(a) | rs(s) | sh1620(sh5) | mb2125(mb5) | me2630(me5) | rc(1)); }
+inline void Assembler::rldimi(  Register a, Register s, int sh6, int mb6)         { emit_int32(RLDIMI_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(0)); }
+inline void Assembler::rlwimi(  Register a, Register s, int sh5, int mb5, int me5){ emit_int32(RLWIMI_OPCODE | rta(a) | rs(s) | sh1620(sh5) | mb2125(mb5) | me2630(me5) | rc(0)); }
+inline void Assembler::rldimi_( Register a, Register s, int sh6, int mb6)         { emit_int32(RLDIMI_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(1)); }
+inline void Assembler::insrdi(  Register a, Register s, int n,   int b)           { Assembler::rldimi(a, s, 64-(b+n), b); }
+inline void Assembler::insrwi(  Register a, Register s, int n,   int b)           { Assembler::rlwimi(a, s, 32-(b+n), b, b+n-1); }
+
+// PPC 1, section 3.3.2 Fixed-Point Load Instructions
+inline void Assembler::lwzx( Register d, Register s1, Register s2) { emit_int32(LWZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
+inline void Assembler::lwz(  Register d, int si16,    Register s1) { emit_int32(LWZ_OPCODE  | rt(d) | d1(si16)   | ra0mem(s1));}
+inline void Assembler::lwzu( Register d, int si16,    Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LWZU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));}
+
+inline void Assembler::lwax( Register d, Register s1, Register s2) { emit_int32(LWAX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
+inline void Assembler::lwa(  Register d, int si16,    Register s1) { emit_int32(LWA_OPCODE  | rt(d) | ds(si16)   | ra0mem(s1));}
+
+inline void Assembler::lhzx( Register d, Register s1, Register s2) { emit_int32(LHZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
+inline void Assembler::lhz(  Register d, int si16,    Register s1) { emit_int32(LHZ_OPCODE  | rt(d) | d1(si16)   | ra0mem(s1));}
+inline void Assembler::lhzu( Register d, int si16,    Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LHZU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));}
+
+inline void Assembler::lhax( Register d, Register s1, Register s2) { emit_int32(LHAX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
+inline void Assembler::lha(  Register d, int si16,    Register s1) { emit_int32(LHA_OPCODE  | rt(d) | d1(si16)   | ra0mem(s1));}
+inline void Assembler::lhau( Register d, int si16,    Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LHAU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));}
+
+inline void Assembler::lbzx( Register d, Register s1, Register s2) { emit_int32(LBZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
+inline void Assembler::lbz(  Register d, int si16,    Register s1) { emit_int32(LBZ_OPCODE  | rt(d) | d1(si16)   | ra0mem(s1));}
+inline void Assembler::lbzu( Register d, int si16,    Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LBZU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));}
+
+inline void Assembler::ld(   Register d, int si16,    Register s1) { emit_int32(LD_OPCODE  | rt(d) | ds(si16)   | ra0mem(s1));}
+inline void Assembler::ldx(  Register d, Register s1, Register s2) { emit_int32(LDX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
+inline void Assembler::ldu(  Register d, int si16,    Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LDU_OPCODE | rt(d) | ds(si16) | rta0mem(s1));}
+
+//  PPC 1, section 3.3.3 Fixed-Point Store Instructions
+inline void Assembler::stwx( Register d, Register s1, Register s2) { emit_int32(STWX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
+inline void Assembler::stw(  Register d, int si16,    Register s1) { emit_int32(STW_OPCODE  | rs(d) | d1(si16)   | ra0mem(s1));}
+inline void Assembler::stwu( Register d, int si16,    Register s1) { emit_int32(STWU_OPCODE | rs(d) | d1(si16)   | rta0mem(s1));}
+
+inline void Assembler::sthx( Register d, Register s1, Register s2) { emit_int32(STHX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
+inline void Assembler::sth(  Register d, int si16,    Register s1) { emit_int32(STH_OPCODE  | rs(d) | d1(si16)   | ra0mem(s1));}
+inline void Assembler::sthu( Register d, int si16,    Register s1) { emit_int32(STHU_OPCODE | rs(d) | d1(si16)   | rta0mem(s1));}
+
+inline void Assembler::stbx( Register d, Register s1, Register s2) { emit_int32(STBX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
+inline void Assembler::stb(  Register d, int si16,    Register s1) { emit_int32(STB_OPCODE  | rs(d) | d1(si16)   | ra0mem(s1));}
+inline void Assembler::stbu( Register d, int si16,    Register s1) { emit_int32(STBU_OPCODE | rs(d) | d1(si16)   | rta0mem(s1));}
+
+inline void Assembler::std(  Register d, int si16,    Register s1) { emit_int32(STD_OPCODE  | rs(d) | ds(si16)   | ra0mem(s1));}
+inline void Assembler::stdx( Register d, Register s1, Register s2) { emit_int32(STDX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
+inline void Assembler::stdu( Register d, int si16,    Register s1) { emit_int32(STDU_OPCODE | rs(d) | ds(si16)   | rta0mem(s1));}
+inline void Assembler::stdux(Register s, Register a,  Register b)  { emit_int32(STDUX_OPCODE| rs(s) | rta0mem(a) | rb(b));}
+
+// PPC 1, section 3.3.13 Move To/From System Register Instructions
+inline void Assembler::mtlr( Register s1)         { emit_int32(MTLR_OPCODE  | rs(s1)); }
+inline void Assembler::mflr( Register d )         { emit_int32(MFLR_OPCODE  | rt(d)); }
+inline void Assembler::mtctr(Register s1)         { emit_int32(MTCTR_OPCODE | rs(s1)); }
+inline void Assembler::mfctr(Register d )         { emit_int32(MFCTR_OPCODE | rt(d)); }
+inline void Assembler::mtcrf(int afxm, Register s){ emit_int32(MTCRF_OPCODE | fxm(afxm) | rs(s)); }
+inline void Assembler::mfcr( Register d )         { emit_int32(MFCR_OPCODE  | rt(d)); }
+inline void Assembler::mcrf( ConditionRegister crd, ConditionRegister cra)
+                                                      { emit_int32(MCRF_OPCODE | bf(crd) | bfa(cra)); }
+inline void Assembler::mtcr( Register s)          { Assembler::mtcrf(0xff, s); }
+
+// SAP JVM 2006-02-13 PPC branch instruction.
+// PPC 1, section 2.4.1 Branch Instructions
+inline void Assembler::b( address a, relocInfo::relocType rt) { emit_data(BXX_OPCODE| li(disp( intptr_t(a), intptr_t(pc()))) |aa(0)|lk(0), rt); }
+inline void Assembler::b( Label& L)                           { b( target(L)); }
+inline void Assembler::bl(address a, relocInfo::relocType rt) { emit_data(BXX_OPCODE| li(disp( intptr_t(a), intptr_t(pc()))) |aa(0)|lk(1), rt); }
+inline void Assembler::bl(Label& L)                           { bl(target(L)); }
+inline void Assembler::bc( int boint, int biint, address a, relocInfo::relocType rt) { emit_data(BCXX_OPCODE| bo(boint) | bi(biint) | bd(disp( intptr_t(a), intptr_t(pc()))) | aa(0) | lk(0), rt); }
+inline void Assembler::bc( int boint, int biint, Label& L)                           { bc(boint, biint, target(L)); }
+inline void Assembler::bcl(int boint, int biint, address a, relocInfo::relocType rt) { emit_data(BCXX_OPCODE| bo(boint) | bi(biint) | bd(disp( intptr_t(a), intptr_t(pc()))) | aa(0)|lk(1)); }
+inline void Assembler::bcl(int boint, int biint, Label& L)                           { bcl(boint, biint, target(L)); }
+
+inline void Assembler::bclr(  int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCLR_OPCODE | bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(0), rt); }
+inline void Assembler::bclrl( int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCLR_OPCODE | bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(1), rt); }
+inline void Assembler::bcctr( int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCCTR_OPCODE| bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(0), rt); }
+inline void Assembler::bcctrl(int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCCTR_OPCODE| bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(1), rt); }
+
+// helper function for b
+inline bool Assembler::is_within_range_of_b(address a, address pc) {
+  // Guard against illegal branch targets, e.g. -1 (see CompiledStaticCall and ad-file).
+  if ((((uint64_t)a) & 0x3) != 0) return false;
+
+  const int range = 1 << (29-6); // li field is from bit 6 to bit 29.
+  int value = disp(intptr_t(a), intptr_t(pc));
+  bool result = -range <= value && value < range-1;
+#ifdef ASSERT
+  if (result) li(value); // Assert that value is in correct range.
+#endif
+  return result;
+}
+
+// helper functions for bcxx.
+inline bool Assembler::is_within_range_of_bcxx(address a, address pc) {
+  // Guard against illegal branch targets, e.g. -1 (see CompiledStaticCall and ad-file).
+  if ((((uint64_t)a) & 0x3) != 0) return false;
+
+  const int range = 1 << (29-16); // bd field is from bit 16 to bit 29.
+  int value = disp(intptr_t(a), intptr_t(pc));
+  bool result = -range <= value && value < range-1;
+#ifdef ASSERT
+  if (result) bd(value); // Assert that value is in correct range.
+#endif
+  return result;
+}
+
+// Get the destination of a bxx branch (b, bl, ba, bla).
+address  Assembler::bxx_destination(address baddr) { return bxx_destination(*(int*)baddr, baddr); }
+address  Assembler::bxx_destination(int instr, address pc) { return (address)bxx_destination_offset(instr, (intptr_t)pc); }
+intptr_t Assembler::bxx_destination_offset(int instr, intptr_t bxx_pos) {
+  intptr_t displ = inv_li_field(instr);
+  return bxx_pos + displ;
+}
+
+// Extended mnemonics for Branch Instructions
+inline void Assembler::blt(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, less), L); }
+inline void Assembler::bgt(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, greater), L); }
+inline void Assembler::beq(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, equal), L); }
+inline void Assembler::bso(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, summary_overflow), L); }
+inline void Assembler::bge(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, less), L); }
+inline void Assembler::ble(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, greater), L); }
+inline void Assembler::bne(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, equal), L); }
+inline void Assembler::bns(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, summary_overflow), L); }
+
+// Branch instructions with static prediction hints.
+inline void Assembler::blt_predict_taken    (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken,    bi0(crx, less), L); }
+inline void Assembler::bgt_predict_taken    (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken,    bi0(crx, greater), L); }
+inline void Assembler::beq_predict_taken    (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken,    bi0(crx, equal), L); }
+inline void Assembler::bso_predict_taken    (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken,    bi0(crx, summary_overflow), L); }
+inline void Assembler::bge_predict_taken    (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken,    bi0(crx, less), L); }
+inline void Assembler::ble_predict_taken    (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken,    bi0(crx, greater), L); }
+inline void Assembler::bne_predict_taken    (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken,    bi0(crx, equal), L); }
+inline void Assembler::bns_predict_taken    (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken,    bi0(crx, summary_overflow), L); }
+inline void Assembler::blt_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, less), L); }
+inline void Assembler::bgt_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, greater), L); }
+inline void Assembler::beq_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, equal), L); }
+inline void Assembler::bso_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, summary_overflow), L); }
+inline void Assembler::bge_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, less), L); }
+inline void Assembler::ble_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, greater), L); }
+inline void Assembler::bne_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, equal), L); }
+inline void Assembler::bns_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, summary_overflow), L); }
+
+// For use in conjunction with testbitdi:
+inline void Assembler::btrue( ConditionRegister crx, Label& L) { Assembler::bne(crx, L); }
+inline void Assembler::bfalse(ConditionRegister crx, Label& L) { Assembler::beq(crx, L); }
+
+inline void Assembler::bltl(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, less), L); }
+inline void Assembler::bgtl(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, greater), L); }
+inline void Assembler::beql(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, equal), L); }
+inline void Assembler::bsol(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, summary_overflow), L); }
+inline void Assembler::bgel(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, less), L); }
+inline void Assembler::blel(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, greater), L); }
+inline void Assembler::bnel(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, equal), L); }
+inline void Assembler::bnsl(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, summary_overflow), L); }
+
+// Extended mnemonics for Branch Instructions via LR.
+// We use `blr' for returns.
+inline void Assembler::blr(relocInfo::relocType rt) { Assembler::bclr(bcondAlways, 0, bhintbhBCLRisReturn, rt); }
+
+// Extended mnemonics for Branch Instructions with CTR.
+// Bdnz means `decrement CTR and jump to L if CTR is not zero'.
+inline void Assembler::bdnz(Label& L) { Assembler::bc(16, 0, L); }
+// Decrement and branch if result is zero.
+inline void Assembler::bdz(Label& L)  { Assembler::bc(18, 0, L); }
+// We use `bctr[l]' for jumps/calls in function descriptor glue
+// code, e.g. for calls to runtime functions.
+inline void Assembler::bctr( relocInfo::relocType rt) { Assembler::bcctr(bcondAlways, 0, bhintbhBCCTRisNotReturnButSame, rt); }
+inline void Assembler::bctrl(relocInfo::relocType rt) { Assembler::bcctrl(bcondAlways, 0, bhintbhBCCTRisNotReturnButSame, rt); }
+// Conditional jumps/branches via CTR.
+inline void Assembler::beqctr( ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctr( bcondCRbiIs1, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); }
+inline void Assembler::beqctrl(ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctrl(bcondCRbiIs1, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); }
+inline void Assembler::bnectr( ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctr( bcondCRbiIs0, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); }
+inline void Assembler::bnectrl(ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctrl(bcondCRbiIs0, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); }
+
+// condition register logic instructions
+inline void Assembler::crand( int d, int s1, int s2) { emit_int32(CRAND_OPCODE  | bt(d) | ba(s1) | bb(s2)); }
+inline void Assembler::crnand(int d, int s1, int s2) { emit_int32(CRNAND_OPCODE | bt(d) | ba(s1) | bb(s2)); }
+inline void Assembler::cror(  int d, int s1, int s2) { emit_int32(CROR_OPCODE   | bt(d) | ba(s1) | bb(s2)); }
+inline void Assembler::crxor( int d, int s1, int s2) { emit_int32(CRXOR_OPCODE  | bt(d) | ba(s1) | bb(s2)); }
+inline void Assembler::crnor( int d, int s1, int s2) { emit_int32(CRNOR_OPCODE  | bt(d) | ba(s1) | bb(s2)); }
+inline void Assembler::creqv( int d, int s1, int s2) { emit_int32(CREQV_OPCODE  | bt(d) | ba(s1) | bb(s2)); }
+inline void Assembler::crandc(int d, int s1, int s2) { emit_int32(CRANDC_OPCODE | bt(d) | ba(s1) | bb(s2)); }
+inline void Assembler::crorc( int d, int s1, int s2) { emit_int32(CRORC_OPCODE  | bt(d) | ba(s1) | bb(s2)); }
+
+// PPC 2, section 3.2.1 Instruction Cache Instructions
+inline void Assembler::icbi(    Register s1, Register s2)         { emit_int32( ICBI_OPCODE   | ra0mem(s1) | rb(s2)           ); }
+// PPC 2, section 3.2.2 Data Cache Instructions
+//inline void Assembler::dcba(  Register s1, Register s2)         { emit_int32( DCBA_OPCODE   | ra0mem(s1) | rb(s2)           ); }
+inline void Assembler::dcbz(    Register s1, Register s2)         { emit_int32( DCBZ_OPCODE   | ra0mem(s1) | rb(s2)           ); }
+inline void Assembler::dcbst(   Register s1, Register s2)         { emit_int32( DCBST_OPCODE  | ra0mem(s1) | rb(s2)           ); }
+inline void Assembler::dcbf(    Register s1, Register s2)         { emit_int32( DCBF_OPCODE   | ra0mem(s1) | rb(s2)           ); }
+// dcache read hint
+inline void Assembler::dcbt(    Register s1, Register s2)         { emit_int32( DCBT_OPCODE   | ra0mem(s1) | rb(s2)           ); }
+inline void Assembler::dcbtct(  Register s1, Register s2, int ct) { emit_int32( DCBT_OPCODE   | ra0mem(s1) | rb(s2) | thct(ct)); }
+inline void Assembler::dcbtds(  Register s1, Register s2, int ds) { emit_int32( DCBT_OPCODE   | ra0mem(s1) | rb(s2) | thds(ds)); }
+// dcache write hint
+inline void Assembler::dcbtst(  Register s1, Register s2)         { emit_int32( DCBTST_OPCODE | ra0mem(s1) | rb(s2)           ); }
+inline void Assembler::dcbtstct(Register s1, Register s2, int ct) { emit_int32( DCBTST_OPCODE | ra0mem(s1) | rb(s2) | thct(ct)); }
+
+// machine barrier instructions:
+inline void Assembler::sync(int a) { emit_int32( SYNC_OPCODE | l910(a)); }
+inline void Assembler::sync()      { Assembler::sync(0); }
+inline void Assembler::lwsync()    { Assembler::sync(1); }
+inline void Assembler::ptesync()   { Assembler::sync(2); }
+inline void Assembler::eieio()     { emit_int32( EIEIO_OPCODE); }
+inline void Assembler::isync()     { emit_int32( ISYNC_OPCODE); }
+
+inline void Assembler::release()   { Assembler::lwsync(); }
+inline void Assembler::acquire()   { Assembler::lwsync(); }
+inline void Assembler::fence()     { Assembler::sync(); }
+
+// atomics
+// Use ra0mem to disallow R0 as base.
+inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
+inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1)           { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); }
+inline bool Assembler::lxarx_hint_exclusive_access()                                          { return VM_Version::has_lxarxeh(); }
+inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::stwcx_(Register s, Register a, Register b)                             { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
+inline void Assembler::stdcx_(Register s, Register a, Register b)                             { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); }
+
+// Instructions for adjusting thread priority
+// for simultaneous multithreading (SMT) on POWER5.
+inline void Assembler::smt_prio_very_low()    { Assembler::or_unchecked(R31, R31, R31); }
+inline void Assembler::smt_prio_low()         { Assembler::or_unchecked(R1,  R1,  R1); }
+inline void Assembler::smt_prio_medium_low()  { Assembler::or_unchecked(R6,  R6,  R6); }
+inline void Assembler::smt_prio_medium()      { Assembler::or_unchecked(R2,  R2,  R2); }
+inline void Assembler::smt_prio_medium_high() { Assembler::or_unchecked(R5,  R5,  R5); }
+inline void Assembler::smt_prio_high()        { Assembler::or_unchecked(R3,  R3,  R3); }
+
+inline void Assembler::twi_0(Register a)      { twi_unchecked(0, a, 0);}
+
+// trap instructions
+inline void Assembler::tdi_unchecked(int tobits, Register a, int si16){                                     emit_int32( TDI_OPCODE | to(tobits) | ra(a) | si(si16)); }
+inline void Assembler::twi_unchecked(int tobits, Register a, int si16){                                     emit_int32( TWI_OPCODE | to(tobits) | ra(a) | si(si16)); }
+inline void Assembler::tdi(int tobits, Register a, int si16)          { assert(UseSIGTRAP, "precondition"); tdi_unchecked(tobits, a, si16);                      }
+inline void Assembler::twi(int tobits, Register a, int si16)          { assert(UseSIGTRAP, "precondition"); twi_unchecked(tobits, a, si16);                      }
+inline void Assembler::td( int tobits, Register a, Register b)        { assert(UseSIGTRAP, "precondition"); emit_int32( TD_OPCODE  | to(tobits) | ra(a) | rb(b)); }
+inline void Assembler::tw( int tobits, Register a, Register b)        { assert(UseSIGTRAP, "precondition"); emit_int32( TW_OPCODE  | to(tobits) | ra(a) | rb(b)); }
+
+// FLOATING POINT instructions ppc.
+// PPC 1, section 4.6.2 Floating-Point Load Instructions
+// Use ra0mem instead of ra in some instructions below.
+inline void Assembler::lfs( FloatRegister d, int si16, Register a)   { emit_int32( LFS_OPCODE  | frt(d) | ra0mem(a) | simm(si16,16)); }
+inline void Assembler::lfsu(FloatRegister d, int si16, Register a)   { emit_int32( LFSU_OPCODE | frt(d) | ra(a)     | simm(si16,16)); }
+inline void Assembler::lfsx(FloatRegister d, Register a, Register b) { emit_int32( LFSX_OPCODE | frt(d) | ra0mem(a) | rb(b)); }
+inline void Assembler::lfd( FloatRegister d, int si16, Register a)   { emit_int32( LFD_OPCODE  | frt(d) | ra0mem(a) | simm(si16,16)); }
+inline void Assembler::lfdu(FloatRegister d, int si16, Register a)   { emit_int32( LFDU_OPCODE | frt(d) | ra(a)     | simm(si16,16)); }
+inline void Assembler::lfdx(FloatRegister d, Register a, Register b) { emit_int32( LFDX_OPCODE | frt(d) | ra0mem(a) | rb(b)); }
+
+// PPC 1, section 4.6.3 Floating-Point Store Instructions
+// Use ra0mem instead of ra in some instructions below.
+inline void Assembler::stfs( FloatRegister s, int si16, Register a)  { emit_int32( STFS_OPCODE  | frs(s) | ra0mem(a) | simm(si16,16)); }
+inline void Assembler::stfsu(FloatRegister s, int si16, Register a)  { emit_int32( STFSU_OPCODE | frs(s) | ra(a)     | simm(si16,16)); }
+inline void Assembler::stfsx(FloatRegister s, Register a, Register b){ emit_int32( STFSX_OPCODE | frs(s) | ra0mem(a) | rb(b)); }
+inline void Assembler::stfd( FloatRegister s, int si16, Register a)  { emit_int32( STFD_OPCODE  | frs(s) | ra0mem(a) | simm(si16,16)); }
+inline void Assembler::stfdu(FloatRegister s, int si16, Register a)  { emit_int32( STFDU_OPCODE | frs(s) | ra(a)     | simm(si16,16)); }
+inline void Assembler::stfdx(FloatRegister s, Register a, Register b){ emit_int32( STFDX_OPCODE | frs(s) | ra0mem(a) | rb(b)); }
+
+// PPC 1, section 4.6.4 Floating-Point Move Instructions
+inline void Assembler::fmr( FloatRegister d, FloatRegister b) { emit_int32( FMR_OPCODE | frt(d) | frb(b) | rc(0)); }
+inline void Assembler::fmr_(FloatRegister d, FloatRegister b) { emit_int32( FMR_OPCODE | frt(d) | frb(b) | rc(1)); }
+
+// These are special Power6 opcodes, reused for "lfdepx" and "stfdepx"
+// on Power7.  Do not use.
+//inline void Assembler::mffgpr( FloatRegister d, Register b)   { emit_int32( MFFGPR_OPCODE | frt(d) | rb(b) | rc(0)); }
+//inline void Assembler::mftgpr( Register d, FloatRegister b)   { emit_int32( MFTGPR_OPCODE | rt(d) | frb(b) | rc(0)); }
+// add cmpb and popcntb to detect ppc power version.
+inline void Assembler::cmpb(   Register a, Register s, Register b) { emit_int32( CMPB_OPCODE    | rta(a) | rs(s) | rb(b) | rc(0)); }
+inline void Assembler::popcntb(Register a, Register s)             { emit_int32( POPCNTB_OPCODE | rta(a) | rs(s)); };
+inline void Assembler::popcntw(Register a, Register s)             { emit_int32( POPCNTW_OPCODE | rta(a) | rs(s)); };
+inline void Assembler::popcntd(Register a, Register s)             { emit_int32( POPCNTD_OPCODE | rta(a) | rs(s)); };
+
+inline void Assembler::fneg(  FloatRegister d, FloatRegister b) { emit_int32( FNEG_OPCODE  | frt(d) | frb(b) | rc(0)); }
+inline void Assembler::fneg_( FloatRegister d, FloatRegister b) { emit_int32( FNEG_OPCODE  | frt(d) | frb(b) | rc(1)); }
+inline void Assembler::fabs(  FloatRegister d, FloatRegister b) { emit_int32( FABS_OPCODE  | frt(d) | frb(b) | rc(0)); }
+inline void Assembler::fabs_( FloatRegister d, FloatRegister b) { emit_int32( FABS_OPCODE  | frt(d) | frb(b) | rc(1)); }
+inline void Assembler::fnabs( FloatRegister d, FloatRegister b) { emit_int32( FNABS_OPCODE | frt(d) | frb(b) | rc(0)); }
+inline void Assembler::fnabs_(FloatRegister d, FloatRegister b) { emit_int32( FNABS_OPCODE | frt(d) | frb(b) | rc(1)); }
+
+// PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic Instructions
+inline void Assembler::fadd(  FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADD_OPCODE  | frt(d) | fra(a) | frb(b) | rc(0)); }
+inline void Assembler::fadd_( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADD_OPCODE  | frt(d) | fra(a) | frb(b) | rc(1)); }
+inline void Assembler::fadds( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADDS_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); }
+inline void Assembler::fadds_(FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADDS_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); }
+inline void Assembler::fsub(  FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUB_OPCODE  | frt(d) | fra(a) | frb(b) | rc(0)); }
+inline void Assembler::fsub_( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUB_OPCODE  | frt(d) | fra(a) | frb(b) | rc(1)); }
+inline void Assembler::fsubs( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUBS_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); }
+inline void Assembler::fsubs_(FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUBS_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); }
+inline void Assembler::fmul(  FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMUL_OPCODE  | frt(d) | fra(a) | frc(c) | rc(0)); }
+inline void Assembler::fmul_( FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMUL_OPCODE  | frt(d) | fra(a) | frc(c) | rc(1)); }
+inline void Assembler::fmuls( FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMULS_OPCODE | frt(d) | fra(a) | frc(c) | rc(0)); }
+inline void Assembler::fmuls_(FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMULS_OPCODE | frt(d) | fra(a) | frc(c) | rc(1)); }
+inline void Assembler::fdiv(  FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIV_OPCODE  | frt(d) | fra(a) | frb(b) | rc(0)); }
+inline void Assembler::fdiv_( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIV_OPCODE  | frt(d) | fra(a) | frb(b) | rc(1)); }
+inline void Assembler::fdivs( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIVS_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); }
+inline void Assembler::fdivs_(FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIVS_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); }
+
+// PPC 1, section 4.6.6 Floating-Point Rounding and Conversion Instructions
+inline void Assembler::frsp(  FloatRegister d, FloatRegister b) { emit_int32( FRSP_OPCODE   | frt(d) | frb(b) | rc(0)); }
+inline void Assembler::fctid( FloatRegister d, FloatRegister b) { emit_int32( FCTID_OPCODE  | frt(d) | frb(b) | rc(0)); }
+inline void Assembler::fctidz(FloatRegister d, FloatRegister b) { emit_int32( FCTIDZ_OPCODE | frt(d) | frb(b) | rc(0)); }
+inline void Assembler::fctiw( FloatRegister d, FloatRegister b) { emit_int32( FCTIW_OPCODE  | frt(d) | frb(b) | rc(0)); }
+inline void Assembler::fctiwz(FloatRegister d, FloatRegister b) { emit_int32( FCTIWZ_OPCODE | frt(d) | frb(b) | rc(0)); }
+inline void Assembler::fcfid( FloatRegister d, FloatRegister b) { emit_int32( FCFID_OPCODE  | frt(d) | frb(b) | rc(0)); }
+inline void Assembler::fcfids(FloatRegister d, FloatRegister b) { emit_int32( FCFIDS_OPCODE | frt(d) | frb(b) | rc(0)); }
+
+// PPC 1, section 4.6.7 Floating-Point Compare Instructions
+inline void Assembler::fcmpu( ConditionRegister crx, FloatRegister a, FloatRegister b) { emit_int32( FCMPU_OPCODE | bf(crx) | fra(a) | frb(b)); }
+
+// PPC 1, section 5.2.1 Floating-Point Arithmetic Instructions
+inline void Assembler::fsqrt( FloatRegister d, FloatRegister b) { emit_int32( FSQRT_OPCODE  | frt(d) | frb(b) | rc(0)); }
+inline void Assembler::fsqrts(FloatRegister d, FloatRegister b) { emit_int32( FSQRTS_OPCODE | frt(d) | frb(b) | rc(0)); }
+
+// Vector instructions for >= Power6.
+inline void Assembler::lvebx( VectorRegister d, Register s1, Register s2) { emit_int32( LVEBX_OPCODE  | vrt(d) | ra0mem(s1) | rb(s2)); }
+inline void Assembler::lvehx( VectorRegister d, Register s1, Register s2) { emit_int32( LVEHX_OPCODE  | vrt(d) | ra0mem(s1) | rb(s2)); }
+inline void Assembler::lvewx( VectorRegister d, Register s1, Register s2) { emit_int32( LVEWX_OPCODE  | vrt(d) | ra0mem(s1) | rb(s2)); }
+inline void Assembler::lvx(   VectorRegister d, Register s1, Register s2) { emit_int32( LVX_OPCODE    | vrt(d) | ra0mem(s1) | rb(s2)); }
+inline void Assembler::lvxl(  VectorRegister d, Register s1, Register s2) { emit_int32( LVXL_OPCODE   | vrt(d) | ra0mem(s1) | rb(s2)); }
+inline void Assembler::stvebx(VectorRegister d, Register s1, Register s2) { emit_int32( STVEBX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
+inline void Assembler::stvehx(VectorRegister d, Register s1, Register s2) { emit_int32( STVEHX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
+inline void Assembler::stvewx(VectorRegister d, Register s1, Register s2) { emit_int32( STVEWX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); }
+inline void Assembler::stvx(  VectorRegister d, Register s1, Register s2) { emit_int32( STVX_OPCODE   | vrt(d) | ra0mem(s1) | rb(s2)); }
+inline void Assembler::stvxl( VectorRegister d, Register s1, Register s2) { emit_int32( STVXL_OPCODE  | vrt(d) | ra0mem(s1) | rb(s2)); }
+inline void Assembler::lvsl(  VectorRegister d, Register s1, Register s2) { emit_int32( LVSL_OPCODE   | vrt(d) | ra0mem(s1) | rb(s2)); }
+inline void Assembler::lvsr(  VectorRegister d, Register s1, Register s2) { emit_int32( LVSR_OPCODE   | vrt(d) | ra0mem(s1) | rb(s2)); }
+
+inline void Assembler::vpkpx(   VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKPX_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vpkshss( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSHSS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vpkswss( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSWSS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vpkshus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSHUS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vpkswus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSWUS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vpkuhum( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUHUM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vpkuwum( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUWUM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vpkuhus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUHUS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vpkuwus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUWUS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vupkhpx( VectorRegister d, VectorRegister b)                   { emit_int32( VUPKHPX_OPCODE | vrt(d) | vrb(b)); }
+inline void Assembler::vupkhsb( VectorRegister d, VectorRegister b)                   { emit_int32( VUPKHSB_OPCODE | vrt(d) | vrb(b)); }
+inline void Assembler::vupkhsh( VectorRegister d, VectorRegister b)                   { emit_int32( VUPKHSH_OPCODE | vrt(d) | vrb(b)); }
+inline void Assembler::vupklpx( VectorRegister d, VectorRegister b)                   { emit_int32( VUPKLPX_OPCODE | vrt(d) | vrb(b)); }
+inline void Assembler::vupklsb( VectorRegister d, VectorRegister b)                   { emit_int32( VUPKLSB_OPCODE | vrt(d) | vrb(b)); }
+inline void Assembler::vupklsh( VectorRegister d, VectorRegister b)                   { emit_int32( VUPKLSH_OPCODE | vrt(d) | vrb(b)); }
+inline void Assembler::vmrghb(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGHB_OPCODE  | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmrghw(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGHW_OPCODE  | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmrghh(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGHH_OPCODE  | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmrglb(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGLB_OPCODE  | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmrglw(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGLW_OPCODE  | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmrglh(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGLH_OPCODE  | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsplt(   VectorRegister d, int ui4,          VectorRegister b) { emit_int32( VSPLT_OPCODE   | vrt(d) | vsplt_uim(uimm(ui4,4)) | vrb(b)); }
+inline void Assembler::vsplth(  VectorRegister d, int ui3,          VectorRegister b) { emit_int32( VSPLTH_OPCODE  | vrt(d) | vsplt_uim(uimm(ui3,3)) | vrb(b)); }
+inline void Assembler::vspltw(  VectorRegister d, int ui2,          VectorRegister b) { emit_int32( VSPLTW_OPCODE  | vrt(d) | vsplt_uim(uimm(ui2,2)) | vrb(b)); }
+inline void Assembler::vspltisb(VectorRegister d, int si5)                            { emit_int32( VSPLTISB_OPCODE| vrt(d) | vsplti_sim(simm(si5,5))); }
+inline void Assembler::vspltish(VectorRegister d, int si5)                            { emit_int32( VSPLTISH_OPCODE| vrt(d) | vsplti_sim(simm(si5,5))); }
+inline void Assembler::vspltisw(VectorRegister d, int si5)                            { emit_int32( VSPLTISW_OPCODE| vrt(d) | vsplti_sim(simm(si5,5))); }
+inline void Assembler::vperm(   VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c){ emit_int32( VPERM_OPCODE | vrt(d) | vra(a) | vrb(b) | vrc(c)); }
+inline void Assembler::vsel(    VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c){ emit_int32( VSEL_OPCODE  | vrt(d) | vra(a) | vrb(b) | vrc(c)); }
+inline void Assembler::vsl(     VectorRegister d, VectorRegister a, VectorRegister b)                  { emit_int32( VSL_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsldoi(  VectorRegister d, VectorRegister a, VectorRegister b, int si4)         { emit_int32( VSLDOI_OPCODE| vrt(d) | vra(a) | vrb(b) | vsldoi_shb(simm(si4,4))); }
+inline void Assembler::vslo(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSLO_OPCODE    | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsr(     VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSR_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsro(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRO_OPCODE    | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vaddcuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDCUW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vaddshs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDSHS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vaddsbs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDSBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vaddsws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDSWS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vaddubm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUBM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vadduwm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUWM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vadduhm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUHM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vaddubs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vadduws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUWS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vadduhs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUHS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsubcuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBCUW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsubshs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBSHS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsubsbs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBSBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsubsws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBSWS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsububm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUBM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsubuwm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUWM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsubuhm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUHM_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsububs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsubuws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUWS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsubuhs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUHS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmulesb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULESB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmuleub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULEUB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmulesh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULESH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmuleuh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULEUH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmulosb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOSB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmuloub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOUB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmulosh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOSH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmulouh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOUH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmhaddshs(VectorRegister d,VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMHADDSHS_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
+inline void Assembler::vmhraddshs(VectorRegister d,VectorRegister a,VectorRegister b, VectorRegister c) { emit_int32( VMHRADDSHS_OPCODE| vrt(d) | vra(a) | vrb(b)| vrc(c)); }
+inline void Assembler::vmladduhm(VectorRegister d,VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMLADDUHM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
+inline void Assembler::vmsubuhm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUBUHM_OPCODE  | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
+inline void Assembler::vmsummbm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMMBM_OPCODE  | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
+inline void Assembler::vmsumshm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMSHM_OPCODE  | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
+inline void Assembler::vmsumshs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMSHS_OPCODE  | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
+inline void Assembler::vmsumuhm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMUHM_OPCODE  | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
+inline void Assembler::vmsumuhs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMUHS_OPCODE  | vrt(d) | vra(a) | vrb(b)| vrc(c)); }
+inline void Assembler::vsumsws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUMSWS_OPCODE  | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsum2sws(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM2SWS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsum4sbs(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM4SBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsum4ubs(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM4UBS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsum4shs(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM4SHS_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vavgsb(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGSB_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vavgsw(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGSW_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vavgsh(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGSH_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vavgub(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGUB_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vavguw(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGUW_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vavguh(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGUH_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmaxsb(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSB_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmaxsw(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSW_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmaxsh(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSH_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmaxub(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUB_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmaxuw(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUW_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vmaxuh(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUH_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vminsb(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSB_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vminsw(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSW_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vminsh(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSH_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vminub(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUB_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vminuw(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUW_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vminuh(  VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUH_OPCODE   | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vcmpequb(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
+inline void Assembler::vcmpequh(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
+inline void Assembler::vcmpequw(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
+inline void Assembler::vcmpgtsh(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
+inline void Assembler::vcmpgtsb(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
+inline void Assembler::vcmpgtsw(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
+inline void Assembler::vcmpgtub(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
+inline void Assembler::vcmpgtuh(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
+inline void Assembler::vcmpgtuw(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); }
+inline void Assembler::vcmpequb_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
+inline void Assembler::vcmpequh_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
+inline void Assembler::vcmpequw_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
+inline void Assembler::vcmpgtsh_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
+inline void Assembler::vcmpgtsb_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
+inline void Assembler::vcmpgtsw_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
+inline void Assembler::vcmpgtub_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
+inline void Assembler::vcmpgtuh_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
+inline void Assembler::vcmpgtuw_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
+inline void Assembler::vand(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAND_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vandc(   VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VANDC_OPCODE    | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vnor(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VNOR_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vor(     VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VOR_OPCODE      | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vxor(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VXOR_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vrlb(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLB_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vrlw(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLW_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vrlh(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLH_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vslb(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSLB_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vskw(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSKW_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vslh(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSLH_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsrb(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRB_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsrw(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRW_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsrh(    VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRH_OPCODE     | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsrab(   VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAB_OPCODE    | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsraw(   VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAW_OPCODE    | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsrah(   VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAH_OPCODE    | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::mtvscr(  VectorRegister b)                                     { emit_int32( MTVSCR_OPCODE   | vrb(b)); }
+inline void Assembler::mfvscr(  VectorRegister d)                                     { emit_int32( MFVSCR_OPCODE   | vrt(d)); }
+
+// ra0 version
+inline void Assembler::lwzx( Register d, Register s2) { emit_int32( LWZX_OPCODE | rt(d) | rb(s2));}
+inline void Assembler::lwz(  Register d, int si16   ) { emit_int32( LWZ_OPCODE  | rt(d) | d1(si16));}
+inline void Assembler::lwax( Register d, Register s2) { emit_int32( LWAX_OPCODE | rt(d) | rb(s2));}
+inline void Assembler::lwa(  Register d, int si16   ) { emit_int32( LWA_OPCODE  | rt(d) | ds(si16));}
+inline void Assembler::lhzx( Register d, Register s2) { emit_int32( LHZX_OPCODE | rt(d) | rb(s2));}
+inline void Assembler::lhz(  Register d, int si16   ) { emit_int32( LHZ_OPCODE  | rt(d) | d1(si16));}
+inline void Assembler::lhax( Register d, Register s2) { emit_int32( LHAX_OPCODE | rt(d) | rb(s2));}
+inline void Assembler::lha(  Register d, int si16   ) { emit_int32( LHA_OPCODE  | rt(d) | d1(si16));}
+inline void Assembler::lbzx( Register d, Register s2) { emit_int32( LBZX_OPCODE | rt(d) | rb(s2));}
+inline void Assembler::lbz(  Register d, int si16   ) { emit_int32( LBZ_OPCODE  | rt(d) | d1(si16));}
+inline void Assembler::ld(   Register d, int si16   ) { emit_int32( LD_OPCODE   | rt(d) | ds(si16));}
+inline void Assembler::ldx(  Register d, Register s2) { emit_int32( LDX_OPCODE  | rt(d) | rb(s2));}
+inline void Assembler::stwx( Register d, Register s2) { emit_int32( STWX_OPCODE | rs(d) | rb(s2));}
+inline void Assembler::stw(  Register d, int si16   ) { emit_int32( STW_OPCODE  | rs(d) | d1(si16));}
+inline void Assembler::sthx( Register d, Register s2) { emit_int32( STHX_OPCODE | rs(d) | rb(s2));}
+inline void Assembler::sth(  Register d, int si16   ) { emit_int32( STH_OPCODE  | rs(d) | d1(si16));}
+inline void Assembler::stbx( Register d, Register s2) { emit_int32( STBX_OPCODE | rs(d) | rb(s2));}
+inline void Assembler::stb(  Register d, int si16   ) { emit_int32( STB_OPCODE  | rs(d) | d1(si16));}
+inline void Assembler::std(  Register d, int si16   ) { emit_int32( STD_OPCODE  | rs(d) | ds(si16));}
+inline void Assembler::stdx( Register d, Register s2) { emit_int32( STDX_OPCODE | rs(d) | rb(s2));}
+
+// ra0 version
+inline void Assembler::icbi(    Register s2)          { emit_int32( ICBI_OPCODE   | rb(s2)           ); }
+//inline void Assembler::dcba(  Register s2)          { emit_int32( DCBA_OPCODE   | rb(s2)           ); }
+inline void Assembler::dcbz(    Register s2)          { emit_int32( DCBZ_OPCODE   | rb(s2)           ); }
+inline void Assembler::dcbst(   Register s2)          { emit_int32( DCBST_OPCODE  | rb(s2)           ); }
+inline void Assembler::dcbf(    Register s2)          { emit_int32( DCBF_OPCODE   | rb(s2)           ); }
+inline void Assembler::dcbt(    Register s2)          { emit_int32( DCBT_OPCODE   | rb(s2)           ); }
+inline void Assembler::dcbtct(  Register s2, int ct)  { emit_int32( DCBT_OPCODE   | rb(s2) | thct(ct)); }
+inline void Assembler::dcbtds(  Register s2, int ds)  { emit_int32( DCBT_OPCODE   | rb(s2) | thds(ds)); }
+inline void Assembler::dcbtst(  Register s2)          { emit_int32( DCBTST_OPCODE | rb(s2)           ); }
+inline void Assembler::dcbtstct(Register s2, int ct)  { emit_int32( DCBTST_OPCODE | rb(s2) | thct(ct)); }
+
+// ra0 version
+inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
+inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1)          { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); }
+inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); }
+inline void Assembler::stwcx_(Register s, Register b)                            { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::stdcx_(Register s, Register b)                            { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); }
+
+// ra0 version
+inline void Assembler::lfs( FloatRegister d, int si16)   { emit_int32( LFS_OPCODE  | frt(d) | simm(si16,16)); }
+inline void Assembler::lfsx(FloatRegister d, Register b) { emit_int32( LFSX_OPCODE | frt(d) | rb(b)); }
+inline void Assembler::lfd( FloatRegister d, int si16)   { emit_int32( LFD_OPCODE  | frt(d) | simm(si16,16)); }
+inline void Assembler::lfdx(FloatRegister d, Register b) { emit_int32( LFDX_OPCODE | frt(d) | rb(b)); }
+
+// ra0 version
+inline void Assembler::stfs( FloatRegister s, int si16)   { emit_int32( STFS_OPCODE  | frs(s) | simm(si16, 16)); }
+inline void Assembler::stfsx(FloatRegister s, Register b) { emit_int32( STFSX_OPCODE | frs(s) | rb(b)); }
+inline void Assembler::stfd( FloatRegister s, int si16)   { emit_int32( STFD_OPCODE  | frs(s) | simm(si16, 16)); }
+inline void Assembler::stfdx(FloatRegister s, Register b) { emit_int32( STFDX_OPCODE | frs(s) | rb(b)); }
+
+// ra0 version
+inline void Assembler::lvebx( VectorRegister d, Register s2) { emit_int32( LVEBX_OPCODE  | vrt(d) | rb(s2)); }
+inline void Assembler::lvehx( VectorRegister d, Register s2) { emit_int32( LVEHX_OPCODE  | vrt(d) | rb(s2)); }
+inline void Assembler::lvewx( VectorRegister d, Register s2) { emit_int32( LVEWX_OPCODE  | vrt(d) | rb(s2)); }
+inline void Assembler::lvx(   VectorRegister d, Register s2) { emit_int32( LVX_OPCODE    | vrt(d) | rb(s2)); }
+inline void Assembler::lvxl(  VectorRegister d, Register s2) { emit_int32( LVXL_OPCODE   | vrt(d) | rb(s2)); }
+inline void Assembler::stvebx(VectorRegister d, Register s2) { emit_int32( STVEBX_OPCODE | vrt(d) | rb(s2)); }
+inline void Assembler::stvehx(VectorRegister d, Register s2) { emit_int32( STVEHX_OPCODE | vrt(d) | rb(s2)); }
+inline void Assembler::stvewx(VectorRegister d, Register s2) { emit_int32( STVEWX_OPCODE | vrt(d) | rb(s2)); }
+inline void Assembler::stvx(  VectorRegister d, Register s2) { emit_int32( STVX_OPCODE   | vrt(d) | rb(s2)); }
+inline void Assembler::stvxl( VectorRegister d, Register s2) { emit_int32( STVXL_OPCODE  | vrt(d) | rb(s2)); }
+inline void Assembler::lvsl(  VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE   | vrt(d) | rb(s2)); }
+inline void Assembler::lvsr(  VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE   | vrt(d) | rb(s2)); }
+
+
+inline void Assembler::load_const(Register d, void* x, Register tmp) {
+   load_const(d, (long)x, tmp);
+}
+
+// Load a 64 bit constant encoded by a `Label'. This works for bound
+// labels as well as unbound ones. For unbound labels, the code will
+// be patched as soon as the label gets bound.
+inline void Assembler::load_const(Register d, Label& L, Register tmp) {
+  load_const(d, target(L), tmp);
+}
+
+// Load a 64 bit constant encoded by an AddressLiteral. patchable.
+inline void Assembler::load_const(Register d, AddressLiteral& a, Register tmp) {
+  assert(d != R0, "R0 not allowed");
+  // First relocate (we don't change the offset in the RelocationHolder,
+  // just pass a.rspec()), then delegate to load_const(Register, long).
+  relocate(a.rspec());
+  load_const(d, (long)a.value(), tmp);
+}
+
+
+#endif // CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/bytecodeInterpreter_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/bytecodeInterpreter_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,105 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_BYTECODEINTERPRETER_PPC_HPP
+#define CPU_PPC_VM_BYTECODEINTERPRETER_PPC_HPP
+
+// Platform specific for C++ based Interpreter
+#define LOTS_OF_REGS    /* Lets interpreter use plenty of registers */
+
+private:
+
+    // Save the bottom of the stack after frame manager setup. For ease of restoration after return
+    // from recursive interpreter call.
+    intptr_t* _frame_bottom;              // Saved bottom of frame manager frame.
+    address   _last_Java_pc;              // Pc to return to in frame manager.
+    intptr_t* _last_Java_fp;              // frame pointer
+    intptr_t* _last_Java_sp;              // stack pointer
+    interpreterState _self_link;          // Previous interpreter state  // sometimes points to self???
+    double    _native_fresult;            // Save result of native calls that might return floats.
+    intptr_t  _native_lresult;            // Save result of native calls that might return handle/longs.
+
+public:
+    address last_Java_pc(void)            { return _last_Java_pc; }
+    intptr_t* last_Java_fp(void)          { return _last_Java_fp; }
+
+    static ByteSize native_lresult_offset() {
+      return byte_offset_of(BytecodeInterpreter, _native_lresult);
+    }
+
+    static ByteSize native_fresult_offset() {
+      return byte_offset_of(BytecodeInterpreter, _native_fresult);
+    }
+
+    static void pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp);
+
+#define SET_LAST_JAVA_FRAME()   THREAD->frame_anchor()->set(istate->_last_Java_sp, istate->_last_Java_pc);
+#define RESET_LAST_JAVA_FRAME() THREAD->frame_anchor()->clear();
+
+
+// Macros for accessing the stack.
+#undef STACK_INT
+#undef STACK_FLOAT
+#undef STACK_ADDR
+#undef STACK_OBJECT
+#undef STACK_DOUBLE
+#undef STACK_LONG
+
+// JavaStack Implementation
+#define STACK_SLOT(offset)    ((address) &topOfStack[-(offset)])
+#define STACK_INT(offset)     (*((jint*) &topOfStack[-(offset)]))
+#define STACK_FLOAT(offset)   (*((jfloat *) &topOfStack[-(offset)]))
+#define STACK_OBJECT(offset)  (*((oop *) &topOfStack [-(offset)]))
+#define STACK_DOUBLE(offset)  (((VMJavaVal64*) &topOfStack[-(offset)])->d)
+#define STACK_LONG(offset)    (((VMJavaVal64 *) &topOfStack[-(offset)])->l)
+
+#define SET_STACK_SLOT(value, offset)   (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value))
+#define SET_STACK_ADDR(value, offset)   (*((address *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_INT(value, offset)    (*((jint *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_FLOAT(value, offset)  (*((jfloat *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value))
+#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d =  \
+                                                 ((VMJavaVal64*)(addr))->d)
+#define SET_STACK_LONG(value, offset)   (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value))
+#define SET_STACK_LONG_FROM_ADDR(addr, offset)   (((VMJavaVal64*)&topOfStack[-(offset)])->l =  \
+                                                 ((VMJavaVal64*)(addr))->l)
+// JavaLocals implementation
+
+#define LOCALS_SLOT(offset)    ((intptr_t*)&locals[-(offset)])
+#define LOCALS_ADDR(offset)    ((address)locals[-(offset)])
+#define LOCALS_INT(offset)     (*(jint*)&(locals[-(offset)]))
+#define LOCALS_OBJECT(offset)  ((oop)locals[-(offset)])
+#define LOCALS_LONG_AT(offset) (((address)&locals[-((offset) + 1)]))
+#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)]))
+
+#define SET_LOCALS_SLOT(value, offset)    (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value))
+#define SET_LOCALS_INT(value, offset)     (*((jint *)&locals[-(offset)]) = (value))
+#define SET_LOCALS_DOUBLE(value, offset)  (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value))
+#define SET_LOCALS_LONG(value, offset)    (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value))
+#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \
+
+
+#endif // CPU_PPC_VM_BYTECODEINTERPRETER_PPC_PP
diff -r ca3dacaf9041 src/cpu/ppc/vm/bytecodeInterpreter_ppc.inline.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/bytecodeInterpreter_ppc.inline.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,290 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_BYTECODEINTERPRETER_PPC_INLINE_HPP
+#define CPU_PPC_VM_BYTECODEINTERPRETER_PPC_INLINE_HPP
+
+#ifdef CC_INTERP
+
+// Inline interpreter functions for ppc.
+
+#include <math.h>
+
+inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; }
+inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; }
+inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; }
+inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; }
+inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return (jfloat)fmod((double)op1, (double)op2); }
+
+inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; }
+
+inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) {
+  return ( op1 < op2 ? -1 :
+               op1 > op2 ? 1 :
+                   op1 == op2 ? 0 :
+                       (direction == -1 || direction == 1) ? direction : 0);
+
+}
+
+inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) {
+  to[0] = from[0]; to[1] = from[1];
+}
+
+// The long operations depend on compiler support for "long long" on ppc.
+
+inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) {
+  return op1 + op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) {
+  return op1 & op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) {
+  if (op1 == min_jlong && op2 == -1) return op1;
+  return op1 / op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) {
+  return op1 * op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) {
+  return op1 | op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) {
+  return op1 - op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) {
+  return op1 ^ op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) {
+  if (op1 == min_jlong && op2 == -1) return 0;
+  return op1 % op2;
+}
+
+inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) {
+  return ((uint64_t) op1) >> (op2 & 0x3F);
+}
+
+inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) {
+  return op1 >> (op2 & 0x3F);
+}
+
+inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) {
+  return op1 << (op2 & 0x3F);
+}
+
+inline jlong BytecodeInterpreter::VMlongNeg(jlong op) {
+  return -op;
+}
+
+inline jlong BytecodeInterpreter::VMlongNot(jlong op) {
+  return ~op;
+}
+
+inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) {
+  return (op <= 0);
+}
+
+inline int32_t BytecodeInterpreter::VMlongGez(jlong op) {
+  return (op >= 0);
+}
+
+inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) {
+  return (op == 0);
+}
+
+inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) {
+  return (op1 == op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) {
+  return (op1 != op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) {
+  return (op1 >= op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) {
+  return (op1 <= op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) {
+  return (op1 < op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) {
+  return (op1 > op2);
+}
+
+inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) {
+  return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0);
+}
+
+// Long conversions
+
+inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) {
+  return (jdouble) val;
+}
+
+inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) {
+  return (jfloat) val;
+}
+
+inline jint BytecodeInterpreter::VMlong2Int(jlong val) {
+  return (jint) val;
+}
+
+// Double Arithmetic
+
+inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) {
+  return op1 + op2;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) {
+  return op1 / op2;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) {
+  return op1 * op2;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) {
+  return -op;
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) {
+  return fmod(op1, op2);
+}
+
+inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) {
+  return op1 - op2;
+}
+
+inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) {
+  return ( op1 < op2 ? -1 :
+               op1 > op2 ? 1 :
+                   op1 == op2 ? 0 :
+                       (direction == -1 || direction == 1) ? direction : 0);
+}
+
+// Double Conversions
+
+inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) {
+  return (jfloat) val;
+}
+
+// Float Conversions
+
+inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) {
+  return (jdouble) op;
+}
+
+// Integer Arithmetic
+
+inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) {
+  return op1 + op2;
+}
+
+inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) {
+  return op1 & op2;
+}
+
+inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) {
+  /* it's possible we could catch this special case implicitly */
+  if ((juint)op1 == 0x80000000 && op2 == -1) return op1;
+  else return op1 / op2;
+}
+
+inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) {
+  return op1 * op2;
+}
+
+inline jint BytecodeInterpreter::VMintNeg(jint op) {
+  return -op;
+}
+
+inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) {
+  return op1 | op2;
+}
+
+inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) {
+  /* it's possible we could catch this special case implicitly */
+  if ((juint)op1 == 0x80000000 && op2 == -1) return 0;
+  else return op1 % op2;
+}
+
+inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) {
+  return op1 <<  (op2 & 0x1f);
+}
+
+inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) {
+  return op1 >>  (op2 & 0x1f);
+}
+
+inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) {
+  return op1 - op2;
+}
+
+inline juint BytecodeInterpreter::VMintUshr(jint op1, jint op2) {
+  return ((juint) op1) >> (op2 & 0x1f);
+}
+
+inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) {
+  return op1 ^ op2;
+}
+
+inline jdouble BytecodeInterpreter::VMint2Double(jint val) {
+  return (jdouble) val;
+}
+
+inline jfloat BytecodeInterpreter::VMint2Float(jint val) {
+  return (jfloat) val;
+}
+
+inline jlong BytecodeInterpreter::VMint2Long(jint val) {
+  return (jlong) val;
+}
+
+inline jchar BytecodeInterpreter::VMint2Char(jint val) {
+  return (jchar) val;
+}
+
+inline jshort BytecodeInterpreter::VMint2Short(jint val) {
+  return (jshort) val;
+}
+
+inline jbyte BytecodeInterpreter::VMint2Byte(jint val) {
+  return (jbyte) val;
+}
+
+#endif // CC_INTERP
+
+#endif // CPU_PPC_VM_BYTECODEINTERPRETER_PPC_INLINE_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/bytecodes_ppc.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/bytecodes_ppc.cpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/bytecodes.hpp"
+
+void Bytecodes::pd_initialize() {
+  // No ppc specific initialization.
+}
diff -r ca3dacaf9041 src/cpu/ppc/vm/bytecodes_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/bytecodes_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_BYTECODES_PPC_HPP
+#define CPU_PPC_VM_BYTECODES_PPC_HPP
+
+// No ppc64 specific bytecodes
+
+#endif // CPU_PPC_VM_BYTECODES_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/bytes_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/bytes_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_BYTES_PPC_HPP
+#define CPU_PPC_VM_BYTES_PPC_HPP
+
+#include "memory/allocation.hpp"
+
+class Bytes: AllStatic {
+ public:
+  // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering
+  // PowerPC needs to check for alignment.
+
+  // can I count on address always being a pointer to an unsigned char? Yes
+
+  // Returns true, if the byte ordering used by Java is different from the nativ byte ordering
+  // of the underlying machine. For example, true for Intel x86, False, for Solaris on Sparc.
+  static inline bool is_Java_byte_ordering_different() { return false; }
+
+  // Thus, a swap between native and Java ordering is always a no-op:
+  static inline u2   swap_u2(u2 x)  { return x; }
+  static inline u4   swap_u4(u4 x)  { return x; }
+  static inline u8   swap_u8(u8 x)  { return x; }
+
+  static inline u2   get_native_u2(address p) {
+    return (intptr_t(p) & 1) == 0
+             ?   *(u2*)p
+             :   ( u2(p[0]) << 8 )
+               | ( u2(p[1])      );
+  }
+
+  static inline u4   get_native_u4(address p) {
+    switch (intptr_t(p) & 3) {
+     case 0:  return *(u4*)p;
+
+     case 2:  return (  u4( ((u2*)p)[0] ) << 16  )
+                   | (  u4( ((u2*)p)[1] )        );
+
+    default:  return ( u4(p[0]) << 24 )
+                   | ( u4(p[1]) << 16 )
+                   | ( u4(p[2]) <<  8 )
+                   |   u4(p[3]);
+    }
+  }
+
+  static inline u8   get_native_u8(address p) {
+    switch (intptr_t(p) & 7) {
+      case 0:  return *(u8*)p;
+
+      case 4:  return (  u8( ((u4*)p)[0] ) << 32  )
+                    | (  u8( ((u4*)p)[1] )        );
+
+      case 2:  return (  u8( ((u2*)p)[0] ) << 48  )
+                    | (  u8( ((u2*)p)[1] ) << 32  )
+                    | (  u8( ((u2*)p)[2] ) << 16  )
+                    | (  u8( ((u2*)p)[3] )        );
+
+     default:  return ( u8(p[0]) << 56 )
+                    | ( u8(p[1]) << 48 )
+                    | ( u8(p[2]) << 40 )
+                    | ( u8(p[3]) << 32 )
+                    | ( u8(p[4]) << 24 )
+                    | ( u8(p[5]) << 16 )
+                    | ( u8(p[6]) <<  8 )
+                    |   u8(p[7]);
+    }
+  }
+
+
+
+  static inline void put_native_u2(address p, u2 x) {
+    if ( (intptr_t(p) & 1) == 0 ) { *(u2*)p = x; }
+    else {
+      p[0] = x >> 8;
+      p[1] = x;
+    }
+  }
+
+  static inline void put_native_u4(address p, u4 x) {
+    switch ( intptr_t(p) & 3 ) {
+    case 0:  *(u4*)p = x;
+              break;
+
+    case 2:  ((u2*)p)[0] = x >> 16;
+             ((u2*)p)[1] = x;
+             break;
+
+    default: ((u1*)p)[0] = x >> 24;
+             ((u1*)p)[1] = x >> 16;
+             ((u1*)p)[2] = x >>  8;
+             ((u1*)p)[3] = x;
+             break;
+    }
+  }
+
+  static inline void put_native_u8(address p, u8 x) {
+    switch ( intptr_t(p) & 7 ) {
+    case 0:  *(u8*)p = x;
+             break;
+
+    case 4:  ((u4*)p)[0] = x >> 32;
+             ((u4*)p)[1] = x;
+             break;
+
+    case 2:  ((u2*)p)[0] = x >> 48;
+             ((u2*)p)[1] = x >> 32;
+             ((u2*)p)[2] = x >> 16;
+             ((u2*)p)[3] = x;
+             break;
+
+    default: ((u1*)p)[0] = x >> 56;
+             ((u1*)p)[1] = x >> 48;
+             ((u1*)p)[2] = x >> 40;
+             ((u1*)p)[3] = x >> 32;
+             ((u1*)p)[4] = x >> 24;
+             ((u1*)p)[5] = x >> 16;
+             ((u1*)p)[6] = x >>  8;
+             ((u1*)p)[7] = x;
+    }
+  }
+
+
+  // Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering)
+  // (no byte-order reversal is needed since Power CPUs are big-endian oriented).
+  static inline u2   get_Java_u2(address p) { return get_native_u2(p); }
+  static inline u4   get_Java_u4(address p) { return get_native_u4(p); }
+  static inline u8   get_Java_u8(address p) { return get_native_u8(p); }
+
+  static inline void put_Java_u2(address p, u2 x)     { put_native_u2(p, x); }
+  static inline void put_Java_u4(address p, u4 x)     { put_native_u4(p, x); }
+  static inline void put_Java_u8(address p, u8 x)     { put_native_u8(p, x); }
+};
+
+#endif // CPU_PPC_VM_BYTES_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/codeBuffer_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/codeBuffer_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_CODEBUFFER_PPC_HPP
+#define CPU_PPC_VM_CODEBUFFER_PPC_HPP
+
+private:
+  void pd_initialize() {}
+
+public:
+  void flush_bundle(bool start_new_bundle) {}
+
+#endif // CPU_PPC_VM_CODEBUFFER_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/compiledIC_ppc.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/compiledIC_ppc.cpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/compiledIC.hpp"
+#include "code/icBuffer.hpp"
+#include "code/nmethod.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/safepoint.hpp"
+#ifdef COMPILER2
+#include "opto/matcher.hpp"
+#endif
+
+// Release the CompiledICHolder* associated with this call site is there is one.
+void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
+  // This call site might have become stale so inspect it carefully.
+  NativeCall* call = nativeCall_at(call_site->addr());
+  if (is_icholder_entry(call->destination())) {
+    NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
+    InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
+  }
+}
+
+bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
+  // This call site might have become stale so inspect it carefully.
+  NativeCall* call = nativeCall_at(call_site->addr());
+  return is_icholder_entry(call->destination());
+}
+
+//-----------------------------------------------------------------------------
+// High-level access to an inline cache. Guaranteed to be MT-safe.
+
+CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
+  : _ic_call(call)
+{
+  address ic_call = call->instruction_address();
+
+  assert(ic_call != NULL, "ic_call address must be set");
+  assert(nm != NULL, "must pass nmethod");
+  assert(nm->contains(ic_call), "must be in nmethod");
+
+  // Search for the ic_call at the given address.
+  RelocIterator iter(nm, ic_call, ic_call+1);
+  bool ret = iter.next();
+  assert(ret == true, "relocInfo must exist at this address");
+  assert(iter.addr() == ic_call, "must find ic_call");
+  if (iter.type() == relocInfo::virtual_call_type) {
+    virtual_call_Relocation* r = iter.virtual_call_reloc();
+    _is_optimized = false;
+    _value = nativeMovConstReg_at(r->cached_value());
+  } else {
+    assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
+    _is_optimized = true;
+    _value = NULL;
+  }
+}
+
+// ----------------------------------------------------------------------------
+
+// A PPC CompiledStaticCall looks like this:
+//
+// >>>> consts
+//
+// [call target1]
+// [IC cache]
+// [call target2]
+//
+// <<<< consts
+// >>>> insts
+//
+// bl offset16               -+  -+             ??? // How many bits available?
+//                            |   |
+// <<<< insts                 |   |
+// >>>> stubs                 |   |
+//                            |   |- trampoline_stub_Reloc
+// trampoline stub:           | <-+
+//   r2 = toc                 |
+//   r2 = [r2 + offset]       |       // Load call target1 from const section
+//   mtctr r2                 |
+//   bctr                     |- static_stub_Reloc
+// comp_to_interp_stub:   <---+
+//   r1 = toc
+//   ICreg = [r1 + IC_offset]         // Load IC from const section
+//   r1    = [r1 + offset]            // Load call target2 from const section
+//   mtctr r1
+//   bctr
+//
+// <<<< stubs
+//
+// The call instruction in the code either
+// - branches directly to a compiled method if offset encodable in instruction
+// - branches to the trampoline stub if offset to compiled method not encodable
+// - branches to the compiled_to_interp stub if target interpreted
+//
+// Further there are three relocations from the loads to the constants in
+// the constant section.
+//
+// Usage of r1 and r2 in the stubs allows to distinguish them.
+
+const int IC_pos_in_java_to_interp_stub = 8;
+#define __ _masm.
+void CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
+#ifdef COMPILER2
+  // Get the mark within main instrs section which is set to the address of the call.
+  address call_addr = cbuf.insts_mark();
+
+  // Note that the code buffer's insts_mark is always relative to insts.
+  // That's why we must use the macroassembler to generate a stub.
+  MacroAssembler _masm(&cbuf);
+
+  // Start the stub.
+  address stub = __ start_a_stub(CompiledStaticCall::to_interp_stub_size());
+  if (stub == NULL) {
+    Compile::current()->env()->record_out_of_memory_failure();
+    return;
+  }
+
+  // For java_to_interp stubs we use R11_scratch1 as scratch register
+  // and in call trampoline stubs we use R12_scratch2. This way we
+  // can distinguish them (see is_NativeCallTrampolineStub_at()).
+  Register reg_scratch = R11_scratch1;
+
+  // Create a static stub relocation which relates this stub
+  // with the call instruction at insts_call_instruction_offset in the
+  // instructions code-section.
+  __ relocate(static_stub_Relocation::spec(call_addr));
+  const int stub_start_offset = __ offset();
+
+  // Now, create the stub's code:
+  // - load the TOC
+  // - load the inline cache oop from the constant pool
+  // - load the call target from the constant pool
+  // - call
+  __ calculate_address_from_global_toc(reg_scratch, __ method_toc());
+  AddressLiteral ic = __ allocate_metadata_address((Metadata *)NULL);
+  __ load_const_from_method_toc(as_Register(Matcher::inline_cache_reg_encode()), ic, reg_scratch);
+
+  if (ReoptimizeCallSequences) {
+    __ b64_patchable((address)-1, relocInfo::none);
+  } else {
+    AddressLiteral a((address)-1);
+    __ load_const_from_method_toc(reg_scratch, a, reg_scratch);
+    __ mtctr(reg_scratch);
+    __ bctr();
+  }
+
+  // FIXME: Assert that the stub can be identified and patched.
+
+  // Java_to_interp_stub_size should be good.
+  assert((__ offset() - stub_start_offset) <= CompiledStaticCall::to_interp_stub_size(),
+         "should be good size");
+  assert(!is_NativeCallTrampolineStub_at(__ addr_at(stub_start_offset)),
+         "must not confuse java_to_interp with trampoline stubs");
+
+ // End the stub.
+  __ end_a_stub();
+#else
+  ShouldNotReachHere();
+#endif
+}
+#undef __
+
+// Size of java_to_interp stub, this doesn't need to be accurate but it must
+// be larger or equal to the real size of the stub.
+// Used for optimization in Compile::Shorten_branches.
+int CompiledStaticCall::to_interp_stub_size() {
+  return 12 * BytesPerInstWord;
+}
+
+// Relocation entries for call stub, compiled java to interpreter.
+// Used for optimization in Compile::Shorten_branches.
+int CompiledStaticCall::reloc_to_interp_stub() {
+  return 5;
+}
+
+void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) {
+  address stub = find_stub();
+  guarantee(stub != NULL, "stub not found");
+
+  if (TraceICs) {
+    ResourceMark rm;
+    tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s",
+                  instruction_address(),
+                  callee->name_and_sig_as_C_string());
+  }
+
+  // Creation also verifies the object.
+  NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + IC_pos_in_java_to_interp_stub);
+  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
+
+  assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(),
+         "a) MT-unsafe modification of inline cache");
+  assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry,
+         "b) MT-unsafe modification of inline cache");
+
+  // Update stub.
+  method_holder->set_data((intptr_t)callee());
+  jump->set_jump_destination(entry);
+
+  // Update jump to call.
+  set_destination_mt_safe(stub);
+}
+
+void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) {
+  assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call");
+  // Reset stub.
+  address stub = static_stub->addr();
+  assert(stub != NULL, "stub not found");
+  // Creation also verifies the object.
+  NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + IC_pos_in_java_to_interp_stub);
+  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
+  method_holder->set_data(0);
+  jump->set_jump_destination((address)-1);
+}
+
+//-----------------------------------------------------------------------------
+// Non-product mode code
+#ifndef PRODUCT
+
+void CompiledStaticCall::verify() {
+  // Verify call.
+  NativeCall::verify();
+  if (os::is_MP()) {
+    verify_alignment();
+  }
+
+  // Verify stub.
+  address stub = find_stub();
+  assert(stub != NULL, "no stub found for static call");
+  // Creation also verifies the object.
+  NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + IC_pos_in_java_to_interp_stub);
+  NativeJump*        jump          = nativeJump_at(method_holder->next_instruction_address());
+
+  // Verify state.
+  assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check");
+}
+
+#endif // !PRODUCT
diff -r ca3dacaf9041 src/cpu/ppc/vm/copy_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/copy_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_COPY_PPC_HPP
+#define CPU_PPC_VM_COPY_PPC_HPP
+
+#ifndef PPC64
+#error "copy currently only implemented for PPC64"
+#endif
+
+// Inline functions for memory copy and fill.
+
+static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  (void)memmove(to, from, count * HeapWordSize);
+}
+
+static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  switch (count) {
+  case 8:  to[7] = from[7];
+  case 7:  to[6] = from[6];
+  case 6:  to[5] = from[5];
+  case 5:  to[4] = from[4];
+  case 4:  to[3] = from[3];
+  case 3:  to[2] = from[2];
+  case 2:  to[1] = from[1];
+  case 1:  to[0] = from[0];
+  case 0:  break;
+  default: (void)memcpy(to, from, count * HeapWordSize);
+           break;
+  }
+}
+
+static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) {
+  switch (count) {
+  case 8:  to[7] = from[7];
+  case 7:  to[6] = from[6];
+  case 6:  to[5] = from[5];
+  case 5:  to[4] = from[4];
+  case 4:  to[3] = from[3];
+  case 3:  to[2] = from[2];
+  case 2:  to[1] = from[1];
+  case 1:  to[0] = from[0];
+  case 0:  break;
+  default: while (count-- > 0) {
+             *to++ = *from++;
+           }
+           break;
+  }
+}
+
+static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  (void)memmove(to, from, count * HeapWordSize);
+}
+
+static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) {
+  pd_disjoint_words(from, to, count);
+}
+
+static void pd_conjoint_bytes(void* from, void* to, size_t count) {
+  (void)memmove(to, from, count);
+}
+
+static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) {
+  (void)memmove(to, from, count);
+}
+
+// Template for atomic, element-wise copy.
+template <class T>
+static void copy_conjoint_atomic(T* from, T* to, size_t count) {
+  if (from > to) {
+    while (count-- > 0) {
+      // Copy forwards
+      *to++ = *from++;
+    }
+  } else {
+    from += count - 1;
+    to   += count - 1;
+    while (count-- > 0) {
+      // Copy backwards
+      *to-- = *from--;
+    }
+  }
+}
+
+static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) {
+  copy_conjoint_atomic<jshort>(from, to, count);
+}
+
+static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) {
+  copy_conjoint_atomic<jint>(from, to, count);
+}
+
+static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) {
+  copy_conjoint_atomic<jlong>(from, to, count);
+}
+
+static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) {
+  copy_conjoint_atomic<oop>(from, to, count);
+}
+
+static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_bytes_atomic(from, to, count);
+}
+
+static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count);
+}
+
+static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_jints_atomic((jint*)from, (jint*)to, count);
+}
+
+static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count);
+}
+
+static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) {
+  pd_conjoint_oops_atomic((oop*)from, (oop*)to, count);
+}
+
+static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) {
+  julong* to = (julong*)tohw;
+  julong  v  = ((julong)value << 32) | value;
+  while (count-- > 0) {
+    *to++ = v;
+  }
+}
+
+static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) {
+  pd_fill_to_words(tohw, count, value);
+}
+
+static void pd_fill_to_bytes(void* to, size_t count, jubyte value) {
+  (void)memset(to, value, count);
+}
+
+static void pd_zero_to_words(HeapWord* tohw, size_t count) {
+  pd_fill_to_words(tohw, count, 0);
+}
+
+static void pd_zero_to_bytes(void* to, size_t count) {
+  (void)memset(to, 0, count);
+}
+
+#endif // CPU_PPC_VM_COPY_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/cppInterpreterGenerator_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/cppInterpreterGenerator_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_CPPINTERPRETERGENERATOR_PPC_HPP
+#define CPU_PPC_VM_CPPINTERPRETERGENERATOR_PPC_HPP
+
+  address generate_normal_entry(void);
+  address generate_native_entry(void);
+
+  void lock_method(void);
+  void unlock_method(void);
+
+  void generate_counter_incr(Label& overflow);
+  void generate_counter_overflow(Label& do_continue);
+
+  void generate_more_monitors();
+  void generate_deopt_handling(Register result_index);
+
+  void generate_compute_interpreter_state(Label& exception_return);
+
+#endif // CPU_PPC_VM_CPPINTERPRETERGENERATOR_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/cppInterpreter_ppc.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/cppInterpreter_ppc.cpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,3044 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interpreter/bytecodeHistogram.hpp"
+#include "interpreter/cppInterpreter.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterGenerator.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/methodData.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/timer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+#ifdef SHARK
+#include "shark/shark_globals.hpp"
+#endif
+
+#ifdef CC_INTERP
+
+#define __ _masm->
+
+// Contains is used for identifying interpreter frames during a stack-walk.
+// A frame with a PC in InterpretMethod must be identified as a normal C frame.
+bool CppInterpreter::contains(address pc) {
+  return _code->contains(pc);
+}
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) // nothing
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+static address interpreter_frame_manager        = NULL;
+static address frame_manager_specialized_return = NULL;
+static address native_entry                     = NULL;
+
+static address interpreter_return_address       = NULL;
+
+static address unctrap_frame_manager_entry      = NULL;
+
+static address deopt_frame_manager_return_atos  = NULL;
+static address deopt_frame_manager_return_btos  = NULL;
+static address deopt_frame_manager_return_itos  = NULL;
+static address deopt_frame_manager_return_ltos  = NULL;
+static address deopt_frame_manager_return_ftos  = NULL;
+static address deopt_frame_manager_return_dtos  = NULL;
+static address deopt_frame_manager_return_vtos  = NULL;
+
+// A result handler converts/unboxes a native call result into
+// a java interpreter/compiler result. The current frame is an
+// interpreter frame.
+address CppInterpreterGenerator::generate_result_handler_for(BasicType type) {
+  return AbstractInterpreterGenerator::generate_result_handler_for(type);
+}
+
+// tosca based result to c++ interpreter stack based result.
+address CppInterpreterGenerator::generate_tosca_to_stack_converter(BasicType type) {
+  //
+  // A result is in the native abi result register from a native
+  // method call. We need to return this result to the interpreter by
+  // pushing the result on the interpreter's stack.
+  //
+  // Registers alive:
+  //   R3_ARG1(R3_RET)/F1_ARG1(F1_RET) - result to move
+  //   R4_ARG2                         - address of tos
+  //   LR
+  //
+  // Registers updated:
+  //   R3_RET(R3_ARG1)   - address of new tos (== R17_tos for T_VOID)
+  //
+
+  int number_of_used_slots = 1;
+
+  const Register tos = R4_ARG2;
+  Label done;
+  Label is_false;
+
+  address entry = __ pc();
+
+  switch (type) {
+  case T_BOOLEAN:
+    __ cmpwi(CCR0, R3_RET, 0);
+    __ beq(CCR0, is_false);
+    __ li(R3_RET, 1);
+    __ stw(R3_RET, 0, tos);
+    __ b(done);
+    __ bind(is_false);
+    __ li(R3_RET, 0);
+    __ stw(R3_RET, 0, tos);
+    break;
+  case T_BYTE:
+  case T_CHAR:
+  case T_SHORT:
+  case T_INT:
+    __ stw(R3_RET, 0, tos);
+    break;
+  case T_LONG:
+    number_of_used_slots = 2;
+    // mark unused slot for debugging
+    // long goes to topmost slot
+    __ std(R3_RET, -BytesPerWord, tos);
+    __ li(R3_RET, 0);
+    __ std(R3_RET, 0, tos);
+    break;
+  case T_OBJECT:
+    __ verify_oop(R3_RET);
+    __ std(R3_RET, 0, tos);
+    break;
+  case T_FLOAT:
+    __ stfs(F1_RET, 0, tos);
+    break;
+  case T_DOUBLE:
+    number_of_used_slots = 2;
+    // mark unused slot for debugging
+    __ li(R3_RET, 0);
+    __ std(R3_RET, 0, tos);
+    // double goes to topmost slot
+    __ stfd(F1_RET, -BytesPerWord, tos);
+    break;
+  case T_VOID:
+    number_of_used_slots = 0;
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+
+  __ BIND(done);
+
+  // new expression stack top
+  __ addi(R3_RET, tos, -BytesPerWord * number_of_used_slots);
+
+  __ blr();
+
+  return entry;
+}
+
+address CppInterpreterGenerator::generate_stack_to_stack_converter(BasicType type) {
+  //
+  // Copy the result from the callee's stack to the caller's stack,
+  // caller and callee both being interpreted.
+  //
+  // Registers alive
+  //   R3_ARG1        - address of callee's tos + BytesPerWord
+  //   R4_ARG2        - address of caller's tos [i.e. free location]
+  //   LR
+  //
+  //   stack grows upwards, memory grows downwards.
+  //
+  //   [      free         ]  <-- callee's tos
+  //   [  optional result  ]  <-- R3_ARG1
+  //   [  optional dummy   ]
+  //          ...
+  //   [      free         ]  <-- caller's tos, R4_ARG2
+  //          ...
+  // Registers updated
+  //   R3_RET(R3_ARG1) - address of caller's new tos
+  //
+  //   stack grows upwards, memory grows downwards.
+  //
+  //   [      free         ]  <-- current tos, R3_RET
+  //   [  optional result  ]
+  //   [  optional dummy   ]
+  //          ...
+  //
+
+  const Register from = R3_ARG1;
+  const Register ret  = R3_ARG1;
+  const Register tos  = R4_ARG2;
+  const Register tmp1 = R21_tmp1;
+  const Register tmp2 = R22_tmp2;
+
+  address entry = __ pc();
+
+  switch (type) {
+  case T_BOOLEAN:
+  case T_BYTE:
+  case T_CHAR:
+  case T_SHORT:
+  case T_INT:
+  case T_FLOAT:
+    __ lwz(tmp1, 0, from);
+    __ stw(tmp1, 0, tos);
+    // New expression stack top.
+    __ addi(ret, tos, - BytesPerWord);
+    break;
+  case T_LONG:
+  case T_DOUBLE:
+    // Move both entries for debug purposes even though only one is live.
+    __ ld(tmp1, BytesPerWord, from);
+    __ ld(tmp2, 0, from);
+    __ std(tmp1, 0, tos);
+    __ std(tmp2, -BytesPerWord, tos);
+    // New expression stack top.
+    __ addi(ret, tos, - 2 * BytesPerWord); // two slots
+    break;
+  case T_OBJECT:
+    __ ld(tmp1, 0, from);
+    __ verify_oop(tmp1);
+    __ std(tmp1, 0, tos);
+    // New expression stack top.
+    __ addi(ret, tos, - BytesPerWord);
+    break;
+  case T_VOID:
+    // New expression stack top.
+    __ mr(ret, tos);
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+
+  __ blr();
+
+  return entry;
+}
+
+address CppInterpreterGenerator::generate_stack_to_native_abi_converter(BasicType type) {
+  //
+  // Load a result from the callee's stack into the caller's expecting
+  // return register, callee being interpreted, caller being call stub
+  // or jit code.
+  //
+  // Registers alive
+  //   R3_ARG1   - callee expression tos + BytesPerWord
+  //   LR
+  //
+  //   stack grows upwards, memory grows downwards.
+  //
+  //   [      free         ]  <-- callee's tos
+  //   [  optional result  ]  <-- R3_ARG1
+  //   [  optional dummy   ]
+  //          ...
+  //
+  // Registers updated
+  //   R3_RET(R3_ARG1)/F1_RET - result
+  //
+
+  const Register from = R3_ARG1;
+  const Register ret = R3_ARG1;
+  const FloatRegister fret = F1_ARG1;
+
+  address entry = __ pc();
+
+  // Implemented uniformly for both kinds of endianness. The interpreter
+  // implements boolean, byte, char, and short as jint (4 bytes).
+  switch (type) {
+  case T_BOOLEAN:
+  case T_CHAR:
+    // zero extension
+    __ lwz(ret, 0, from);
+    break;
+  case T_BYTE:
+  case T_SHORT:
+  case T_INT:
+    // sign extension
+    __ lwa(ret, 0, from);
+    break;
+  case T_LONG:
+    __ ld(ret, 0, from);
+    break;
+  case T_OBJECT:
+    __ ld(ret, 0, from);
+    __ verify_oop(ret);
+    break;
+  case T_FLOAT:
+    __ lfs(fret, 0, from);
+    break;
+  case T_DOUBLE:
+    __ lfd(fret, 0, from);
+    break;
+  case T_VOID:
+    break;
+  default:
+    ShouldNotReachHere();
+  }
+
+  __ blr();
+
+  return entry;
+}
+
+address CppInterpreter::return_entry(TosState state, int length) {
+  assert(interpreter_return_address != NULL, "Not initialized");
+  return interpreter_return_address;
+}
+
+address CppInterpreter::deopt_entry(TosState state, int length) {
+  address ret = NULL;
+  if (length != 0) {
+    switch (state) {
+      case atos: ret = deopt_frame_manager_return_atos; break;
+      case btos: ret = deopt_frame_manager_return_itos; break;
+      case ctos:
+      case stos:
+      case itos: ret = deopt_frame_manager_return_itos; break;
+      case ltos: ret = deopt_frame_manager_return_ltos; break;
+      case ftos: ret = deopt_frame_manager_return_ftos; break;
+      case dtos: ret = deopt_frame_manager_return_dtos; break;
+      case vtos: ret = deopt_frame_manager_return_vtos; break;
+      default: ShouldNotReachHere();
+    }
+  } else {
+    ret = unctrap_frame_manager_entry;  // re-execute the bytecode (e.g. uncommon trap, popframe)
+  }
+  assert(ret != NULL, "Not initialized");
+  return ret;
+}
+
+//
+// Helpers for commoning out cases in the various type of method entries.
+//
+
+//
+// Registers alive
+//   R16_thread      - JavaThread*
+//   R1_SP           - old stack pointer
+//   R19_method      - callee's Method
+//   R17_tos         - address of caller's tos (prepushed)
+//   R15_prev_state  - address of caller's BytecodeInterpreter or 0
+//   return_pc in R21_tmp15 (only when called within generate_native_entry)
+//
+// Registers updated
+//   R14_state       - address of callee's interpreter state
+//   R1_SP           - new stack pointer
+//   CCR4_is_synced  - current method is synchronized
+//
+void CppInterpreterGenerator::generate_compute_interpreter_state(Label& stack_overflow_return) {
+  //
+  // Stack layout at this point:
+  //
+  //   F1      [TOP_IJAVA_FRAME_ABI]              <-- R1_SP
+  //           alignment (optional)
+  //           [F1's outgoing Java arguments]     <-- R17_tos
+  //           ...
+  //   F2      [PARENT_IJAVA_FRAME_ABI]
+  //            ...
+
+  //=============================================================================
+  // Allocate space for locals other than the parameters, the
+  // interpreter state, monitors, and the expression stack.
+
+  const Register local_count        = R21_tmp1;
+  const Register parameter_count    = R22_tmp2;
+  const Register max_stack          = R23_tmp3;
+  // Must not be overwritten within this method!
+  // const Register return_pc         = R29_tmp9;
+
+  const ConditionRegister is_synced = CCR4_is_synced;
+  const ConditionRegister is_native = CCR6;
+  const ConditionRegister is_static = CCR7;
+
+  assert(is_synced != is_native, "condition code registers must be distinct");
+  assert(is_synced != is_static, "condition code registers must be distinct");
+  assert(is_native != is_static, "condition code registers must be distinct");
+
+  {
+
+  // Local registers
+  const Register top_frame_size     = R24_tmp4;
+  const Register access_flags       = R25_tmp5;
+  const Register state_offset       = R26_tmp6;
+  Register mem_stack_limit          = R27_tmp7;
+  const Register page_size          = R28_tmp8;
+
+  BLOCK_COMMENT("compute_interpreter_state {");
+
+  // access_flags = method->access_flags();
+  // TODO: PPC port: assert(4 == methodOopDesc::sz_access_flags(), "unexpected field size");
+  __ lwa(access_flags, method_(access_flags));
+
+  // parameter_count = method->constMethod->size_of_parameters();
+  // TODO: PPC port: assert(2 == ConstMethod::sz_size_of_parameters(), "unexpected field size");
+  __ ld(max_stack, in_bytes(Method::const_offset()), R19_method);   // Max_stack holds constMethod for a while.
+  __ lhz(parameter_count, in_bytes(ConstMethod::size_of_parameters_offset()), max_stack);
+
+  // local_count = method->constMethod()->max_locals();
+  // TODO: PPC port: assert(2 == ConstMethod::sz_max_locals(), "unexpected field size");
+  __ lhz(local_count, in_bytes(ConstMethod::size_of_locals_offset()), max_stack);
+
+  // max_stack = method->constMethod()->max_stack();
+  // TODO: PPC port: assert(2 == ConstMethod::sz_max_stack(), "unexpected field size");
+  __ lhz(max_stack, in_bytes(ConstMethod::max_stack_offset()), max_stack);
+
+  if (EnableInvokeDynamic) {
+    // Take into account 'extra_stack_entries' needed by method handles (see method.hpp).
+    __ addi(max_stack, max_stack, Method::extra_stack_entries());
+  }
+
+  // mem_stack_limit = thread->stack_limit();
+  __ ld(mem_stack_limit, thread_(stack_overflow_limit));
+
+  // Point locals at the first argument. Method's locals are the
+  // parameters on top of caller's expression stack.
+
+  // tos points past last Java argument
+  __ sldi(R18_locals, parameter_count, Interpreter::logStackElementSize);
+  __ add(R18_locals, R17_tos, R18_locals);
+
+  // R18_locals - i*BytesPerWord points to i-th Java local (i starts at 0)
+
+  // Set is_native, is_synced, is_static - will be used later.
+  __ testbitdi(is_native, R0, access_flags, JVM_ACC_NATIVE_BIT);
+  __ testbitdi(is_synced, R0, access_flags, JVM_ACC_SYNCHRONIZED_BIT);
+  assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile");
+  __ testbitdi(is_static, R0, access_flags, JVM_ACC_STATIC_BIT);
+
+  // PARENT_IJAVA_FRAME_ABI
+  //
+  // frame_size =
+  //   round_to((local_count - parameter_count)*BytesPerWord +
+  //              2*BytesPerWord +
+  //              alignment +
+  //              frame::interpreter_frame_cinterpreterstate_size_in_bytes()
+  //              sizeof(PARENT_IJAVA_FRAME_ABI)
+  //              method->is_synchronized() ? sizeof(BasicObjectLock) : 0 +
+  //              max_stack*BytesPerWord,
+  //            16)
+  //
+  // Note that this calculation is exactly mirrored by
+  // AbstractInterpreter::layout_activation_impl() [ and
+  // AbstractInterpreter::size_activation() ]. Which is used by
+  // deoptimization so that it can allocate the proper sized
+  // frame. This only happens for interpreted frames so the extra
+  // notes below about max_stack below are not important. The other
+  // thing to note is that for interpreter frames other than the
+  // current activation the size of the stack is the size of the live
+  // portion of the stack at the particular bcp and NOT the maximum
+  // stack that the method might use.
+  //
+  // If we're calling a native method, we replace max_stack (which is
+  // zero) with space for the worst-case signature handler varargs
+  // vector, which is:
+  //
+  //   max_stack = max(Argument::n_register_parameters, parameter_count+2);
+  //
+  // We add two slots to the parameter_count, one for the jni
+  // environment and one for a possible native mirror.  We allocate
+  // space for at least the number of ABI registers, even though
+  // InterpreterRuntime::slow_signature_handler won't write more than
+  // parameter_count+2 words when it creates the varargs vector at the
+  // top of the stack.  The generated slow signature handler will just
+  // load trash into registers beyond the necessary number.  We're
+  // still going to cut the stack back by the ABI register parameter
+  // count so as to get SP+16 pointing at the ABI outgoing parameter
+  // area, so we need to allocate at least that much even though we're
+  // going to throw it away.
+  //
+
+  // Adjust max_stack for native methods:
+  Label skip_native_calculate_max_stack;
+  __ bfalse(is_native, skip_native_calculate_max_stack);
+  // if (is_native) {
+  //  max_stack = max(Argument::n_register_parameters, parameter_count+2);
+  __ addi(max_stack, parameter_count, 2*Interpreter::stackElementWords);
+  __ cmpwi(CCR0, max_stack, Argument::n_register_parameters);
+  __ bge(CCR0, skip_native_calculate_max_stack);
+  __ li(max_stack,  Argument::n_register_parameters);
+  // }
+  __ bind(skip_native_calculate_max_stack);
+  // max_stack is now in bytes
+  __ slwi(max_stack, max_stack, Interpreter::logStackElementSize);
+
+  // Calculate number of non-parameter locals (in slots):
+  Label not_java;
+  __ btrue(is_native, not_java);
+  // if (!is_native) {
+  //   local_count = non-parameter local count
+  __ sub(local_count, local_count, parameter_count);
+  // } else {
+  //   // nothing to do: method->max_locals() == 0 for native methods
+  // }
+  __ bind(not_java);
+
+
+  // Calculate top_frame_size and parent_frame_resize.
+  {
+  const Register parent_frame_resize = R12_scratch2;
+
+  BLOCK_COMMENT("Compute top_frame_size.");
+  // top_frame_size = TOP_IJAVA_FRAME_ABI
+  //                  + size of interpreter state
+  __ li(top_frame_size, frame::top_ijava_frame_abi_size
+                        + frame::interpreter_frame_cinterpreterstate_size_in_bytes());
+  //                  + max_stack
+  __ add(top_frame_size, top_frame_size, max_stack);
+  //                  + stack slots for a BasicObjectLock for synchronized methods
+  {
+    Label not_synced;
+    __ bfalse(is_synced, not_synced);
+    __ addi(top_frame_size, top_frame_size, frame::interpreter_frame_monitor_size_in_bytes());
+    __ bind(not_synced);
+  }
+  // align
+  __ round_to(top_frame_size, frame::alignment_in_bytes);
+
+
+  BLOCK_COMMENT("Compute parent_frame_resize.");
+  // parent_frame_resize = R1_SP - R17_tos
+  __ sub(parent_frame_resize, R1_SP, R17_tos);
+  //__ li(parent_frame_resize, 0);
+  //                       + PARENT_IJAVA_FRAME_ABI
+  //                       + extra two slots for the no-parameter/no-locals
+  //                         method result
+  __ addi(parent_frame_resize, parent_frame_resize,
+                                      frame::parent_ijava_frame_abi_size
+                                    + 2*Interpreter::stackElementSize);
+  //                       + (locals_count - params_count)
+  __ sldi(R0, local_count, Interpreter::logStackElementSize);
+  __ add(parent_frame_resize, parent_frame_resize, R0);
+  // align
+  __ round_to(parent_frame_resize, frame::alignment_in_bytes);
+
+  //
+  // Stack layout at this point:
+  //
+  // The new frame F0 hasn't yet been pushed, F1 is still the top frame.
+  //
+  //   F0      [TOP_IJAVA_FRAME_ABI]
+  //           alignment (optional)
+  //           [F0's full operand stack]
+  //           [F0's monitors] (optional)
+  //           [F0's BytecodeInterpreter object]
+  //   F1      [PARENT_IJAVA_FRAME_ABI]
+  //           alignment (optional)
+  //           [F0's Java result]
+  //           [F0's non-arg Java locals]
+  //           [F1's outgoing Java arguments]     <-- R17_tos
+  //           ...
+  //   F2      [PARENT_IJAVA_FRAME_ABI]
+  //            ...
+
+
+  // Calculate new R14_state
+  // and
+  // test that the new memory stack pointer is above the limit,
+  // throw a StackOverflowError otherwise.
+  __ sub(R11_scratch1/*F1's SP*/,  R1_SP, parent_frame_resize);
+  __ addi(R14_state, R11_scratch1/*F1's SP*/,
+              -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
+  __ sub(R11_scratch1/*F0's SP*/,
+             R11_scratch1/*F1's SP*/, top_frame_size);
+
+  BLOCK_COMMENT("Test for stack overflow:");
+  __ cmpld(CCR0/*is_stack_overflow*/, R11_scratch1, mem_stack_limit);
+  __ blt(CCR0/*is_stack_overflow*/, stack_overflow_return);
+
+
+  //=============================================================================
+  // Frame_size doesn't overflow the stack. Allocate new frame and
+  // initialize interpreter state.
+
+  // Register state
+  //
+  //   R15          - local_count
+  //   R16          - parameter_count
+  //   R17          - max_stack
+  //
+  //   R18          - frame_size
+  //   R19          - access_flags
+  //   CCR4_is_synced - is_synced
+  //
+  //   GR_Lstate        - pointer to the uninitialized new BytecodeInterpreter.
+
+  // _last_Java_pc just needs to be close enough that we can identify
+  // the frame as an interpreted frame. It does not need to be the
+  // exact return address from either calling
+  // BytecodeInterpreter::InterpretMethod or the call to a jni native method.
+  // So we can initialize it here with a value of a bundle in this
+  // code fragment. We only do this initialization for java frames
+  // where InterpretMethod needs a a way to get a good pc value to
+  // store in the thread state. For interpreter frames used to call
+  // jni native code we just zero the value in the state and move an
+  // ip as needed in the native entry code.
+  //
+  // const Register last_Java_pc_addr     = GR24_SCRATCH;  // QQQ 27
+  // const Register last_Java_pc          = GR26_SCRATCH;
+
+  // Must reference stack before setting new SP since Windows
+  // will not be able to deliver the exception on a bad SP.
+  // Windows also insists that we bang each page one at a time in order
+  // for the OS to map in the reserved pages. If we bang only
+  // the final page, Windows stops delivering exceptions to our
+  // VectoredExceptionHandler and terminates our program.
+  // Linux only requires a single bang but it's rare to have
+  // to bang more than 1 page so the code is enabled for both OS's.
+
+  // BANG THE STACK
+  //
+  // Nothing to do for PPC, because updating the SP will automatically
+  // bang the page.
+
+  // Up to here we have calculated the delta for the new C-frame and
+  // checked for a stack-overflow. Now we can savely update SP and
+  // resize the C-frame.
+
+  // R14_state has already been calculated.
+  __ push_interpreter_frame(top_frame_size, parent_frame_resize,
+                            R25_tmp5, R26_tmp6, R27_tmp7, R28_tmp8);
+
+  }
+
+  //
+  // Stack layout at this point:
+  //
+  //   F0 has been been pushed!
+  //
+  //   F0      [TOP_IJAVA_FRAME_ABI]              <-- R1_SP
+  //           alignment (optional)               (now it's here, if required)
+  //           [F0's full operand stack]
+  //           [F0's monitors] (optional)
+  //           [F0's BytecodeInterpreter object]
+  //   F1      [PARENT_IJAVA_FRAME_ABI]
+  //           alignment (optional)               (now it's here, if required)
+  //           [F0's Java result]
+  //           [F0's non-arg Java locals]
+  //           [F1's outgoing Java arguments]
+  //           ...
+  //   F2      [PARENT_IJAVA_FRAME_ABI]
+  //           ...
+  //
+  // R14_state points to F0's BytecodeInterpreter object.
+  //
+
+  }
+
+  //=============================================================================
+  // new BytecodeInterpreter-object is save, let's initialize it:
+  BLOCK_COMMENT("New BytecodeInterpreter-object is save.");
+
+  {
+  // Locals
+  const Register bytecode_addr = R24_tmp4;
+  const Register constants     = R25_tmp5;
+  const Register tos           = R26_tmp6;
+  const Register stack_base    = R27_tmp7;
+  const Register local_addr    = R28_tmp8;
+  {
+    Label L;
+    __ btrue(is_native, L);
+    // if (!is_native) {
+      // bytecode_addr = constMethod->codes();
+      __ ld(bytecode_addr, method_(const));
+      __ addi(bytecode_addr, bytecode_addr, in_bytes(ConstMethod::codes_offset()));
+    // }
+    __ bind(L);
+  }
+
+  __ ld(constants, in_bytes(Method::const_offset()), R19_method);
+  __ ld(constants, in_bytes(ConstMethod::constants_offset()), constants);
+
+  // state->_prev_link = prev_state;
+  __ std(R15_prev_state, state_(_prev_link));
+
+  // For assertions only.
+  // TODO: not needed anyway because it coincides with `_monitor_base'. remove!
+  // state->_self_link = state;
+  DEBUG_ONLY(__ std(R14_state, state_(_self_link));)
+
+  // state->_thread = thread;
+  __ std(R16_thread, state_(_thread));
+
+  // state->_method = method;
+  __ std(R19_method, state_(_method));
+
+  // state->_locals = locals;
+  __ std(R18_locals, state_(_locals));
+
+  // state->_oop_temp = NULL;
+  __ li(R0, 0);
+  __ std(R0, state_(_oop_temp));
+
+  // state->_last_Java_fp = *R1_SP // Use *R1_SP as fp
+  __ ld(R0, _abi(callers_sp), R1_SP);
+  __ std(R0, state_(_last_Java_fp));
+
+  BLOCK_COMMENT("load Stack base:");
+  {
+    // Stack_base.
+    // if (!method->synchronized()) {
+    //   stack_base = state;
+    // } else {
+    //   stack_base = (uintptr_t)state - sizeof(BasicObjectLock);
+    // }
+    Label L;
+    __ mr(stack_base, R14_state);
+    __ bfalse(is_synced, L);
+    __ addi(stack_base, stack_base, -frame::interpreter_frame_monitor_size_in_bytes());
+    __ bind(L);
+  }
+
+  // state->_mdx = NULL;
+  __ li(R0, 0);
+  __ std(R0, state_(_mdx));
+
+  {
+    // if (method->is_native()) state->_bcp = NULL;
+    // else state->_bcp = bytecode_addr;
+    Label label1, label2;
+    __ bfalse(is_native, label1);
+    __ std(R0, state_(_bcp));
+    __ b(label2);
+    __ bind(label1);
+    __ std(bytecode_addr, state_(_bcp));
+    __ bind(label2);
+  }
+
+
+  // state->_result._to_call._callee = NULL;
+  __ std(R0, state_(_result._to_call._callee));
+
+  // state->_monitor_base = state;
+  __ std(R14_state, state_(_monitor_base));
+
+  // state->_msg = BytecodeInterpreter::method_entry;
+  __ li(R0, BytecodeInterpreter::method_entry);
+  __ stw(R0, state_(_msg));
+
+  // state->_last_Java_sp = R1_SP;
+  __ std(R1_SP, state_(_last_Java_sp));
+
+  // state->_stack_base = stack_base;
+  __ std(stack_base, state_(_stack_base));
+
+  // tos = stack_base - 1 slot (prepushed);
+  // state->_stack.Tos(tos);
+  __ addi(tos, stack_base, - Interpreter::stackElementSize);
+  __ std(tos,  state_(_stack));
+
+
+  {
+    BLOCK_COMMENT("get last_Java_pc:");
+    // if (!is_native) state->_last_Java_pc = <some_ip_in_this_code_buffer>;
+    // else state->_last_Java_pc = NULL; (just for neatness)
+    Label label1, label2;
+    __ btrue(is_native, label1);
+    __ get_PC_trash_LR(R0);
+    __ std(R0, state_(_last_Java_pc));
+    __ b(label2);
+    __ bind(label1);
+    __ li(R0, 0);
+    __ std(R0, state_(_last_Java_pc));
+    __ bind(label2);
+  }
+
+
+  // stack_limit = tos - max_stack;
+  __ sub(R0, tos, max_stack);
+  // state->_stack_limit = stack_limit;
+  __ std(R0, state_(_stack_limit));
+
+
+  // cache = method->constants()->cache();
+   __ ld(R0, ConstantPool::cache_offset_in_bytes(), constants);
+  // state->_constants = method->constants()->cache();
+  __ std(R0, state_(_constants));
+
+
+
+  //=============================================================================
+  // synchronized method, allocate and initialize method object lock.
+  // if (!method->is_synchronized()) goto fill_locals_with_0x0s;
+  Label fill_locals_with_0x0s;
+  __ bfalse(is_synced, fill_locals_with_0x0s);
+
+  //   pool_holder = method->constants()->pool_holder();
+  const int mirror_offset = in_bytes(Klass::java_mirror_offset());
+  {
+    Label label1, label2;
+    // lockee = NULL; for java methods, correct value will be inserted in BytecodeInterpretMethod.hpp
+    __ li(R0,0);
+    __ bfalse(is_native, label2);
+
+    __ bfalse(is_static, label1);
+    // if (method->is_static()) lockee =
+    // pool_holder->klass_part()->java_mirror();
+    __ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(), constants);
+    __ ld(R0/*lockee*/, mirror_offset, R11_scratch1/*pool_holder*/);
+    __ b(label2);
+
+    __ bind(label1);
+    // else lockee = *(oop*)locals;
+    __ ld(R0/*lockee*/, 0, R18_locals);
+    __ bind(label2);
+
+    // monitor->set_obj(lockee);
+    __ std(R0/*lockee*/, BasicObjectLock::obj_offset_in_bytes(), stack_base);
+  }
+
+  // See if we need to zero the locals
+  __ BIND(fill_locals_with_0x0s);
+
+
+  //=============================================================================
+  // fill locals with 0x0s
+  Label locals_zeroed;
+  __ btrue(is_native, locals_zeroed);
+
+  if (true /* zerolocals */ || ClearInterpreterLocals) {
+    // local_count is already num_locals_slots - num_param_slots
+    __ sldi(R0, parameter_count, Interpreter::logStackElementSize);
+    __ sub(local_addr, R18_locals, R0);
+    __ cmpdi(CCR0, local_count, 0);
+    __ ble(CCR0, locals_zeroed);
+
+    __ mtctr(local_count);
+    //__ ld_const_addr(R0, (address) 0xcafe0000babe);
+    __ li(R0, 0);
+
+    Label zero_slot;
+    __ bind(zero_slot);
+
+    // first local is at local_addr
+    __ std(R0, 0, local_addr);
+    __ addi(local_addr, local_addr, -BytesPerWord);
+    __ bdnz(zero_slot);
+  }
+
+   __ BIND(locals_zeroed);
+
+  }
+  BLOCK_COMMENT("} compute_interpreter_state");
+}
+
+// Generate code to initiate compilation on invocation counter overflow.
+void CppInterpreterGenerator::generate_counter_overflow(Label& continue_entry) {
+  // Registers alive
+  //   R14_state
+  //   R16_thread
+  //
+  // Registers updated
+  //   R14_state
+  //   R3_ARG1 (=R3_RET)
+  //   R4_ARG2
+
+  // After entering the vm we remove the activation and retry the
+  // entry point in case the compilation is complete.
+
+  // InterpreterRuntime::frequency_counter_overflow takes one argument
+  // that indicates if the counter overflow occurs at a backwards
+  // branch (NULL bcp). We pass zero. The call returns the address
+  // of the verified entry point for the method or NULL if the
+  // compilation did not complete (either went background or bailed
+  // out).
+  __ li(R4_ARG2, 0);
+
+  // Pass false to call_VM so it doesn't check for pending exceptions,
+  // since at this point in the method invocation the exception
+  // handler would try to exit the monitor of synchronized methods
+  // which haven't been entered yet.
+  //
+  // Returns verified_entry_point or NULL, we don't care which.
+  //
+  // Do not use the variant `frequency_counter_overflow' that returns
+  // a structure, because this will change the argument list by a
+  // hidden parameter (gcc 4.1).
+
+  __ call_VM(noreg,
+             CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow),
+             R4_ARG2,
+             false);
+  // Returns verified_entry_point or NULL, we don't care which as we ignore it
+  // and run interpreted.
+
+  // Reload method, it may have moved.
+  __ ld(R19_method, state_(_method));
+
+  // We jump now to the label "continue_after_compile".
+  __ b(continue_entry);
+}
+
+// Increment invocation count and check for overflow.
+//
+// R19_method must contain Method* of method to profile.
+void CppInterpreterGenerator::generate_counter_incr(Label& overflow) {
+  Label done;
+  const Register Rcounters             = R12_scratch2;
+  const Register iv_be_count           = R11_scratch1;
+  const Register invocation_limit      = R12_scratch2;
+  const Register invocation_limit_addr = invocation_limit;
+
+  // Load and ev. allocate MethodCounters object.
+  __ get_method_counters(R19_method, Rcounters, done);
+
+  // Update standard invocation counters.
+  __ increment_invocation_counter(Rcounters, iv_be_count, R0);
+
+  // Compare against limit.
+  BLOCK_COMMENT("Compare counter against limit:");
+  assert(4 == sizeof(InvocationCounter::InterpreterInvocationLimit),
+         "must be 4 bytes");
+  __ load_const(invocation_limit_addr, (address)&InvocationCounter::InterpreterInvocationLimit);
+  __ lwa(invocation_limit, 0, invocation_limit_addr);
+  __ cmpw(CCR0, iv_be_count, invocation_limit);
+  __ bge(CCR0, overflow);
+  __ bind(done);
+}
+
+//
+// Call a JNI method.
+//
+// Interpreter stub for calling a native method. (C++ interpreter)
+// This sets up a somewhat different looking stack for calling the native method
+// than the typical interpreter frame setup.
+//
+address CppInterpreterGenerator::generate_native_entry(void) {
+  if (native_entry != NULL) return native_entry;
+  address entry = __ pc();
+
+  // Read
+  //   R16_thread
+  //   R15_prev_state  - address of caller's BytecodeInterpreter, if this snippet
+  //                     gets called by the frame manager.
+  //   R19_method      - callee's Method
+  //   R17_tos         - address of caller's tos
+  //   R1_SP           - caller's stack pointer
+  //   R21_sender_SP   - initial caller sp
+  //
+  // Update
+  //   R14_state       - address of caller's BytecodeInterpreter
+  //   R3_RET          - integer result, if any.
+  //   F1_RET          - float result, if any.
+  //
+  //
+  // Stack layout at this point:
+  //
+  //    0       [TOP_IJAVA_FRAME_ABI]         <-- R1_SP
+  //            alignment (optional)
+  //            [outgoing Java arguments]     <-- R17_tos
+  //            ...
+  //    PARENT  [PARENT_IJAVA_FRAME_ABI]
+  //            ...
+  //
+
+  const bool inc_counter = UseCompiler || CountCompiledCalls;
+
+  const Register signature_handler_fd   = R21_tmp1;
+  const Register pending_exception      = R22_tmp2;
+  const Register result_handler_addr    = R23_tmp3;
+  const Register native_method_fd       = R24_tmp4;
+  const Register access_flags           = R25_tmp5;
+  const Register active_handles         = R26_tmp6;
+  const Register sync_state             = R27_tmp7;
+  const Register sync_state_addr        = sync_state;    // Address is dead after use.
+  const Register suspend_flags          = R24_tmp4;
+
+  const Register return_pc              = R28_tmp8;      // Register will be locked for some time.
+
+  const ConditionRegister is_synced     = CCR4_is_synced; // Live-on-exit from compute_interpreter_state.
+
+
+  // R1_SP still points to caller's SP at this point.
+
+  // Save initial_caller_sp to caller's abi. The caller frame must be
+  // resized before returning to get rid of the c2i arguments (if
+  // any).
+  // Override the saved SP with the senderSP so we can pop c2i
+  // arguments (if any) off when we return
+  __ std(R21_sender_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
+
+  // Save LR to caller's frame. We don't use _abi(lr) here, because it is not safe.
+  __ mflr(return_pc);
+  __ std(return_pc, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
+
+  assert(return_pc->is_nonvolatile(), "return_pc must be a non-volatile register");
+
+  __ verify_method_ptr(R19_method);
+
+  //=============================================================================
+
+  // If this snippet gets called by the frame manager (at label
+  // `call_special'), then R15_prev_state is valid. If this snippet
+  // is not called by the frame manager, but e.g. by the call stub or
+  // by compiled code, then R15_prev_state is invalid.
+  {
+    // Set R15_prev_state to 0 if we don't return to the frame
+    // manager; we will return to the call_stub or to compiled code
+    // instead. If R15_prev_state is 0 there will be only one
+    // interpreter frame (we will set this up later) in this C frame!
+    // So we must take care about retrieving prev_state_(_prev_link)
+    // and restoring R1_SP when popping that interpreter.
+    Label prev_state_is_valid;
+
+    __ load_const(R11_scratch1/*frame_manager_returnpc_addr*/, (address)&frame_manager_specialized_return);
+    __ ld(R12_scratch2/*frame_manager_returnpc*/, 0, R11_scratch1/*frame_manager_returnpc_addr*/);
+    __ cmpd(CCR0, return_pc, R12_scratch2/*frame_manager_returnpc*/);
+    __ beq(CCR0, prev_state_is_valid);
+
+    __ li(R15_prev_state, 0);
+
+    __ BIND(prev_state_is_valid);
+  }
+
+  //=============================================================================
+  // Allocate new frame and initialize interpreter state.
+
+  Label exception_return;
+  Label exception_return_sync_check;
+  Label stack_overflow_return;
+
+  // Generate new interpreter state and jump to stack_overflow_return in case of
+  // a stack overflow.
+  generate_compute_interpreter_state(stack_overflow_return);
+
+  //=============================================================================
+  // Increment invocation counter. On overflow, entry to JNI method
+  // will be compiled.
+  Label invocation_counter_overflow;
+  if (inc_counter) {
+    generate_counter_incr(invocation_counter_overflow);
+  }
+
+  Label continue_after_compile;
+  __ BIND(continue_after_compile);
+
+  // access_flags = method->access_flags();
+  // Load access flags.
+  assert(access_flags->is_nonvolatile(),
+         "access_flags must be in a non-volatile register");
+  // Type check.
+  // TODO: PPC port: assert(4 == methodOopDesc::sz_access_flags(), "unexpected field size");
+  __ lwz(access_flags, method_(access_flags));
+
+  // We don't want to reload R19_method and access_flags after calls
+  // to some helper functions.
+  assert(R19_method->is_nonvolatile(), "R19_method must be a non-volatile register");
+
+  // Check for synchronized methods. Must happen AFTER invocation counter
+  // check, so method is not locked if counter overflows.
+
+  {
+    Label method_is_not_synced;
+    // Is_synced is still alive.
+    assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile");
+    __ bfalse(is_synced, method_is_not_synced);
+
+    lock_method();
+    // Reload method, it may have moved.
+    __ ld(R19_method, state_(_method));
+
+    __ BIND(method_is_not_synced);
+  }
+
+  // jvmti/jvmpi support
+  __ notify_method_entry();
+
+  // Reload method, it may have moved.
+  __ ld(R19_method, state_(_method));
+
+  //=============================================================================
+  // Get and call the signature handler
+
+  __ ld(signature_handler_fd, method_(signature_handler));
+  Label call_signature_handler;
+
+  __ cmpdi(CCR0, signature_handler_fd, 0);
+  __ bne(CCR0, call_signature_handler);
+
+  // Method has never been called. Either generate a specialized
+  // handler or point to the slow one.
+  //
+  // Pass parameter 'false' to avoid exception check in call_VM.
+  __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), R19_method, false);
+
+  // Check for an exception while looking up the target method. If we
+  // incurred one, bail.
+  __ ld(pending_exception, thread_(pending_exception));
+  __ cmpdi(CCR0, pending_exception, 0);
+  __ bne(CCR0, exception_return_sync_check); // has pending exception
+
+  // reload method
+  __ ld(R19_method, state_(_method));
+
+  // Reload signature handler, it may have been created/assigned in the meanwhile
+  __ ld(signature_handler_fd, method_(signature_handler));
+
+  __ BIND(call_signature_handler);
+
+  // Before we call the signature handler we push a new frame to
+  // protect the interpreter frame volatile registers when we return
+  // from jni but before we can get back to Java.
+
+  // First set the frame anchor while the SP/FP registers are
+  // convenient and the slow signature handler can use this same frame
+  // anchor.
+
+  // We have a TOP_IJAVA_FRAME here, which belongs to us.
+  __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R12_scratch2/*tmp*/);
+
+  // Now the interpreter frame (and its call chain) have been
+  // invalidated and flushed. We are now protected against eager
+  // being enabled in native code. Even if it goes eager the
+  // registers will be reloaded as clean and we will invalidate after
+  // the call so no spurious flush should be possible.
+
+  // Call signature handler and pass locals address.
+  //
+  // Our signature handlers copy required arguments to the C stack
+  // (outgoing C args), R3_ARG1 to R10_ARG8, and F1_ARG1 to
+  // F13_ARG13.
+  __ mr(R3_ARG1, R18_locals);
+  __ ld(signature_handler_fd, 0, signature_handler_fd);
+  __ call_stub(signature_handler_fd);
+  // reload method
+  __ ld(R19_method, state_(_method));
+
+  // Remove the register parameter varargs slots we allocated in
+  // compute_interpreter_state. SP+16 ends up pointing to the ABI
+  // outgoing argument area.
+  //
+  // Not needed on PPC64.
+  //__ add(SP, SP, Argument::n_register_parameters*BytesPerWord);
+
+  assert(result_handler_addr->is_nonvolatile(), "result_handler_addr must be in a non-volatile register");
+  // Save across call to native method.
+  __ mr(result_handler_addr, R3_RET);
+
+  // Set up fixed parameters and call the native method.
+  // If the method is static, get mirror into R4_ARG2.
+
+  {
+    Label method_is_not_static;
+    // access_flags is non-volatile and still, no need to restore it
+
+    // restore access flags
+    __ testbitdi(CCR0, R0, access_flags, JVM_ACC_STATIC_BIT);
+    __ bfalse(CCR0, method_is_not_static);
+
+    // constants = method->constants();
+    __ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method);
+    __ ld(R11_scratch1/*constants*/, in_bytes(ConstMethod::constants_offset()), R11_scratch1);
+    // pool_holder = method->constants()->pool_holder();
+    __ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(),
+          R11_scratch1/*constants*/);
+
+    const int mirror_offset = in_bytes(Klass::java_mirror_offset());
+
+    // mirror = pool_holder->klass_part()->java_mirror();
+    __ ld(R0/*mirror*/, mirror_offset, R11_scratch1/*pool_holder*/);
+    // state->_native_mirror = mirror;
+    __ std(R0/*mirror*/, state_(_oop_temp));
+    // R4_ARG2 = &state->_oop_temp;
+    __ addir(R4_ARG2, state_(_oop_temp));
+
+    __ BIND(method_is_not_static);
+  }
+
+  // At this point, arguments have been copied off the stack into
+  // their JNI positions. Oops are boxed in-place on the stack, with
+  // handles copied to arguments. The result handler address is in a
+  // register.
+
+  // pass JNIEnv address as first parameter
+  __ addir(R3_ARG1, thread_(jni_environment));
+
+  // Load the native_method entry before we change the thread state.
+  __ ld(native_method_fd, method_(native_function));
+
+  //=============================================================================
+  // Transition from _thread_in_Java to _thread_in_native. As soon as
+  // we make this change the safepoint code needs to be certain that
+  // the last Java frame we established is good. The pc in that frame
+  // just needs to be near here not an actual return address.
+
+  // We use release_store_fence to update values like the thread state, where
+  // we don't want the current thread to continue until all our prior memory
+  // accesses (including the new thread state) are visible to other threads.
+  __ li(R0, _thread_in_native);
+  __ release();
+
+  // TODO: PPC port: assert(4 == JavaThread::sz_thread_state(), "unexpected field size");
+  __ stw(R0, thread_(thread_state));
+
+  if (UseMembar) {
+    __ fence();
+  }
+
+  //=============================================================================
+  // Call the native method. Argument registers must not have been
+  // overwritten since "__ call_stub(signature_handler);" (except for
+  // ARG1 and ARG2 for static methods)
+  __ call_c(native_method_fd);
+
+  __ std(R3_RET, state_(_native_lresult));
+  __ stfd(F1_RET, state_(_native_fresult));
+
+  // The frame_manager_lr field, which we use for setting the last
+  // java frame, gets overwritten by the signature handler. Restore
+  // it now.
+  __ get_PC_trash_LR(R11_scratch1);
+  __ std(R11_scratch1, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
+
+  // Because of GC R19_method may no longer be valid.
+
+  // Block, if necessary, before resuming in _thread_in_Java state.
+  // In order for GC to work, don't clear the last_Java_sp until after
+  // blocking.
+
+
+
+  //=============================================================================
+  // Switch thread to "native transition" state before reading the
+  // synchronization state.  This additional state is necessary
+  // because reading and testing the synchronization state is not
+  // atomic w.r.t. GC, as this scenario demonstrates: Java thread A,
+  // in _thread_in_native state, loads _not_synchronized and is
+  // preempted.  VM thread changes sync state to synchronizing and
+  // suspends threads for GC. Thread A is resumed to finish this
+  // native method, but doesn't block here since it didn't see any
+  // synchronization in progress, and escapes.
+
+  // We use release_store_fence to update values like the thread state, where
+  // we don't want the current thread to continue until all our prior memory
+  // accesses (including the new thread state) are visible to other threads.
+  __ li(R0/*thread_state*/, _thread_in_native_trans);
+  __ release();
+  __ stw(R0/*thread_state*/, thread_(thread_state));
+  if (UseMembar) {
+    __ fence();
+  }
+  // Write serialization page so that the VM thread can do a pseudo remote
+  // membar. We use the current thread pointer to calculate a thread
+  // specific offset to write to within the page. This minimizes bus
+  // traffic due to cache line collision.
+  else {
+    __ serialize_memory(R16_thread, R11_scratch1, R12_scratch2);
+  }
+
+  // Now before we return to java we must look for a current safepoint
+  // (a new safepoint can not start since we entered native_trans).
+  // We must check here because a current safepoint could be modifying
+  // the callers registers right this moment.
+
+  // Acquire isn't strictly necessary here because of the fence, but
+  // sync_state is declared to be volatile, so we do it anyway.
+  __ load_const(sync_state_addr, SafepointSynchronize::address_of_state());
+
+  // TODO: PPC port: assert(4 == SafepointSynchronize::sz_state(), "unexpected field size");
+  __ lwz(sync_state, 0, sync_state_addr);
+
+  // TODO: PPC port: assert(4 == Thread::sz_suspend_flags(), "unexpected field size");
+  __ lwz(suspend_flags, thread_(suspend_flags));
+
+  __ acquire();
+
+  Label sync_check_done;
+  Label do_safepoint;
+  // No synchronization in progress nor yet synchronized
+  __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized);
+  // not suspended
+  __ cmpwi(CCR1, suspend_flags, 0);
+
+  __ bne(CCR0, do_safepoint);
+  __ beq(CCR1, sync_check_done);
+  __ bind(do_safepoint);
+  // Block.  We do the call directly and leave the current
+  // last_Java_frame setup undisturbed.  We must save any possible
+  // native result acrosss the call. No oop is present
+
+  __ mr(R3_ARG1, R16_thread);
+  __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, JavaThread::check_special_condition_for_native_trans),
+            relocInfo::none);
+  __ bind(sync_check_done);
+
+  //=============================================================================
+  // <<<<<< Back in Interpreter Frame >>>>>
+
+  // We are in thread_in_native_trans here and back in the normal
+  // interpreter frame. We don't have to do anything special about
+  // safepoints and we can switch to Java mode anytime we are ready.
+
+  // Note: frame::interpreter_frame_result has a dependency on how the
+  // method result is saved across the call to post_method_exit. For
+  // native methods it assumes that the non-FPU/non-void result is
+  // saved in _native_lresult and a FPU result in _native_fresult. If
+  // this changes then the interpreter_frame_result implementation
+  // will need to be updated too.
+
+  // On PPC64, we have stored the result directly after the native call.
+
+  //=============================================================================
+  // back in Java
+
+  // We use release_store_fence to update values like the thread state, where
+  // we don't want the current thread to continue until all our prior memory
+  // accesses (including the new thread state) are visible to other threads.
+  __ li(R0/*thread_state*/, _thread_in_Java);
+  __ release();
+  __ stw(R0/*thread_state*/, thread_(thread_state));
+  if (UseMembar) {
+    __ fence();
+  }
+
+  __ reset_last_Java_frame();
+
+  // Reload GR27_method, call killed it. We can't look at
+  // state->_method until we're back in java state because in java
+  // state gc can't happen until we get to a safepoint.
+  //
+  // We've set thread_state to _thread_in_Java already, so restoring
+  // R19_method from R14_state works; R19_method is invalid, because
+  // GC may have happened.
+  __ ld(R19_method, state_(_method)); // reload method, may have moved
+
+  // jvmdi/jvmpi support. Whether we've got an exception pending or
+  // not, and whether unlocking throws an exception or not, we notify
+  // on native method exit. If we do have an exception, we'll end up
+  // in the caller's context to handle it, so if we don't do the
+  // notify here, we'll drop it on the floor.
+
+  __ notify_method_exit(true/*native method*/,
+                        ilgl /*illegal state (not used for native methods)*/);
+
+
+
+  //=============================================================================
+  // Handle exceptions
+
+  // See if we must unlock.
+  //
+  {
+    Label method_is_not_synced;
+    // is_synced is still alive
+    assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile");
+    __ bfalse(is_synced, method_is_not_synced);
+
+    unlock_method();
+
+    __ bind(method_is_not_synced);
+  }
+
+  // Reset active handles after returning from native.
+  // thread->active_handles()->clear();
+  __ ld(active_handles, thread_(active_handles));
+  // JNIHandleBlock::_top is an int.
+  // TODO:  PPC port: assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size");
+  __ li(R0, 0);
+  __ stw(R0, JNIHandleBlock::top_offset_in_bytes(), active_handles);
+
+  Label no_pending_exception_from_native_method;
+  __ ld(R0/*pending_exception*/, thread_(pending_exception));
+  __ cmpdi(CCR0, R0/*pending_exception*/, 0);
+  __ beq(CCR0, no_pending_exception_from_native_method);
+
+
+  //-----------------------------------------------------------------------------
+  // An exception is pending. We call into the runtime only if the
+  // caller was not interpreted. If it was interpreted the
+  // interpreter will do the correct thing. If it isn't interpreted
+  // (call stub/compiled code) we will change our return and continue.
+  __ BIND(exception_return);
+
+  Label return_to_initial_caller_with_pending_exception;
+  __ cmpdi(CCR0, R15_prev_state, 0);
+  __ beq(CCR0, return_to_initial_caller_with_pending_exception);
+
+  // We are returning to an interpreter activation, just pop the state,
+  // pop our frame, leave the exception pending, and return.
+  __ pop_interpreter_state(/*prev_state_may_be_0=*/false);
+  __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1 /* set to return pc */, R22_tmp2);
+  __ mtlr(R21_tmp1);
+  __ blr();
+
+  __ BIND(exception_return_sync_check);
+
+  assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile");
+  __ bfalse(is_synced, exception_return);
+  unlock_method();
+  __ b(exception_return);
+
+
+  __ BIND(return_to_initial_caller_with_pending_exception);
+  // We are returning to a c2i-adapter / call-stub, get the address of the
+  // exception handler, pop the frame and return to the handler.
+
+  // First, pop to caller's frame.
+  __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1  /* set to return pc */, R22_tmp2);
+
+  __ push_frame_abi112(0, R11_scratch1);
+  // Get the address of the exception handler.
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
+                  R16_thread,
+                  R21_tmp1 /* return pc */);
+  __ pop_frame();
+
+  // Load the PC of the the exception handler into LR.
+  __ mtlr(R3_RET);
+
+  // Load exception into R3_ARG1 and clear pending exception in thread.
+  __ ld(R3_ARG1/*exception*/, thread_(pending_exception));
+  __ li(R4_ARG2, 0);
+  __ std(R4_ARG2, thread_(pending_exception));
+
+  // Load the original return pc into R4_ARG2.
+  __ mr(R4_ARG2/*issuing_pc*/, R21_tmp1);
+
+  // Resize frame to get rid of a potential extension.
+  __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
+
+  // Return to exception handler.
+  __ blr();
+
+
+  //-----------------------------------------------------------------------------
+  // No exception pending.
+  __ BIND(no_pending_exception_from_native_method);
+
+  // Move native method result back into proper registers and return.
+  // Invoke result handler (may unbox/promote).
+  __ ld(R3_RET, state_(_native_lresult));
+  __ lfd(F1_RET, state_(_native_fresult));
+  __ call_stub(result_handler_addr);
+
+  // We have created a new BytecodeInterpreter object, now we must destroy it.
+  //
+  // Restore previous R14_state and caller's SP.  R15_prev_state may
+  // be 0 here, because our caller may be the call_stub or compiled
+  // code.
+  __ pop_interpreter_state(/*prev_state_may_be_0=*/true);
+  __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1 /* set to return pc */, R22_tmp2);
+  // Resize frame to get rid of a potential extension.
+  __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
+
+  // Must use the return pc which was loaded from the caller's frame
+  // as the VM uses return-pc-patching for deoptimization.
+  __ mtlr(R21_tmp1);
+  __ blr();
+
+
+
+  //=============================================================================
+  // We encountered an exception while computing the interpreter
+  // state, so R14_state isn't valid. Act as if we just returned from
+  // the callee method with a pending exception.
+  __ BIND(stack_overflow_return);
+
+  //
+  // Register state:
+  //   R14_state         invalid; trashed by compute_interpreter_state
+  //   R15_prev_state    valid, but may be 0
+  //
+  //   R1_SP             valid, points to caller's SP; wasn't yet updated by
+  //                     compute_interpreter_state
+  //
+
+  // Create exception oop and make it pending.
+
+  // Throw the exception via RuntimeStub "throw_StackOverflowError_entry".
+  //
+  // Previously, we called C-Code directly. As a consequence, a
+  // possible GC tried to process the argument oops of the top frame
+  // (see RegisterMap::clear, which sets the corresponding flag to
+  // true). This lead to crashes because:
+  //   1. The top register map did not contain locations for the argument registers
+  //   2. The arguments are dead anyway, could be already overwritten in the worst case
+  // Solution: Call via special runtime stub that pushes it's own
+  // frame. This runtime stub has the flag "CodeBlob::caller_must_gc_arguments()"
+  // set to "false", what prevents the dead arguments getting GC'd.
+  //
+  // 2 cases exist:
+  // 1. We were called by the c2i adapter / call stub
+  // 2. We were called by the frame manager
+  //
+  // Both cases are handled by this code:
+  // 1. - initial_caller_sp was saved in both cases on entry, so it's safe to load it back even if it was not changed.
+  //    - control flow will be:
+  //      throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->excp_blob of caller method
+  // 2. - control flow will be:
+  //      throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->rethrow_excp_entry of frame manager->resume_method
+  //      Since we restored the caller SP above, the rethrow_excp_entry can restore the original interpreter state
+  //      registers using the stack and resume the calling method with a pending excp.
+
+  // Pop any c2i extension from the stack, restore LR just to be sure
+  __ ld(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
+  __ mtlr(R0);
+  // Resize frame to get rid of a potential extension.
+  __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
+
+  // Load target address of the runtime stub.
+  __ load_const(R12_scratch2, (StubRoutines::throw_StackOverflowError_entry()));
+  __ mtctr(R12_scratch2);
+  __ bctr();
+
+
+  //=============================================================================
+  // Counter overflow.
+
+  if (inc_counter) {
+    // Handle invocation counter overflow
+    __ bind(invocation_counter_overflow);
+
+    generate_counter_overflow(continue_after_compile);
+  }
+
+  native_entry = entry;
+  return entry;
+}
+
+bool AbstractInterpreter::can_be_compiled(methodHandle m) {
+  // No special entry points that preclude compilation.
+  return true;
+}
+
+// Unlock the current method.
+//
+void CppInterpreterGenerator::unlock_method(void) {
+  // Find preallocated monitor and unlock method. Method monitor is
+  // the first one.
+
+  // Registers alive
+  //   R14_state
+  //
+  // Registers updated
+  //   volatiles
+  //
+  const Register monitor = R4_ARG2;
+
+  // Pass address of initial monitor we allocated.
+  //
+  // First monitor.
+  __ addi(monitor, R14_state, -frame::interpreter_frame_monitor_size_in_bytes());
+
+  // Unlock method
+  __ unlock_object(monitor);
+}
+
+// Lock the current method.
+//
+void CppInterpreterGenerator::lock_method(void) {
+  // Find preallocated monitor and lock method. Method monitor is the
+  // first one.
+
+  //
+  // Registers alive
+  //   R14_state
+  //
+  // Registers updated
+  //   volatiles
+  //
+
+  const Register monitor = R4_ARG2;
+  const Register object  = R5_ARG3;
+
+  // Pass address of initial monitor we allocated.
+  __ addi(monitor, R14_state, -frame::interpreter_frame_monitor_size_in_bytes());
+
+  // Pass object address.
+  __ ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
+
+  // Lock method.
+  __ lock_object(monitor, object);
+}
+
+// Generate code for handling resuming a deopted method.
+void CppInterpreterGenerator::generate_deopt_handling(Register result_index) {
+
+  //=============================================================================
+  // Returning from a compiled method into a deopted method. The
+  // bytecode at the bcp has completed. The result of the bytecode is
+  // in the native abi (the tosca for the template based
+  // interpreter). Any stack space that was used by the bytecode that
+  // has completed has been removed (e.g. parameters for an invoke) so
+  // all that we have to do is place any pending result on the
+  // expression stack and resume execution on the next bytecode.
+
+  Label return_from_deopt_common;
+
+  // R3_RET and F1_RET are live here! Load the array index of the
+  // required result stub address and continue at return_from_deopt_common.
+
+  // Deopt needs to jump to here to enter the interpreter (return a result).
+  deopt_frame_manager_return_atos = __ pc();
+  __ li(result_index, AbstractInterpreter::BasicType_as_index(T_OBJECT));
+  __ b(return_from_deopt_common);
+
+  deopt_frame_manager_return_btos = __ pc();
+  __ li(result_index, AbstractInterpreter::BasicType_as_index(T_BOOLEAN));
+  __ b(return_from_deopt_common);
+
+  deopt_frame_manager_return_itos = __ pc();
+  __ li(result_index, AbstractInterpreter::BasicType_as_index(T_INT));
+  __ b(return_from_deopt_common);
+
+  deopt_frame_manager_return_ltos = __ pc();
+  __ li(result_index, AbstractInterpreter::BasicType_as_index(T_LONG));
+  __ b(return_from_deopt_common);
+
+  deopt_frame_manager_return_ftos = __ pc();
+  __ li(result_index, AbstractInterpreter::BasicType_as_index(T_FLOAT));
+  __ b(return_from_deopt_common);
+
+  deopt_frame_manager_return_dtos = __ pc();
+  __ li(result_index, AbstractInterpreter::BasicType_as_index(T_DOUBLE));
+  __ b(return_from_deopt_common);
+
+  deopt_frame_manager_return_vtos = __ pc();
+  __ li(result_index, AbstractInterpreter::BasicType_as_index(T_VOID));
+  // Last one, fall-through to return_from_deopt_common.
+
+  // Deopt return common. An index is present that lets us move any
+  // possible result being return to the interpreter's stack.
+  //
+  __ BIND(return_from_deopt_common);
+
+}
+
+// Generate the code to handle a more_monitors message from the c++ interpreter.
+void CppInterpreterGenerator::generate_more_monitors() {
+
+  //
+  // Registers alive
+  //   R16_thread      - JavaThread*
+  //   R15_prev_state  - previous BytecodeInterpreter or 0
+  //   R14_state       - BytecodeInterpreter* address of receiver's interpreter state
+  //   R1_SP           - old stack pointer
+  //
+  // Registers updated
+  //   R1_SP          - new stack pointer
+  //
+
+  // Very-local scratch registers.
+  const Register old_tos         = R21_tmp1;
+  const Register new_tos         = R22_tmp2;
+  const Register stack_base      = R23_tmp3;
+  const Register stack_limit     = R24_tmp4;
+  const Register slot            = R25_tmp5;
+  const Register n_slots         = R25_tmp5;
+
+  // Interpreter state fields.
+  const Register msg             = R24_tmp4;
+
+  // Load up relevant interpreter state.
+
+  __ ld(stack_base, state_(_stack_base));                // Old stack_base
+  __ ld(old_tos, state_(_stack));                        // Old tos
+  __ ld(stack_limit, state_(_stack_limit));              // Old stack_limit
+
+  // extracted monitor_size
+  int monitor_size = frame::interpreter_frame_monitor_size_in_bytes();
+  assert(Assembler::is_aligned((unsigned int)monitor_size,
+                               (unsigned int)frame::alignment_in_bytes),
+         "size of a monitor must respect alignment of SP");
+
+  // Save and restore top LR
+  __ ld(R12_scratch2, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
+  __ resize_frame(-monitor_size, R11_scratch1);// Allocate space for new monitor
+  __ std(R12_scratch2, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
+    // Initial_caller_sp is used as unextended_sp for non initial callers.
+  __ std(R1_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
+  __ addi(stack_base, stack_base, -monitor_size);        // New stack_base
+  __ addi(new_tos, old_tos, -monitor_size);              // New tos
+  __ addi(stack_limit, stack_limit, -monitor_size);      // New stack_limit
+
+  __ std(R1_SP, state_(_last_Java_sp));                  // Update frame_bottom
+
+  __ std(stack_base, state_(_stack_base));               // Update stack_base
+  __ std(new_tos, state_(_stack));                       // Update tos
+  __ std(stack_limit, state_(_stack_limit));             // Update stack_limit
+
+  __ li(msg, BytecodeInterpreter::got_monitors);         // Tell interpreter we allocated the lock
+  __ stw(msg, state_(_msg));
+
+  // Shuffle expression stack down. Recall that stack_base points
+  // just above the new expression stack bottom. Old_tos and new_tos
+  // are used to scan thru the old and new expression stacks.
+
+  Label copy_slot, copy_slot_finished;
+  __ sub(n_slots, stack_base, new_tos);
+  __ srdi_(n_slots, n_slots, LogBytesPerWord);           // compute number of slots to copy
+  assert(LogBytesPerWord == 3, "conflicts assembler instructions");
+  __ beq(CCR0, copy_slot_finished);                       // nothing to copy
+
+  __ mtctr(n_slots);
+
+  // loop
+  __ bind(copy_slot);
+  __ ldu(slot, BytesPerWord, old_tos);                   // slot = *++old_tos;
+  __ stdu(slot, BytesPerWord, new_tos);                  // *++new_tos = slot;
+  __ bdnz(copy_slot);
+
+  __ bind(copy_slot_finished);
+
+  // Restart interpreter
+  __ li(R0, 0);
+  __ std(R0, BasicObjectLock::obj_offset_in_bytes(), stack_base);  // Mark lock as unused
+}
+
+address CppInterpreterGenerator::generate_normal_entry(void) {
+  if (interpreter_frame_manager != NULL) return interpreter_frame_manager;
+
+  address entry = __ pc();
+
+  address return_from_native_pc = (address) NULL;
+
+  // Initial entry to frame manager (from call_stub or c2i_adapter)
+
+  //
+  // Registers alive
+  //   R16_thread               - JavaThread*
+  //   R19_method               - callee's Method (method to be invoked)
+  //   R17_tos                  - address of sender tos (prepushed)
+  //   R1_SP                    - SP prepared by call stub such that caller's outgoing args are near top
+  //   LR                       - return address to caller (call_stub or c2i_adapter)
+  //   R21_sender_SP            - initial caller sp
+  //
+  // Registers updated
+  //   R15_prev_state           - 0
+  //
+  // Stack layout at this point:
+  //
+  //   0       [TOP_IJAVA_FRAME_ABI]         <-- R1_SP
+  //           alignment (optional)
+  //           [outgoing Java arguments]     <-- R17_tos
+  //           ...
+  //   PARENT  [PARENT_IJAVA_FRAME_ABI]
+  //           ...
+  //
+
+  // Save initial_caller_sp to caller's abi.
+  // The caller frame must be resized before returning to get rid of
+  // the c2i part on top of the calling compiled frame (if any).
+  // R21_tmp1 must match sender_sp in gen_c2i_adapter.
+  // Now override the saved SP with the senderSP so we can pop c2i
+  // arguments (if any) off when we return.
+  __ std(R21_sender_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
+
+  // Save LR to caller's frame. We don't use _abi(lr) here,
+  // because it is not safe.
+  __ mflr(R0);
+  __ std(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
+
+  // If we come here, it is the first invocation of the frame manager.
+  // So there is no previous interpreter state.
+  __ li(R15_prev_state, 0);
+
+
+  // Fall through to where "recursive" invocations go.
+
+  //=============================================================================
+  // Dispatch an instance of the interpreter. Recursive activations
+  // come here.
+
+  Label re_dispatch;
+  __ BIND(re_dispatch);
+
+  //
+  // Registers alive
+  //    R16_thread        - JavaThread*
+  //    R19_method        - callee's Method
+  //    R17_tos           - address of caller's tos (prepushed)
+  //    R15_prev_state    - address of caller's BytecodeInterpreter or 0
+  //    R1_SP             - caller's SP trimmed such that caller's outgoing args are near top.
+  //
+  // Stack layout at this point:
+  //
+  //   0       [TOP_IJAVA_FRAME_ABI]
+  //           alignment (optional)
+  //           [outgoing Java arguments]
+  //           ...
+  //   PARENT  [PARENT_IJAVA_FRAME_ABI]
+  //           ...
+
+  // fall through to interpreted execution
+
+  //=============================================================================
+  // Allocate a new Java frame and initialize the new interpreter state.
+
+  Label stack_overflow_return;
+
+  // Create a suitable new Java frame plus a new BytecodeInterpreter instance
+  // in the current (frame manager's) C frame.
+  generate_compute_interpreter_state(stack_overflow_return);
+
+  // fall through
+
+  //=============================================================================
+  // Interpreter dispatch.
+
+  Label call_interpreter;
+  __ BIND(call_interpreter);
+
+  //
+  // Registers alive
+  //   R16_thread       - JavaThread*
+  //   R15_prev_state   - previous BytecodeInterpreter or 0
+  //   R14_state        - address of receiver's BytecodeInterpreter
+  //   R1_SP            - receiver's stack pointer
+  //
+
+  // Thread fields.
+  const Register pending_exception = R21_tmp1;
+
+  // Interpreter state fields.
+  const Register msg               = R24_tmp4;
+
+  // MethodOop fields.
+  const Register parameter_count   = R25_tmp5;
+  const Register result_index      = R26_tmp6;
+
+  const Register dummy             = R28_tmp8;
+
+  // Address of various interpreter stubs.
+  // R29_tmp9 is reserved.
+  const Register stub_addr         = R27_tmp7;
+
+  // Uncommon trap needs to jump to here to enter the interpreter
+  // (re-execute current bytecode).
+  unctrap_frame_manager_entry  = __ pc();
+
+  // If we are profiling, store our fp (BSP) in the thread so we can
+  // find it during a tick.
+  if (Arguments::has_profile()) {
+    // On PPC64 we store the pointer to the current BytecodeInterpreter,
+    // instead of the bsp of ia64. This should suffice to be able to
+    // find all interesting information.
+    __ std(R14_state, thread_(last_interpreter_fp));
+  }
+
+  // R16_thread, R14_state and R15_prev_state are nonvolatile
+  // registers. There is no need to save these. If we needed to save
+  // some state in the current Java frame, this could be a place to do
+  // so.
+
+  // Call Java bytecode dispatcher passing "BytecodeInterpreter* istate".
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address,
+                                   JvmtiExport::can_post_interpreter_events()
+                                   ? BytecodeInterpreter::runWithChecks
+                                   : BytecodeInterpreter::run),
+                  R14_state);
+
+  interpreter_return_address  = __ last_calls_return_pc();
+
+  // R16_thread, R14_state and R15_prev_state have their values preserved.
+
+  // If we are profiling, clear the fp in the thread to tell
+  // the profiler that we are no longer in the interpreter.
+  if (Arguments::has_profile()) {
+    __ li(R11_scratch1, 0);
+    __ std(R11_scratch1, thread_(last_interpreter_fp));
+  }
+
+  // Load message from bytecode dispatcher.
+  // TODO: PPC port: guarantee(4 == BytecodeInterpreter::sz_msg(), "unexpected field size");
+  __ lwz(msg, state_(_msg));
+
+
+  Label more_monitors;
+  Label return_from_native;
+  Label return_from_native_common;
+  Label return_from_native_no_exception;
+  Label return_from_interpreted_method;
+  Label return_from_recursive_activation;
+  Label unwind_recursive_activation;
+  Label resume_interpreter;
+  Label return_to_initial_caller;
+  Label unwind_initial_activation;
+  Label unwind_initial_activation_pending_exception;
+  Label call_method;
+  Label call_special;
+  Label retry_method;
+  Label retry_method_osr;
+  Label popping_frame;
+  Label throwing_exception;
+
+  // Branch according to the received message
+
+  __ cmpwi(CCR1, msg, BytecodeInterpreter::call_method);
+  __ cmpwi(CCR2, msg, BytecodeInterpreter::return_from_method);
+
+  __ beq(CCR1, call_method);
+  __ beq(CCR2, return_from_interpreted_method);
+
+  __ cmpwi(CCR3, msg, BytecodeInterpreter::more_monitors);
+  __ cmpwi(CCR4, msg, BytecodeInterpreter::throwing_exception);
+
+  __ beq(CCR3, more_monitors);
+  __ beq(CCR4, throwing_exception);
+
+  __ cmpwi(CCR5, msg, BytecodeInterpreter::popping_frame);
+  __ cmpwi(CCR6, msg, BytecodeInterpreter::do_osr);
+
+  __ beq(CCR5, popping_frame);
+  __ beq(CCR6, retry_method_osr);
+
+  __ stop("bad message from interpreter");
+
+
+  //=============================================================================
+  // Add a monitor just below the existing one(s). State->_stack_base
+  // points to the lowest existing one, so we insert the new one just
+  // below it and shuffle the expression stack down. Ref. the above
+  // stack layout picture, we must update _stack_base, _stack, _stack_limit
+  // and _last_Java_sp in the interpreter state.
+
+  __ BIND(more_monitors);
+
+  generate_more_monitors();
+  __ b(call_interpreter);
+
+  generate_deopt_handling(result_index);
+
+  // Restoring the R14_state is already done by the deopt_blob.
+
+  // Current tos includes no parameter slots.
+  __ ld(R17_tos, state_(_stack));
+  __ li(msg, BytecodeInterpreter::deopt_resume);
+  __ b(return_from_native_common);
+
+  // We are sent here when we are unwinding from a native method or
+  // adapter with an exception pending. We need to notify the interpreter
+  // that there is an exception to process.
+  // We arrive here also if the frame manager called an (interpreted) target
+  // which returns with a StackOverflow exception.
+  // The control flow is in this case is:
+  // frame_manager->throw_excp_stub->forward_excp->rethrow_excp_entry
+
+  AbstractInterpreter::_rethrow_exception_entry = __ pc();
+
+  // Restore R14_state.
+  __ ld(R14_state, 0, R1_SP);
+  __ addi(R14_state, R14_state,
+              -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
+
+  // Store exception oop into thread object.
+  __ std(R3_RET, thread_(pending_exception));
+  __ li(msg, BytecodeInterpreter::method_resume /*rethrow_exception*/);
+  //
+  // NOTE: the interpreter frame as setup be deopt does NOT include
+  // any parameter slots (good thing since we have no callee here
+  // and couldn't remove them) so we don't have to do any calculations
+  // here to figure it out.
+  //
+  __ ld(R17_tos, state_(_stack));
+  __ b(return_from_native_common);
+
+
+  //=============================================================================
+  // Returning from a native method.  Result is in the native abi
+  // location so we must move it to the java expression stack.
+
+  __ BIND(return_from_native);
+  guarantee(return_from_native_pc == (address) NULL, "precondition");
+  return_from_native_pc = __ pc();
+
+  // Restore R14_state.
+  __ ld(R14_state, 0, R1_SP);
+  __ addi(R14_state, R14_state,
+              -frame::interpreter_frame_cinterpreterstate_size_in_bytes());
+
+  //
+  // Registers alive
+  //   R16_thread
+  //   R14_state    - address of caller's BytecodeInterpreter.
+  //   R3_RET       - integer result, if any.
+  //   F1_RET       - float result, if any.
+  //
+  // Registers updated
+  //   R19_method   - callee's Method
+  //   R17_tos      - caller's tos, with outgoing args popped
+  //   result_index - index of result handler.
+  //   msg          - message for resuming interpreter.
+  //
+
+  // Very-local scratch registers.
+
+  const ConditionRegister have_pending_exception = CCR0;
+
+  // Load callee Method, gc may have moved it.
+  __ ld(R19_method, state_(_result._to_call._callee));
+
+  // Load address of caller's tos. includes parameter slots.
+  __ ld(R17_tos, state_(_stack));
+
+  // Pop callee's parameters.
+
+  __ ld(parameter_count, in_bytes(Method::const_offset()), R19_method);
+  __ lhz(parameter_count, in_bytes(ConstMethod::size_of_parameters_offset()), parameter_count);
+  __ sldi(parameter_count, parameter_count, Interpreter::logStackElementSize);
+  __ add(R17_tos, R17_tos, parameter_count);
+
+  // Result stub address array index
+  // TODO: PPC port: assert(4 == methodOopDesc::sz_result_index(), "unexpected field size");
+  __ lwa(result_index, method_(result_index));
+
+  __ li(msg, BytecodeInterpreter::method_resume);
+
+  //
+  // Registers alive
+  //   R16_thread
+  //   R14_state    - address of caller's BytecodeInterpreter.
+  //   R17_tos      - address of caller's tos with outgoing args already popped
+  //   R3_RET       - integer return value, if any.
+  //   F1_RET       - float return value, if any.
+  //   result_index - index of result handler.
+  //   msg          - message for resuming interpreter.
+  //
+  // Registers updated
+  //   R3_RET       - new address of caller's tos, including result, if any
+  //
+
+  __ BIND(return_from_native_common);
+
+  // Check for pending exception
+  __ ld(pending_exception, thread_(pending_exception));
+  __ cmpdi(CCR0, pending_exception, 0);
+  __ beq(CCR0, return_from_native_no_exception);
+
+  // If there's a pending exception, we really have no result, so
+  // R3_RET is dead. Resume_interpreter assumes the new tos is in
+  // R3_RET.
+  __ mr(R3_RET, R17_tos);
+  // `resume_interpreter' expects R15_prev_state to be alive.
+  __ ld(R15_prev_state, state_(_prev_link));
+  __ b(resume_interpreter);
+
+  __ BIND(return_from_native_no_exception);
+
+  // No pending exception, copy method result from native ABI register
+  // to tos.
+
+  // Address of stub descriptor address array.
+  __ load_const(stub_addr, CppInterpreter::tosca_result_to_stack());
+
+  // Pass address of tos to stub.
+  __ mr(R4_ARG2, R17_tos);
+
+  // Address of stub descriptor address.
+  __ sldi(result_index, result_index, LogBytesPerWord);
+  __ add(stub_addr, stub_addr, result_index);
+
+  // Stub descriptor address.
+  __ ld(stub_addr, 0, stub_addr);
+
+  // TODO: don't do this via a call, do it in place!
+  //
+  // call stub via descriptor
+  // in R3_ARG1/F1_ARG1: result value (R3_RET or F1_RET)
+  __ call_stub(stub_addr);
+
+  // new tos = result of call in R3_RET
+
+  // `resume_interpreter' expects R15_prev_state to be alive.
+  __ ld(R15_prev_state, state_(_prev_link));
+  __ b(resume_interpreter);
+
+  //=============================================================================
+  // We encountered an exception while computing the interpreter
+  // state, so R14_state isn't valid. Act as if we just returned from
+  // the callee method with a pending exception.
+  __ BIND(stack_overflow_return);
+
+  //
+  // Registers alive
+  //   R16_thread        - JavaThread*
+  //   R1_SP             - old stack pointer
+  //   R19_method        - callee's Method
+  //   R17_tos           - address of caller's tos (prepushed)
+  //   R15_prev_state    - address of caller's BytecodeInterpreter or 0
+  //   R18_locals        - address of callee's locals array
+  //
+  // Registers updated
+  //   R3_RET           - address of resuming tos, if recursive unwind
+
+  Label Lskip_unextend_SP;
+
+  {
+  const ConditionRegister is_initial_call = CCR0;
+  const Register tos_save = R21_tmp1;
+  const Register tmp = R22_tmp2;
+
+  assert(tos_save->is_nonvolatile(), "need a nonvolatile");
+
+  // Is the exception thrown in the initial Java frame of this frame
+  // manager frame?
+  __ cmpdi(is_initial_call, R15_prev_state, 0);
+  __ bne(is_initial_call, Lskip_unextend_SP);
+
+  // Pop any c2i extension from the stack. This is necessary in the
+  // non-recursive case (that is we were called by the c2i adapter,
+  // meaning we have to prev state). In this case we entered the frame
+  // manager through a special entry which pushes the orignal
+  // unextended SP to the stack. Here we load it back.
+  __ ld(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
+  __ mtlr(R0);
+  // Resize frame to get rid of a potential extension.
+  __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
+
+  // Fall through
+
+  __ bind(Lskip_unextend_SP);
+
+  // Throw the exception via RuntimeStub "throw_StackOverflowError_entry".
+  //
+  // Previously, we called C-Code directly. As a consequence, a
+  // possible GC tried to process the argument oops of the top frame
+  // (see RegisterMap::clear, which sets the corresponding flag to
+  // true). This lead to crashes because:
+  // 1. The top register map did not contain locations for the argument registers
+  // 2. The arguments are dead anyway, could be already overwritten in the worst case
+  // Solution: Call via special runtime stub that pushes it's own frame. This runtime stub has the flag
+  // "CodeBlob::caller_must_gc_arguments()" set to "false", what prevents the dead arguments getting GC'd.
+  //
+  // 2 cases exist:
+  // 1. We were called by the c2i adapter / call stub
+  // 2. We were called by the frame manager
+  //
+  // Both cases are handled by this code:
+  // 1. - initial_caller_sp was saved on stack => Load it back and we're ok
+  //    - control flow will be:
+  //      throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->excp_blob of calling method
+  // 2. - control flow will be:
+  //      throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->
+  //        ->rethrow_excp_entry of frame manager->resume_method
+  //      Since we restored the caller SP above, the rethrow_excp_entry can restore the original interpreter state
+  //      registers using the stack and resume the calling method with a pending excp.
+
+  __ load_const(R3_ARG1, (StubRoutines::throw_StackOverflowError_entry()));
+  __ mtctr(R3_ARG1);
+  __ bctr();
+  }
+  //=============================================================================
+  // We have popped a frame from an interpreted call. We are assured
+  // of returning to an interpreted call by the popframe abi. We have
+  // no return value all we have to do is pop the current frame and
+  // then make sure that the top of stack (of the caller) gets set to
+  // where it was when we entered the callee (i.e. the args are still
+  // in place).  Or we are returning to the interpreter. In the first
+  // case we must extract result (if any) from the java expression
+  // stack and store it in the location the native abi would expect
+  // for a call returning this type. In the second case we must simply
+  // do a stack to stack move as we unwind.
+
+  __ BIND(popping_frame);
+
+  // Registers alive
+  //   R14_state
+  //   R15_prev_state
+  //   R17_tos
+  //
+  // Registers updated
+  //   R19_method
+  //   R3_RET
+  //   msg
+  {
+    Label L;
+
+    // Reload callee method, gc may have moved it.
+    __ ld(R19_method, state_(_method));
+
+    // We may be returning to a deoptimized frame in which case the
+    // usual assumption of a recursive return is not true.
+
+    // not equal = is recursive call
+    __ cmpdi(CCR0, R15_prev_state, 0);
+
+    __ bne(CCR0, L);
+
+    // Pop_frame capability.
+    // The pop_frame api says that the underlying frame is a Java frame, in this case
+    // (prev_state==null) it must be a compiled frame:
+    //
+    // Stack at this point: I, C2I + C, ...
+    //
+    // The outgoing arguments of the call have just been copied (popframe_preserve_args).
+    // By the pop_frame api, we must end up in an interpreted frame. So the compiled frame
+    // will be deoptimized. Deoptimization will restore the outgoing arguments from
+    // popframe_preserve_args, adjust the tos such that it includes the popframe_preserve_args,
+    // and adjust the bci such that the call will be executed again.
+    // We have no results, just pop the interpreter frame, resize the compiled frame to get rid
+    // of the c2i extension and return to the deopt_handler.
+    __ b(unwind_initial_activation);
+
+    // is recursive call
+    __ bind(L);
+
+    // Resume_interpreter expects the original tos in R3_RET.
+    __ ld(R3_RET, prev_state_(_stack));
+
+    // We're done.
+    __ li(msg, BytecodeInterpreter::popping_frame);
+
+    __ b(unwind_recursive_activation);
+  }
+
+
+  //=============================================================================
+
+  // We have finished an interpreted call. We are either returning to
+  // native (call_stub/c2) or we are returning to the interpreter.
+  // When returning to native, we must extract the result (if any)
+  // from the java expression stack and store it in the location the
+  // native abi expects. When returning to the interpreter we must
+  // simply do a stack to stack move as we unwind.
+
+  __ BIND(return_from_interpreted_method);
+
+  //
+  // Registers alive
+  //   R16_thread     - JavaThread*
+  //   R15_prev_state - address of caller's BytecodeInterpreter or 0
+  //   R14_state      - address of callee's interpreter state
+  //   R1_SP          - callee's stack pointer
+  //
+  // Registers updated
+  //   R19_method     - callee's method
+  //   R3_RET         - address of result (new caller's tos),
+  //
+  // if returning to interpreted
+  //   msg  - message for interpreter,
+  // if returning to interpreted
+  //
+
+  // Check if this is the initial invocation of the frame manager.
+  // If so, R15_prev_state will be null.
+  __ cmpdi(CCR0, R15_prev_state, 0);
+
+  // Reload callee method, gc may have moved it.
+  __ ld(R19_method, state_(_method));
+
+  // Load the method's result type.
+  __ lwz(result_index, method_(result_index));
+
+  // Go to return_to_initial_caller if R15_prev_state is null.
+  __ beq(CCR0, return_to_initial_caller);
+
+  // Copy callee's result to caller's expression stack via inline stack-to-stack
+  // converters.
+  {
+    Register new_tos   = R3_RET;
+    Register from_temp = R4_ARG2;
+    Register from      = R5_ARG3;
+    Register tos       = R6_ARG4;
+    Register tmp1      = R7_ARG5;
+    Register tmp2      = R8_ARG6;
+
+    ConditionRegister result_type_is_void   = CCR1;
+    ConditionRegister result_type_is_long   = CCR2;
+    ConditionRegister result_type_is_double = CCR3;
+
+    Label stack_to_stack_void;
+    Label stack_to_stack_double_slot; // T_LONG, T_DOUBLE
+    Label stack_to_stack_single_slot; // T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT, T_INT, T_FLOAT, T_OBJECT
+    Label stack_to_stack_done;
+
+    // Pass callee's address of tos + BytesPerWord
+    __ ld(from_temp, state_(_stack));
+
+    // result type: void
+    __ cmpwi(result_type_is_void, result_index, AbstractInterpreter::BasicType_as_index(T_VOID));
+
+    // Pass caller's tos == callee's locals address
+    __ ld(tos, state_(_locals));
+
+    // result type: long
+    __ cmpwi(result_type_is_long, result_index, AbstractInterpreter::BasicType_as_index(T_LONG));
+
+    __ addi(from, from_temp, Interpreter::stackElementSize);
+
+    // !! don't branch above this line !!
+
+    // handle void
+    __ beq(result_type_is_void,   stack_to_stack_void);
+
+    // result type: double
+    __ cmpwi(result_type_is_double, result_index, AbstractInterpreter::BasicType_as_index(T_DOUBLE));
+
+    // handle long or double
+    __ beq(result_type_is_long, stack_to_stack_double_slot);
+    __ beq(result_type_is_double, stack_to_stack_double_slot);
+
+    // fall through to single slot types (incl. object)
+
+    {
+      __ BIND(stack_to_stack_single_slot);
+      // T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT, T_INT, T_FLOAT, T_OBJECT
+
+      __ ld(tmp1, 0, from);
+      __ std(tmp1, 0, tos);
+      // New expression stack top
+      __ addi(new_tos, tos, - BytesPerWord);
+
+      __ b(stack_to_stack_done);
+    }
+
+    {
+      __ BIND(stack_to_stack_double_slot);
+      // T_LONG, T_DOUBLE
+
+      // Move both entries for debug purposes even though only one is live
+      __ ld(tmp1, BytesPerWord, from);
+      __ ld(tmp2, 0, from);
+      __ std(tmp1, 0, tos);
+      __ std(tmp2, -BytesPerWord, tos);
+
+      // new expression stack top
+      __ addi(new_tos, tos, - 2 * BytesPerWord); // two slots
+      __ b(stack_to_stack_done);
+    }
+
+    {
+      __ BIND(stack_to_stack_void);
+      // T_VOID
+
+      // new expression stack top
+      __ mr(new_tos, tos);
+      // fall through to stack_to_stack_done
+    }
+
+    __ BIND(stack_to_stack_done);
+  }
+
+  // new tos = R3_RET
+
+  // Get the message for the interpreter
+  __ li(msg, BytecodeInterpreter::method_resume);
+
+  // And fall thru
+
+
+  //=============================================================================
+  // Restore caller's interpreter state and pass pointer to caller's
+  // new tos to caller.
+
+  __ BIND(unwind_recursive_activation);
+
+  //
+  // Registers alive
+  //   R15_prev_state   - address of caller's BytecodeInterpreter
+  //   R3_RET           - address of caller's tos
+  //   msg              - message for caller's BytecodeInterpreter
+  //   R1_SP            - callee's stack pointer
+  //
+  // Registers updated
+  //   R14_state        - address of caller's BytecodeInterpreter
+  //   R15_prev_state   - address of its parent or 0
+  //
+
+  // Pop callee's interpreter and set R14_state to caller's interpreter.
+  __ pop_interpreter_state(/*prev_state_may_be_0=*/false);
+
+  // And fall thru
+
+
+  //=============================================================================
+  // Resume the (calling) interpreter after a call.
+
+  __ BIND(resume_interpreter);
+
+  //
+  // Registers alive
+  //   R14_state        - address of resuming BytecodeInterpreter
+  //   R15_prev_state   - address of its parent or 0
+  //   R3_RET           - address of resuming tos
+  //   msg              - message for resuming interpreter
+  //   R1_SP            - callee's stack pointer
+  //
+  // Registers updated
+  //   R1_SP            - caller's stack pointer
+  //
+
+  // Restore C stack pointer of caller (resuming interpreter),
+  // R14_state already points to the resuming BytecodeInterpreter.
+  __ pop_interpreter_frame_to_state(R14_state, R21_tmp1, R11_scratch1, R12_scratch2);
+
+  // Store new address of tos (holding return value) in interpreter state.
+  __ std(R3_RET, state_(_stack));
+
+  // Store message for interpreter.
+  __ stw(msg, state_(_msg));
+
+  __ b(call_interpreter);
+
+  //=============================================================================
+  // Interpreter returning to native code (call_stub/c1/c2) from
+  // initial activation. Convert stack result and unwind activation.
+
+  __ BIND(return_to_initial_caller);
+
+  //
+  // Registers alive
+  //   R19_method       - callee's Method
+  //   R14_state        - address of callee's interpreter state
+  //   R16_thread       - JavaThread
+  //   R1_SP            - callee's stack pointer
+  //
+  // Registers updated
+  //   R3_RET/F1_RET - result in expected output register
+  //
+
+  // If we have an exception pending we have no result and we
+  // must figure out where to really return to.
+  //
+  __ ld(pending_exception, thread_(pending_exception));
+  __ cmpdi(CCR0, pending_exception, 0);
+  __ bne(CCR0, unwind_initial_activation_pending_exception);
+
+  __ lwa(result_index, method_(result_index));
+
+  // Address of stub descriptor address array.
+  __ load_const(stub_addr, CppInterpreter::stack_result_to_native());
+
+  // Pass address of callee's tos + BytesPerWord.
+  // Will then point directly to result.
+  __ ld(R3_ARG1, state_(_stack));
+  __ addi(R3_ARG1, R3_ARG1, Interpreter::stackElementSize);
+
+  // Address of stub descriptor address
+  __ sldi(result_index, result_index, LogBytesPerWord);
+  __ add(stub_addr, stub_addr, result_index);
+
+  // Stub descriptor address
+  __ ld(stub_addr, 0, stub_addr);
+
+  // TODO: don't do this via a call, do it in place!
+  //
+  // call stub via descriptor
+  __ call_stub(stub_addr);
+
+  __ BIND(unwind_initial_activation);
+
+  // Unwind from initial activation. No exception is pending.
+
+  //
+  // Stack layout at this point:
+  //
+  //    0       [TOP_IJAVA_FRAME_ABI]         <-- R1_SP
+  //            ...
+  //    CALLER  [PARENT_IJAVA_FRAME_ABI]
+  //            ...
+  //    CALLER  [unextended ABI]
+  //            ...
+  //
+  //  The CALLER frame has a C2I adapter or is an entry-frame.
+  //
+
+  // An interpreter frame exists, we may pop the TOP_IJAVA_FRAME and
+  // turn the caller's PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME.
+  // But, we simply restore the return pc from the caller's frame and
+  // use the caller's initial_caller_sp as the new SP which pops the
+  // interpreter frame and "resizes" the caller's frame to its "unextended"
+  // size.
+
+  // get rid of top frame
+  __ pop_frame();
+
+  // Load return PC from parent frame.
+  __ ld(R21_tmp1, _parent_ijava_frame_abi(lr), R1_SP);
+
+  // Resize frame to get rid of a potential extension.
+  __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
+
+  // update LR
+  __ mtlr(R21_tmp1);
+
+  // return
+  __ blr();
+
+  //=============================================================================
+  // Unwind from initial activation. An exception is pending
+
+  __ BIND(unwind_initial_activation_pending_exception);
+
+  //
+  // Stack layout at this point:
+  //
+  //   0       [TOP_IJAVA_FRAME_ABI]         <-- R1_SP
+  //           ...
+  //   CALLER  [PARENT_IJAVA_FRAME_ABI]
+  //           ...
+  //   CALLER  [unextended ABI]
+  //           ...
+  //
+  // The CALLER frame has a C2I adapter or is an entry-frame.
+  //
+
+  // An interpreter frame exists, we may pop the TOP_IJAVA_FRAME and
+  // turn the caller's PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME.
+  // But, we just pop the current TOP_IJAVA_FRAME and fall through
+
+  __ pop_frame();
+  __ ld(R3_ARG1, _top_ijava_frame_abi(lr), R1_SP);
+
+  //
+  // Stack layout at this point:
+  //
+  //   CALLER  [PARENT_IJAVA_FRAME_ABI]      <-- R1_SP
+  //           ...
+  //   CALLER  [unextended ABI]
+  //           ...
+  //
+  // The CALLER frame has a C2I adapter or is an entry-frame.
+  //
+  // Registers alive
+  //   R16_thread
+  //   R3_ARG1 - return address to caller
+  //
+  // Registers updated
+  //   R3_ARG1 - address of pending exception
+  //   R4_ARG2 - issuing pc = return address to caller
+  //   LR      - address of exception handler stub
+  //
+
+  // Resize frame to get rid of a potential extension.
+  __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
+
+  __ mr(R14, R3_ARG1);   // R14 := ARG1
+  __ mr(R4_ARG2, R3_ARG1);  // ARG2 := ARG1
+
+  // Find the address of the "catch_exception" stub.
+  __ push_frame_abi112(0, R11_scratch1);
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address),
+                  R16_thread,
+                  R4_ARG2);
+  __ pop_frame();
+
+  // Load continuation address into LR.
+  __ mtlr(R3_RET);
+
+  // Load address of pending exception and clear it in thread object.
+  __ ld(R3_ARG1/*R3_RET*/, thread_(pending_exception));
+  __ li(R4_ARG2, 0);
+  __ std(R4_ARG2, thread_(pending_exception));
+
+  // re-load issuing pc
+  __ mr(R4_ARG2, R14);
+
+  // Branch to found exception handler.
+  __ blr();
+
+  //=============================================================================
+  // Call a new method. Compute new args and trim the expression stack
+  // to only what we are currently using and then recurse.
+
+  __ BIND(call_method);
+
+  //
+  //  Registers alive
+  //    R16_thread
+  //    R14_state      - address of caller's BytecodeInterpreter
+  //    R1_SP          - caller's stack pointer
+  //
+  //  Registers updated
+  //    R15_prev_state - address of caller's BytecodeInterpreter
+  //    R17_tos        - address of caller's tos
+  //    R19_method     - callee's Method
+  //    R1_SP          - trimmed back
+  //
+
+  // Very-local scratch registers.
+
+  const Register offset = R21_tmp1;
+  const Register tmp    = R22_tmp2;
+  const Register self_entry  = R23_tmp3;
+  const Register stub_entry  = R24_tmp4;
+
+  const ConditionRegister cr = CCR0;
+
+  // Load the address of the frame manager.
+  __ load_const(self_entry, &interpreter_frame_manager);
+  __ ld(self_entry, 0, self_entry);
+
+  // Load BytecodeInterpreter._result._to_call._callee (callee's Method).
+  __ ld(R19_method, state_(_result._to_call._callee));
+  // Load BytecodeInterpreter._stack (outgoing tos).
+  __ ld(R17_tos, state_(_stack));
+
+  // Save address of caller's BytecodeInterpreter.
+  __ mr(R15_prev_state, R14_state);
+
+  // Load the callee's entry point.
+  // Load BytecodeInterpreter._result._to_call._callee_entry_point.
+  __ ld(stub_entry, state_(_result._to_call._callee_entry_point));
+
+  // Check whether stub_entry is equal to self_entry.
+  __ cmpd(cr, self_entry, stub_entry);
+  // if (self_entry == stub_entry)
+  //   do a re-dispatch
+  __ beq(cr, re_dispatch);
+  // else
+  //   call the specialized entry (adapter for jni or compiled code)
+  __ BIND(call_special);
+
+  //
+  // Call the entry generated by `InterpreterGenerator::generate_native_entry'.
+  //
+  // Registers alive
+  //   R16_thread
+  //   R15_prev_state    - address of caller's BytecodeInterpreter
+  //   R19_method        - callee's Method
+  //   R17_tos           - address of caller's tos
+  //   R1_SP             - caller's stack pointer
+  //
+
+  // Mark return from specialized entry for generate_native_entry.
+  guarantee(return_from_native_pc != (address) NULL, "precondition");
+  frame_manager_specialized_return = return_from_native_pc;
+
+  // Set sender_SP in case we call interpreter native wrapper which
+  // will expect it. Compiled code should not care.
+  __ mr(R21_sender_SP, R1_SP);
+
+  // Do a tail call here, and let the link register point to
+  // frame_manager_specialized_return which is return_from_native_pc.
+  __ load_const(tmp, frame_manager_specialized_return);
+  __ call_stub_and_return_to(stub_entry,  tmp /* return_pc=tmp */);
+
+
+  //=============================================================================
+  //
+  // InterpretMethod triggered OSR compilation of some Java method M
+  // and now asks to run the compiled code.  We call this code the
+  // `callee'.
+  //
+  // This is our current idea on how OSR should look like on PPC64:
+  //
+  // While interpreting a Java method M the stack is:
+  //
+  //  (InterpretMethod (M), IJAVA_FRAME (M), ANY_FRAME, ...).
+  //
+  // After having OSR compiled M, `InterpretMethod' returns to the
+  // frame manager, sending the message `retry_method_osr'.  The stack
+  // is:
+  //
+  //  (IJAVA_FRAME (M), ANY_FRAME, ...).
+  //
+  // The compiler will have generated an `nmethod' suitable for
+  // continuing execution of M at the bytecode index at which OSR took
+  // place.  So now the frame manager calls the OSR entry.  The OSR
+  // entry sets up a JIT_FRAME for M and continues execution of M with
+  // initial state determined by the IJAVA_FRAME.
+  //
+  //  (JIT_FRAME (M), IJAVA_FRAME (M), ANY_FRAME, ...).
+  //
+
+  __ BIND(retry_method_osr);
+  {
+  //
+  // Registers alive
+  //   R16_thread
+  //   R15_prev_state     - address of caller's BytecodeInterpreter
+  //   R14_state          - address of callee's BytecodeInterpreter
+  //   R1_SP              - callee's SP before call to InterpretMethod
+  //
+  // Registers updated
+  //   R17                - pointer to callee's locals array
+  //                       (declared via `interpreter_arg_ptr_reg' in the AD file)
+  //   R19_method         - callee's Method
+  //   R1_SP              - callee's SP (will become SP of OSR adapter frame)
+  //
+
+  // Provide a debugger breakpoint in the frame manager if breakpoints
+  // in osr'd methods are requested.
+#ifdef COMPILER2
+  NOT_PRODUCT( if (OptoBreakpointOSR) { __ illtrap(); } )
+#endif
+
+  // Load callee's pointer to locals array from callee's state.
+  //  __ ld(R17, state_(_locals));
+
+  // Load osr entry.
+  __ ld(R12_scratch2, state_(_result._osr._osr_entry));
+
+  // Load address of temporary osr buffer to arg1.
+  __ ld(R3_ARG1, state_(_result._osr._osr_buf));
+  __ mtctr(R12_scratch2);
+
+  // Load method oop, gc may move it during execution of osr'd method.
+  __ ld(R22_tmp2, state_(_method));
+  // Load message 'call_method'.
+  __ li(R23_tmp3, BytecodeInterpreter::call_method);
+
+  {
+    // Pop the IJAVA frame of the method which we are going to call osr'd.
+    Label no_state, skip_no_state;
+    __ pop_interpreter_state(/*prev_state_may_be_0=*/true);
+    __ cmpdi(CCR0, R14_state,0);
+    __ beq(CCR0, no_state);
+    // return to interpreter
+    __ pop_interpreter_frame_to_state(R14_state, R11_scratch1, R12_scratch2, R21_tmp1);
+
+    // Init _result._to_call._callee and tell gc that it contains a valid oop
+    // by setting _msg to 'call_method'.
+    __ std(R22_tmp2, state_(_result._to_call._callee));
+    // TODO: PPC port: assert(4 == BytecodeInterpreter::sz_msg(), "unexpected field size");
+    __ stw(R23_tmp3, state_(_msg));
+
+    __ load_const(R21_tmp1, frame_manager_specialized_return);
+    __ b(skip_no_state);
+    __ bind(no_state);
+
+    // Return to initial caller.
+
+    // Get rid of top frame.
+    __ pop_frame();
+
+    // Load return PC from parent frame.
+    __ ld(R21_tmp1, _parent_ijava_frame_abi(lr), R1_SP);
+
+    // Resize frame to get rid of a potential extension.
+    __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2);
+
+    __ bind(skip_no_state);
+
+    // Update LR with return pc.
+    __ mtlr(R21_tmp1);
+  }
+  // Jump to the osr entry point.
+  __ bctr();
+
+  }
+
+  //=============================================================================
+  // Interpreted method "returned" with an exception, pass it on.
+  // Pass no result, unwind activation and continue/return to
+  // interpreter/call_stub/c2.
+
+  __ BIND(throwing_exception);
+
+  // Check if this is the initial invocation of the frame manager.  If
+  // so, previous interpreter state in R15_prev_state will be null.
+
+  // New tos of caller is callee's first parameter address, that is
+  // callee's incoming arguments are popped.
+  __ ld(R3_RET, state_(_locals));
+
+  // Check whether this is an initial call.
+  __ cmpdi(CCR0, R15_prev_state, 0);
+  // Yes, called from the call stub or from generated code via a c2i frame.
+  __ beq(CCR0, unwind_initial_activation_pending_exception);
+
+  // Send resume message, interpreter will see the exception first.
+
+  __ li(msg, BytecodeInterpreter::method_resume);
+  __ b(unwind_recursive_activation);
+
+
+  //=============================================================================
+  // Push the last instruction out to the code buffer.
+
+  {
+    __ unimplemented("end of InterpreterGenerator::generate_normal_entry", 128);
+  }
+
+  interpreter_frame_manager = entry;
+  return interpreter_frame_manager;
+}
+
+// Generate code for various sorts of method entries
+//
+address AbstractInterpreterGenerator::generate_method_entry(AbstractInterpreter::MethodKind kind) {
+  address entry_point = NULL;
+
+  switch (kind) {
+    case Interpreter::zerolocals                 :                                                                              break;
+    case Interpreter::zerolocals_synchronized    :                                                                              break;
+    case Interpreter::native                     : // Fall thru
+    case Interpreter::native_synchronized        : entry_point = ((CppInterpreterGenerator*)this)->generate_native_entry();     break;
+    case Interpreter::empty                      :                                                                              break;
+    case Interpreter::accessor                   : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry();      break;
+    case Interpreter::abstract                   : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry();      break;
+    // These are special interpreter intrinsics which we don't support so far.
+    case Interpreter::java_lang_math_sin         :                                                                              break;
+    case Interpreter::java_lang_math_cos         :                                                                              break;
+    case Interpreter::java_lang_math_tan         :                                                                              break;
+    case Interpreter::java_lang_math_abs         :                                                                              break;
+    case Interpreter::java_lang_math_log         :                                                                              break;
+    case Interpreter::java_lang_math_log10       :                                                                              break;
+    case Interpreter::java_lang_math_sqrt        :                                                                              break;
+    case Interpreter::java_lang_math_pow         :                                                                              break;
+    case Interpreter::java_lang_math_exp         :                                                                              break;
+    case Interpreter::java_lang_ref_reference_get: entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
+    default                                      : ShouldNotReachHere();                                                        break;
+  }
+
+  if (entry_point) {
+    return entry_point;
+  }
+  return ((InterpreterGenerator*)this)->generate_normal_entry();
+}
+
+InterpreterGenerator::InterpreterGenerator(StubQueue* code)
+ : CppInterpreterGenerator(code) {
+   generate_all(); // down here so it can be "virtual"
+}
+
+// How much stack a topmost interpreter method activation needs in words.
+int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
+  // Computation is in bytes not words to match layout_activation_impl
+  // below, but the return is in words.
+
+  //
+  //  0       [TOP_IJAVA_FRAME_ABI]                                                    \
+  //          alignment (optional)                                             \       |
+  //          [operand stack / Java parameters] > stack                        |       |
+  //          [monitors] (optional)             > monitors                     |       |
+  //          [PARENT_IJAVA_FRAME_ABI]                                \        |       |
+  //          [BytecodeInterpreter object]      > interpreter \       |        |       |
+  //          alignment (optional)                            | round | parent | round | top
+  //          [Java result] (2 slots)           > result      |       |        |       |
+  //          [Java non-arg locals]             \ locals      |       |        |       |
+  //          [arg locals]                      /             /       /        /       /
+  //
+
+  int locals = method->max_locals() * BytesPerWord;
+  int interpreter = frame::interpreter_frame_cinterpreterstate_size_in_bytes();
+  int result = 2 * BytesPerWord;
+
+  int parent = round_to(interpreter + result + locals, 16) + frame::parent_ijava_frame_abi_size;
+
+  int stack = method->max_stack() * BytesPerWord;
+  int monitors = method->is_synchronized() ? frame::interpreter_frame_monitor_size_in_bytes() : 0;
+  int top = round_to(parent + monitors + stack, 16) + frame::top_ijava_frame_abi_size;
+
+  return (top / BytesPerWord);
+}
+
+void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill,
+                                                  frame* caller,
+                                                  frame* current,
+                                                  Method* method,
+                                                  intptr_t* locals,
+                                                  intptr_t* stack,
+                                                  intptr_t* stack_base,
+                                                  intptr_t* monitor_base,
+                                                  intptr_t* frame_sp,
+                                                  bool is_top_frame) {
+  // What about any vtable?
+  //
+  to_fill->_thread = JavaThread::current();
+  // This gets filled in later but make it something recognizable for now.
+  to_fill->_bcp = method->code_base();
+  to_fill->_locals = locals;
+  to_fill->_constants = method->constants()->cache();
+  to_fill->_method = method;
+  to_fill->_mdx = NULL;
+  to_fill->_stack = stack;
+
+  if (is_top_frame && JavaThread::current()->popframe_forcing_deopt_reexecution()) {
+    to_fill->_msg = deopt_resume2;
+  } else {
+    to_fill->_msg = method_resume;
+  }
+  to_fill->_result._to_call._bcp_advance = 0;
+  to_fill->_result._to_call._callee_entry_point = NULL; // doesn't matter to anyone
+  to_fill->_result._to_call._callee = NULL; // doesn't matter to anyone
+  to_fill->_prev_link = NULL;
+
+  if (caller->is_interpreted_frame()) {
+    interpreterState prev  = caller->get_interpreterState();
+
+    // Support MH calls. Make sure the interpreter will return the right address:
+    // 1. Caller did ordinary interpreted->compiled call call: Set a prev_state
+    //    which makes the CPP interpreter return to frame manager "return_from_interpreted_method"
+    //    entry after finishing execution.
+    // 2. Caller did a MH call: If the caller has a MethodHandleInvoke in it's
+    //    state (invariant: must be the caller of the bottom vframe) we used the
+    //    "call_special" entry to do the call, meaning the arguments have not been
+    //    popped from the stack. Therefore, don't enter a prev state in this case
+    //    in order to return to "return_from_native" frame manager entry which takes
+    //    care of popping arguments. Also, don't overwrite the MH.invoke Method in
+    //    the prev_state in order to be able to figure out the number of arguments to
+    //     pop.
+    // The parameter method can represent MethodHandle.invokeExact(...).
+    // The MethodHandleCompiler generates these synthetic Methods,
+    // including bytecodes, if an invokedynamic call gets inlined. In
+    // this case we want to return like from any other interpreted
+    // Java call, so we set _prev_link.
+    to_fill->_prev_link = prev;
+
+    if (*prev->_bcp == Bytecodes::_invokeinterface || *prev->_bcp == Bytecodes::_invokedynamic) {
+      prev->_result._to_call._bcp_advance = 5;
+    } else {
+      prev->_result._to_call._bcp_advance = 3;
+    }
+  }
+  to_fill->_oop_temp = NULL;
+  to_fill->_stack_base = stack_base;
+  // Need +1 here because stack_base points to the word just above the
+  // first expr stack entry and stack_limit is supposed to point to
+  // the word just below the last expr stack entry. See
+  // generate_compute_interpreter_state.
+  to_fill->_stack_limit = stack_base - (method->max_stack() + 1);
+  to_fill->_monitor_base = (BasicObjectLock*) monitor_base;
+
+  to_fill->_frame_bottom = frame_sp;
+
+  // PPC64 specific
+  to_fill->_last_Java_pc = NULL;
+  to_fill->_last_Java_fp = NULL;
+  to_fill->_last_Java_sp = frame_sp;
+#ifdef ASSERT
+  to_fill->_self_link = to_fill;
+  to_fill->_native_fresult = 123456.789;
+  to_fill->_native_lresult = CONST64(0xdeafcafedeadc0de);
+#endif
+}
+
+void BytecodeInterpreter::pd_layout_interpreterState(interpreterState istate,
+                                                     address last_Java_pc,
+                                                     intptr_t* last_Java_fp) {
+  istate->_last_Java_pc = last_Java_pc;
+  istate->_last_Java_fp = last_Java_fp;
+}
+
+int AbstractInterpreter::layout_activation(Method* method,
+                                           int temps,        // Number of slots on java expression stack in use.
+                                           int popframe_args,
+                                           int monitors,     // Number of active monitors.
+                                           int caller_actual_parameters,
+                                           int callee_params,// Number of slots for callee parameters.
+                                           int callee_locals,// Number of slots for locals.
+                                           frame* caller,
+                                           frame* interpreter_frame,
+                                           bool is_top_frame,
+                                           bool is_bottom_frame) {
+
+  // NOTE this code must exactly mimic what
+  // InterpreterGenerator::generate_compute_interpreter_state() does
+  // as far as allocating an interpreter frame. However there is an
+  // exception. With the C++ based interpreter only the top most frame
+  // has a full sized expression stack.  The 16 byte slop factor is
+  // both the abi scratch area and a place to hold a result from a
+  // callee on its way to the callers stack.
+
+  int monitor_size = frame::interpreter_frame_monitor_size_in_bytes() * monitors;
+  int frame_size;
+  int top_frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes()
+                                + monitor_size
+                                + (method->max_stack() *Interpreter::stackElementWords * BytesPerWord)
+                                + 2*BytesPerWord,
+                                frame::alignment_in_bytes)
+                      + frame::top_ijava_frame_abi_size;
+  if (is_top_frame) {
+    frame_size = top_frame_size;
+  } else {
+    frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes()
+                          + monitor_size
+                          + ((temps - callee_params + callee_locals) *
+                             Interpreter::stackElementWords * BytesPerWord)
+                          + 2*BytesPerWord,
+                          frame::alignment_in_bytes)
+                 + frame::parent_ijava_frame_abi_size;
+    assert(popframe_args==0, "non-zero for top_frame only");
+  }
+
+  // If we actually have a frame to layout we must now fill in all the pieces.
+  if (interpreter_frame != NULL) {
+
+    intptr_t sp = (intptr_t)interpreter_frame->sp();
+    intptr_t fp = *(intptr_t *)sp;
+    assert(fp == (intptr_t)caller->sp(), "fp must match");
+    interpreterState cur_state =
+      (interpreterState)(fp - frame::interpreter_frame_cinterpreterstate_size_in_bytes());
+
+    // Now fill in the interpreterState object.
+
+    intptr_t* locals;
+    if (caller->is_interpreted_frame()) {
+      // Locals must agree with the caller because it will be used to set the
+      // caller's tos when we return.
+      interpreterState prev  = caller->get_interpreterState();
+      // Calculate start of "locals" for MH calls.  For MH calls, the
+      // current method() (= MH target) and prev->callee() (=
+      // MH.invoke*()) are different and especially have different
+      // signatures. To pop the argumentsof the caller, we must use
+      // the prev->callee()->size_of_arguments() because that's what
+      // the caller actually pushed.  Currently, for synthetic MH
+      // calls (deoptimized from inlined MH calls), detected by
+      // is_method_handle_invoke(), we use the callee's arguments
+      // because here, the caller's and callee's signature match.
+      if (true /*!caller->is_at_mh_callsite()*/) {
+        locals = prev->stack() + method->size_of_parameters();
+      } else {
+        // Normal MH call.
+        locals = prev->stack() + prev->callee()->size_of_parameters();
+      }
+    } else {
+      bool is_deopted;
+      locals = (intptr_t*) (fp + ((method->max_locals() - 1) * BytesPerWord) +
+                            frame::parent_ijava_frame_abi_size);
+    }
+
+    intptr_t* monitor_base = (intptr_t*) cur_state;
+    intptr_t* stack_base   = (intptr_t*) ((intptr_t) monitor_base - monitor_size);
+
+    // Provide pop_frame capability on PPC64, add popframe_args.
+    // +1 because stack is always prepushed.
+    intptr_t* stack = (intptr_t*) ((intptr_t) stack_base - (temps + popframe_args + 1) * BytesPerWord);
+
+    BytecodeInterpreter::layout_interpreterState(cur_state,
+                                                 caller,
+                                                 interpreter_frame,
+                                                 method,
+                                                 locals,
+                                                 stack,
+                                                 stack_base,
+                                                 monitor_base,
+                                                 (intptr_t*)(((intptr_t)fp)-top_frame_size),
+                                                 is_top_frame);
+
+    BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address,
+                                                    interpreter_frame->fp());
+  }
+  return frame_size/BytesPerWord;
+}
+
+#endif // CC_INTERP
diff -r ca3dacaf9041 src/cpu/ppc/vm/cppInterpreter_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/cppInterpreter_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_CPPINTERPRETER_PPC_HPP
+#define CPU_PPC_VM_CPPINTERPRETER_PPC_HPP
+
+  protected:
+
+  // Size of interpreter code.  Increase if too small.  Interpreter will
+  // fail with a guarantee ("not enough space for interpreter generation");
+  // if too small.
+  // Run with +PrintInterpreter to get the VM to print out the size.
+  // Max size with JVMTI
+
+  const static int InterpreterCodeSize = 12*K;
+
+#endif // CPU_PPC_VM_CPPINTERPRETER_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/debug_ppc.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/debug_ppc.cpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,35 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "code/codeCache.hpp"
+#include "code/nmethod.hpp"
+#include "runtime/frame.hpp"
+#include "runtime/init.hpp"
+#include "runtime/os.hpp"
+#include "utilities/debug.hpp"
+#include "utilities/top.hpp"
+
+void pd_ps(frame f) {}
diff -r ca3dacaf9041 src/cpu/ppc/vm/depChecker_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/depChecker_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,31 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_DEPCHECKER_PPC_HPP
+#define CPU_PPC_VM_DEPCHECKER_PPC_HPP
+
+// Nothing to do on ppc64
+
+#endif // CPU_PPC_VM_DEPCHECKER_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/disassembler_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/disassembler_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_DISASSEMBLER_PPC_HPP
+#define CPU_PPC_VM_DISASSEMBLER_PPC_HPP
+
+  static int pd_instruction_alignment() {
+    return sizeof(int);
+  }
+
+  static const char* pd_cpu_opts() {
+    return "ppc64";
+  }
+
+#endif // CPU_PPC_VM_DISASSEMBLER_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/frame_ppc.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/frame_ppc.cpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/markOop.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/monitorChunk.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_ppc.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#include "runtime/vframeArray.hpp"
+#endif
+
+#ifndef CC_INTERP
+#error "CC_INTERP must be defined on PPC64"
+#endif
+
+#ifdef ASSERT
+void RegisterMap::check_location_valid() {
+}
+#endif // ASSERT
+
+bool frame::safe_for_sender(JavaThread *thread) {
+  bool safe = false;
+  address   cursp = (address)sp();
+  address   curfp = (address)fp();
+  if ((cursp != NULL && curfp != NULL &&
+      (cursp <= thread->stack_base() && cursp >= thread->stack_base() - thread->stack_size())) &&
+      (curfp <= thread->stack_base() && curfp >= thread->stack_base() - thread->stack_size())) {
+      safe = true;
+  }
+  return safe;
+}
+
+bool frame::is_interpreted_frame() const  {
+  return Interpreter::contains(pc());
+}
+
+frame frame::sender_for_entry_frame(RegisterMap *map) const {
+  assert(map != NULL, "map must be set");
+  // Java frame called from C; skip all C frames and return top C
+  // frame of that chunk as the sender.
+  JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor();
+  assert(!entry_frame_is_first(), "next Java fp must be non zero");
+  assert(jfa->last_Java_sp() > _sp, "must be above this frame on stack");
+  map->clear();
+  assert(map->include_argument_oops(), "should be set by clear");
+
+  if (jfa->last_Java_pc() != NULL) {
+    frame fr(jfa->last_Java_sp(), jfa->last_Java_pc());
+    return fr;
+  }
+  // Last_java_pc is not set, if we come here from compiled code. The
+  // constructor retrieves the PC from the stack.
+  frame fr(jfa->last_Java_sp());
+  return fr;
+}
+
+frame frame::sender_for_interpreter_frame(RegisterMap *map) const {
+  // Pass callers initial_caller_sp as unextended_sp.
+  return frame(sender_sp(), sender_pc(), (intptr_t*)((parent_ijava_frame_abi *)callers_abi())->initial_caller_sp);
+}
+
+frame frame::sender_for_compiled_frame(RegisterMap *map) const {
+  assert(map != NULL, "map must be set");
+
+  // Frame owned by compiler.
+  address pc = *compiled_sender_pc_addr(_cb);
+  frame caller(compiled_sender_sp(_cb), pc);
+
+  // Now adjust the map.
+
+  // Get the rest.
+  if (map->update_map()) {
+    // Tell GC to use argument oopmaps for some runtime stubs that need it.
+    map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread()));
+    if (_cb->oop_maps() != NULL) {
+      OopMapSet::update_register_map(this, map);
+    }
+  }
+
+  return caller;
+}
+
+intptr_t* frame::compiled_sender_sp(CodeBlob* cb) const {
+  return sender_sp();
+}
+
+address* frame::compiled_sender_pc_addr(CodeBlob* cb) const {
+  return sender_pc_addr();
+}
+
+frame frame::sender(RegisterMap* map) const {
+  // Default is we do have to follow them. The sender_for_xxx will
+  // update it accordingly.
+  map->set_include_argument_oops(false);
+
+  if (is_entry_frame())       return sender_for_entry_frame(map);
+  if (is_interpreted_frame()) return sender_for_interpreter_frame(map);
+  assert(_cb == CodeCache::find_blob(pc()),"Must be the same");
+
+  if (_cb != NULL) {
+    return sender_for_compiled_frame(map);
+  }
+  // Must be native-compiled frame, i.e. the marshaling code for native
+  // methods that exists in the core system.
+  return frame(sender_sp(), sender_pc());
+}
+
+void frame::patch_pc(Thread* thread, address pc) {
+  if (TracePcPatching) {
+    tty->print_cr("patch_pc at address " PTR_FORMAT " [" PTR_FORMAT " -> " PTR_FORMAT "]",
+                  &((address*) _sp)[-1], ((address*) _sp)[-1], pc);
+  }
+  own_abi()->lr = (uint64_t)pc;
+  _cb = CodeCache::find_blob(pc);
+  if (_cb != NULL && _cb->is_nmethod() && ((nmethod*)_cb)->is_deopt_pc(_pc)) {
+    address orig = (((nmethod*)_cb)->get_original_pc(this));
+    assert(orig == _pc, "expected original to be stored before patching");
+    _deopt_state = is_deoptimized;
+    // Leave _pc as is.
+  } else {
+    _deopt_state = not_deoptimized;
+    _pc = pc;
+  }
+}
+
+void frame::pd_gc_epilog() {
+  if (is_interpreted_frame()) {
+    // Set constant pool cache entry for interpreter.
+    Method* m = interpreter_frame_method();
+
+    *interpreter_frame_cpoolcache_addr() = m->constants()->cache();
+  }
+}
+
+bool frame::is_interpreted_frame_valid(JavaThread* thread) const {
+  // Is there anything to do?
+  assert(is_interpreted_frame(), "Not an interpreted frame");
+  return true;
+}
+
+BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) {
+  assert(is_interpreted_frame(), "interpreted frame expected");
+  Method* method = interpreter_frame_method();
+  BasicType type = method->result_type();
+
+#ifdef CC_INTERP
+  if (method->is_native()) {
+    // Prior to calling into the runtime to notify the method exit the possible
+    // result value is saved into the interpreter frame.
+    interpreterState istate = get_interpreterState();
+    address lresult = (address)istate + in_bytes(BytecodeInterpreter::native_lresult_offset());
+    address fresult = (address)istate + in_bytes(BytecodeInterpreter::native_fresult_offset());
+
+    switch (method->result_type()) {
+      case T_OBJECT:
+      case T_ARRAY: {
+        oop* obj_p = *(oop**)lresult;
+        oop obj = (obj_p == NULL) ? NULL : *obj_p;
+        assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
+        *oop_result = obj;
+        break;
+      }
+      // We use std/stfd to store the values.
+      case T_BOOLEAN : value_result->z = (jboolean) *(unsigned long*)lresult; break;
+      case T_INT     : value_result->i = (jint)     *(long*)lresult;          break;
+      case T_CHAR    : value_result->c = (jchar)    *(unsigned long*)lresult; break;
+      case T_SHORT   : value_result->s = (jshort)   *(long*)lresult;          break;
+      case T_BYTE    : value_result->z = (jbyte)    *(long*)lresult;          break;
+      case T_LONG    : value_result->j = (jlong)    *(long*)lresult;          break;
+      case T_FLOAT   : value_result->f = (jfloat)   *(double*)fresult;        break;
+      case T_DOUBLE  : value_result->d = (jdouble)  *(double*)fresult;        break;
+      case T_VOID    : /* Nothing to do */ break;
+      default        : ShouldNotReachHere();
+    }
+  } else {
+    intptr_t* tos_addr = interpreter_frame_tos_address();
+    switch (method->result_type()) {
+      case T_OBJECT:
+      case T_ARRAY: {
+        oop obj = *(oop*)tos_addr;
+        assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
+        *oop_result = obj;
+      }
+      case T_BOOLEAN : value_result->z = (jboolean) *(jint*)tos_addr; break;
+      case T_BYTE    : value_result->b = (jbyte) *(jint*)tos_addr; break;
+      case T_CHAR    : value_result->c = (jchar) *(jint*)tos_addr; break;
+      case T_SHORT   : value_result->s = (jshort) *(jint*)tos_addr; break;
+      case T_INT     : value_result->i = *(jint*)tos_addr; break;
+      case T_LONG    : value_result->j = *(jlong*)tos_addr; break;
+      case T_FLOAT   : value_result->f = *(jfloat*)tos_addr; break;
+      case T_DOUBLE  : value_result->d = *(jdouble*)tos_addr; break;
+      case T_VOID    : /* Nothing to do */ break;
+      default        : ShouldNotReachHere();
+    }
+  }
+#else
+  Unimplemented();
+#endif
+  return type;
+}
+
+#ifndef PRODUCT
+
+void frame::describe_pd(FrameValues& values, int frame_no) {
+  if (is_interpreted_frame()) {
+#ifdef CC_INTERP
+    interpreterState istate = get_interpreterState();
+    values.describe(frame_no, (intptr_t*)istate, "istate");
+    values.describe(frame_no, (intptr_t*)&(istate->_thread), " thread");
+    values.describe(frame_no, (intptr_t*)&(istate->_bcp), " bcp");
+    values.describe(frame_no, (intptr_t*)&(istate->_locals), " locals");
+    values.describe(frame_no, (intptr_t*)&(istate->_constants), " constants");
+    values.describe(frame_no, (intptr_t*)&(istate->_method), err_msg(" method = %s", istate->_method->name_and_sig_as_C_string()));
+    values.describe(frame_no, (intptr_t*)&(istate->_mdx), " mdx");
+    values.describe(frame_no, (intptr_t*)&(istate->_stack), " stack");
+    values.describe(frame_no, (intptr_t*)&(istate->_msg), err_msg(" msg = %s", BytecodeInterpreter::C_msg(istate->_msg)));
+    values.describe(frame_no, (intptr_t*)&(istate->_result), " result");
+    values.describe(frame_no, (intptr_t*)&(istate->_prev_link), " prev_link");
+    values.describe(frame_no, (intptr_t*)&(istate->_oop_temp), " oop_temp");
+    values.describe(frame_no, (intptr_t*)&(istate->_stack_base), " stack_base");
+    values.describe(frame_no, (intptr_t*)&(istate->_stack_limit), " stack_limit");
+    values.describe(frame_no, (intptr_t*)&(istate->_monitor_base), " monitor_base");
+    values.describe(frame_no, (intptr_t*)&(istate->_frame_bottom), " frame_bottom");
+    values.describe(frame_no, (intptr_t*)&(istate->_last_Java_pc), " last_Java_pc");
+    values.describe(frame_no, (intptr_t*)&(istate->_last_Java_fp), " last_Java_fp");
+    values.describe(frame_no, (intptr_t*)&(istate->_last_Java_sp), " last_Java_sp");
+    values.describe(frame_no, (intptr_t*)&(istate->_self_link), " self_link");
+    values.describe(frame_no, (intptr_t*)&(istate->_native_fresult), " native_fresult");
+    values.describe(frame_no, (intptr_t*)&(istate->_native_lresult), " native_lresult");
+#else
+    Unimplemented();
+#endif
+  }
+}
+#endif
+
+void frame::adjust_unextended_sp() {
+  // If we are returning to a compiled MethodHandle call site, the
+  // saved_fp will in fact be a saved value of the unextended SP. The
+  // simplest way to tell whether we are returning to such a call site
+  // is as follows:
+
+  if (is_compiled_frame() && false /*is_at_mh_callsite()*/) {  // TODO PPC port
+    // If the sender PC is a deoptimization point, get the original
+    // PC. For MethodHandle call site the unextended_sp is stored in
+    // saved_fp.
+    _unextended_sp = _fp - _cb->frame_size();
+
+#ifdef ASSERT
+    nmethod *sender_nm = _cb->as_nmethod_or_null();
+    assert(sender_nm && *_sp == *_unextended_sp, "backlink changed");
+
+    intptr_t* sp = _unextended_sp;  // check if stack can be walked from here
+    for (int x = 0; x < 5; ++x) {   // check up to a couple of backlinks
+      intptr_t* prev_sp = *(intptr_t**)sp;
+      if (prev_sp == 0) break;      // end of stack
+      assert(prev_sp>sp, "broken stack");
+      sp = prev_sp;
+    }
+
+    if (sender_nm->is_deopt_mh_entry(_pc)) { // checks for deoptimization
+      address original_pc = sender_nm->get_original_pc(this);
+      assert(sender_nm->insts_contains(original_pc), "original PC must be in nmethod");
+      assert(sender_nm->is_method_handle_return(original_pc), "must be");
+    }
+#endif
+  }
+}
+
+intptr_t *frame::initial_deoptimization_info() {
+  // unused... but returns fp() to minimize changes introduced by 7087445
+  return fp();
+}
diff -r ca3dacaf9041 src/cpu/ppc/vm/frame_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/frame_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,449 @@
+/*
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_FRAME_PPC_HPP
+#define CPU_PPC_VM_FRAME_PPC_HPP
+
+#include "runtime/synchronizer.hpp"
+#include "utilities/top.hpp"
+
+#ifndef CC_INTERP
+#error "CC_INTERP must be defined on PPC64"
+#endif
+
+  //  C frame layout on PPC-64.
+  //
+  //  In this figure the stack grows upwards, while memory grows
+  //  downwards. See "64-bit PowerPC ELF ABI Supplement Version 1.7",
+  //  IBM Corp. (2003-10-29)
+  //  (http://math-atlas.sourceforge.net/devel/assembly/PPC-elf64abi-1.7.pdf).
+  //
+  //  Square brackets denote stack regions possibly larger
+  //  than a single 64 bit slot.
+  //
+  //  STACK:
+  //    0       [C_FRAME]               <-- SP after prolog (mod 16 = 0)
+  //            [C_FRAME]               <-- SP before prolog
+  //            ...
+  //            [C_FRAME]
+  //
+  //  C_FRAME:
+  //    0       [ABI_112]
+  //    112     CARG_9: outgoing arg 9 (arg_1 ... arg_8 via gpr_3 ... gpr_{10})
+  //            ...
+  //    40+M*8  CARG_M: outgoing arg M (M is the maximum of outgoing args taken over all call sites in the procedure)
+  //            local 1
+  //            ...
+  //            local N
+  //            spill slot for vector reg (16 bytes aligned)
+  //            ...
+  //            spill slot for vector reg
+  //            alignment       (4 or 12 bytes)
+  //    V       SR_VRSAVE
+  //    V+4     spill slot for GR
+  //    ...     ...
+  //            spill slot for GR
+  //            spill slot for FR
+  //            ...
+  //            spill slot for FR
+  //
+  //  ABI_48:
+  //    0       caller's SP
+  //    8       space for condition register (CR) for next call
+  //    16      space for link register (LR) for next call
+  //    24      reserved
+  //    32      reserved
+  //    40      space for TOC (=R2) register for next call
+  //
+  //  ABI_112:
+  //    0       [ABI_48]
+  //    48      CARG_1: spill slot for outgoing arg 1. used by next callee.
+  //    ...     ...
+  //    104     CARG_8: spill slot for outgoing arg 8. used by next callee.
+  //
+
+ public:
+
+  // C frame layout
+
+  enum {
+    // stack alignment
+    alignment_in_bytes = 16,
+    // log_2(16*8 bits) = 7.
+    log_2_of_alignment_in_bits = 7
+  };
+
+  // ABI_48:
+  struct abi_48 {
+    uint64_t callers_sp;
+    uint64_t cr;                                  //_16
+    uint64_t lr;
+    uint64_t reserved1;                           //_16
+    uint64_t reserved2;
+    uint64_t toc;                                 //_16
+    // nothing to add here!
+    // aligned to frame::alignment_in_bytes (16)
+  };
+
+  enum {
+    abi_48_size = sizeof(abi_48)
+  };
+
+  struct abi_112 : abi_48 {
+    uint64_t carg_1;
+    uint64_t carg_2;                              //_16
+    uint64_t carg_3;
+    uint64_t carg_4;                              //_16
+    uint64_t carg_5;
+    uint64_t carg_6;                              //_16
+    uint64_t carg_7;
+    uint64_t carg_8;                              //_16
+    // aligned to frame::alignment_in_bytes (16)
+  };
+
+  enum {
+    abi_112_size = sizeof(abi_112)
+  };
+
+  #define _abi(_component) \
+          (offset_of(frame::abi_112, _component))
+
+  struct abi_112_spill : abi_112 {
+    // additional spill slots
+    uint64_t spill_ret;
+    uint64_t spill_fret;                          //_16
+    // aligned to frame::alignment_in_bytes (16)
+  };
+
+  enum {
+    abi_112_spill_size = sizeof(abi_112_spill)
+  };
+
+  #define _abi_112_spill(_component) \
+          (offset_of(frame::abi_112_spill, _component))
+
+  // non-volatile GPRs:
+
+  struct spill_nonvolatiles {
+    uint64_t r14;
+    uint64_t r15;                                 //_16
+    uint64_t r16;
+    uint64_t r17;                                 //_16
+    uint64_t r18;
+    uint64_t r19;                                 //_16
+    uint64_t r20;
+    uint64_t r21;                                 //_16
+    uint64_t r22;
+    uint64_t r23;                                 //_16
+    uint64_t r24;
+    uint64_t r25;                                 //_16
+    uint64_t r26;
+    uint64_t r27;                                 //_16
+    uint64_t r28;
+    uint64_t r29;                                 //_16
+    uint64_t r30;
+    uint64_t r31;                                 //_16
+
+    double f14;
+    double f15;
+    double f16;
+    double f17;
+    double f18;
+    double f19;
+    double f20;
+    double f21;
+    double f22;
+    double f23;
+    double f24;
+    double f25;
+    double f26;
+    double f27;
+    double f28;
+    double f29;
+    double f30;
+    double f31;
+
+    // aligned to frame::alignment_in_bytes (16)
+  };
+
+  enum {
+    spill_nonvolatiles_size = sizeof(spill_nonvolatiles)
+  };
+
+  #define _spill_nonvolatiles_neg(_component) \
+     (int)(-frame::spill_nonvolatiles_size + offset_of(frame::spill_nonvolatiles, _component))
+
+  //  Frame layout for the Java interpreter on PPC64.
+  //
+  //  This frame layout provides a C-like frame for every Java frame.
+  //
+  //  In these figures the stack grows upwards, while memory grows
+  //  downwards. Square brackets denote regions possibly larger than
+  //  single 64 bit slots.
+  //
+  //  STACK (no JNI, no compiled code, no library calls,
+  //         interpreter-loop is active):
+  //    0       [InterpretMethod]
+  //            [TOP_IJAVA_FRAME]
+  //            [PARENT_IJAVA_FRAME]
+  //            ...
+  //            [PARENT_IJAVA_FRAME]
+  //            [ENTRY_FRAME]
+  //            [C_FRAME]
+  //            ...
+  //            [C_FRAME]
+  //
+  //  TOP_IJAVA_FRAME:
+  //    0       [TOP_IJAVA_FRAME_ABI]
+  //            alignment (optional)
+  //            [operand stack]
+  //            [monitors] (optional)
+  //            [cInterpreter object]
+  //            result, locals, and arguments are in parent frame!
+  //
+  //  PARENT_IJAVA_FRAME:
+  //    0       [PARENT_IJAVA_FRAME_ABI]
+  //            alignment (optional)
+  //            [callee's Java result]
+  //            [callee's locals w/o arguments]
+  //            [outgoing arguments]
+  //            [used part of operand stack w/o arguments]
+  //            [monitors] (optional)
+  //            [cInterpreter object]
+  //
+  //  ENTRY_FRAME:
+  //    0       [PARENT_IJAVA_FRAME_ABI]
+  //            alignment (optional)
+  //            [callee's Java result]
+  //            [callee's locals w/o arguments]
+  //            [outgoing arguments]
+  //            [ENTRY_FRAME_LOCALS]
+  //
+  //  PARENT_IJAVA_FRAME_ABI:
+  //    0       [ABI_48]
+  //            top_frame_sp
+  //            initial_caller_sp
+  //
+  //  TOP_IJAVA_FRAME_ABI:
+  //    0       [PARENT_IJAVA_FRAME_ABI]
+  //            carg_3_unused
+  //            carg_4_unused
+  //            carg_5_unused
+  //            carg_6_unused
+  //            carg_7_unused
+  //            frame_manager_lr
+  //
+
+  // PARENT_IJAVA_FRAME_ABI
+
+  struct parent_ijava_frame_abi : abi_48 {
+    // SOE registers.
+    // C2i adapters spill their top-frame stack-pointer here.
+    uint64_t top_frame_sp;                        //      carg_1
+    // Sp of calling compiled frame before it was resized by the c2i
+    // adapter or sp of call stub. Does not contain a valid value for
+    // non-initial frames.
+    uint64_t initial_caller_sp;                   //      carg_2
+    // aligned to frame::alignment_in_bytes (16)
+  };
+
+  enum {
+    parent_ijava_frame_abi_size = sizeof(parent_ijava_frame_abi)
+  };
+
+  #define _parent_ijava_frame_abi(_component) \
+          (offset_of(frame::parent_ijava_frame_abi, _component))
+
+  // TOP_IJAVA_FRAME_ABI
+
+  struct top_ijava_frame_abi : parent_ijava_frame_abi {
+    uint64_t carg_3_unused;                       //      carg_3
+    uint64_t card_4_unused;                       //_16   carg_4
+    uint64_t carg_5_unused;                       //      carg_5
+    uint64_t carg_6_unused;                       //_16   carg_6
+    uint64_t carg_7_unused;                       //      carg_7
+    // Use arg8 for storing frame_manager_lr. The size of
+    // top_ijava_frame_abi must match abi_112.
+    uint64_t frame_manager_lr;                    //_16   carg_8
+    // nothing to add here!
+    // aligned to frame::alignment_in_bytes (16)
+  };
+
+  enum {
+    top_ijava_frame_abi_size = sizeof(top_ijava_frame_abi)
+  };
+
+  #define _top_ijava_frame_abi(_component) \
+          (offset_of(frame::top_ijava_frame_abi, _component))
+
+  // ENTRY_FRAME
+
+  struct entry_frame_locals {
+    uint64_t call_wrapper_address;
+    uint64_t result_address;                      //_16
+    uint64_t result_type;
+    uint64_t arguments_tos_address;               //_16
+    // aligned to frame::alignment_in_bytes (16)
+    uint64_t r[spill_nonvolatiles_size/sizeof(uint64_t)];
+  };
+
+  enum {
+    entry_frame_locals_size = sizeof(entry_frame_locals)
+  };
+
+  #define _entry_frame_locals_neg(_component) \
+    (int)(-frame::entry_frame_locals_size + offset_of(frame::entry_frame_locals, _component))
+
+
+  //  Frame layout for JIT generated methods
+  //
+  //  In these figures the stack grows upwards, while memory grows
+  //  downwards. Square brackets denote regions possibly larger than single
+  //  64 bit slots.
+  //
+  //  STACK (interpreted Java calls JIT generated Java):
+  //          [JIT_FRAME]                                <-- SP (mod 16 = 0)
+  //          [TOP_IJAVA_FRAME]
+  //         ...
+  //
+  //  JIT_FRAME (is a C frame according to PPC-64 ABI):
+  //          [out_preserve]
+  //          [out_args]
+  //          [spills]
+  //          [pad_1]
+  //          [monitor] (optional)
+  //       ...
+  //          [monitor] (optional)
+  //          [pad_2]
+  //          [in_preserve] added / removed by prolog / epilog
+  //
+
+  // JIT_ABI (TOP and PARENT)
+
+  struct jit_abi {
+    uint64_t callers_sp;
+    uint64_t cr;
+    uint64_t lr;
+    uint64_t toc;
+    // Nothing to add here!
+    // NOT ALIGNED to frame::alignment_in_bytes (16).
+  };
+
+  struct jit_out_preserve : jit_abi {
+    // Nothing to add here!
+  };
+
+  struct jit_in_preserve {
+    // Nothing to add here!
+  };
+
+  enum {
+    jit_out_preserve_size = sizeof(jit_out_preserve),
+    jit_in_preserve_size  = sizeof(jit_in_preserve)
+  };
+
+  struct jit_monitor {
+    uint64_t monitor[1];
+  };
+
+  enum {
+    jit_monitor_size = sizeof(jit_monitor),
+  };
+
+ private:
+
+  //  STACK:
+  //            ...
+  //            [THIS_FRAME]             <-- this._sp (stack pointer for this frame)
+  //            [CALLER_FRAME]           <-- this.fp() (_sp of caller's frame)
+  //            ...
+  //
+
+  // frame pointer for this frame
+  intptr_t* _fp;
+
+  // The frame's stack pointer before it has been extended by a c2i adapter;
+  // needed by deoptimization
+  intptr_t* _unextended_sp;
+  void adjust_unextended_sp();
+
+ public:
+
+  // Accessors for fields
+  intptr_t* fp() const { return _fp; }
+
+  // Accessors for ABIs
+  inline abi_48* own_abi()     const { return (abi_48*) _sp; }
+  inline abi_48* callers_abi() const { return (abi_48*) _fp; }
+
+ private:
+
+  // Find codeblob and set deopt_state.
+  inline void find_codeblob_and_set_pc_and_deopt_state(address pc);
+
+ public:
+
+  // Constructors
+  inline frame(intptr_t* sp);
+  frame(intptr_t* sp, address pc);
+  inline frame(intptr_t* sp, address pc, intptr_t* unextended_sp);
+
+ private:
+
+  intptr_t* compiled_sender_sp(CodeBlob* cb) const;
+  address*  compiled_sender_pc_addr(CodeBlob* cb) const;
+  address*  sender_pc_addr(void) const;
+
+ public:
+
+#ifdef CC_INTERP
+  // Additional interface for interpreter frames:
+  inline interpreterState get_interpreterState() const;
+#endif
+
+  // Size of a monitor in bytes.
+  static int interpreter_frame_monitor_size_in_bytes();
+
+  // The size of a cInterpreter object.
+  static inline int interpreter_frame_cinterpreterstate_size_in_bytes();
+
+ private:
+
+  // PPC port: permgen stuff
+  ConstantPoolCache** interpreter_frame_cpoolcache_addr() const;
+
+ public:
+
+  // Additional interface for entry frames:
+  inline entry_frame_locals* get_entry_frame_locals() const {
+    return (entry_frame_locals*) (((address) fp()) - entry_frame_locals_size);
+  }
+
+  enum {
+    // normal return address is 1 bundle past PC
+    pc_return_offset = 0
+  };
+
+#endif // CPU_PPC_VM_FRAME_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/frame_ppc.inline.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/frame_ppc.inline.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_FRAME_PPC_INLINE_HPP
+#define CPU_PPC_VM_FRAME_PPC_INLINE_HPP
+
+#ifndef CC_INTERP
+#error "CC_INTERP must be defined on PPC64"
+#endif
+
+// Inline functions for ppc64 frames:
+
+// Find codeblob and set deopt_state.
+inline void frame::find_codeblob_and_set_pc_and_deopt_state(address pc) {
+  assert(pc != NULL, "precondition: must have PC");
+
+  _cb = CodeCache::find_blob(pc);
+  _pc = pc;   // Must be set for get_deopt_original_pc()
+
+  _fp = (intptr_t*)own_abi()->callers_sp;
+  // Use _fp - frame_size, needs to be done between _cb and _pc initialization
+  // and get_deopt_original_pc.
+  adjust_unextended_sp();
+
+  address original_pc = nmethod::get_deopt_original_pc(this);
+  if (original_pc != NULL) {
+    _pc = original_pc;
+    _deopt_state = is_deoptimized;
+  } else {
+    _deopt_state = not_deoptimized;
+  }
+
+  assert(((uint64_t)_sp & 0xf) == 0, "SP must be 16-byte aligned");
+}
+
+// Constructors
+
+// Initialize all fields, _unextended_sp will be adjusted in find_codeblob_and_set_pc_and_deopt_state.
+inline frame::frame() : _sp(NULL), _unextended_sp(NULL), _fp(NULL), _cb(NULL), _pc(NULL), _deopt_state(unknown) {}
+
+inline frame::frame(intptr_t* sp) : _sp(sp), _unextended_sp(sp) {
+  find_codeblob_and_set_pc_and_deopt_state((address)own_abi()->lr); // also sets _fp and adjusts _unextended_sp
+}
+
+inline frame::frame(intptr_t* sp, address pc) : _sp(sp), _unextended_sp(sp) {
+  find_codeblob_and_set_pc_and_deopt_state(pc); // also sets _fp and adjusts _unextended_sp
+}
+
+inline frame::frame(intptr_t* sp, address pc, intptr_t* unextended_sp) : _sp(sp), _unextended_sp(unextended_sp) {
+  find_codeblob_and_set_pc_and_deopt_state(pc); // also sets _fp and adjusts _unextended_sp
+}
+
+// Accessors
+
+// Return unique id for this frame. The id must have a value where we
+// can distinguish identity and younger/older relationship. NULL
+// represents an invalid (incomparable) frame.
+inline intptr_t* frame::id(void) const {
+  // Use the _unextended_pc as the frame's ID. Because we have no
+  // adapters, but resized compiled frames, some of the new code
+  // (e.g. JVMTI) wouldn't work if we return the (current) SP of the
+  // frame.
+  return _unextended_sp;
+}
+
+// Return true if this frame is older (less recent activation) than
+// the frame represented by id.
+inline bool frame::is_older(intptr_t* id) const {
+   assert(this->id() != NULL && id != NULL, "NULL frame id");
+   // Stack grows towards smaller addresses on ppc64.
+   return this->id() > id;
+}
+
+inline int frame::frame_size(RegisterMap* map) const {
+  // Stack grows towards smaller addresses on PPC64: sender is at a higher address.
+  return sender_sp() - sp();
+}
+
+// Return the frame's stack pointer before it has been extended by a
+// c2i adapter. This is needed by deoptimization for ignoring c2i adapter
+// frames.
+inline intptr_t* frame::unextended_sp() const {
+  return _unextended_sp;
+}
+
+// All frames have this field.
+inline address frame::sender_pc() const {
+  return (address)callers_abi()->lr;
+}
+inline address* frame::sender_pc_addr() const {
+  return (address*)&(callers_abi()->lr);
+}
+
+// All frames have this field.
+inline intptr_t* frame::sender_sp() const {
+  return (intptr_t*)callers_abi();
+}
+
+// All frames have this field.
+inline intptr_t* frame::link() const {
+  return (intptr_t*)callers_abi()->callers_sp;
+}
+
+inline intptr_t* frame::real_fp() const {
+  return fp();
+}
+
+#ifdef CC_INTERP
+
+inline interpreterState frame::get_interpreterState() const {
+  return (interpreterState)(((address)callers_abi())
+                            - frame::interpreter_frame_cinterpreterstate_size_in_bytes());
+}
+
+inline intptr_t** frame::interpreter_frame_locals_addr() const {
+  interpreterState istate = get_interpreterState();
+  return (intptr_t**)&istate->_locals;
+}
+
+inline intptr_t* frame::interpreter_frame_bcx_addr() const {
+  interpreterState istate = get_interpreterState();
+  return (intptr_t*)&istate->_bcp;
+}
+
+inline intptr_t* frame::interpreter_frame_mdx_addr() const {
+  interpreterState istate = get_interpreterState();
+  return (intptr_t*)&istate->_mdx;
+}
+
+inline intptr_t* frame::interpreter_frame_expression_stack() const {
+  return (intptr_t*)interpreter_frame_monitor_end() - 1;
+}
+
+inline jint frame::interpreter_frame_expression_stack_direction() {
+  return -1;
+}
+
+// top of expression stack
+inline intptr_t* frame::interpreter_frame_tos_address() const {
+  interpreterState istate = get_interpreterState();
+  return istate->_stack + 1;
+}
+
+inline intptr_t* frame::interpreter_frame_tos_at(jint offset) const {
+  return &interpreter_frame_tos_address()[offset];
+}
+
+// monitor elements
+
+// in keeping with Intel side: end is lower in memory than begin;
+// and beginning element is oldest element
+// Also begin is one past last monitor.
+
+inline BasicObjectLock* frame::interpreter_frame_monitor_begin() const {
+  return get_interpreterState()->monitor_base();
+}
+
+inline BasicObjectLock* frame::interpreter_frame_monitor_end() const {
+  return (BasicObjectLock*)get_interpreterState()->stack_base();
+}
+
+inline int frame::interpreter_frame_cinterpreterstate_size_in_bytes() {
+  // Size of an interpreter object. Not aligned with frame size.
+  return round_to(sizeof(BytecodeInterpreter), 8);
+}
+
+inline Method** frame::interpreter_frame_method_addr() const {
+  interpreterState istate = get_interpreterState();
+  return &istate->_method;
+}
+
+// Constant pool cache
+
+inline ConstantPoolCache** frame::interpreter_frame_cpoolcache_addr() const {
+  interpreterState istate = get_interpreterState();
+  return &istate->_constants; // should really use accessor
+}
+
+inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const {
+  interpreterState istate = get_interpreterState();
+  return &istate->_constants;
+}
+#endif // CC_INTERP
+
+inline int frame::interpreter_frame_monitor_size() {
+  // Number of stack slots for a monitor.
+  return round_to(BasicObjectLock::size(),  // number of stack slots
+                  WordsPerLong);            // number of stack slots for a Java long
+}
+
+inline int frame::interpreter_frame_monitor_size_in_bytes() {
+  return frame::interpreter_frame_monitor_size() * wordSize;
+}
+
+// entry frames
+
+inline intptr_t* frame::entry_frame_argument_at(int offset) const {
+  // Since an entry frame always calls the interpreter first, the
+  // parameters are on the stack and relative to known register in the
+  // entry frame.
+  intptr_t* tos = (intptr_t*)get_entry_frame_locals()->arguments_tos_address;
+  return &tos[offset + 1]; // prepushed tos
+}
+
+inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const {
+  return (JavaCallWrapper**)&get_entry_frame_locals()->call_wrapper_address;
+}
+
+inline oop frame::saved_oop_result(RegisterMap* map) const {
+  return *((oop*)map->location(R3->as_VMReg()));
+}
+
+inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) {
+  *((oop*)map->location(R3->as_VMReg())) = obj;
+}
+
+#endif // CPU_PPC_VM_FRAME_PPC_INLINE_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/globalDefinitions_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/globalDefinitions_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP
+#define CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP
+
+// Size of PPC Instructions
+const int BytesPerInstWord = 4;
+
+const int StackAlignmentInBytes  = 16;
+
+#endif // CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/globals_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/globals_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_GLOBALS_PPC_HPP
+#define CPU_PPC_VM_GLOBALS_PPC_HPP
+
+#include "utilities/globalDefinitions.hpp"
+#include "utilities/macros.hpp"
+
+// Sets the default values for platform dependent flags used by the runtime system.
+// (see globals.hpp)
+
+define_pd_global(bool, ConvertSleepToYield,   true);
+define_pd_global(bool, ShareVtableStubs,      false); // Improves performance markedly for mtrt and compress.
+define_pd_global(bool, NeedsDeoptSuspend,     false); // Only register window machines need this.
+
+
+define_pd_global(bool, ImplicitNullChecks,    true);  // Generate code for implicit null checks.
+define_pd_global(bool, UncommonNullCast,      true);  // Uncommon-trap NULLs passed to check cast.
+
+// Use large code-entry alignment.
+define_pd_global(intx, CodeEntryAlignment,    128);
+define_pd_global(intx, OptoLoopAlignment,     16);
+define_pd_global(intx, InlineFrequencyCount,  100);
+define_pd_global(intx, InlineSmallCode,       1500);
+
+define_pd_global(intx, PreInflateSpin,        10);
+
+// Flags for template interpreter.
+define_pd_global(bool, RewriteBytecodes,      true);
+define_pd_global(bool, RewriteFrequentPairs,  true);
+
+define_pd_global(bool, UseMembar,             false);
+
+// GC Ergo Flags
+define_pd_global(intx, CMSYoungGenPerWorker,  16*M);  // Default max size of CMS young gen, per GC worker thread.
+
+
+// Platform dependent flag handling: flags only defined on this platform.
+#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct)  \
+  product(uintx, PowerArchitecturePPC64, 0,                                 \
+          "CPU Version: x for PowerX. Currently recognizes Power5 to "      \
+          "Power7. Default is 0. CPUs newer than Power7 will be "           \
+          "recognized as Power7.")                                          \
+                                                                            \
+  /* Reoptimize code-sequences of calls at runtime, e.g. replace an */      \
+  /* indirect call by a direct call.                                */      \
+  product(bool, ReoptimizeCallSequences, true,                              \
+          "Reoptimize code-sequences of calls at runtime.")                 \
+                                                                            \
+  product(bool, UseLoadInstructionsForStackBangingPPC64, false,             \
+          "Use load instructions for stack banging.")                       \
+                                                                            \
+  /* special instructions */                                                \
+                                                                            \
+  product(bool, UseCountLeadingZerosInstructionsPPC64, true,                \
+          "Use count leading zeros instructions.")                          \
+                                                                            \
+  product(bool, UseExtendedLoadAndReserveInstructionsPPC64, false,          \
+          "Use extended versions of load-and-reserve instructions.")        \
+                                                                            \
+  product(bool, UseRotateAndMaskInstructionsPPC64, true,                    \
+          "Use rotate and mask instructions.")                              \
+                                                                            \
+  product(bool, UseStaticBranchPredictionInCompareAndSwapPPC64, true,       \
+          "Use static branch prediction hints in CAS operations.")          \
+                                                                            \
+  /* Trap based checks. */                                                  \
+  /* Trap based checks use the ppc trap instructions to check certain */    \
+  /* conditions. This instruction raises a SIGTRAP caught by the      */    \
+  /* exception handler of the VM.                                     */    \
+  product(bool, UseSIGTRAP, false,                                          \
+          "Allow trap instructions that make use of SIGTRAP. Use this to "  \
+          "switch off all optimizations requiring SIGTRAP.")                \
+  product(bool, TrapBasedICMissChecks, true,                                \
+          "Raise and handle SIGTRAP if inline cache miss detected.")        \
+  product(bool, TrapBasedNotEntrantChecks, true,                            \
+          "Raise and handle SIGTRAP if calling not entrant or zombie"       \
+          " method.")                                                       \
+  product(bool, TrapBasedNullChecks, true,                                  \
+          "Generate code for null checks that uses a cmp and trap "         \
+          "instruction raising SIGTRAP.  This is only used if an access to" \
+          "null (+offset) will not raise a SIGSEGV.")                       \
+  product(bool, TrapBasedRangeChecks, true,                                 \
+          "Raise and handle SIGTRAP if array out of bounds check fails.")   \
+  product(bool, TraceTraps, false, "Trace all traps the signal handler"     \
+          "handles.")                                                       \
+                                                                            \
+  product(bool, ZapMemory, false, "Write 0x0101... to empty memory."        \
+          " Use this to ease debugging.")                                   \
+
+
+
+#endif // CPU_PPC_VM_GLOBALS_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/icBuffer_ppc.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/icBuffer_ppc.cpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "assembler_ppc.inline.hpp"
+#include "code/icBuffer.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+#include "interpreter/bytecodes.hpp"
+#include "memory/resourceArea.hpp"
+#include "nativeInst_ppc.hpp"
+#include "oops/oop.inline.hpp"
+#include "oops/oop.inline2.hpp"
+
+#define __ masm.
+
+int InlineCacheBuffer::ic_stub_code_size() {
+  return MacroAssembler::load_const_size + MacroAssembler::b64_patchable_size;
+}
+
+void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {
+  ResourceMark rm;
+  CodeBuffer code(code_begin, ic_stub_code_size());
+  MacroAssembler masm(&code);
+  // Note: even though the code contains an embedded metadata, we do not need reloc info
+  // because
+  // (1) the metadata is old (i.e., doesn't matter for scavenges)
+  // (2) these ICStubs are removed *before* a GC happens, so the roots disappear.
+
+  // Load the oop ...
+  __ load_const(R19_method, (address) cached_value, R0);
+  // ... and jump to entry point.
+  __ b64_patchable((address) entry_point, relocInfo::none);
+
+  __ flush();
+}
+
+address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
+  NativeMovConstReg* move = nativeMovConstReg_at(code_begin);   // creation also verifies the object
+  NativeJump*        jump = nativeJump_at(move->next_instruction_address());
+  return jump->jump_destination();
+}
+
+void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
+  NativeMovConstReg* move = nativeMovConstReg_at(code_begin);   // creation also verifies the object
+  void* o = (void*)move->data();
+  return o;
+}
+
diff -r ca3dacaf9041 src/cpu/ppc/vm/icache_ppc.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/icache_ppc.cpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "assembler_ppc.inline.hpp"
+#include "runtime/icache.hpp"
+
+// Use inline assembler to implement icache flush.
+int ppc64_flush_icache(address start, int lines, int magic){
+  address end = start + (unsigned int)lines*ICache::line_size;
+  assert(start <= end, "flush_icache parms");
+
+  // store modified cache lines from data cache
+  for (address a=start; a<end; a+=ICache::line_size) {
+    __asm__ __volatile__(
+       "dcbst 0, %0  \n"
+       :
+       : "r" (a)
+       : "memory");
+  }
+
+  // sync instruction
+  __asm__ __volatile__(
+     "sync \n"
+     :
+     :
+     : "memory");
+
+  // invalidate respective cache lines in instruction cache
+  for (address a=start; a<end; a+=ICache::line_size) {
+    __asm__ __volatile__(
+       "icbi 0, %0   \n"
+       :
+       : "r" (a)
+       : "memory");
+  }
+
+  // discard fetched instructions
+  __asm__ __volatile__(
+                 "isync \n"
+                 :
+                 :
+                 : "memory");
+
+  return magic;
+}
+
+void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) {
+  StubCodeMark mark(this, "ICache", "flush_icache_stub");
+
+  *flush_icache_stub = (ICache::flush_icache_stub_t)ppc64_flush_icache;
+
+  // First call to flush itself
+  ICache::invalidate_range((address)(*flush_icache_stub), 0);
+}
diff -r ca3dacaf9041 src/cpu/ppc/vm/icache_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/icache_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_ICACHE_PPC_HPP
+#define CPU_PPC_VM_ICACHE_PPC_HPP
+
+// Interface for updating the instruction cache.  Whenever the VM modifies
+// code, part of the processor instruction cache potentially has to be flushed.
+
+class ICache : public AbstractICache {
+ public:
+  enum {
+    // On PowerPC the cache line size is 32 bytes.
+    stub_size      = 160, // Size of the icache flush stub in bytes.
+    line_size      = 32,  // Flush instruction affects 32 bytes.
+    log2_line_size = 5    // log2(line_size)
+  };
+
+  // Use default implementation
+};
+
+#endif // CPU_PPC_VM_ICACHE_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/interp_masm_ppc_64.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,504 @@
+/*
+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interp_masm_ppc_64.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) // nothing
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#endif
+
+// Lock object
+//
+// Registers alive
+//   monitor - Address of the BasicObjectLock to be used for locking,
+//             which must be initialized with the object to lock.
+//   object  - Address of the object to be locked.
+//
+void InterpreterMacroAssembler::lock_object(Register monitor, Register object) {
+  if (UseHeavyMonitors) {
+    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+            monitor, /*check_for_exceptions=*/false);
+  } else {
+    // template code:
+    //
+    // markOop displaced_header = obj->mark().set_unlocked();
+    // monitor->lock()->set_displaced_header(displaced_header);
+    // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+    //   // We stored the monitor address into the object's mark word.
+    // } else if (THREAD->is_lock_owned((address)displaced_header))
+    //   // Simple recursive case.
+    //   monitor->lock()->set_displaced_header(NULL);
+    // } else {
+    //   // Slow path.
+    //   InterpreterRuntime::monitorenter(THREAD, monitor);
+    // }
+
+    const Register displaced_header = R7_ARG5;
+    const Register object_mark_addr = R8_ARG6;
+    const Register current_header   = R9_ARG7;
+    const Register tmp              = R10_ARG8;
+
+    Label done;
+    Label slow_case;
+
+    assert_different_registers(displaced_header, object_mark_addr, current_header, tmp);
+
+
+    // markOop displaced_header = obj->mark().set_unlocked();
+
+    // Load markOop from object into displaced_header.
+    ld(displaced_header, oopDesc::mark_offset_in_bytes(), object);
+
+    if (UseBiasedLocking) {
+      biased_locking_enter(CCR0, object, displaced_header, tmp, current_header, done, &slow_case);
+    }
+
+    // Set displaced_header to be (markOop of object | UNLOCK_VALUE).
+    ori(displaced_header, displaced_header, markOopDesc::unlocked_value);
+
+
+    // monitor->lock()->set_displaced_header(displaced_header);
+
+    // Initialize the box (Must happen before we update the object mark!).
+    std(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
+            BasicLock::displaced_header_offset_in_bytes(), monitor);
+
+    // if (Atomic::cmpxchg_ptr(/*ex=*/monitor, /*addr*/obj->mark_addr(), /*cmp*/displaced_header) == displaced_header) {
+
+    // Store stack address of the BasicObjectLock (this is monitor) into object.
+    addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
+
+    // Must fence, otherwise, preceding store(s) may float below cmpxchg.
+    // CmpxchgX sets CCR0 to cmpX(current, displaced).
+    fence(); // TODO: replace by MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq ?
+    cmpxchgd(/*flag=*/CCR0,
+             /*current_value=*/current_header,
+             /*compare_value=*/displaced_header, /*exchange_value=*/monitor,
+             /*where=*/object_mark_addr,
+             MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+             MacroAssembler::cmpxchgx_hint_acquire_lock());
+
+    // If the compare-and-exchange succeeded, then we found an unlocked
+    // object and we have now locked it.
+    beq(CCR0, done);
+
+
+    // } else if (THREAD->is_lock_owned((address)displaced_header))
+    //   // Simple recursive case.
+    //   monitor->lock()->set_displaced_header(NULL);
+
+    // We did not see an unlocked object so try the fast recursive case.
+
+    // Check if owner is self by comparing the value in the markOop of object
+    // (current_header) with the stack pointer.
+    sub(current_header, current_header, R1_SP);
+
+    assert(os::vm_page_size() > 0xfff, "page size too small - change the constant");
+    load_const_optimized(tmp,
+                         (address) (~(os::vm_page_size()-1) |
+                                    markOopDesc::lock_mask_in_place));
+
+    and_(R0/*==0?*/, current_header, tmp);
+    // If condition is true we are done and hence we can store 0 in the displaced
+    // header indicating it is a recursive lock.
+    bne(CCR0, slow_case);
+    release();
+    std(R0/*==0!*/, BasicObjectLock::lock_offset_in_bytes() +
+            BasicLock::displaced_header_offset_in_bytes(), monitor);
+    b(done);
+
+
+    // } else {
+    //   // Slow path.
+    //   InterpreterRuntime::monitorenter(THREAD, monitor);
+
+    // None of the above fast optimizations worked so we have to get into the
+    // slow case of monitor enter.
+    bind(slow_case);
+    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter),
+            monitor, /*check_for_exceptions=*/false);
+    // }
+
+    bind(done);
+  }
+}
+
+// Unlocks an object. Used in monitorexit bytecode and remove_activation.
+//
+// Registers alive
+//   monitor - Address of the BasicObjectLock to be used for locking,
+//             which must be initialized with the object to lock.
+//
+// Throw IllegalMonitorException if object is not locked by current thread.
+void InterpreterMacroAssembler::unlock_object(Register monitor) {
+  if (UseHeavyMonitors) {
+    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
+            monitor, /*check_for_exceptions=*/false);
+  } else {
+
+    // template code:
+    //
+    // if ((displaced_header = monitor->displaced_header()) == NULL) {
+    //   // Recursive unlock.  Mark the monitor unlocked by setting the object field to NULL.
+    //   monitor->set_obj(NULL);
+    // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
+    //   // We swapped the unlocked mark in displaced_header into the object's mark word.
+    //   monitor->set_obj(NULL);
+    // } else {
+    //   // Slow path.
+    //   InterpreterRuntime::monitorexit(THREAD, monitor);
+    // }
+
+    const Register object           = R7_ARG5;
+    const Register displaced_header = R8_ARG6;
+    const Register object_mark_addr = R9_ARG7;
+    const Register current_header   = R10_ARG8;
+
+    Label no_recursive_unlock;
+    Label slow_case;
+    Label done;
+
+    assert_different_registers(object, displaced_header, object_mark_addr, current_header);
+
+    if (UseBiasedLocking) {
+      // The object address from the monitor is in object.
+      ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
+      assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0");
+      biased_locking_exit(CCR0, object, displaced_header, done);
+    }
+
+    // Test first if we are in the fast recursive case.
+    ld(displaced_header, BasicObjectLock::lock_offset_in_bytes() +
+           BasicLock::displaced_header_offset_in_bytes(), monitor);
+
+    // If the displaced header is zero, we have a recursive unlock.
+    cmpdi(CCR0, displaced_header, 0);
+    bne(CCR0, no_recursive_unlock);
+    // Release in recursive unlock is not necessary.
+    // release();
+    std(displaced_header/*==0!*/, BasicObjectLock::obj_offset_in_bytes(), monitor);
+    b(done);
+
+    bind(no_recursive_unlock);
+
+    // } else if (Atomic::cmpxchg_ptr(displaced_header, obj->mark_addr(), monitor) == monitor) {
+    //   // We swapped the unlocked mark in displaced_header into the object's mark word.
+    //   monitor->set_obj(NULL);
+
+    // If we still have a lightweight lock, unlock the object and be done.
+
+    // The object address from the monitor is in object.
+    ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor);
+    addi(object_mark_addr, object, oopDesc::mark_offset_in_bytes());
+
+    // We have the displaced header in displaced_header. If the lock is still
+    // lightweight, it will contain the monitor address and we'll store the
+    // displaced header back into the object's mark word.
+    // CmpxchgX sets CCR0 to cmpX(current, monitor).
+    cmpxchgd(/*flag=*/CCR0,
+             /*current_value=*/current_header,
+             /*compare_value=*/monitor, /*exchange_value=*/displaced_header,
+             /*where=*/object_mark_addr,
+             MacroAssembler::MemBarRel | MacroAssembler::MemBarAcq,
+             MacroAssembler::cmpxchgx_hint_release_lock());
+    bne(CCR0, slow_case);
+
+    // Exchange worked, do monitor->set_obj(NULL).
+    li(R0, 0);
+    // Must realease earlier (see cmpxchgd above).
+    // release();
+    std(R0, BasicObjectLock::obj_offset_in_bytes(), monitor);
+    b(done);
+
+
+    // } else {
+    //   // Slow path.
+    //   InterpreterRuntime::monitorexit(THREAD, monitor);
+
+    // The lock has been converted into a heavy lock and hence
+    // we need to get into the slow case.
+    bind(slow_case);
+    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit),
+            monitor, /*check_for_exceptions=*/false);
+    // }
+
+    bind(done);
+  }
+}
+
+void InterpreterMacroAssembler::get_method_counters(Register method,
+                                                    Register Rcounters,
+                                                    Label& skip) {
+  BLOCK_COMMENT("Load and ev. allocate counter object {");
+  Label has_counters;
+  ld(Rcounters, in_bytes(Method::method_counters_offset()), method);
+  cmpdi(CCR0, Rcounters, 0);
+  bne(CCR0, has_counters);
+  call_VM(noreg, CAST_FROM_FN_PTR(address,
+                                  InterpreterRuntime::build_method_counters), method, false);
+  ld(Rcounters, in_bytes(Method::method_counters_offset()), method);
+  cmpdi(CCR0, Rcounters, 0);
+  beq(CCR0, skip); // No MethodCounters, OutOfMemory.
+  BLOCK_COMMENT("} Load and ev. allocate counter object");
+
+  bind(has_counters);
+}
+
+void InterpreterMacroAssembler::increment_invocation_counter(Register Rcounters, Register iv_be_count, Register Rtmp_r0) {
+  assert(UseCompiler, "incrementing must be useful");
+  Register invocation_count = iv_be_count;
+  Register backedge_count   = Rtmp_r0;
+  int delta = InvocationCounter::count_increment;
+
+  // Load each counter in a register.
+  //  ld(inv_counter, Rtmp);
+  //  ld(be_counter, Rtmp2);
+  int inv_counter_offset = in_bytes(MethodCounters::invocation_counter_offset() +
+                                    InvocationCounter::counter_offset());
+  int be_counter_offset  = in_bytes(MethodCounters::backedge_counter_offset() +
+                                    InvocationCounter::counter_offset());
+
+  BLOCK_COMMENT("Increment profiling counters {");
+
+  // Load the backedge counter.
+  lwz(backedge_count, be_counter_offset, Rcounters); // is unsigned int
+  // Mask the backedge counter.
+  Register tmp = invocation_count;
+  li(tmp, InvocationCounter::count_mask_value);
+  andr(backedge_count, tmp, backedge_count); // Cannot use andi, need sign extension of count_mask_value.
+
+  // Load the invocation counter.
+  lwz(invocation_count, inv_counter_offset, Rcounters); // is unsigned int
+  // Add the delta to the invocation counter and store the result.
+  addi(invocation_count, invocation_count, delta);
+  // Store value.
+  stw(invocation_count, inv_counter_offset, Rcounters);
+
+  // Add invocation counter + backedge counter.
+  add(iv_be_count, backedge_count, invocation_count);
+
+  // Note that this macro must leave the backedge_count + invocation_count in
+  // register iv_be_count!
+  BLOCK_COMMENT("} Increment profiling counters");
+}
+
+void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) {
+  if (state == atos) { MacroAssembler::verify_oop(reg); }
+}
+
+// Inline assembly for:
+//
+// if (thread is in interp_only_mode) {
+//   InterpreterRuntime::post_method_entry();
+// }
+// if (*jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_ENTRY ) ||
+//     *jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_ENTRY2)   ) {
+//   SharedRuntime::jvmpi_method_entry(method, receiver);
+// }
+void InterpreterMacroAssembler::notify_method_entry() {
+  // JVMTI
+  // Whenever JVMTI puts a thread in interp_only_mode, method
+  // entry/exit events are sent for that thread to track stack
+  // depth. If it is possible to enter interp_only_mode we add
+  // the code to check if the event should be sent.
+  if (JvmtiExport::can_post_interpreter_events()) {
+    Label jvmti_post_done;
+
+    lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
+    cmpwi(CCR0, R0, 0);
+    beq(CCR0, jvmti_post_done);
+    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_entry),
+            /*check_exceptions=*/false);
+
+    bind(jvmti_post_done);
+  }
+}
+
+
+// Inline assembly for:
+//
+// if (thread is in interp_only_mode) {
+//   // save result
+//   InterpreterRuntime::post_method_exit();
+//   // restore result
+// }
+// if (*jvmpi::event_flags_array_at_addr(JVMPI_EVENT_METHOD_EXIT)) {
+//   // save result
+//   SharedRuntime::jvmpi_method_exit();
+//   // restore result
+// }
+//
+// Native methods have their result stored in d_tmp and l_tmp.
+// Java methods have their result stored in the expression stack.
+void InterpreterMacroAssembler::notify_method_exit(bool is_native_method, TosState state) {
+  // JVMTI
+  // Whenever JVMTI puts a thread in interp_only_mode, method
+  // entry/exit events are sent for that thread to track stack
+  // depth. If it is possible to enter interp_only_mode we add
+  // the code to check if the event should be sent.
+  if (JvmtiExport::can_post_interpreter_events()) {
+    Label jvmti_post_done;
+
+    lwz(R0, in_bytes(JavaThread::interp_only_mode_offset()), R16_thread);
+    cmpwi(CCR0, R0, 0);
+    beq(CCR0, jvmti_post_done);
+    call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit),
+            /*check_exceptions=*/false);
+
+    bind(jvmti_post_done);
+  }
+}
+
+// Convert the current TOP_IJAVA_FRAME into a PARENT_IJAVA_FRAME
+// (using parent_frame_resize) and push a new interpreter
+// TOP_IJAVA_FRAME (using frame_size).
+void InterpreterMacroAssembler::push_interpreter_frame(Register top_frame_size, Register parent_frame_resize,
+                                                       Register tmp1, Register tmp2, Register tmp3,
+                                                       Register tmp4, Register pc) {
+  assert_different_registers(top_frame_size, parent_frame_resize, tmp1, tmp2, tmp3, tmp4);
+  ld(tmp1, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
+  mr(tmp2/*top_frame_sp*/, R1_SP);
+  // Move initial_caller_sp.
+  ld(tmp4, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
+  neg(parent_frame_resize, parent_frame_resize);
+  resize_frame(parent_frame_resize/*-parent_frame_resize*/, tmp3);
+
+  // Set LR in new parent frame.
+  std(tmp1, _abi(lr), R1_SP);
+  // Set top_frame_sp info for new parent frame.
+  std(tmp2, _parent_ijava_frame_abi(top_frame_sp), R1_SP);
+  std(tmp4, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);
+
+  // Push new TOP_IJAVA_FRAME.
+  push_frame(top_frame_size, tmp2);
+
+  get_PC_trash_LR(tmp3);
+  std(tmp3, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
+  // Used for non-initial callers by unextended_sp().
+  std(R1_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
+}
+
+// Pop the topmost TOP_IJAVA_FRAME and convert the previous
+// PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME.
+void InterpreterMacroAssembler::pop_interpreter_frame(Register tmp1, Register tmp2, Register tmp3, Register tmp4) {
+  assert_different_registers(tmp1, tmp2, tmp3, tmp4);
+
+  ld(tmp1/*caller's sp*/, _abi(callers_sp), R1_SP);
+  ld(tmp3, _abi(lr), tmp1);
+
+  ld(tmp4, _parent_ijava_frame_abi(initial_caller_sp), tmp1);
+
+  ld(tmp2/*caller's caller's sp*/, _abi(callers_sp), tmp1);
+  // Merge top frame.
+  std(tmp2, _abi(callers_sp), R1_SP);
+
+  ld(tmp2, _parent_ijava_frame_abi(top_frame_sp), tmp1);
+
+  // Update C stack pointer to caller's top_abi.
+  resize_frame_absolute(tmp2/*addr*/, tmp1/*tmp*/, tmp2/*tmp*/);
+
+  // Update LR in top_frame.
+  std(tmp3, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
+
+  std(tmp4, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
+
+  // Store the top-frame stack-pointer for c2i adapters.
+  std(R1_SP, _top_ijava_frame_abi(top_frame_sp), R1_SP);
+}
+
+#ifdef CC_INTERP
+// Turn state's interpreter frame into the current TOP_IJAVA_FRAME.
+void InterpreterMacroAssembler::pop_interpreter_frame_to_state(Register state, Register tmp1, Register tmp2, Register tmp3) {
+  assert_different_registers(R14_state, R15_prev_state, tmp1, tmp2, tmp3);
+
+  if (state == R14_state) {
+    ld(tmp1/*state's fp*/, state_(_last_Java_fp));
+    ld(tmp2/*state's sp*/, state_(_last_Java_sp));
+  } else if (state == R15_prev_state) {
+    ld(tmp1/*state's fp*/, prev_state_(_last_Java_fp));
+    ld(tmp2/*state's sp*/, prev_state_(_last_Java_sp));
+  } else {
+    ShouldNotReachHere();
+  }
+
+  // Merge top frames.
+  std(tmp1, _abi(callers_sp), R1_SP);
+
+  // Tmp2 is new SP.
+  // Tmp1 is parent's SP.
+  resize_frame_absolute(tmp2/*addr*/, tmp1/*tmp*/, tmp2/*tmp*/);
+
+  // Update LR in top_frame.
+  // Must be interpreter frame.
+  get_PC_trash_LR(tmp3);
+  std(tmp3, _top_ijava_frame_abi(frame_manager_lr), R1_SP);
+  // Used for non-initial callers by unextended_sp().
+  std(R1_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP);
+}
+#endif // CC_INTERP
+
+// Set SP to initial caller's sp, but before fix the back chain.
+void InterpreterMacroAssembler::resize_frame_to_initial_caller(Register tmp1, Register tmp2) {
+  ld(tmp1, _parent_ijava_frame_abi(initial_caller_sp), R1_SP);
+  ld(tmp2, _parent_ijava_frame_abi(callers_sp), R1_SP);
+  std(tmp2, _parent_ijava_frame_abi(callers_sp), tmp1); // Fix back chain ...
+  mr(R1_SP, tmp1); // ... and resize to initial caller.
+}
+
+#ifdef CC_INTERP
+// Pop the current interpreter state (without popping the correspoding
+// frame) and restore R14_state and R15_prev_state accordingly.
+// Use prev_state_may_be_0 to indicate whether prev_state may be 0
+// in order to generate an extra check before retrieving prev_state_(_prev_link).
+void InterpreterMacroAssembler::pop_interpreter_state(bool prev_state_may_be_0)
+{
+  // Move prev_state to state and restore prev_state from state_(_prev_link).
+  Label prev_state_is_0;
+  mr(R14_state, R15_prev_state);
+
+  // Don't retrieve /*state==*/prev_state_(_prev_link)
+  // if /*state==*/prev_state is 0.
+  if (prev_state_may_be_0) {
+    cmpdi(CCR0, R15_prev_state, 0);
+    beq(CCR0, prev_state_is_0);
+  }
+
+  ld(R15_prev_state, /*state==*/prev_state_(_prev_link));
+  bind(prev_state_is_0);
+}
+
+void InterpreterMacroAssembler::restore_prev_state() {
+  // _prev_link is private, but cInterpreter is a friend.
+  ld(R15_prev_state, state_(_prev_link));
+}
+#endif // CC_INTERP
diff -r ca3dacaf9041 src/cpu/ppc/vm/interp_masm_ppc_64.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/interp_masm_ppc_64.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,89 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_INTERP_MASM_PPC_64_HPP
+#define CPU_PPC_VM_INTERP_MASM_PPC_64_HPP
+
+#include "assembler_ppc.inline.hpp"
+#include "interpreter/invocationCounter.hpp"
+
+// This file specializes the assembler with interpreter-specific macros
+
+
+class InterpreterMacroAssembler: public MacroAssembler {
+
+ public:
+  InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code) {}
+
+  // Handy address generation macros
+#define thread_(field_name) in_bytes(JavaThread::field_name ## _offset()), R16_thread
+#define method_(field_name) in_bytes(Method::field_name ## _offset()), R19_method
+
+#ifdef CC_INTERP
+#define state_(field_name)  in_bytes(byte_offset_of(BytecodeInterpreter, field_name)), R14_state
+#define prev_state_(field_name)  in_bytes(byte_offset_of(BytecodeInterpreter, field_name)), R15_prev_state
+#endif
+
+  void get_method_counters(Register method, Register Rcounters, Label& skip);
+  void increment_invocation_counter(Register iv_be_count, Register Rtmp1, Register Rtmp2_r0);
+
+  // Object locking
+  void lock_object  (Register lock_reg, Register obj_reg);
+  void unlock_object(Register lock_reg);
+
+  // Debugging
+  void verify_oop(Register reg, TosState state = atos);    // only if +VerifyOops && state == atos
+
+  // support for jvmdi/jvmpi
+  void notify_method_entry();
+  void notify_method_exit(bool save_result, TosState state);
+
+  // Convert the current TOP_IJAVA_FRAME into a PARENT_IJAVA_FRAME
+  // (using parent_frame_resize) and push a new interpreter
+  // TOP_IJAVA_FRAME (using frame_size).
+  void push_interpreter_frame(Register top_frame_size, Register parent_frame_resize,
+                              Register tmp1, Register tmp2, Register tmp3, Register tmp4, Register pc=noreg);
+
+  // Pop the topmost TOP_IJAVA_FRAME and convert the previous
+  // PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME.
+  void pop_interpreter_frame(Register tmp1, Register tmp2, Register tmp3, Register tmp4);
+
+  // Turn state's interpreter frame into the current TOP_IJAVA_FRAME.
+  void pop_interpreter_frame_to_state(Register state, Register tmp1, Register tmp2, Register tmp3);
+
+  // Set SP to initial caller's sp, but before fix the back chain.
+  void resize_frame_to_initial_caller(Register tmp1, Register tmp2);
+
+  // Pop the current interpreter state (without popping the
+  // correspoding frame) and restore R14_state and R15_prev_state
+  // accordingly. Use prev_state_may_be_0 to indicate whether
+  // prev_state may be 0 in order to generate an extra check before
+  // retrieving prev_state_(_prev_link).
+  void pop_interpreter_state(bool prev_state_may_be_0);
+
+  void restore_prev_state();
+};
+
+#endif // CPU_PPC_VM_INTERP_MASM_PPC_64_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/interpreterGenerator_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/interpreterGenerator_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,37 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP
+#define CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP
+
+ friend class AbstractInterpreterGenerator;
+
+ private:
+
+  address generate_abstract_entry(void);
+  address generate_accessor_entry(void);
+  address generate_Reference_get_entry(void);
+
+#endif // CPU_PPC_VM_INTERPRETERGENERATOR_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/interpreterRT_ppc.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/interpreterRT_ppc.cpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "memory/allocation.inline.hpp"
+#include "memory/universe.inline.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/icache.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/signature.hpp"
+
+#define __ _masm->
+
+// Access macros for Java and C arguments.
+// The first Java argument is at index -1.
+#define locals_j_arg_at(index)    (Interpreter::local_offset_in_bytes(index)), R18_locals
+// The first C argument is at index 0.
+#define sp_c_arg_at(index)        ((index)*wordSize + _abi(carg_1)), R1_SP
+
+// Implementation of SignatureHandlerGenerator
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_int() {
+  Argument jni_arg(jni_offset());
+  Register r = jni_arg.is_register() ? jni_arg.as_register() : R0;
+
+  __ lwa(r, locals_j_arg_at(offset())); // sign extension of integer
+  if (DEBUG_ONLY(true ||) !jni_arg.is_register()) {
+    __ std(r, sp_c_arg_at(jni_arg.number()));
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_long() {
+  Argument jni_arg(jni_offset());
+  Register r = jni_arg.is_register() ? jni_arg.as_register() : R0;
+
+  __ ld(r, locals_j_arg_at(offset()+1)); // long resides in upper slot
+  if (DEBUG_ONLY(true ||) !jni_arg.is_register()) {
+    __ std(r, sp_c_arg_at(jni_arg.number()));
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_float() {
+  FloatRegister fp_reg = (_num_used_fp_arg_regs < 13/*max_fp_register_arguments*/)
+                         ? as_FloatRegister((_num_used_fp_arg_regs++) + F1_ARG1->encoding())
+                         : F0;
+
+  __ lfs(fp_reg, locals_j_arg_at(offset()));
+  if (DEBUG_ONLY(true ||) jni_offset() > 8) {
+    __ stfs(fp_reg, sp_c_arg_at(jni_offset()));
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_double() {
+  FloatRegister fp_reg = (_num_used_fp_arg_regs < 13/*max_fp_register_arguments*/)
+                         ? as_FloatRegister((_num_used_fp_arg_regs++) + F1_ARG1->encoding())
+                         : F0;
+
+  __ lfd(fp_reg, locals_j_arg_at(offset()+1));
+  if (DEBUG_ONLY(true ||) jni_offset() > 8) {
+    __ stfd(fp_reg, sp_c_arg_at(jni_offset()));
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::pass_object() {
+  Argument jni_arg(jni_offset());
+  Register r = jni_arg.is_register() ? jni_arg.as_register() : R11_scratch1;
+
+  // The handle for a receiver will never be null.
+  bool do_NULL_check = offset() != 0 || is_static();
+
+  Label do_null;
+  if (do_NULL_check) {
+    __ ld(R0, locals_j_arg_at(offset()));
+    __ cmpdi(CCR0, R0, 0);
+    __ li(r, 0);
+    __ beq(CCR0, do_null);
+  }
+  __ addir(r, locals_j_arg_at(offset()));
+  __ bind(do_null);
+  if (DEBUG_ONLY(true ||) !jni_arg.is_register()) {
+    __ std(r, sp_c_arg_at(jni_arg.number()));
+  }
+}
+
+void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) {
+  // Emit fd for current codebuffer. Needs patching!
+  __ emit_fd();
+
+  // Generate code to handle arguments.
+  iterate(fingerprint);
+
+  // Return the result handler.
+  __ load_const(R3_RET, AbstractInterpreter::result_handler(method()->result_type()));
+  __ blr();
+
+  __ flush();
+}
+
+#undef __
+
+// Implementation of SignatureHandlerLibrary
+
+void SignatureHandlerLibrary::pd_set_handler(address handler) {
+  // patch fd here.
+  FunctionDescriptor* fd = (FunctionDescriptor*) handler;
+
+  fd->set_entry(handler + (int)sizeof(FunctionDescriptor));
+  assert(fd->toc() == (address)0xcafe, "need to adjust TOC here");
+}
+
+
+// Access function to get the signature.
+IRT_ENTRY(address, InterpreterRuntime::get_signature(JavaThread* thread, Method* method))
+  methodHandle m(thread, method);
+  assert(m->is_native(), "sanity check");
+  Symbol *s = m->signature();
+  return (address) s->base();
+IRT_END
+
+IRT_ENTRY(address, InterpreterRuntime::get_result_handler(JavaThread* thread, Method* method))
+  methodHandle m(thread, method);
+  assert(m->is_native(), "sanity check");
+  return AbstractInterpreter::result_handler(m->result_type());
+IRT_END
diff -r ca3dacaf9041 src/cpu/ppc/vm/interpreterRT_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/interpreterRT_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_INTERPRETERRT_PPC_HPP
+#define CPU_PPC_VM_INTERPRETERRT_PPC_HPP
+
+#include "memory/allocation.hpp"
+
+// native method calls
+
+class SignatureHandlerGenerator: public NativeSignatureIterator {
+ private:
+  MacroAssembler* _masm;
+  // number of already used floating-point argument registers
+  int _num_used_fp_arg_regs;
+
+  void pass_int();
+  void pass_long();
+  void pass_double();
+  void pass_float();
+  void pass_object();
+
+ public:
+  // Creation
+  SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) {
+    _masm = new MacroAssembler(buffer);
+    _num_used_fp_arg_regs = 0;
+  }
+
+  // Code generation
+  void generate(uint64_t fingerprint);
+};
+
+// Support for generate_slow_signature_handler.
+static address get_result_handler(JavaThread* thread, Method* method);
+
+// A function to get the signature.
+static address get_signature(JavaThread* thread, Method* method);
+
+#endif // CPU_PPC_VM_INTERPRETERRT_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/interpreter_ppc.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/interpreter_ppc.cpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,736 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "interpreter/bytecodeHistogram.hpp"
+#include "interpreter/interpreter.hpp"
+#include "interpreter/interpreterGenerator.hpp"
+#include "interpreter/interpreterRuntime.hpp"
+#include "interpreter/templateTable.hpp"
+#include "oops/arrayOop.hpp"
+#include "oops/methodData.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "prims/jvmtiExport.hpp"
+#include "prims/jvmtiThreadState.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/arguments.hpp"
+#include "runtime/deoptimization.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "runtime/synchronizer.hpp"
+#include "runtime/timer.hpp"
+#include "runtime/vframeArray.hpp"
+#include "utilities/debug.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#endif
+
+#ifndef CC_INTERP
+#error "CC_INTERP must be defined on PPC"
+#endif
+
+#define __ _masm->
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) // nothing
+#else
+#define BLOCK_COMMENT(str) __ block_comment(str)
+#endif
+
+#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
+
+int AbstractInterpreter::BasicType_as_index(BasicType type) {
+  int i = 0;
+  switch (type) {
+    case T_BOOLEAN: i = 0; break;
+    case T_CHAR   : i = 1; break;
+    case T_BYTE   : i = 2; break;
+    case T_SHORT  : i = 3; break;
+    case T_INT    : i = 4; break;
+    case T_LONG   : i = 5; break;
+    case T_VOID   : i = 6; break;
+    case T_FLOAT  : i = 7; break;
+    case T_DOUBLE : i = 8; break;
+    case T_OBJECT : i = 9; break;
+    case T_ARRAY  : i = 9; break;
+    default       : ShouldNotReachHere();
+  }
+  assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, "index out of bounds");
+  return i;
+}
+
+address AbstractInterpreterGenerator::generate_slow_signature_handler() {
+  // Slow_signature handler that respects the PPC C calling conventions.
+  //
+  // We get called by the native entry code with our output register
+  // area == 8. First we call InterpreterRuntime::get_result_handler
+  // to copy the pointer to the signature string temporarily to the
+  // first C-argument and to return the result_handler in
+  // R3_RET. Since native_entry will copy the jni-pointer to the
+  // first C-argument slot later on, it is OK to occupy this slot
+  // temporarilly. Then we copy the argument list on the java
+  // expression stack into native varargs format on the native stack
+  // and load arguments into argument registers. Integer arguments in
+  // the varargs vector will be sign-extended to 8 bytes.
+  //
+  // On entry:
+  //   R3_ARG1        - intptr_t*     Address of java argument list in memory.
+  //   R15_prev_state - BytecodeInterpreter* Address of interpreter state for
+  //     this method
+  //   R19_method
+  //
+  // On exit (just before return instruction):
+  //   R3_RET            - contains the address of the result_handler.
+  //   R4_ARG2           - is not updated for static methods and contains "this" otherwise.
+  //   R5_ARG3-R10_ARG8: - When the (i-2)th Java argument is not of type float or double,
+  //                       ARGi contains this argument. Otherwise, ARGi is not updated.
+  //   F1_ARG1-F13_ARG13 - contain the first 13 arguments of type float or double.
+
+  const int LogSizeOfTwoInstructions = 3;
+
+  // FIXME: use Argument:: GL: Argument names different numbers!
+  const int max_fp_register_arguments  = 13;
+  const int max_int_register_arguments = 6;  // first 2 are reserved
+
+  const Register arg_java       = R21_tmp1;
+  const Register arg_c          = R22_tmp2;
+  const Register signature      = R23_tmp3;  // is string
+  const Register sig_byte       = R24_tmp4;
+  const Register fpcnt          = R25_tmp5;
+  const Register argcnt         = R26_tmp6;
+  const Register intSlot        = R27_tmp7;
+  const Register target_sp      = R28_tmp8;
+  const FloatRegister floatSlot = F0;
+
+  address entry = __ emit_fd();
+
+  __ save_LR_CR(R0);
+  __ save_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14));
+  // We use target_sp for storing arguments in the C frame.
+  __ mr(target_sp, R1_SP);
+  __ push_frame_abi112_nonvolatiles(0, R11_scratch1);
+
+  __ mr(arg_java, R3_ARG1);
+
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::get_signature), R16_thread, R19_method);
+
+  // Signature is in R3_RET. Signature is callee saved.
+  __ mr(signature, R3_RET);
+
+  // Reload method, it may have moved.
+#ifdef CC_INTERP
+  __ ld(R19_method, state_(_method));
+#else
+  __ unimplemented("slow signature handler 1");
+#endif
+
+  // Get the result handler.
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::get_result_handler), R16_thread, R19_method);
+
+  // Reload method, it may have moved.
+#ifdef CC_INTERP
+  __ ld(R19_method, state_(_method));
+#else
+  __ unimplemented("slow signature handler 2");
+#endif
+
+  {
+    Label L;
+    // test if static
+    // _access_flags._flags must be at offset 0.
+    // TODO PPC port: requires change in shared code.
+    //assert(in_bytes(AccessFlags::flags_offset()) == 0,
+    //       "MethodOopDesc._access_flags == MethodOopDesc._access_flags._flags");
+    // _access_flags must be a 32 bit value.
+    assert(sizeof(AccessFlags) == 4, "wrong size");
+    __ lwa(R11_scratch1/*access_flags*/, method_(access_flags));
+    // testbit with condition register.
+    __ testbitdi(CCR0, R0, R11_scratch1/*access_flags*/, JVM_ACC_STATIC_BIT);
+    __ btrue(CCR0, L);
+    // For non-static functions, pass "this" in R4_ARG2 and copy it
+    // to 2nd C-arg slot.
+    // We need to box the Java object here, so we use arg_java
+    // (address of current Java stack slot) as argument and don't
+    // dereference it as in case of ints, floats, etc.
+    __ mr(R4_ARG2, arg_java);
+    __ addi(arg_java, arg_java, -BytesPerWord);
+    __ std(R4_ARG2, _abi(carg_2), target_sp);
+    __ bind(L);
+  }
+
+  // Will be incremented directly after loop_start. argcnt=0
+  // corresponds to 3rd C argument.
+  __ li(argcnt, -1);
+  // arg_c points to 3rd C argument
+  __ addi(arg_c, target_sp, _abi(carg_3));
+  // no floating-point args parsed so far
+  __ li(fpcnt, 0);
+
+  Label move_intSlot_to_ARG, move_floatSlot_to_FARG;
+  Label loop_start, loop_end;
+  Label do_int, do_long, do_float, do_double, do_dontreachhere, do_object, do_array, do_boxed;
+
+  // signature points to '(' at entry
+#ifdef ASSERT
+  __ lbz(sig_byte, 0, signature);
+  __ cmplwi(CCR0, sig_byte, '(');
+  __ bne(CCR0, do_dontreachhere);
+#endif
+
+  __ bind(loop_start);
+
+  __ addi(argcnt, argcnt, 1);
+  __ lbzu(sig_byte, 1, signature);
+
+  __ cmplwi(CCR0, sig_byte, ')'); // end of signature
+  __ beq(CCR0, loop_end);
+
+  __ cmplwi(CCR0, sig_byte, 'B'); // byte
+  __ beq(CCR0, do_int);
+
+  __ cmplwi(CCR0, sig_byte, 'C'); // char
+  __ beq(CCR0, do_int);
+
+  __ cmplwi(CCR0, sig_byte, 'D'); // double
+  __ beq(CCR0, do_double);
+
+  __ cmplwi(CCR0, sig_byte, 'F'); // float
+  __ beq(CCR0, do_float);
+
+  __ cmplwi(CCR0, sig_byte, 'I'); // int
+  __ beq(CCR0, do_int);
+
+  __ cmplwi(CCR0, sig_byte, 'J'); // long
+  __ beq(CCR0, do_long);
+
+  __ cmplwi(CCR0, sig_byte, 'S'); // short
+  __ beq(CCR0, do_int);
+
+  __ cmplwi(CCR0, sig_byte, 'Z'); // boolean
+  __ beq(CCR0, do_int);
+
+  __ cmplwi(CCR0, sig_byte, 'L'); // object
+  __ beq(CCR0, do_object);
+
+  __ cmplwi(CCR0, sig_byte, '['); // array
+  __ beq(CCR0, do_array);
+
+  //  __ cmplwi(CCR0, sig_byte, 'V'); // void cannot appear since we do not parse the return type
+  //  __ beq(CCR0, do_void);
+
+  __ bind(do_dontreachhere);
+
+  __ unimplemented("ShouldNotReachHere in slow_signature_handler", 120);
+
+  __ bind(do_array);
+
+  {
+    Label start_skip, end_skip;
+
+    __ bind(start_skip);
+    __ lbzu(sig_byte, 1, signature);
+    __ cmplwi(CCR0, sig_byte, '[');
+    __ beq(CCR0, start_skip); // skip further brackets
+    __ cmplwi(CCR0, sig_byte, '9');
+    __ bgt(CCR0, end_skip);   // no optional size
+    __ cmplwi(CCR0, sig_byte, '0');
+    __ bge(CCR0, start_skip); // skip optional size
+    __ bind(end_skip);
+
+    __ cmplwi(CCR0, sig_byte, 'L');
+    __ beq(CCR0, do_object);  // for arrays of objects, the name of the object must be skipped
+    __ b(do_boxed);          // otherwise, go directly to do_boxed
+  }
+
+  __ bind(do_object);
+  {
+    Label L;
+    __ bind(L);
+    __ lbzu(sig_byte, 1, signature);
+    __ cmplwi(CCR0, sig_byte, ';');
+    __ bne(CCR0, L);
+   }
+  // Need to box the Java object here, so we use arg_java (address of
+  // current Java stack slot) as argument and don't dereference it as
+  // in case of ints, floats, etc.
+  Label do_null;
+  __ bind(do_boxed);
+  __ ld(R0,0, arg_java);
+  __ cmpdi(CCR0, R0, 0);
+  __ li(intSlot,0);
+  __ beq(CCR0, do_null);
+  __ mr(intSlot, arg_java);
+  __ bind(do_null);
+  __ std(intSlot, 0, arg_c);
+  __ addi(arg_java, arg_java, -BytesPerWord);
+  __ addi(arg_c, arg_c, BytesPerWord);
+  __ cmplwi(CCR0, argcnt, max_int_register_arguments);
+  __ blt(CCR0, move_intSlot_to_ARG);
+  __ b(loop_start);
+
+  __ bind(do_int);
+  __ lwa(intSlot, 0, arg_java);
+  __ std(intSlot, 0, arg_c);
+  __ addi(arg_java, arg_java, -BytesPerWord);
+  __ addi(arg_c, arg_c, BytesPerWord);
+  __ cmplwi(CCR0, argcnt, max_int_register_arguments);
+  __ blt(CCR0, move_intSlot_to_ARG);
+  __ b(loop_start);
+
+  __ bind(do_long);
+  __ ld(intSlot, -BytesPerWord, arg_java);
+  __ std(intSlot, 0, arg_c);
+  __ addi(arg_java, arg_java, - 2 * BytesPerWord);
+  __ addi(arg_c, arg_c, BytesPerWord);
+  __ cmplwi(CCR0, argcnt, max_int_register_arguments);
+  __ blt(CCR0, move_intSlot_to_ARG);
+  __ b(loop_start);
+
+  __ bind(do_float);
+  __ lfs(floatSlot, 0, arg_java);
+#if defined(LINUX)
+  __ stfs(floatSlot, 4, arg_c);
+#elif defined(AIX)
+  __ stfs(floatSlot, 0, arg_c);
+#else
+#error "unknown OS"
+#endif
+  __ addi(arg_java, arg_java, -BytesPerWord);
+  __ addi(arg_c, arg_c, BytesPerWord);
+  __ cmplwi(CCR0, fpcnt, max_fp_register_arguments);
+  __ blt(CCR0, move_floatSlot_to_FARG);
+  __ b(loop_start);
+
+  __ bind(do_double);
+  __ lfd(floatSlot, - BytesPerWord, arg_java);
+  __ stfd(floatSlot, 0, arg_c);
+  __ addi(arg_java, arg_java, - 2 * BytesPerWord);
+  __ addi(arg_c, arg_c, BytesPerWord);
+  __ cmplwi(CCR0, fpcnt, max_fp_register_arguments);
+  __ blt(CCR0, move_floatSlot_to_FARG);
+  __ b(loop_start);
+
+  __ bind(loop_end);
+
+  __ pop_frame();
+  __ restore_nonvolatile_gprs(R1_SP, _spill_nonvolatiles_neg(r14));
+  __ restore_LR_CR(R0);
+
+  __ blr();
+
+  Label move_int_arg, move_float_arg;
+  __ bind(move_int_arg); // each case must consist of 2 instructions (otherwise adapt LogSizeOfTwoInstructions)
+  __ mr(R5_ARG3, intSlot);  __ b(loop_start);
+  __ mr(R6_ARG4, intSlot);  __ b(loop_start);
+  __ mr(R7_ARG5, intSlot);  __ b(loop_start);
+  __ mr(R8_ARG6, intSlot);  __ b(loop_start);
+  __ mr(R9_ARG7, intSlot);  __ b(loop_start);
+  __ mr(R10_ARG8, intSlot); __ b(loop_start);
+
+  __ bind(move_float_arg); // each case must consist of 2 instructions (otherwise adapt LogSizeOfTwoInstructions)
+  __ fmr(F1_ARG1, floatSlot);   __ b(loop_start);
+  __ fmr(F2_ARG2, floatSlot);   __ b(loop_start);
+  __ fmr(F3_ARG3, floatSlot);   __ b(loop_start);
+  __ fmr(F4_ARG4, floatSlot);   __ b(loop_start);
+  __ fmr(F5_ARG5, floatSlot);   __ b(loop_start);
+  __ fmr(F6_ARG6, floatSlot);   __ b(loop_start);
+  __ fmr(F7_ARG7, floatSlot);   __ b(loop_start);
+  __ fmr(F8_ARG8, floatSlot);   __ b(loop_start);
+  __ fmr(F9_ARG9, floatSlot);   __ b(loop_start);
+  __ fmr(F10_ARG10, floatSlot); __ b(loop_start);
+  __ fmr(F11_ARG11, floatSlot); __ b(loop_start);
+  __ fmr(F12_ARG12, floatSlot); __ b(loop_start);
+  __ fmr(F13_ARG13, floatSlot); __ b(loop_start);
+
+  __ bind(move_intSlot_to_ARG);
+  __ sldi(R0, argcnt, LogSizeOfTwoInstructions);
+  __ load_const(R11_scratch1, move_int_arg); // Label must be bound here.
+  __ add(R11_scratch1, R0, R11_scratch1);
+  __ mtctr(R11_scratch1/*branch_target*/);
+  __ bctr();
+  __ bind(move_floatSlot_to_FARG);
+  __ sldi(R0, fpcnt, LogSizeOfTwoInstructions);
+  __ addi(fpcnt, fpcnt, 1);
+  __ load_const(R11_scratch1, move_float_arg); // Label must be bound here.
+  __ add(R11_scratch1, R0, R11_scratch1);
+  __ mtctr(R11_scratch1/*branch_target*/);
+  __ bctr();
+
+  return entry;
+}
+
+address AbstractInterpreterGenerator::generate_result_handler_for(BasicType type) {
+  //
+  // Registers alive
+  //   R3_RET
+  //   LR
+  //
+  // Registers updated
+  //   R3_RET
+  //
+
+  Label done;
+  Label is_false;
+
+  address entry = __ pc();
+
+  switch (type) {
+  case T_BOOLEAN:
+    __ cmpwi(CCR0, R3_RET, 0);
+    __ beq(CCR0, is_false);
+    __ li(R3_RET, 1);
+    __ b(done);
+    __ bind(is_false);
+    __ li(R3_RET, 0);
+    break;
+  case T_BYTE:
+     // sign extend 8 bits
+     __ extsb(R3_RET, R3_RET);
+     break;
+  case T_CHAR:
+     // zero extend 16 bits
+     __ clrldi(R3_RET, R3_RET, 48);
+     break;
+  case T_SHORT:
+     // sign extend 16 bits
+     __ extsh(R3_RET, R3_RET);
+     break;
+  case T_INT:
+     // sign extend 32 bits
+     __ extsw(R3_RET, R3_RET);
+     break;
+  case T_LONG:
+     break;
+  case T_OBJECT:
+    // unbox result if not null
+    __ cmpdi(CCR0, R3_RET, 0);
+    __ beq(CCR0, done);
+    __ ld(R3_RET, 0, R3_RET);
+    __ verify_oop(R3_RET);
+    break;
+  case T_FLOAT:
+     break;
+  case T_DOUBLE:
+     break;
+  case T_VOID:
+     break;
+  default: ShouldNotReachHere();
+  }
+
+  __ BIND(done);
+  __ blr();
+
+  return entry;
+}
+
+// Abstract method entry.
+//
+address InterpreterGenerator::generate_abstract_entry(void) {
+  address entry = __ pc();
+
+  //
+  // Registers alive
+  //   R16_thread     - JavaThread*
+  //   R19_method     - callee's methodOop (method to be invoked)
+  //   R1_SP          - SP prepared such that caller's outgoing args are near top
+  //   LR             - return address to caller
+  //
+  // Stack layout at this point:
+  //
+  //   0       [TOP_IJAVA_FRAME_ABI]         <-- R1_SP
+  //           alignment (optional)
+  //           [outgoing Java arguments]
+  //           ...
+  //   PARENT  [PARENT_IJAVA_FRAME_ABI]
+  //            ...
+  //
+
+  // Can't use call_VM here because we have not set up a new
+  // interpreter state. Make the call to the vm and make it look like
+  // our caller set up the JavaFrameAnchor.
+  __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R12_scratch2/*tmp*/);
+
+  // Push a new C frame and save LR.
+  __ save_LR_CR(R0);
+  __ push_frame_abi112_nonvolatiles(0, R11_scratch1);
+
+  // This is not a leaf but we have a JavaFrameAnchor now and we will
+  // check (create) exceptions afterward so this is ok.
+  __ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodError));
+
+  // Pop the C frame and restore LR.
+  __ pop_frame();
+  __ restore_LR_CR(R0);
+
+  // Reset JavaFrameAnchor from call_VM_leaf above.
+  __ reset_last_Java_frame();
+
+  // Return to frame manager, it will handle the pending exception.
+  __ blr();
+
+  return entry;
+}
+
+// Call an accessor method (assuming it is resolved, otherwise drop into
+// vanilla (slow path) entry.
+address InterpreterGenerator::generate_accessor_entry(void) {
+  if(!UseFastAccessorMethods && (!FLAG_IS_ERGO(UseFastAccessorMethods)))
+    return NULL;
+
+  Label Ldone, Lslow_path;
+
+  const Register Rthis = R3_ARG1,
+         Rconst_method = R4_ARG2,
+         Rcodes        = Rconst_method,
+         Rcpool_cache  = R5_ARG3,
+         Rscratch      = R11_scratch1,
+         Rjvmti_mode   = Rscratch,
+         Roffset       = R12_scratch2,
+         Rflags        = R6_ARG4;
+
+  address entry = __ pc();
+
+  // Check for safepoint:
+  // Ditch this, real man don't need safepoint checks.
+
+  // Also check for JVMTI mode
+  // Check for null obj, take slow path if so.
+#ifdef CC_INTERP
+  __ ld(Rthis, Interpreter::stackElementSize, R17_tos);
+#else
+  Unimplemented()
+#endif
+  __ lwz(Rjvmti_mode, thread_(interp_only_mode));
+  __ cmpdi(CCR1, Rthis, 0);
+  __ cmpwi(CCR0, Rjvmti_mode, 0);
+  __ crorc(/*CCR0 eq*/2, /*CCR1 eq*/4+2, /*CCR0 eq*/2);
+  __ beq(CCR0, Lslow_path); // this==null or jvmti_mode!=0
+
+  // Do 2 things in parallel:
+  // 1. Load the index out of the first instruction word, which looks like this:
+  //    <0x2a><0xb4><index (2 byte, native endianess)>.
+  // 2. Load constant pool cache base.
+  __ ld(Rconst_method, in_bytes(Method::const_offset()), R19_method);
+  __ ld(Rcpool_cache, in_bytes(ConstMethod::constants_offset()), Rconst_method);
+
+  __ lhz(Rcodes, in_bytes(ConstMethod::codes_offset()) + 2, Rconst_method); // Lower half of 32 bit field.
+  __ ld(Rcpool_cache, ConstantPool::cache_offset_in_bytes(), Rcpool_cache);
+
+  // Get the const pool entry by means of <index>.
+  const int codes_shift = exact_log2(in_words(ConstantPoolCacheEntry::size()) * BytesPerWord);
+  __ slwi(Rscratch, Rcodes, codes_shift); // (codes&0xFFFF)<<codes_shift
+  __ add(Rcpool_cache, Rscratch, Rcpool_cache);
+
+  // Check if cpool cache entry is resolved.
+  // We are resolved if the indices offset contains the current bytecode.
+  ByteSize cp_base_offset = ConstantPoolCache::base_offset();
+  // Big Endian:
+  __ lbz(Rscratch, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::indices_offset()) + 7 - 2, Rcpool_cache);
+  __ cmpwi(CCR0, Rscratch, Bytecodes::_getfield);
+  __ bne(CCR0, Lslow_path);
+  __ isync(); // Order succeeding loads wrt. load of _indices field from cpool_cache.
+
+  // Finally, start loading the value: Get cp cache entry into regs.
+  __ ld(Rflags, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::flags_offset()), Rcpool_cache);
+  __ ld(Roffset, in_bytes(cp_base_offset) + in_bytes(ConstantPoolCacheEntry::f2_offset()), Rcpool_cache);
+
+  // Get field type.
+  // (Rflags>>ConstantPoolCacheEntry::tos_state_shift)&((1<<ConstantPoolCacheEntry::tos_state_bits)-1)
+  __ rldicl(Rflags, Rflags, 64-ConstantPoolCacheEntry::tos_state_shift, 64-ConstantPoolCacheEntry::tos_state_bits);
+
+#ifdef ASSERT
+    __ ld(R9_ARG7, 0, R1_SP);
+    __ ld(R10_ARG8, 0, R21_sender_SP);
+    __ cmpd(CCR0, R9_ARG7, R10_ARG8);
+    __ asm_assert_eq("backlink", 0x543);
+#endif // ASSERT
+  __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
+
+  // Load the return value according to field type.
+  Label Litos, Lltos, Lbtos, Lctos, Lstos;
+  __ cmpdi(CCR1, Rflags, itos);
+  __ cmpdi(CCR0, Rflags, ltos);
+  __ beq(CCR1, Litos);
+  __ beq(CCR0, Lltos);
+  __ cmpdi(CCR1, Rflags, btos);
+  __ cmpdi(CCR0, Rflags, ctos);
+  __ beq(CCR1, Lbtos);
+  __ beq(CCR0, Lctos);
+  __ cmpdi(CCR1, Rflags, stos);
+  __ beq(CCR1, Lstos);
+#ifdef ASSERT
+  __ cmpdi(CCR0, Rflags, atos);
+  __ asm_assert_eq("what type is this?", 0x432);
+#endif
+  // fallthru: __ bind(Latos);
+  __ load_heap_oop(R3_RET, (RegisterOrConstant)Roffset, Rthis);
+  __ blr();
+
+  __ bind(Litos);
+  __ lwax(R3_RET, Rthis, Roffset);
+  __ blr();
+
+  __ bind(Lltos);
+  __ ldx(R3_RET, Rthis, Roffset);
+  __ blr();
+
+  __ bind(Lbtos);
+  __ lbzx(R3_RET, Rthis, Roffset);
+  __ extsb(R3_RET, R3_RET);
+  __ blr();
+
+  __ bind(Lctos);
+  __ lhzx(R3_RET, Rthis, Roffset);
+  __ blr();
+
+  __ bind(Lstos);
+  __ lhax(R3_RET, Rthis, Roffset);
+  __ blr();
+
+  __ bind(Lslow_path);
+  assert(Interpreter::entry_for_kind(Interpreter::zerolocals), "Normal entry must have been generated by now");
+  __ load_const_optimized(Rscratch, Interpreter::entry_for_kind(Interpreter::zerolocals), R0);
+  __ mtctr(Rscratch);
+  __ bctr();
+  __ flush();
+
+  return entry;
+}
+
+// Interpreter intrinsic for WeakReference.get().
+// 1. Don't push a full blown frame and go on dispatching, but fetch the value
+//    into R8 and return quickly
+// 2. If G1 is active we *must* execute this intrinsic for corrrectness:
+//    It contains a GC barrier which puts the reference into the satb buffer
+//    to indicate that someone holds a strong reference to the object the
+//    weak ref points to!
+address InterpreterGenerator::generate_Reference_get_entry(void) {
+  // Code: _aload_0, _getfield, _areturn
+  // parameter size = 1
+  //
+  // The code that gets generated by this routine is split into 2 parts:
+  //    1. the "intrinsified" code for G1 (or any SATB based GC),
+  //    2. the slow path - which is an expansion of the regular method entry.
+  //
+  // Notes:
+  // * In the G1 code we do not check whether we need to block for
+  //   a safepoint. If G1 is enabled then we must execute the specialized
+  //   code for Reference.get (except when the Reference object is null)
+  //   so that we can log the value in the referent field with an SATB
+  //   update buffer.
+  //   If the code for the getfield template is modified so that the
+  //   G1 pre-barrier code is executed when the current method is
+  //   Reference.get() then going through the normal method entry
+  //   will be fine.
+  // * The G1 code can, however, check the receiver object (the instance
+  //   of java.lang.Reference) and jump to the slow path if null. If the
+  //   Reference object is null then we obviously cannot fetch the referent
+  //   and so we don't need to call the G1 pre-barrier. Thus we can use the
+  //   regular method entry code to generate the NPE.
+  //
+  // This code is based on generate_accessor_enty.
+
+  address entry = __ pc();
+
+  const int referent_offset = java_lang_ref_Reference::referent_offset;
+  guarantee(referent_offset > 0, "referent offset not initialized");
+
+  if (UseG1GC) {
+     Label slow_path;
+
+    // Debugging not possible, so can't use __ skip_if_jvmti_mode(slow_path, GR31_SCRATCH);
+
+    // In the G1 code we don't check if we need to reach a safepoint. We
+    // continue and the thread will safepoint at the next bytecode dispatch.
+
+    // If the receiver is null then it is OK to jump to the slow path.
+#ifdef CC_INTERP
+     __ ld(R3_RET, Interpreter::stackElementSize, R17_tos); // get receiver
+#else
+     Unimplemented();
+#endif
+
+    // Check if receiver == NULL and go the slow path.
+    __ cmpdi(CCR0, R3_RET, 0);
+    __ beq(CCR0, slow_path);
+
+    // Load the value of the referent field.
+    __ load_heap_oop_not_null(R3_RET, referent_offset, R3_RET);
+
+    // Generate the G1 pre-barrier code to log the value of
+    // the referent field in an SATB buffer. Note with
+    // these parameters the pre-barrier does not generate
+    // the load of the previous value.
+
+    // Restore caller sp for c2i case.
+#ifdef ASSERT
+      __ ld(R9_ARG7, 0, R1_SP);
+      __ ld(R10_ARG8, 0, R21_sender_SP);
+      __ cmpd(CCR0, R9_ARG7, R10_ARG8);
+      __ asm_assert_eq("backlink", 0x544);
+#endif // ASSERT
+    __ mr(R1_SP, R21_sender_SP); // Cut the stack back to where the caller started.
+
+    __ g1_write_barrier_pre(noreg,         // obj
+                            noreg,         // offset
+                            R3_RET,        // pre_val
+                            R11_scratch1,  // tmp
+                            R12_scratch2,  // tmp
+                            true);         // needs_frame
+
+    __ blr();
+
+    // Generate regular method entry.
+    __ bind(slow_path);
+    assert(Interpreter::entry_for_kind(Interpreter::zerolocals), "Normal entry must have been generated by now");
+    __ load_const_optimized(R11_scratch1, Interpreter::entry_for_kind(Interpreter::zerolocals), R0);
+    __ mtctr(R11_scratch1);
+    __ bctr();
+    __ flush();
+
+    return entry;
+  } else {
+    return generate_accessor_entry();
+  }
+}
+
+void Deoptimization::unwind_callee_save_values(frame* f, vframeArray* vframe_array) {
+  // This code is sort of the equivalent of C2IAdapter::setup_stack_frame back in
+  // the days we had adapter frames. When we deoptimize a situation where a
+  // compiled caller calls a compiled caller will have registers it expects
+  // to survive the call to the callee. If we deoptimize the callee the only
+  // way we can restore these registers is to have the oldest interpreter
+  // frame that we create restore these values. That is what this routine
+  // will accomplish.
+
+  // At the moment we have modified c2 to not have any callee save registers
+  // so this problem does not exist and this routine is just a place holder.
+
+  assert(f->is_interpreted_frame(), "must be interpreted");
+}
diff -r ca3dacaf9041 src/cpu/ppc/vm/interpreter_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/interpreter_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,42 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_INTERPRETER_PPC_HPP
+#define CPU_PPC_VM_INTERPRETER_PPC_HPP
+
+ public:
+
+  // Stack index relative to tos (which points at value)
+  static int expr_index_at(int i) {
+    return stackElementWords * i;
+  }
+
+  // Already negated by c++ interpreter
+  static int local_index_at(int i) {
+    assert(i <= 0, "local direction already negated");
+    return stackElementWords * i;
+  }
+
+#endif // CPU_PPC_VM_INTERPRETER_PPC_PP
diff -r ca3dacaf9041 src/cpu/ppc/vm/javaFrameAnchor_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/javaFrameAnchor_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,82 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_JAVAFRAMEANCHOR_PPC_HPP
+#define CPU_PPC_VM_JAVAFRAMEANCHOR_PPC_HPP
+
+#ifndef CC_INTERP
+#error "CC_INTERP must be defined on PPC64"
+#endif
+
+public:
+  // Each arch must define reset, save, restore
+  // These are used by objects that only care about:
+  //  1 - initializing a new state (thread creation, javaCalls)
+  //  2 - saving a current state (javaCalls)
+  //  3 - restoring an old state (javaCalls)
+
+  inline void clear(void) {
+    // clearing _last_Java_sp must be first
+    _last_Java_sp = NULL;
+    // fence?
+    OrderAccess::release();
+    _last_Java_pc = NULL;
+  }
+
+  inline void set(intptr_t* sp, address pc) {
+    _last_Java_pc = pc;
+    OrderAccess::release();
+    _last_Java_sp = sp;
+  }
+
+  void copy(JavaFrameAnchor* src) {
+    // In order to make sure the transition state is valid for "this".
+    // We must clear _last_Java_sp before copying the rest of the new data.
+    //
+    // Hack Alert: Temporary bugfix for 4717480/4721647
+    // To act like previous version (pd_cache_state) don't NULL _last_Java_sp
+    // unless the value is changing.
+    if (_last_Java_sp != src->_last_Java_sp) {
+      _last_Java_sp = NULL;
+      OrderAccess::release();
+    }
+    _last_Java_pc = src->_last_Java_pc;
+    // Must be last so profiler will always see valid frame if has_last_frame() is true.
+    OrderAccess::release();
+    _last_Java_sp = src->_last_Java_sp;
+  }
+
+  // Always walkable.
+  bool walkable(void) { return true; }
+  // Never any thing to do since we are always walkable and can find address of return addresses.
+  void make_walkable(JavaThread* thread) { }
+
+  intptr_t* last_Java_sp(void) const  { return _last_Java_sp; }
+
+  address last_Java_pc(void)          { return _last_Java_pc; }
+
+  void set_last_Java_sp(intptr_t* sp) { OrderAccess::release(); _last_Java_sp = sp; }
+
+#endif // CPU_PPC_VM_JAVAFRAMEANCHOR_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/jniFastGetField_ppc.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/jniFastGetField_ppc.cpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "assembler_ppc.inline.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/jniFastGetField.hpp"
+#include "prims/jvm_misc.hpp"
+#include "runtime/safepoint.hpp"
+
+
+address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) {
+  // we don't have fast jni accessors.
+  return (address) -1;
+}
+
+address JNI_FastGetField::generate_fast_get_boolean_field() {
+  return generate_fast_get_int_field0(T_BOOLEAN);
+}
+
+address JNI_FastGetField::generate_fast_get_byte_field() {
+  return generate_fast_get_int_field0(T_BYTE);
+}
+
+address JNI_FastGetField::generate_fast_get_char_field() {
+  return generate_fast_get_int_field0(T_CHAR);
+}
+
+address JNI_FastGetField::generate_fast_get_short_field() {
+  return generate_fast_get_int_field0(T_SHORT);
+}
+
+address JNI_FastGetField::generate_fast_get_int_field() {
+  return generate_fast_get_int_field0(T_INT);
+}
+
+address JNI_FastGetField::generate_fast_get_long_field() {
+  // we don't have fast jni accessors.
+  return (address) -1;
+}
+
+address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) {
+  // e don't have fast jni accessors.
+  return (address) -1;
+}
+
+address JNI_FastGetField::generate_fast_get_float_field() {
+  return generate_fast_get_float_field0(T_FLOAT);
+}
+
+address JNI_FastGetField::generate_fast_get_double_field() {
+  return generate_fast_get_float_field0(T_DOUBLE);
+}
diff -r ca3dacaf9041 src/cpu/ppc/vm/jniTypes_ppc.hpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/jniTypes_ppc.hpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_JNITYPES_PPC_HPP
+#define CPU_PPC_VM_JNITYPES_PPC_HPP
+
+#include "memory/allocation.hpp"
+#include "oops/oop.hpp"
+#include "prims/jni.h"
+
+// This file holds platform-dependent routines used to write primitive
+// jni types to the array of arguments passed into JavaCalls::call.
+
+class JNITypes : AllStatic {
+  // These functions write a java primitive type (in native format) to
+  // a java stack slot array to be passed as an argument to
+  // JavaCalls:calls.  I.e., they are functionally 'push' operations
+  // if they have a 'pos' formal parameter.  Note that jlong's and
+  // jdouble's are written _in reverse_ of the order in which they
+  // appear in the interpreter stack.  This is because call stubs (see
+  // stubGenerator_sparc.cpp) reverse the argument list constructed by
+  // JavaCallArguments (see javaCalls.hpp).
+
+ private:
+
+#ifndef PPC64
+#error "ppc32 support currently not implemented!!!"
+#endif // PPC64
+
+ public:
+  // Ints are stored in native format in one JavaCallArgument slot at *to.
+  static inline void put_int(jint  from, intptr_t *to)           { *(jint *)(to +   0  ) =  from; }
+  static inline void put_int(jint  from, intptr_t *to, int& pos) { *(jint *)(to + pos++) =  from; }
+  static inline void put_int(jint *from, intptr_t *to, int& pos) { *(jint *)(to + pos++) = *from; }
+
+  // Longs are stored in native format in one JavaCallArgument slot at
+  // *(to+1).
+  static inline void put_long(jlong  from, intptr_t *to) {
+    *(jlong*) (to + 1) = from;
+  }
+
+  static inline void put_long(jlong  from, intptr_t *to, int& pos) {
+    *(jlong*) (to + 1 + pos) = from;
+    pos += 2;
+  }
+
+  static inline void put_long(jlong *from, intptr_t *to, int& pos) {
+    *(jlong*) (to + 1 + pos) = *from;
+    pos += 2;
+  }
+
+  // Oops are stored in native format in one JavaCallArgument slot at *to.
+  static inline void put_obj(oop  from, intptr_t *to)           { *(oop *)(to +   0  ) =  from; }
+  static inline void put_obj(oop  from, intptr_t *to, int& pos) { *(oop *)(to + pos++) =  from; }
+  static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; }
+
+  // Floats are stored in native format in one JavaCallArgument slot at *to.
+  static inline void put_float(jfloat  from, intptr_t *to)           { *(jfloat *)(to +   0  ) =  from;  }
+  static inline void put_float(jfloat  from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) =  from; }
+  static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
+
+  // Doubles are stored in native word format in one JavaCallArgument
+  // slot at *(to+1).
+  static inline void put_double(jdouble  from, intptr_t *to) {
+    *(jdouble*) (to + 1) = from;
+  }
+
+  static inline void put_double(jdouble  from, intptr_t *to, int& pos) {
+    *(jdouble*) (to + 1 + pos) = from;
+    pos += 2;
+  }
+
+  static inline void put_double(jdouble *from, intptr_t *to, int& pos) {
+    *(jdouble*) (to + 1 + pos) = *from;
+    pos += 2;
+  }
+
+  // The get_xxx routines, on the other hand, actually _do_ fetch
+  // java primitive types from the interpreter stack.
+  // No need to worry about alignment on Intel.
+  static inline jint    get_int   (intptr_t *from) { return *(jint *)    from; }
+  static inline jlong   get_long  (intptr_t *from) { return *(jlong *)  (from + 1); }
+  static inline oop     get_obj   (intptr_t *from) { return *(oop *)     from; }
+  static inline jfloat  get_float (intptr_t *from) { return *(jfloat *)  from; }
+  static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + 1); }
+};
+
+#endif // CPU_PPC_VM_JNITYPES_PPC_HPP
diff -r ca3dacaf9041 src/cpu/ppc/vm/jni_ppc.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/jni_ppc.h	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.  Oracle designates this
+ * particular file as subject to the "Classpath" exception as provided
+ * by Oracle in the LICENSE file that accompanied this code.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+#ifndef CPU_PPC_VM_JNI_PPC_H
+#define CPU_PPC_VM_JNI_PPC_H
+
+// Note: please do not change these without also changing jni_md.h in the JDK
+// repository
+#ifndef __has_attribute
+  #define __has_attribute(x) 0
+#endif
+#if (defined(__GNUC__) && ((__GNUC__ > 4) || (__GNUC__ == 4) && (__GNUC_MINOR__ > 2))) || __has_attribute(visibility)
+  #define JNIEXPORT     __attribute__((visibility("default")))
+  #define JNIIMPORT     __attribute__((visibility("default")))
+#else
+  #define JNIEXPORT
+  #define JNIIMPORT
+#endif
+
+  #define JNICALL
+  typedef int jint;
+#if defined(_LP64)
+  typedef long jlong;
+#else
+  typedef long long jlong;
+#endif
+
+typedef signed char jbyte;
+
+#endif // CPU_PPC_VM_JNI_PPC_H
diff -r ca3dacaf9041 src/cpu/ppc/vm/macroAssembler_ppc.cpp
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/macroAssembler_ppc.cpp	Wed Jul 24 14:29:57 2013 +0200
@@ -0,0 +1,3017 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/assembler.hpp"
+#include "asm/assembler.inline.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "compiler/disassembler.hpp"
+#include "gc_interface/collectedHeap.inline.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/cardTableModRefBS.hpp"
+#include "memory/resourceArea.hpp"
+#include "prims/methodHandles.hpp"
+#include "runtime/biasedLocking.hpp"
+#include "runtime/interfaceSupport.hpp"
+#include "runtime/objectMonitor.hpp"
+#include "runtime/os.hpp"
+#include "runtime/sharedRuntime.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "utilities/macros.hpp"
+#if INCLUDE_ALL_GCS
+#include "gc_implementation/g1/g1CollectedHeap.inline.hpp"
+#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp"
+#include "gc_implementation/g1/heapRegion.hpp"
+#endif // INCLUDE_ALL_GCS
+
+#ifdef PRODUCT
+#define BLOCK_COMMENT(str) // nothing
+#else
+#define BLOCK_COMMENT(str) block_comment(str)
+#endif
+
+#ifdef ASSERT
+// On RISC, there's no benefit to verifying instruction boundaries.
+bool AbstractAssembler::pd_check_instruction_mark() { return false; }
+#endif
+
+void MacroAssembler::ld_largeoffset_unchecked(Register d, int si31, Register a, int emit_filler_nop) {
+  assert(Assembler::is_simm(si31, 31) && si31 >= 0, "si31 out of range");
+  if (Assembler::is_simm(si31, 16)) {
+    ld(d, si31, a);
+    if (emit_filler_nop) nop();
+  } else {
+    const int hi = MacroAssembler::largeoffset_si16_si16_hi(si31);
+    const int lo = MacroAssembler::largeoffset_si16_si16_lo(si31);
+    addis(d, a, hi);
+    ld(d, lo, d);
+  }
+}
+
+void MacroAssembler::ld_largeoffset(Register d, int si31, Register a, int emit_filler_nop) {
+  assert_different_registers(d, a);
+  ld_largeoffset_unchecked(d, si31, a, emit_filler_nop);
+}
+
+void MacroAssembler::load_sized_value(Register dst, RegisterOrConstant offs, Register base,
+                                      size_t size_in_bytes, bool is_signed) {
+  switch (size_in_bytes) {
+  case  8:              ld(dst, offs, base);                         break;
+  case  4:  is_signed ? lwa(dst, offs, base) : lwz(dst, offs, base); break;
+  case  2:  is_signed ? lha(dst, offs, base) : lhz(dst, offs, base); break;
+  case  1:  lbz(dst, offs, base); if (is_signed) extsb(dst, dst);    break; // lba doesn't exist :(
+  default:  ShouldNotReachHere();
+  }
+}
+
+void MacroAssembler::store_sized_value(Register dst, RegisterOrConstant offs, Register base,
+                                       size_t size_in_bytes) {
+  switch (size_in_bytes) {
+  case  8:  std(dst, offs, base); break;
+  case  4:  stw(dst, offs, base); break;
+  case  2:  sth(dst, offs, base); break;
+  case  1:  stb(dst, offs, base); break;
+  default:  ShouldNotReachHere();
+  }
+}
+
+void MacroAssembler::align(int modulus) {
+  while (offset() % modulus != 0) nop();
+}
+
+// Issue instructions that calculate given TOC from global TOC.
+void MacroAssembler::calculate_address_from_global_toc(Register dst, address addr, bool hi16, bool lo16,
+                                                       bool add_relocation, bool emit_dummy_addr) {
+  int offset = -1;
+  if (emit_dummy_addr) {
+    offset = -128; // dummy address
+  } else if (addr != (address)(intptr_t)-1) {
+    offset = MacroAssembler::offset_to_global_toc(addr);
+  }
+
+  if (hi16) {
+    addis(dst, R29, MacroAssembler::largeoffset_si16_si16_hi(offset));
+  }
+  if (lo16) {
+    if (add_relocation) {
+      // Relocate at the addi to avoid confusion with a load from the method's TOC.
+      relocate(internal_word_Relocation::spec(addr));
+    }
+    addi(dst, dst, MacroAssembler::largeoffset_si16_si16_lo(offset));
+  }
+}
+
+int MacroAssembler::patch_calculate_address_from_global_toc_at(address a, address bound, address addr) {
+  const int offset = MacroAssembler::offset_to_global_toc(addr);
+
+  const address inst2_addr = a;
+  const int inst2 = *(int *)inst2_addr;
+
+  // The relocation points to the second instruction, the addi,
+  // and the addi reads and writes the same register dst.
+  const int dst = inv_rt_field(inst2);
+  assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
+
+  // Now, find the preceding addis which writes to dst.
+  int inst1 = 0;
+  address inst1_addr = inst2_addr - BytesPerInstWord;
+  while (inst1_addr >= bound) {
+    inst1 = *(int *) inst1_addr;
+    if (is_addis(inst1) && inv_rt_field(inst1) == dst) {
+      // Stop, found the addis which writes dst.
+      break;
+    }
+    inst1_addr -= BytesPerInstWord;
+  }
+
+  assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
+  set_imm((int *)inst1_addr, MacroAssembler::largeoffset_si16_si16_hi(offset));
+  set_imm((int *)inst2_addr, MacroAssembler::largeoffset_si16_si16_lo(offset));
+  return (int)((intptr_t)addr - (intptr_t)inst1_addr);
+}
+
+address MacroAssembler::get_address_of_calculate_address_from_global_toc_at(address a, address bound) {
+  const address inst2_addr = a;
+  const int inst2 = *(int *)inst2_addr;
+
+  // The relocation points to the second instruction, the addi,
+  // and the addi reads and writes the same register dst.
+  const int dst = inv_rt_field(inst2);
+  assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
+
+  // Now, find the preceding addis which writes to dst.
+  int inst1 = 0;
+  address inst1_addr = inst2_addr - BytesPerInstWord;
+  while (inst1_addr >= bound) {
+    inst1 = *(int *) inst1_addr;
+    if (is_addis(inst1) && inv_rt_field(inst1) == dst) {
+      // stop, found the addis which writes dst
+      break;
+    }
+    inst1_addr -= BytesPerInstWord;
+  }
+
+  assert(is_addis(inst1) && inv_ra_field(inst1) == 29 /* R29 */, "source must be global TOC");
+
+  int offset = (get_imm(inst1_addr, 0) << 16) + get_imm(inst2_addr, 0);
+  // -1 is a special case
+  if (offset == -1) {
+    return (address)(intptr_t)-1;
+  } else {
+    return global_toc() + offset;
+  }
+}
+
+#ifdef _LP64
+// Patch compressed oops or klass constants.
+int MacroAssembler::patch_set_narrow_oop(address a, address bound, narrowOop data) {
+  assert(UseCompressedOops, "Should only patch compressed oops");
+
+  const address inst2_addr = a;
+  const int inst2 = *(int *)inst2_addr;
+
+  // The relocation points to the second instruction, the addi,
+  // and the addi reads and writes the same register dst.
+  const int dst = inv_rt_field(inst2);
+  assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
+  // Now, find the preceding addis which writes to dst.
+  int inst1 = 0;
+  address inst1_addr = inst2_addr - BytesPerInstWord;
+  bool inst1_found = false;
+  while (inst1_addr >= bound) {
+    inst1 = *(int *)inst1_addr;
+    if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break; }
+    inst1_addr -= BytesPerInstWord;
+  }
+  assert(inst1_found, "inst is not lis");
+
+  int xc = (data >> 16) & 0xffff;
+  int xd = (data >>  0) & 0xffff;
+
+  set_imm((int *)inst1_addr,((short)(xc + ((xd & 0x8000) != 0 ? 1 : 0)))); // see enc_load_con_narrow1/2
+  set_imm((int *)inst2_addr, (short)(xd));
+  return (int)((intptr_t)inst2_addr - (intptr_t)inst1_addr);
+}
+
+// Get compressed oop or klass constant.
+narrowOop MacroAssembler::get_narrow_oop(address a, address bound) {
+  assert(UseCompressedOops, "Should only patch compressed oops");
+
+  const address inst2_addr = a;
+  const int inst2 = *(int *)inst2_addr;
+
+  // The relocation points to the second instruction, the addi,
+  // and the addi reads and writes the same register dst.
+  const int dst = inv_rt_field(inst2);
+  assert(is_addi(inst2) && inv_ra_field(inst2) == dst, "must be addi reading and writing dst");
+  // Now, find the preceding lis which writes to dst.
+  int inst1 = 0;
+  address inst1_addr = inst2_addr - BytesPerInstWord;
+  bool inst1_found = false;
+
+  while (inst1_addr >= bound) {
+    inst1 = *(int *) inst1_addr;
+    if (is_lis(inst1) && inv_rs_field(inst1) == dst) { inst1_found = true; break;}
+    inst1_addr -= BytesPerInstWord;
+  }
+  assert(inst1_found, "inst is not lis");
+
+  uint xl = ((unsigned int) (get_imm(inst2_addr,0) & 0xffff));
+  uint xh = (((((xl & 0x8000) != 0 ? -1 : 0) + get_imm(inst1_addr,0)) & 0xffff) << 16);
+  return (int) (xl | xh);
+}
+#endif // _LP64
+
+void MacroAssembler::load_const_from_method_toc(Register dst, AddressLiteral& a, Register toc) {
+  int toc_offset = 0;
+  // Use RelocationHolder::none for the constant pool entry, otherwise
+  // we will end up with a failing NativeCall::verify(x) where x is
+  // the address of the constant pool entry.
+  // FIXME: We should insert relocation information for oops at the constant
+  // pool entries instead of inserting it at the loads; patching of a constant
+  // pool entry should be less expensive.
+  Unimplemented();
+  if (false) {
+    address oop_address = address_constant((address)a.value(), RelocationHolder::none);
+    // Relocate at the pc of the load.
+    relocate(a.rspec());
+    toc_offset = (int)(oop_address - code()->consts()->start());
+  }
+  ld_largeoffset_unchecked(dst, toc_offset, toc, true);
+}
+
+bool MacroAssembler::is_load_const_from_method_toc_at(address a) {
+  const address inst1_addr = a;
+  const int inst1 = *(int *)inst1_addr;
+
+   // The relocation points to the ld or the addis.
+   return (is_ld(inst1)) ||
+          (is_addis(inst1) && inv_ra_field(inst1) != 0);
+}
+
+int MacroAssembler::get_offset_of_load_const_from_method_toc_at(address a) {
+  assert(is_load_const_from_method_toc_at(a), "must be load_const_from_method_toc");
+
+  const address inst1_addr = a;
+  const int inst1 = *(int *)inst1_addr;
+
+  if (is_ld(inst1)) {
+    return inv_d1_field(inst1);
+  } else if (is_addis(inst1)) {
+    const int dst = inv_rt_field(inst1);
+
+    // Now, find the succeeding ld which reads and writes to dst.
+    address inst2_addr = inst1_addr + BytesPerInstWord;
+    int inst2 = 0;
+    while (true) {
+      inst2 = *(int *) inst2_addr;
+      if (is_ld(inst2) && inv_ra_field(inst2) == dst && inv_rt_field(inst2) == dst) {
+        // Stop, found the ld which reads and writes dst.
+        break;
+      }
+      inst2_addr += BytesPerInstWord;
+    }
+    return (inv_d1_field(inst1) << 16) + inv_d1_field(inst2);
+  }
+  ShouldNotReachHere();
+  return 0;
+}
+
+// Get the constant from a `load_const' sequence.
+long MacroAssembler::get_const(address a) {
+  assert(is_load_const_at(a), "not a load of a constant");
+  const int *p = (const int*) a;
+  unsigned long x = (((unsigned long) (get_imm(a,0) & 0xffff)) << 48);
+  if (is_ori(*(p+1))) {
+    x |= (((unsigned long) (get_imm(a,1) & 0xffff)) << 32);
+    x |= (((unsigned long) (get_imm(a,3) & 0xffff)) << 16);
+    x |= (((unsigned long) (get_imm(a,4) & 0xffff)));
+  } else if (is_lis(*(p+1))) {
+    x |= (((unsigned long) (get_imm(a,2) & 0xffff)) << 32);
+    x |= (((unsigned long) (get_imm(a,1) & 0xffff)) << 16);
+    x |= (((unsigned long) (get_imm(a,3) & 0xffff)));
+  } else {
+    ShouldNotReachHere();
+    return (long) 0;
+  }
+  return (long) x;
+}
+
+// Patch the 64 bit constant of a `load_const' sequence. This is a low
+// level procedure. It neither flushes the instruction cache nor is it
+// mt safe.
+void MacroAssembler::patch_const(address a, long x) {
+  assert(is_load_const_at(a), "not a load of a constant");
+  int *p = (int*) a;
+  if (is_ori(*(p+1))) {
+    set_imm(0 + p, (x >> 48) & 0xffff);
+    set_imm(1 + p, (x >> 32) & 0xffff);
+    set_imm(3 + p, (x >> 16) & 0xffff);
+    set_imm(4 + p, x & 0xffff);
+  } else if (is_lis(*(p+1))) {
+    set_imm(0 + p, (x >> 48) & 0xffff);
+    set_imm(2 + p, (x >> 32) & 0xffff);
+    set_imm(1 + p, (x >> 16) & 0xffff);
+    set_imm(3 + p, x & 0xffff);
+  } else {
+    ShouldNotReachHere();
+  }
+}
+
+AddressLiteral MacroAssembler::allocate_metadata_address(Metadata* obj) {
+  assert(oop_recorder() != NULL, "this assembler needs a Recorder");
+  int index = oop_recorder()->allocate_metadata_index(obj);
+  RelocationHolder rspec = metadata_Relocation::spec(index);
+  return AddressLiteral((address)obj, rspec);
+}
+
+AddressLiteral MacroAssembler::constant_metadata_address(Metadata* obj) {
+  assert(oop_recorder() != NULL, "this assembler needs a Recorder");
+  int index = oop_recorder()->find_index(obj);
+  RelocationHolder rspec = metadata_Relocation::spec(index);
+  return AddressLiteral((address)obj, rspec);
+}
+
+AddressLiteral MacroAssembler::allocate_oop_address(jobject obj) {
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->allocate_oop_index(obj);
+  return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
+}
+
+AddressLiteral MacroAssembler::constant_oop_address(jobject obj) {
+  assert(oop_recorder() != NULL, "this assembler needs an OopRecorder");
+  int oop_index = oop_recorder()->find_index(obj);
+  return AddressLiteral(address(obj), oop_Relocation::spec(oop_index));
+}
+
+RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr,
+                                                      Register tmp, int offset) {
+  intptr_t value = *delayed_value_addr;
+  if (value != 0) {
+    return RegisterOrConstant(value + offset);
+  }
+
+  // Load indirectly to solve generation ordering problem.
+  // static address, no relocation
+  int simm16_offset = load_const_optimized(tmp, delayed_value_addr, noreg, true);
+  ld(tmp, simm16_offset, tmp); // must be aligned ((xa & 3) == 0)
+
+  if (offset != 0) {
+    addi(tmp, tmp, offset);
+  }
+
+  return RegisterOrConstant(tmp);
+}
+
+#ifndef PRODUCT
+void MacroAssembler::pd_print_patched_instruction(address branch) {
+  Unimplemented(); // TODO: PPC port
+}
+#endif // ndef PRODUCT
+
+// Conditional far branch for destinations encodable in 24+2 bits.
+void MacroAssembler::bc_far(int boint, int biint, Label& dest, int optimize) {
+
+  // If requested by flag optimize, relocate the bc_far as a
+  // runtime_call and prepare for optimizing it when the code gets
+  // relocated.
+  if (optimize == bc_far_optimize_on_relocate) {
+    relocate(relocInfo::runtime_call_type);
+  }
+
+  // variant 2:
+  //
+  //    b!cxx SKIP
+  //    bxx   DEST
+  //  SKIP:
+  //
+
+  const int opposite_boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(boint)),
+                                                opposite_bcond(inv_boint_bcond(boint)));
+
+  // We emit two branches.
+  // First, a conditional branch which jumps around the far branch.
+  const address not_taken_pc = pc() + 2 * BytesPerInstWord;
+  const address bc_pc        = pc();
+  bc(opposite_boint, biint, not_taken_pc);
+
+  const int bc_instr = *(int*)bc_pc;
+  assert(not_taken_pc == (address)inv_bd_field(bc_instr, (intptr_t)bc_pc), "postcondition");
+  assert(opposite_boint == inv_bo_field(bc_instr), "postcondition");
+  assert(boint == add_bhint_to_boint(opposite_bhint(inv_boint_bhint(inv_bo_field(bc_instr))),
+                                     opposite_bcond(inv_boint_bcond(inv_bo_field(bc_instr)))),
+         "postcondition");
+  assert(biint == inv_bi_field(bc_instr), "postcondition");
+
+  // Second, an unconditional far branch which jumps to dest.
+  // Note: target(dest) remembers the current pc (see CodeSection::target)
+  //       and returns the current pc if the label is not bound yet; when
+  //       the label gets bound, the unconditional far branch will be patched.
+  const address target_pc = target(dest);
+  const address b_pc  = pc();
+  b(target_pc);
+
+  assert(not_taken_pc == pc(),                     "postcondition");
+  assert(dest.is_bound() || target_pc == b_pc, "postcondition");
+}
+
+bool MacroAssembler::is_bc_far_at(address instruction_addr) {
+  return is_bc_far_variant1_at(instruction_addr) ||
+         is_bc_far_variant2_at(instruction_addr) ||
+         is_bc_far_variant3_at(instruction_addr);
+}
+
+address MacroAssembler::get_dest_of_bc_far_at(address instruction_addr) {
+  if (is_bc_far_variant1_at(instruction_addr)) {
+    const address instruction_1_addr = instruction_addr;
+    const int instruction_1 = *(int*)instruction_1_addr;
+    return (address)inv_bd_field(instruction_1, (intptr_t)instruction_1_addr);
+  } else if (is_bc_far_variant2_at(instruction_addr)) {
+    const address instruction_2_addr = instruction_addr + 4;
+    return bxx_destination(instruction_2_addr);
+  } else if (is_bc_far_variant3_at(instruction_addr)) {
+    return instruction_addr + 8;
+  }
+  // variant 4 ???
+  ShouldNotReachHere();
+  return NULL;
+}
+void MacroAssembler::set_dest_of_bc_far_at(address instruction_addr, address dest) {
+
+  if (is_bc_far_variant3_at(instruction_addr)) {
+    // variant 3, far cond branch to the next instruction, already patched to nops:
+    //
+    //    nop
+    //    endgroup
+    //  SKIP/DEST:
+    //
+    return;
+  }
+
+  // first, extract boint and biint from the current branch
+  int boint = 0;
+  int biint = 0;
+
+  ResourceMark rm;
+  const int code_size = 2 * BytesPerInstWord;
+  CodeBuffer buf(instruction_addr, code_size);
+  MacroAssembler masm(&buf);
+  if (is_bc_far_variant2_at(instruction_addr) && dest == instruction_addr + 8) {
+    // Far branch to next instruction: Optimize it by patching nops (produce variant 3).
+    masm.nop();
+    masm.endgroup();
+  } else {
+    if (is_bc_far_variant1_at(instruction_addr)) {
+      // variant 1, the 1st instruction contains the destination address:
+      //
+      //    bcxx  DEST
+      //    endgroup
+      //
+      const int instruction_1 = *(int*)(instruction_addr);
+      boint = inv_bo_field(instruction_1);
+      biint = inv_bi_field(instruction_1);
+    } else if (is_bc_far_variant2_at(instruction_addr)) {
+      // variant 2, the 2nd instruction contains the destination address:
+      //
+      //    b!cxx SKIP
+      //    bxx   DEST
+      //  SKIP:
+      //
+      const int instruction_1 = *(int*)(instruction_addr);
+      boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(inv_bo_field(instruction_1))),
+          opposite_bcond(inv_boint_bcond(inv_bo_field(instruction_1))));
+      biint = inv_bi_field(instruction_1);
+    } else {
+      // variant 4???
+      ShouldNotReachHere();
+    }
+
+    // second, set the new branch destination and optimize the code
+    if (dest != instruction_addr + 4 && // the bc_far is still unbound!
+        masm.is_within_range_of_bcxx(dest, instruction_addr)) {
+      // variant 1:
+      //
+      //    bcxx  DEST
+      //    endgroup
+      //
+      masm.bc(boint, biint, dest);
+      masm.endgroup();
+    } else {
+      // variant 2:
+      //
+      //    b!cxx SKIP
+      //    bxx   DEST
+      //  SKIP:
+      //
+      const int opposite_boint = add_bhint_to_boint(opposite_bhint(inv_boint_bhint(boint)),
+                                                    opposite_bcond(inv_boint_bcond(boint)));
+      const address not_taken_pc = masm.pc() + 2 * BytesPerInstWord;
+      masm.bc(opposite_boint, biint, not_taken_pc);
+      masm.b(dest);
+    }
+  }
+  ICache::invalidate_range(instruction_addr, code_size);
+}
+
+// Emit a NOT mt-safe patchable 64 bit absolute call/jump.
+void MacroAssembler::bxx64_patchable(address dest, relocInfo::relocType rt, bool link) {
+  // get current pc
+  uint64_t start_pc = (uint64_t) pc();
+
+  const address pc_of_bl = (address) (start_pc + (6*BytesPerInstWord)); // bl is last
+  const address pc_of_b  = (address) (start_pc + (0*BytesPerInstWord)); // b is first
+
+  // relocate here
+  if (rt != relocInfo::none) {
+    relocate(rt);
+  }
+
+  if ( ReoptimizeCallSequences &&
+       (( link && is_within_range_of_b(dest, pc_of_bl)) ||
+        (!link && is_within_range_of_b(dest, pc_of_b)))) {
+    // variant 2:
+    // Emit an optimized, pc-relative call/jump.
+
+    if (link) {
+      // some padding
+      nop();
+      nop();
+      nop();
+      nop();
+      nop();
+      nop();
+
+      // do the call
+      assert(pc() == pc_of_bl, "just checking");
+      bl(dest, relocInfo::none);
+    } else {