OpenJDK / aarch32-port / jdk8u / hotspot
changeset 6023:ec28f9c041ff
8019972: PPC64 (part 9): platform files for interpreter only VM.
Summary: With this change the HotSpot core build works on Linux/PPC64. The VM successfully executes simple test programs.
Reviewed-by: kvn
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/assembler_ppc.cpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,699 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/assembler.inline.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/interpreter.hpp" +#include "memory/cardTableModRefBS.hpp" +#include "memory/resourceArea.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/objectMonitor.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#if INCLUDE_ALL_GCS +#include "gc_implementation/g1/g1CollectedHeap.inline.hpp" +#include "gc_implementation/g1/g1SATBCardTableModRefBS.hpp" +#include "gc_implementation/g1/heapRegion.hpp" +#endif // INCLUDE_ALL_GCS + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) // nothing +#else +#define BLOCK_COMMENT(str) block_comment(str) +#endif + +int AbstractAssembler::code_fill_byte() { + return 0x00; // illegal instruction 0x00000000 +} + +void Assembler::print_instruction(int inst) { + Unimplemented(); +} + +// Patch instruction `inst' at offset `inst_pos' to refer to +// `dest_pos' and return the resulting instruction. We should have +// pcs, not offsets, but since all is relative, it will work out fine. +int Assembler::patched_branch(int dest_pos, int inst, int inst_pos) { + int m = 0; // mask for displacement field + int v = 0; // new value for displacement field + + switch (inv_op_ppc(inst)) { + case b_op: m = li(-1); v = li(disp(dest_pos, inst_pos)); break; + case bc_op: m = bd(-1); v = bd(disp(dest_pos, inst_pos)); break; + default: ShouldNotReachHere(); + } + return inst & ~m | v; +} + +// Return the offset, relative to _code_begin, of the destination of +// the branch inst at offset pos. +int Assembler::branch_destination(int inst, int pos) { + int r = 0; + switch (inv_op_ppc(inst)) { + case b_op: r = bxx_destination_offset(inst, pos); break; + case bc_op: r = inv_bd_field(inst, pos); break; + default: ShouldNotReachHere(); + } + return r; +} + +// Low-level andi-one-instruction-macro. 
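// (Editor's sketch, not part of the original change: the special cases in
// andi() below strength-reduce contiguous-ones masks into rotate/clear forms,
// because PPC has no non-record-form andi and andi_ always clobbers CR0.
//   ui16 = 0x00ff = 2^8-1  ->  clrldi(a, s, 64-8)            // keep low 8 bits
//   ui16 = 0x0100 = 2^8    ->  rlwinm(a, s, 0, 31-8, 31-8)   // keep bit 8 only
// Masks matching neither pattern fall through to the flag-setting andi_.)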
+void Assembler::andi(Register a, Register s, const int ui16) { + assert(is_uimm(ui16, 16), "must be 16-bit unsigned immediate"); + if (is_power_of_2_long(((jlong) ui16)+1)) { + // pow2minus1 + clrldi(a, s, 64-log2_long((((jlong) ui16)+1))); + } else if (is_power_of_2_long((jlong) ui16)) { + // pow2 + rlwinm(a, s, 0, 31-log2_long((jlong) ui16), 31-log2_long((jlong) ui16)); + } else if (is_power_of_2_long((jlong)-ui16)) { + // negpow2 + clrrdi(a, s, log2_long((jlong)-ui16)); + } else { + andi_(a, s, ui16); + } +} + +// RegisterOrConstant version. +void Assembler::ld(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + if (s1 == noreg) { + int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true); + Assembler::ld(d, simm16_rest, d); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::ld(d, roc.as_constant(), s1); + } else { + load_const_optimized(d, roc.as_constant()); + Assembler::ldx(d, d, s1); + } + } else { + if (s1 == noreg) + Assembler::ld(d, 0, roc.as_register()); + else + Assembler::ldx(d, roc.as_register(), s1); + } +} + +void Assembler::lwa(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + if (s1 == noreg) { + int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true); + Assembler::lwa(d, simm16_rest, d); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::lwa(d, roc.as_constant(), s1); + } else { + load_const_optimized(d, roc.as_constant()); + Assembler::lwax(d, d, s1); + } + } else { + if (s1 == noreg) + Assembler::lwa(d, 0, roc.as_register()); + else + Assembler::lwax(d, roc.as_register(), s1); + } +} + +void Assembler::lwz(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + if (s1 == noreg) { + int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true); + Assembler::lwz(d, simm16_rest, d); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::lwz(d, roc.as_constant(), s1); + } else { + load_const_optimized(d, roc.as_constant()); + Assembler::lwzx(d, d, s1); + } + } else { + if (s1 == noreg) + Assembler::lwz(d, 0, roc.as_register()); + else + Assembler::lwzx(d, roc.as_register(), s1); + } +} + +void Assembler::lha(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + if (s1 == noreg) { + int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true); + Assembler::lha(d, simm16_rest, d); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::lha(d, roc.as_constant(), s1); + } else { + load_const_optimized(d, roc.as_constant()); + Assembler::lhax(d, d, s1); + } + } else { + if (s1 == noreg) + Assembler::lha(d, 0, roc.as_register()); + else + Assembler::lhax(d, roc.as_register(), s1); + } +} + +void Assembler::lhz(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + if (s1 == noreg) { + int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true); + Assembler::lhz(d, simm16_rest, d); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::lhz(d, roc.as_constant(), s1); + } else { + load_const_optimized(d, roc.as_constant()); + Assembler::lhzx(d, d, s1); + } + } else { + if (s1 == noreg) + Assembler::lhz(d, 0, roc.as_register()); + else + Assembler::lhzx(d, roc.as_register(), s1); + } +} + +void Assembler::lbz(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + if (s1 == noreg) { + int simm16_rest = load_const_optimized(d, roc.as_constant(), noreg, true); + Assembler::lbz(d, simm16_rest, d); + } else if 
(is_simm(roc.as_constant(), 16)) { + Assembler::lbz(d, roc.as_constant(), s1); + } else { + load_const_optimized(d, roc.as_constant()); + Assembler::lbzx(d, d, s1); + } + } else { + if (s1 == noreg) + Assembler::lbz(d, 0, roc.as_register()); + else + Assembler::lbzx(d, roc.as_register(), s1); + } +} + +void Assembler::std(Register d, RegisterOrConstant roc, Register s1, Register tmp) { + if (roc.is_constant()) { + if (s1 == noreg) { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true); + Assembler::std(d, simm16_rest, tmp); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::std(d, roc.as_constant(), s1); + } else { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + load_const_optimized(tmp, roc.as_constant()); + Assembler::stdx(d, tmp, s1); + } + } else { + if (s1 == noreg) + Assembler::std(d, 0, roc.as_register()); + else + Assembler::stdx(d, roc.as_register(), s1); + } +} + +void Assembler::stw(Register d, RegisterOrConstant roc, Register s1, Register tmp) { + if (roc.is_constant()) { + if (s1 == noreg) { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true); + Assembler::stw(d, simm16_rest, tmp); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::stw(d, roc.as_constant(), s1); + } else { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + load_const_optimized(tmp, roc.as_constant()); + Assembler::stwx(d, tmp, s1); + } + } else { + if (s1 == noreg) + Assembler::stw(d, 0, roc.as_register()); + else + Assembler::stwx(d, roc.as_register(), s1); + } +} + +void Assembler::sth(Register d, RegisterOrConstant roc, Register s1, Register tmp) { + if (roc.is_constant()) { + if (s1 == noreg) { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true); + Assembler::sth(d, simm16_rest, tmp); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::sth(d, roc.as_constant(), s1); + } else { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + load_const_optimized(tmp, roc.as_constant()); + Assembler::sthx(d, tmp, s1); + } + } else { + if (s1 == noreg) + Assembler::sth(d, 0, roc.as_register()); + else + Assembler::sthx(d, roc.as_register(), s1); + } +} + +void Assembler::stb(Register d, RegisterOrConstant roc, Register s1, Register tmp) { + if (roc.is_constant()) { + if (s1 == noreg) { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + int simm16_rest = load_const_optimized(tmp, roc.as_constant(), noreg, true); + Assembler::stb(d, simm16_rest, tmp); + } else if (is_simm(roc.as_constant(), 16)) { + Assembler::stb(d, roc.as_constant(), s1); + } else { + guarantee(tmp != noreg, "Need tmp reg to encode large constants"); + load_const_optimized(tmp, roc.as_constant()); + Assembler::stbx(d, tmp, s1); + } + } else { + if (s1 == noreg) + Assembler::stb(d, 0, roc.as_register()); + else + Assembler::stbx(d, roc.as_register(), s1); + } +} + +void Assembler::add(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + intptr_t c = roc.as_constant(); + assert(is_simm(c, 16), "too big"); + addi(d, s1, (int)c); + } + else add(d, roc.as_register(), s1); +} + +void Assembler::subf(Register d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + intptr_t c = roc.as_constant(); + assert(is_simm(-c, 16), "too big"); + 
addi(d, s1, (int)-c); + } + else subf(d, roc.as_register(), s1); +} + +void Assembler::cmpd(ConditionRegister d, RegisterOrConstant roc, Register s1) { + if (roc.is_constant()) { + intptr_t c = roc.as_constant(); + assert(is_simm(c, 16), "too big"); + cmpdi(d, s1, (int)c); + } + else cmpd(d, roc.as_register(), s1); +} + +// Load a 64-bit constant. Patchable. +void Assembler::load_const(Register d, long x, Register tmp) { + // 64-bit value: x = xa xb xc xd + int xa = (x >> 48) & 0xffff; + int xb = (x >> 32) & 0xffff; + int xc = (x >> 16) & 0xffff; + int xd = (x >> 0) & 0xffff; + if (tmp == noreg) { + Assembler::lis( d, (int)(short)xa); + Assembler::ori( d, d, (unsigned int)xb); + Assembler::sldi(d, d, 32); + Assembler::oris(d, d, (unsigned int)xc); + Assembler::ori( d, d, (unsigned int)xd); + } else { + // exploit instruction level parallelism if we have a tmp register + assert_different_registers(d, tmp); + Assembler::lis(tmp, (int)(short)xa); + Assembler::lis(d, (int)(short)xc); + Assembler::ori(tmp, tmp, (unsigned int)xb); + Assembler::ori(d, d, (unsigned int)xd); + Assembler::insrdi(d, tmp, 32, 0); + } +} + +// Load a 64-bit constant, optimized, not identifiable. +// Tmp can be used to increase ILP. Set return_simm16_rest=true to get a +// 16-bit immediate offset. +int Assembler::load_const_optimized(Register d, long x, Register tmp, bool return_simm16_rest) { + // Avoid accidentally trying to use R0 for indexed addressing. + assert(d != R0, "R0 not allowed"); + assert_different_registers(d, tmp); + + short xa, xb, xc, xd; // Four 16-bit chunks of const. + long rem = x; // Remaining part of const. + + xd = rem & 0xFFFF; // Lowest 16-bit chunk. + rem = (rem >> 16) + ((unsigned short)xd >> 15); // Compensation for sign extend. + + if (rem == 0) { // opt 1: simm16 + li(d, xd); + return 0; + } + + xc = rem & 0xFFFF; // Next 16-bit chunk. + rem = (rem >> 16) + ((unsigned short)xc >> 15); // Compensation for sign extend. + + if (rem == 0) { // opt 2: simm32 + lis(d, xc); + } else { // High 32 bits needed. + + if (tmp != noreg) { // opt 3: We have a temp reg. + // No carry propagation between xc and higher chunks here (use logical instructions). + xa = (x >> 48) & 0xffff; + xb = (x >> 32) & 0xffff; // No sign compensation, we use lis+ori or li to allow usage of R0. + bool load_xa = (xa != 0) || (xb < 0); + bool return_xd = false; + + if (load_xa) lis(tmp, xa); + if (xc) lis(d, xc); + if (load_xa) { + if (xb) ori(tmp, tmp, xb); // No addi, we support tmp == R0. + } else { + li(tmp, xb); // non-negative + } + if (xc) { + if (return_simm16_rest && xd >= 0) { return_xd = true; } // >= 0 to avoid carry propagation after insrdi/rldimi. + else if (xd) { addi(d, d, xd); } + } else { + li(d, xd); + } + insrdi(d, tmp, 32, 0); + return return_xd ? xd : 0; // non-negative + } + + xb = rem & 0xFFFF; // Next 16-bit chunk. + rem = (rem >> 16) + ((unsigned short)xb >> 15); // Compensation for sign extend. + + xa = rem & 0xFFFF; // Highest 16-bit chunk. + + // opt 4: avoid adding 0 + if (xa) { // Highest 16-bit needed? + lis(d, xa); + if (xb) addi(d, d, xb); + } else { + li(d, xb); + } + sldi(d, d, 32); + if (xc) addis(d, d, xc); + } + + // opt 5: Return offset to be inserted into following instruction. + if (return_simm16_rest) return xd; + + if (xd) addi(d, d, xd); + return 0; +} + +#ifndef PRODUCT +// Test of ppc assembler. 
+void Assembler::test_asm() { + // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions + addi( R0, R1, 10); + addis( R5, R2, 11); + addic_( R3, R31, 42); + subfic( R21, R12, 2112); + add( R3, R2, R1); + add_( R11, R22, R30); + subf( R7, R6, R5); + subf_( R8, R9, R4); + addc( R11, R12, R13); + addc_( R14, R14, R14); + subfc( R15, R16, R17); + subfc_( R18, R20, R19); + adde( R20, R22, R24); + adde_( R29, R27, R26); + subfe( R28, R1, R0); + subfe_( R21, R11, R29); + neg( R21, R22); + neg_( R13, R23); + mulli( R0, R11, -31); + mulld( R1, R18, R21); + mulld_( R2, R17, R22); + mullw( R3, R16, R23); + mullw_( R4, R15, R24); + divd( R5, R14, R25); + divd_( R6, R13, R26); + divw( R7, R12, R27); + divw_( R8, R11, R28); + + li( R3, -4711); + + // PPC 1, section 3.3.9, Fixed-Point Compare Instructions + cmpi( CCR7, 0, R27, 4711); + cmp( CCR0, 1, R14, R11); + cmpli( CCR5, 1, R17, 45); + cmpl( CCR3, 0, R9, R10); + + cmpwi( CCR7, R27, 4711); + cmpw( CCR0, R14, R11); + cmplwi( CCR5, R17, 45); + cmplw( CCR3, R9, R10); + + cmpdi( CCR7, R27, 4711); + cmpd( CCR0, R14, R11); + cmpldi( CCR5, R17, 45); + cmpld( CCR3, R9, R10); + + // PPC 1, section 3.3.11, Fixed-Point Logical Instructions + andi_( R4, R5, 0xff); + andis_( R12, R13, 0x7b51); + ori( R1, R4, 13); + oris( R3, R5, 177); + xori( R7, R6, 51); + xoris( R29, R0, 1); + andr( R17, R21, R16); + and_( R3, R5, R15); + orr( R2, R1, R9); + or_( R17, R15, R11); + xorr( R19, R18, R10); + xor_( R31, R21, R11); + nand( R5, R7, R3); + nand_( R3, R1, R0); + nor( R2, R3, R5); + nor_( R3, R6, R8); + andc( R25, R12, R11); + andc_( R24, R22, R21); + orc( R20, R10, R12); + orc_( R22, R2, R13); + + nop(); + + // PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions + sld( R5, R6, R8); + sld_( R3, R5, R9); + slw( R2, R1, R10); + slw_( R6, R26, R16); + srd( R16, R24, R8); + srd_( R21, R14, R7); + srw( R22, R25, R29); + srw_( R5, R18, R17); + srad( R7, R11, R0); + srad_( R9, R13, R1); + sraw( R7, R15, R2); + sraw_( R4, R17, R3); + sldi( R3, R18, 63); + sldi_( R2, R20, 30); + slwi( R1, R21, 30); + slwi_( R7, R23, 8); + srdi( R0, R19, 2); + srdi_( R12, R24, 5); + srwi( R13, R27, 6); + srwi_( R14, R29, 7); + sradi( R15, R30, 9); + sradi_( R16, R31, 19); + srawi( R17, R31, 15); + srawi_( R18, R31, 12); + + clrrdi( R3, R30, 5); + clrldi( R9, R10, 11); + + rldicr( R19, R20, 13, 15); + rldicr_(R20, R20, 16, 14); + rldicl( R21, R21, 30, 33); + rldicl_(R22, R1, 20, 25); + rlwinm( R23, R2, 25, 10, 11); + rlwinm_(R24, R3, 12, 13, 14); + + // PPC 1, section 3.3.2 Fixed-Point Load Instructions + lwzx( R3, R5, R7); + lwz( R11, 0, R1); + lwzu( R31, -4, R11); + + lwax( R3, R5, R7); + lwa( R31, -4, R11); + lhzx( R3, R5, R7); + lhz( R31, -4, R11); + lhzu( R31, -4, R11); + + + lhax( R3, R5, R7); + lha( R31, -4, R11); + lhau( R11, 0, R1); + + lbzx( R3, R5, R7); + lbz( R31, -4, R11); + lbzu( R11, 0, R1); + + ld( R31, -4, R11); + ldx( R3, R5, R7); + ldu( R31, -4, R11); + + // PPC 1, section 3.3.3 Fixed-Point Store Instructions + stwx( R3, R5, R7); + stw( R31, -4, R11); + stwu( R11, 0, R1); + + sthx( R3, R5, R7 ); + sth( R31, -4, R11); + sthu( R31, -4, R11); + + stbx( R3, R5, R7); + stb( R31, -4, R11); + stbu( R31, -4, R11); + + std( R31, -4, R11); + stdx( R3, R5, R7); + stdu( R31, -4, R11); + + // PPC 1, section 3.3.13 Move To/From System Register Instructions + mtlr( R3); + mflr( R3); + mtctr( R3); + mfctr( R3); + mtcrf( 0xff, R15); + mtcr( R15); + mtcrf( 0x03, R15); + mtcr( R15); + mfcr( R15); + + // PPC 1, section 2.4.1 Branch Instructions + Label lbl1, lbl2, lbl3; + 
bind(lbl1); + + b(pc()); + b(pc() - 8); + b(lbl1); + b(lbl2); + b(lbl3); + + bl(pc() - 8); + bl(lbl1); + bl(lbl2); + + bcl(4, 10, pc() - 8); + bcl(4, 10, lbl1); + bcl(4, 10, lbl2); + + bclr( 4, 6, 0); + bclrl(4, 6, 0); + + bind(lbl2); + + bcctr( 4, 6, 0); + bcctrl(4, 6, 0); + + blt(CCR0, lbl2); + bgt(CCR1, lbl2); + beq(CCR2, lbl2); + bso(CCR3, lbl2); + bge(CCR4, lbl2); + ble(CCR5, lbl2); + bne(CCR6, lbl2); + bns(CCR7, lbl2); + + bltl(CCR0, lbl2); + bgtl(CCR1, lbl2); + beql(CCR2, lbl2); + bsol(CCR3, lbl2); + bgel(CCR4, lbl2); + blel(CCR5, lbl2); + bnel(CCR6, lbl2); + bnsl(CCR7, lbl2); + blr(); + + sync(); + icbi( R1, R2); + dcbst(R2, R3); + + // FLOATING POINT instructions ppc. + // PPC 1, section 4.6.2 Floating-Point Load Instructions + lfs( F1, -11, R3); + lfsu(F2, 123, R4); + lfsx(F3, R5, R6); + lfd( F4, 456, R7); + lfdu(F5, 789, R8); + lfdx(F6, R10, R11); + + // PPC 1, section 4.6.3 Floating-Point Store Instructions + stfs( F7, 876, R12); + stfsu( F8, 543, R13); + stfsx( F9, R14, R15); + stfd( F10, 210, R16); + stfdu( F11, 111, R17); + stfdx( F12, R18, R19); + + // PPC 1, section 4.6.4 Floating-Point Move Instructions + fmr( F13, F14); + fmr_( F14, F15); + fneg( F16, F17); + fneg_( F18, F19); + fabs( F20, F21); + fabs_( F22, F23); + fnabs( F24, F25); + fnabs_(F26, F27); + + // PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic + // Instructions + fadd( F28, F29, F30); + fadd_( F31, F0, F1); + fadds( F2, F3, F4); + fadds_(F5, F6, F7); + fsub( F8, F9, F10); + fsub_( F11, F12, F13); + fsubs( F14, F15, F16); + fsubs_(F17, F18, F19); + fmul( F20, F21, F22); + fmul_( F23, F24, F25); + fmuls( F26, F27, F28); + fmuls_(F29, F30, F31); + fdiv( F0, F1, F2); + fdiv_( F3, F4, F5); + fdivs( F6, F7, F8); + fdivs_(F9, F10, F11); + + // PPC 1, section 4.6.6 Floating-Point Rounding and Conversion + // Instructions + frsp( F12, F13); + fctid( F14, F15); + fctidz(F16, F17); + fctiw( F18, F19); + fctiwz(F20, F21); + fcfid( F22, F23); + + // PPC 1, section 4.6.7 Floating-Point Compare Instructions + fcmpu( CCR7, F24, F25); + + tty->print_cr("\ntest_asm disassembly (0x%lx 0x%lx):", code()->insts_begin(), code()->insts_end()); + code()->decode(); +} +#endif // !PRODUCT
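A side note on the simm16-rest idiom (editor's sketch, not part of the changeset; addr stands in for an arbitrary 64-bit constant): the RegisterOrConstant helpers above call load_const_optimized with return_simm16_rest=true so the low 16 bits of the constant can be folded into the displacement of the memory access itself, saving one instruction:

// Materialize all but the low 16 bits of addr in d, then use the returned
// signed 16-bit rest directly as the load's displacement.
int simm16_rest = load_const_optimized(d, addr, noreg, true);
ld(d, simm16_rest, d);  // same effect as: load_const(d, addr); ld(d, 0, d);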
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/assembler_ppc.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,1963 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_ASSEMBLER_PPC_HPP +#define CPU_PPC_VM_ASSEMBLER_PPC_HPP + +#include "asm/register.hpp" + +// Address is an abstraction used to represent a memory location +// as used in assembler instructions. +// PPC instructions grok either baseReg + indexReg or baseReg + disp. +// So far we do not use it, as the simplification offered by this class +// is of little value on PPC with its simple addressing modes. Use +// RegisterOrConstant to represent an offset. +class Address VALUE_OBJ_CLASS_SPEC { +}; + +class AddressLiteral VALUE_OBJ_CLASS_SPEC { + private: + address _address; + RelocationHolder _rspec; + + RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { + switch (rtype) { + case relocInfo::external_word_type: + return external_word_Relocation::spec(addr); + case relocInfo::internal_word_type: + return internal_word_Relocation::spec(addr); + case relocInfo::opt_virtual_call_type: + return opt_virtual_call_Relocation::spec(); + case relocInfo::static_call_type: + return static_call_Relocation::spec(); + case relocInfo::runtime_call_type: + return runtime_call_Relocation::spec(); + case relocInfo::none: + return RelocationHolder(); + default: + ShouldNotReachHere(); + return RelocationHolder(); + } + } + + protected: + // creation + AddressLiteral() : _address(NULL), _rspec(NULL) {} + + public: + AddressLiteral(address addr, RelocationHolder const& rspec) + : _address(addr), + _rspec(rspec) {} + + AddressLiteral(address addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + AddressLiteral(oop* addr, relocInfo::relocType rtype = relocInfo::none) + : _address((address) addr), + _rspec(rspec_from_rtype(rtype, (address) addr)) {} + + intptr_t value() const { return (intptr_t) _address; } + + const RelocationHolder& rspec() const { return _rspec; } +}; + +// Argument is an abstraction used to represent an outgoing +// actual argument or an incoming formal parameter, whether +// it resides in memory or in a register, in a manner consistent +// with the PPC Application Binary Interface, or ABI. This is +// often referred to as the native or C calling convention. 
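// (Editor's note, not part of the original change, derived from as_register()
// below: integer arguments 0..7 map onto R3..R10 starting at R3_ARG1, e.g.
// Argument(0).as_register() == R3_ARG1; for argument numbers >=
// n_register_parameters, is_register() is false and the value is passed on
// the stack.)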
+ +class Argument VALUE_OBJ_CLASS_SPEC { + private: + int _number; // The number of the argument. + public: + enum { + // Only 8 registers may contain integer parameters. + n_register_parameters = 8, + // Can have up to 8 floating registers. + n_float_register_parameters = 8 + }; + // creation + Argument(int number) : _number(number) {} + + int number() const { return _number; } + + // Locating register-based arguments: + bool is_register() const { return _number < n_register_parameters; } + + Register as_register() const { + assert(is_register(), "must be a register argument"); + return as_Register(number() + R3_ARG1->encoding()); + } +}; + +// A ppc64 function descriptor. +struct FunctionDescriptor VALUE_OBJ_CLASS_SPEC { + private: + address _entry; + address _toc; + address _env; + + public: + inline address entry() const { return _entry; } + inline address toc() const { return _toc; } + inline address env() const { return _env; } + + inline void set_entry(address entry) { _entry = entry; } + inline void set_toc( address toc) { _toc = toc; } + inline void set_env( address env) { _env = env; } + + inline static ByteSize entry_offset() { return byte_offset_of(FunctionDescriptor, _entry); } + inline static ByteSize toc_offset() { return byte_offset_of(FunctionDescriptor, _toc); } + inline static ByteSize env_offset() { return byte_offset_of(FunctionDescriptor, _env); } + + // Friend functions can be called without loading toc and env. + enum { + friend_toc = 0xcafe, + friend_env = 0xc0de + }; + + inline bool is_friend_function() const { + return (toc() == (address) friend_toc) && (env() == (address) friend_env); + } + + // Constructor for stack-allocated instances. + FunctionDescriptor() { + _entry = (address) 0xbad; + _toc = (address) 0xbad; + _env = (address) 0xbad; + } +}; + +class Assembler : public AbstractAssembler { + protected: + // Displacement routines + static void print_instruction(int inst); + static int patched_branch(int dest_pos, int inst, int inst_pos); + static int branch_destination(int inst, int pos); + + friend class AbstractAssembler; + + // Code patchers need various routines like inv_wdisp() + friend class NativeInstruction; + friend class NativeGeneralJump; + friend class Relocation; + + public: + + enum shifts { + XO_21_29_SHIFT = 2, + XO_21_30_SHIFT = 1, + XO_27_29_SHIFT = 2, + XO_30_31_SHIFT = 0, + SPR_5_9_SHIFT = 11u, // SPR_5_9 field in bits 11 -- 15 + SPR_0_4_SHIFT = 16u, // SPR_0_4 field in bits 16 -- 20 + RS_SHIFT = 21u, // RS field in bits 21 -- 25 + OPCODE_SHIFT = 26u, // opcode in bits 26 -- 31 + }; + + enum opcdxos_masks { + XL_FORM_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1), + ADDI_OPCODE_MASK = (63u << OPCODE_SHIFT), + ADDIS_OPCODE_MASK = (63u << OPCODE_SHIFT), + BXX_OPCODE_MASK = (63u << OPCODE_SHIFT), + BCXX_OPCODE_MASK = (63u << OPCODE_SHIFT), + // trap instructions + TDI_OPCODE_MASK = (63u << OPCODE_SHIFT), + TWI_OPCODE_MASK = (63u << OPCODE_SHIFT), + TD_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1), + TW_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1), + LD_OPCODE_MASK = (63u << OPCODE_SHIFT) | (3u << XO_30_31_SHIFT), // DS-FORM + STD_OPCODE_MASK = LD_OPCODE_MASK, + STDU_OPCODE_MASK = STD_OPCODE_MASK, + STDX_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1), + STDUX_OPCODE_MASK = STDX_OPCODE_MASK, + STW_OPCODE_MASK = (63u << OPCODE_SHIFT), + STWU_OPCODE_MASK = STW_OPCODE_MASK, + STWX_OPCODE_MASK = (63u << OPCODE_SHIFT) | (1023u << 1), + STWUX_OPCODE_MASK = STWX_OPCODE_MASK, + MTCTR_OPCODE_MASK = ~(31u << RS_SHIFT), + 
ORI_OPCODE_MASK = (63u << OPCODE_SHIFT), + ORIS_OPCODE_MASK = (63u << OPCODE_SHIFT), + RLDICR_OPCODE_MASK = (63u << OPCODE_SHIFT) | (7u << XO_27_29_SHIFT) + }; + + enum opcdxos { + ADD_OPCODE = (31u << OPCODE_SHIFT | 266u << 1), + ADDC_OPCODE = (31u << OPCODE_SHIFT | 10u << 1), + ADDI_OPCODE = (14u << OPCODE_SHIFT), + ADDIS_OPCODE = (15u << OPCODE_SHIFT), + ADDIC__OPCODE = (13u << OPCODE_SHIFT), + ADDE_OPCODE = (31u << OPCODE_SHIFT | 138u << 1), + SUBF_OPCODE = (31u << OPCODE_SHIFT | 40u << 1), + SUBFC_OPCODE = (31u << OPCODE_SHIFT | 8u << 1), + SUBFE_OPCODE = (31u << OPCODE_SHIFT | 136u << 1), + SUBFIC_OPCODE = (8u << OPCODE_SHIFT), + SUBFZE_OPCODE = (31u << OPCODE_SHIFT | 200u << 1), + DIVW_OPCODE = (31u << OPCODE_SHIFT | 491u << 1), + MULLW_OPCODE = (31u << OPCODE_SHIFT | 235u << 1), + MULHW_OPCODE = (31u << OPCODE_SHIFT | 75u << 1), + MULHWU_OPCODE = (31u << OPCODE_SHIFT | 11u << 1), + MULLI_OPCODE = (7u << OPCODE_SHIFT), + AND_OPCODE = (31u << OPCODE_SHIFT | 28u << 1), + ANDI_OPCODE = (28u << OPCODE_SHIFT), + ANDIS_OPCODE = (29u << OPCODE_SHIFT), + ANDC_OPCODE = (31u << OPCODE_SHIFT | 60u << 1), + ORC_OPCODE = (31u << OPCODE_SHIFT | 412u << 1), + OR_OPCODE = (31u << OPCODE_SHIFT | 444u << 1), + ORI_OPCODE = (24u << OPCODE_SHIFT), + ORIS_OPCODE = (25u << OPCODE_SHIFT), + XOR_OPCODE = (31u << OPCODE_SHIFT | 316u << 1), + XORI_OPCODE = (26u << OPCODE_SHIFT), + XORIS_OPCODE = (27u << OPCODE_SHIFT), + + NEG_OPCODE = (31u << OPCODE_SHIFT | 104u << 1), + + RLWINM_OPCODE = (21u << OPCODE_SHIFT), + CLRRWI_OPCODE = RLWINM_OPCODE, + CLRLWI_OPCODE = RLWINM_OPCODE, + + RLWIMI_OPCODE = (20u << OPCODE_SHIFT), + + SLW_OPCODE = (31u << OPCODE_SHIFT | 24u << 1), + SLWI_OPCODE = RLWINM_OPCODE, + SRW_OPCODE = (31u << OPCODE_SHIFT | 536u << 1), + SRWI_OPCODE = RLWINM_OPCODE, + SRAW_OPCODE = (31u << OPCODE_SHIFT | 792u << 1), + SRAWI_OPCODE = (31u << OPCODE_SHIFT | 824u << 1), + + CMP_OPCODE = (31u << OPCODE_SHIFT | 0u << 1), + CMPI_OPCODE = (11u << OPCODE_SHIFT), + CMPL_OPCODE = (31u << OPCODE_SHIFT | 32u << 1), + CMPLI_OPCODE = (10u << OPCODE_SHIFT), + + ISEL_OPCODE = (31u << OPCODE_SHIFT | 15u << 1), + + MTLR_OPCODE = (31u << OPCODE_SHIFT | 467u << 1 | 8 << SPR_0_4_SHIFT), + MFLR_OPCODE = (31u << OPCODE_SHIFT | 339u << 1 | 8 << SPR_0_4_SHIFT), + + MTCRF_OPCODE = (31u << OPCODE_SHIFT | 144u << 1), + MFCR_OPCODE = (31u << OPCODE_SHIFT | 19u << 1), + MCRF_OPCODE = (19u << OPCODE_SHIFT | 0u << 1), + + // condition register logic instructions + CRAND_OPCODE = (19u << OPCODE_SHIFT | 257u << 1), + CRNAND_OPCODE = (19u << OPCODE_SHIFT | 225u << 1), + CROR_OPCODE = (19u << OPCODE_SHIFT | 449u << 1), + CRXOR_OPCODE = (19u << OPCODE_SHIFT | 193u << 1), + CRNOR_OPCODE = (19u << OPCODE_SHIFT | 33u << 1), + CREQV_OPCODE = (19u << OPCODE_SHIFT | 289u << 1), + CRANDC_OPCODE = (19u << OPCODE_SHIFT | 129u << 1), + CRORC_OPCODE = (19u << OPCODE_SHIFT | 417u << 1), + + BCLR_OPCODE = (19u << OPCODE_SHIFT | 16u << 1), + BXX_OPCODE = (18u << OPCODE_SHIFT), + BCXX_OPCODE = (16u << OPCODE_SHIFT), + + // CTR-related opcodes + BCCTR_OPCODE = (19u << OPCODE_SHIFT | 528u << 1), + MTCTR_OPCODE = (31u << OPCODE_SHIFT | 467u << 1 | 9 << SPR_0_4_SHIFT), + MFCTR_OPCODE = (31u << OPCODE_SHIFT | 339u << 1 | 9 << SPR_0_4_SHIFT), + + + LWZ_OPCODE = (32u << OPCODE_SHIFT), + LWZX_OPCODE = (31u << OPCODE_SHIFT | 23u << 1), + LWZU_OPCODE = (33u << OPCODE_SHIFT), + + LHA_OPCODE = (42u << OPCODE_SHIFT), + LHAX_OPCODE = (31u << OPCODE_SHIFT | 343u << 1), + LHAU_OPCODE = (43u << OPCODE_SHIFT), + + LHZ_OPCODE = (40u << OPCODE_SHIFT), + LHZX_OPCODE = 
(31u << OPCODE_SHIFT | 279u << 1), + LHZU_OPCODE = (41u << OPCODE_SHIFT), + + LBZ_OPCODE = (34u << OPCODE_SHIFT), + LBZX_OPCODE = (31u << OPCODE_SHIFT | 87u << 1), + LBZU_OPCODE = (35u << OPCODE_SHIFT), + + STW_OPCODE = (36u << OPCODE_SHIFT), + STWX_OPCODE = (31u << OPCODE_SHIFT | 151u << 1), + STWU_OPCODE = (37u << OPCODE_SHIFT), + STWUX_OPCODE = (31u << OPCODE_SHIFT | 183u << 1), + + STH_OPCODE = (44u << OPCODE_SHIFT), + STHX_OPCODE = (31u << OPCODE_SHIFT | 407u << 1), + STHU_OPCODE = (45u << OPCODE_SHIFT), + + STB_OPCODE = (38u << OPCODE_SHIFT), + STBX_OPCODE = (31u << OPCODE_SHIFT | 215u << 1), + STBU_OPCODE = (39u << OPCODE_SHIFT), + + EXTSB_OPCODE = (31u << OPCODE_SHIFT | 954u << 1), + EXTSH_OPCODE = (31u << OPCODE_SHIFT | 922u << 1), + EXTSW_OPCODE = (31u << OPCODE_SHIFT | 986u << 1), // X-FORM + + // 32 bit opcode encodings + + LWA_OPCODE = (58u << OPCODE_SHIFT | 2u << XO_30_31_SHIFT), // DS-FORM + LWAX_OPCODE = (31u << OPCODE_SHIFT | 341u << XO_21_30_SHIFT), // X-FORM + + CNTLZW_OPCODE = (31u << OPCODE_SHIFT | 26u << XO_21_30_SHIFT), // X-FORM + + // 64 bit opcode encodings + + LD_OPCODE = (58u << OPCODE_SHIFT | 0u << XO_30_31_SHIFT), // DS-FORM + LDU_OPCODE = (58u << OPCODE_SHIFT | 1u << XO_30_31_SHIFT), // DS-FORM + LDX_OPCODE = (31u << OPCODE_SHIFT | 21u << XO_21_30_SHIFT), // X-FORM + + STD_OPCODE = (62u << OPCODE_SHIFT | 0u << XO_30_31_SHIFT), // DS-FORM + STDU_OPCODE = (62u << OPCODE_SHIFT | 1u << XO_30_31_SHIFT), // DS-FORM + STDUX_OPCODE = (31u << OPCODE_SHIFT | 181u << 1), // X-FORM + STDX_OPCODE = (31u << OPCODE_SHIFT | 149u << XO_21_30_SHIFT), // X-FORM + + RLDICR_OPCODE = (30u << OPCODE_SHIFT | 1u << XO_27_29_SHIFT), // MD-FORM + RLDICL_OPCODE = (30u << OPCODE_SHIFT | 0u << XO_27_29_SHIFT), // MD-FORM + RLDIC_OPCODE = (30u << OPCODE_SHIFT | 2u << XO_27_29_SHIFT), // MD-FORM + RLDIMI_OPCODE = (30u << OPCODE_SHIFT | 3u << XO_27_29_SHIFT), // MD-FORM + + SRADI_OPCODE = (31u << OPCODE_SHIFT | 413u << XO_21_29_SHIFT), // XS-FORM + + SLD_OPCODE = (31u << OPCODE_SHIFT | 27u << 1), // X-FORM + SRD_OPCODE = (31u << OPCODE_SHIFT | 539u << 1), // X-FORM + SRAD_OPCODE = (31u << OPCODE_SHIFT | 794u << 1), // X-FORM + + MULLD_OPCODE = (31u << OPCODE_SHIFT | 233u << 1), // XO-FORM + MULHD_OPCODE = (31u << OPCODE_SHIFT | 73u << 1), // XO-FORM + MULHDU_OPCODE = (31u << OPCODE_SHIFT | 9u << 1), // XO-FORM + DIVD_OPCODE = (31u << OPCODE_SHIFT | 489u << 1), // XO-FORM + + CNTLZD_OPCODE = (31u << OPCODE_SHIFT | 58u << XO_21_30_SHIFT), // X-FORM + NAND_OPCODE = (31u << OPCODE_SHIFT | 476u << XO_21_30_SHIFT), // X-FORM + NOR_OPCODE = (31u << OPCODE_SHIFT | 124u << XO_21_30_SHIFT), // X-FORM + + + // opcodes only used for floating arithmetic + FADD_OPCODE = (63u << OPCODE_SHIFT | 21u << 1), + FADDS_OPCODE = (59u << OPCODE_SHIFT | 21u << 1), + FCMPU_OPCODE = (63u << OPCODE_SHIFT | 00u << 1), + FDIV_OPCODE = (63u << OPCODE_SHIFT | 18u << 1), + FDIVS_OPCODE = (59u << OPCODE_SHIFT | 18u << 1), + FMR_OPCODE = (63u << OPCODE_SHIFT | 72u << 1), + // These are special Power6 opcodes, reused for "lfdepx" and "stfdepx" + // on Power7. Do not use. 
+ // MFFGPR_OPCODE = (31u << OPCODE_SHIFT | 607u << 1), + // MFTGPR_OPCODE = (31u << OPCODE_SHIFT | 735u << 1), + CMPB_OPCODE = (31u << OPCODE_SHIFT | 508 << 1), + POPCNTB_OPCODE = (31u << OPCODE_SHIFT | 122 << 1), + POPCNTW_OPCODE = (31u << OPCODE_SHIFT | 378 << 1), + POPCNTD_OPCODE = (31u << OPCODE_SHIFT | 506 << 1), + FABS_OPCODE = (63u << OPCODE_SHIFT | 264u << 1), + FNABS_OPCODE = (63u << OPCODE_SHIFT | 136u << 1), + FMUL_OPCODE = (63u << OPCODE_SHIFT | 25u << 1), + FMULS_OPCODE = (59u << OPCODE_SHIFT | 25u << 1), + FNEG_OPCODE = (63u << OPCODE_SHIFT | 40u << 1), + FSUB_OPCODE = (63u << OPCODE_SHIFT | 20u << 1), + FSUBS_OPCODE = (59u << OPCODE_SHIFT | 20u << 1), + + // PPC64-internal FPU conversion opcodes + FCFID_OPCODE = (63u << OPCODE_SHIFT | 846u << 1), + FCFIDS_OPCODE = (59u << OPCODE_SHIFT | 846u << 1), + FCTID_OPCODE = (63u << OPCODE_SHIFT | 814u << 1), + FCTIDZ_OPCODE = (63u << OPCODE_SHIFT | 815u << 1), + FCTIW_OPCODE = (63u << OPCODE_SHIFT | 14u << 1), + FCTIWZ_OPCODE = (63u << OPCODE_SHIFT | 15u << 1), + FRSP_OPCODE = (63u << OPCODE_SHIFT | 12u << 1), + + // WARNING: using fmadd results in a non-compliant vm. Some floating + // point tck tests will fail. + FMADD_OPCODE = (59u << OPCODE_SHIFT | 29u << 1), + DMADD_OPCODE = (63u << OPCODE_SHIFT | 29u << 1), + FMSUB_OPCODE = (59u << OPCODE_SHIFT | 28u << 1), + DMSUB_OPCODE = (63u << OPCODE_SHIFT | 28u << 1), + FNMADD_OPCODE = (59u << OPCODE_SHIFT | 31u << 1), + DNMADD_OPCODE = (63u << OPCODE_SHIFT | 31u << 1), + FNMSUB_OPCODE = (59u << OPCODE_SHIFT | 30u << 1), + DNMSUB_OPCODE = (63u << OPCODE_SHIFT | 30u << 1), + + LFD_OPCODE = (50u << OPCODE_SHIFT | 00u << 1), + LFDU_OPCODE = (51u << OPCODE_SHIFT | 00u << 1), + LFDX_OPCODE = (31u << OPCODE_SHIFT | 599u << 1), + LFS_OPCODE = (48u << OPCODE_SHIFT | 00u << 1), + LFSU_OPCODE = (49u << OPCODE_SHIFT | 00u << 1), + LFSX_OPCODE = (31u << OPCODE_SHIFT | 535u << 1), + + STFD_OPCODE = (54u << OPCODE_SHIFT | 00u << 1), + STFDU_OPCODE = (55u << OPCODE_SHIFT | 00u << 1), + STFDX_OPCODE = (31u << OPCODE_SHIFT | 727u << 1), + STFS_OPCODE = (52u << OPCODE_SHIFT | 00u << 1), + STFSU_OPCODE = (53u << OPCODE_SHIFT | 00u << 1), + STFSX_OPCODE = (31u << OPCODE_SHIFT | 663u << 1), + + FSQRT_OPCODE = (63u << OPCODE_SHIFT | 22u << 1), // A-FORM + FSQRTS_OPCODE = (59u << OPCODE_SHIFT | 22u << 1), // A-FORM + + // Vector instruction support for >= Power6 + // Vector Storage Access + LVEBX_OPCODE = (31u << OPCODE_SHIFT | 7u << 1), + LVEHX_OPCODE = (31u << OPCODE_SHIFT | 39u << 1), + LVEWX_OPCODE = (31u << OPCODE_SHIFT | 71u << 1), + LVX_OPCODE = (31u << OPCODE_SHIFT | 103u << 1), + LVXL_OPCODE = (31u << OPCODE_SHIFT | 359u << 1), + STVEBX_OPCODE = (31u << OPCODE_SHIFT | 135u << 1), + STVEHX_OPCODE = (31u << OPCODE_SHIFT | 167u << 1), + STVEWX_OPCODE = (31u << OPCODE_SHIFT | 199u << 1), + STVX_OPCODE = (31u << OPCODE_SHIFT | 231u << 1), + STVXL_OPCODE = (31u << OPCODE_SHIFT | 487u << 1), + LVSL_OPCODE = (31u << OPCODE_SHIFT | 6u << 1), + LVSR_OPCODE = (31u << OPCODE_SHIFT | 38u << 1), + + // Vector Permute and Formatting + VPKPX_OPCODE = (4u << OPCODE_SHIFT | 782u ), + VPKSHSS_OPCODE = (4u << OPCODE_SHIFT | 398u ), + VPKSWSS_OPCODE = (4u << OPCODE_SHIFT | 462u ), + VPKSHUS_OPCODE = (4u << OPCODE_SHIFT | 270u ), + VPKSWUS_OPCODE = (4u << OPCODE_SHIFT | 334u ), + VPKUHUM_OPCODE = (4u << OPCODE_SHIFT | 14u ), + VPKUWUM_OPCODE = (4u << OPCODE_SHIFT | 78u ), + VPKUHUS_OPCODE = (4u << OPCODE_SHIFT | 142u ), + VPKUWUS_OPCODE = (4u << OPCODE_SHIFT | 206u ), + VUPKHPX_OPCODE = (4u << OPCODE_SHIFT | 846u ), + 
VUPKHSB_OPCODE = (4u << OPCODE_SHIFT | 526u ), + VUPKHSH_OPCODE = (4u << OPCODE_SHIFT | 590u ), + VUPKLPX_OPCODE = (4u << OPCODE_SHIFT | 974u ), + VUPKLSB_OPCODE = (4u << OPCODE_SHIFT | 654u ), + VUPKLSH_OPCODE = (4u << OPCODE_SHIFT | 718u ), + + VMRGHB_OPCODE = (4u << OPCODE_SHIFT | 12u ), + VMRGHW_OPCODE = (4u << OPCODE_SHIFT | 140u ), + VMRGHH_OPCODE = (4u << OPCODE_SHIFT | 76u ), + VMRGLB_OPCODE = (4u << OPCODE_SHIFT | 268u ), + VMRGLW_OPCODE = (4u << OPCODE_SHIFT | 396u ), + VMRGLH_OPCODE = (4u << OPCODE_SHIFT | 332u ), + + VSPLT_OPCODE = (4u << OPCODE_SHIFT | 524u ), + VSPLTH_OPCODE = (4u << OPCODE_SHIFT | 588u ), + VSPLTW_OPCODE = (4u << OPCODE_SHIFT | 652u ), + VSPLTISB_OPCODE= (4u << OPCODE_SHIFT | 780u ), + VSPLTISH_OPCODE= (4u << OPCODE_SHIFT | 844u ), + VSPLTISW_OPCODE= (4u << OPCODE_SHIFT | 908u ), + + VPERM_OPCODE = (4u << OPCODE_SHIFT | 43u ), + VSEL_OPCODE = (4u << OPCODE_SHIFT | 42u ), + + VSL_OPCODE = (4u << OPCODE_SHIFT | 452u ), + VSLDOI_OPCODE = (4u << OPCODE_SHIFT | 44u ), + VSLO_OPCODE = (4u << OPCODE_SHIFT | 1036u ), + VSR_OPCODE = (4u << OPCODE_SHIFT | 708u ), + VSRO_OPCODE = (4u << OPCODE_SHIFT | 1100u ), + + // Vector Integer + VADDCUW_OPCODE = (4u << OPCODE_SHIFT | 384u ), + VADDSHS_OPCODE = (4u << OPCODE_SHIFT | 832u ), + VADDSBS_OPCODE = (4u << OPCODE_SHIFT | 768u ), + VADDSWS_OPCODE = (4u << OPCODE_SHIFT | 896u ), + VADDUBM_OPCODE = (4u << OPCODE_SHIFT | 0u ), + VADDUWM_OPCODE = (4u << OPCODE_SHIFT | 128u ), + VADDUHM_OPCODE = (4u << OPCODE_SHIFT | 64u ), + VADDUBS_OPCODE = (4u << OPCODE_SHIFT | 512u ), + VADDUWS_OPCODE = (4u << OPCODE_SHIFT | 640u ), + VADDUHS_OPCODE = (4u << OPCODE_SHIFT | 576u ), + VSUBCUW_OPCODE = (4u << OPCODE_SHIFT | 1408u ), + VSUBSHS_OPCODE = (4u << OPCODE_SHIFT | 1856u ), + VSUBSBS_OPCODE = (4u << OPCODE_SHIFT | 1792u ), + VSUBSWS_OPCODE = (4u << OPCODE_SHIFT | 1920u ), + VSUBUBM_OPCODE = (4u << OPCODE_SHIFT | 1024u ), + VSUBUWM_OPCODE = (4u << OPCODE_SHIFT | 1152u ), + VSUBUHM_OPCODE = (4u << OPCODE_SHIFT | 1088u ), + VSUBUBS_OPCODE = (4u << OPCODE_SHIFT | 1536u ), + VSUBUWS_OPCODE = (4u << OPCODE_SHIFT | 1664u ), + VSUBUHS_OPCODE = (4u << OPCODE_SHIFT | 1600u ), + + VMULESB_OPCODE = (4u << OPCODE_SHIFT | 776u ), + VMULEUB_OPCODE = (4u << OPCODE_SHIFT | 520u ), + VMULESH_OPCODE = (4u << OPCODE_SHIFT | 840u ), + VMULEUH_OPCODE = (4u << OPCODE_SHIFT | 584u ), + VMULOSB_OPCODE = (4u << OPCODE_SHIFT | 264u ), + VMULOUB_OPCODE = (4u << OPCODE_SHIFT | 8u ), + VMULOSH_OPCODE = (4u << OPCODE_SHIFT | 328u ), + VMULOUH_OPCODE = (4u << OPCODE_SHIFT | 72u ), + VMHADDSHS_OPCODE=(4u << OPCODE_SHIFT | 32u ), + VMHRADDSHS_OPCODE=(4u << OPCODE_SHIFT | 33u ), + VMLADDUHM_OPCODE=(4u << OPCODE_SHIFT | 34u ), + VMSUBUHM_OPCODE= (4u << OPCODE_SHIFT | 36u ), + VMSUMMBM_OPCODE= (4u << OPCODE_SHIFT | 37u ), + VMSUMSHM_OPCODE= (4u << OPCODE_SHIFT | 40u ), + VMSUMSHS_OPCODE= (4u << OPCODE_SHIFT | 41u ), + VMSUMUHM_OPCODE= (4u << OPCODE_SHIFT | 38u ), + VMSUMUHS_OPCODE= (4u << OPCODE_SHIFT | 39u ), + + VSUMSWS_OPCODE = (4u << OPCODE_SHIFT | 1928u ), + VSUM2SWS_OPCODE= (4u << OPCODE_SHIFT | 1672u ), + VSUM4SBS_OPCODE= (4u << OPCODE_SHIFT | 1800u ), + VSUM4UBS_OPCODE= (4u << OPCODE_SHIFT | 1544u ), + VSUM4SHS_OPCODE= (4u << OPCODE_SHIFT | 1608u ), + + VAVGSB_OPCODE = (4u << OPCODE_SHIFT | 1282u ), + VAVGSW_OPCODE = (4u << OPCODE_SHIFT | 1410u ), + VAVGSH_OPCODE = (4u << OPCODE_SHIFT | 1346u ), + VAVGUB_OPCODE = (4u << OPCODE_SHIFT | 1026u ), + VAVGUW_OPCODE = (4u << OPCODE_SHIFT | 1154u ), + VAVGUH_OPCODE = (4u << OPCODE_SHIFT | 1090u ), + + VMAXSB_OPCODE = (4u 
<< OPCODE_SHIFT | 258u ), + VMAXSW_OPCODE = (4u << OPCODE_SHIFT | 386u ), + VMAXSH_OPCODE = (4u << OPCODE_SHIFT | 322u ), + VMAXUB_OPCODE = (4u << OPCODE_SHIFT | 2u ), + VMAXUW_OPCODE = (4u << OPCODE_SHIFT | 130u ), + VMAXUH_OPCODE = (4u << OPCODE_SHIFT | 66u ), + VMINSB_OPCODE = (4u << OPCODE_SHIFT | 770u ), + VMINSW_OPCODE = (4u << OPCODE_SHIFT | 898u ), + VMINSH_OPCODE = (4u << OPCODE_SHIFT | 834u ), + VMINUB_OPCODE = (4u << OPCODE_SHIFT | 514u ), + VMINUW_OPCODE = (4u << OPCODE_SHIFT | 642u ), + VMINUH_OPCODE = (4u << OPCODE_SHIFT | 578u ), + + VCMPEQUB_OPCODE= (4u << OPCODE_SHIFT | 6u ), + VCMPEQUH_OPCODE= (4u << OPCODE_SHIFT | 70u ), + VCMPEQUW_OPCODE= (4u << OPCODE_SHIFT | 134u ), + VCMPGTSH_OPCODE= (4u << OPCODE_SHIFT | 838u ), + VCMPGTSB_OPCODE= (4u << OPCODE_SHIFT | 774u ), + VCMPGTSW_OPCODE= (4u << OPCODE_SHIFT | 902u ), + VCMPGTUB_OPCODE= (4u << OPCODE_SHIFT | 518u ), + VCMPGTUH_OPCODE= (4u << OPCODE_SHIFT | 582u ), + VCMPGTUW_OPCODE= (4u << OPCODE_SHIFT | 646u ), + + VAND_OPCODE = (4u << OPCODE_SHIFT | 1028u ), + VANDC_OPCODE = (4u << OPCODE_SHIFT | 1092u ), + VNOR_OPCODE = (4u << OPCODE_SHIFT | 1284u ), + VOR_OPCODE = (4u << OPCODE_SHIFT | 1156u ), + VXOR_OPCODE = (4u << OPCODE_SHIFT | 1220u ), + VRLB_OPCODE = (4u << OPCODE_SHIFT | 4u ), + VRLW_OPCODE = (4u << OPCODE_SHIFT | 132u ), + VRLH_OPCODE = (4u << OPCODE_SHIFT | 68u ), + VSLB_OPCODE = (4u << OPCODE_SHIFT | 260u ), + VSLW_OPCODE = (4u << OPCODE_SHIFT | 388u ), + VSLH_OPCODE = (4u << OPCODE_SHIFT | 324u ), + VSRB_OPCODE = (4u << OPCODE_SHIFT | 516u ), + VSRW_OPCODE = (4u << OPCODE_SHIFT | 644u ), + VSRH_OPCODE = (4u << OPCODE_SHIFT | 580u ), + VSRAB_OPCODE = (4u << OPCODE_SHIFT | 772u ), + VSRAW_OPCODE = (4u << OPCODE_SHIFT | 900u ), + VSRAH_OPCODE = (4u << OPCODE_SHIFT | 836u ), + + // Vector Floating-Point + // not implemented yet + + // Vector Status and Control + MTVSCR_OPCODE = (4u << OPCODE_SHIFT | 1604u ), + MFVSCR_OPCODE = (4u << OPCODE_SHIFT | 1540u ), + + // Icache and dcache related instructions + DCBA_OPCODE = (31u << OPCODE_SHIFT | 758u << 1), + DCBZ_OPCODE = (31u << OPCODE_SHIFT | 1014u << 1), + DCBST_OPCODE = (31u << OPCODE_SHIFT | 54u << 1), + DCBF_OPCODE = (31u << OPCODE_SHIFT | 86u << 1), + + DCBT_OPCODE = (31u << OPCODE_SHIFT | 278u << 1), + DCBTST_OPCODE = (31u << OPCODE_SHIFT | 246u << 1), + ICBI_OPCODE = (31u << OPCODE_SHIFT | 982u << 1), + + // Instruction synchronization + ISYNC_OPCODE = (19u << OPCODE_SHIFT | 150u << 1), + // Memory barriers + SYNC_OPCODE = (31u << OPCODE_SHIFT | 598u << 1), + EIEIO_OPCODE = (31u << OPCODE_SHIFT | 854u << 1), + + // Trap instructions + TDI_OPCODE = (2u << OPCODE_SHIFT), + TWI_OPCODE = (3u << OPCODE_SHIFT), + TD_OPCODE = (31u << OPCODE_SHIFT | 68u << 1), + TW_OPCODE = (31u << OPCODE_SHIFT | 4u << 1), + + // Atomics. 
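// (Editor's illustration, not part of the original change: lwarx/ldarx load
// with a reservation, and stwcx./stdcx. store only if the reservation still
// holds, setting CR0.eq on success. A compare-and-swap loop built from the
// opcodes below would look like:
//   retry: ldarx  Rt, 0, Raddr     # load and reserve
//          cmpd   CCR0, Rt, Rcmp   # expected value still there?
//          bne    CCR0, done
//          stdcx. Rnew, 0, Raddr   # store conditional
//          bne    CCR0, retry      # reservation lost -> retry
//   done:
// The VM emits its actual CAS sequences elsewhere; this is only a sketch.)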
+ LWARX_OPCODE = (31u << OPCODE_SHIFT | 20u << 1), + LDARX_OPCODE = (31u << OPCODE_SHIFT | 84u << 1), + STWCX_OPCODE = (31u << OPCODE_SHIFT | 150u << 1), + STDCX_OPCODE = (31u << OPCODE_SHIFT | 214u << 1) + + }; + + // Trap instructions TO bits + enum trap_to_bits { + // single bits + traptoLessThanSigned = 1 << 4, // 0, left end + traptoGreaterThanSigned = 1 << 3, + traptoEqual = 1 << 2, + traptoLessThanUnsigned = 1 << 1, + traptoGreaterThanUnsigned = 1 << 0, // 4, right end + + // compound ones + traptoUnconditional = (traptoLessThanSigned | + traptoGreaterThanSigned | + traptoEqual | + traptoLessThanUnsigned | + traptoGreaterThanUnsigned) + }; + + // Branch hints BH field + enum branch_hint_bh { + // bclr cases: + bhintbhBCLRisReturn = 0, + bhintbhBCLRisNotReturnButSame = 1, + bhintbhBCLRisNotPredictable = 3, + + // bcctr cases: + bhintbhBCCTRisNotReturnButSame = 0, + bhintbhBCCTRisNotPredictable = 3 + }; + + // Branch prediction hints AT field + enum branch_hint_at { + bhintatNoHint = 0, // at=00 + bhintatIsNotTaken = 2, // at=10 + bhintatIsTaken = 3 // at=11 + }; + + // Branch prediction hints + enum branch_hint_concept { + // Use the same encoding as branch_hint_at to simplify code. + bhintNoHint = bhintatNoHint, + bhintIsNotTaken = bhintatIsNotTaken, + bhintIsTaken = bhintatIsTaken + }; + + // Used in BO field of branch instruction. + enum branch_condition { + bcondCRbiIs0 = 4, // bo=001at + bcondCRbiIs1 = 12, // bo=011at + bcondAlways = 20 // bo=10100 + }; + + // Branch condition with combined prediction hints. + enum branch_condition_with_hint { + bcondCRbiIs0_bhintNoHint = bcondCRbiIs0 | bhintatNoHint, + bcondCRbiIs0_bhintIsNotTaken = bcondCRbiIs0 | bhintatIsNotTaken, + bcondCRbiIs0_bhintIsTaken = bcondCRbiIs0 | bhintatIsTaken, + bcondCRbiIs1_bhintNoHint = bcondCRbiIs1 | bhintatNoHint, + bcondCRbiIs1_bhintIsNotTaken = bcondCRbiIs1 | bhintatIsNotTaken, + bcondCRbiIs1_bhintIsTaken = bcondCRbiIs1 | bhintatIsTaken, + }; + + // Branch prediction hints. + inline static int add_bhint_to_boint(const int bhint, const int boint) { + switch (boint) { + case bcondCRbiIs0: + case bcondCRbiIs1: + // branch_hint and branch_hint_at have same encodings + assert( (int)bhintNoHint == (int)bhintatNoHint + && (int)bhintIsNotTaken == (int)bhintatIsNotTaken + && (int)bhintIsTaken == (int)bhintatIsTaken, + "wrong encodings"); + assert((bhint & 0x03) == bhint, "wrong encodings"); + return (boint & ~0x03) | bhint; + case bcondAlways: + // no branch_hint + return boint; + default: + ShouldNotReachHere(); + return 0; + } + } + + // Extract bcond from boint. + inline static int inv_boint_bcond(const int boint) { + int r_bcond = boint & ~0x03; + assert(r_bcond == bcondCRbiIs0 || + r_bcond == bcondCRbiIs1 || + r_bcond == bcondAlways, + "bad branch condition"); + return r_bcond; + } + + // Extract bhint from boint. + inline static int inv_boint_bhint(const int boint) { + int r_bhint = boint & 0x03; + assert(r_bhint == bhintatNoHint || + r_bhint == bhintatIsNotTaken || + r_bhint == bhintatIsTaken, + "bad branch hint"); + return r_bhint; + } + + // Calculate opposite of given bcond. + inline static int opposite_bcond(const int bcond) { + switch (bcond) { + case bcondCRbiIs0: + return bcondCRbiIs1; + case bcondCRbiIs1: + return bcondCRbiIs0; + default: + ShouldNotReachHere(); + return 0; + } + } + + // Calculate opposite of given bhint. 
+ inline static int opposite_bhint(const int bhint) { + switch (bhint) { + case bhintatNoHint: + return bhintatNoHint; + case bhintatIsNotTaken: + return bhintatIsTaken; + case bhintatIsTaken: + return bhintatIsNotTaken; + default: + ShouldNotReachHere(); + return 0; + } + } + + // PPC branch instructions + enum ppcops { + b_op = 18, + bc_op = 16, + bcr_op = 19 + }; + + enum Condition { + negative = 0, + less = 0, + positive = 1, + greater = 1, + zero = 2, + equal = 2, + summary_overflow = 3, + }; + + public: + // Helper functions for groups of instructions + + enum Predict { pt = 1, pn = 0 }; // pt = predict taken + + enum Membar_mask_bits { // page 184, v9 + StoreStore = 1 << 3, + LoadStore = 1 << 2, + StoreLoad = 1 << 1, + LoadLoad = 1 << 0, + + Sync = 1 << 6, + MemIssue = 1 << 5, + Lookaside = 1 << 4 + }; + + // instruction must start at passed address + static int instr_len(unsigned char *instr) { return BytesPerInstWord; } + + // instruction must be left-justified in argument + static int instr_len(unsigned long instr) { return BytesPerInstWord; } + + // longest instructions + static int instr_maxlen() { return BytesPerInstWord; } + + // Test if x is within signed immediate range for nbits. + static bool is_simm(int x, unsigned int nbits) { + assert(0 < nbits && nbits < 32, "out of bounds"); + const int min = -( ((int)1) << nbits-1 ); + const int maxplus1 = ( ((int)1) << nbits-1 ); + return min <= x && x < maxplus1; + } + + static bool is_simm(jlong x, unsigned int nbits) { + assert(0 < nbits && nbits < 64, "out of bounds"); + const jlong min = -( ((jlong)1) << nbits-1 ); + const jlong maxplus1 = ( ((jlong)1) << nbits-1 ); + return min <= x && x < maxplus1; + } + + // Test if x is within unsigned immediate range for nbits + static bool is_uimm(int x, unsigned int nbits) { + assert(0 < nbits && nbits < 32, "out of bounds"); + const int maxplus1 = ( ((int)1) << nbits ); + return 0 <= x && x < maxplus1; + } + + static bool is_uimm(jlong x, unsigned int nbits) { + assert(0 < nbits && nbits < 64, "out of bounds"); + const jlong maxplus1 = ( ((jlong)1) << nbits ); + return 0 <= x && x < maxplus1; + } + + protected: + // helpers + + // X is supposed to fit in a field "nbits" wide + // and be sign-extended. Check the range. 
+ static void assert_signed_range(intptr_t x, int nbits) { + assert(nbits == 32 || (-(1 << nbits-1) <= x && x < (1 << nbits-1)), + "value out of range"); + } + + static void assert_signed_word_disp_range(intptr_t x, int nbits) { + assert((x & 3) == 0, "not word aligned"); + assert_signed_range(x, nbits + 2); + } + + static void assert_unsigned_const(int x, int nbits) { + assert(juint(x) < juint(1 << nbits), "unsigned constant out of range"); + } + + static int fmask(juint hi_bit, juint lo_bit) { + assert(hi_bit >= lo_bit && hi_bit < 32, "bad bits"); + return (1 << ( hi_bit-lo_bit + 1 )) - 1; + } + + // inverse of u_field + static int inv_u_field(int x, int hi_bit, int lo_bit) { + juint r = juint(x) >> lo_bit; + r &= fmask(hi_bit, lo_bit); + return int(r); + } + + // signed version: extract from field and sign-extend + static int inv_s_field_ppc(int x, int hi_bit, int lo_bit) { + x = x << (31-hi_bit); + x = x >> (31-hi_bit+lo_bit); + return x; + } + + static int u_field(int x, int hi_bit, int lo_bit) { + assert((x & ~fmask(hi_bit, lo_bit)) == 0, "value out of range"); + int r = x << lo_bit; + assert(inv_u_field(r, hi_bit, lo_bit) == x, "just checking"); + return r; + } + + // Same as u_field for signed values + static int s_field(int x, int hi_bit, int lo_bit) { + int nbits = hi_bit - lo_bit + 1; + assert(nbits == 32 || (-(1 << nbits-1) <= x && x < (1 << nbits-1)), + "value out of range"); + x &= fmask(hi_bit, lo_bit); + int r = x << lo_bit; + return r; + } + + // inv_op for ppc instructions + static int inv_op_ppc(int x) { return inv_u_field(x, 31, 26); } + + // Determine target address from li, bd field of branch instruction. + static intptr_t inv_li_field(int x) { + intptr_t r = inv_s_field_ppc(x, 25, 2); + r = (r << 2); + return r; + } + static intptr_t inv_bd_field(int x, intptr_t pos) { + intptr_t r = inv_s_field_ppc(x, 15, 2); + r = (r << 2) + pos; + return r; + } + + #define inv_opp_u_field(x, hi_bit, lo_bit) inv_u_field(x, 31-(lo_bit), 31-(hi_bit)) + #define inv_opp_s_field(x, hi_bit, lo_bit) inv_s_field_ppc(x, 31-(lo_bit), 31-(hi_bit)) + // Extract instruction fields from instruction words. + public: + static int inv_ra_field(int x) { return inv_opp_u_field(x, 15, 11); } + static int inv_rb_field(int x) { return inv_opp_u_field(x, 20, 16); } + static int inv_rt_field(int x) { return inv_opp_u_field(x, 10, 6); } + static int inv_rs_field(int x) { return inv_opp_u_field(x, 10, 6); } + // Ds uses opp_s_field(x, 31, 16), but lowest 2 bits must be 0. + // Inv_ds_field uses range (x, 29, 16) but shifts by 2 to ensure that lowest bits are 0. 
+ static int inv_ds_field(int x) { return inv_opp_s_field(x, 29, 16) << 2; } + static int inv_d1_field(int x) { return inv_opp_s_field(x, 31, 16); } + static int inv_si_field(int x) { return inv_opp_s_field(x, 31, 16); } + static int inv_to_field(int x) { return inv_opp_u_field(x, 10, 6); } + static int inv_lk_field(int x) { return inv_opp_u_field(x, 31, 31); } + static int inv_bo_field(int x) { return inv_opp_u_field(x, 10, 6); } + static int inv_bi_field(int x) { return inv_opp_u_field(x, 15, 11); } + + #define opp_u_field(x, hi_bit, lo_bit) u_field(x, 31-(lo_bit), 31-(hi_bit)) + #define opp_s_field(x, hi_bit, lo_bit) s_field(x, 31-(lo_bit), 31-(hi_bit)) + + // instruction fields + static int aa( int x) { return opp_u_field(x, 30, 30); } + static int ba( int x) { return opp_u_field(x, 15, 11); } + static int bb( int x) { return opp_u_field(x, 20, 16); } + static int bc( int x) { return opp_u_field(x, 25, 21); } + static int bd( int x) { return opp_s_field(x, 29, 16); } + static int bf( ConditionRegister cr) { return bf(cr->encoding()); } + static int bf( int x) { return opp_u_field(x, 8, 6); } + static int bfa(ConditionRegister cr) { return bfa(cr->encoding()); } + static int bfa( int x) { return opp_u_field(x, 13, 11); } + static int bh( int x) { return opp_u_field(x, 20, 19); } + static int bi( int x) { return opp_u_field(x, 15, 11); } + static int bi0(ConditionRegister cr, Condition c) { return (cr->encoding() << 2) | c; } + static int bo( int x) { return opp_u_field(x, 10, 6); } + static int bt( int x) { return opp_u_field(x, 10, 6); } + static int d1( int x) { return opp_s_field(x, 31, 16); } + static int ds( int x) { assert((x & 0x3) == 0, "unaligned offset"); return opp_s_field(x, 31, 16); } + static int eh( int x) { return opp_u_field(x, 31, 31); } + static int flm( int x) { return opp_u_field(x, 14, 7); } + static int fra( FloatRegister r) { return fra(r->encoding());} + static int frb( FloatRegister r) { return frb(r->encoding());} + static int frc( FloatRegister r) { return frc(r->encoding());} + static int frs( FloatRegister r) { return frs(r->encoding());} + static int frt( FloatRegister r) { return frt(r->encoding());} + static int fra( int x) { return opp_u_field(x, 15, 11); } + static int frb( int x) { return opp_u_field(x, 20, 16); } + static int frc( int x) { return opp_u_field(x, 25, 21); } + static int frs( int x) { return opp_u_field(x, 10, 6); } + static int frt( int x) { return opp_u_field(x, 10, 6); } + static int fxm( int x) { return opp_u_field(x, 19, 12); } + static int l10( int x) { return opp_u_field(x, 10, 10); } + static int l15( int x) { return opp_u_field(x, 15, 15); } + static int l910( int x) { return opp_u_field(x, 10, 9); } + static int lev( int x) { return opp_u_field(x, 26, 20); } + static int li( int x) { return opp_s_field(x, 29, 6); } + static int lk( int x) { return opp_u_field(x, 31, 31); } + static int mb2125( int x) { return opp_u_field(x, 25, 21); } + static int me2630( int x) { return opp_u_field(x, 30, 26); } + static int mb2126( int x) { return opp_u_field(((x & 0x1f) << 1) | ((x & 0x20) >> 5), 26, 21); } + static int me2126( int x) { return mb2126(x); } + static int nb( int x) { return opp_u_field(x, 20, 16); } + //static int opcd( int x) { return opp_u_field(x, 5, 0); } // is contained in our opcodes + static int oe( int x) { return opp_u_field(x, 21, 21); } + static int ra( Register r) { return ra(r->encoding()); } + static int ra( int x) { return opp_u_field(x, 15, 11); } + static int rb( Register r) { return rb(r->encoding()); } + 
static int rb( int x) { return opp_u_field(x, 20, 16); } + static int rc( int x) { return opp_u_field(x, 31, 31); } + static int rs( Register r) { return rs(r->encoding()); } + static int rs( int x) { return opp_u_field(x, 10, 6); } + // we don't want to use R0 in memory accesses, because it has value `0' then + static int ra0mem( Register r) { assert(r != R0, "cannot use register R0 in memory access"); return ra(r); } + static int ra0mem( int x) { assert(x != 0, "cannot use register 0 in memory access"); return ra(x); } + + // register r is target + static int rt( Register r) { return rs(r); } + static int rt( int x) { return rs(x); } + static int rta( Register r) { return ra(r); } + static int rta0mem( Register r) { rta(r); return ra0mem(r); } + + static int sh1620( int x) { return opp_u_field(x, 20, 16); } + static int sh30( int x) { return opp_u_field(x, 30, 30); } + static int sh162030( int x) { return sh1620(x & 0x1f) | sh30((x & 0x20) >> 5); } + static int si( int x) { return opp_s_field(x, 31, 16); } + static int spr( int x) { return opp_u_field(x, 20, 11); } + static int sr( int x) { return opp_u_field(x, 15, 12); } + static int tbr( int x) { return opp_u_field(x, 20, 11); } + static int th( int x) { return opp_u_field(x, 10, 7); } + static int thct( int x) { assert((x&8)==0, "must be valid cache specification"); return th(x); } + static int thds( int x) { assert((x&8)==8, "must be valid stream specification"); return th(x); } + static int to( int x) { return opp_u_field(x, 10, 6); } + static int u( int x) { return opp_u_field(x, 19, 16); } + static int ui( int x) { return opp_u_field(x, 31, 16); } + + // support vector instructions for >= Power6 + static int vra( int x) { return opp_u_field(x, 15, 11); } + static int vrb( int x) { return opp_u_field(x, 20, 16); } + static int vrc( int x) { return opp_u_field(x, 25, 21); } + static int vrs( int x) { return opp_u_field(x, 10, 6); } + static int vrt( int x) { return opp_u_field(x, 10, 6); } + + static int vra( VectorRegister r) { return vra(r->encoding());} + static int vrb( VectorRegister r) { return vrb(r->encoding());} + static int vrc( VectorRegister r) { return vrc(r->encoding());} + static int vrs( VectorRegister r) { return vrs(r->encoding());} + static int vrt( VectorRegister r) { return vrt(r->encoding());} + + static int vsplt_uim( int x) { return opp_u_field(x, 15, 12); } // for vsplt* instructions + static int vsplti_sim(int x) { return opp_u_field(x, 15, 11); } // for vsplti* instructions + static int vsldoi_shb(int x) { return opp_u_field(x, 25, 22); } // for vsldoi instruction + static int vcmp_rc( int x) { return opp_u_field(x, 21, 21); } // for vcmp* instructions + + //static int xo1( int x) { return opp_u_field(x, 29, 21); }// is contained in our opcodes + //static int xo2( int x) { return opp_u_field(x, 30, 21); }// is contained in our opcodes + //static int xo3( int x) { return opp_u_field(x, 30, 22); }// is contained in our opcodes + //static int xo4( int x) { return opp_u_field(x, 30, 26); }// is contained in our opcodes + //static int xo5( int x) { return opp_u_field(x, 29, 27); }// is contained in our opcodes + //static int xo6( int x) { return opp_u_field(x, 30, 27); }// is contained in our opcodes + //static int xo7( int x) { return opp_u_field(x, 31, 30); }// is contained in our opcodes + + protected: + // Compute relative address for branch. 
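// (Editor's example, not part of the original change: for a branch at code
// offset 0x100 targeting offset 0x120, disp(0x120, 0x100) = (0x20 >> 2) = 8
// instruction words; the li()/bd() field encoders above place that value at
// bit 2 of the instruction, restoring the byte displacement on decode.)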
+ static intptr_t disp(intptr_t x, intptr_t off) { + int xx = x - off; + xx = xx >> 2; + return xx; + } + + public: + // signed immediate, in low bits, nbits long + static int simm(int x, int nbits) { + assert_signed_range(x, nbits); + return x & ((1 << nbits) - 1); + } + + // unsigned immediate, in low bits, nbits long + static int uimm(int x, int nbits) { + assert_unsigned_const(x, nbits); + return x & ((1 << nbits) - 1); + } + + static void set_imm(int* instr, short s) { + short* p = ((short *)instr) + 1; + *p = s; + } + + static int get_imm(address a, int instruction_number) { + short imm; + short *p =((short *)a)+2*instruction_number+1; + imm = *p; + return (int)imm; + } + + static inline int hi16_signed( int x) { return (int)(int16_t)(x >> 16); } + static inline int lo16_unsigned(int x) { return x & 0xffff; } + + protected: + + // Extract the top 32 bits in a 64 bit word. + static int32_t hi32(int64_t x) { + int32_t r = int32_t((uint64_t)x >> 32); + return r; + } + + public: + + static inline unsigned int align_addr(unsigned int addr, unsigned int a) { + return ((addr + (a - 1)) & ~(a - 1)); + } + + static inline bool is_aligned(unsigned int addr, unsigned int a) { + return (0 == addr % a); + } + + void flush() { + AbstractAssembler::flush(); + } + + inline void emit_int32(int); // shadows AbstractAssembler::emit_int32 + inline void emit_data(int); + inline void emit_data(int, RelocationHolder const&); + inline void emit_data(int, relocInfo::relocType rtype); + + // Emit an address. + inline address emit_addr(const address addr = NULL); + + // Emit a function descriptor with the specified entry point, TOC, + // and ENV. If the entry point is NULL, the descriptor will point + // just past the descriptor. + // Use values from friend functions as defaults. + inline address emit_fd(address entry = NULL, + address toc = (address) FunctionDescriptor::friend_toc, + address env = (address) FunctionDescriptor::friend_env); + + ///////////////////////////////////////////////////////////////////////////////////// + // PPC instructions + ///////////////////////////////////////////////////////////////////////////////////// + + // Memory instructions use r0 as hard coded 0, e.g. to simulate loading + // immediates. The normal instruction encoders enforce that r0 is not + // passed to them. Use either extended mnemonics encoders or the special ra0 + // versions. + + // Issue an illegal instruction. 
+ inline void illtrap(); + static inline bool is_illtrap(int x); + + // PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions + inline void addi( Register d, Register a, int si16); + inline void addis(Register d, Register a, int si16); + private: + inline void addi_r0ok( Register d, Register a, int si16); + inline void addis_r0ok(Register d, Register a, int si16); + public: + inline void addic_( Register d, Register a, int si16); + inline void subfic( Register d, Register a, int si16); + inline void add( Register d, Register a, Register b); + inline void add_( Register d, Register a, Register b); + inline void subf( Register d, Register a, Register b); + inline void sub( Register d, Register a, Register b); + inline void subf_( Register d, Register a, Register b); + inline void addc( Register d, Register a, Register b); + inline void addc_( Register d, Register a, Register b); + inline void subfc( Register d, Register a, Register b); + inline void subfc_( Register d, Register a, Register b); + inline void adde( Register d, Register a, Register b); + inline void adde_( Register d, Register a, Register b); + inline void subfe( Register d, Register a, Register b); + inline void subfe_( Register d, Register a, Register b); + inline void neg( Register d, Register a); + inline void neg_( Register d, Register a); + inline void mulli( Register d, Register a, int si16); + inline void mulld( Register d, Register a, Register b); + inline void mulld_( Register d, Register a, Register b); + inline void mullw( Register d, Register a, Register b); + inline void mullw_( Register d, Register a, Register b); + inline void mulhw( Register d, Register a, Register b); + inline void mulhw_( Register d, Register a, Register b); + inline void mulhd( Register d, Register a, Register b); + inline void mulhd_( Register d, Register a, Register b); + inline void mulhdu( Register d, Register a, Register b); + inline void mulhdu_(Register d, Register a, Register b); + inline void divd( Register d, Register a, Register b); + inline void divd_( Register d, Register a, Register b); + inline void divw( Register d, Register a, Register b); + inline void divw_( Register d, Register a, Register b); + + // extended mnemonics + inline void li( Register d, int si16); + inline void lis( Register d, int si16); + inline void addir(Register d, int si16, Register a); + + static bool is_addi(int x) { + return ADDI_OPCODE == (x & ADDI_OPCODE_MASK); + } + static bool is_addis(int x) { + return ADDIS_OPCODE == (x & ADDIS_OPCODE_MASK); + } + static bool is_bxx(int x) { + return BXX_OPCODE == (x & BXX_OPCODE_MASK); + } + static bool is_b(int x) { + return BXX_OPCODE == (x & BXX_OPCODE_MASK) && inv_lk_field(x) == 0; + } + static bool is_bl(int x) { + return BXX_OPCODE == (x & BXX_OPCODE_MASK) && inv_lk_field(x) == 1; + } + static bool is_bcxx(int x) { + return BCXX_OPCODE == (x & BCXX_OPCODE_MASK); + } + static bool is_bxx_or_bcxx(int x) { + return is_bxx(x) || is_bcxx(x); + } + static bool is_bctrl(int x) { + return x == 0x4e800421; + } + static bool is_bctr(int x) { + return x == 0x4e800420; + } + static bool is_bclr(int x) { + return BCLR_OPCODE == (x & XL_FORM_OPCODE_MASK); + } + static bool is_li(int x) { + return is_addi(x) && inv_ra_field(x)==0; + } + static bool is_lis(int x) { + return is_addis(x) && inv_ra_field(x)==0; + } + static bool is_mtctr(int x) { + return MTCTR_OPCODE == (x & MTCTR_OPCODE_MASK); + } + static bool is_ld(int x) { + return LD_OPCODE == (x & LD_OPCODE_MASK); + } + static bool is_std(int x) { + return 
STD_OPCODE == (x & STD_OPCODE_MASK);
+ }
+ static bool is_stdu(int x) {
+ return STDU_OPCODE == (x & STDU_OPCODE_MASK);
+ }
+ static bool is_stdx(int x) {
+ return STDX_OPCODE == (x & STDX_OPCODE_MASK);
+ }
+ static bool is_stdux(int x) {
+ return STDUX_OPCODE == (x & STDUX_OPCODE_MASK);
+ }
+ static bool is_stwx(int x) {
+ return STWX_OPCODE == (x & STWX_OPCODE_MASK);
+ }
+ static bool is_stwux(int x) {
+ return STWUX_OPCODE == (x & STWUX_OPCODE_MASK);
+ }
+ static bool is_stw(int x) {
+ return STW_OPCODE == (x & STW_OPCODE_MASK);
+ }
+ static bool is_stwu(int x) {
+ return STWU_OPCODE == (x & STWU_OPCODE_MASK);
+ }
+ static bool is_ori(int x) {
+ return ORI_OPCODE == (x & ORI_OPCODE_MASK);
+ }
+ static bool is_oris(int x) {
+ return ORIS_OPCODE == (x & ORIS_OPCODE_MASK);
+ }
+ static bool is_rldicr(int x) {
+ return (RLDICR_OPCODE == (x & RLDICR_OPCODE_MASK));
+ }
+ static bool is_nop(int x) {
+ return x == 0x60000000;
+ }
+ // endgroup opcode for Power6
+ static bool is_endgroup(int x) {
+ return is_ori(x) && inv_ra_field(x)==1 && inv_rs_field(x)==1 && inv_d1_field(x)==0;
+ }
+
+
+ private:
+ // PPC 1, section 3.3.9, Fixed-Point Compare Instructions
+ inline void cmpi( ConditionRegister bf, int l, Register a, int si16);
+ inline void cmp( ConditionRegister bf, int l, Register a, Register b);
+ inline void cmpli(ConditionRegister bf, int l, Register a, int ui16);
+ inline void cmpl( ConditionRegister bf, int l, Register a, Register b);
+
+ public:
+ // extended mnemonics of Compare Instructions
+ inline void cmpwi( ConditionRegister crx, Register a, int si16);
+ inline void cmpdi( ConditionRegister crx, Register a, int si16);
+ inline void cmpw( ConditionRegister crx, Register a, Register b);
+ inline void cmpd( ConditionRegister crx, Register a, Register b);
+ inline void cmplwi(ConditionRegister crx, Register a, int ui16);
+ inline void cmpldi(ConditionRegister crx, Register a, int ui16);
+ inline void cmplw( ConditionRegister crx, Register a, Register b);
+ inline void cmpld( ConditionRegister crx, Register a, Register b);
+
+ inline void isel( Register d, Register a, Register b, int bc);
+
+ // PPC 1, section 3.3.11, Fixed-Point Logical Instructions
+ void andi( Register a, Register s, int ui16); // optimized version
+ inline void andi_( Register a, Register s, int ui16);
+ inline void andis_( Register a, Register s, int ui16);
+ inline void ori( Register a, Register s, int ui16);
+ inline void oris( Register a, Register s, int ui16);
+ inline void xori( Register a, Register s, int ui16);
+ inline void xoris( Register a, Register s, int ui16);
+ inline void andr( Register a, Register s, Register b); // suffixed by 'r' as 'and' is C++ keyword
+ inline void and_( Register a, Register s, Register b);
+ // Turn or0(rx,rx,rx) into a nop and avoid that we accidentally emit a
+ // SMT-priority change instruction (see SMT instructions below).
+ inline void or_unchecked(Register a, Register s, Register b); + inline void orr( Register a, Register s, Register b); // suffixed by 'r' as 'or' is C++ keyword + inline void or_( Register a, Register s, Register b); + inline void xorr( Register a, Register s, Register b); // suffixed by 'r' as 'xor' is C++ keyword + inline void xor_( Register a, Register s, Register b); + inline void nand( Register a, Register s, Register b); + inline void nand_( Register a, Register s, Register b); + inline void nor( Register a, Register s, Register b); + inline void nor_( Register a, Register s, Register b); + inline void andc( Register a, Register s, Register b); + inline void andc_( Register a, Register s, Register b); + inline void orc( Register a, Register s, Register b); + inline void orc_( Register a, Register s, Register b); + inline void extsb( Register a, Register s); + inline void extsh( Register a, Register s); + inline void extsw( Register a, Register s); + + // extended mnemonics + inline void nop(); + // NOP for FP and BR units (different versions to allow them to be in one group) + inline void fpnop0(); + inline void fpnop1(); + inline void brnop0(); + inline void brnop1(); + inline void brnop2(); + + inline void mr( Register d, Register s); + inline void ori_opt( Register d, int ui16); + inline void oris_opt(Register d, int ui16); + + // endgroup opcode for Power6 + inline void endgroup(); + + // count instructions + inline void cntlzw( Register a, Register s); + inline void cntlzw_( Register a, Register s); + inline void cntlzd( Register a, Register s); + inline void cntlzd_( Register a, Register s); + + // PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions + inline void sld( Register a, Register s, Register b); + inline void sld_( Register a, Register s, Register b); + inline void slw( Register a, Register s, Register b); + inline void slw_( Register a, Register s, Register b); + inline void srd( Register a, Register s, Register b); + inline void srd_( Register a, Register s, Register b); + inline void srw( Register a, Register s, Register b); + inline void srw_( Register a, Register s, Register b); + inline void srad( Register a, Register s, Register b); + inline void srad_( Register a, Register s, Register b); + inline void sraw( Register a, Register s, Register b); + inline void sraw_( Register a, Register s, Register b); + inline void sradi( Register a, Register s, int sh6); + inline void sradi_( Register a, Register s, int sh6); + inline void srawi( Register a, Register s, int sh5); + inline void srawi_( Register a, Register s, int sh5); + + // extended mnemonics for Shift Instructions + inline void sldi( Register a, Register s, int sh6); + inline void sldi_( Register a, Register s, int sh6); + inline void slwi( Register a, Register s, int sh5); + inline void slwi_( Register a, Register s, int sh5); + inline void srdi( Register a, Register s, int sh6); + inline void srdi_( Register a, Register s, int sh6); + inline void srwi( Register a, Register s, int sh5); + inline void srwi_( Register a, Register s, int sh5); + + inline void clrrdi( Register a, Register s, int ui6); + inline void clrrdi_( Register a, Register s, int ui6); + inline void clrldi( Register a, Register s, int ui6); + inline void clrldi_( Register a, Register s, int ui6); + inline void clrlsldi(Register a, Register s, int clrl6, int shl6); + inline void clrlsldi_(Register a, Register s, int clrl6, int shl6); + inline void extrdi( Register a, Register s, int n, int b); + // testbit with condition 
register + inline void testbitdi(ConditionRegister cr, Register a, Register s, int ui6); + + // rotate instructions + inline void rotldi( Register a, Register s, int n); + inline void rotrdi( Register a, Register s, int n); + inline void rotlwi( Register a, Register s, int n); + inline void rotrwi( Register a, Register s, int n); + + // Rotate Instructions + inline void rldic( Register a, Register s, int sh6, int mb6); + inline void rldic_( Register a, Register s, int sh6, int mb6); + inline void rldicr( Register a, Register s, int sh6, int mb6); + inline void rldicr_( Register a, Register s, int sh6, int mb6); + inline void rldicl( Register a, Register s, int sh6, int mb6); + inline void rldicl_( Register a, Register s, int sh6, int mb6); + inline void rlwinm( Register a, Register s, int sh5, int mb5, int me5); + inline void rlwinm_( Register a, Register s, int sh5, int mb5, int me5); + inline void rldimi( Register a, Register s, int sh6, int mb6); + inline void rldimi_( Register a, Register s, int sh6, int mb6); + inline void rlwimi( Register a, Register s, int sh5, int mb5, int me5); + inline void insrdi( Register a, Register s, int n, int b); + inline void insrwi( Register a, Register s, int n, int b); + + // PPC 1, section 3.3.2 Fixed-Point Load Instructions + // 4 bytes + inline void lwzx( Register d, Register s1, Register s2); + inline void lwz( Register d, int si16, Register s1); + inline void lwzu( Register d, int si16, Register s1); + + // 4 bytes + inline void lwax( Register d, Register s1, Register s2); + inline void lwa( Register d, int si16, Register s1); + + // 2 bytes + inline void lhzx( Register d, Register s1, Register s2); + inline void lhz( Register d, int si16, Register s1); + inline void lhzu( Register d, int si16, Register s1); + + // 2 bytes + inline void lhax( Register d, Register s1, Register s2); + inline void lha( Register d, int si16, Register s1); + inline void lhau( Register d, int si16, Register s1); + + // 1 byte + inline void lbzx( Register d, Register s1, Register s2); + inline void lbz( Register d, int si16, Register s1); + inline void lbzu( Register d, int si16, Register s1); + + // 8 bytes + inline void ldx( Register d, Register s1, Register s2); + inline void ld( Register d, int si16, Register s1); + inline void ldu( Register d, int si16, Register s1); + + // PPC 1, section 3.3.3 Fixed-Point Store Instructions + inline void stwx( Register d, Register s1, Register s2); + inline void stw( Register d, int si16, Register s1); + inline void stwu( Register d, int si16, Register s1); + + inline void sthx( Register d, Register s1, Register s2); + inline void sth( Register d, int si16, Register s1); + inline void sthu( Register d, int si16, Register s1); + + inline void stbx( Register d, Register s1, Register s2); + inline void stb( Register d, int si16, Register s1); + inline void stbu( Register d, int si16, Register s1); + + inline void stdx( Register d, Register s1, Register s2); + inline void std( Register d, int si16, Register s1); + inline void stdu( Register d, int si16, Register s1); + inline void stdux(Register s, Register a, Register b); + + // PPC 1, section 3.3.13 Move To/From System Register Instructions + inline void mtlr( Register s1); + inline void mflr( Register d); + inline void mtctr(Register s1); + inline void mfctr(Register d); + inline void mtcrf(int fxm, Register s); + inline void mfcr( Register d); + inline void mcrf( ConditionRegister crd, ConditionRegister cra); + inline void mtcr( Register s); + + // PPC 1, section 2.4.1 Branch 
Instructions + inline void b( address a, relocInfo::relocType rt = relocInfo::none); + inline void b( Label& L); + inline void bl( address a, relocInfo::relocType rt = relocInfo::none); + inline void bl( Label& L); + inline void bc( int boint, int biint, address a, relocInfo::relocType rt = relocInfo::none); + inline void bc( int boint, int biint, Label& L); + inline void bcl(int boint, int biint, address a, relocInfo::relocType rt = relocInfo::none); + inline void bcl(int boint, int biint, Label& L); + + inline void bclr( int boint, int biint, int bhint, relocInfo::relocType rt = relocInfo::none); + inline void bclrl( int boint, int biint, int bhint, relocInfo::relocType rt = relocInfo::none); + inline void bcctr( int boint, int biint, int bhint = bhintbhBCCTRisNotReturnButSame, + relocInfo::relocType rt = relocInfo::none); + inline void bcctrl(int boint, int biint, int bhint = bhintbhBCLRisReturn, + relocInfo::relocType rt = relocInfo::none); + + // helper function for b, bcxx + inline bool is_within_range_of_b(address a, address pc); + inline bool is_within_range_of_bcxx(address a, address pc); + + // get the destination of a bxx branch (b, bl, ba, bla) + static inline address bxx_destination(address baddr); + static inline address bxx_destination(int instr, address pc); + static inline intptr_t bxx_destination_offset(int instr, intptr_t bxx_pos); + + // extended mnemonics for branch instructions + inline void blt(ConditionRegister crx, Label& L); + inline void bgt(ConditionRegister crx, Label& L); + inline void beq(ConditionRegister crx, Label& L); + inline void bso(ConditionRegister crx, Label& L); + inline void bge(ConditionRegister crx, Label& L); + inline void ble(ConditionRegister crx, Label& L); + inline void bne(ConditionRegister crx, Label& L); + inline void bns(ConditionRegister crx, Label& L); + + // Branch instructions with static prediction hints. 
+ inline void blt_predict_taken( ConditionRegister crx, Label& L); + inline void bgt_predict_taken( ConditionRegister crx, Label& L); + inline void beq_predict_taken( ConditionRegister crx, Label& L); + inline void bso_predict_taken( ConditionRegister crx, Label& L); + inline void bge_predict_taken( ConditionRegister crx, Label& L); + inline void ble_predict_taken( ConditionRegister crx, Label& L); + inline void bne_predict_taken( ConditionRegister crx, Label& L); + inline void bns_predict_taken( ConditionRegister crx, Label& L); + inline void blt_predict_not_taken(ConditionRegister crx, Label& L); + inline void bgt_predict_not_taken(ConditionRegister crx, Label& L); + inline void beq_predict_not_taken(ConditionRegister crx, Label& L); + inline void bso_predict_not_taken(ConditionRegister crx, Label& L); + inline void bge_predict_not_taken(ConditionRegister crx, Label& L); + inline void ble_predict_not_taken(ConditionRegister crx, Label& L); + inline void bne_predict_not_taken(ConditionRegister crx, Label& L); + inline void bns_predict_not_taken(ConditionRegister crx, Label& L); + + // for use in conjunction with testbitdi: + inline void btrue( ConditionRegister crx, Label& L); + inline void bfalse(ConditionRegister crx, Label& L); + + inline void bltl(ConditionRegister crx, Label& L); + inline void bgtl(ConditionRegister crx, Label& L); + inline void beql(ConditionRegister crx, Label& L); + inline void bsol(ConditionRegister crx, Label& L); + inline void bgel(ConditionRegister crx, Label& L); + inline void blel(ConditionRegister crx, Label& L); + inline void bnel(ConditionRegister crx, Label& L); + inline void bnsl(ConditionRegister crx, Label& L); + + // extended mnemonics for Branch Instructions via LR + // We use `blr' for returns. + inline void blr(relocInfo::relocType rt = relocInfo::none); + + // extended mnemonics for Branch Instructions with CTR + // bdnz means `decrement CTR and jump to L if CTR is not zero' + inline void bdnz(Label& L); + // Decrement and branch if result is zero. + inline void bdz(Label& L); + // we use `bctr[l]' for jumps/calls in function descriptor glue + // code, e.g. calls to runtime functions + inline void bctr( relocInfo::relocType rt = relocInfo::none); + inline void bctrl(relocInfo::relocType rt = relocInfo::none); + // conditional jumps/branches via CTR + inline void beqctr( ConditionRegister crx, relocInfo::relocType rt = relocInfo::none); + inline void beqctrl(ConditionRegister crx, relocInfo::relocType rt = relocInfo::none); + inline void bnectr( ConditionRegister crx, relocInfo::relocType rt = relocInfo::none); + inline void bnectrl(ConditionRegister crx, relocInfo::relocType rt = relocInfo::none); + + // condition register logic instructions + inline void crand( int d, int s1, int s2); + inline void crnand(int d, int s1, int s2); + inline void cror( int d, int s1, int s2); + inline void crxor( int d, int s1, int s2); + inline void crnor( int d, int s1, int s2); + inline void creqv( int d, int s1, int s2); + inline void crandc(int d, int s1, int s2); + inline void crorc( int d, int s1, int s2); + + // icache and dcache related instructions + inline void icbi( Register s1, Register s2); + //inline void dcba(Register s1, Register s2); // Instruction for embedded processor only. 
+ inline void dcbz( Register s1, Register s2); + inline void dcbst( Register s1, Register s2); + inline void dcbf( Register s1, Register s2); + + enum ct_cache_specification { + ct_primary_cache = 0, + ct_secondary_cache = 2 + }; + // dcache read hint + inline void dcbt( Register s1, Register s2); + inline void dcbtct( Register s1, Register s2, int ct); + inline void dcbtds( Register s1, Register s2, int ds); + // dcache write hint + inline void dcbtst( Register s1, Register s2); + inline void dcbtstct(Register s1, Register s2, int ct); + + // machine barrier instructions: + // + // - sync two-way memory barrier, aka fence + // - lwsync orders Store|Store, + // Load|Store, + // Load|Load, + // but not Store|Load + // - eieio orders memory accesses for device memory (only) + // - isync invalidates speculatively executed instructions + // From the Power ISA 2.06 documentation: + // "[...] an isync instruction prevents the execution of + // instructions following the isync until instructions + // preceding the isync have completed, [...]" + // From IBM's AIX assembler reference: + // "The isync [...] instructions causes the processor to + // refetch any instructions that might have been fetched + // prior to the isync instruction. The instruction isync + // causes the processor to wait for all previous instructions + // to complete. Then any instructions already fetched are + // discarded and instruction processing continues in the + // environment established by the previous instructions." + // + // semantic barrier instructions: + // (as defined in orderAccess.hpp) + // + // - release orders Store|Store, (maps to lwsync) + // Load|Store + // - acquire orders Load|Store, (maps to lwsync) + // Load|Load + // - fence orders Store|Store, (maps to sync) + // Load|Store, + // Load|Load, + // Store|Load + // + private: + inline void sync(int l); + public: + inline void sync(); + inline void lwsync(); + inline void ptesync(); + inline void eieio(); + inline void isync(); + + inline void release(); + inline void acquire(); + inline void fence(); + + // atomics + inline void lwarx_unchecked(Register d, Register a, Register b, int eh1 = 0); + inline void ldarx_unchecked(Register d, Register a, Register b, int eh1 = 0); + inline bool lxarx_hint_exclusive_access(); + inline void lwarx( Register d, Register a, Register b, bool hint_exclusive_access = false); + inline void ldarx( Register d, Register a, Register b, bool hint_exclusive_access = false); + inline void stwcx_( Register s, Register a, Register b); + inline void stdcx_( Register s, Register a, Register b); + + // Instructions for adjusting thread priority for simultaneous + // multithreading (SMT) on Power5. + private: + inline void smt_prio_very_low(); + inline void smt_prio_medium_high(); + inline void smt_prio_high(); + + public: + inline void smt_prio_low(); + inline void smt_prio_medium_low(); + inline void smt_prio_medium(); + + // trap instructions + inline void twi_0(Register a); // for load with acquire semantics use load+twi_0+isync (trap can't occur) + // NOT FOR DIRECT USE!! 
+ protected: + inline void tdi_unchecked(int tobits, Register a, int si16); + inline void twi_unchecked(int tobits, Register a, int si16); + inline void tdi( int tobits, Register a, int si16); // asserts UseSIGTRAP + inline void twi( int tobits, Register a, int si16); // asserts UseSIGTRAP + inline void td( int tobits, Register a, Register b); // asserts UseSIGTRAP + inline void tw( int tobits, Register a, Register b); // asserts UseSIGTRAP + + static bool is_tdi(int x, int tobits, int ra, int si16) { + return (TDI_OPCODE == (x & TDI_OPCODE_MASK)) + && (tobits == inv_to_field(x)) + && (ra == -1/*any reg*/ || ra == inv_ra_field(x)) + && (si16 == inv_si_field(x)); + } + + static bool is_twi(int x, int tobits, int ra, int si16) { + return (TWI_OPCODE == (x & TWI_OPCODE_MASK)) + && (tobits == inv_to_field(x)) + && (ra == -1/*any reg*/ || ra == inv_ra_field(x)) + && (si16 == inv_si_field(x)); + } + + static bool is_twi(int x, int tobits, int ra) { + return (TWI_OPCODE == (x & TWI_OPCODE_MASK)) + && (tobits == inv_to_field(x)) + && (ra == -1/*any reg*/ || ra == inv_ra_field(x)); + } + + static bool is_td(int x, int tobits, int ra, int rb) { + return (TD_OPCODE == (x & TD_OPCODE_MASK)) + && (tobits == inv_to_field(x)) + && (ra == -1/*any reg*/ || ra == inv_ra_field(x)) + && (rb == -1/*any reg*/ || rb == inv_rb_field(x)); + } + + static bool is_tw(int x, int tobits, int ra, int rb) { + return (TW_OPCODE == (x & TW_OPCODE_MASK)) + && (tobits == inv_to_field(x)) + && (ra == -1/*any reg*/ || ra == inv_ra_field(x)) + && (rb == -1/*any reg*/ || rb == inv_rb_field(x)); + } + + public: + // PPC floating point instructions + // PPC 1, section 4.6.2 Floating-Point Load Instructions + inline void lfs( FloatRegister d, int si16, Register a); + inline void lfsu( FloatRegister d, int si16, Register a); + inline void lfsx( FloatRegister d, Register a, Register b); + inline void lfd( FloatRegister d, int si16, Register a); + inline void lfdu( FloatRegister d, int si16, Register a); + inline void lfdx( FloatRegister d, Register a, Register b); + + // PPC 1, section 4.6.3 Floating-Point Store Instructions + inline void stfs( FloatRegister s, int si16, Register a); + inline void stfsu( FloatRegister s, int si16, Register a); + inline void stfsx( FloatRegister s, Register a, Register b); + inline void stfd( FloatRegister s, int si16, Register a); + inline void stfdu( FloatRegister s, int si16, Register a); + inline void stfdx( FloatRegister s, Register a, Register b); + + // PPC 1, section 4.6.4 Floating-Point Move Instructions + inline void fmr( FloatRegister d, FloatRegister b); + inline void fmr_( FloatRegister d, FloatRegister b); + + // inline void mffgpr( FloatRegister d, Register b); + // inline void mftgpr( Register d, FloatRegister b); + inline void cmpb( Register a, Register s, Register b); + inline void popcntb(Register a, Register s); + inline void popcntw(Register a, Register s); + inline void popcntd(Register a, Register s); + + inline void fneg( FloatRegister d, FloatRegister b); + inline void fneg_( FloatRegister d, FloatRegister b); + inline void fabs( FloatRegister d, FloatRegister b); + inline void fabs_( FloatRegister d, FloatRegister b); + inline void fnabs( FloatRegister d, FloatRegister b); + inline void fnabs_(FloatRegister d, FloatRegister b); + + // PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic Instructions + inline void fadd( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fadd_( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fadds( 
FloatRegister d, FloatRegister a, FloatRegister b); + inline void fadds_(FloatRegister d, FloatRegister a, FloatRegister b); + inline void fsub( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fsub_( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fsubs( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fsubs_(FloatRegister d, FloatRegister a, FloatRegister b); + inline void fmul( FloatRegister d, FloatRegister a, FloatRegister c); + inline void fmul_( FloatRegister d, FloatRegister a, FloatRegister c); + inline void fmuls( FloatRegister d, FloatRegister a, FloatRegister c); + inline void fmuls_(FloatRegister d, FloatRegister a, FloatRegister c); + inline void fdiv( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fdiv_( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fdivs( FloatRegister d, FloatRegister a, FloatRegister b); + inline void fdivs_(FloatRegister d, FloatRegister a, FloatRegister b); + + // PPC 1, section 4.6.6 Floating-Point Rounding and Conversion Instructions + inline void frsp( FloatRegister d, FloatRegister b); + inline void fctid( FloatRegister d, FloatRegister b); + inline void fctidz(FloatRegister d, FloatRegister b); + inline void fctiw( FloatRegister d, FloatRegister b); + inline void fctiwz(FloatRegister d, FloatRegister b); + inline void fcfid( FloatRegister d, FloatRegister b); + inline void fcfids(FloatRegister d, FloatRegister b); + + // PPC 1, section 4.6.7 Floating-Point Compare Instructions + inline void fcmpu( ConditionRegister crx, FloatRegister a, FloatRegister b); + + inline void fsqrt( FloatRegister d, FloatRegister b); + inline void fsqrts(FloatRegister d, FloatRegister b); + + // Vector instructions for >= Power6. + inline void lvebx( VectorRegister d, Register s1, Register s2); + inline void lvehx( VectorRegister d, Register s1, Register s2); + inline void lvewx( VectorRegister d, Register s1, Register s2); + inline void lvx( VectorRegister d, Register s1, Register s2); + inline void lvxl( VectorRegister d, Register s1, Register s2); + inline void stvebx( VectorRegister d, Register s1, Register s2); + inline void stvehx( VectorRegister d, Register s1, Register s2); + inline void stvewx( VectorRegister d, Register s1, Register s2); + inline void stvx( VectorRegister d, Register s1, Register s2); + inline void stvxl( VectorRegister d, Register s1, Register s2); + inline void lvsl( VectorRegister d, Register s1, Register s2); + inline void lvsr( VectorRegister d, Register s1, Register s2); + inline void vpkpx( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkshss( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkswss( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkshus( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkswus( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkuhum( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkuwum( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkuhus( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vpkuwus( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vupkhpx( VectorRegister d, VectorRegister b); + inline void vupkhsb( VectorRegister d, VectorRegister b); + inline void vupkhsh( VectorRegister d, VectorRegister b); + inline void vupklpx( VectorRegister d, VectorRegister b); + inline void vupklsb( VectorRegister d, VectorRegister 
b); + inline void vupklsh( VectorRegister d, VectorRegister b); + inline void vmrghb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmrghw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmrghh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmrglb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmrglw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmrglh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsplt( VectorRegister d, int ui4, VectorRegister b); + inline void vsplth( VectorRegister d, int ui3, VectorRegister b); + inline void vspltw( VectorRegister d, int ui2, VectorRegister b); + inline void vspltisb( VectorRegister d, int si5); + inline void vspltish( VectorRegister d, int si5); + inline void vspltisw( VectorRegister d, int si5); + inline void vperm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vsel( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vsl( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsldoi( VectorRegister d, VectorRegister a, VectorRegister b, int si4); + inline void vslo( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsr( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsro( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vaddcuw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vaddshs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vaddsbs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vaddsws( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vaddubm( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vadduwm( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vadduhm( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vaddubs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vadduws( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vadduhs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubcuw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubshs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubsbs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubsws( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsububm( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubuwm( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubuhm( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsububs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubuws( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsubuhs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmulesb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmuleub( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmulesh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmuleuh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmulosb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmuloub( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmulosh( VectorRegister d, VectorRegister a, 
VectorRegister b); + inline void vmulouh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmhaddshs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmhraddshs(VectorRegister d,VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmladduhm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmsubuhm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmsummbm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmsumshm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmsumshs( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmsumuhm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vmsumuhs( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c); + inline void vsumsws( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsum2sws( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsum4sbs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsum4ubs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vsum4shs( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vavgsb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vavgsw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vavgsh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vavgub( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vavguw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vavguh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmaxsb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmaxsw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmaxsh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmaxub( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmaxuw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vmaxuh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vminsb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vminsw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vminsh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vminub( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vminuw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vminuh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpequb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpequh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpequw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtsh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtsb( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtsw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtub( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtuh( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpgtuw( VectorRegister d, VectorRegister a, VectorRegister b); + inline void vcmpequb_(VectorRegister d, VectorRegister a, VectorRegister b); + inline void 
vcmpequh_(VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vcmpequw_(VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vcmpgtsh_(VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vcmpgtsb_(VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vcmpgtsw_(VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vcmpgtub_(VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vcmpgtuh_(VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vcmpgtuw_(VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vand( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vandc( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vnor( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vor( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vxor( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vrlb( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vrlw( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vrlh( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vslb( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vslw( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vslh( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vsrb( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vsrw( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vsrh( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vsrab( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vsraw( VectorRegister d, VectorRegister a, VectorRegister b);
+ inline void vsrah( VectorRegister d, VectorRegister a, VectorRegister b);
+ // Vector Floating-Point not implemented yet
+ inline void mtvscr( VectorRegister b);
+ inline void mfvscr( VectorRegister d);
+
+ // The following encoders use r0 as second operand. These instructions
+ // read r0 as '0'.
+ inline void lwzx( Register d, Register s2);
+ inline void lwz( Register d, int si16);
+ inline void lwax( Register d, Register s2);
+ inline void lwa( Register d, int si16);
+ inline void lhzx( Register d, Register s2);
+ inline void lhz( Register d, int si16);
+ inline void lhax( Register d, Register s2);
+ inline void lha( Register d, int si16);
+ inline void lbzx( Register d, Register s2);
+ inline void lbz( Register d, int si16);
+ inline void ldx( Register d, Register s2);
+ inline void ld( Register d, int si16);
+ inline void stwx( Register d, Register s2);
+ inline void stw( Register d, int si16);
+ inline void sthx( Register d, Register s2);
+ inline void sth( Register d, int si16);
+ inline void stbx( Register d, Register s2);
+ inline void stb( Register d, int si16);
+ inline void stdx( Register d, Register s2);
+ inline void std( Register d, int si16);
+
+ // PPC 2, section 3.2.1 Instruction Cache Instructions
+ inline void icbi( Register s2);
+ // PPC 2, section 3.2.2 Data Cache Instructions
+ //inline void dcba( Register s2); // Instruction for embedded processor only.
+ inline void dcbz( Register s2); + inline void dcbst( Register s2); + inline void dcbf( Register s2); + // dcache read hint + inline void dcbt( Register s2); + inline void dcbtct( Register s2, int ct); + inline void dcbtds( Register s2, int ds); + // dcache write hint + inline void dcbtst( Register s2); + inline void dcbtstct(Register s2, int ct); + + // Atomics: use ra0mem to disallow R0 as base. + inline void lwarx_unchecked(Register d, Register b, int eh1); + inline void ldarx_unchecked(Register d, Register b, int eh1); + inline void lwarx( Register d, Register b, bool hint_exclusive_access); + inline void ldarx( Register d, Register b, bool hint_exclusive_access); + inline void stwcx_(Register s, Register b); + inline void stdcx_(Register s, Register b); + inline void lfs( FloatRegister d, int si16); + inline void lfsx( FloatRegister d, Register b); + inline void lfd( FloatRegister d, int si16); + inline void lfdx( FloatRegister d, Register b); + inline void stfs( FloatRegister s, int si16); + inline void stfsx( FloatRegister s, Register b); + inline void stfd( FloatRegister s, int si16); + inline void stfdx( FloatRegister s, Register b); + inline void lvebx( VectorRegister d, Register s2); + inline void lvehx( VectorRegister d, Register s2); + inline void lvewx( VectorRegister d, Register s2); + inline void lvx( VectorRegister d, Register s2); + inline void lvxl( VectorRegister d, Register s2); + inline void stvebx(VectorRegister d, Register s2); + inline void stvehx(VectorRegister d, Register s2); + inline void stvewx(VectorRegister d, Register s2); + inline void stvx( VectorRegister d, Register s2); + inline void stvxl( VectorRegister d, Register s2); + inline void lvsl( VectorRegister d, Register s2); + inline void lvsr( VectorRegister d, Register s2); + + // RegisterOrConstant versions. + // These emitters choose between the versions using two registers and + // those with register and immediate, depending on the content of roc. + // If the constant is not encodable as immediate, instructions to + // load the constant are emitted beforehand. Store instructions need a + // tmp reg if the constant is not encodable as immediate. + // Size unpredictable. + void ld( Register d, RegisterOrConstant roc, Register s1 = noreg); + void lwa( Register d, RegisterOrConstant roc, Register s1 = noreg); + void lwz( Register d, RegisterOrConstant roc, Register s1 = noreg); + void lha( Register d, RegisterOrConstant roc, Register s1 = noreg); + void lhz( Register d, RegisterOrConstant roc, Register s1 = noreg); + void lbz( Register d, RegisterOrConstant roc, Register s1 = noreg); + void std( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg); + void stw( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg); + void sth( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg); + void stb( Register d, RegisterOrConstant roc, Register s1 = noreg, Register tmp = noreg); + void add( Register d, RegisterOrConstant roc, Register s1); + void subf(Register d, RegisterOrConstant roc, Register s1); + void cmpd(ConditionRegister d, RegisterOrConstant roc, Register s1); + + + // Emit several instructions to load a 64 bit constant. This issues a fixed + // instruction pattern so that the constant can be patched later on. 
+ enum {
+ load_const_size = 5 * BytesPerInstWord
+ };
+ void load_const(Register d, long a, Register tmp = noreg);
+ inline void load_const(Register d, void* a, Register tmp = noreg);
+ inline void load_const(Register d, Label& L, Register tmp = noreg);
+ inline void load_const(Register d, AddressLiteral& a, Register tmp = noreg);
+
+ // Load a 64 bit constant, optimized, not identifiable: the emitted pattern
+ // depends on the value, so it cannot be found and patched later on.
+ // Tmp can be used to increase ILP. Set return_simm16_rest=true to get a
+ // 16 bit immediate offset. This is useful if the offset can be encoded in
+ // a succeeding instruction.
+ int load_const_optimized(Register d, long a, Register tmp = noreg, bool return_simm16_rest = false);
+ inline int load_const_optimized(Register d, void* a, Register tmp = noreg, bool return_simm16_rest = false) {
+ return load_const_optimized(d, (long)(unsigned long)a, tmp, return_simm16_rest);
+ }
+
+ // Creation
+ Assembler(CodeBuffer* code) : AbstractAssembler(code) {
+#ifdef CHECK_DELAY
+ delay_state = no_delay;
+#endif
+ }
+
+ // Testing
+#ifndef PRODUCT
+ void test_asm();
+#endif
+};
+
+
+#endif // CPU_PPC_VM_ASSEMBLER_PPC_HPP
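+
+ // Usage sketch (illustrative only): how client code drives the encoders
+ // declared above. The CodeBuffer name and sizes are hypothetical example
+ // values, not something this interface prescribes.
+ //
+ //   CodeBuffer cb("sketch", 1024, 0);         // buffer for generated code
+ //   Assembler masm(&cb);
+ //   Label done;
+ //   masm.load_const(R3, 0x1234567890abcdefL); // patchable, always 5 instructions
+ //   masm.addi(R4, R3, 16);                    // R4 = R3 + 16; asserts R3 != R0
+ //   masm.cmpdi(CCR0, R4, 0);                  // extended mnemonic for cmpi(CCR0, 1, R4, 0)
+ //   masm.beq(CCR0, done);                     // conditional branch on CR0.eq
+ //   masm.nop();                               // encoded as ori(R0, R0, 0)
+ //   masm.bind(done);
+ //   masm.blr();                               // return via the link register
+ //
+ // Without a tmp register, load_const materializes the constant with the
+ // fixed pattern lis, ori, sldi(32), oris, ori (one 16-bit chunk per step),
+ // which is why load_const_size above is 5 * BytesPerInstWord.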
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,792 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP +#define CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + +inline void Assembler::emit_int32(int x) { + AbstractAssembler::emit_int32(x); +} + +inline void Assembler::emit_data(int x) { + emit_int32(x); +} + +inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { + relocate(rtype); + emit_int32(x); +} + +inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { + relocate(rspec); + emit_int32(x); +} + +// Emit an address +inline address Assembler::emit_addr(const address addr) { + address start = pc(); + emit_address(addr); + return start; +} + +// Emit a function descriptor with the specified entry point, TOC, and +// ENV. If the entry point is NULL, the descriptor will point just +// past the descriptor. +inline address Assembler::emit_fd(address entry, address toc, address env) { + FunctionDescriptor* fd = (FunctionDescriptor*)pc(); + + assert(sizeof(FunctionDescriptor) == 3*sizeof(address), "function descriptor size"); + + (void)emit_addr(); + (void)emit_addr(); + (void)emit_addr(); + + fd->set_entry(entry == NULL ? pc() : entry); + fd->set_toc(toc); + fd->set_env(env); + + return (address)fd; +} + +// Issue an illegal instruction. 0 is guaranteed to be an illegal instruction. 
+inline void Assembler::illtrap() { Assembler::emit_int32(0); } +inline bool Assembler::is_illtrap(int x) { return x == 0; } + +// PPC 1, section 3.3.8, Fixed-Point Arithmetic Instructions +inline void Assembler::addi( Register d, Register a, int si16) { assert(a != R0, "r0 not allowed"); addi_r0ok( d, a, si16); } +inline void Assembler::addis( Register d, Register a, int si16) { assert(a != R0, "r0 not allowed"); addis_r0ok(d, a, si16); } +inline void Assembler::addi_r0ok(Register d,Register a,int si16) { emit_int32(ADDI_OPCODE | rt(d) | ra(a) | simm(si16, 16)); } +inline void Assembler::addis_r0ok(Register d,Register a,int si16) { emit_int32(ADDIS_OPCODE | rt(d) | ra(a) | simm(si16, 16)); } +inline void Assembler::addic_( Register d, Register a, int si16) { emit_int32(ADDIC__OPCODE | rt(d) | ra(a) | simm(si16, 16)); } +inline void Assembler::subfic( Register d, Register a, int si16) { emit_int32(SUBFIC_OPCODE | rt(d) | ra(a) | simm(si16, 16)); } +inline void Assembler::add( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::add_( Register d, Register a, Register b) { emit_int32(ADD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::subf( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::sub( Register d, Register a, Register b) { subf(d, b, a); } +inline void Assembler::subf_( Register d, Register a, Register b) { emit_int32(SUBF_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::addc( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::addc_( Register d, Register a, Register b) { emit_int32(ADDC_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::subfc( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::subfc_( Register d, Register a, Register b) { emit_int32(SUBFC_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::adde( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::adde_( Register d, Register a, Register b) { emit_int32(ADDE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::subfe( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::subfe_( Register d, Register a, Register b) { emit_int32(SUBFE_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::neg( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(0)); } +inline void Assembler::neg_( Register d, Register a) { emit_int32(NEG_OPCODE | rt(d) | ra(a) | oe(0) | rc(1)); } +inline void Assembler::mulli( Register d, Register a, int si16) { emit_int32(MULLI_OPCODE | rt(d) | ra(a) | simm(si16, 16)); } +inline void Assembler::mulld( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::mulld_( Register d, Register a, Register b) { emit_int32(MULLD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::mullw( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::mullw_( Register d, Register a, Register b) { emit_int32(MULLW_OPCODE | rt(d) | ra(a) | 
rb(b) | oe(0) | rc(1)); } +inline void Assembler::mulhw( Register d, Register a, Register b) { emit_int32(MULHW_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); } +inline void Assembler::mulhw_( Register d, Register a, Register b) { emit_int32(MULHW_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); } +inline void Assembler::mulhd( Register d, Register a, Register b) { emit_int32(MULHD_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); } +inline void Assembler::mulhd_( Register d, Register a, Register b) { emit_int32(MULHD_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); } +inline void Assembler::mulhdu( Register d, Register a, Register b) { emit_int32(MULHDU_OPCODE | rt(d) | ra(a) | rb(b) | rc(0)); } +inline void Assembler::mulhdu_(Register d, Register a, Register b) { emit_int32(MULHDU_OPCODE | rt(d) | ra(a) | rb(b) | rc(1)); } +inline void Assembler::divd( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::divd_( Register d, Register a, Register b) { emit_int32(DIVD_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } +inline void Assembler::divw( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(0)); } +inline void Assembler::divw_( Register d, Register a, Register b) { emit_int32(DIVW_OPCODE | rt(d) | ra(a) | rb(b) | oe(0) | rc(1)); } + +// extended mnemonics +inline void Assembler::li( Register d, int si16) { Assembler::addi_r0ok( d, R0, si16); } +inline void Assembler::lis( Register d, int si16) { Assembler::addis_r0ok(d, R0, si16); } +inline void Assembler::addir(Register d, int si16, Register a) { Assembler::addi(d, a, si16); } + +// PPC 1, section 3.3.9, Fixed-Point Compare Instructions +inline void Assembler::cmpi( ConditionRegister f, int l, Register a, int si16) { emit_int32( CMPI_OPCODE | bf(f) | l10(l) | ra(a) | simm(si16,16)); } +inline void Assembler::cmp( ConditionRegister f, int l, Register a, Register b) { emit_int32( CMP_OPCODE | bf(f) | l10(l) | ra(a) | rb(b)); } +inline void Assembler::cmpli( ConditionRegister f, int l, Register a, int ui16) { emit_int32( CMPLI_OPCODE | bf(f) | l10(l) | ra(a) | uimm(ui16,16)); } +inline void Assembler::cmpl( ConditionRegister f, int l, Register a, Register b) { emit_int32( CMPL_OPCODE | bf(f) | l10(l) | ra(a) | rb(b)); } + +// extended mnemonics of Compare Instructions +inline void Assembler::cmpwi( ConditionRegister crx, Register a, int si16) { Assembler::cmpi( crx, 0, a, si16); } +inline void Assembler::cmpdi( ConditionRegister crx, Register a, int si16) { Assembler::cmpi( crx, 1, a, si16); } +inline void Assembler::cmpw( ConditionRegister crx, Register a, Register b) { Assembler::cmp( crx, 0, a, b); } +inline void Assembler::cmpd( ConditionRegister crx, Register a, Register b) { Assembler::cmp( crx, 1, a, b); } +inline void Assembler::cmplwi(ConditionRegister crx, Register a, int ui16) { Assembler::cmpli(crx, 0, a, ui16); } +inline void Assembler::cmpldi(ConditionRegister crx, Register a, int ui16) { Assembler::cmpli(crx, 1, a, ui16); } +inline void Assembler::cmplw( ConditionRegister crx, Register a, Register b) { Assembler::cmpl( crx, 0, a, b); } +inline void Assembler::cmpld( ConditionRegister crx, Register a, Register b) { Assembler::cmpl( crx, 1, a, b); } + +inline void Assembler::isel(Register d, Register a, Register b, int c) { emit_int32(ISEL_OPCODE | rt(d) | ra(a) | rb(b) | bc(c)); } + +// PPC 1, section 3.3.11, Fixed-Point Logical Instructions +inline void Assembler::andi_( Register a, Register s, int ui16) { emit_int32(ANDI_OPCODE | 
rta(a) | rs(s) | uimm(ui16, 16)); } +inline void Assembler::andis_( Register a, Register s, int ui16) { emit_int32(ANDIS_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); } +inline void Assembler::ori( Register a, Register s, int ui16) { emit_int32(ORI_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); } +inline void Assembler::oris( Register a, Register s, int ui16) { emit_int32(ORIS_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); } +inline void Assembler::xori( Register a, Register s, int ui16) { emit_int32(XORI_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); } +inline void Assembler::xoris( Register a, Register s, int ui16) { emit_int32(XORIS_OPCODE | rta(a) | rs(s) | uimm(ui16, 16)); } +inline void Assembler::andr( Register a, Register s, Register b) { emit_int32(AND_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::and_( Register a, Register s, Register b) { emit_int32(AND_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } + +inline void Assembler::or_unchecked(Register a, Register s, Register b){ emit_int32(OR_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::orr( Register a, Register s, Register b) { if (a==s && s==b) { Assembler::nop(); } else { Assembler::or_unchecked(a,s,b); } } +inline void Assembler::or_( Register a, Register s, Register b) { emit_int32(OR_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::xorr( Register a, Register s, Register b) { emit_int32(XOR_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::xor_( Register a, Register s, Register b) { emit_int32(XOR_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::nand( Register a, Register s, Register b) { emit_int32(NAND_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::nand_( Register a, Register s, Register b) { emit_int32(NAND_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::nor( Register a, Register s, Register b) { emit_int32(NOR_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::nor_( Register a, Register s, Register b) { emit_int32(NOR_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::andc( Register a, Register s, Register b) { emit_int32(ANDC_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::andc_( Register a, Register s, Register b) { emit_int32(ANDC_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::orc( Register a, Register s, Register b) { emit_int32(ORC_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::orc_( Register a, Register s, Register b) { emit_int32(ORC_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::extsb( Register a, Register s) { emit_int32(EXTSB_OPCODE | rta(a) | rs(s) | rc(0)); } +inline void Assembler::extsh( Register a, Register s) { emit_int32(EXTSH_OPCODE | rta(a) | rs(s) | rc(0)); } +inline void Assembler::extsw( Register a, Register s) { emit_int32(EXTSW_OPCODE | rta(a) | rs(s) | rc(0)); } + +// extended mnemonics +inline void Assembler::nop() { Assembler::ori(R0, R0, 0); } +// NOP for FP and BR units (different versions to allow them to be in one group) +inline void Assembler::fpnop0() { Assembler::fmr(F30, F30); } +inline void Assembler::fpnop1() { Assembler::fmr(F31, F31); } +inline void Assembler::brnop0() { Assembler::mcrf(CCR2, CCR2); } +inline void Assembler::brnop1() { Assembler::mcrf(CCR3, CCR3); } +inline void Assembler::brnop2() { Assembler::mcrf(CCR4, CCR4); } + +inline void Assembler::mr( Register d, Register s) { Assembler::orr(d, s, s); } +inline void Assembler::ori_opt( 
Register d, int ui16) { if (ui16!=0) Assembler::ori( d, d, ui16); } +inline void Assembler::oris_opt(Register d, int ui16) { if (ui16!=0) Assembler::oris(d, d, ui16); } + +inline void Assembler::endgroup() { Assembler::ori(R1, R1, 0); } + +// count instructions +inline void Assembler::cntlzw( Register a, Register s) { emit_int32(CNTLZW_OPCODE | rta(a) | rs(s) | rc(0)); } +inline void Assembler::cntlzw_( Register a, Register s) { emit_int32(CNTLZW_OPCODE | rta(a) | rs(s) | rc(1)); } +inline void Assembler::cntlzd( Register a, Register s) { emit_int32(CNTLZD_OPCODE | rta(a) | rs(s) | rc(0)); } +inline void Assembler::cntlzd_( Register a, Register s) { emit_int32(CNTLZD_OPCODE | rta(a) | rs(s) | rc(1)); } + +// PPC 1, section 3.3.12, Fixed-Point Rotate and Shift Instructions +inline void Assembler::sld( Register a, Register s, Register b) { emit_int32(SLD_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::sld_( Register a, Register s, Register b) { emit_int32(SLD_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::slw( Register a, Register s, Register b) { emit_int32(SLW_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::slw_( Register a, Register s, Register b) { emit_int32(SLW_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::srd( Register a, Register s, Register b) { emit_int32(SRD_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::srd_( Register a, Register s, Register b) { emit_int32(SRD_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::srw( Register a, Register s, Register b) { emit_int32(SRW_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::srw_( Register a, Register s, Register b) { emit_int32(SRW_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::srad( Register a, Register s, Register b) { emit_int32(SRAD_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::srad_( Register a, Register s, Register b) { emit_int32(SRAD_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::sraw( Register a, Register s, Register b) { emit_int32(SRAW_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::sraw_( Register a, Register s, Register b) { emit_int32(SRAW_OPCODE | rta(a) | rs(s) | rb(b) | rc(1)); } +inline void Assembler::sradi( Register a, Register s, int sh6) { emit_int32(SRADI_OPCODE | rta(a) | rs(s) | sh162030(sh6) | rc(0)); } +inline void Assembler::sradi_( Register a, Register s, int sh6) { emit_int32(SRADI_OPCODE | rta(a) | rs(s) | sh162030(sh6) | rc(1)); } +inline void Assembler::srawi( Register a, Register s, int sh5) { emit_int32(SRAWI_OPCODE | rta(a) | rs(s) | sh1620(sh5) | rc(0)); } +inline void Assembler::srawi_( Register a, Register s, int sh5) { emit_int32(SRAWI_OPCODE | rta(a) | rs(s) | sh1620(sh5) | rc(1)); } + +// extended mnemonics for Shift Instructions +inline void Assembler::sldi( Register a, Register s, int sh6) { Assembler::rldicr(a, s, sh6, 63-sh6); } +inline void Assembler::sldi_( Register a, Register s, int sh6) { Assembler::rldicr_(a, s, sh6, 63-sh6); } +inline void Assembler::slwi( Register a, Register s, int sh5) { Assembler::rlwinm(a, s, sh5, 0, 31-sh5); } +inline void Assembler::slwi_( Register a, Register s, int sh5) { Assembler::rlwinm_(a, s, sh5, 0, 31-sh5); } +inline void Assembler::srdi( Register a, Register s, int sh6) { Assembler::rldicl(a, s, 64-sh6, sh6); } +inline void Assembler::srdi_( Register a, Register s, int sh6) { Assembler::rldicl_(a, s, 64-sh6, sh6); } +inline void 
Assembler::srwi( Register a, Register s, int sh5) { Assembler::rlwinm(a, s, 32-sh5, sh5, 31); } +inline void Assembler::srwi_( Register a, Register s, int sh5) { Assembler::rlwinm_(a, s, 32-sh5, sh5, 31); } + +inline void Assembler::clrrdi( Register a, Register s, int ui6) { Assembler::rldicr(a, s, 0, 63-ui6); } +inline void Assembler::clrrdi_( Register a, Register s, int ui6) { Assembler::rldicr_(a, s, 0, 63-ui6); } +inline void Assembler::clrldi( Register a, Register s, int ui6) { Assembler::rldicl(a, s, 0, ui6); } +inline void Assembler::clrldi_( Register a, Register s, int ui6) { Assembler::rldicl_(a, s, 0, ui6); } +inline void Assembler::clrlsldi( Register a, Register s, int clrl6, int shl6) { Assembler::rldic( a, s, shl6, clrl6-shl6); } +inline void Assembler::clrlsldi_(Register a, Register s, int clrl6, int shl6) { Assembler::rldic_(a, s, shl6, clrl6-shl6); } +inline void Assembler::extrdi( Register a, Register s, int n, int b){ Assembler::rldicl(a, s, b+n, 64-n); } +// testbit with condition register. +inline void Assembler::testbitdi(ConditionRegister cr, Register a, Register s, int ui6) { + Assembler::rldicr(a, s, 63-ui6, 0); + Assembler::cmpdi(cr, a, 0); +} + +// rotate instructions +inline void Assembler::rotldi( Register a, Register s, int n) { Assembler::rldicl(a, s, n, 0); } +inline void Assembler::rotrdi( Register a, Register s, int n) { Assembler::rldicl(a, s, 64-n, 0); } +inline void Assembler::rotlwi( Register a, Register s, int n) { Assembler::rlwinm(a, s, n, 0, 31); } +inline void Assembler::rotrwi( Register a, Register s, int n) { Assembler::rlwinm(a, s, 32-n, 0, 31); } + +inline void Assembler::rldic( Register a, Register s, int sh6, int mb6) { emit_int32(RLDIC_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(0)); } +inline void Assembler::rldic_( Register a, Register s, int sh6, int mb6) { emit_int32(RLDIC_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(1)); } +inline void Assembler::rldicr( Register a, Register s, int sh6, int mb6) { emit_int32(RLDICR_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(0)); } +inline void Assembler::rldicr_( Register a, Register s, int sh6, int mb6) { emit_int32(RLDICR_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(1)); } +inline void Assembler::rldicl( Register a, Register s, int sh6, int me6) { emit_int32(RLDICL_OPCODE | rta(a) | rs(s) | sh162030(sh6) | me2126(me6) | rc(0)); } +inline void Assembler::rldicl_( Register a, Register s, int sh6, int me6) { emit_int32(RLDICL_OPCODE | rta(a) | rs(s) | sh162030(sh6) | me2126(me6) | rc(1)); } +inline void Assembler::rlwinm( Register a, Register s, int sh5, int mb5, int me5){ emit_int32(RLWINM_OPCODE | rta(a) | rs(s) | sh1620(sh5) | mb2125(mb5) | me2630(me5) | rc(0)); } +inline void Assembler::rlwinm_( Register a, Register s, int sh5, int mb5, int me5){ emit_int32(RLWINM_OPCODE | rta(a) | rs(s) | sh1620(sh5) | mb2125(mb5) | me2630(me5) | rc(1)); } +inline void Assembler::rldimi( Register a, Register s, int sh6, int mb6) { emit_int32(RLDIMI_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(0)); } +inline void Assembler::rlwimi( Register a, Register s, int sh5, int mb5, int me5){ emit_int32(RLWIMI_OPCODE | rta(a) | rs(s) | sh1620(sh5) | mb2125(mb5) | me2630(me5) | rc(0)); } +inline void Assembler::rldimi_( Register a, Register s, int sh6, int mb6) { emit_int32(RLDIMI_OPCODE | rta(a) | rs(s) | sh162030(sh6) | mb2126(mb6) | rc(1)); } +inline void Assembler::insrdi( Register a, Register s, int n, int b) { Assembler::rldimi(a, s, 64-(b+n), b); } 
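+// insrdi (above) and insrwi (below) insert the n low-order bits of s into a,
+// starting b bits from the most-significant end; e.g. (illustrative)
+// insrdi(a, s, 8, 0) replaces the most significant byte of a.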
+inline void Assembler::insrwi( Register a, Register s, int n, int b) { Assembler::rlwimi(a, s, 32-(b+n), b, b+n-1); } + +// PPC 1, section 3.3.2 Fixed-Point Load Instructions +inline void Assembler::lwzx( Register d, Register s1, Register s2) { emit_int32(LWZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::lwz( Register d, int si16, Register s1) { emit_int32(LWZ_OPCODE | rt(d) | d1(si16) | ra0mem(s1));} +inline void Assembler::lwzu( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LWZU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));} + +inline void Assembler::lwax( Register d, Register s1, Register s2) { emit_int32(LWAX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::lwa( Register d, int si16, Register s1) { emit_int32(LWA_OPCODE | rt(d) | ds(si16) | ra0mem(s1));} + +inline void Assembler::lhzx( Register d, Register s1, Register s2) { emit_int32(LHZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::lhz( Register d, int si16, Register s1) { emit_int32(LHZ_OPCODE | rt(d) | d1(si16) | ra0mem(s1));} +inline void Assembler::lhzu( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LHZU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));} + +inline void Assembler::lhax( Register d, Register s1, Register s2) { emit_int32(LHAX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::lha( Register d, int si16, Register s1) { emit_int32(LHA_OPCODE | rt(d) | d1(si16) | ra0mem(s1));} +inline void Assembler::lhau( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LHAU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));} + +inline void Assembler::lbzx( Register d, Register s1, Register s2) { emit_int32(LBZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::lbz( Register d, int si16, Register s1) { emit_int32(LBZ_OPCODE | rt(d) | d1(si16) | ra0mem(s1));} +inline void Assembler::lbzu( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LBZU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));} + +inline void Assembler::ld( Register d, int si16, Register s1) { emit_int32(LD_OPCODE | rt(d) | ds(si16) | ra0mem(s1));} +inline void Assembler::ldx( Register d, Register s1, Register s2) { emit_int32(LDX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::ldu( Register d, int si16, Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LDU_OPCODE | rt(d) | ds(si16) | rta0mem(s1));} + +// PPC 1, section 3.3.3 Fixed-Point Store Instructions +inline void Assembler::stwx( Register d, Register s1, Register s2) { emit_int32(STWX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::stw( Register d, int si16, Register s1) { emit_int32(STW_OPCODE | rs(d) | d1(si16) | ra0mem(s1));} +inline void Assembler::stwu( Register d, int si16, Register s1) { emit_int32(STWU_OPCODE | rs(d) | d1(si16) | rta0mem(s1));} + +inline void Assembler::sthx( Register d, Register s1, Register s2) { emit_int32(STHX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::sth( Register d, int si16, Register s1) { emit_int32(STH_OPCODE | rs(d) | d1(si16) | ra0mem(s1));} +inline void Assembler::sthu( Register d, int si16, Register s1) { emit_int32(STHU_OPCODE | rs(d) | d1(si16) | rta0mem(s1));} + +inline void Assembler::stbx( Register d, Register s1, Register s2) { emit_int32(STBX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));} +inline void Assembler::stb( Register d, int si16, Register s1) { emit_int32(STB_OPCODE | 
rs(d) | d1(si16) | ra0mem(s1));}
+inline void Assembler::stbu( Register d, int si16, Register s1) { emit_int32(STBU_OPCODE | rs(d) | d1(si16) | rta0mem(s1));}
+
+inline void Assembler::std( Register d, int si16, Register s1) { emit_int32(STD_OPCODE | rs(d) | ds(si16) | ra0mem(s1));}
+inline void Assembler::stdx( Register d, Register s1, Register s2) { emit_int32(STDX_OPCODE | rs(d) | ra0mem(s1) | rb(s2));}
+inline void Assembler::stdu( Register d, int si16, Register s1) { emit_int32(STDU_OPCODE | rs(d) | ds(si16) | rta0mem(s1));}
+inline void Assembler::stdux(Register s, Register a, Register b) { emit_int32(STDUX_OPCODE| rs(s) | rta0mem(a) | rb(b));}
+
+// PPC 1, section 3.3.13 Move To/From System Register Instructions
+inline void Assembler::mtlr( Register s1) { emit_int32(MTLR_OPCODE | rs(s1)); }
+inline void Assembler::mflr( Register d ) { emit_int32(MFLR_OPCODE | rt(d)); }
+inline void Assembler::mtctr(Register s1) { emit_int32(MTCTR_OPCODE | rs(s1)); }
+inline void Assembler::mfctr(Register d ) { emit_int32(MFCTR_OPCODE | rt(d)); }
+inline void Assembler::mtcrf(int afxm, Register s){ emit_int32(MTCRF_OPCODE | fxm(afxm) | rs(s)); }
+inline void Assembler::mfcr( Register d ) { emit_int32(MFCR_OPCODE | rt(d)); }
+inline void Assembler::mcrf( ConditionRegister crd, ConditionRegister cra)
+ { emit_int32(MCRF_OPCODE | bf(crd) | bfa(cra)); }
+inline void Assembler::mtcr( Register s) { Assembler::mtcrf(0xff, s); }
+
+// SAP JVM 2006-02-13 PPC branch instruction.
+// PPC 1, section 2.4.1 Branch Instructions
+inline void Assembler::b( address a, relocInfo::relocType rt) { emit_data(BXX_OPCODE| li(disp( intptr_t(a), intptr_t(pc()))) |aa(0)|lk(0), rt); }
+inline void Assembler::b( Label& L) { b( target(L)); }
+inline void Assembler::bl(address a, relocInfo::relocType rt) { emit_data(BXX_OPCODE| li(disp( intptr_t(a), intptr_t(pc()))) |aa(0)|lk(1), rt); }
+inline void Assembler::bl(Label& L) { bl(target(L)); }
+inline void Assembler::bc( int boint, int biint, address a, relocInfo::relocType rt) { emit_data(BCXX_OPCODE| bo(boint) | bi(biint) | bd(disp( intptr_t(a), intptr_t(pc()))) | aa(0) | lk(0), rt); }
+inline void Assembler::bc( int boint, int biint, Label& L) { bc(boint, biint, target(L)); }
+inline void Assembler::bcl(int boint, int biint, address a, relocInfo::relocType rt) { emit_data(BCXX_OPCODE| bo(boint) | bi(biint) | bd(disp( intptr_t(a), intptr_t(pc()))) | aa(0)|lk(1), rt); }
+inline void Assembler::bcl(int boint, int biint, Label& L) { bcl(boint, biint, target(L)); }
+
+inline void Assembler::bclr( int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCLR_OPCODE | bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(0), rt); }
+inline void Assembler::bclrl( int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCLR_OPCODE | bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(1), rt); }
+inline void Assembler::bcctr( int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCCTR_OPCODE| bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(0), rt); }
+inline void Assembler::bcctrl(int boint, int biint, int bhint, relocInfo::relocType rt) { emit_data(BCCTR_OPCODE| bo(boint) | bi(biint) | bh(bhint) | aa(0) | lk(1), rt); }
+
+// helper function for b
+inline bool Assembler::is_within_range_of_b(address a, address pc) {
+  // Guard against illegal branch targets, e.g. -1 (see CompiledStaticCall and ad-file).
+  if ((((uint64_t)a) & 0x3) != 0) return false;
+
+  const int range = 1 << (29-6); // li field is from bit 6 to bit 29.
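+  // The 24-bit li field holds a word displacement, so a direct branch reaches
+  // roughly +/-32 MB (2^25 bytes) from the branch instruction.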
+ int value = disp(intptr_t(a), intptr_t(pc)); + bool result = -range <= value && value < range-1; +#ifdef ASSERT + if (result) li(value); // Assert that value is in correct range. +#endif + return result; +} + +// helper functions for bcxx. +inline bool Assembler::is_within_range_of_bcxx(address a, address pc) { + // Guard against illegal branch targets, e.g. -1 (see CompiledStaticCall and ad-file). + if ((((uint64_t)a) & 0x3) != 0) return false; + + const int range = 1 << (29-16); // bd field is from bit 16 to bit 29. + int value = disp(intptr_t(a), intptr_t(pc)); + bool result = -range <= value && value < range-1; +#ifdef ASSERT + if (result) bd(value); // Assert that value is in correct range. +#endif + return result; +} + +// Get the destination of a bxx branch (b, bl, ba, bla). +address Assembler::bxx_destination(address baddr) { return bxx_destination(*(int*)baddr, baddr); } +address Assembler::bxx_destination(int instr, address pc) { return (address)bxx_destination_offset(instr, (intptr_t)pc); } +intptr_t Assembler::bxx_destination_offset(int instr, intptr_t bxx_pos) { + intptr_t displ = inv_li_field(instr); + return bxx_pos + displ; +} + +// Extended mnemonics for Branch Instructions +inline void Assembler::blt(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, less), L); } +inline void Assembler::bgt(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, greater), L); } +inline void Assembler::beq(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, equal), L); } +inline void Assembler::bso(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs1, bi0(crx, summary_overflow), L); } +inline void Assembler::bge(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, less), L); } +inline void Assembler::ble(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, greater), L); } +inline void Assembler::bne(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, equal), L); } +inline void Assembler::bns(ConditionRegister crx, Label& L) { Assembler::bc(bcondCRbiIs0, bi0(crx, summary_overflow), L); } + +// Branch instructions with static prediction hints. 
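+// The *_predict_taken/_predict_not_taken forms below encode the static hint in
+// the `at' bits of the BO field (0b11 hints taken, 0b10 hints not taken).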
+inline void Assembler::blt_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken, bi0(crx, less), L); } +inline void Assembler::bgt_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken, bi0(crx, greater), L); } +inline void Assembler::beq_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken, bi0(crx, equal), L); } +inline void Assembler::bso_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsTaken, bi0(crx, summary_overflow), L); } +inline void Assembler::bge_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken, bi0(crx, less), L); } +inline void Assembler::ble_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken, bi0(crx, greater), L); } +inline void Assembler::bne_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken, bi0(crx, equal), L); } +inline void Assembler::bns_predict_taken (ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsTaken, bi0(crx, summary_overflow), L); } +inline void Assembler::blt_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, less), L); } +inline void Assembler::bgt_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, greater), L); } +inline void Assembler::beq_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, equal), L); } +inline void Assembler::bso_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs1_bhintIsNotTaken, bi0(crx, summary_overflow), L); } +inline void Assembler::bge_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, less), L); } +inline void Assembler::ble_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, greater), L); } +inline void Assembler::bne_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, equal), L); } +inline void Assembler::bns_predict_not_taken(ConditionRegister crx, Label& L) { bc(bcondCRbiIs0_bhintIsNotTaken, bi0(crx, summary_overflow), L); } + +// For use in conjunction with testbitdi: +inline void Assembler::btrue( ConditionRegister crx, Label& L) { Assembler::bne(crx, L); } +inline void Assembler::bfalse(ConditionRegister crx, Label& L) { Assembler::beq(crx, L); } + +inline void Assembler::bltl(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, less), L); } +inline void Assembler::bgtl(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, greater), L); } +inline void Assembler::beql(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, equal), L); } +inline void Assembler::bsol(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs1, bi0(crx, summary_overflow), L); } +inline void Assembler::bgel(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, less), L); } +inline void Assembler::blel(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, greater), L); } +inline void Assembler::bnel(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, equal), L); } +inline void Assembler::bnsl(ConditionRegister crx, Label& L) { Assembler::bcl(bcondCRbiIs0, bi0(crx, summary_overflow), L); } + +// Extended mnemonics for Branch Instructions via LR. +// We use `blr' for returns. 
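+// Example (illustrative only): a typical epilogue reloads the saved return pc
+// into a scratch register and returns, e.g. mtlr(R0); blr();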
+inline void Assembler::blr(relocInfo::relocType rt) { Assembler::bclr(bcondAlways, 0, bhintbhBCLRisReturn, rt); } + +// Extended mnemonics for Branch Instructions with CTR. +// Bdnz means `decrement CTR and jump to L if CTR is not zero'. +inline void Assembler::bdnz(Label& L) { Assembler::bc(16, 0, L); } +// Decrement and branch if result is zero. +inline void Assembler::bdz(Label& L) { Assembler::bc(18, 0, L); } +// We use `bctr[l]' for jumps/calls in function descriptor glue +// code, e.g. for calls to runtime functions. +inline void Assembler::bctr( relocInfo::relocType rt) { Assembler::bcctr(bcondAlways, 0, bhintbhBCCTRisNotReturnButSame, rt); } +inline void Assembler::bctrl(relocInfo::relocType rt) { Assembler::bcctrl(bcondAlways, 0, bhintbhBCCTRisNotReturnButSame, rt); } +// Conditional jumps/branches via CTR. +inline void Assembler::beqctr( ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctr( bcondCRbiIs1, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); } +inline void Assembler::beqctrl(ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctrl(bcondCRbiIs1, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); } +inline void Assembler::bnectr( ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctr( bcondCRbiIs0, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); } +inline void Assembler::bnectrl(ConditionRegister crx, relocInfo::relocType rt) { Assembler::bcctrl(bcondCRbiIs0, bi0(crx, equal), bhintbhBCCTRisNotReturnButSame, rt); } + +// condition register logic instructions +inline void Assembler::crand( int d, int s1, int s2) { emit_int32(CRAND_OPCODE | bt(d) | ba(s1) | bb(s2)); } +inline void Assembler::crnand(int d, int s1, int s2) { emit_int32(CRNAND_OPCODE | bt(d) | ba(s1) | bb(s2)); } +inline void Assembler::cror( int d, int s1, int s2) { emit_int32(CROR_OPCODE | bt(d) | ba(s1) | bb(s2)); } +inline void Assembler::crxor( int d, int s1, int s2) { emit_int32(CRXOR_OPCODE | bt(d) | ba(s1) | bb(s2)); } +inline void Assembler::crnor( int d, int s1, int s2) { emit_int32(CRNOR_OPCODE | bt(d) | ba(s1) | bb(s2)); } +inline void Assembler::creqv( int d, int s1, int s2) { emit_int32(CREQV_OPCODE | bt(d) | ba(s1) | bb(s2)); } +inline void Assembler::crandc(int d, int s1, int s2) { emit_int32(CRANDC_OPCODE | bt(d) | ba(s1) | bb(s2)); } +inline void Assembler::crorc( int d, int s1, int s2) { emit_int32(CRORC_OPCODE | bt(d) | ba(s1) | bb(s2)); } + +// PPC 2, section 3.2.1 Instruction Cache Instructions +inline void Assembler::icbi( Register s1, Register s2) { emit_int32( ICBI_OPCODE | ra0mem(s1) | rb(s2) ); } +// PPC 2, section 3.2.2 Data Cache Instructions +//inline void Assembler::dcba( Register s1, Register s2) { emit_int32( DCBA_OPCODE | ra0mem(s1) | rb(s2) ); } +inline void Assembler::dcbz( Register s1, Register s2) { emit_int32( DCBZ_OPCODE | ra0mem(s1) | rb(s2) ); } +inline void Assembler::dcbst( Register s1, Register s2) { emit_int32( DCBST_OPCODE | ra0mem(s1) | rb(s2) ); } +inline void Assembler::dcbf( Register s1, Register s2) { emit_int32( DCBF_OPCODE | ra0mem(s1) | rb(s2) ); } +// dcache read hint +inline void Assembler::dcbt( Register s1, Register s2) { emit_int32( DCBT_OPCODE | ra0mem(s1) | rb(s2) ); } +inline void Assembler::dcbtct( Register s1, Register s2, int ct) { emit_int32( DCBT_OPCODE | ra0mem(s1) | rb(s2) | thct(ct)); } +inline void Assembler::dcbtds( Register s1, Register s2, int ds) { emit_int32( DCBT_OPCODE | ra0mem(s1) | rb(s2) | thds(ds)); } +// dcache write hint +inline void Assembler::dcbtst( Register s1, Register 
s2) { emit_int32( DCBTST_OPCODE | ra0mem(s1) | rb(s2) ); } +inline void Assembler::dcbtstct(Register s1, Register s2, int ct) { emit_int32( DCBTST_OPCODE | ra0mem(s1) | rb(s2) | thct(ct)); } + +// machine barrier instructions: +inline void Assembler::sync(int a) { emit_int32( SYNC_OPCODE | l910(a)); } +inline void Assembler::sync() { Assembler::sync(0); } +inline void Assembler::lwsync() { Assembler::sync(1); } +inline void Assembler::ptesync() { Assembler::sync(2); } +inline void Assembler::eieio() { emit_int32( EIEIO_OPCODE); } +inline void Assembler::isync() { emit_int32( ISYNC_OPCODE); } + +inline void Assembler::release() { Assembler::lwsync(); } +inline void Assembler::acquire() { Assembler::lwsync(); } +inline void Assembler::fence() { Assembler::sync(); } + +// atomics +// Use ra0mem to disallow R0 as base. +inline void Assembler::lwarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } +inline void Assembler::ldarx_unchecked(Register d, Register a, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | ra0mem(a) | rb(b) | eh(eh1)); } +inline bool Assembler::lxarx_hint_exclusive_access() { return VM_Version::has_lxarxeh(); } +inline void Assembler::lwarx( Register d, Register a, Register b, bool hint_exclusive_access) { lwarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::ldarx( Register d, Register a, Register b, bool hint_exclusive_access) { ldarx_unchecked(d, a, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::stwcx_(Register s, Register a, Register b) { emit_int32( STWCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } +inline void Assembler::stdcx_(Register s, Register a, Register b) { emit_int32( STDCX_OPCODE | rs(s) | ra0mem(a) | rb(b) | rc(1)); } + +// Instructions for adjusting thread priority +// for simultaneous multithreading (SMT) on POWER5. 
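+// Each of these is an `or rx,rx,rx' no-op whose register number encodes the
+// priority: or 31,31,31 = very low, or 1,1,1 = low, or 6,6,6 = medium low,
+// or 2,2,2 = medium, or 5,5,5 = medium high, or 3,3,3 = high.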
+inline void Assembler::smt_prio_very_low() { Assembler::or_unchecked(R31, R31, R31); } +inline void Assembler::smt_prio_low() { Assembler::or_unchecked(R1, R1, R1); } +inline void Assembler::smt_prio_medium_low() { Assembler::or_unchecked(R6, R6, R6); } +inline void Assembler::smt_prio_medium() { Assembler::or_unchecked(R2, R2, R2); } +inline void Assembler::smt_prio_medium_high() { Assembler::or_unchecked(R5, R5, R5); } +inline void Assembler::smt_prio_high() { Assembler::or_unchecked(R3, R3, R3); } + +inline void Assembler::twi_0(Register a) { twi_unchecked(0, a, 0);} + +// trap instructions +inline void Assembler::tdi_unchecked(int tobits, Register a, int si16){ emit_int32( TDI_OPCODE | to(tobits) | ra(a) | si(si16)); } +inline void Assembler::twi_unchecked(int tobits, Register a, int si16){ emit_int32( TWI_OPCODE | to(tobits) | ra(a) | si(si16)); } +inline void Assembler::tdi(int tobits, Register a, int si16) { assert(UseSIGTRAP, "precondition"); tdi_unchecked(tobits, a, si16); } +inline void Assembler::twi(int tobits, Register a, int si16) { assert(UseSIGTRAP, "precondition"); twi_unchecked(tobits, a, si16); } +inline void Assembler::td( int tobits, Register a, Register b) { assert(UseSIGTRAP, "precondition"); emit_int32( TD_OPCODE | to(tobits) | ra(a) | rb(b)); } +inline void Assembler::tw( int tobits, Register a, Register b) { assert(UseSIGTRAP, "precondition"); emit_int32( TW_OPCODE | to(tobits) | ra(a) | rb(b)); } + +// FLOATING POINT instructions ppc. +// PPC 1, section 4.6.2 Floating-Point Load Instructions +// Use ra0mem instead of ra in some instructions below. +inline void Assembler::lfs( FloatRegister d, int si16, Register a) { emit_int32( LFS_OPCODE | frt(d) | ra0mem(a) | simm(si16,16)); } +inline void Assembler::lfsu(FloatRegister d, int si16, Register a) { emit_int32( LFSU_OPCODE | frt(d) | ra(a) | simm(si16,16)); } +inline void Assembler::lfsx(FloatRegister d, Register a, Register b) { emit_int32( LFSX_OPCODE | frt(d) | ra0mem(a) | rb(b)); } +inline void Assembler::lfd( FloatRegister d, int si16, Register a) { emit_int32( LFD_OPCODE | frt(d) | ra0mem(a) | simm(si16,16)); } +inline void Assembler::lfdu(FloatRegister d, int si16, Register a) { emit_int32( LFDU_OPCODE | frt(d) | ra(a) | simm(si16,16)); } +inline void Assembler::lfdx(FloatRegister d, Register a, Register b) { emit_int32( LFDX_OPCODE | frt(d) | ra0mem(a) | rb(b)); } + +// PPC 1, section 4.6.3 Floating-Point Store Instructions +// Use ra0mem instead of ra in some instructions below. 
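+// (In load/store encodings RA=0 denotes a literal zero base, which is why
+// ra0mem disallows R0 as the base register.)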
+inline void Assembler::stfs( FloatRegister s, int si16, Register a) { emit_int32( STFS_OPCODE | frs(s) | ra0mem(a) | simm(si16,16)); } +inline void Assembler::stfsu(FloatRegister s, int si16, Register a) { emit_int32( STFSU_OPCODE | frs(s) | ra(a) | simm(si16,16)); } +inline void Assembler::stfsx(FloatRegister s, Register a, Register b){ emit_int32( STFSX_OPCODE | frs(s) | ra0mem(a) | rb(b)); } +inline void Assembler::stfd( FloatRegister s, int si16, Register a) { emit_int32( STFD_OPCODE | frs(s) | ra0mem(a) | simm(si16,16)); } +inline void Assembler::stfdu(FloatRegister s, int si16, Register a) { emit_int32( STFDU_OPCODE | frs(s) | ra(a) | simm(si16,16)); } +inline void Assembler::stfdx(FloatRegister s, Register a, Register b){ emit_int32( STFDX_OPCODE | frs(s) | ra0mem(a) | rb(b)); } + +// PPC 1, section 4.6.4 Floating-Point Move Instructions +inline void Assembler::fmr( FloatRegister d, FloatRegister b) { emit_int32( FMR_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fmr_(FloatRegister d, FloatRegister b) { emit_int32( FMR_OPCODE | frt(d) | frb(b) | rc(1)); } + +// These are special Power6 opcodes, reused for "lfdepx" and "stfdepx" +// on Power7. Do not use. +//inline void Assembler::mffgpr( FloatRegister d, Register b) { emit_int32( MFFGPR_OPCODE | frt(d) | rb(b) | rc(0)); } +//inline void Assembler::mftgpr( Register d, FloatRegister b) { emit_int32( MFTGPR_OPCODE | rt(d) | frb(b) | rc(0)); } +// add cmpb and popcntb to detect ppc power version. +inline void Assembler::cmpb( Register a, Register s, Register b) { emit_int32( CMPB_OPCODE | rta(a) | rs(s) | rb(b) | rc(0)); } +inline void Assembler::popcntb(Register a, Register s) { emit_int32( POPCNTB_OPCODE | rta(a) | rs(s)); }; +inline void Assembler::popcntw(Register a, Register s) { emit_int32( POPCNTW_OPCODE | rta(a) | rs(s)); }; +inline void Assembler::popcntd(Register a, Register s) { emit_int32( POPCNTD_OPCODE | rta(a) | rs(s)); }; + +inline void Assembler::fneg( FloatRegister d, FloatRegister b) { emit_int32( FNEG_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fneg_( FloatRegister d, FloatRegister b) { emit_int32( FNEG_OPCODE | frt(d) | frb(b) | rc(1)); } +inline void Assembler::fabs( FloatRegister d, FloatRegister b) { emit_int32( FABS_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fabs_( FloatRegister d, FloatRegister b) { emit_int32( FABS_OPCODE | frt(d) | frb(b) | rc(1)); } +inline void Assembler::fnabs( FloatRegister d, FloatRegister b) { emit_int32( FNABS_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fnabs_(FloatRegister d, FloatRegister b) { emit_int32( FNABS_OPCODE | frt(d) | frb(b) | rc(1)); } + +// PPC 1, section 4.6.5.1 Floating-Point Elementary Arithmetic Instructions +inline void Assembler::fadd( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADD_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); } +inline void Assembler::fadd_( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADD_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); } +inline void Assembler::fadds( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADDS_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); } +inline void Assembler::fadds_(FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FADDS_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); } +inline void Assembler::fsub( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUB_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); } +inline void Assembler::fsub_( FloatRegister d, FloatRegister a, 
FloatRegister b) { emit_int32( FSUB_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); } +inline void Assembler::fsubs( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUBS_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); } +inline void Assembler::fsubs_(FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FSUBS_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); } +inline void Assembler::fmul( FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMUL_OPCODE | frt(d) | fra(a) | frc(c) | rc(0)); } +inline void Assembler::fmul_( FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMUL_OPCODE | frt(d) | fra(a) | frc(c) | rc(1)); } +inline void Assembler::fmuls( FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMULS_OPCODE | frt(d) | fra(a) | frc(c) | rc(0)); } +inline void Assembler::fmuls_(FloatRegister d, FloatRegister a, FloatRegister c) { emit_int32( FMULS_OPCODE | frt(d) | fra(a) | frc(c) | rc(1)); } +inline void Assembler::fdiv( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIV_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); } +inline void Assembler::fdiv_( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIV_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); } +inline void Assembler::fdivs( FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIVS_OPCODE | frt(d) | fra(a) | frb(b) | rc(0)); } +inline void Assembler::fdivs_(FloatRegister d, FloatRegister a, FloatRegister b) { emit_int32( FDIVS_OPCODE | frt(d) | fra(a) | frb(b) | rc(1)); } + +// PPC 1, section 4.6.6 Floating-Point Rounding and Conversion Instructions +inline void Assembler::frsp( FloatRegister d, FloatRegister b) { emit_int32( FRSP_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fctid( FloatRegister d, FloatRegister b) { emit_int32( FCTID_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fctidz(FloatRegister d, FloatRegister b) { emit_int32( FCTIDZ_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fctiw( FloatRegister d, FloatRegister b) { emit_int32( FCTIW_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fctiwz(FloatRegister d, FloatRegister b) { emit_int32( FCTIWZ_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fcfid( FloatRegister d, FloatRegister b) { emit_int32( FCFID_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fcfids(FloatRegister d, FloatRegister b) { emit_int32( FCFIDS_OPCODE | frt(d) | frb(b) | rc(0)); } + +// PPC 1, section 4.6.7 Floating-Point Compare Instructions +inline void Assembler::fcmpu( ConditionRegister crx, FloatRegister a, FloatRegister b) { emit_int32( FCMPU_OPCODE | bf(crx) | fra(a) | frb(b)); } + +// PPC 1, section 5.2.1 Floating-Point Arithmetic Instructions +inline void Assembler::fsqrt( FloatRegister d, FloatRegister b) { emit_int32( FSQRT_OPCODE | frt(d) | frb(b) | rc(0)); } +inline void Assembler::fsqrts(FloatRegister d, FloatRegister b) { emit_int32( FSQRTS_OPCODE | frt(d) | frb(b) | rc(0)); } + +// Vector instructions for >= Power6. 
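+// These are VMX/AltiVec operations. Note that lvx/stvx access the 16-byte-
+// aligned quadword containing the EA (the low four address bits are ignored);
+// lvsl/lvsr yield permute control vectors for realigning unaligned data.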
+inline void Assembler::lvebx( VectorRegister d, Register s1, Register s2) { emit_int32( LVEBX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::lvehx( VectorRegister d, Register s1, Register s2) { emit_int32( LVEHX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::lvewx( VectorRegister d, Register s1, Register s2) { emit_int32( LVEWX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::lvx( VectorRegister d, Register s1, Register s2) { emit_int32( LVX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::lvxl( VectorRegister d, Register s1, Register s2) { emit_int32( LVXL_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::stvebx(VectorRegister d, Register s1, Register s2) { emit_int32( STVEBX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::stvehx(VectorRegister d, Register s1, Register s2) { emit_int32( STVEHX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::stvewx(VectorRegister d, Register s1, Register s2) { emit_int32( STVEWX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::stvx( VectorRegister d, Register s1, Register s2) { emit_int32( STVX_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::stvxl( VectorRegister d, Register s1, Register s2) { emit_int32( STVXL_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::lvsl( VectorRegister d, Register s1, Register s2) { emit_int32( LVSL_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } +inline void Assembler::lvsr( VectorRegister d, Register s1, Register s2) { emit_int32( LVSR_OPCODE | vrt(d) | ra0mem(s1) | rb(s2)); } + +inline void Assembler::vpkpx( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKPX_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkshss( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSHSS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkswss( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSWSS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkshus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSHUS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkswus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKSWUS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkuhum( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUHUM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkuwum( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUWUM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkuhus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUHUS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vpkuwus( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VPKUWUS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vupkhpx( VectorRegister d, VectorRegister b) { emit_int32( VUPKHPX_OPCODE | vrt(d) | vrb(b)); } +inline void Assembler::vupkhsb( VectorRegister d, VectorRegister b) { emit_int32( VUPKHSB_OPCODE | vrt(d) | vrb(b)); } +inline void Assembler::vupkhsh( VectorRegister d, VectorRegister b) { emit_int32( VUPKHSH_OPCODE | vrt(d) | vrb(b)); } +inline void Assembler::vupklpx( VectorRegister d, VectorRegister b) { emit_int32( VUPKLPX_OPCODE | vrt(d) | vrb(b)); } +inline void Assembler::vupklsb( VectorRegister d, VectorRegister b) { emit_int32( VUPKLSB_OPCODE | vrt(d) | vrb(b)); 
} +inline void Assembler::vupklsh( VectorRegister d, VectorRegister b) { emit_int32( VUPKLSH_OPCODE | vrt(d) | vrb(b)); } +inline void Assembler::vmrghb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGHB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmrghw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGHW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmrghh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGHH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmrglb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGLB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmrglw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGLW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmrglh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMRGLH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsplt( VectorRegister d, int ui4, VectorRegister b) { emit_int32( VSPLT_OPCODE | vrt(d) | vsplt_uim(uimm(ui4,4)) | vrb(b)); } +inline void Assembler::vsplth( VectorRegister d, int ui3, VectorRegister b) { emit_int32( VSPLTH_OPCODE | vrt(d) | vsplt_uim(uimm(ui3,3)) | vrb(b)); } +inline void Assembler::vspltw( VectorRegister d, int ui2, VectorRegister b) { emit_int32( VSPLTW_OPCODE | vrt(d) | vsplt_uim(uimm(ui2,2)) | vrb(b)); } +inline void Assembler::vspltisb(VectorRegister d, int si5) { emit_int32( VSPLTISB_OPCODE| vrt(d) | vsplti_sim(simm(si5,5))); } +inline void Assembler::vspltish(VectorRegister d, int si5) { emit_int32( VSPLTISH_OPCODE| vrt(d) | vsplti_sim(simm(si5,5))); } +inline void Assembler::vspltisw(VectorRegister d, int si5) { emit_int32( VSPLTISW_OPCODE| vrt(d) | vsplti_sim(simm(si5,5))); } +inline void Assembler::vperm( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c){ emit_int32( VPERM_OPCODE | vrt(d) | vra(a) | vrb(b) | vrc(c)); } +inline void Assembler::vsel( VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c){ emit_int32( VSEL_OPCODE | vrt(d) | vra(a) | vrb(b) | vrc(c)); } +inline void Assembler::vsl( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSL_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsldoi( VectorRegister d, VectorRegister a, VectorRegister b, int si4) { emit_int32( VSLDOI_OPCODE| vrt(d) | vra(a) | vrb(b) | vsldoi_shb(simm(si4,4))); } +inline void Assembler::vslo( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSLO_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsr( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSR_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsro( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRO_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vaddcuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDCUW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vaddshs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDSHS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vaddsbs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDSBS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vaddsws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDSWS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vaddubm( VectorRegister d, 
VectorRegister a, VectorRegister b) { emit_int32( VADDUBM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vadduwm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUWM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vadduhm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUHM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vaddubs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUBS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vadduws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUWS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vadduhs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VADDUHS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubcuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBCUW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubshs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBSHS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubsbs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBSBS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubsws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBSWS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsububm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUBM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubuwm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUWM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubuhm( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUHM_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsububs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUBS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubuws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUWS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsubuhs( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUBUHS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmulesb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULESB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmuleub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULEUB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmulesh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULESH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmuleuh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULEUH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmulosb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOSB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmuloub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOUB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmulosh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOSH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmulouh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMULOUH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmhaddshs(VectorRegister d,VectorRegister a, 
VectorRegister b, VectorRegister c) { emit_int32( VMHADDSHS_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmhraddshs(VectorRegister d,VectorRegister a,VectorRegister b, VectorRegister c) { emit_int32( VMHRADDSHS_OPCODE| vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmladduhm(VectorRegister d,VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMLADDUHM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmsubuhm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUBUHM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmsummbm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMMBM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmsumshm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMSHM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmsumshs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMSHS_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmsumuhm(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMUHM_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vmsumuhs(VectorRegister d, VectorRegister a, VectorRegister b, VectorRegister c) { emit_int32( VMSUMUHS_OPCODE | vrt(d) | vra(a) | vrb(b)| vrc(c)); } +inline void Assembler::vsumsws( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUMSWS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsum2sws(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM2SWS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsum4sbs(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM4SBS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsum4ubs(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM4UBS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vsum4shs(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSUM4SHS_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vavgsb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGSB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vavgsw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGSW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vavgsh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGSH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vavgub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGUB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vavguw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGUW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vavguh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAVGUH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmaxsb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmaxsw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmaxsh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXSH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline 
void Assembler::vmaxub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmaxuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vmaxuh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMAXUH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vminsb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vminsw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vminsh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINSH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vminub( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUB_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vminuw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUW_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vminuh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VMINUH_OPCODE | vrt(d) | vra(a) | vrb(b)); } +inline void Assembler::vcmpequb(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpequh(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpequw(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpgtsh(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpgtsb(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpgtsw(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpgtub(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpgtuh(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpgtuw(VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(0)); } +inline void Assembler::vcmpequb_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } +inline void Assembler::vcmpequh_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } +inline void Assembler::vcmpequw_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPEQUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } +inline void Assembler::vcmpgtsh_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } +inline void Assembler::vcmpgtsb_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); } 
+inline void Assembler::vcmpgtsw_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTSW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
+inline void Assembler::vcmpgtub_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUB_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
+inline void Assembler::vcmpgtuh_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUH_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
+inline void Assembler::vcmpgtuw_(VectorRegister d,VectorRegister a, VectorRegister b) { emit_int32( VCMPGTUW_OPCODE | vrt(d) | vra(a) | vrb(b) | vcmp_rc(1)); }
+inline void Assembler::vand( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VAND_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vandc( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VANDC_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vnor( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VNOR_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vor( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VOR_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vxor( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VXOR_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vrlb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vrlw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vrlh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VRLH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vslb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSLB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vslw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSLW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vslh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSLH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsrb( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsrw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsrh( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsrab( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAB_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsraw( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAW_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::vsrah( VectorRegister d, VectorRegister a, VectorRegister b) { emit_int32( VSRAH_OPCODE | vrt(d) | vra(a) | vrb(b)); }
+inline void Assembler::mtvscr( VectorRegister b) { emit_int32( MTVSCR_OPCODE | vrb(b)); }
+inline void Assembler::mfvscr( VectorRegister d) { emit_int32( MFVSCR_OPCODE | vrt(d)); }
+
+// ra0 version
+inline void Assembler::lwzx( Register d, Register s2) { emit_int32( LWZX_OPCODE | rt(d) | rb(s2));}
+inline void Assembler::lwz( Register d, int si16 ) { emit_int32( LWZ_OPCODE | rt(d) | d1(si16));}
+inline void Assembler::lwax( Register d, Register s2) { emit_int32( LWAX_OPCODE | rt(d) | rb(s2));}
+inline void Assembler::lwa( Register d, int si16 ) { emit_int32( LWA_OPCODE | rt(d) | ds(si16));} +inline void Assembler::lhzx( Register d, Register s2) { emit_int32( LHZX_OPCODE | rt(d) | rb(s2));} +inline void Assembler::lhz( Register d, int si16 ) { emit_int32( LHZ_OPCODE | rt(d) | d1(si16));} +inline void Assembler::lhax( Register d, Register s2) { emit_int32( LHAX_OPCODE | rt(d) | rb(s2));} +inline void Assembler::lha( Register d, int si16 ) { emit_int32( LHA_OPCODE | rt(d) | d1(si16));} +inline void Assembler::lbzx( Register d, Register s2) { emit_int32( LBZX_OPCODE | rt(d) | rb(s2));} +inline void Assembler::lbz( Register d, int si16 ) { emit_int32( LBZ_OPCODE | rt(d) | d1(si16));} +inline void Assembler::ld( Register d, int si16 ) { emit_int32( LD_OPCODE | rt(d) | ds(si16));} +inline void Assembler::ldx( Register d, Register s2) { emit_int32( LDX_OPCODE | rt(d) | rb(s2));} +inline void Assembler::stwx( Register d, Register s2) { emit_int32( STWX_OPCODE | rs(d) | rb(s2));} +inline void Assembler::stw( Register d, int si16 ) { emit_int32( STW_OPCODE | rs(d) | d1(si16));} +inline void Assembler::sthx( Register d, Register s2) { emit_int32( STHX_OPCODE | rs(d) | rb(s2));} +inline void Assembler::sth( Register d, int si16 ) { emit_int32( STH_OPCODE | rs(d) | d1(si16));} +inline void Assembler::stbx( Register d, Register s2) { emit_int32( STBX_OPCODE | rs(d) | rb(s2));} +inline void Assembler::stb( Register d, int si16 ) { emit_int32( STB_OPCODE | rs(d) | d1(si16));} +inline void Assembler::std( Register d, int si16 ) { emit_int32( STD_OPCODE | rs(d) | ds(si16));} +inline void Assembler::stdx( Register d, Register s2) { emit_int32( STDX_OPCODE | rs(d) | rb(s2));} + +// ra0 version +inline void Assembler::icbi( Register s2) { emit_int32( ICBI_OPCODE | rb(s2) ); } +//inline void Assembler::dcba( Register s2) { emit_int32( DCBA_OPCODE | rb(s2) ); } +inline void Assembler::dcbz( Register s2) { emit_int32( DCBZ_OPCODE | rb(s2) ); } +inline void Assembler::dcbst( Register s2) { emit_int32( DCBST_OPCODE | rb(s2) ); } +inline void Assembler::dcbf( Register s2) { emit_int32( DCBF_OPCODE | rb(s2) ); } +inline void Assembler::dcbt( Register s2) { emit_int32( DCBT_OPCODE | rb(s2) ); } +inline void Assembler::dcbtct( Register s2, int ct) { emit_int32( DCBT_OPCODE | rb(s2) | thct(ct)); } +inline void Assembler::dcbtds( Register s2, int ds) { emit_int32( DCBT_OPCODE | rb(s2) | thds(ds)); } +inline void Assembler::dcbtst( Register s2) { emit_int32( DCBTST_OPCODE | rb(s2) ); } +inline void Assembler::dcbtstct(Register s2, int ct) { emit_int32( DCBTST_OPCODE | rb(s2) | thct(ct)); } + +// ra0 version +inline void Assembler::lwarx_unchecked(Register d, Register b, int eh1) { emit_int32( LWARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } +inline void Assembler::ldarx_unchecked(Register d, Register b, int eh1) { emit_int32( LDARX_OPCODE | rt(d) | rb(b) | eh(eh1)); } +inline void Assembler::lwarx( Register d, Register b, bool hint_exclusive_access){ lwarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 1 : 0); } +inline void Assembler::ldarx( Register d, Register b, bool hint_exclusive_access){ ldarx_unchecked(d, b, (hint_exclusive_access && lxarx_hint_exclusive_access() && UseExtendedLoadAndReserveInstructionsPPC64) ? 
1 : 0); }
+inline void Assembler::stwcx_(Register s, Register b) { emit_int32( STWCX_OPCODE | rs(s) | rb(b) | rc(1)); }
+inline void Assembler::stdcx_(Register s, Register b) { emit_int32( STDCX_OPCODE | rs(s) | rb(b) | rc(1)); }
+
+// ra0 version
+inline void Assembler::lfs( FloatRegister d, int si16)    { emit_int32( LFS_OPCODE  | frt(d) | simm(si16,16)); }
+inline void Assembler::lfsx(FloatRegister d, Register b)  { emit_int32( LFSX_OPCODE | frt(d) | rb(b)); }
+inline void Assembler::lfd( FloatRegister d, int si16)    { emit_int32( LFD_OPCODE  | frt(d) | simm(si16,16)); }
+inline void Assembler::lfdx(FloatRegister d, Register b)  { emit_int32( LFDX_OPCODE | frt(d) | rb(b)); }
+
+// ra0 version
+inline void Assembler::stfs( FloatRegister s, int si16)   { emit_int32( STFS_OPCODE  | frs(s) | simm(si16, 16)); }
+inline void Assembler::stfsx(FloatRegister s, Register b) { emit_int32( STFSX_OPCODE | frs(s) | rb(b)); }
+inline void Assembler::stfd( FloatRegister s, int si16)   { emit_int32( STFD_OPCODE  | frs(s) | simm(si16, 16)); }
+inline void Assembler::stfdx(FloatRegister s, Register b) { emit_int32( STFDX_OPCODE | frs(s) | rb(b)); }
+
+// ra0 version
+inline void Assembler::lvebx( VectorRegister d, Register s2) { emit_int32( LVEBX_OPCODE  | vrt(d) | rb(s2)); }
+inline void Assembler::lvehx( VectorRegister d, Register s2) { emit_int32( LVEHX_OPCODE  | vrt(d) | rb(s2)); }
+inline void Assembler::lvewx( VectorRegister d, Register s2) { emit_int32( LVEWX_OPCODE  | vrt(d) | rb(s2)); }
+inline void Assembler::lvx(   VectorRegister d, Register s2) { emit_int32( LVX_OPCODE    | vrt(d) | rb(s2)); }
+inline void Assembler::lvxl(  VectorRegister d, Register s2) { emit_int32( LVXL_OPCODE   | vrt(d) | rb(s2)); }
+inline void Assembler::stvebx(VectorRegister d, Register s2) { emit_int32( STVEBX_OPCODE | vrt(d) | rb(s2)); }
+inline void Assembler::stvehx(VectorRegister d, Register s2) { emit_int32( STVEHX_OPCODE | vrt(d) | rb(s2)); }
+inline void Assembler::stvewx(VectorRegister d, Register s2) { emit_int32( STVEWX_OPCODE | vrt(d) | rb(s2)); }
+inline void Assembler::stvx(  VectorRegister d, Register s2) { emit_int32( STVX_OPCODE   | vrt(d) | rb(s2)); }
+inline void Assembler::stvxl( VectorRegister d, Register s2) { emit_int32( STVXL_OPCODE  | vrt(d) | rb(s2)); }
+inline void Assembler::lvsl(  VectorRegister d, Register s2) { emit_int32( LVSL_OPCODE   | vrt(d) | rb(s2)); }
+inline void Assembler::lvsr(  VectorRegister d, Register s2) { emit_int32( LVSR_OPCODE   | vrt(d) | rb(s2)); }
+
+
+inline void Assembler::load_const(Register d, void* x, Register tmp) {
+  load_const(d, (long)x, tmp);
+}
+
+// Load a 64 bit constant encoded by a `Label'. This works for bound
+// labels as well as unbound ones. For unbound labels, the code will
+// be patched as soon as the label gets bound.
+inline void Assembler::load_const(Register d, Label& L, Register tmp) {
+  load_const(d, target(L), tmp);
+}
+
+// Load a 64 bit constant encoded by an AddressLiteral. Patchable.
+inline void Assembler::load_const(Register d, AddressLiteral& a, Register tmp) {
+  assert(d != R0, "R0 not allowed");
+  // First relocate (we don't change the offset in the RelocationHolder,
+  // just pass a.rspec()), then delegate to load_const(Register, long).
+  relocate(a.rspec());
+  load_const(d, (long)a.value(), tmp);
+}
+
+
+#endif // CPU_PPC_VM_ASSEMBLER_PPC_INLINE_HPP
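A note on the `ra0' overloads above: they omit the base register entirely because the Power ISA defines RA=0 in a load or store as a base of literal zero, not the contents of r0. The following standalone sketch (not part of the patch; d_form is a hypothetical helper, with field positions taken from the ISA's D-form layout) shows how such an emit_int32 call packs one instruction word:

  #include <cstdint>
  #include <cstdio>

  // D-form: OPCD(0:5) RT(6:10) RA(11:15) D(16:31), IBM big-endian bit numbering.
  static uint32_t d_form(uint32_t opcd, uint32_t rt, uint32_t ra, uint16_t d) {
    return (opcd << 26) | (rt << 21) | (ra << 16) | d;
  }

  int main() {
    // lwz has primary opcode 32; "lwz r3, 8(r1)" assembles to 0x80610008.
    printf("lwz r3, 8(r1) = 0x%08x\n", d_form(32, 3, 1, 8));
    // The ra0 overloads emit RA = 0, i.e. a base of literal zero:
    printf("lwz r3, 8(0)  = 0x%08x\n", d_form(32, 3, 0, 8));
    return 0;
  }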
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/bytecodeInterpreter_ppc.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_BYTECODEINTERPRETER_PPC_HPP +#define CPU_PPC_VM_BYTECODEINTERPRETER_PPC_HPP + +// Platform specific for C++ based Interpreter +#define LOTS_OF_REGS /* Lets interpreter use plenty of registers */ + +private: + + // Save the bottom of the stack after frame manager setup. For ease of restoration after return + // from recursive interpreter call. + intptr_t* _frame_bottom; // Saved bottom of frame manager frame. + address _last_Java_pc; // Pc to return to in frame manager. + intptr_t* _last_Java_fp; // frame pointer + intptr_t* _last_Java_sp; // stack pointer + interpreterState _self_link; // Previous interpreter state // sometimes points to self??? + double _native_fresult; // Save result of native calls that might return floats. + intptr_t _native_lresult; // Save result of native calls that might return handle/longs. + +public: + address last_Java_pc(void) { return _last_Java_pc; } + intptr_t* last_Java_fp(void) { return _last_Java_fp; } + + static ByteSize native_lresult_offset() { + return byte_offset_of(BytecodeInterpreter, _native_lresult); + } + + static ByteSize native_fresult_offset() { + return byte_offset_of(BytecodeInterpreter, _native_fresult); + } + + static void pd_layout_interpreterState(interpreterState istate, address last_Java_pc, intptr_t* last_Java_fp); + +#define SET_LAST_JAVA_FRAME() THREAD->frame_anchor()->set(istate->_last_Java_sp, istate->_last_Java_pc); +#define RESET_LAST_JAVA_FRAME() THREAD->frame_anchor()->clear(); + + +// Macros for accessing the stack. 
+#undef STACK_INT
+#undef STACK_FLOAT
+#undef STACK_ADDR
+#undef STACK_OBJECT
+#undef STACK_DOUBLE
+#undef STACK_LONG
+
+// JavaStack Implementation
+#define STACK_SLOT(offset)    ((address) &topOfStack[-(offset)])
+#define STACK_INT(offset)     (*((jint*) &topOfStack[-(offset)]))
+#define STACK_FLOAT(offset)   (*((jfloat *) &topOfStack[-(offset)]))
+#define STACK_OBJECT(offset)  (*((oop *) &topOfStack [-(offset)]))
+#define STACK_DOUBLE(offset)  (((VMJavaVal64*) &topOfStack[-(offset)])->d)
+#define STACK_LONG(offset)    (((VMJavaVal64 *) &topOfStack[-(offset)])->l)
+
+#define SET_STACK_SLOT(value, offset)   (*(intptr_t*)&topOfStack[-(offset)] = *(intptr_t*)(value))
+#define SET_STACK_ADDR(value, offset)   (*((address *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_INT(value, offset)    (*((jint *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_FLOAT(value, offset)  (*((jfloat *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_OBJECT(value, offset) (*((oop *)&topOfStack[-(offset)]) = (value))
+#define SET_STACK_DOUBLE(value, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = (value))
+#define SET_STACK_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&topOfStack[-(offset)])->d = \
+                                                 ((VMJavaVal64*)(addr))->d)
+#define SET_STACK_LONG(value, offset)   (((VMJavaVal64*)&topOfStack[-(offset)])->l = (value))
+#define SET_STACK_LONG_FROM_ADDR(addr, offset)   (((VMJavaVal64*)&topOfStack[-(offset)])->l = \
+                                                 ((VMJavaVal64*)(addr))->l)
+// JavaLocals implementation
+
+#define LOCALS_SLOT(offset)      ((intptr_t*)&locals[-(offset)])
+#define LOCALS_ADDR(offset)      ((address)locals[-(offset)])
+#define LOCALS_INT(offset)       (*(jint*)&(locals[-(offset)]))
+#define LOCALS_OBJECT(offset)    ((oop)locals[-(offset)])
+#define LOCALS_LONG_AT(offset)   (((address)&locals[-((offset) + 1)]))
+#define LOCALS_DOUBLE_AT(offset) (((address)&locals[-((offset) + 1)]))
+
+#define SET_LOCALS_SLOT(value, offset)   (*(intptr_t*)&locals[-(offset)] = *(intptr_t *)(value))
+#define SET_LOCALS_INT(value, offset)    (*((jint *)&locals[-(offset)]) = (value))
+#define SET_LOCALS_DOUBLE(value, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = (value))
+#define SET_LOCALS_LONG(value, offset)   (((VMJavaVal64*)&locals[-((offset)+1)])->l = (value))
+#define SET_LOCALS_DOUBLE_FROM_ADDR(addr, offset) (((VMJavaVal64*)&locals[-((offset)+1)])->d = \
+                                                  ((VMJavaVal64*)(addr))->d)
+#define SET_LOCALS_LONG_FROM_ADDR(addr, offset)   (((VMJavaVal64*)&locals[-((offset)+1)])->l = \
+                                                  ((VMJavaVal64*)(addr))->l)
+
+#endif // CPU_PPC_VM_BYTECODEINTERPRETER_PPC_HPP
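As a sanity check on the addressing convention these macros encode: topOfStack points at the next free slot, the expression stack grows toward lower addresses, and offsets are negative from the top, so the top element is STACK_INT(-1), i.e. topOfStack[1]. A minimal standalone sketch, assuming the push/pop bookkeeping of the shared bytecodeInterpreter.cpp (its MORE_STACK macro); the slot array and values are invented for illustration:

  #include <cstdint>
  #include <cstdio>

  typedef int32_t jint;

  #define STACK_INT(offset)            (*((jint*)&topOfStack[-(offset)]))
  #define SET_STACK_INT(value, offset) (*((jint*)&topOfStack[-(offset)]) = (value))

  int main() {
    intptr_t slots[8];
    intptr_t* topOfStack = &slots[7];      // next free slot; stack grows down

    SET_STACK_INT(5, 0); topOfStack -= 1;  // push 5
    SET_STACK_INT(7, 0); topOfStack -= 1;  // push 7

    // iadd: the result overwrites the second-from-top slot, tos retreats one.
    SET_STACK_INT(STACK_INT(-2) + STACK_INT(-1), -2);
    topOfStack += 1;

    printf("top = %d\n", STACK_INT(-1));   // prints 12
    return 0;
  }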
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/bytecodeInterpreter_ppc.inline.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,290 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_BYTECODEINTERPRETER_PPC_INLINE_HPP +#define CPU_PPC_VM_BYTECODEINTERPRETER_PPC_INLINE_HPP + +#ifdef CC_INTERP + +// Inline interpreter functions for ppc. + +#include <math.h> + +inline jfloat BytecodeInterpreter::VMfloatAdd(jfloat op1, jfloat op2) { return op1 + op2; } +inline jfloat BytecodeInterpreter::VMfloatSub(jfloat op1, jfloat op2) { return op1 - op2; } +inline jfloat BytecodeInterpreter::VMfloatMul(jfloat op1, jfloat op2) { return op1 * op2; } +inline jfloat BytecodeInterpreter::VMfloatDiv(jfloat op1, jfloat op2) { return op1 / op2; } +inline jfloat BytecodeInterpreter::VMfloatRem(jfloat op1, jfloat op2) { return (jfloat)fmod((double)op1, (double)op2); } + +inline jfloat BytecodeInterpreter::VMfloatNeg(jfloat op) { return -op; } + +inline int32_t BytecodeInterpreter::VMfloatCompare(jfloat op1, jfloat op2, int32_t direction) { + return ( op1 < op2 ? -1 : + op1 > op2 ? 1 : + op1 == op2 ? 0 : + (direction == -1 || direction == 1) ? direction : 0); + +} + +inline void BytecodeInterpreter::VMmemCopy64(uint32_t to[2], const uint32_t from[2]) { + to[0] = from[0]; to[1] = from[1]; +} + +// The long operations depend on compiler support for "long long" on ppc. 
+ +inline jlong BytecodeInterpreter::VMlongAdd(jlong op1, jlong op2) { + return op1 + op2; +} + +inline jlong BytecodeInterpreter::VMlongAnd(jlong op1, jlong op2) { + return op1 & op2; +} + +inline jlong BytecodeInterpreter::VMlongDiv(jlong op1, jlong op2) { + if (op1 == min_jlong && op2 == -1) return op1; + return op1 / op2; +} + +inline jlong BytecodeInterpreter::VMlongMul(jlong op1, jlong op2) { + return op1 * op2; +} + +inline jlong BytecodeInterpreter::VMlongOr(jlong op1, jlong op2) { + return op1 | op2; +} + +inline jlong BytecodeInterpreter::VMlongSub(jlong op1, jlong op2) { + return op1 - op2; +} + +inline jlong BytecodeInterpreter::VMlongXor(jlong op1, jlong op2) { + return op1 ^ op2; +} + +inline jlong BytecodeInterpreter::VMlongRem(jlong op1, jlong op2) { + if (op1 == min_jlong && op2 == -1) return 0; + return op1 % op2; +} + +inline jlong BytecodeInterpreter::VMlongUshr(jlong op1, jint op2) { + return ((uint64_t) op1) >> (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongShr(jlong op1, jint op2) { + return op1 >> (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongShl(jlong op1, jint op2) { + return op1 << (op2 & 0x3F); +} + +inline jlong BytecodeInterpreter::VMlongNeg(jlong op) { + return -op; +} + +inline jlong BytecodeInterpreter::VMlongNot(jlong op) { + return ~op; +} + +inline int32_t BytecodeInterpreter::VMlongLtz(jlong op) { + return (op <= 0); +} + +inline int32_t BytecodeInterpreter::VMlongGez(jlong op) { + return (op >= 0); +} + +inline int32_t BytecodeInterpreter::VMlongEqz(jlong op) { + return (op == 0); +} + +inline int32_t BytecodeInterpreter::VMlongEq(jlong op1, jlong op2) { + return (op1 == op2); +} + +inline int32_t BytecodeInterpreter::VMlongNe(jlong op1, jlong op2) { + return (op1 != op2); +} + +inline int32_t BytecodeInterpreter::VMlongGe(jlong op1, jlong op2) { + return (op1 >= op2); +} + +inline int32_t BytecodeInterpreter::VMlongLe(jlong op1, jlong op2) { + return (op1 <= op2); +} + +inline int32_t BytecodeInterpreter::VMlongLt(jlong op1, jlong op2) { + return (op1 < op2); +} + +inline int32_t BytecodeInterpreter::VMlongGt(jlong op1, jlong op2) { + return (op1 > op2); +} + +inline int32_t BytecodeInterpreter::VMlongCompare(jlong op1, jlong op2) { + return (VMlongLt(op1, op2) ? -1 : VMlongGt(op1, op2) ? 1 : 0); +} + +// Long conversions + +inline jdouble BytecodeInterpreter::VMlong2Double(jlong val) { + return (jdouble) val; +} + +inline jfloat BytecodeInterpreter::VMlong2Float(jlong val) { + return (jfloat) val; +} + +inline jint BytecodeInterpreter::VMlong2Int(jlong val) { + return (jint) val; +} + +// Double Arithmetic + +inline jdouble BytecodeInterpreter::VMdoubleAdd(jdouble op1, jdouble op2) { + return op1 + op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleDiv(jdouble op1, jdouble op2) { + return op1 / op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleMul(jdouble op1, jdouble op2) { + return op1 * op2; +} + +inline jdouble BytecodeInterpreter::VMdoubleNeg(jdouble op) { + return -op; +} + +inline jdouble BytecodeInterpreter::VMdoubleRem(jdouble op1, jdouble op2) { + return fmod(op1, op2); +} + +inline jdouble BytecodeInterpreter::VMdoubleSub(jdouble op1, jdouble op2) { + return op1 - op2; +} + +inline int32_t BytecodeInterpreter::VMdoubleCompare(jdouble op1, jdouble op2, int32_t direction) { + return ( op1 < op2 ? -1 : + op1 > op2 ? 1 : + op1 == op2 ? 0 : + (direction == -1 || direction == 1) ? 
direction : 0); +} + +// Double Conversions + +inline jfloat BytecodeInterpreter::VMdouble2Float(jdouble val) { + return (jfloat) val; +} + +// Float Conversions + +inline jdouble BytecodeInterpreter::VMfloat2Double(jfloat op) { + return (jdouble) op; +} + +// Integer Arithmetic + +inline jint BytecodeInterpreter::VMintAdd(jint op1, jint op2) { + return op1 + op2; +} + +inline jint BytecodeInterpreter::VMintAnd(jint op1, jint op2) { + return op1 & op2; +} + +inline jint BytecodeInterpreter::VMintDiv(jint op1, jint op2) { + /* it's possible we could catch this special case implicitly */ + if ((juint)op1 == 0x80000000 && op2 == -1) return op1; + else return op1 / op2; +} + +inline jint BytecodeInterpreter::VMintMul(jint op1, jint op2) { + return op1 * op2; +} + +inline jint BytecodeInterpreter::VMintNeg(jint op) { + return -op; +} + +inline jint BytecodeInterpreter::VMintOr(jint op1, jint op2) { + return op1 | op2; +} + +inline jint BytecodeInterpreter::VMintRem(jint op1, jint op2) { + /* it's possible we could catch this special case implicitly */ + if ((juint)op1 == 0x80000000 && op2 == -1) return 0; + else return op1 % op2; +} + +inline jint BytecodeInterpreter::VMintShl(jint op1, jint op2) { + return op1 << (op2 & 0x1f); +} + +inline jint BytecodeInterpreter::VMintShr(jint op1, jint op2) { + return op1 >> (op2 & 0x1f); +} + +inline jint BytecodeInterpreter::VMintSub(jint op1, jint op2) { + return op1 - op2; +} + +inline juint BytecodeInterpreter::VMintUshr(jint op1, jint op2) { + return ((juint) op1) >> (op2 & 0x1f); +} + +inline jint BytecodeInterpreter::VMintXor(jint op1, jint op2) { + return op1 ^ op2; +} + +inline jdouble BytecodeInterpreter::VMint2Double(jint val) { + return (jdouble) val; +} + +inline jfloat BytecodeInterpreter::VMint2Float(jint val) { + return (jfloat) val; +} + +inline jlong BytecodeInterpreter::VMint2Long(jint val) { + return (jlong) val; +} + +inline jchar BytecodeInterpreter::VMint2Char(jint val) { + return (jchar) val; +} + +inline jshort BytecodeInterpreter::VMint2Short(jint val) { + return (jshort) val; +} + +inline jbyte BytecodeInterpreter::VMint2Byte(jint val) { + return (jbyte) val; +} + +#endif // CC_INTERP + +#endif // CPU_PPC_VM_BYTECODEINTERPRETER_PPC_INLINE_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/bytecodes_ppc.cpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "interpreter/bytecodes.hpp" + +void Bytecodes::pd_initialize() { + // No ppc specific initialization. +}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/bytecodes_ppc.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_BYTECODES_PPC_HPP +#define CPU_PPC_VM_BYTECODES_PPC_HPP + +// No ppc64 specific bytecodes + +#endif // CPU_PPC_VM_BYTECODES_PPC_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/bytes_ppc.hpp	Fri Aug 02 16:46:45 2013 +0200
@@ -0,0 +1,156 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#ifndef CPU_PPC_VM_BYTES_PPC_HPP
+#define CPU_PPC_VM_BYTES_PPC_HPP
+
+#include "memory/allocation.hpp"
+
+class Bytes: AllStatic {
+ public:
+  // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering.
+  // PowerPC needs to check for alignment.
+
+  // Can I count on address always being a pointer to an unsigned char? Yes.
+
+  // Returns true if the byte ordering used by Java is different from the native byte ordering
+  // of the underlying machine. For example, true for Intel x86, false for Solaris on Sparc.
+  static inline bool is_Java_byte_ordering_different() { return false; }
+
+  // Thus, a swap between native and Java ordering is always a no-op:
+  static inline u2 swap_u2(u2 x) { return x; }
+  static inline u4 swap_u4(u4 x) { return x; }
+  static inline u8 swap_u8(u8 x) { return x; }
+
+  static inline u2 get_native_u2(address p) {
+    return (intptr_t(p) & 1) == 0
+             ?
*(u2*)p + : ( u2(p[0]) << 8 ) + | ( u2(p[1]) ); + } + + static inline u4 get_native_u4(address p) { + switch (intptr_t(p) & 3) { + case 0: return *(u4*)p; + + case 2: return ( u4( ((u2*)p)[0] ) << 16 ) + | ( u4( ((u2*)p)[1] ) ); + + default: return ( u4(p[0]) << 24 ) + | ( u4(p[1]) << 16 ) + | ( u4(p[2]) << 8 ) + | u4(p[3]); + } + } + + static inline u8 get_native_u8(address p) { + switch (intptr_t(p) & 7) { + case 0: return *(u8*)p; + + case 4: return ( u8( ((u4*)p)[0] ) << 32 ) + | ( u8( ((u4*)p)[1] ) ); + + case 2: return ( u8( ((u2*)p)[0] ) << 48 ) + | ( u8( ((u2*)p)[1] ) << 32 ) + | ( u8( ((u2*)p)[2] ) << 16 ) + | ( u8( ((u2*)p)[3] ) ); + + default: return ( u8(p[0]) << 56 ) + | ( u8(p[1]) << 48 ) + | ( u8(p[2]) << 40 ) + | ( u8(p[3]) << 32 ) + | ( u8(p[4]) << 24 ) + | ( u8(p[5]) << 16 ) + | ( u8(p[6]) << 8 ) + | u8(p[7]); + } + } + + + + static inline void put_native_u2(address p, u2 x) { + if ( (intptr_t(p) & 1) == 0 ) { *(u2*)p = x; } + else { + p[0] = x >> 8; + p[1] = x; + } + } + + static inline void put_native_u4(address p, u4 x) { + switch ( intptr_t(p) & 3 ) { + case 0: *(u4*)p = x; + break; + + case 2: ((u2*)p)[0] = x >> 16; + ((u2*)p)[1] = x; + break; + + default: ((u1*)p)[0] = x >> 24; + ((u1*)p)[1] = x >> 16; + ((u1*)p)[2] = x >> 8; + ((u1*)p)[3] = x; + break; + } + } + + static inline void put_native_u8(address p, u8 x) { + switch ( intptr_t(p) & 7 ) { + case 0: *(u8*)p = x; + break; + + case 4: ((u4*)p)[0] = x >> 32; + ((u4*)p)[1] = x; + break; + + case 2: ((u2*)p)[0] = x >> 48; + ((u2*)p)[1] = x >> 32; + ((u2*)p)[2] = x >> 16; + ((u2*)p)[3] = x; + break; + + default: ((u1*)p)[0] = x >> 56; + ((u1*)p)[1] = x >> 48; + ((u1*)p)[2] = x >> 40; + ((u1*)p)[3] = x >> 32; + ((u1*)p)[4] = x >> 24; + ((u1*)p)[5] = x >> 16; + ((u1*)p)[6] = x >> 8; + ((u1*)p)[7] = x; + } + } + + + // Efficient reading and writing of unaligned unsigned data in Java byte ordering (i.e. big-endian ordering) + // (no byte-order reversal is needed since Power CPUs are big-endian oriented). + static inline u2 get_Java_u2(address p) { return get_native_u2(p); } + static inline u4 get_Java_u4(address p) { return get_native_u4(p); } + static inline u8 get_Java_u8(address p) { return get_native_u8(p); } + + static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, x); } + static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, x); } + static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, x); } +}; + +#endif // CPU_PPC_VM_BYTES_PPC_HPP
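The switch on the low address bits above always picks the widest load the actual alignment permits, falling back to byte assembly only for odd addresses; since PPC is big-endian, the assembled value already matches Java byte order. A standalone sketch of the u4 case (a re-implementation for illustration, not the header itself; only the byte path is exercised, so it prints the same on any host, whereas the wide-load shortcuts assume a big-endian machine as on PPC):

  #include <cstdint>
  #include <cstdio>

  typedef uint8_t  u1;
  typedef uint16_t u2;
  typedef uint32_t u4;

  static u4 get_native_u4(const u1* p) {
    switch ((uintptr_t)p & 3) {
      case 0:  return *(const u4*)p;                         // fully aligned
      case 2:  return ((u4)((const u2*)p)[0] << 16)          // halfword aligned
                    |  (u4)((const u2*)p)[1];
      default: return ((u4)p[0] << 24) | ((u4)p[1] << 16)    // byte by byte
                    | ((u4)p[2] << 8)  |  (u4)p[3];
    }
  }

  int main() {
    // 16-byte aligned buffer, so buf+1 is guaranteed misaligned.
    alignas(16) u1 buf[8] = { 0, 0xDE, 0xAD, 0xBE, 0xEF, 0, 0, 0 };
    printf("0x%08x\n", get_native_u4(buf + 1));  // 0xdeadbeef
    return 0;
  }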
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/codeBuffer_ppc.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_CODEBUFFER_PPC_HPP +#define CPU_PPC_VM_CODEBUFFER_PPC_HPP + +private: + void pd_initialize() {} + +public: + void flush_bundle(bool start_new_bundle) {} + +#endif // CPU_PPC_VM_CODEBUFFER_PPC_HPP
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/compiledIC_ppc.cpp	Fri Aug 02 16:46:45 2013 +0200
@@ -0,0 +1,261 @@
+/*
+ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "asm/macroAssembler.inline.hpp"
+#include "code/compiledIC.hpp"
+#include "code/icBuffer.hpp"
+#include "code/nmethod.hpp"
+#include "memory/resourceArea.hpp"
+#include "runtime/mutexLocker.hpp"
+#include "runtime/safepoint.hpp"
+#ifdef COMPILER2
+#include "opto/matcher.hpp"
+#endif
+
+// Release the CompiledICHolder* associated with this call site if there is one.
+void CompiledIC::cleanup_call_site(virtual_call_Relocation* call_site) {
+  // This call site might have become stale so inspect it carefully.
+  NativeCall* call = nativeCall_at(call_site->addr());
+  if (is_icholder_entry(call->destination())) {
+    NativeMovConstReg* value = nativeMovConstReg_at(call_site->cached_value());
+    InlineCacheBuffer::queue_for_release((CompiledICHolder*)value->data());
+  }
+}
+
+bool CompiledIC::is_icholder_call_site(virtual_call_Relocation* call_site) {
+  // This call site might have become stale so inspect it carefully.
+  NativeCall* call = nativeCall_at(call_site->addr());
+  return is_icholder_entry(call->destination());
+}
+
+//-----------------------------------------------------------------------------
+// High-level access to an inline cache. Guaranteed to be MT-safe.
+
+CompiledIC::CompiledIC(nmethod* nm, NativeCall* call)
+  : _ic_call(call)
+{
+  address ic_call = call->instruction_address();
+
+  assert(ic_call != NULL, "ic_call address must be set");
+  assert(nm != NULL, "must pass nmethod");
+  assert(nm->contains(ic_call), "must be in nmethod");
+
+  // Search for the ic_call at the given address.
+  RelocIterator iter(nm, ic_call, ic_call+1);
+  bool ret = iter.next();
+  assert(ret == true, "relocInfo must exist at this address");
+  assert(iter.addr() == ic_call, "must find ic_call");
+  if (iter.type() == relocInfo::virtual_call_type) {
+    virtual_call_Relocation* r = iter.virtual_call_reloc();
+    _is_optimized = false;
+    _value = nativeMovConstReg_at(r->cached_value());
+  } else {
+    assert(iter.type() == relocInfo::opt_virtual_call_type, "must be a virtual call");
+    _is_optimized = true;
+    _value = NULL;
+  }
+}
+
+// ----------------------------------------------------------------------------
+
+// A PPC CompiledStaticCall looks like this:
+//
+// >>>> consts
+//
+// [call target1]
+// [IC cache]
+// [call target2]
+//
+// <<<< consts
+// >>>> insts
+//
+// bl offset26                -+  -+   // I-form LI field: 24 bits, scaled by 4 (+/- 32 MB reach)
+//                             |   |
+// <<<< insts                  |   |
+// >>>> stubs                  |   |
+//                             |   |- trampoline_stub_Reloc
+// trampoline stub:            | <-+
+//   r2 = toc                  |
+//   r2 = [r2 + offset]        |       // Load call target1 from const section
+//   mtctr r2                  |
+//   bctr                      |- static_stub_Reloc
+// comp_to_interp_stub:    <---+
+//   r1 = toc
+//   ICreg = [r1 + IC_offset]          // Load IC from const section
+//   r1    = [r1 + offset]             // Load call target2 from const section
+//   mtctr r1
+//   bctr
+//
+// <<<< stubs
+//
+// The call instruction in the code either
+// - branches directly to a compiled method, if the offset is encodable in the instruction,
+// - branches to the trampoline stub, if the offset to the compiled method is not encodable, or
+// - branches to the compiled_to_interp stub, if the target is interpreted.
+//
+// Further, there are three relocations from the loads to the constants in
+// the constant section.
+//
+// The use of r1 and r2 in the stubs makes it possible to tell them apart.
+
+const int IC_pos_in_java_to_interp_stub = 8;
+#define __ _masm.
+void CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf) {
+#ifdef COMPILER2
+  // Get the mark within main instrs section which is set to the address of the call.
+  address call_addr = cbuf.insts_mark();
+
+  // Note that the code buffer's insts_mark is always relative to insts.
+  // That's why we must use the macroassembler to generate a stub.
+  MacroAssembler _masm(&cbuf);
+
+  // Start the stub.
+  address stub = __ start_a_stub(CompiledStaticCall::to_interp_stub_size());
+  if (stub == NULL) {
+    Compile::current()->env()->record_out_of_memory_failure();
+    return;
+  }
+
+  // For java_to_interp stubs we use R11_scratch1 as scratch register
+  // and in call trampoline stubs we use R12_scratch2. This way we
+  // can distinguish them (see is_NativeCallTrampolineStub_at()).
+  Register reg_scratch = R11_scratch1;
+
+  // Create a static stub relocation which relates this stub
+  // with the call instruction at insts_call_instruction_offset in the
+  // instructions code-section.
+  __ relocate(static_stub_Relocation::spec(call_addr));
+  const int stub_start_offset = __ offset();
+
+  // Now, create the stub's code:
+  // - load the TOC
+  // - load the inline cache oop from the constant pool
+  // - load the call target from the constant pool
+  // - call
+  __ calculate_address_from_global_toc(reg_scratch, __ method_toc());
+  AddressLiteral ic = __ allocate_metadata_address((Metadata *)NULL);
+  __ load_const_from_method_toc(as_Register(Matcher::inline_cache_reg_encode()), ic, reg_scratch);
+
+  if (ReoptimizeCallSequences) {
+    __ b64_patchable((address)-1, relocInfo::none);
+  } else {
+    AddressLiteral a((address)-1);
+    __ load_const_from_method_toc(reg_scratch, a, reg_scratch);
+    __ mtctr(reg_scratch);
+    __ bctr();
+  }
+
+  // FIXME: Assert that the stub can be identified and patched.
+
+  // Java_to_interp_stub_size should be good.
+  assert((__ offset() - stub_start_offset) <= CompiledStaticCall::to_interp_stub_size(),
+         "should be good size");
+  assert(!is_NativeCallTrampolineStub_at(__ addr_at(stub_start_offset)),
+         "must not confuse java_to_interp with trampoline stubs");
+
+  // End the stub.
+  __ end_a_stub();
+#else
+  ShouldNotReachHere();
+#endif
+}
+#undef __
+
+// Size of the java_to_interp stub. This doesn't need to be accurate, but it
+// must be greater than or equal to the real size of the stub.
+// Used for optimization in Compile::Shorten_branches.
+int CompiledStaticCall::to_interp_stub_size() {
+  return 12 * BytesPerInstWord;
+}
+
+// Relocation entries for call stub, compiled java to interpreter.
+// Used for optimization in Compile::Shorten_branches.
+int CompiledStaticCall::reloc_to_interp_stub() {
+  return 5;
+}
+
+void CompiledStaticCall::set_to_interpreted(methodHandle callee, address entry) {
+  address stub = find_stub();
+  guarantee(stub != NULL, "stub not found");
+
+  if (TraceICs) {
+    ResourceMark rm;
+    tty->print_cr("CompiledStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s",
+                  instruction_address(),
+                  callee->name_and_sig_as_C_string());
+  }
+
+  // Creation also verifies the object.
+  NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + IC_pos_in_java_to_interp_stub);
+  NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
+
+  assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(),
+         "a) MT-unsafe modification of inline cache");
+  assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry,
+         "b) MT-unsafe modification of inline cache");
+
+  // Update stub.
+  method_holder->set_data((intptr_t)callee());
+  jump->set_jump_destination(entry);
+
+  // Update jump to call.
+  set_destination_mt_safe(stub);
+}
+
+void CompiledStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) {
+  assert(CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call");
+  // Reset stub.
+  address stub = static_stub->addr();
+  assert(stub != NULL, "stub not found");
+  // Creation also verifies the object.
+  NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + IC_pos_in_java_to_interp_stub);
+  NativeJump* jump = nativeJump_at(method_holder->next_instruction_address());
+  method_holder->set_data(0);
+  jump->set_jump_destination((address)-1);
+}
+
+//-----------------------------------------------------------------------------
+// Non-product mode code
+#ifndef PRODUCT
+
+void CompiledStaticCall::verify() {
+  // Verify call.
+  NativeCall::verify();
+  if (os::is_MP()) {
+    verify_alignment();
+  }
+
+  // Verify stub.
+ address stub = find_stub(); + assert(stub != NULL, "no stub found for static call"); + // Creation also verifies the object. + NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + IC_pos_in_java_to_interp_stub); + NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); + + // Verify state. + assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); +} + +#endif // !PRODUCT
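On the branch-reach question raised in the stub diagram above: a PPC I-form branch (b/bl) carries a 24-bit LI immediate scaled by 4, i.e. a signed 26-bit displacement of about +/-32 MB, which is why calls to farther targets must detour through the trampoline stub. A standalone sketch of that arithmetic (bl_reaches is a hypothetical helper; the port's real decision is made where the call site is emitted and patched):

  #include <cstdint>
  #include <cstdio>

  // True if `to` is reachable by a b/bl at `from_pc`: the displacement must be
  // word-aligned and fit in LI (24 bits) << 2, i.e. a signed 26-bit range.
  static bool bl_reaches(intptr_t from_pc, intptr_t to) {
    intptr_t disp = to - from_pc;
    return (disp & 3) == 0 && disp >= -(intptr_t(1) << 25) && disp < (intptr_t(1) << 25);
  }

  int main() {
    printf("%d\n", bl_reaches(0x10000000, 0x10000100));                        // 1: direct bl
    printf("%d\n", bl_reaches(0x10000000, 0x10000000 + (intptr_t(64) << 20))); // 0: needs trampoline
    return 0;
  }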
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/copy_ppc.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_COPY_PPC_HPP +#define CPU_PPC_VM_COPY_PPC_HPP + +#ifndef PPC64 +#error "copy currently only implemented for PPC64" +#endif + +// Inline functions for memory copy and fill. + +static void pd_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: (void)memcpy(to, from, count * HeapWordSize); + break; + } +} + +static void pd_disjoint_words_atomic(HeapWord* from, HeapWord* to, size_t count) { + switch (count) { + case 8: to[7] = from[7]; + case 7: to[6] = from[6]; + case 6: to[5] = from[5]; + case 5: to[4] = from[4]; + case 4: to[3] = from[3]; + case 3: to[2] = from[2]; + case 2: to[1] = from[1]; + case 1: to[0] = from[0]; + case 0: break; + default: while (count-- > 0) { + *to++ = *from++; + } + break; + } +} + +static void pd_aligned_conjoint_words(HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); +} + +static void pd_aligned_disjoint_words(HeapWord* from, HeapWord* to, size_t count) { + pd_disjoint_words(from, to, count); +} + +static void pd_conjoint_bytes(void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +static void pd_conjoint_bytes_atomic(void* from, void* to, size_t count) { + (void)memmove(to, from, count); +} + +// Template for atomic, element-wise copy. 
+template <class T> +static void copy_conjoint_atomic(T* from, T* to, size_t count) { + if (from > to) { + while (count-- > 0) { + // Copy forwards + *to++ = *from++; + } + } else { + from += count - 1; + to += count - 1; + while (count-- > 0) { + // Copy backwards + *to-- = *from--; + } + } +} + +static void pd_conjoint_jshorts_atomic(jshort* from, jshort* to, size_t count) { + copy_conjoint_atomic<jshort>(from, to, count); +} + +static void pd_conjoint_jints_atomic(jint* from, jint* to, size_t count) { + copy_conjoint_atomic<jint>(from, to, count); +} + +static void pd_conjoint_jlongs_atomic(jlong* from, jlong* to, size_t count) { + copy_conjoint_atomic<jlong>(from, to, count); +} + +static void pd_conjoint_oops_atomic(oop* from, oop* to, size_t count) { + copy_conjoint_atomic<oop>(from, to, count); +} + +static void pd_arrayof_conjoint_bytes(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_bytes_atomic(from, to, count); +} + +static void pd_arrayof_conjoint_jshorts(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); +} + +static void pd_arrayof_conjoint_jints(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); +} + +static void pd_arrayof_conjoint_jlongs(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); +} + +static void pd_arrayof_conjoint_oops(HeapWord* from, HeapWord* to, size_t count) { + pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); +} + +static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { + julong* to = (julong*)tohw; + julong v = ((julong)value << 32) | value; + while (count-- > 0) { + *to++ = v; + } +} + +static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { + pd_fill_to_words(tohw, count, value); +} + +static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { + (void)memset(to, value, count); +} + +static void pd_zero_to_words(HeapWord* tohw, size_t count) { + pd_fill_to_words(tohw, count, 0); +} + +static void pd_zero_to_bytes(void* to, size_t count) { + (void)memset(to, 0, count); +} + +#endif // CPU_PPC_VM_COPY_PPC_HPP
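To see why copy_conjoint_atomic chooses a copy direction: for overlapping ("conjoint") ranges, starting from the end that will not be clobbered first preserves elements that still have to be read, and copying element-wise keeps every jint/jlong/oop store a single aligned access. A standalone usage sketch (array contents invented) with an overlapping right-shift:

  #include <cstddef>
  #include <cstdio>

  // Same shape as the pd template above.
  template <class T>
  static void copy_conjoint_atomic(T* from, T* to, size_t count) {
    if (from > to) {
      while (count-- > 0) *to++ = *from++;  // forward copy is safe
    } else {
      from += count - 1;
      to   += count - 1;
      while (count-- > 0) *to-- = *from--;  // backward copy is safe
    }
  }

  int main() {
    int a[5] = { 1, 2, 3, 4, 5 };
    copy_conjoint_atomic(a, a + 1, 4);      // shift right within the same array
    for (size_t i = 0; i < 5; i++) printf("%d ", a[i]);  // 1 1 2 3 4
    printf("\n");
    return 0;
  }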
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/cppInterpreterGenerator_ppc.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_CPPINTERPRETERGENERATOR_PPC_HPP +#define CPU_PPC_VM_CPPINTERPRETERGENERATOR_PPC_HPP + + address generate_normal_entry(void); + address generate_native_entry(void); + + void lock_method(void); + void unlock_method(void); + + void generate_counter_incr(Label& overflow); + void generate_counter_overflow(Label& do_continue); + + void generate_more_monitors(); + void generate_deopt_handling(Register result_index); + + void generate_compute_interpreter_state(Label& exception_return); + +#endif // CPU_PPC_VM_CPPINTERPRETERGENERATOR_PPC_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/cppInterpreter_ppc.cpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,3044 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "asm/macroAssembler.inline.hpp" +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/cppInterpreter.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterGenerator.hpp" +#include "interpreter/interpreterRuntime.hpp" +#include "oops/arrayOop.hpp" +#include "oops/methodData.hpp" +#include "oops/method.hpp" +#include "oops/oop.inline.hpp" +#include "prims/jvmtiExport.hpp" +#include "prims/jvmtiThreadState.hpp" +#include "runtime/arguments.hpp" +#include "runtime/deoptimization.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/interfaceSupport.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" +#include "runtime/timer.hpp" +#include "runtime/vframeArray.hpp" +#include "utilities/debug.hpp" +#ifdef SHARK +#include "shark/shark_globals.hpp" +#endif + +#ifdef CC_INTERP + +#define __ _masm-> + +// Contains is used for identifying interpreter frames during a stack-walk. +// A frame with a PC in InterpretMethod must be identified as a normal C frame. +bool CppInterpreter::contains(address pc) { + return _code->contains(pc); +} + +#ifdef PRODUCT +#define BLOCK_COMMENT(str) // nothing +#else +#define BLOCK_COMMENT(str) __ block_comment(str) +#endif + +#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") + +static address interpreter_frame_manager = NULL; +static address frame_manager_specialized_return = NULL; +static address native_entry = NULL; + +static address interpreter_return_address = NULL; + +static address unctrap_frame_manager_entry = NULL; + +static address deopt_frame_manager_return_atos = NULL; +static address deopt_frame_manager_return_btos = NULL; +static address deopt_frame_manager_return_itos = NULL; +static address deopt_frame_manager_return_ltos = NULL; +static address deopt_frame_manager_return_ftos = NULL; +static address deopt_frame_manager_return_dtos = NULL; +static address deopt_frame_manager_return_vtos = NULL; + +// A result handler converts/unboxes a native call result into +// a java interpreter/compiler result. The current frame is an +// interpreter frame. 
+address CppInterpreterGenerator::generate_result_handler_for(BasicType type) { + return AbstractInterpreterGenerator::generate_result_handler_for(type); +} + +// tosca based result to c++ interpreter stack based result. +address CppInterpreterGenerator::generate_tosca_to_stack_converter(BasicType type) { + // + // A result is in the native abi result register from a native + // method call. We need to return this result to the interpreter by + // pushing the result on the interpreter's stack. + // + // Registers alive: + // R3_ARG1(R3_RET)/F1_ARG1(F1_RET) - result to move + // R4_ARG2 - address of tos + // LR + // + // Registers updated: + // R3_RET(R3_ARG1) - address of new tos (== R17_tos for T_VOID) + // + + int number_of_used_slots = 1; + + const Register tos = R4_ARG2; + Label done; + Label is_false; + + address entry = __ pc(); + + switch (type) { + case T_BOOLEAN: + __ cmpwi(CCR0, R3_RET, 0); + __ beq(CCR0, is_false); + __ li(R3_RET, 1); + __ stw(R3_RET, 0, tos); + __ b(done); + __ bind(is_false); + __ li(R3_RET, 0); + __ stw(R3_RET, 0, tos); + break; + case T_BYTE: + case T_CHAR: + case T_SHORT: + case T_INT: + __ stw(R3_RET, 0, tos); + break; + case T_LONG: + number_of_used_slots = 2; + // mark unused slot for debugging + // long goes to topmost slot + __ std(R3_RET, -BytesPerWord, tos); + __ li(R3_RET, 0); + __ std(R3_RET, 0, tos); + break; + case T_OBJECT: + __ verify_oop(R3_RET); + __ std(R3_RET, 0, tos); + break; + case T_FLOAT: + __ stfs(F1_RET, 0, tos); + break; + case T_DOUBLE: + number_of_used_slots = 2; + // mark unused slot for debugging + __ li(R3_RET, 0); + __ std(R3_RET, 0, tos); + // double goes to topmost slot + __ stfd(F1_RET, -BytesPerWord, tos); + break; + case T_VOID: + number_of_used_slots = 0; + break; + default: + ShouldNotReachHere(); + } + + __ BIND(done); + + // new expression stack top + __ addi(R3_RET, tos, -BytesPerWord * number_of_used_slots); + + __ blr(); + + return entry; +} + +address CppInterpreterGenerator::generate_stack_to_stack_converter(BasicType type) { + // + // Copy the result from the callee's stack to the caller's stack, + // caller and callee both being interpreted. + // + // Registers alive + // R3_ARG1 - address of callee's tos + BytesPerWord + // R4_ARG2 - address of caller's tos [i.e. free location] + // LR + // + // stack grows upwards, memory grows downwards. + // + // [ free ] <-- callee's tos + // [ optional result ] <-- R3_ARG1 + // [ optional dummy ] + // ... + // [ free ] <-- caller's tos, R4_ARG2 + // ... + // Registers updated + // R3_RET(R3_ARG1) - address of caller's new tos + // + // stack grows upwards, memory grows downwards. + // + // [ free ] <-- current tos, R3_RET + // [ optional result ] + // [ optional dummy ] + // ... + // + + const Register from = R3_ARG1; + const Register ret = R3_ARG1; + const Register tos = R4_ARG2; + const Register tmp1 = R21_tmp1; + const Register tmp2 = R22_tmp2; + + address entry = __ pc(); + + switch (type) { + case T_BOOLEAN: + case T_BYTE: + case T_CHAR: + case T_SHORT: + case T_INT: + case T_FLOAT: + __ lwz(tmp1, 0, from); + __ stw(tmp1, 0, tos); + // New expression stack top. + __ addi(ret, tos, - BytesPerWord); + break; + case T_LONG: + case T_DOUBLE: + // Move both entries for debug purposes even though only one is live. + __ ld(tmp1, BytesPerWord, from); + __ ld(tmp2, 0, from); + __ std(tmp1, 0, tos); + __ std(tmp2, -BytesPerWord, tos); + // New expression stack top. 
+ __ addi(ret, tos, - 2 * BytesPerWord); // two slots + break; + case T_OBJECT: + __ ld(tmp1, 0, from); + __ verify_oop(tmp1); + __ std(tmp1, 0, tos); + // New expression stack top. + __ addi(ret, tos, - BytesPerWord); + break; + case T_VOID: + // New expression stack top. + __ mr(ret, tos); + break; + default: + ShouldNotReachHere(); + } + + __ blr(); + + return entry; +} + +address CppInterpreterGenerator::generate_stack_to_native_abi_converter(BasicType type) { + // + // Load a result from the callee's stack into the caller's expecting + // return register, callee being interpreted, caller being call stub + // or jit code. + // + // Registers alive + // R3_ARG1 - callee expression tos + BytesPerWord + // LR + // + // stack grows upwards, memory grows downwards. + // + // [ free ] <-- callee's tos + // [ optional result ] <-- R3_ARG1 + // [ optional dummy ] + // ... + // + // Registers updated + // R3_RET(R3_ARG1)/F1_RET - result + // + + const Register from = R3_ARG1; + const Register ret = R3_ARG1; + const FloatRegister fret = F1_ARG1; + + address entry = __ pc(); + + // Implemented uniformly for both kinds of endianness. The interpreter + // implements boolean, byte, char, and short as jint (4 bytes). + switch (type) { + case T_BOOLEAN: + case T_CHAR: + // zero extension + __ lwz(ret, 0, from); + break; + case T_BYTE: + case T_SHORT: + case T_INT: + // sign extension + __ lwa(ret, 0, from); + break; + case T_LONG: + __ ld(ret, 0, from); + break; + case T_OBJECT: + __ ld(ret, 0, from); + __ verify_oop(ret); + break; + case T_FLOAT: + __ lfs(fret, 0, from); + break; + case T_DOUBLE: + __ lfd(fret, 0, from); + break; + case T_VOID: + break; + default: + ShouldNotReachHere(); + } + + __ blr(); + + return entry; +} + +address CppInterpreter::return_entry(TosState state, int length) { + assert(interpreter_return_address != NULL, "Not initialized"); + return interpreter_return_address; +} + +address CppInterpreter::deopt_entry(TosState state, int length) { + address ret = NULL; + if (length != 0) { + switch (state) { + case atos: ret = deopt_frame_manager_return_atos; break; + case btos: ret = deopt_frame_manager_return_itos; break; + case ctos: + case stos: + case itos: ret = deopt_frame_manager_return_itos; break; + case ltos: ret = deopt_frame_manager_return_ltos; break; + case ftos: ret = deopt_frame_manager_return_ftos; break; + case dtos: ret = deopt_frame_manager_return_dtos; break; + case vtos: ret = deopt_frame_manager_return_vtos; break; + default: ShouldNotReachHere(); + } + } else { + ret = unctrap_frame_manager_entry; // re-execute the bytecode (e.g. uncommon trap, popframe) + } + assert(ret != NULL, "Not initialized"); + return ret; +} + +// +// Helpers for commoning out cases in the various type of method entries. +// + +// +// Registers alive +// R16_thread - JavaThread* +// R1_SP - old stack pointer +// R19_method - callee's Method +// R17_tos - address of caller's tos (prepushed) +// R15_prev_state - address of caller's BytecodeInterpreter or 0 +// return_pc in R21_tmp15 (only when called within generate_native_entry) +// +// Registers updated +// R14_state - address of callee's interpreter state +// R1_SP - new stack pointer +// CCR4_is_synced - current method is synchronized +// +void CppInterpreterGenerator::generate_compute_interpreter_state(Label& stack_overflow_return) { + // + // Stack layout at this point: + // + // F1 [TOP_IJAVA_FRAME_ABI] <-- R1_SP + // alignment (optional) + // [F1's outgoing Java arguments] <-- R17_tos + // ... 
+ // F2 [PARENT_IJAVA_FRAME_ABI] + // ... + + //============================================================================= + // Allocate space for locals other than the parameters, the + // interpreter state, monitors, and the expression stack. + + const Register local_count = R21_tmp1; + const Register parameter_count = R22_tmp2; + const Register max_stack = R23_tmp3; + // Must not be overwritten within this method! + // const Register return_pc = R29_tmp9; + + const ConditionRegister is_synced = CCR4_is_synced; + const ConditionRegister is_native = CCR6; + const ConditionRegister is_static = CCR7; + + assert(is_synced != is_native, "condition code registers must be distinct"); + assert(is_synced != is_static, "condition code registers must be distinct"); + assert(is_native != is_static, "condition code registers must be distinct"); + + { + + // Local registers + const Register top_frame_size = R24_tmp4; + const Register access_flags = R25_tmp5; + const Register state_offset = R26_tmp6; + Register mem_stack_limit = R27_tmp7; + const Register page_size = R28_tmp8; + + BLOCK_COMMENT("compute_interpreter_state {"); + + // access_flags = method->access_flags(); + // TODO: PPC port: assert(4 == methodOopDesc::sz_access_flags(), "unexpected field size"); + __ lwa(access_flags, method_(access_flags)); + + // parameter_count = method->constMethod->size_of_parameters(); + // TODO: PPC port: assert(2 == ConstMethod::sz_size_of_parameters(), "unexpected field size"); + __ ld(max_stack, in_bytes(Method::const_offset()), R19_method); // Max_stack holds constMethod for a while. + __ lhz(parameter_count, in_bytes(ConstMethod::size_of_parameters_offset()), max_stack); + + // local_count = method->constMethod()->max_locals(); + // TODO: PPC port: assert(2 == ConstMethod::sz_max_locals(), "unexpected field size"); + __ lhz(local_count, in_bytes(ConstMethod::size_of_locals_offset()), max_stack); + + // max_stack = method->constMethod()->max_stack(); + // TODO: PPC port: assert(2 == ConstMethod::sz_max_stack(), "unexpected field size"); + __ lhz(max_stack, in_bytes(ConstMethod::max_stack_offset()), max_stack); + + if (EnableInvokeDynamic) { + // Take into account 'extra_stack_entries' needed by method handles (see method.hpp). + __ addi(max_stack, max_stack, Method::extra_stack_entries()); + } + + // mem_stack_limit = thread->stack_limit(); + __ ld(mem_stack_limit, thread_(stack_overflow_limit)); + + // Point locals at the first argument. Method's locals are the + // parameters on top of caller's expression stack. + + // tos points past last Java argument + __ sldi(R18_locals, parameter_count, Interpreter::logStackElementSize); + __ add(R18_locals, R17_tos, R18_locals); + + // R18_locals - i*BytesPerWord points to i-th Java local (i starts at 0) + + // Set is_native, is_synced, is_static - will be used later. + __ testbitdi(is_native, R0, access_flags, JVM_ACC_NATIVE_BIT); + __ testbitdi(is_synced, R0, access_flags, JVM_ACC_SYNCHRONIZED_BIT); + assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile"); + __ testbitdi(is_static, R0, access_flags, JVM_ACC_STATIC_BIT); + + // PARENT_IJAVA_FRAME_ABI + // + // frame_size = + // round_to((local_count - parameter_count)*BytesPerWord + + // 2*BytesPerWord + + // alignment + + // frame::interpreter_frame_cinterpreterstate_size_in_bytes() + // sizeof(PARENT_IJAVA_FRAME_ABI) + // method->is_synchronized() ? 
sizeof(BasicObjectLock) : 0 + + // max_stack*BytesPerWord, + // 16) + // + // Note that this calculation is exactly mirrored by + // AbstractInterpreter::layout_activation_impl() [ and + // AbstractInterpreter::size_activation() ]. Which is used by + // deoptimization so that it can allocate the proper sized + // frame. This only happens for interpreted frames so the extra + // notes below about max_stack below are not important. The other + // thing to note is that for interpreter frames other than the + // current activation the size of the stack is the size of the live + // portion of the stack at the particular bcp and NOT the maximum + // stack that the method might use. + // + // If we're calling a native method, we replace max_stack (which is + // zero) with space for the worst-case signature handler varargs + // vector, which is: + // + // max_stack = max(Argument::n_register_parameters, parameter_count+2); + // + // We add two slots to the parameter_count, one for the jni + // environment and one for a possible native mirror. We allocate + // space for at least the number of ABI registers, even though + // InterpreterRuntime::slow_signature_handler won't write more than + // parameter_count+2 words when it creates the varargs vector at the + // top of the stack. The generated slow signature handler will just + // load trash into registers beyond the necessary number. We're + // still going to cut the stack back by the ABI register parameter + // count so as to get SP+16 pointing at the ABI outgoing parameter + // area, so we need to allocate at least that much even though we're + // going to throw it away. + // + + // Adjust max_stack for native methods: + Label skip_native_calculate_max_stack; + __ bfalse(is_native, skip_native_calculate_max_stack); + // if (is_native) { + // max_stack = max(Argument::n_register_parameters, parameter_count+2); + __ addi(max_stack, parameter_count, 2*Interpreter::stackElementWords); + __ cmpwi(CCR0, max_stack, Argument::n_register_parameters); + __ bge(CCR0, skip_native_calculate_max_stack); + __ li(max_stack, Argument::n_register_parameters); + // } + __ bind(skip_native_calculate_max_stack); + // max_stack is now in bytes + __ slwi(max_stack, max_stack, Interpreter::logStackElementSize); + + // Calculate number of non-parameter locals (in slots): + Label not_java; + __ btrue(is_native, not_java); + // if (!is_native) { + // local_count = non-parameter local count + __ sub(local_count, local_count, parameter_count); + // } else { + // // nothing to do: method->max_locals() == 0 for native methods + // } + __ bind(not_java); + + + // Calculate top_frame_size and parent_frame_resize. 
+ { + const Register parent_frame_resize = R12_scratch2; + + BLOCK_COMMENT("Compute top_frame_size."); + // top_frame_size = TOP_IJAVA_FRAME_ABI + // + size of interpreter state + __ li(top_frame_size, frame::top_ijava_frame_abi_size + + frame::interpreter_frame_cinterpreterstate_size_in_bytes()); + // + max_stack + __ add(top_frame_size, top_frame_size, max_stack); + // + stack slots for a BasicObjectLock for synchronized methods + { + Label not_synced; + __ bfalse(is_synced, not_synced); + __ addi(top_frame_size, top_frame_size, frame::interpreter_frame_monitor_size_in_bytes()); + __ bind(not_synced); + } + // align + __ round_to(top_frame_size, frame::alignment_in_bytes); + + + BLOCK_COMMENT("Compute parent_frame_resize."); + // parent_frame_resize = R1_SP - R17_tos + __ sub(parent_frame_resize, R1_SP, R17_tos); + //__ li(parent_frame_resize, 0); + // + PARENT_IJAVA_FRAME_ABI + // + extra two slots for the no-parameter/no-locals + // method result + __ addi(parent_frame_resize, parent_frame_resize, + frame::parent_ijava_frame_abi_size + + 2*Interpreter::stackElementSize); + // + (locals_count - params_count) + __ sldi(R0, local_count, Interpreter::logStackElementSize); + __ add(parent_frame_resize, parent_frame_resize, R0); + // align + __ round_to(parent_frame_resize, frame::alignment_in_bytes); + + // + // Stack layout at this point: + // + // The new frame F0 hasn't yet been pushed, F1 is still the top frame. + // + // F0 [TOP_IJAVA_FRAME_ABI] + // alignment (optional) + // [F0's full operand stack] + // [F0's monitors] (optional) + // [F0's BytecodeInterpreter object] + // F1 [PARENT_IJAVA_FRAME_ABI] + // alignment (optional) + // [F0's Java result] + // [F0's non-arg Java locals] + // [F1's outgoing Java arguments] <-- R17_tos + // ... + // F2 [PARENT_IJAVA_FRAME_ABI] + // ... + + + // Calculate new R14_state + // and + // test that the new memory stack pointer is above the limit, + // throw a StackOverflowError otherwise. + __ sub(R11_scratch1/*F1's SP*/, R1_SP, parent_frame_resize); + __ addi(R14_state, R11_scratch1/*F1's SP*/, + -frame::interpreter_frame_cinterpreterstate_size_in_bytes()); + __ sub(R11_scratch1/*F0's SP*/, + R11_scratch1/*F1's SP*/, top_frame_size); + + BLOCK_COMMENT("Test for stack overflow:"); + __ cmpld(CCR0/*is_stack_overflow*/, R11_scratch1, mem_stack_limit); + __ blt(CCR0/*is_stack_overflow*/, stack_overflow_return); + + + //============================================================================= + // Frame_size doesn't overflow the stack. Allocate new frame and + // initialize interpreter state. + + // Register state + // + // R21_tmp1 - local_count + // R22_tmp2 - parameter_count + // R23_tmp3 - max_stack + // + // R24_tmp4 - top_frame_size + // R25_tmp5 - access_flags + // CCR4_is_synced - is_synced + // + // R14_state - pointer to the uninitialized new BytecodeInterpreter. + + // _last_Java_pc just needs to be close enough that we can identify + // the frame as an interpreted frame. It does not need to be the + // exact return address from either calling + // BytecodeInterpreter::InterpretMethod or the call to a jni native method. + // So we can initialize it here with the address of an instruction in + // this code fragment. We only do this initialization for java frames + // where InterpretMethod needs a way to get a good pc value to + // store in the thread state. For interpreter frames used to call + // jni native code we just zero the value in the state and set a + // pc as needed in the native entry code.
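Recapping the pointer arithmetic above in C++ (an editorial sketch; `old_sp` and `stack_overflow_limit` stand for R1_SP and the thread field loaded into `mem_stack_limit` earlier):

  // Where the resized caller frame (F1), the new interpreter state, and the
  // new top frame (F0) land, and when we bail out to stack_overflow_return.
  intptr_t f1_sp     = old_sp - parent_frame_resize;
  intptr_t new_state = f1_sp - frame::interpreter_frame_cinterpreterstate_size_in_bytes();
  intptr_t f0_sp     = f1_sp - top_frame_size;
  bool is_stack_overflow = (uintptr_t)f0_sp < (uintptr_t)stack_overflow_limit;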
+ // + // const Register last_Java_pc_addr = GR24_SCRATCH; // QQQ 27 + // const Register last_Java_pc = GR26_SCRATCH; + + // Must reference stack before setting new SP since Windows + // will not be able to deliver the exception on a bad SP. + // Windows also insists that we bang each page one at a time in order + // for the OS to map in the reserved pages. If we bang only + // the final page, Windows stops delivering exceptions to our + // VectoredExceptionHandler and terminates our program. + // Linux only requires a single bang but it's rare to have + // to bang more than 1 page so the code is enabled for both OS's. + + // BANG THE STACK + // + // Nothing to do for PPC, because updating the SP will automatically + // bang the page. + + // Up to here we have calculated the delta for the new C-frame and + // checked for a stack-overflow. Now we can safely update SP and + // resize the C-frame. + + // R14_state has already been calculated. + __ push_interpreter_frame(top_frame_size, parent_frame_resize, + R25_tmp5, R26_tmp6, R27_tmp7, R28_tmp8); + + } + + // + // Stack layout at this point: + // + // F0 has been pushed! + // + // F0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP + // alignment (optional) (now it's here, if required) + // [F0's full operand stack] + // [F0's monitors] (optional) + // [F0's BytecodeInterpreter object] + // F1 [PARENT_IJAVA_FRAME_ABI] + // alignment (optional) (now it's here, if required) + // [F0's Java result] + // [F0's non-arg Java locals] + // [F1's outgoing Java arguments] + // ... + // F2 [PARENT_IJAVA_FRAME_ABI] + // ... + // + // R14_state points to F0's BytecodeInterpreter object. + // + + } + + //============================================================================= + // The new BytecodeInterpreter object is safe, let's initialize it: + BLOCK_COMMENT("New BytecodeInterpreter object is safe."); + + { + // Locals + const Register bytecode_addr = R24_tmp4; + const Register constants = R25_tmp5; + const Register tos = R26_tmp6; + const Register stack_base = R27_tmp7; + const Register local_addr = R28_tmp8; + { + Label L; + __ btrue(is_native, L); + // if (!is_native) { + // bytecode_addr = constMethod->codes(); + __ ld(bytecode_addr, method_(const)); + __ addi(bytecode_addr, bytecode_addr, in_bytes(ConstMethod::codes_offset())); + // } + __ bind(L); + } + + __ ld(constants, in_bytes(Method::const_offset()), R19_method); + __ ld(constants, in_bytes(ConstMethod::constants_offset()), constants); + + // state->_prev_link = prev_state; + __ std(R15_prev_state, state_(_prev_link)); + + // For assertions only. + // TODO: not needed anyway because it coincides with `_monitor_base'. Remove! + // state->_self_link = state; + DEBUG_ONLY(__ std(R14_state, state_(_self_link));) + + // state->_thread = thread; + __ std(R16_thread, state_(_thread)); + + // state->_method = method; + __ std(R19_method, state_(_method)); + + // state->_locals = locals; + __ std(R18_locals, state_(_locals)); + + // state->_oop_temp = NULL; + __ li(R0, 0); + __ std(R0, state_(_oop_temp)); + + // state->_last_Java_fp = *R1_SP // Use *R1_SP as fp + __ ld(R0, _abi(callers_sp), R1_SP); + __ std(R0, state_(_last_Java_fp)); + + BLOCK_COMMENT("load Stack base:"); + { + // Stack_base.
+ // if (!method->synchronized()) { + // stack_base = state; + // } else { + // stack_base = (uintptr_t)state - sizeof(BasicObjectLock); + // } + Label L; + __ mr(stack_base, R14_state); + __ bfalse(is_synced, L); + __ addi(stack_base, stack_base, -frame::interpreter_frame_monitor_size_in_bytes()); + __ bind(L); + } + + // state->_mdx = NULL; + __ li(R0, 0); + __ std(R0, state_(_mdx)); + + { + // if (method->is_native()) state->_bcp = NULL; + // else state->_bcp = bytecode_addr; + Label label1, label2; + __ bfalse(is_native, label1); + __ std(R0, state_(_bcp)); + __ b(label2); + __ bind(label1); + __ std(bytecode_addr, state_(_bcp)); + __ bind(label2); + } + + + // state->_result._to_call._callee = NULL; + __ std(R0, state_(_result._to_call._callee)); + + // state->_monitor_base = state; + __ std(R14_state, state_(_monitor_base)); + + // state->_msg = BytecodeInterpreter::method_entry; + __ li(R0, BytecodeInterpreter::method_entry); + __ stw(R0, state_(_msg)); + + // state->_last_Java_sp = R1_SP; + __ std(R1_SP, state_(_last_Java_sp)); + + // state->_stack_base = stack_base; + __ std(stack_base, state_(_stack_base)); + + // tos = stack_base - 1 slot (prepushed); + // state->_stack.Tos(tos); + __ addi(tos, stack_base, - Interpreter::stackElementSize); + __ std(tos, state_(_stack)); + + + { + BLOCK_COMMENT("get last_Java_pc:"); + // if (!is_native) state->_last_Java_pc = <some_ip_in_this_code_buffer>; + // else state->_last_Java_pc = NULL; (just for neatness) + Label label1, label2; + __ btrue(is_native, label1); + __ get_PC_trash_LR(R0); + __ std(R0, state_(_last_Java_pc)); + __ b(label2); + __ bind(label1); + __ li(R0, 0); + __ std(R0, state_(_last_Java_pc)); + __ bind(label2); + } + + + // stack_limit = tos - max_stack; + __ sub(R0, tos, max_stack); + // state->_stack_limit = stack_limit; + __ std(R0, state_(_stack_limit)); + + + // cache = method->constants()->cache(); + __ ld(R0, ConstantPool::cache_offset_in_bytes(), constants); + // state->_constants = method->constants()->cache(); + __ std(R0, state_(_constants)); + + + + //============================================================================= + // synchronized method, allocate and initialize method object lock. 
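Collected in one place, the stores above amount to the following field initialization (an editorial C++-style summary, not patch code; `monitor_size` is frame::interpreter_frame_monitor_size_in_bytes(), `code_base` the constMethod's bytecodes, and `sp` the new R1_SP):

  state->_prev_link    = prev_state;                // caller's interpreter state, or 0
  state->_thread       = thread;
  state->_method       = method;
  state->_locals       = locals;
  state->_oop_temp     = NULL;
  state->_mdx          = NULL;
  state->_bcp          = is_native ? NULL : code_base;
  state->_monitor_base = (intptr_t*)state;
  state->_msg          = BytecodeInterpreter::method_entry;
  state->_last_Java_sp = sp;
  state->_stack_base   = is_synchronized ? (intptr_t*)((char*)state - monitor_size)
                                         : (intptr_t*)state;
  state->_stack        = state->_stack_base - 1;    // tos is prepushed
  state->_stack_limit  = (intptr_t*)((char*)state->_stack - max_stack_in_bytes);
  state->_constants    = method->constants()->cache();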
+ // if (!method->is_synchronized()) goto fill_locals_with_0x0s; + Label fill_locals_with_0x0s; + __ bfalse(is_synced, fill_locals_with_0x0s); + + // pool_holder = method->constants()->pool_holder(); + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + { + Label label1, label2; + // lockee = NULL; for java methods, correct value will be inserted in BytecodeInterpretMethod.hpp + __ li(R0,0); + __ bfalse(is_native, label2); + + __ bfalse(is_static, label1); + // if (method->is_static()) lockee = + // pool_holder->klass_part()->java_mirror(); + __ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(), constants); + __ ld(R0/*lockee*/, mirror_offset, R11_scratch1/*pool_holder*/); + __ b(label2); + + __ bind(label1); + // else lockee = *(oop*)locals; + __ ld(R0/*lockee*/, 0, R18_locals); + __ bind(label2); + + // monitor->set_obj(lockee); + __ std(R0/*lockee*/, BasicObjectLock::obj_offset_in_bytes(), stack_base); + } + + // See if we need to zero the locals + __ BIND(fill_locals_with_0x0s); + + + //============================================================================= + // fill locals with 0x0s + Label locals_zeroed; + __ btrue(is_native, locals_zeroed); + + if (true /* zerolocals */ || ClearInterpreterLocals) { + // local_count is already num_locals_slots - num_param_slots + __ sldi(R0, parameter_count, Interpreter::logStackElementSize); + __ sub(local_addr, R18_locals, R0); + __ cmpdi(CCR0, local_count, 0); + __ ble(CCR0, locals_zeroed); + + __ mtctr(local_count); + //__ ld_const_addr(R0, (address) 0xcafe0000babe); + __ li(R0, 0); + + Label zero_slot; + __ bind(zero_slot); + + // first local is at local_addr + __ std(R0, 0, local_addr); + __ addi(local_addr, local_addr, -BytesPerWord); + __ bdnz(zero_slot); + } + + __ BIND(locals_zeroed); + + } + BLOCK_COMMENT("} compute_interpreter_state"); +} + +// Generate code to initiate compilation on invocation counter overflow. +void CppInterpreterGenerator::generate_counter_overflow(Label& continue_entry) { + // Registers alive + // R14_state + // R16_thread + // + // Registers updated + // R14_state + // R3_ARG1 (=R3_RET) + // R4_ARG2 + + // After entering the vm we remove the activation and retry the + // entry point in case the compilation is complete. + + // InterpreterRuntime::frequency_counter_overflow takes one argument + // that indicates if the counter overflow occurs at a backwards + // branch (NULL bcp). We pass zero. The call returns the address + // of the verified entry point for the method or NULL if the + // compilation did not complete (either went background or bailed + // out). + __ li(R4_ARG2, 0); + + // Pass false to call_VM so it doesn't check for pending exceptions, + // since at this point in the method invocation the exception + // handler would try to exit the monitor of synchronized methods + // which haven't been entered yet. + // + // Returns verified_entry_point or NULL, we don't care which. + // + // Do not use the variant `frequency_counter_overflow' that returns + // a structure, because this will change the argument list by a + // hidden parameter (gcc 4.1). + + __ call_VM(noreg, + CAST_FROM_FN_PTR(address, InterpreterRuntime::frequency_counter_overflow), + R4_ARG2, + false); + // Returns verified_entry_point or NULL, we don't care which as we ignore it + // and run interpreted. + + // Reload method, it may have moved. + __ ld(R19_method, state_(_method)); + + // We jump now to the label "continue_after_compile". 
+ __ b(continue_entry); +} + +// Increment invocation count and check for overflow. +// +// R19_method must contain Method* of method to profile. +void CppInterpreterGenerator::generate_counter_incr(Label& overflow) { + Label done; + const Register Rcounters = R12_scratch2; + const Register iv_be_count = R11_scratch1; + const Register invocation_limit = R12_scratch2; + const Register invocation_limit_addr = invocation_limit; + + // Load, and if necessary allocate, the MethodCounters object. + __ get_method_counters(R19_method, Rcounters, done); + + // Update standard invocation counters. + __ increment_invocation_counter(Rcounters, iv_be_count, R0); + + // Compare against limit. + BLOCK_COMMENT("Compare counter against limit:"); + assert(4 == sizeof(InvocationCounter::InterpreterInvocationLimit), + "must be 4 bytes"); + __ load_const(invocation_limit_addr, (address)&InvocationCounter::InterpreterInvocationLimit); + __ lwa(invocation_limit, 0, invocation_limit_addr); + __ cmpw(CCR0, iv_be_count, invocation_limit); + __ bge(CCR0, overflow); + __ bind(done); +} + +// +// Call a JNI method. +// +// Interpreter stub for calling a native method. (C++ interpreter) +// This sets up a somewhat different looking stack for calling the native method +// than the typical interpreter frame setup. +// +address CppInterpreterGenerator::generate_native_entry(void) { + if (native_entry != NULL) return native_entry; + address entry = __ pc(); + + // Read + // R16_thread + // R15_prev_state - address of caller's BytecodeInterpreter, if this snippet + // gets called by the frame manager. + // R19_method - callee's Method + // R17_tos - address of caller's tos + // R1_SP - caller's stack pointer + // R21_sender_SP - initial caller sp + // + // Update + // R14_state - address of caller's BytecodeInterpreter + // R3_RET - integer result, if any. + // F1_RET - float result, if any. + // + // + // Stack layout at this point: + // + // 0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP + // alignment (optional) + // [outgoing Java arguments] <-- R17_tos + // ... + // PARENT [PARENT_IJAVA_FRAME_ABI] + // ... + + const bool inc_counter = UseCompiler || CountCompiledCalls; + + const Register signature_handler_fd = R21_tmp1; + const Register pending_exception = R22_tmp2; + const Register result_handler_addr = R23_tmp3; + const Register native_method_fd = R24_tmp4; + const Register access_flags = R25_tmp5; + const Register active_handles = R26_tmp6; + const Register sync_state = R27_tmp7; + const Register sync_state_addr = sync_state; // Address is dead after use. + const Register suspend_flags = R24_tmp4; + + const Register return_pc = R28_tmp8; // Register will be locked for some time. + + const ConditionRegister is_synced = CCR4_is_synced; // Live-on-exit from compute_interpreter_state. + + + // R1_SP still points to caller's SP at this point. + + // Save initial_caller_sp to caller's abi. The caller frame must be + // resized before returning to get rid of the c2i arguments (if + // any). + // Override the saved SP with the senderSP so we can pop c2i + // arguments (if any) off when we return. + __ std(R21_sender_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP); + + // Save LR to caller's frame. We don't use _abi(lr) here, because it is not safe.
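For reference, generate_counter_incr above boils down to this check (an editorial sketch; the real increment is emitted by increment_invocation_counter, and the counter field name is approximate):

  // Bump the combined invocation/backedge counter and branch out on overflow.
  int count = ++method->method_counters()->_iv_be_count;   // hypothetical field name
  if (count >= InvocationCounter::InterpreterInvocationLimit) {
    // taken branch: lands in InterpreterRuntime::frequency_counter_overflow (see above)
  }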
+ __ mflr(return_pc); + __ std(return_pc, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + + assert(return_pc->is_nonvolatile(), "return_pc must be a non-volatile register"); + + __ verify_method_ptr(R19_method); + + //============================================================================= + + // If this snippet gets called by the frame manager (at label + // `call_special'), then R15_prev_state is valid. If this snippet + // is not called by the frame manager, but e.g. by the call stub or + // by compiled code, then R15_prev_state is invalid. + { + // Set R15_prev_state to 0 if we don't return to the frame + // manager; we will return to the call_stub or to compiled code + // instead. If R15_prev_state is 0 there will be only one + // interpreter frame (we will set this up later) in this C frame! + // So we must take care about retrieving prev_state_(_prev_link) + // and restoring R1_SP when popping that interpreter. + Label prev_state_is_valid; + + __ load_const(R11_scratch1/*frame_manager_returnpc_addr*/, (address)&frame_manager_specialized_return); + __ ld(R12_scratch2/*frame_manager_returnpc*/, 0, R11_scratch1/*frame_manager_returnpc_addr*/); + __ cmpd(CCR0, return_pc, R12_scratch2/*frame_manager_returnpc*/); + __ beq(CCR0, prev_state_is_valid); + + __ li(R15_prev_state, 0); + + __ BIND(prev_state_is_valid); + } + + //============================================================================= + // Allocate new frame and initialize interpreter state. + + Label exception_return; + Label exception_return_sync_check; + Label stack_overflow_return; + + // Generate new interpreter state and jump to stack_overflow_return in case of + // a stack overflow. + generate_compute_interpreter_state(stack_overflow_return); + + //============================================================================= + // Increment invocation counter. On overflow, entry to JNI method + // will be compiled. + Label invocation_counter_overflow; + if (inc_counter) { + generate_counter_incr(invocation_counter_overflow); + } + + Label continue_after_compile; + __ BIND(continue_after_compile); + + // access_flags = method->access_flags(); + // Load access flags. + assert(access_flags->is_nonvolatile(), + "access_flags must be in a non-volatile register"); + // Type check. + // TODO: PPC port: assert(4 == methodOopDesc::sz_access_flags(), "unexpected field size"); + __ lwz(access_flags, method_(access_flags)); + + // We don't want to reload R19_method and access_flags after calls + // to some helper functions. + assert(R19_method->is_nonvolatile(), "R19_method must be a non-volatile register"); + + // Check for synchronized methods. Must happen AFTER invocation counter + // check, so method is not locked if counter overflows. + + { + Label method_is_not_synced; + // Is_synced is still alive. + assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile"); + __ bfalse(is_synced, method_is_not_synced); + + lock_method(); + // Reload method, it may have moved. + __ ld(R19_method, state_(_method)); + + __ BIND(method_is_not_synced); + } + + // jvmti/jvmpi support + __ notify_method_entry(); + + // Reload method, it may have moved. + __ ld(R19_method, state_(_method)); + + //============================================================================= + // Get and call the signature handler + + __ ld(signature_handler_fd, method_(signature_handler)); + Label call_signature_handler; + + __ cmpdi(CCR0, signature_handler_fd, 0); + __ bne(CCR0, call_signature_handler); + + // Method has never been called. 
Either generate a specialized + // handler or point to the slow one. + // + // Pass parameter 'false' to avoid exception check in call_VM. + __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), R19_method, false); + + // Check for an exception while looking up the target method. If we + // incurred one, bail. + __ ld(pending_exception, thread_(pending_exception)); + __ cmpdi(CCR0, pending_exception, 0); + __ bne(CCR0, exception_return_sync_check); // has pending exception + + // reload method + __ ld(R19_method, state_(_method)); + + // Reload signature handler, it may have been created/assigned in the meanwhile + __ ld(signature_handler_fd, method_(signature_handler)); + + __ BIND(call_signature_handler); + + // Before we call the signature handler we push a new frame to + // protect the interpreter frame volatile registers when we return + // from jni but before we can get back to Java. + + // First set the frame anchor while the SP/FP registers are + // convenient and the slow signature handler can use this same frame + // anchor. + + // We have a TOP_IJAVA_FRAME here, which belongs to us. + __ set_top_ijava_frame_at_SP_as_last_Java_frame(R1_SP, R12_scratch2/*tmp*/); + + // Now the interpreter frame (and its call chain) have been + // invalidated and flushed. We are now protected against eager + // being enabled in native code. Even if it goes eager the + // registers will be reloaded as clean and we will invalidate after + // the call so no spurious flush should be possible. + + // Call signature handler and pass locals address. + // + // Our signature handlers copy required arguments to the C stack + // (outgoing C args), R3_ARG1 to R10_ARG8, and F1_ARG1 to + // F13_ARG13. + __ mr(R3_ARG1, R18_locals); + __ ld(signature_handler_fd, 0, signature_handler_fd); + __ call_stub(signature_handler_fd); + // reload method + __ ld(R19_method, state_(_method)); + + // Remove the register parameter varargs slots we allocated in + // compute_interpreter_state. SP+16 ends up pointing to the ABI + // outgoing argument area. + // + // Not needed on PPC64. + //__ add(SP, SP, Argument::n_register_parameters*BytesPerWord); + + assert(result_handler_addr->is_nonvolatile(), "result_handler_addr must be in a non-volatile register"); + // Save across call to native method. + __ mr(result_handler_addr, R3_RET); + + // Set up fixed parameters and call the native method. + // If the method is static, get mirror into R4_ARG2. 
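The signature-handler protocol implemented above, in C++ terms (an editorial sketch; `return_with_exception` is shorthand for the branch to exception_return_sync_check):

  // Lazily install, then invoke, the signature handler.
  address handler = method->signature_handler();
  if (handler == NULL) {
    InterpreterRuntime::prepare_native_call(thread, method); // may leave a pending exception
    if (thread->has_pending_exception()) return_with_exception();
    handler = method->signature_handler();                   // installed now
  }
  // Copies the Java locals into R3_ARG1..R10_ARG8 / F1_ARG1..F13_ARG13
  // and the C outgoing argument area:
  handler(locals);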
+ + { + Label method_is_not_static; + // access_flags is non-volatile and still valid; no need to reload it. + + // Test the static bit. + __ testbitdi(CCR0, R0, access_flags, JVM_ACC_STATIC_BIT); + __ bfalse(CCR0, method_is_not_static); + + // constants = method->constants(); + __ ld(R11_scratch1, in_bytes(Method::const_offset()), R19_method); + __ ld(R11_scratch1/*constants*/, in_bytes(ConstMethod::constants_offset()), R11_scratch1); + // pool_holder = method->constants()->pool_holder(); + __ ld(R11_scratch1/*pool_holder*/, ConstantPool::pool_holder_offset_in_bytes(), + R11_scratch1/*constants*/); + + const int mirror_offset = in_bytes(Klass::java_mirror_offset()); + + // mirror = pool_holder->klass_part()->java_mirror(); + __ ld(R0/*mirror*/, mirror_offset, R11_scratch1/*pool_holder*/); + // state->_native_mirror = mirror; + __ std(R0/*mirror*/, state_(_oop_temp)); + // R4_ARG2 = &state->_oop_temp; + __ addir(R4_ARG2, state_(_oop_temp)); + + __ BIND(method_is_not_static); + } + + // At this point, arguments have been copied off the stack into + // their JNI positions. Oops are boxed in-place on the stack, with + // handles copied to arguments. The result handler address is in a + // register. + + // pass JNIEnv address as first parameter + __ addir(R3_ARG1, thread_(jni_environment)); + + // Load the native_method entry before we change the thread state. + __ ld(native_method_fd, method_(native_function)); + + //============================================================================= + // Transition from _thread_in_Java to _thread_in_native. As soon as + // we make this change the safepoint code needs to be certain that + // the last Java frame we established is good. The pc in that frame + // just needs to be near here, not an actual return address. + + // We use release_store_fence to update values like the thread state, where + // we don't want the current thread to continue until all our prior memory + // accesses (including the new thread state) are visible to other threads. + __ li(R0, _thread_in_native); + __ release(); + + // TODO: PPC port: assert(4 == JavaThread::sz_thread_state(), "unexpected field size"); + __ stw(R0, thread_(thread_state)); + + if (UseMembar) { + __ fence(); + } + + //============================================================================= + // Call the native method. Argument registers must not have been + // overwritten since "__ call_stub(signature_handler);" (except for + // ARG1 and ARG2 for static methods) + __ call_c(native_method_fd); + + __ std(R3_RET, state_(_native_lresult)); + __ stfd(F1_RET, state_(_native_fresult)); + + // The frame_manager_lr field, which we use for setting the last + // java frame, gets overwritten by the signature handler. Restore + // it now. + __ get_PC_trash_LR(R11_scratch1); + __ std(R11_scratch1, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + + // Because of GC R19_method may no longer be valid. + + // Block, if necessary, before resuming in _thread_in_Java state. + // In order for GC to work, don't clear the last_Java_sp until after + // blocking. + + + + //============================================================================= + // Switch thread to "native transition" state before reading the + // synchronization state. This additional state is necessary + // because reading and testing the synchronization state is not + // atomic w.r.t. GC, as this scenario demonstrates: Java thread A, + // in _thread_in_native state, loads _not_synchronized and is + // preempted.
VM thread changes sync state to synchronizing and + // suspends threads for GC. Thread A is resumed to finish this + // native method, but doesn't block here since it didn't see any + // synchronization in progress, and escapes. + + // We use release_store_fence to update values like the thread state, where + // we don't want the current thread to continue until all our prior memory + // accesses (including the new thread state) are visible to other threads. + __ li(R0/*thread_state*/, _thread_in_native_trans); + __ release(); + __ stw(R0/*thread_state*/, thread_(thread_state)); + if (UseMembar) { + __ fence(); + } + // Write serialization page so that the VM thread can do a pseudo remote + // membar. We use the current thread pointer to calculate a thread + // specific offset to write to within the page. This minimizes bus + // traffic due to cache line collision. + else { + __ serialize_memory(R16_thread, R11_scratch1, R12_scratch2); + } + + // Now before we return to java we must look for a current safepoint + // (a new safepoint cannot start since we entered native_trans). + // We must check here because a current safepoint could be modifying + // the caller's registers right this moment. + + // Acquire isn't strictly necessary here because of the fence, but + // sync_state is declared to be volatile, so we do it anyway. + __ load_const(sync_state_addr, SafepointSynchronize::address_of_state()); + + // TODO: PPC port: assert(4 == SafepointSynchronize::sz_state(), "unexpected field size"); + __ lwz(sync_state, 0, sync_state_addr); + + // TODO: PPC port: assert(4 == Thread::sz_suspend_flags(), "unexpected field size"); + __ lwz(suspend_flags, thread_(suspend_flags)); + + __ acquire(); + + Label sync_check_done; + Label do_safepoint; + // No synchronization in progress nor yet synchronized + __ cmpwi(CCR0, sync_state, SafepointSynchronize::_not_synchronized); + // not suspended + __ cmpwi(CCR1, suspend_flags, 0); + + __ bne(CCR0, do_safepoint); + __ beq(CCR1, sync_check_done); + __ bind(do_safepoint); + // Block. We do the call directly and leave the current + // last_Java_frame setup undisturbed. We must save any possible + // native result across the call. No oop is present. + + __ mr(R3_ARG1, R16_thread); + __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, JavaThread::check_special_condition_for_native_trans), + relocInfo::none); + __ bind(sync_check_done); + + //============================================================================= + // <<<<<< Back in Interpreter Frame >>>>> + + // We are in thread_in_native_trans here and back in the normal + // interpreter frame. We don't have to do anything special about + // safepoints and we can switch to Java mode anytime we are ready. + + // Note: frame::interpreter_frame_result has a dependency on how the + // method result is saved across the call to post_method_exit. For + // native methods it assumes that the non-FPU/non-void result is + // saved in _native_lresult and a FPU result in _native_fresult. If + // this changes then the interpreter_frame_result implementation + // will need to be updated too. + + // On PPC64, we have stored the result directly after the native call. + + //============================================================================= + // back in Java + + // We use release_store_fence to update values like the thread state, where + // we don't want the current thread to continue until all our prior memory + // accesses (including the new thread state) are visible to other threads.
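The state-transition protocol just emitted, summarized in C++ (an editorial sketch; accessor spellings are approximate):

  // Leave native code: enter _thread_in_native_trans, publish it, then
  // check whether a safepoint or suspension requires us to block.
  thread->set_thread_state(_thread_in_native_trans);   // release-store
  if (UseMembar) fence(); else write_serialization_page(thread);
  if (SafepointSynchronize::state() != SafepointSynchronize::_not_synchronized ||
      thread->suspend_flags() != 0) {
    JavaThread::check_special_condition_for_native_trans(thread); // may block
  }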
+ __ li(R0/*thread_state*/, _thread_in_Java); + __ release(); + __ stw(R0/*thread_state*/, thread_(thread_state)); + if (UseMembar) { + __ fence(); + } + + __ reset_last_Java_frame(); + + // Reload R19_method; the call killed it. We can't look at + // state->_method until we're back in java state because in java + // state gc can't happen until we get to a safepoint. + // + // We've set thread_state to _thread_in_Java already, so restoring + // R19_method from R14_state works; R19_method is invalid, because + // GC may have happened. + __ ld(R19_method, state_(_method)); // reload method, may have moved + + // jvmdi/jvmpi support. Whether we've got an exception pending or + // not, and whether unlocking throws an exception or not, we notify + // on native method exit. If we do have an exception, we'll end up + // in the caller's context to handle it, so if we don't do the + // notify here, we'll drop it on the floor. + + __ notify_method_exit(true/*native method*/, + ilgl /*illegal state (not used for native methods)*/); + + + + //============================================================================= + // Handle exceptions + + // See if we must unlock. + // + { + Label method_is_not_synced; + // is_synced is still alive + assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile"); + __ bfalse(is_synced, method_is_not_synced); + + unlock_method(); + + __ bind(method_is_not_synced); + } + + // Reset active handles after returning from native. + // thread->active_handles()->clear(); + __ ld(active_handles, thread_(active_handles)); + // JNIHandleBlock::_top is an int. + // TODO: PPC port: assert(4 == JNIHandleBlock::top_size_in_bytes(), "unexpected field size"); + __ li(R0, 0); + __ stw(R0, JNIHandleBlock::top_offset_in_bytes(), active_handles); + + Label no_pending_exception_from_native_method; + __ ld(R0/*pending_exception*/, thread_(pending_exception)); + __ cmpdi(CCR0, R0/*pending_exception*/, 0); + __ beq(CCR0, no_pending_exception_from_native_method); + + + //----------------------------------------------------------------------------- + // An exception is pending. We call into the runtime only if the + // caller was not interpreted. If it was interpreted the + // interpreter will do the correct thing. If it isn't interpreted + // (call stub/compiled code) we will change our return and continue. + __ BIND(exception_return); + + Label return_to_initial_caller_with_pending_exception; + __ cmpdi(CCR0, R15_prev_state, 0); + __ beq(CCR0, return_to_initial_caller_with_pending_exception); + + // We are returning to an interpreter activation, just pop the state, + // pop our frame, leave the exception pending, and return. + __ pop_interpreter_state(/*prev_state_may_be_0=*/false); + __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1 /* set to return pc */, R22_tmp2); + __ mtlr(R21_tmp1); + __ blr(); + + __ BIND(exception_return_sync_check); + + assert(is_synced->is_nonvolatile(), "is_synced must be non-volatile"); + __ bfalse(is_synced, exception_return); + unlock_method(); + __ b(exception_return); + + + __ BIND(return_to_initial_caller_with_pending_exception); + // We are returning to a c2i-adapter / call-stub, get the address of the + // exception handler, pop the frame and return to the handler. + + // First, pop to caller's frame. + __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1 /* set to return pc */, R22_tmp2); + + __ push_frame_abi112(0, R11_scratch1); + // Get the address of the exception handler.
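In C++ terms, the next instructions perform roughly the following (an editorial sketch of the runtime call emitted below):

  // Ask the runtime where this return address should be dispatched,
  // then hand the pending exception to that handler.
  address handler = SharedRuntime::exception_handler_for_return_address(thread, return_pc);
  oop exception = thread->pending_exception();
  thread->clear_pending_exception();
  // jump to handler with the exception in R3_ARG1 and return_pc in R4_ARG2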
+ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), + R16_thread, + R21_tmp1 /* return pc */); + __ pop_frame(); + + // Load the PC of the exception handler into LR. + __ mtlr(R3_RET); + + // Load exception into R3_ARG1 and clear pending exception in thread. + __ ld(R3_ARG1/*exception*/, thread_(pending_exception)); + __ li(R4_ARG2, 0); + __ std(R4_ARG2, thread_(pending_exception)); + + // Load the original return pc into R4_ARG2. + __ mr(R4_ARG2/*issuing_pc*/, R21_tmp1); + + // Resize frame to get rid of a potential extension. + __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2); + + // Return to exception handler. + __ blr(); + + + //----------------------------------------------------------------------------- + // No exception pending. + __ BIND(no_pending_exception_from_native_method); + + // Move native method result back into proper registers and return. + // Invoke result handler (may unbox/promote). + __ ld(R3_RET, state_(_native_lresult)); + __ lfd(F1_RET, state_(_native_fresult)); + __ call_stub(result_handler_addr); + + // We have created a new BytecodeInterpreter object, now we must destroy it. + // + // Restore previous R14_state and caller's SP. R15_prev_state may + // be 0 here, because our caller may be the call_stub or compiled + // code. + __ pop_interpreter_state(/*prev_state_may_be_0=*/true); + __ pop_interpreter_frame(R11_scratch1, R12_scratch2, R21_tmp1 /* set to return pc */, R22_tmp2); + // Resize frame to get rid of a potential extension. + __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2); + + // Must use the return pc which was loaded from the caller's frame + // as the VM uses return-pc-patching for deoptimization. + __ mtlr(R21_tmp1); + __ blr(); + + + + //============================================================================= + // We encountered an exception while computing the interpreter + // state, so R14_state isn't valid. Act as if we just returned from + // the callee method with a pending exception. + __ BIND(stack_overflow_return); + + // + // Register state: + // R14_state invalid; trashed by compute_interpreter_state + // R15_prev_state valid, but may be 0 + // + // R1_SP valid, points to caller's SP; wasn't yet updated by + // compute_interpreter_state + // + + // Create exception oop and make it pending. + + // Throw the exception via RuntimeStub "throw_StackOverflowError_entry". + // + // Previously, we called C-Code directly. As a consequence, a + // possible GC tried to process the argument oops of the top frame + // (see RegisterMap::clear, which sets the corresponding flag to + // true). This led to crashes because: + // 1. The top register map did not contain locations for the argument registers + // 2. The arguments are dead anyway and could already be overwritten in the worst case + // Solution: Call via special runtime stub that pushes its own + // frame. This runtime stub has the flag "CodeBlob::caller_must_gc_arguments()" + // set to "false", which prevents the dead arguments from getting GC'd. + // + // 2 cases exist: + // 1. We were called by the c2i adapter / call stub + // 2. We were called by the frame manager + // + // Both cases are handled by this code: + // 1. - initial_caller_sp was saved in both cases on entry, so it's safe to load it back even if it was not changed. + // - control flow will be: + // throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->excp_blob of caller method + // 2.
- control flow will be: + // throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->rethrow_excp_entry of frame manager->resume_method + // Since we restored the caller SP above, the rethrow_excp_entry can restore the original interpreter state + // registers using the stack and resume the calling method with a pending excp. + + // Pop any c2i extension from the stack, restore LR just to be sure + __ ld(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + __ mtlr(R0); + // Resize frame to get rid of a potential extension. + __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2); + + // Load target address of the runtime stub. + __ load_const(R12_scratch2, (StubRoutines::throw_StackOverflowError_entry())); + __ mtctr(R12_scratch2); + __ bctr(); + + + //============================================================================= + // Counter overflow. + + if (inc_counter) { + // Handle invocation counter overflow + __ bind(invocation_counter_overflow); + + generate_counter_overflow(continue_after_compile); + } + + native_entry = entry; + return entry; +} + +bool AbstractInterpreter::can_be_compiled(methodHandle m) { + // No special entry points that preclude compilation. + return true; +} + +// Unlock the current method. +// +void CppInterpreterGenerator::unlock_method(void) { + // Find preallocated monitor and unlock method. Method monitor is + // the first one. + + // Registers alive + // R14_state + // + // Registers updated + // volatiles + // + const Register monitor = R4_ARG2; + + // Pass address of initial monitor we allocated. + // + // First monitor. + __ addi(monitor, R14_state, -frame::interpreter_frame_monitor_size_in_bytes()); + + // Unlock method + __ unlock_object(monitor); +} + +// Lock the current method. +// +void CppInterpreterGenerator::lock_method(void) { + // Find preallocated monitor and lock method. Method monitor is the + // first one. + + // + // Registers alive + // R14_state + // + // Registers updated + // volatiles + // + + const Register monitor = R4_ARG2; + const Register object = R5_ARG3; + + // Pass address of initial monitor we allocated. + __ addi(monitor, R14_state, -frame::interpreter_frame_monitor_size_in_bytes()); + + // Pass object address. + __ ld(object, BasicObjectLock::obj_offset_in_bytes(), monitor); + + // Lock method. + __ lock_object(monitor, object); +} + +// Generate code for handling resuming a deopted method. +void CppInterpreterGenerator::generate_deopt_handling(Register result_index) { + + //============================================================================= + // Returning from a compiled method into a deopted method. The + // bytecode at the bcp has completed. The result of the bytecode is + // in the native abi (the tosca for the template based + // interpreter). Any stack space that was used by the bytecode that + // has completed has been removed (e.g. parameters for an invoke) so + // all that we have to do is place any pending result on the + // expression stack and resume execution on the next bytecode. + + Label return_from_deopt_common; + + // R3_RET and F1_RET are live here! Load the array index of the + // required result stub address and continue at return_from_deopt_common. + + // Deopt needs to jump to here to enter the interpreter (return a result). 
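The per-type entry points that follow all funnel into return_from_deopt_common; in effect (an editorial sketch):

  // Each deopt return entry records the tos state of the completed
  // bytecode as a result-type index, then joins the common path.
  int result_index = AbstractInterpreter::BasicType_as_index(type); // T_OBJECT .. T_VOID
  goto return_from_deopt_common;  // moves the native-ABI result onto the expression stack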
+ deopt_frame_manager_return_atos = __ pc(); + __ li(result_index, AbstractInterpreter::BasicType_as_index(T_OBJECT)); + __ b(return_from_deopt_common); + + deopt_frame_manager_return_btos = __ pc(); + __ li(result_index, AbstractInterpreter::BasicType_as_index(T_BOOLEAN)); + __ b(return_from_deopt_common); + + deopt_frame_manager_return_itos = __ pc(); + __ li(result_index, AbstractInterpreter::BasicType_as_index(T_INT)); + __ b(return_from_deopt_common); + + deopt_frame_manager_return_ltos = __ pc(); + __ li(result_index, AbstractInterpreter::BasicType_as_index(T_LONG)); + __ b(return_from_deopt_common); + + deopt_frame_manager_return_ftos = __ pc(); + __ li(result_index, AbstractInterpreter::BasicType_as_index(T_FLOAT)); + __ b(return_from_deopt_common); + + deopt_frame_manager_return_dtos = __ pc(); + __ li(result_index, AbstractInterpreter::BasicType_as_index(T_DOUBLE)); + __ b(return_from_deopt_common); + + deopt_frame_manager_return_vtos = __ pc(); + __ li(result_index, AbstractInterpreter::BasicType_as_index(T_VOID)); + // Last one, fall-through to return_from_deopt_common. + + // Deopt return common. An index is present that lets us move any + // possible result being returned to the interpreter's stack. + // + __ BIND(return_from_deopt_common); + +} + +// Generate the code to handle a more_monitors message from the c++ interpreter. +void CppInterpreterGenerator::generate_more_monitors() { + + // + // Registers alive + // R16_thread - JavaThread* + // R15_prev_state - previous BytecodeInterpreter or 0 + // R14_state - BytecodeInterpreter* address of receiver's interpreter state + // R1_SP - old stack pointer + // + // Registers updated + // R1_SP - new stack pointer + // + + // Very-local scratch registers. + const Register old_tos = R21_tmp1; + const Register new_tos = R22_tmp2; + const Register stack_base = R23_tmp3; + const Register stack_limit = R24_tmp4; + const Register slot = R25_tmp5; + const Register n_slots = R25_tmp5; + + // Interpreter state fields. + const Register msg = R24_tmp4; + + // Load up relevant interpreter state. + + __ ld(stack_base, state_(_stack_base)); // Old stack_base + __ ld(old_tos, state_(_stack)); // Old tos + __ ld(stack_limit, state_(_stack_limit)); // Old stack_limit + + // extracted monitor_size + int monitor_size = frame::interpreter_frame_monitor_size_in_bytes(); + assert(Assembler::is_aligned((unsigned int)monitor_size, + (unsigned int)frame::alignment_in_bytes), + "size of a monitor must respect alignment of SP"); + + // Save and restore top LR + __ ld(R12_scratch2, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + __ resize_frame(-monitor_size, R11_scratch1);// Allocate space for new monitor + __ std(R12_scratch2, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + // Initial_caller_sp is used as unextended_sp for non-initial callers. + __ std(R1_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP); + __ addi(stack_base, stack_base, -monitor_size); // New stack_base + __ addi(new_tos, old_tos, -monitor_size); // New tos + __ addi(stack_limit, stack_limit, -monitor_size); // New stack_limit + + __ std(R1_SP, state_(_last_Java_sp)); // Update frame_bottom + + __ std(stack_base, state_(_stack_base)); // Update stack_base + __ std(new_tos, state_(_stack)); // Update tos + __ std(stack_limit, state_(_stack_limit)); // Update stack_limit + + __ li(msg, BytecodeInterpreter::got_monitors); // Tell interpreter we allocated the lock + __ stw(msg, state_(_msg)); + + // Shuffle expression stack down.
Recall that stack_base points + // just above the new expression stack bottom. Old_tos and new_tos + // are used to scan thru the old and new expression stacks. + + Label copy_slot, copy_slot_finished; + __ sub(n_slots, stack_base, new_tos); + __ srdi_(n_slots, n_slots, LogBytesPerWord); // compute number of slots to copy + assert(LogBytesPerWord == 3, "conflicts assembler instructions"); + __ beq(CCR0, copy_slot_finished); // nothing to copy + + __ mtctr(n_slots); + + // loop + __ bind(copy_slot); + __ ldu(slot, BytesPerWord, old_tos); // slot = *++old_tos; + __ stdu(slot, BytesPerWord, new_tos); // *++new_tos = slot; + __ bdnz(copy_slot); + + __ bind(copy_slot_finished); + + // Restart interpreter + __ li(R0, 0); + __ std(R0, BasicObjectLock::obj_offset_in_bytes(), stack_base); // Mark lock as unused +} + +address CppInterpreterGenerator::generate_normal_entry(void) { + if (interpreter_frame_manager != NULL) return interpreter_frame_manager; + + address entry = __ pc(); + + address return_from_native_pc = (address) NULL; + + // Initial entry to frame manager (from call_stub or c2i_adapter) + + // + // Registers alive + // R16_thread - JavaThread* + // R19_method - callee's Method (method to be invoked) + // R17_tos - address of sender tos (prepushed) + // R1_SP - SP prepared by call stub such that caller's outgoing args are near top + // LR - return address to caller (call_stub or c2i_adapter) + // R21_sender_SP - initial caller sp + // + // Registers updated + // R15_prev_state - 0 + // + // Stack layout at this point: + // + // 0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP + // alignment (optional) + // [outgoing Java arguments] <-- R17_tos + // ... + // PARENT [PARENT_IJAVA_FRAME_ABI] + // ... + // + + // Save initial_caller_sp to caller's abi. + // The caller frame must be resized before returning to get rid of + // the c2i part on top of the calling compiled frame (if any). + // R21_tmp1 must match sender_sp in gen_c2i_adapter. + // Now override the saved SP with the senderSP so we can pop c2i + // arguments (if any) off when we return. + __ std(R21_sender_SP, _top_ijava_frame_abi(initial_caller_sp), R1_SP); + + // Save LR to caller's frame. We don't use _abi(lr) here, + // because it is not safe. + __ mflr(R0); + __ std(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + + // If we come here, it is the first invocation of the frame manager. + // So there is no previous interpreter state. + __ li(R15_prev_state, 0); + + + // Fall through to where "recursive" invocations go. + + //============================================================================= + // Dispatch an instance of the interpreter. Recursive activations + // come here. + + Label re_dispatch; + __ BIND(re_dispatch); + + // + // Registers alive + // R16_thread - JavaThread* + // R19_method - callee's Method + // R17_tos - address of caller's tos (prepushed) + // R15_prev_state - address of caller's BytecodeInterpreter or 0 + // R1_SP - caller's SP trimmed such that caller's outgoing args are near top. + // + // Stack layout at this point: + // + // 0 [TOP_IJAVA_FRAME_ABI] + // alignment (optional) + // [outgoing Java arguments] + // ... + // PARENT [PARENT_IJAVA_FRAME_ABI] + // ... + + // fall through to interpreted execution + + //============================================================================= + // Allocate a new Java frame and initialize the new interpreter state. 
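For reference, the monitor allocation and stack shuffle in generate_more_monitors above are equivalent to the following (an editorial C++ sketch; `monitor_words` is monitor_size expressed in words):

  // Make room for one BasicObjectLock below the existing monitors by
  // sliding the live expression stack down one monitor's worth.
  intptr_t* new_tos = old_tos - monitor_words;
  stack_base -= monitor_words;                   // new monitor lives just below old stack_base
  int n = (int)(stack_base - new_tos);           // live expression-stack slots to move
  for (int i = 1; i <= n; i++) {
    new_tos[i] = old_tos[i];                     // dest is below src, ascending copy is safe
  }
  ((BasicObjectLock*)stack_base)->set_obj(NULL); // mark the new lock slot unused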
+ + Label stack_overflow_return; + + // Create a suitable new Java frame plus a new BytecodeInterpreter instance + // in the current (frame manager's) C frame. + generate_compute_interpreter_state(stack_overflow_return); + + // fall through + + //============================================================================= + // Interpreter dispatch. + + Label call_interpreter; + __ BIND(call_interpreter); + + // + // Registers alive + // R16_thread - JavaThread* + // R15_prev_state - previous BytecodeInterpreter or 0 + // R14_state - address of receiver's BytecodeInterpreter + // R1_SP - receiver's stack pointer + // + + // Thread fields. + const Register pending_exception = R21_tmp1; + + // Interpreter state fields. + const Register msg = R24_tmp4; + + // MethodOop fields. + const Register parameter_count = R25_tmp5; + const Register result_index = R26_tmp6; + + const Register dummy = R28_tmp8; + + // Address of various interpreter stubs. + // R29_tmp9 is reserved. + const Register stub_addr = R27_tmp7; + + // Uncommon trap needs to jump to here to enter the interpreter + // (re-execute current bytecode). + unctrap_frame_manager_entry = __ pc(); + + // If we are profiling, store our fp (BSP) in the thread so we can + // find it during a tick. + if (Arguments::has_profile()) { + // On PPC64 we store the pointer to the current BytecodeInterpreter, + // instead of the bsp of ia64. This should suffice to be able to + // find all interesting information. + __ std(R14_state, thread_(last_interpreter_fp)); + } + + // R16_thread, R14_state and R15_prev_state are nonvolatile + // registers. There is no need to save these. If we needed to save + // some state in the current Java frame, this could be a place to do + // so. + + // Call Java bytecode dispatcher passing "BytecodeInterpreter* istate". + __ call_VM_leaf(CAST_FROM_FN_PTR(address, + JvmtiExport::can_post_interpreter_events() + ? BytecodeInterpreter::runWithChecks + : BytecodeInterpreter::run), + R14_state); + + interpreter_return_address = __ last_calls_return_pc(); + + // R16_thread, R14_state and R15_prev_state have their values preserved. + + // If we are profiling, clear the fp in the thread to tell + // the profiler that we are no longer in the interpreter. + if (Arguments::has_profile()) { + __ li(R11_scratch1, 0); + __ std(R11_scratch1, thread_(last_interpreter_fp)); + } + + // Load message from bytecode dispatcher. 
+ // TODO: PPC port: guarantee(4 == BytecodeInterpreter::sz_msg(), "unexpected field size"); + __ lwz(msg, state_(_msg)); + + + Label more_monitors; + Label return_from_native; + Label return_from_native_common; + Label return_from_native_no_exception; + Label return_from_interpreted_method; + Label return_from_recursive_activation; + Label unwind_recursive_activation; + Label resume_interpreter; + Label return_to_initial_caller; + Label unwind_initial_activation; + Label unwind_initial_activation_pending_exception; + Label call_method; + Label call_special; + Label retry_method; + Label retry_method_osr; + Label popping_frame; + Label throwing_exception; + + // Branch according to the received message + + __ cmpwi(CCR1, msg, BytecodeInterpreter::call_method); + __ cmpwi(CCR2, msg, BytecodeInterpreter::return_from_method); + + __ beq(CCR1, call_method); + __ beq(CCR2, return_from_interpreted_method); + + __ cmpwi(CCR3, msg, BytecodeInterpreter::more_monitors); + __ cmpwi(CCR4, msg, BytecodeInterpreter::throwing_exception); + + __ beq(CCR3, more_monitors); + __ beq(CCR4, throwing_exception); + + __ cmpwi(CCR5, msg, BytecodeInterpreter::popping_frame); + __ cmpwi(CCR6, msg, BytecodeInterpreter::do_osr); + + __ beq(CCR5, popping_frame); + __ beq(CCR6, retry_method_osr); + + __ stop("bad message from interpreter"); + + + //============================================================================= + // Add a monitor just below the existing one(s). State->_stack_base + // points to the lowest existing one, so we insert the new one just + // below it and shuffle the expression stack down. Per the stack + // layout picture above, we must update _stack_base, _stack, _stack_limit + // and _last_Java_sp in the interpreter state. + + __ BIND(more_monitors); + + generate_more_monitors(); + __ b(call_interpreter); + + generate_deopt_handling(result_index); + + // Restoring the R14_state is already done by the deopt_blob. + + // Current tos includes no parameter slots. + __ ld(R17_tos, state_(_stack)); + __ li(msg, BytecodeInterpreter::deopt_resume); + __ b(return_from_native_common); + + // We are sent here when we are unwinding from a native method or + // adapter with an exception pending. We need to notify the interpreter + // that there is an exception to process. + // We arrive here also if the frame manager called an (interpreted) target + // which returns with a StackOverflow exception. + // The control flow in this case is: + // frame_manager->throw_excp_stub->forward_excp->rethrow_excp_entry + + AbstractInterpreter::_rethrow_exception_entry = __ pc(); + + // Restore R14_state. + __ ld(R14_state, 0, R1_SP); + __ addi(R14_state, R14_state, + -frame::interpreter_frame_cinterpreterstate_size_in_bytes()); + + // Store exception oop into thread object. + __ std(R3_RET, thread_(pending_exception)); + __ li(msg, BytecodeInterpreter::method_resume /*rethrow_exception*/); + // + // NOTE: the interpreter frame as set up by deopt does NOT include + // any parameter slots (good thing since we have no callee here + // and couldn't remove them) so we don't have to do any calculations + // here to figure it out. + // + __ ld(R17_tos, state_(_stack)); + __ b(return_from_native_common); + + + //============================================================================= + // Returning from a native method. Result is in the native abi + // location so we must move it to the java expression stack.
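The compare-and-branch cascade above implements, in effect, this dispatch on the interpreter's message (an editorial sketch; `istate` is the BytecodeInterpreter in R14_state):

  switch (istate->msg()) {
    case BytecodeInterpreter::call_method:        goto call_method;
    case BytecodeInterpreter::return_from_method: goto return_from_interpreted_method;
    case BytecodeInterpreter::more_monitors:      goto more_monitors;
    case BytecodeInterpreter::throwing_exception: goto throwing_exception;
    case BytecodeInterpreter::popping_frame:      goto popping_frame;
    case BytecodeInterpreter::do_osr:             goto retry_method_osr;
    default:                                      fatal("bad message from interpreter");
  }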
+ + __ BIND(return_from_native); + guarantee(return_from_native_pc == (address) NULL, "precondition"); + return_from_native_pc = __ pc(); + + // Restore R14_state. + __ ld(R14_state, 0, R1_SP); + __ addi(R14_state, R14_state, + -frame::interpreter_frame_cinterpreterstate_size_in_bytes()); + + // + // Registers alive + // R16_thread + // R14_state - address of caller's BytecodeInterpreter. + // R3_RET - integer result, if any. + // F1_RET - float result, if any. + // + // Registers updated + // R19_method - callee's Method + // R17_tos - caller's tos, with outgoing args popped + // result_index - index of result handler. + // msg - message for resuming interpreter. + // + + // Very-local scratch registers. + + const ConditionRegister have_pending_exception = CCR0; + + // Load callee Method, gc may have moved it. + __ ld(R19_method, state_(_result._to_call._callee)); + + // Load address of caller's tos. includes parameter slots. + __ ld(R17_tos, state_(_stack)); + + // Pop callee's parameters. + + __ ld(parameter_count, in_bytes(Method::const_offset()), R19_method); + __ lhz(parameter_count, in_bytes(ConstMethod::size_of_parameters_offset()), parameter_count); + __ sldi(parameter_count, parameter_count, Interpreter::logStackElementSize); + __ add(R17_tos, R17_tos, parameter_count); + + // Result stub address array index + // TODO: PPC port: assert(4 == methodOopDesc::sz_result_index(), "unexpected field size"); + __ lwa(result_index, method_(result_index)); + + __ li(msg, BytecodeInterpreter::method_resume); + + // + // Registers alive + // R16_thread + // R14_state - address of caller's BytecodeInterpreter. + // R17_tos - address of caller's tos with outgoing args already popped + // R3_RET - integer return value, if any. + // F1_RET - float return value, if any. + // result_index - index of result handler. + // msg - message for resuming interpreter. + // + // Registers updated + // R3_RET - new address of caller's tos, including result, if any + // + + __ BIND(return_from_native_common); + + // Check for pending exception + __ ld(pending_exception, thread_(pending_exception)); + __ cmpdi(CCR0, pending_exception, 0); + __ beq(CCR0, return_from_native_no_exception); + + // If there's a pending exception, we really have no result, so + // R3_RET is dead. Resume_interpreter assumes the new tos is in + // R3_RET. + __ mr(R3_RET, R17_tos); + // `resume_interpreter' expects R15_prev_state to be alive. + __ ld(R15_prev_state, state_(_prev_link)); + __ b(resume_interpreter); + + __ BIND(return_from_native_no_exception); + + // No pending exception, copy method result from native ABI register + // to tos. + + // Address of stub descriptor address array. + __ load_const(stub_addr, CppInterpreter::tosca_result_to_stack()); + + // Pass address of tos to stub. + __ mr(R4_ARG2, R17_tos); + + // Address of stub descriptor address. + __ sldi(result_index, result_index, LogBytesPerWord); + __ add(stub_addr, stub_addr, result_index); + + // Stub descriptor address. + __ ld(stub_addr, 0, stub_addr); + + // TODO: don't do this via a call, do it in place! + // + // call stub via descriptor + // in R3_ARG1/F1_ARG1: result value (R3_RET or F1_RET) + __ call_stub(stub_addr); + + // new tos = result of call in R3_RET + + // `resume_interpreter' expects R15_prev_state to be alive. 
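In C++ terms, the return-from-native path above does roughly this (an editorial sketch; `tosca_result_to_stack` is the stub table loaded above, `native_result` the value saved in R3_RET/F1_RET, and `resume_interpreter` shorthand for the branch taken below):

  // Pop the callee's parameters, then convert the native-ABI result
  // into one or two expression-stack slots via the per-type stub.
  intptr_t* tos = istate->stack() + method->size_of_parameters();
  if (thread->has_pending_exception()) {
    resume_interpreter(tos);               // no result to convert
  } else {
    int index = method->result_index();
    tos = tosca_result_to_stack()[index](native_result, tos); // returns the new tos
    resume_interpreter(tos);
  }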
+ __ ld(R15_prev_state, state_(_prev_link)); + __ b(resume_interpreter); + + //============================================================================= + // We encountered an exception while computing the interpreter + // state, so R14_state isn't valid. Act as if we just returned from + // the callee method with a pending exception. + __ BIND(stack_overflow_return); + + // + // Registers alive + // R16_thread - JavaThread* + // R1_SP - old stack pointer + // R19_method - callee's Method + // R17_tos - address of caller's tos (prepushed) + // R15_prev_state - address of caller's BytecodeInterpreter or 0 + // R18_locals - address of callee's locals array + // + // Registers updated + // R3_RET - address of resuming tos, if recursive unwind + + Label Lskip_unextend_SP; + + { + const ConditionRegister is_initial_call = CCR0; + const Register tos_save = R21_tmp1; + const Register tmp = R22_tmp2; + + assert(tos_save->is_nonvolatile(), "need a nonvolatile"); + + // Is the exception thrown in the initial Java frame of this frame + // manager frame? + __ cmpdi(is_initial_call, R15_prev_state, 0); + __ bne(is_initial_call, Lskip_unextend_SP); + + // Pop any c2i extension from the stack. This is necessary in the + // non-recursive case (that is, we were called by the c2i adapter, + // meaning we have no prev state). In this case we entered the frame + // manager through a special entry which pushes the original + // unextended SP to the stack. Here we load it back. + __ ld(R0, _top_ijava_frame_abi(frame_manager_lr), R1_SP); + __ mtlr(R0); + // Resize frame to get rid of a potential extension. + __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2); + + // Fall through + + __ bind(Lskip_unextend_SP); + + // Throw the exception via RuntimeStub "throw_StackOverflowError_entry". + // + // Previously, we called C-Code directly. As a consequence, a + // possible GC tried to process the argument oops of the top frame + // (see RegisterMap::clear, which sets the corresponding flag to + // true). This led to crashes because: + // 1. The top register map did not contain locations for the argument registers + // 2. The arguments are dead anyway and could already be overwritten in the worst case + // Solution: Call via special runtime stub that pushes its own frame. This runtime stub has the flag + // "CodeBlob::caller_must_gc_arguments()" set to "false", which prevents the dead arguments from getting GC'd. + // + // 2 cases exist: + // 1. We were called by the c2i adapter / call stub + // 2. We were called by the frame manager + // + // Both cases are handled by this code: + // 1. - initial_caller_sp was saved on stack => Load it back and we're ok + // - control flow will be: + // throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep->excp_blob of calling method + // 2. - control flow will be: + // throw_stackoverflow_stub->VM->throw_stackoverflow_stub->forward_excep-> + // ->rethrow_excp_entry of frame manager->resume_method + // Since we restored the caller SP above, the rethrow_excp_entry can restore the original interpreter state + // registers using the stack and resume the calling method with a pending excp. + + __ load_const(R3_ARG1, (StubRoutines::throw_StackOverflowError_entry())); + __ mtctr(R3_ARG1); + __ bctr(); + } + //============================================================================= + // We have popped a frame from an interpreted call. We are assured + // of returning to an interpreted call by the popframe abi.
We have
+ // no return value; all we have to do is pop the current frame and
+ // then make sure that the top of stack (of the caller) gets set to
+ // where it was when we entered the callee (i.e. the args are still
+ // in place). Or we are returning to the interpreter. In the first
+ // case we must extract the result (if any) from the java expression
+ // stack and store it in the location the native abi would expect
+ // for a call returning this type. In the second case we must simply
+ // do a stack to stack move as we unwind.
+
+ __ BIND(popping_frame);
+
+ // Registers alive
+ // R14_state
+ // R15_prev_state
+ // R17_tos
+ //
+ // Registers updated
+ // R19_method
+ // R3_RET
+ // msg
+ {
+ Label L;
+
+ // Reload callee method, gc may have moved it.
+ __ ld(R19_method, state_(_method));
+
+ // We may be returning to a deoptimized frame in which case the
+ // usual assumption of a recursive return is not true.
+
+ // not equal = is recursive call
+ __ cmpdi(CCR0, R15_prev_state, 0);
+
+ __ bne(CCR0, L);
+
+ // Pop_frame capability.
+ // The pop_frame api says that the underlying frame is a Java frame; in this case
+ // (prev_state==null) it must be a compiled frame:
+ //
+ // Stack at this point: I, C2I + C, ...
+ //
+ // The outgoing arguments of the call have just been copied (popframe_preserve_args).
+ // By the pop_frame api, we must end up in an interpreted frame. So the compiled frame
+ // will be deoptimized. Deoptimization will restore the outgoing arguments from
+ // popframe_preserve_args, adjust the tos such that it includes the popframe_preserve_args,
+ // and adjust the bci such that the call will be executed again.
+ // We have no results, so we just pop the interpreter frame, resize the compiled frame to get rid
+ // of the c2i extension and return to the deopt_handler.
+ __ b(unwind_initial_activation);
+
+ // is recursive call
+ __ bind(L);
+
+ // Resume_interpreter expects the original tos in R3_RET.
+ __ ld(R3_RET, prev_state_(_stack));
+
+ // We're done.
+ __ li(msg, BytecodeInterpreter::popping_frame);
+
+ __ b(unwind_recursive_activation);
+ }
+
+
+ //=============================================================================
+
+ // We have finished an interpreted call. We are either returning to
+ // native (call_stub/c2) or we are returning to the interpreter.
+ // When returning to native, we must extract the result (if any)
+ // from the java expression stack and store it in the location the
+ // native abi expects. When returning to the interpreter we must
+ // simply do a stack to stack move as we unwind.
+
+ __ BIND(return_from_interpreted_method);
+
+ //
+ // Registers alive
+ // R16_thread - JavaThread*
+ // R15_prev_state - address of caller's BytecodeInterpreter or 0
+ // R14_state - address of callee's interpreter state
+ // R1_SP - callee's stack pointer
+ //
+ // Registers updated
+ // R19_method - callee's method
+ // R3_RET - address of result (new caller's tos),
+ // if returning to interpreted
+ // msg - message for interpreter,
+ // if returning to interpreted
+ //
+
+ // Check if this is the initial invocation of the frame manager.
+ // If so, R15_prev_state will be null.
+ __ cmpdi(CCR0, R15_prev_state, 0);
+
+ // Reload callee method, gc may have moved it.
+ __ ld(R19_method, state_(_method));
+
+ // Load the method's result type.
+ __ lwz(result_index, method_(result_index));
+
+ // Go to return_to_initial_caller if R15_prev_state is null.
+ __ beq(CCR0, return_to_initial_caller); + + // Copy callee's result to caller's expression stack via inline stack-to-stack + // converters. + { + Register new_tos = R3_RET; + Register from_temp = R4_ARG2; + Register from = R5_ARG3; + Register tos = R6_ARG4; + Register tmp1 = R7_ARG5; + Register tmp2 = R8_ARG6; + + ConditionRegister result_type_is_void = CCR1; + ConditionRegister result_type_is_long = CCR2; + ConditionRegister result_type_is_double = CCR3; + + Label stack_to_stack_void; + Label stack_to_stack_double_slot; // T_LONG, T_DOUBLE + Label stack_to_stack_single_slot; // T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT, T_INT, T_FLOAT, T_OBJECT + Label stack_to_stack_done; + + // Pass callee's address of tos + BytesPerWord + __ ld(from_temp, state_(_stack)); + + // result type: void + __ cmpwi(result_type_is_void, result_index, AbstractInterpreter::BasicType_as_index(T_VOID)); + + // Pass caller's tos == callee's locals address + __ ld(tos, state_(_locals)); + + // result type: long + __ cmpwi(result_type_is_long, result_index, AbstractInterpreter::BasicType_as_index(T_LONG)); + + __ addi(from, from_temp, Interpreter::stackElementSize); + + // !! don't branch above this line !! + + // handle void + __ beq(result_type_is_void, stack_to_stack_void); + + // result type: double + __ cmpwi(result_type_is_double, result_index, AbstractInterpreter::BasicType_as_index(T_DOUBLE)); + + // handle long or double + __ beq(result_type_is_long, stack_to_stack_double_slot); + __ beq(result_type_is_double, stack_to_stack_double_slot); + + // fall through to single slot types (incl. object) + + { + __ BIND(stack_to_stack_single_slot); + // T_BOOLEAN, T_BYTE, T_CHAR, T_SHORT, T_INT, T_FLOAT, T_OBJECT + + __ ld(tmp1, 0, from); + __ std(tmp1, 0, tos); + // New expression stack top + __ addi(new_tos, tos, - BytesPerWord); + + __ b(stack_to_stack_done); + } + + { + __ BIND(stack_to_stack_double_slot); + // T_LONG, T_DOUBLE + + // Move both entries for debug purposes even though only one is live + __ ld(tmp1, BytesPerWord, from); + __ ld(tmp2, 0, from); + __ std(tmp1, 0, tos); + __ std(tmp2, -BytesPerWord, tos); + + // new expression stack top + __ addi(new_tos, tos, - 2 * BytesPerWord); // two slots + __ b(stack_to_stack_done); + } + + { + __ BIND(stack_to_stack_void); + // T_VOID + + // new expression stack top + __ mr(new_tos, tos); + // fall through to stack_to_stack_done + } + + __ BIND(stack_to_stack_done); + } + + // new tos = R3_RET + + // Get the message for the interpreter + __ li(msg, BytecodeInterpreter::method_resume); + + // And fall thru + + + //============================================================================= + // Restore caller's interpreter state and pass pointer to caller's + // new tos to caller. + + __ BIND(unwind_recursive_activation); + + // + // Registers alive + // R15_prev_state - address of caller's BytecodeInterpreter + // R3_RET - address of caller's tos + // msg - message for caller's BytecodeInterpreter + // R1_SP - callee's stack pointer + // + // Registers updated + // R14_state - address of caller's BytecodeInterpreter + // R15_prev_state - address of its parent or 0 + // + + // Pop callee's interpreter and set R14_state to caller's interpreter. + __ pop_interpreter_state(/*prev_state_may_be_0=*/false); + + // And fall thru + + + //============================================================================= + // Resume the (calling) interpreter after a call. 
+ + __ BIND(resume_interpreter); + + // + // Registers alive + // R14_state - address of resuming BytecodeInterpreter + // R15_prev_state - address of its parent or 0 + // R3_RET - address of resuming tos + // msg - message for resuming interpreter + // R1_SP - callee's stack pointer + // + // Registers updated + // R1_SP - caller's stack pointer + // + + // Restore C stack pointer of caller (resuming interpreter), + // R14_state already points to the resuming BytecodeInterpreter. + __ pop_interpreter_frame_to_state(R14_state, R21_tmp1, R11_scratch1, R12_scratch2); + + // Store new address of tos (holding return value) in interpreter state. + __ std(R3_RET, state_(_stack)); + + // Store message for interpreter. + __ stw(msg, state_(_msg)); + + __ b(call_interpreter); + + //============================================================================= + // Interpreter returning to native code (call_stub/c1/c2) from + // initial activation. Convert stack result and unwind activation. + + __ BIND(return_to_initial_caller); + + // + // Registers alive + // R19_method - callee's Method + // R14_state - address of callee's interpreter state + // R16_thread - JavaThread + // R1_SP - callee's stack pointer + // + // Registers updated + // R3_RET/F1_RET - result in expected output register + // + + // If we have an exception pending we have no result and we + // must figure out where to really return to. + // + __ ld(pending_exception, thread_(pending_exception)); + __ cmpdi(CCR0, pending_exception, 0); + __ bne(CCR0, unwind_initial_activation_pending_exception); + + __ lwa(result_index, method_(result_index)); + + // Address of stub descriptor address array. + __ load_const(stub_addr, CppInterpreter::stack_result_to_native()); + + // Pass address of callee's tos + BytesPerWord. + // Will then point directly to result. + __ ld(R3_ARG1, state_(_stack)); + __ addi(R3_ARG1, R3_ARG1, Interpreter::stackElementSize); + + // Address of stub descriptor address + __ sldi(result_index, result_index, LogBytesPerWord); + __ add(stub_addr, stub_addr, result_index); + + // Stub descriptor address + __ ld(stub_addr, 0, stub_addr); + + // TODO: don't do this via a call, do it in place! + // + // call stub via descriptor + __ call_stub(stub_addr); + + __ BIND(unwind_initial_activation); + + // Unwind from initial activation. No exception is pending. + + // + // Stack layout at this point: + // + // 0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP + // ... + // CALLER [PARENT_IJAVA_FRAME_ABI] + // ... + // CALLER [unextended ABI] + // ... + // + // The CALLER frame has a C2I adapter or is an entry-frame. + // + + // An interpreter frame exists, we may pop the TOP_IJAVA_FRAME and + // turn the caller's PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME. + // But, we simply restore the return pc from the caller's frame and + // use the caller's initial_caller_sp as the new SP which pops the + // interpreter frame and "resizes" the caller's frame to its "unextended" + // size. + + // get rid of top frame + __ pop_frame(); + + // Load return PC from parent frame. + __ ld(R21_tmp1, _parent_ijava_frame_abi(lr), R1_SP); + + // Resize frame to get rid of a potential extension. + __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2); + + // update LR + __ mtlr(R21_tmp1); + + // return + __ blr(); + + //============================================================================= + // Unwind from initial activation. 
An exception is pending + + __ BIND(unwind_initial_activation_pending_exception); + + // + // Stack layout at this point: + // + // 0 [TOP_IJAVA_FRAME_ABI] <-- R1_SP + // ... + // CALLER [PARENT_IJAVA_FRAME_ABI] + // ... + // CALLER [unextended ABI] + // ... + // + // The CALLER frame has a C2I adapter or is an entry-frame. + // + + // An interpreter frame exists, we may pop the TOP_IJAVA_FRAME and + // turn the caller's PARENT_IJAVA_FRAME back into a TOP_IJAVA_FRAME. + // But, we just pop the current TOP_IJAVA_FRAME and fall through + + __ pop_frame(); + __ ld(R3_ARG1, _top_ijava_frame_abi(lr), R1_SP); + + // + // Stack layout at this point: + // + // CALLER [PARENT_IJAVA_FRAME_ABI] <-- R1_SP + // ... + // CALLER [unextended ABI] + // ... + // + // The CALLER frame has a C2I adapter or is an entry-frame. + // + // Registers alive + // R16_thread + // R3_ARG1 - return address to caller + // + // Registers updated + // R3_ARG1 - address of pending exception + // R4_ARG2 - issuing pc = return address to caller + // LR - address of exception handler stub + // + + // Resize frame to get rid of a potential extension. + __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2); + + __ mr(R14, R3_ARG1); // R14 := ARG1 + __ mr(R4_ARG2, R3_ARG1); // ARG2 := ARG1 + + // Find the address of the "catch_exception" stub. + __ push_frame_abi112(0, R11_scratch1); + __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), + R16_thread, + R4_ARG2); + __ pop_frame(); + + // Load continuation address into LR. + __ mtlr(R3_RET); + + // Load address of pending exception and clear it in thread object. + __ ld(R3_ARG1/*R3_RET*/, thread_(pending_exception)); + __ li(R4_ARG2, 0); + __ std(R4_ARG2, thread_(pending_exception)); + + // re-load issuing pc + __ mr(R4_ARG2, R14); + + // Branch to found exception handler. + __ blr(); + + //============================================================================= + // Call a new method. Compute new args and trim the expression stack + // to only what we are currently using and then recurse. + + __ BIND(call_method); + + // + // Registers alive + // R16_thread + // R14_state - address of caller's BytecodeInterpreter + // R1_SP - caller's stack pointer + // + // Registers updated + // R15_prev_state - address of caller's BytecodeInterpreter + // R17_tos - address of caller's tos + // R19_method - callee's Method + // R1_SP - trimmed back + // + + // Very-local scratch registers. + + const Register offset = R21_tmp1; + const Register tmp = R22_tmp2; + const Register self_entry = R23_tmp3; + const Register stub_entry = R24_tmp4; + + const ConditionRegister cr = CCR0; + + // Load the address of the frame manager. + __ load_const(self_entry, &interpreter_frame_manager); + __ ld(self_entry, 0, self_entry); + + // Load BytecodeInterpreter._result._to_call._callee (callee's Method). + __ ld(R19_method, state_(_result._to_call._callee)); + // Load BytecodeInterpreter._stack (outgoing tos). + __ ld(R17_tos, state_(_stack)); + + // Save address of caller's BytecodeInterpreter. + __ mr(R15_prev_state, R14_state); + + // Load the callee's entry point. + // Load BytecodeInterpreter._result._to_call._callee_entry_point. + __ ld(stub_entry, state_(_result._to_call._callee_entry_point)); + + // Check whether stub_entry is equal to self_entry. 
+ __ cmpd(cr, self_entry, stub_entry);
+ // if (self_entry == stub_entry)
+ // do a re-dispatch
+ __ beq(cr, re_dispatch);
+ // else
+ // call the specialized entry (adapter for jni or compiled code)
+ __ BIND(call_special);
+
+ //
+ // Call the entry generated by `InterpreterGenerator::generate_native_entry'.
+ //
+ // Registers alive
+ // R16_thread
+ // R15_prev_state - address of caller's BytecodeInterpreter
+ // R19_method - callee's Method
+ // R17_tos - address of caller's tos
+ // R1_SP - caller's stack pointer
+ //
+
+ // Mark return from specialized entry for generate_native_entry.
+ guarantee(return_from_native_pc != (address) NULL, "precondition");
+ frame_manager_specialized_return = return_from_native_pc;
+
+ // Set sender_SP in case we call the interpreter native wrapper which
+ // will expect it. Compiled code should not care.
+ __ mr(R21_sender_SP, R1_SP);
+
+ // Do a tail call here, and let the link register point to
+ // frame_manager_specialized_return which is return_from_native_pc.
+ __ load_const(tmp, frame_manager_specialized_return);
+ __ call_stub_and_return_to(stub_entry, tmp /* return_pc=tmp */);
+
+
+ //=============================================================================
+ //
+ // InterpretMethod triggered OSR compilation of some Java method M
+ // and now asks to run the compiled code. We call this code the
+ // `callee'.
+ //
+ // This is our current idea of how OSR should look on PPC64:
+ //
+ // While interpreting a Java method M the stack is:
+ //
+ // (InterpretMethod (M), IJAVA_FRAME (M), ANY_FRAME, ...).
+ //
+ // After having OSR compiled M, `InterpretMethod' returns to the
+ // frame manager, sending the message `retry_method_osr'. The stack
+ // is:
+ //
+ // (IJAVA_FRAME (M), ANY_FRAME, ...).
+ //
+ // The compiler will have generated an `nmethod' suitable for
+ // continuing execution of M at the bytecode index at which OSR took
+ // place. So now the frame manager calls the OSR entry. The OSR
+ // entry sets up a JIT_FRAME for M and continues execution of M with
+ // initial state determined by the IJAVA_FRAME.
+ //
+ // (JIT_FRAME (M), IJAVA_FRAME (M), ANY_FRAME, ...).
+ //
+
+ __ BIND(retry_method_osr);
+ {
+ //
+ // Registers alive
+ // R16_thread
+ // R15_prev_state - address of caller's BytecodeInterpreter
+ // R14_state - address of callee's BytecodeInterpreter
+ // R1_SP - callee's SP before call to InterpretMethod
+ //
+ // Registers updated
+ // R17 - pointer to callee's locals array
+ // (declared via `interpreter_arg_ptr_reg' in the AD file)
+ // R19_method - callee's Method
+ // R1_SP - callee's SP (will become SP of OSR adapter frame)
+ //
+
+ // Provide a debugger breakpoint in the frame manager if breakpoints
+ // in osr'd methods are requested.
+#ifdef COMPILER2
+ NOT_PRODUCT( if (OptoBreakpointOSR) { __ illtrap(); } )
+#endif
+
+ // Load callee's pointer to locals array from callee's state.
+ // __ ld(R17, state_(_locals));
+
+ // Load osr entry.
+ __ ld(R12_scratch2, state_(_result._osr._osr_entry));
+
+ // Load address of temporary osr buffer to arg1.
+ __ ld(R3_ARG1, state_(_result._osr._osr_buf));
+ __ mtctr(R12_scratch2);
+
+ // Load method oop, gc may move it during execution of osr'd method.
+ __ ld(R22_tmp2, state_(_method));
+ // Load message 'call_method'.
+ __ li(R23_tmp3, BytecodeInterpreter::call_method);
+
+ {
+ // Pop the IJAVA frame of the method which we are going to call osr'd.
+ Label no_state, skip_no_state; + __ pop_interpreter_state(/*prev_state_may_be_0=*/true); + __ cmpdi(CCR0, R14_state,0); + __ beq(CCR0, no_state); + // return to interpreter + __ pop_interpreter_frame_to_state(R14_state, R11_scratch1, R12_scratch2, R21_tmp1); + + // Init _result._to_call._callee and tell gc that it contains a valid oop + // by setting _msg to 'call_method'. + __ std(R22_tmp2, state_(_result._to_call._callee)); + // TODO: PPC port: assert(4 == BytecodeInterpreter::sz_msg(), "unexpected field size"); + __ stw(R23_tmp3, state_(_msg)); + + __ load_const(R21_tmp1, frame_manager_specialized_return); + __ b(skip_no_state); + __ bind(no_state); + + // Return to initial caller. + + // Get rid of top frame. + __ pop_frame(); + + // Load return PC from parent frame. + __ ld(R21_tmp1, _parent_ijava_frame_abi(lr), R1_SP); + + // Resize frame to get rid of a potential extension. + __ resize_frame_to_initial_caller(R11_scratch1, R12_scratch2); + + __ bind(skip_no_state); + + // Update LR with return pc. + __ mtlr(R21_tmp1); + } + // Jump to the osr entry point. + __ bctr(); + + } + + //============================================================================= + // Interpreted method "returned" with an exception, pass it on. + // Pass no result, unwind activation and continue/return to + // interpreter/call_stub/c2. + + __ BIND(throwing_exception); + + // Check if this is the initial invocation of the frame manager. If + // so, previous interpreter state in R15_prev_state will be null. + + // New tos of caller is callee's first parameter address, that is + // callee's incoming arguments are popped. + __ ld(R3_RET, state_(_locals)); + + // Check whether this is an initial call. + __ cmpdi(CCR0, R15_prev_state, 0); + // Yes, called from the call stub or from generated code via a c2i frame. + __ beq(CCR0, unwind_initial_activation_pending_exception); + + // Send resume message, interpreter will see the exception first. + + __ li(msg, BytecodeInterpreter::method_resume); + __ b(unwind_recursive_activation); + + + //============================================================================= + // Push the last instruction out to the code buffer. + + { + __ unimplemented("end of InterpreterGenerator::generate_normal_entry", 128); + } + + interpreter_frame_manager = entry; + return interpreter_frame_manager; +} + +// Generate code for various sorts of method entries +// +address AbstractInterpreterGenerator::generate_method_entry(AbstractInterpreter::MethodKind kind) { + address entry_point = NULL; + + switch (kind) { + case Interpreter::zerolocals : break; + case Interpreter::zerolocals_synchronized : break; + case Interpreter::native : // Fall thru + case Interpreter::native_synchronized : entry_point = ((CppInterpreterGenerator*)this)->generate_native_entry(); break; + case Interpreter::empty : break; + case Interpreter::accessor : entry_point = ((InterpreterGenerator*)this)->generate_accessor_entry(); break; + case Interpreter::abstract : entry_point = ((InterpreterGenerator*)this)->generate_abstract_entry(); break; + // These are special interpreter intrinsics which we don't support so far. 
+ case Interpreter::java_lang_math_sin : break;
+ case Interpreter::java_lang_math_cos : break;
+ case Interpreter::java_lang_math_tan : break;
+ case Interpreter::java_lang_math_abs : break;
+ case Interpreter::java_lang_math_log : break;
+ case Interpreter::java_lang_math_log10 : break;
+ case Interpreter::java_lang_math_sqrt : break;
+ case Interpreter::java_lang_math_pow : break;
+ case Interpreter::java_lang_math_exp : break;
+ case Interpreter::java_lang_ref_reference_get: entry_point = ((InterpreterGenerator*)this)->generate_Reference_get_entry(); break;
+ default : ShouldNotReachHere(); break;
+ }
+
+ if (entry_point) {
+ return entry_point;
+ }
+ return ((InterpreterGenerator*)this)->generate_normal_entry();
+}
+
+InterpreterGenerator::InterpreterGenerator(StubQueue* code)
+ : CppInterpreterGenerator(code) {
+ generate_all(); // down here so it can be "virtual"
+}
+
+// How much stack a topmost interpreter method activation needs in words.
+int AbstractInterpreter::size_top_interpreter_activation(Method* method) {
+ // Computation is in bytes not words to match layout_activation_impl
+ // below, but the return value is in words.
+
+ //
+ // 0 [TOP_IJAVA_FRAME_ABI] \
+ // alignment (optional) \ |
+ // [operand stack / Java parameters] > stack | |
+ // [monitors] (optional) > monitors | |
+ // [PARENT_IJAVA_FRAME_ABI] \ | |
+ // [BytecodeInterpreter object] > interpreter \ | | |
+ // alignment (optional) | round | parent | round | top
+ // [Java result] (2 slots) > result | | | |
+ // [Java non-arg locals] \ locals | | | |
+ // [arg locals] / / / / /
+ //
+
+ int locals = method->max_locals() * BytesPerWord;
+ int interpreter = frame::interpreter_frame_cinterpreterstate_size_in_bytes();
+ int result = 2 * BytesPerWord;
+
+ int parent = round_to(interpreter + result + locals, 16) + frame::parent_ijava_frame_abi_size;
+
+ int stack = method->max_stack() * BytesPerWord;
+ int monitors = method->is_synchronized() ? frame::interpreter_frame_monitor_size_in_bytes() : 0;
+ int top = round_to(parent + monitors + stack, 16) + frame::top_ijava_frame_abi_size;
+
+ return (top / BytesPerWord);
+}
+
+void BytecodeInterpreter::layout_interpreterState(interpreterState to_fill,
+ frame* caller,
+ frame* current,
+ Method* method,
+ intptr_t* locals,
+ intptr_t* stack,
+ intptr_t* stack_base,
+ intptr_t* monitor_base,
+ intptr_t* frame_sp,
+ bool is_top_frame) {
+ // What about any vtable?
+ //
+ to_fill->_thread = JavaThread::current();
+ // This gets filled in later but make it something recognizable for now.
+ to_fill->_bcp = method->code_base();
+ to_fill->_locals = locals;
+ to_fill->_constants = method->constants()->cache();
+ to_fill->_method = method;
+ to_fill->_mdx = NULL;
+ to_fill->_stack = stack;
+
+ if (is_top_frame && JavaThread::current()->popframe_forcing_deopt_reexecution()) {
+ to_fill->_msg = deopt_resume2;
+ } else {
+ to_fill->_msg = method_resume;
+ }
+ to_fill->_result._to_call._bcp_advance = 0;
+ to_fill->_result._to_call._callee_entry_point = NULL; // doesn't matter to anyone
+ to_fill->_result._to_call._callee = NULL; // doesn't matter to anyone
+ to_fill->_prev_link = NULL;
+
+ if (caller->is_interpreted_frame()) {
+ interpreterState prev = caller->get_interpreterState();
+
+ // Support MH calls. Make sure the interpreter will return the right address:
+ // 1. Caller did an ordinary interpreted->compiled call: Set a prev_state
+ // which makes the CPP interpreter return to the frame manager "return_from_interpreted_method"
+ // entry after finishing execution.
+ // 2.
Caller did a MH call: If the caller has a MethodHandleInvoke in its
+ // state (invariant: it must be the caller of the bottom vframe), we used the
+ // "call_special" entry to do the call, meaning the arguments have not been
+ // popped from the stack. Therefore, don't enter a prev state in this case
+ // in order to return to the "return_from_native" frame manager entry which takes
+ // care of popping arguments. Also, don't overwrite the MH.invoke Method in
+ // the prev_state in order to be able to figure out the number of arguments to
+ // pop.
+ // The parameter method can represent MethodHandle.invokeExact(...).
+ // The MethodHandleCompiler generates these synthetic Methods,
+ // including bytecodes, if an invokedynamic call gets inlined. In
+ // this case we want to return like from any other interpreted
+ // Java call, so we set _prev_link.
+ to_fill->_prev_link = prev;
+
+ if (*prev->_bcp == Bytecodes::_invokeinterface || *prev->_bcp == Bytecodes::_invokedynamic) {
+ prev->_result._to_call._bcp_advance = 5;
+ } else {
+ prev->_result._to_call._bcp_advance = 3;
+ }
+ }
+ to_fill->_oop_temp = NULL;
+ to_fill->_stack_base = stack_base;
+ // Need +1 here because stack_base points to the word just above the
+ // first expr stack entry and stack_limit is supposed to point to
+ // the word just below the last expr stack entry. See
+ // generate_compute_interpreter_state.
+ to_fill->_stack_limit = stack_base - (method->max_stack() + 1);
+ to_fill->_monitor_base = (BasicObjectLock*) monitor_base;
+
+ to_fill->_frame_bottom = frame_sp;
+
+ // PPC64 specific
+ to_fill->_last_Java_pc = NULL;
+ to_fill->_last_Java_fp = NULL;
+ to_fill->_last_Java_sp = frame_sp;
+#ifdef ASSERT
+ to_fill->_self_link = to_fill;
+ to_fill->_native_fresult = 123456.789;
+ to_fill->_native_lresult = CONST64(0xdeafcafedeadc0de);
+#endif
+}
+
+void BytecodeInterpreter::pd_layout_interpreterState(interpreterState istate,
+ address last_Java_pc,
+ intptr_t* last_Java_fp) {
+ istate->_last_Java_pc = last_Java_pc;
+ istate->_last_Java_fp = last_Java_fp;
+}
+
+int AbstractInterpreter::layout_activation(Method* method,
+ int temps, // Number of slots on java expression stack in use.
+ int popframe_args,
+ int monitors, // Number of active monitors.
+ int caller_actual_parameters,
+ int callee_params, // Number of slots for callee parameters.
+ int callee_locals, // Number of slots for locals.
+ frame* caller,
+ frame* interpreter_frame,
+ bool is_top_frame,
+ bool is_bottom_frame) {
+
+ // NOTE this code must exactly mimic what
+ // InterpreterGenerator::generate_compute_interpreter_state() does
+ // as far as allocating an interpreter frame. However there is an
+ // exception: with the C++ based interpreter only the topmost frame
+ // has a full sized expression stack. The 16 byte slop factor is
+ // both the abi scratch area and a place to hold a result from a
+ // callee on its way to the caller's stack.
+
+ int monitor_size = frame::interpreter_frame_monitor_size_in_bytes() * monitors;
+ int frame_size;
+ int top_frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes()
+ + monitor_size
+ + (method->max_stack() * Interpreter::stackElementWords * BytesPerWord)
+ + 2*BytesPerWord,
+ frame::alignment_in_bytes)
+ + frame::top_ijava_frame_abi_size;
+ if (is_top_frame) {
+ frame_size = top_frame_size;
+ } else {
+ frame_size = round_to(frame::interpreter_frame_cinterpreterstate_size_in_bytes()
+ + monitor_size
+ + ((temps - callee_params + callee_locals) *
+ Interpreter::stackElementWords * BytesPerWord)
+ + 2*BytesPerWord,
+ frame::alignment_in_bytes)
+ + frame::parent_ijava_frame_abi_size;
+ assert(popframe_args == 0, "non-zero for top_frame only");
+ }
+
+ // If we actually have a frame to lay out we must now fill in all the pieces.
+ if (interpreter_frame != NULL) {
+
+ intptr_t sp = (intptr_t)interpreter_frame->sp();
+ intptr_t fp = *(intptr_t *)sp;
+ assert(fp == (intptr_t)caller->sp(), "fp must match");
+ interpreterState cur_state =
+ (interpreterState)(fp - frame::interpreter_frame_cinterpreterstate_size_in_bytes());
+
+ // Now fill in the interpreterState object.
+
+ intptr_t* locals;
+ if (caller->is_interpreted_frame()) {
+ // Locals must agree with the caller because they will be used to set the
+ // caller's tos when we return.
+ interpreterState prev = caller->get_interpreterState();
+ // Calculate the start of "locals" for MH calls. For MH calls, the
+ // current method() (= MH target) and prev->callee() (=
+ // MH.invoke*()) are different and especially have different
+ // signatures. To pop the arguments of the caller, we must use
+ // prev->callee()->size_of_arguments() because that's what
+ // the caller actually pushed. Currently, for synthetic MH
+ // calls (deoptimized from inlined MH calls), detected by
+ // is_method_handle_invoke(), we use the callee's arguments
+ // because here, the caller's and callee's signatures match.
+ if (true /*!caller->is_at_mh_callsite()*/) {
+ locals = prev->stack() + method->size_of_parameters();
+ } else {
+ // Normal MH call.
+ locals = prev->stack() + prev->callee()->size_of_parameters();
+ }
+ } else {
+ bool is_deopted;
+ locals = (intptr_t*) (fp + ((method->max_locals() - 1) * BytesPerWord) +
+ frame::parent_ijava_frame_abi_size);
+ }
+
+ intptr_t* monitor_base = (intptr_t*) cur_state;
+ intptr_t* stack_base = (intptr_t*) ((intptr_t) monitor_base - monitor_size);
+
+ // Provide pop_frame capability on PPC64, add popframe_args.
+ // +1 because the stack is always prepushed.
+ intptr_t* stack = (intptr_t*) ((intptr_t) stack_base - (temps + popframe_args + 1) * BytesPerWord);
+
+ BytecodeInterpreter::layout_interpreterState(cur_state,
+ caller,
+ interpreter_frame,
+ method,
+ locals,
+ stack,
+ stack_base,
+ monitor_base,
+ (intptr_t*)(((intptr_t)fp) - top_frame_size),
+ is_top_frame);
+
+ BytecodeInterpreter::pd_layout_interpreterState(cur_state, interpreter_return_address,
+ interpreter_frame->fp());
+ }
+ return frame_size / BytesPerWord;
+}
+
+#endif // CC_INTERP
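The two-step sizing in layout_activation above (a 16-byte-aligned block of interpreter state, monitors, and expression stack, plus the TOP or PARENT ABI area) can be checked in isolation. The following standalone C++ sketch mirrors the round_to arithmetic; istate_size and top_abi_size are illustrative stand-ins, not the real frame::* values.

#include <cstdio>

// Round x up to the next multiple of a power-of-two alignment,
// like HotSpot's round_to().
static int round_to(int x, int alignment) {
  return (x + alignment - 1) & ~(alignment - 1);
}

int main() {
  const int BytesPerWord       = 8;   // LP64
  const int alignment_in_bytes = 16;  // frame::alignment_in_bytes
  const int istate_size        = 256; // stand-in for the cinterpreterstate size
  const int top_abi_size       = 112; // stand-in for top_ijava_frame_abi_size
  const int monitor_size       = 0;   // no monitors in this example
  const int max_stack_slots    = 6;

  // Mirrors: round_to(istate + monitors + stack + 2 result slots, 16) + abi.
  int top_frame_size = round_to(istate_size + monitor_size
                                + max_stack_slots * BytesPerWord
                                + 2 * BytesPerWord,
                                alignment_in_bytes)
                       + top_abi_size;
  printf("top_frame_size = %d bytes (%d words)\n",
         top_frame_size, top_frame_size / BytesPerWord);
  return 0;
}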
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/cppInterpreter_ppc.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_CPPINTERPRETER_PPC_HPP +#define CPU_PPC_VM_CPPINTERPRETER_PPC_HPP + + protected: + + // Size of interpreter code. Increase if too small. Interpreter will + // fail with a guarantee ("not enough space for interpreter generation"); + // if too small. + // Run with +PrintInterpreter to get the VM to print out the size. + // Max size with JVMTI + + const static int InterpreterCodeSize = 12*K; + +#endif // CPU_PPC_VM_CPPINTERPRETER_PPC_HPP
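InterpreterCodeSize above is an upper bound that is checked while the interpreter is being generated; if the emitted code outgrows it, the VM stops with the guarantee quoted in the comment. A minimal sketch of that size check follows, with assert() standing in for HotSpot's guarantee() and code_bytes_emitted() a hypothetical stand-in for querying the StubQueue.

#include <cassert>
#include <cstdio>

static const int K = 1024;
static const int InterpreterCodeSize = 12 * K; // as declared above

// Hypothetical helper: pretend this reports the bytes emitted so far.
static int code_bytes_emitted() { return 11 * K; }

int main() {
  int used = code_bytes_emitted();
  // The VM would use guarantee(); assert() stands in for it here.
  assert(used <= InterpreterCodeSize &&
         "not enough space for interpreter generation");
  printf("interpreter used %d of %d bytes\n", used, InterpreterCodeSize);
  return 0;
}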
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/debug_ppc.cpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,35 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "code/codeCache.hpp" +#include "code/nmethod.hpp" +#include "runtime/frame.hpp" +#include "runtime/init.hpp" +#include "runtime/os.hpp" +#include "utilities/debug.hpp" +#include "utilities/top.hpp" + +void pd_ps(frame f) {}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/depChecker_ppc.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_DEPCHECKER_PPC_HPP +#define CPU_PPC_VM_DEPCHECKER_PPC_HPP + +// Nothing to do on ppc64 + +#endif // CPU_PPC_VM_DEPCHECKER_PPC_HPP
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/disassembler_ppc.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_DISASSEMBLER_PPC_HPP +#define CPU_PPC_VM_DISASSEMBLER_PPC_HPP + + static int pd_instruction_alignment() { + return sizeof(int); + } + + static const char* pd_cpu_opts() { + return "ppc64"; + } + +#endif // CPU_PPC_VM_DISASSEMBLER_PPC_HPP
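Because PPC64 instructions are fixed-width four-byte words, pd_instruction_alignment() can simply return sizeof(int), and a disassembler loop can advance the PC by that constant. A small standalone sketch; the two encodings are ordinary PPC instructions used only as sample data.

#include <cstdint>
#include <cstdio>

int main() {
  // addi r1,r1,16 ; blr -- sample instruction words.
  const uint32_t code[] = { 0x38210010, 0x4e800020 };
  const int step = sizeof(int); // matches pd_instruction_alignment() above
  const uint8_t* pc  = (const uint8_t*)code;
  const uint8_t* end = pc + sizeof(code);
  for (; pc < end; pc += step) {
    printf("%p: 0x%08x\n", (const void*)pc, *(const uint32_t*)pc);
  }
  return 0;
}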
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/ppc/vm/frame_ppc.cpp Fri Aug 02 16:46:45 2013 +0200
@@ -0,0 +1,306 @@
+/*
+ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2012, 2013 SAP AG. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ *
+ */
+
+#include "precompiled.hpp"
+#include "interpreter/interpreter.hpp"
+#include "memory/resourceArea.hpp"
+#include "oops/markOop.hpp"
+#include "oops/method.hpp"
+#include "oops/oop.inline.hpp"
+#include "runtime/frame.inline.hpp"
+#include "runtime/handles.inline.hpp"
+#include "runtime/javaCalls.hpp"
+#include "runtime/monitorChunk.hpp"
+#include "runtime/signature.hpp"
+#include "runtime/stubCodeGenerator.hpp"
+#include "runtime/stubRoutines.hpp"
+#include "vmreg_ppc.inline.hpp"
+#ifdef COMPILER1
+#include "c1/c1_Runtime1.hpp"
+#include "runtime/vframeArray.hpp"
+#endif
+
+#ifndef CC_INTERP
+#error "CC_INTERP must be defined on PPC64"
+#endif
+
+#ifdef ASSERT
+void RegisterMap::check_location_valid() {
+}
+#endif // ASSERT
+
+bool frame::safe_for_sender(JavaThread *thread) {
+ bool safe = false;
+ address cursp = (address)sp();
+ address curfp = (address)fp();
+ if ((cursp != NULL && curfp != NULL &&
+ (cursp <= thread->stack_base() && cursp >= thread->stack_base() - thread->stack_size())) &&
+ (curfp <= thread->stack_base() && curfp >= thread->stack_base() - thread->stack_size())) {
+ safe = true;
+ }
+ return safe;
+}
+
+bool frame::is_interpreted_frame() const {
+ return Interpreter::contains(pc());
+}
+
+frame frame::sender_for_entry_frame(RegisterMap *map) const {
+ assert(map != NULL, "map must be set");
+ // Java frame called from C; skip all C frames and return top C
+ // frame of that chunk as the sender.
+ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor();
+ assert(!entry_frame_is_first(), "next Java fp must be non zero");
+ assert(jfa->last_Java_sp() > _sp, "must be above this frame on stack");
+ map->clear();
+ assert(map->include_argument_oops(), "should be set by clear");
+
+ if (jfa->last_Java_pc() != NULL) {
+ frame fr(jfa->last_Java_sp(), jfa->last_Java_pc());
+ return fr;
+ }
+ // Last_java_pc is not set if we come here from compiled code. The
+ // constructor retrieves the PC from the stack.
+ frame fr(jfa->last_Java_sp());
+ return fr;
+}
+
+frame frame::sender_for_interpreter_frame(RegisterMap *map) const {
+ // Pass the caller's initial_caller_sp as unextended_sp.
+ return frame(sender_sp(), sender_pc(), (intptr_t*)((parent_ijava_frame_abi *)callers_abi())->initial_caller_sp); +} + +frame frame::sender_for_compiled_frame(RegisterMap *map) const { + assert(map != NULL, "map must be set"); + + // Frame owned by compiler. + address pc = *compiled_sender_pc_addr(_cb); + frame caller(compiled_sender_sp(_cb), pc); + + // Now adjust the map. + + // Get the rest. + if (map->update_map()) { + // Tell GC to use argument oopmaps for some runtime stubs that need it. + map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); + if (_cb->oop_maps() != NULL) { + OopMapSet::update_register_map(this, map); + } + } + + return caller; +} + +intptr_t* frame::compiled_sender_sp(CodeBlob* cb) const { + return sender_sp(); +} + +address* frame::compiled_sender_pc_addr(CodeBlob* cb) const { + return sender_pc_addr(); +} + +frame frame::sender(RegisterMap* map) const { + // Default is we do have to follow them. The sender_for_xxx will + // update it accordingly. + map->set_include_argument_oops(false); + + if (is_entry_frame()) return sender_for_entry_frame(map); + if (is_interpreted_frame()) return sender_for_interpreter_frame(map); + assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); + + if (_cb != NULL) { + return sender_for_compiled_frame(map); + } + // Must be native-compiled frame, i.e. the marshaling code for native + // methods that exists in the core system. + return frame(sender_sp(), sender_pc()); +} + +void frame::patch_pc(Thread* thread, address pc) { + if (TracePcPatching) { + tty->print_cr("patch_pc at address " PTR_FORMAT " [" PTR_FORMAT " -> " PTR_FORMAT "]", + &((address*) _sp)[-1], ((address*) _sp)[-1], pc); + } + own_abi()->lr = (uint64_t)pc; + _cb = CodeCache::find_blob(pc); + if (_cb != NULL && _cb->is_nmethod() && ((nmethod*)_cb)->is_deopt_pc(_pc)) { + address orig = (((nmethod*)_cb)->get_original_pc(this)); + assert(orig == _pc, "expected original to be stored before patching"); + _deopt_state = is_deoptimized; + // Leave _pc as is. + } else { + _deopt_state = not_deoptimized; + _pc = pc; + } +} + +void frame::pd_gc_epilog() { + if (is_interpreted_frame()) { + // Set constant pool cache entry for interpreter. + Method* m = interpreter_frame_method(); + + *interpreter_frame_cpoolcache_addr() = m->constants()->cache(); + } +} + +bool frame::is_interpreted_frame_valid(JavaThread* thread) const { + // Is there anything to do? + assert(is_interpreted_frame(), "Not an interpreted frame"); + return true; +} + +BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { + assert(is_interpreted_frame(), "interpreted frame expected"); + Method* method = interpreter_frame_method(); + BasicType type = method->result_type(); + +#ifdef CC_INTERP + if (method->is_native()) { + // Prior to calling into the runtime to notify the method exit the possible + // result value is saved into the interpreter frame. + interpreterState istate = get_interpreterState(); + address lresult = (address)istate + in_bytes(BytecodeInterpreter::native_lresult_offset()); + address fresult = (address)istate + in_bytes(BytecodeInterpreter::native_fresult_offset()); + + switch (method->result_type()) { + case T_OBJECT: + case T_ARRAY: { + oop* obj_p = *(oop**)lresult; + oop obj = (obj_p == NULL) ? NULL : *obj_p; + assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); + *oop_result = obj; + break; + } + // We use std/stfd to store the values. 
+ case T_BOOLEAN : value_result->z = (jboolean) *(unsigned long*)lresult; break;
+ case T_INT : value_result->i = (jint) *(long*)lresult; break;
+ case T_CHAR : value_result->c = (jchar) *(unsigned long*)lresult; break;
+ case T_SHORT : value_result->s = (jshort) *(long*)lresult; break;
+ case T_BYTE : value_result->b = (jbyte) *(long*)lresult; break;
+ case T_LONG : value_result->j = (jlong) *(long*)lresult; break;
+ case T_FLOAT : value_result->f = (jfloat) *(double*)fresult; break;
+ case T_DOUBLE : value_result->d = (jdouble) *(double*)fresult; break;
+ case T_VOID : /* Nothing to do */ break;
+ default : ShouldNotReachHere();
+ }
+ } else {
+ intptr_t* tos_addr = interpreter_frame_tos_address();
+ switch (method->result_type()) {
+ case T_OBJECT:
+ case T_ARRAY: {
+ oop obj = *(oop*)tos_addr;
+ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check");
+ *oop_result = obj;
+ break;
+ }
+ case T_BOOLEAN : value_result->z = (jboolean) *(jint*)tos_addr; break;
+ case T_BYTE : value_result->b = (jbyte) *(jint*)tos_addr; break;
+ case T_CHAR : value_result->c = (jchar) *(jint*)tos_addr; break;
+ case T_SHORT : value_result->s = (jshort) *(jint*)tos_addr; break;
+ case T_INT : value_result->i = *(jint*)tos_addr; break;
+ case T_LONG : value_result->j = *(jlong*)tos_addr; break;
+ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break;
+ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break;
+ case T_VOID : /* Nothing to do */ break;
+ default : ShouldNotReachHere();
+ }
+ }
+#else
+ Unimplemented();
+#endif
+ return type;
+}
+
+#ifndef PRODUCT
+
+void frame::describe_pd(FrameValues& values, int frame_no) {
+ if (is_interpreted_frame()) {
+#ifdef CC_INTERP
+ interpreterState istate = get_interpreterState();
+ values.describe(frame_no, (intptr_t*)istate, "istate");
+ values.describe(frame_no, (intptr_t*)&(istate->_thread), " thread");
+ values.describe(frame_no, (intptr_t*)&(istate->_bcp), " bcp");
+ values.describe(frame_no, (intptr_t*)&(istate->_locals), " locals");
+ values.describe(frame_no, (intptr_t*)&(istate->_constants), " constants");
+ values.describe(frame_no, (intptr_t*)&(istate->_method), err_msg(" method = %s", istate->_method->name_and_sig_as_C_string()));
+ values.describe(frame_no, (intptr_t*)&(istate->_mdx), " mdx");
+ values.describe(frame_no, (intptr_t*)&(istate->_stack), " stack");
+ values.describe(frame_no, (intptr_t*)&(istate->_msg), err_msg(" msg = %s", BytecodeInterpreter::C_msg(istate->_msg)));
+ values.describe(frame_no, (intptr_t*)&(istate->_result), " result");
+ values.describe(frame_no, (intptr_t*)&(istate->_prev_link), " prev_link");
+ values.describe(frame_no, (intptr_t*)&(istate->_oop_temp), " oop_temp");
+ values.describe(frame_no, (intptr_t*)&(istate->_stack_base), " stack_base");
+ values.describe(frame_no, (intptr_t*)&(istate->_stack_limit), " stack_limit");
+ values.describe(frame_no, (intptr_t*)&(istate->_monitor_base), " monitor_base");
+ values.describe(frame_no, (intptr_t*)&(istate->_frame_bottom), " frame_bottom");
+ values.describe(frame_no, (intptr_t*)&(istate->_last_Java_pc), " last_Java_pc");
+ values.describe(frame_no, (intptr_t*)&(istate->_last_Java_fp), " last_Java_fp");
+ values.describe(frame_no, (intptr_t*)&(istate->_last_Java_sp), " last_Java_sp");
+ values.describe(frame_no, (intptr_t*)&(istate->_self_link), " self_link");
+ values.describe(frame_no, (intptr_t*)&(istate->_native_fresult), " native_fresult");
+ values.describe(frame_no, (intptr_t*)&(istate->_native_lresult), " native_lresult");
+#else
+ Unimplemented();
+#endif + } +} +#endif + +void frame::adjust_unextended_sp() { + // If we are returning to a compiled MethodHandle call site, the + // saved_fp will in fact be a saved value of the unextended SP. The + // simplest way to tell whether we are returning to such a call site + // is as follows: + + if (is_compiled_frame() && false /*is_at_mh_callsite()*/) { // TODO PPC port + // If the sender PC is a deoptimization point, get the original + // PC. For MethodHandle call site the unextended_sp is stored in + // saved_fp. + _unextended_sp = _fp - _cb->frame_size(); + +#ifdef ASSERT + nmethod *sender_nm = _cb->as_nmethod_or_null(); + assert(sender_nm && *_sp == *_unextended_sp, "backlink changed"); + + intptr_t* sp = _unextended_sp; // check if stack can be walked from here + for (int x = 0; x < 5; ++x) { // check up to a couple of backlinks + intptr_t* prev_sp = *(intptr_t**)sp; + if (prev_sp == 0) break; // end of stack + assert(prev_sp>sp, "broken stack"); + sp = prev_sp; + } + + if (sender_nm->is_deopt_mh_entry(_pc)) { // checks for deoptimization + address original_pc = sender_nm->get_original_pc(this); + assert(sender_nm->insts_contains(original_pc), "original PC must be in nmethod"); + assert(sender_nm->is_method_handle_return(original_pc), "must be"); + } +#endif + } +} + +intptr_t *frame::initial_deoptimization_info() { + // unused... but returns fp() to minimize changes introduced by 7087445 + return fp(); +}
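frame::safe_for_sender above reduces to a bounds test: both SP and FP must lie inside the thread's stack, which grows downward from stack_base. A minimal sketch of that test with illustrative addresses follows; in_stack is a hypothetical helper, not a HotSpot API.

#include <cstdint>
#include <cstdio>

// Stack grows downward: valid addresses lie in [base - size, base].
static bool in_stack(uintptr_t p, uintptr_t base, uintptr_t size) {
  return p <= base && p >= base - size;
}

int main() {
  const uintptr_t base = 0x7fff00000000ULL; // illustrative stack base
  const uintptr_t size = 512 * 1024;        // illustrative stack size
  const uintptr_t sp   = base - 4096;       // candidate frame SP
  const uintptr_t fp   = base - 2048;       // candidate caller SP (FP)
  bool safe = in_stack(sp, base, size) && in_stack(fp, base, size);
  printf("safe_for_sender-style check: %s\n", safe ? "safe" : "unsafe");
  return 0;
}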
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/frame_ppc.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,449 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_FRAME_PPC_HPP +#define CPU_PPC_VM_FRAME_PPC_HPP + +#include "runtime/synchronizer.hpp" +#include "utilities/top.hpp" + +#ifndef CC_INTERP +#error "CC_INTERP must be defined on PPC64" +#endif + + // C frame layout on PPC-64. + // + // In this figure the stack grows upwards, while memory grows + // downwards. See "64-bit PowerPC ELF ABI Supplement Version 1.7", + // IBM Corp. (2003-10-29) + // (http://math-atlas.sourceforge.net/devel/assembly/PPC-elf64abi-1.7.pdf). + // + // Square brackets denote stack regions possibly larger + // than a single 64 bit slot. + // + // STACK: + // 0 [C_FRAME] <-- SP after prolog (mod 16 = 0) + // [C_FRAME] <-- SP before prolog + // ... + // [C_FRAME] + // + // C_FRAME: + // 0 [ABI_112] + // 112 CARG_9: outgoing arg 9 (arg_1 ... arg_8 via gpr_3 ... gpr_{10}) + // ... + // 40+M*8 CARG_M: outgoing arg M (M is the maximum of outgoing args taken over all call sites in the procedure) + // local 1 + // ... + // local N + // spill slot for vector reg (16 bytes aligned) + // ... + // spill slot for vector reg + // alignment (4 or 12 bytes) + // V SR_VRSAVE + // V+4 spill slot for GR + // ... ... + // spill slot for GR + // spill slot for FR + // ... + // spill slot for FR + // + // ABI_48: + // 0 caller's SP + // 8 space for condition register (CR) for next call + // 16 space for link register (LR) for next call + // 24 reserved + // 32 reserved + // 40 space for TOC (=R2) register for next call + // + // ABI_112: + // 0 [ABI_48] + // 48 CARG_1: spill slot for outgoing arg 1. used by next callee. + // ... ... + // 104 CARG_8: spill slot for outgoing arg 8. used by next callee. + // + + public: + + // C frame layout + + enum { + // stack alignment + alignment_in_bytes = 16, + // log_2(16*8 bits) = 7. + log_2_of_alignment_in_bits = 7 + }; + + // ABI_48: + struct abi_48 { + uint64_t callers_sp; + uint64_t cr; //_16 + uint64_t lr; + uint64_t reserved1; //_16 + uint64_t reserved2; + uint64_t toc; //_16 + // nothing to add here! 
+ // aligned to frame::alignment_in_bytes (16) + }; + + enum { + abi_48_size = sizeof(abi_48) + }; + + struct abi_112 : abi_48 { + uint64_t carg_1; + uint64_t carg_2; //_16 + uint64_t carg_3; + uint64_t carg_4; //_16 + uint64_t carg_5; + uint64_t carg_6; //_16 + uint64_t carg_7; + uint64_t carg_8; //_16 + // aligned to frame::alignment_in_bytes (16) + }; + + enum { + abi_112_size = sizeof(abi_112) + }; + + #define _abi(_component) \ + (offset_of(frame::abi_112, _component)) + + struct abi_112_spill : abi_112 { + // additional spill slots + uint64_t spill_ret; + uint64_t spill_fret; //_16 + // aligned to frame::alignment_in_bytes (16) + }; + + enum { + abi_112_spill_size = sizeof(abi_112_spill) + }; + + #define _abi_112_spill(_component) \ + (offset_of(frame::abi_112_spill, _component)) + + // non-volatile GPRs: + + struct spill_nonvolatiles { + uint64_t r14; + uint64_t r15; //_16 + uint64_t r16; + uint64_t r17; //_16 + uint64_t r18; + uint64_t r19; //_16 + uint64_t r20; + uint64_t r21; //_16 + uint64_t r22; + uint64_t r23; //_16 + uint64_t r24; + uint64_t r25; //_16 + uint64_t r26; + uint64_t r27; //_16 + uint64_t r28; + uint64_t r29; //_16 + uint64_t r30; + uint64_t r31; //_16 + + double f14; + double f15; + double f16; + double f17; + double f18; + double f19; + double f20; + double f21; + double f22; + double f23; + double f24; + double f25; + double f26; + double f27; + double f28; + double f29; + double f30; + double f31; + + // aligned to frame::alignment_in_bytes (16) + }; + + enum { + spill_nonvolatiles_size = sizeof(spill_nonvolatiles) + }; + + #define _spill_nonvolatiles_neg(_component) \ + (int)(-frame::spill_nonvolatiles_size + offset_of(frame::spill_nonvolatiles, _component)) + + // Frame layout for the Java interpreter on PPC64. + // + // This frame layout provides a C-like frame for every Java frame. + // + // In these figures the stack grows upwards, while memory grows + // downwards. Square brackets denote regions possibly larger than + // single 64 bit slots. + // + // STACK (no JNI, no compiled code, no library calls, + // interpreter-loop is active): + // 0 [InterpretMethod] + // [TOP_IJAVA_FRAME] + // [PARENT_IJAVA_FRAME] + // ... + // [PARENT_IJAVA_FRAME] + // [ENTRY_FRAME] + // [C_FRAME] + // ... + // [C_FRAME] + // + // TOP_IJAVA_FRAME: + // 0 [TOP_IJAVA_FRAME_ABI] + // alignment (optional) + // [operand stack] + // [monitors] (optional) + // [cInterpreter object] + // result, locals, and arguments are in parent frame! + // + // PARENT_IJAVA_FRAME: + // 0 [PARENT_IJAVA_FRAME_ABI] + // alignment (optional) + // [callee's Java result] + // [callee's locals w/o arguments] + // [outgoing arguments] + // [used part of operand stack w/o arguments] + // [monitors] (optional) + // [cInterpreter object] + // + // ENTRY_FRAME: + // 0 [PARENT_IJAVA_FRAME_ABI] + // alignment (optional) + // [callee's Java result] + // [callee's locals w/o arguments] + // [outgoing arguments] + // [ENTRY_FRAME_LOCALS] + // + // PARENT_IJAVA_FRAME_ABI: + // 0 [ABI_48] + // top_frame_sp + // initial_caller_sp + // + // TOP_IJAVA_FRAME_ABI: + // 0 [PARENT_IJAVA_FRAME_ABI] + // carg_3_unused + // carg_4_unused + // carg_5_unused + // carg_6_unused + // carg_7_unused + // frame_manager_lr + // + + // PARENT_IJAVA_FRAME_ABI + + struct parent_ijava_frame_abi : abi_48 { + // SOE registers. + // C2i adapters spill their top-frame stack-pointer here. + uint64_t top_frame_sp; // carg_1 + // Sp of calling compiled frame before it was resized by the c2i + // adapter or sp of call stub. 
Does not contain a valid value for
+ // non-initial frames.
+ uint64_t initial_caller_sp; // carg_2
+ // aligned to frame::alignment_in_bytes (16)
+ };
+
+ enum {
+ parent_ijava_frame_abi_size = sizeof(parent_ijava_frame_abi)
+ };
+
+ #define _parent_ijava_frame_abi(_component) \
+ (offset_of(frame::parent_ijava_frame_abi, _component))
+
+ // TOP_IJAVA_FRAME_ABI
+
+ struct top_ijava_frame_abi : parent_ijava_frame_abi {
+ uint64_t carg_3_unused; // carg_3
+ uint64_t carg_4_unused; //_16 carg_4
+ uint64_t carg_5_unused; // carg_5
+ uint64_t carg_6_unused; //_16 carg_6
+ uint64_t carg_7_unused; // carg_7
+ // Use carg_8 for storing frame_manager_lr. The size of
+ // top_ijava_frame_abi must match abi_112.
+ uint64_t frame_manager_lr; //_16 carg_8
+ // nothing to add here!
+ // aligned to frame::alignment_in_bytes (16)
+ };
+
+ enum {
+ top_ijava_frame_abi_size = sizeof(top_ijava_frame_abi)
+ };
+
+ #define _top_ijava_frame_abi(_component) \
+ (offset_of(frame::top_ijava_frame_abi, _component))
+
+ // ENTRY_FRAME
+
+ struct entry_frame_locals {
+ uint64_t call_wrapper_address;
+ uint64_t result_address; //_16
+ uint64_t result_type;
+ uint64_t arguments_tos_address; //_16
+ // aligned to frame::alignment_in_bytes (16)
+ uint64_t r[spill_nonvolatiles_size/sizeof(uint64_t)];
+ };
+
+ enum {
+ entry_frame_locals_size = sizeof(entry_frame_locals)
+ };
+
+ #define _entry_frame_locals_neg(_component) \
+ (int)(-frame::entry_frame_locals_size + offset_of(frame::entry_frame_locals, _component))
+
+
+ // Frame layout for JIT generated methods
+ //
+ // In these figures the stack grows upwards, while memory grows
+ // downwards. Square brackets denote regions possibly larger than single
+ // 64 bit slots.
+ //
+ // STACK (interpreted Java calls JIT generated Java):
+ // [JIT_FRAME] <-- SP (mod 16 = 0)
+ // [TOP_IJAVA_FRAME]
+ // ...
+ //
+ // JIT_FRAME (is a C frame according to PPC-64 ABI):
+ // [out_preserve]
+ // [out_args]
+ // [spills]
+ // [pad_1]
+ // [monitor] (optional)
+ // ...
+ // [monitor] (optional)
+ // [pad_2]
+ // [in_preserve] added / removed by prolog / epilog
+ //
+
+ // JIT_ABI (TOP and PARENT)
+
+ struct jit_abi {
+ uint64_t callers_sp;
+ uint64_t cr;
+ uint64_t lr;
+ uint64_t toc;
+ // Nothing to add here!
+ // NOT ALIGNED to frame::alignment_in_bytes (16).
+ };
+
+ struct jit_out_preserve : jit_abi {
+ // Nothing to add here!
+ };
+
+ struct jit_in_preserve {
+ // Nothing to add here!
+ };
+
+ enum {
+ jit_out_preserve_size = sizeof(jit_out_preserve),
+ jit_in_preserve_size = sizeof(jit_in_preserve)
+ };
+
+ struct jit_monitor {
+ uint64_t monitor[1];
+ };
+
+ enum {
+ jit_monitor_size = sizeof(jit_monitor),
+ };
+
+ private:
+
+ // STACK:
+ // ...
+ // [THIS_FRAME] <-- this._sp (stack pointer for this frame)
+ // [CALLER_FRAME] <-- this.fp() (_sp of caller's frame)
+ // ...
+ //
+
+ // frame pointer for this frame
+ intptr_t* _fp;
+
+ // The frame's stack pointer before it has been extended by a c2i adapter;
+ // needed by deoptimization
+ intptr_t* _unextended_sp;
+ void adjust_unextended_sp();
+
+ public:
+
+ // Accessors for fields
+ intptr_t* fp() const { return _fp; }
+
+ // Accessors for ABIs
+ inline abi_48* own_abi() const { return (abi_48*) _sp; }
+ inline abi_48* callers_abi() const { return (abi_48*) _fp; }
+
+ private:
+
+ // Find codeblob and set deopt_state.
+ inline void find_codeblob_and_set_pc_and_deopt_state(address pc); + + public: + + // Constructors + inline frame(intptr_t* sp); + frame(intptr_t* sp, address pc); + inline frame(intptr_t* sp, address pc, intptr_t* unextended_sp); + + private: + + intptr_t* compiled_sender_sp(CodeBlob* cb) const; + address* compiled_sender_pc_addr(CodeBlob* cb) const; + address* sender_pc_addr(void) const; + + public: + +#ifdef CC_INTERP + // Additional interface for interpreter frames: + inline interpreterState get_interpreterState() const; +#endif + + // Size of a monitor in bytes. + static int interpreter_frame_monitor_size_in_bytes(); + + // The size of a cInterpreter object. + static inline int interpreter_frame_cinterpreterstate_size_in_bytes(); + + private: + + // PPC port: permgen stuff + ConstantPoolCache** interpreter_frame_cpoolcache_addr() const; + + public: + + // Additional interface for entry frames: + inline entry_frame_locals* get_entry_frame_locals() const { + return (entry_frame_locals*) (((address) fp()) - entry_frame_locals_size); + } + + enum { + // On PPC the stored pc is the return address itself (the saved LR), + // so no offset needs to be applied. + pc_return_offset = 0 + }; + +#endif // CPU_PPC_VM_FRAME_PPC_HPP
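For illustration, a minimal standalone sketch (not HotSpot code; the abi_48 field names are simplified assumptions, since that struct is defined earlier in this file) of the offset-macro technique behind _abi(), _parent_ijava_frame_abi() and friends: each ABI struct mirrors the stack layout word for word, so taking a field's offset yields the byte displacement that assembler loads and stores use relative to the SP.

#include <cstddef>
#include <cstdint>
#include <cstdio>

// Simplified stand-in for frame::parent_ijava_frame_abi (standard-layout,
// so offsetof is well defined). Field names are illustrative.
struct parent_ijava_frame_abi_sketch {
  uint64_t callers_sp;        // abi_48 part (names assumed)
  uint64_t cr;
  uint64_t lr;
  uint64_t reserved_1;
  uint64_t reserved_2;
  uint64_t toc;
  uint64_t top_frame_sp;      // carg_1
  uint64_t initial_caller_sp; // carg_2
};

#define _abi_sketch(_component) \
  ((int) offsetof(parent_ijava_frame_abi_sketch, _component))

int main() {
  // An assembler sequence would use these values as displacements off
  // R1 (the SP), e.g. "ld Rdst, disp(R1)".
  printf("lr at SP+%d, top_frame_sp at SP+%d\n",
         _abi_sketch(lr), _abi_sketch(top_frame_sp));
  return 0;
}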
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/frame_ppc.inline.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_FRAME_PPC_INLINE_HPP +#define CPU_PPC_VM_FRAME_PPC_INLINE_HPP + +#ifndef CC_INTERP +#error "CC_INTERP must be defined on PPC64" +#endif + +// Inline functions for PPC64 frames: + +// Find the code blob and set pc and deopt_state. +inline void frame::find_codeblob_and_set_pc_and_deopt_state(address pc) { + assert(pc != NULL, "precondition: must have PC"); + + _cb = CodeCache::find_blob(pc); + _pc = pc; // Must be set for get_deopt_original_pc(). + + _fp = (intptr_t*)own_abi()->callers_sp; + // adjust_unextended_sp() uses _fp and the frame size; it must run after + // _cb and _pc are initialized and before get_deopt_original_pc() is called. + adjust_unextended_sp(); + + address original_pc = nmethod::get_deopt_original_pc(this); + if (original_pc != NULL) { + _pc = original_pc; + _deopt_state = is_deoptimized; + } else { + _deopt_state = not_deoptimized; + } + + assert(((uint64_t)_sp & 0xf) == 0, "SP must be 16-byte aligned"); +} + +// Constructors + +// The default constructor nulls all fields; the other constructors set _fp +// and adjust _unextended_sp in find_codeblob_and_set_pc_and_deopt_state. +inline frame::frame() : _sp(NULL), _unextended_sp(NULL), _fp(NULL), _cb(NULL), _pc(NULL), _deopt_state(unknown) {} + +inline frame::frame(intptr_t* sp) : _sp(sp), _unextended_sp(sp) { + find_codeblob_and_set_pc_and_deopt_state((address)own_abi()->lr); // also sets _fp and adjusts _unextended_sp +} + +inline frame::frame(intptr_t* sp, address pc) : _sp(sp), _unextended_sp(sp) { + find_codeblob_and_set_pc_and_deopt_state(pc); // also sets _fp and adjusts _unextended_sp +} + +inline frame::frame(intptr_t* sp, address pc, intptr_t* unextended_sp) : _sp(sp), _unextended_sp(unextended_sp) { + find_codeblob_and_set_pc_and_deopt_state(pc); // also sets _fp and adjusts _unextended_sp +} + +// Accessors + +// Return a unique id for this frame. The id must allow us to distinguish +// identity and the younger/older relation of frames. NULL represents an +// invalid (incomparable) frame. +inline intptr_t* frame::id(void) const { + // Use _unextended_sp as the frame's id. Because this port has no + // adapters but resizes compiled frames instead, code such as JVMTI + // would not work correctly if we returned the frame's (current) SP.
+ return _unextended_sp; +} + +// Return true if this frame is older (a less recent activation) than +// the frame represented by id. +inline bool frame::is_older(intptr_t* id) const { + assert(this->id() != NULL && id != NULL, "NULL frame id"); + // Stack grows towards smaller addresses on PPC64. + return this->id() > id; +} + +inline int frame::frame_size(RegisterMap* map) const { + // Stack grows towards smaller addresses on PPC64: sender is at a higher address. + return sender_sp() - sp(); +} + +// Return the frame's stack pointer before it has been extended by a +// c2i adapter. This is needed by deoptimization for ignoring c2i adapter +// frames. +inline intptr_t* frame::unextended_sp() const { + return _unextended_sp; +} + +// All frames have this field. +inline address frame::sender_pc() const { + return (address)callers_abi()->lr; +} +inline address* frame::sender_pc_addr() const { + return (address*)&(callers_abi()->lr); +} + +// All frames have this field. +inline intptr_t* frame::sender_sp() const { + return (intptr_t*)callers_abi(); +} + +// All frames have this field. +inline intptr_t* frame::link() const { + return (intptr_t*)callers_abi()->callers_sp; +} + +inline intptr_t* frame::real_fp() const { + return fp(); +} + +#ifdef CC_INTERP + +inline interpreterState frame::get_interpreterState() const { + return (interpreterState)(((address)callers_abi()) + - frame::interpreter_frame_cinterpreterstate_size_in_bytes()); +} + +inline intptr_t** frame::interpreter_frame_locals_addr() const { + interpreterState istate = get_interpreterState(); + return (intptr_t**)&istate->_locals; +} + +inline intptr_t* frame::interpreter_frame_bcx_addr() const { + interpreterState istate = get_interpreterState(); + return (intptr_t*)&istate->_bcp; +} + +inline intptr_t* frame::interpreter_frame_mdx_addr() const { + interpreterState istate = get_interpreterState(); + return (intptr_t*)&istate->_mdx; +} + +inline intptr_t* frame::interpreter_frame_expression_stack() const { + return (intptr_t*)interpreter_frame_monitor_end() - 1; +} + +inline jint frame::interpreter_frame_expression_stack_direction() { + return -1; +} + +// Top of the expression stack. +inline intptr_t* frame::interpreter_frame_tos_address() const { + interpreterState istate = get_interpreterState(); + return istate->_stack + 1; +} + +inline intptr_t* frame::interpreter_frame_tos_at(jint offset) const { + return &interpreter_frame_tos_address()[offset]; +} + +// Monitor elements + +// In keeping with the Intel side: 'end' is lower in memory than 'begin', +// and the beginning element is the oldest element. Note that 'begin' +// points one past the last monitor. + +inline BasicObjectLock* frame::interpreter_frame_monitor_begin() const { + return get_interpreterState()->monitor_base(); +} + +inline BasicObjectLock* frame::interpreter_frame_monitor_end() const { + return (BasicObjectLock*)get_interpreterState()->stack_base(); +} + +inline int frame::interpreter_frame_cinterpreterstate_size_in_bytes() { + // Size of an interpreter object. Not aligned with frame size.
+ return round_to(sizeof(BytecodeInterpreter), 8); +} + +inline Method** frame::interpreter_frame_method_addr() const { + interpreterState istate = get_interpreterState(); + return &istate->_method; +} + +// Constant pool cache + +inline ConstantPoolCache** frame::interpreter_frame_cpoolcache_addr() const { + interpreterState istate = get_interpreterState(); + return &istate->_constants; // should really use an accessor +} + +inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { + interpreterState istate = get_interpreterState(); + return &istate->_constants; +} +#endif // CC_INTERP + +inline int frame::interpreter_frame_monitor_size() { + // Number of stack slots for a monitor. + return round_to(BasicObjectLock::size(), // number of stack slots + WordsPerLong); // number of stack slots for a Java long +} + +inline int frame::interpreter_frame_monitor_size_in_bytes() { + return frame::interpreter_frame_monitor_size() * wordSize; +} + +// Entry frames + +inline intptr_t* frame::entry_frame_argument_at(int offset) const { + // Since an entry frame always calls the interpreter first, the + // parameters are on the stack and relative to a known register in the + // entry frame. + intptr_t* tos = (intptr_t*)get_entry_frame_locals()->arguments_tos_address; + return &tos[offset + 1]; // prepushed tos +} + +inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { + return (JavaCallWrapper**)&get_entry_frame_locals()->call_wrapper_address; +} + +inline oop frame::saved_oop_result(RegisterMap* map) const { + return *((oop*)map->location(R3->as_VMReg())); +} + +inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { + *((oop*)map->location(R3->as_VMReg())) = obj; +} + +#endif // CPU_PPC_VM_FRAME_PPC_INLINE_HPP
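Both interpreter_frame_cinterpreterstate_size_in_bytes() and interpreter_frame_monitor_size() above lean on HotSpot's round_to() to round a size up to a power-of-two granularity. As a standalone sanity sketch of that arithmetic (round_to_sketch is a stand-in, not the HotSpot utility):

#include <cassert>
#include <cstdint>

// Stand-in for HotSpot's round_to(): round x up to the next multiple of m,
// where m must be a power of two.
inline intptr_t round_to_sketch(intptr_t x, intptr_t m) {
  assert((m & (m - 1)) == 0 && "modulus must be a power of two");
  return (x + m - 1) & ~(m - 1);
}

int main() {
  // e.g. a 3-slot monitor rounded up to 2-slot (Java long) units:
  assert(round_to_sketch(3, 2) == 4);
  // e.g. an interpreter-state size of 1234 bytes rounded to 8 bytes:
  assert(round_to_sketch(1234, 8) == 1240);
  return 0;
}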
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/globalDefinitions_ppc.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,34 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP +#define CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP + +// Size of PPC Instructions +const int BytesPerInstWord = 4; + +const int StackAlignmentInBytes = 16; + +#endif // CPU_PPC_VM_GLOBALDEFINITIONS_PPC_HPP
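StackAlignmentInBytes == 16 is the invariant behind the "(uint64_t)_sp & 0xf" assert in frame_ppc.inline.hpp above; with a power-of-two alignment, the check reduces to masking the low bits. A standalone sketch (not VM code):

#include <cstdint>

// With 16-byte stack alignment, an aligned SP has its low four bits clear.
// The constant mirrors StackAlignmentInBytes from the header above.
inline bool is_stack_aligned_sketch(const void* sp) {
  const uintptr_t alignment = 16;
  return (reinterpret_cast<uintptr_t>(sp) & (alignment - 1)) == 0;
}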
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/globals_ppc.hpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#ifndef CPU_PPC_VM_GLOBALS_PPC_HPP +#define CPU_PPC_VM_GLOBALS_PPC_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, ConvertSleepToYield, true); +define_pd_global(bool, ShareVtableStubs, false); // Improves performance markedly for mtrt and compress. +define_pd_global(bool, NeedsDeoptSuspend, false); // Only register window machines need this. + + +define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks. +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast. + +// Use large code-entry alignment. +define_pd_global(intx, CodeEntryAlignment, 128); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); +define_pd_global(intx, InlineSmallCode, 1500); + +define_pd_global(intx, PreInflateSpin, 10); + +// Flags for template interpreter. +define_pd_global(bool, RewriteBytecodes, true); +define_pd_global(bool, RewriteFrequentPairs, true); + +define_pd_global(bool, UseMembar, false); + +// GC Ergo Flags +define_pd_global(intx, CMSYoungGenPerWorker, 16*M); // Default max size of CMS young gen, per GC worker thread. + + +// Platform dependent flag handling: flags only defined on this platform. +#define ARCH_FLAGS(develop, product, diagnostic, experimental, notproduct) \ + product(uintx, PowerArchitecturePPC64, 0, \ + "CPU Version: x for PowerX. Currently recognizes Power5 to " \ + "Power7. Default is 0. CPUs newer than Power7 will be " \ + "recognized as Power7.") \ + \ + /* Reoptimize code-sequences of calls at runtime, e.g. replace an */ \ + /* indirect call by a direct call. 
*/ \ + product(bool, ReoptimizeCallSequences, true, \ + "Reoptimize code-sequences of calls at runtime.") \ + \ + product(bool, UseLoadInstructionsForStackBangingPPC64, false, \ + "Use load instructions for stack banging.") \ + \ + /* Special instructions. */ \ + \ + product(bool, UseCountLeadingZerosInstructionsPPC64, true, \ + "Use count leading zeros instructions.") \ + \ + product(bool, UseExtendedLoadAndReserveInstructionsPPC64, false, \ + "Use extended versions of load-and-reserve instructions.") \ + \ + product(bool, UseRotateAndMaskInstructionsPPC64, true, \ + "Use rotate and mask instructions.") \ + \ + product(bool, UseStaticBranchPredictionInCompareAndSwapPPC64, true, \ + "Use static branch prediction hints in CAS operations.") \ + \ + /* Trap-based checks. */ \ + /* Trap-based checks use the PPC trap instructions to check certain */ \ + /* conditions. Such an instruction raises a SIGTRAP, which is caught */ \ + /* by the VM's signal handler. */ \ + product(bool, UseSIGTRAP, false, \ + "Allow trap instructions that make use of SIGTRAP. Set this to " \ + "false to switch off all optimizations requiring SIGTRAP.") \ + product(bool, TrapBasedICMissChecks, true, \ + "Raise and handle SIGTRAP if an inline cache miss is detected.") \ + product(bool, TrapBasedNotEntrantChecks, true, \ + "Raise and handle SIGTRAP if calling a not-entrant or zombie" \ + " method.") \ + product(bool, TrapBasedNullChecks, true, \ + "Generate code for null checks that uses a cmp and trap " \ + "instruction raising SIGTRAP. This is only used if an access to " \ + "null (+offset) will not raise a SIGSEGV.") \ + product(bool, TrapBasedRangeChecks, true, \ + "Raise and handle SIGTRAP if an array out-of-bounds check fails.") \ + product(bool, TraceTraps, false, "Trace all traps the signal handler " \ + "handles.") \ + \ + product(bool, ZapMemory, false, "Write 0x0101... to empty memory." \ + " Use this to ease debugging.") \ + + + +#endif // CPU_PPC_VM_GLOBALS_PPC_HPP
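For orientation, the trap-based checks described above collapse a compare-and-branch into a single conditional trap instruction. A minimal PPC64-only sketch (illustrative only, not code from this changeset; the sequences the VM actually emits differ):

#include <cstddef>

// PPC64-only sketch: "tdi 4, reg, 0" is trap-doubleword-immediate with
// TO = 4 ("trap if equal"), i.e. it raises SIGTRAP iff reg == 0. The VM's
// signal handler can then map a SIGTRAP at a known pc to, for example, an
// implicit NullPointerException, leaving no branch on the fast path.
static inline void trap_if_null_sketch(const void* p) {
#if defined(__powerpc64__)
  __asm__ __volatile__("tdi 4, %0, 0" : : "r"(p));
#else
  (void)p; // other platforms: no-op in this sketch
#endif
}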
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/icBuffer_ppc.cpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,71 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "asm/assembler.hpp" +#include "assembler_ppc.inline.hpp" +#include "code/icBuffer.hpp" +#include "gc_interface/collectedHeap.inline.hpp" +#include "interpreter/bytecodes.hpp" +#include "memory/resourceArea.hpp" +#include "nativeInst_ppc.hpp" +#include "oops/oop.inline.hpp" +#include "oops/oop.inline2.hpp" + +#define __ masm. + +int InlineCacheBuffer::ic_stub_code_size() { + return MacroAssembler::load_const_size + MacroAssembler::b64_patchable_size; +} + +void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) { + ResourceMark rm; + CodeBuffer code(code_begin, ic_stub_code_size()); + MacroAssembler masm(&code); + // Note: even though the code contains embedded metadata, we do not need + // relocation info, because + // (1) the metadata is old (i.e., it doesn't matter for scavenges), and + // (2) these ICStubs are removed *before* a GC happens, so the roots disappear. + + // Load the cached value (a Metadata pointer) into R19_method ... + __ load_const(R19_method, (address) cached_value, R0); + // ... and jump to the entry point. + __ b64_patchable((address) entry_point, relocInfo::none); + + __ flush(); +} + +address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + NativeJump* jump = nativeJump_at(move->next_instruction_address()); + return jump->jump_destination(); +} + +void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { + NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object + void* o = (void*)move->data(); + return o; +}
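For orientation: the IC stub assembled above is simply "materialize cached_value into R19_method, then branch to entry_point", and the two decoding helpers walk it front to back, decoding the constant load at code_begin and the jump immediately behind it. A standalone model of that layout contract (illustrative only; the real stub encodes both values inside instruction sequences, not as raw words, and assumes 64-bit pointers):

#include <cassert>
#include <cstdint>
#include <cstring>

// Model of an IC stub: cached value first, branch target second; each
// decoder starts where the previous sequence ends.
struct ic_stub_model {
  uint8_t bytes[16];

  void assemble(const void* cached_value, const void* entry_point) {
    std::memcpy(bytes,     &cached_value, 8); // stands in for load_const
    std::memcpy(bytes + 8, &entry_point,  8); // stands in for b64_patchable
  }
  const void* cached_value() const {          // cf. ic_buffer_cached_value()
    const void* v; std::memcpy(&v, bytes, 8); return v;
  }
  const void* entry_point() const {           // cf. ic_buffer_entry_point():
    const void* v;                            // skip the load, decode the jump
    std::memcpy(&v, bytes + 8, 8); return v;
  }
};

int main() {
  int dummy_metadata, dummy_target;
  ic_stub_model stub;
  stub.assemble(&dummy_metadata, &dummy_target);
  assert(stub.cached_value() == &dummy_metadata);
  assert(stub.entry_point() == &dummy_target);
  return 0;
}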
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/ppc/vm/icache_ppc.cpp Fri Aug 02 16:46:45 2013 +0200 @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright 2012, 2013 SAP AG. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + * + */ + +#include "precompiled.hpp" +#include "assembler_ppc.inline.hpp" +#include "runtime/icache.hpp" + +// Use inline assembler to implement icache flush. +int ppc64_flush_icache(address start, int lines, int magic){ + address end = start + (unsigned int)lines*ICache::line_size; + assert(start <= end, "flush_icache parms"); + + // store modified cache lines from data cache + for (address a=start; a<end; a+=ICache::line_size) { + __asm__ __volatile__( + "dcbst 0, %0 \n" + : + : "r" (a) + : "memory"); + } + + // sync instruction + __asm__ __volatile__( + "sync \n" + : + : + : "memory"); + + // invalidate respective cache lines in instruction cache + for (address a=start; a<end; a+=ICache::line_size) { + __asm__ __volatile__( + "icbi 0, %0 \n" + : + : "r" (a) + : "memory"); + } + + // discard fetched instructions + __asm__ __volatile__( + "isync \n" + : + : + : "memory"); + + return magi