changeset 5955:b11238ed5526

8050942: PPC64: implement template interpreter for ppc64le Reviewed-by: kvn, goetz
author asmundak
date Mon, 28 Jul 2014 10:19:55 +0200
parents ca381ff70dd9
children 3760cef47599
files src/cpu/ppc/vm/assembler_ppc.hpp src/cpu/ppc/vm/assembler_ppc.inline.hpp src/cpu/ppc/vm/interp_masm_ppc_64.cpp src/cpu/ppc/vm/interp_masm_ppc_64.hpp src/cpu/ppc/vm/templateInterpreter_ppc.cpp src/cpu/ppc/vm/templateTable_ppc_64.cpp
diffstat 6 files changed, 144 insertions(+), 59 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/ppc/vm/assembler_ppc.hpp	Thu Jul 17 10:21:31 2014 +0200
+++ b/src/cpu/ppc/vm/assembler_ppc.hpp	Mon Jul 28 10:19:55 2014 +0200
@@ -301,6 +301,7 @@
     LWZ_OPCODE   = (32u << OPCODE_SHIFT),
     LWZX_OPCODE  = (31u << OPCODE_SHIFT |  23u << 1),
     LWZU_OPCODE  = (33u << OPCODE_SHIFT),
+    LWBRX_OPCODE = (31u << OPCODE_SHIFT |  534 << 1),
 
     LHA_OPCODE   = (42u << OPCODE_SHIFT),
     LHAX_OPCODE  = (31u << OPCODE_SHIFT | 343u << 1),
@@ -309,6 +310,7 @@
     LHZ_OPCODE   = (40u << OPCODE_SHIFT),
     LHZX_OPCODE  = (31u << OPCODE_SHIFT | 279u << 1),
     LHZU_OPCODE  = (41u << OPCODE_SHIFT),
+    LHBRX_OPCODE = (31u << OPCODE_SHIFT |  790 << 1),
 
     LBZ_OPCODE   = (34u << OPCODE_SHIFT),
     LBZX_OPCODE  = (31u << OPCODE_SHIFT |  87u << 1),
@@ -1369,11 +1371,17 @@
   inline void lwax( Register d, Register s1, Register s2);
   inline void lwa(  Register d, int si16,    Register s1);
 
+  // 4 bytes reversed
+  inline void lwbrx( Register d, Register s1, Register s2);
+
   // 2 bytes
   inline void lhzx( Register d, Register s1, Register s2);
   inline void lhz(  Register d, int si16,    Register s1);
   inline void lhzu( Register d, int si16,    Register s1);
 
+  // 2 bytes reversed
+  inline void lhbrx( Register d, Register s1, Register s2);
+
   // 2 bytes
   inline void lhax( Register d, Register s1, Register s2);
   inline void lha(  Register d, int si16,    Register s1);
@@ -1866,10 +1874,12 @@
   inline void lwz(  Register d, int si16);
   inline void lwax( Register d, Register s2);
   inline void lwa(  Register d, int si16);
+  inline void lwbrx(Register d, Register s2);
   inline void lhzx( Register d, Register s2);
   inline void lhz(  Register d, int si16);
   inline void lhax( Register d, Register s2);
   inline void lha(  Register d, int si16);
+  inline void lhbrx(Register d, Register s2);
   inline void lbzx( Register d, Register s2);
   inline void lbz(  Register d, int si16);
   inline void ldx(  Register d, Register s2);
--- a/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Thu Jul 17 10:21:31 2014 +0200
+++ b/src/cpu/ppc/vm/assembler_ppc.inline.hpp	Mon Jul 28 10:19:55 2014 +0200
@@ -272,10 +272,14 @@
 inline void Assembler::lwax( Register d, Register s1, Register s2) { emit_int32(LWAX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
 inline void Assembler::lwa(  Register d, int si16,    Register s1) { emit_int32(LWA_OPCODE  | rt(d) | ds(si16)   | ra0mem(s1));}
 
+inline void Assembler::lwbrx( Register d, Register s1, Register s2) { emit_int32(LWBRX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
+
 inline void Assembler::lhzx( Register d, Register s1, Register s2) { emit_int32(LHZX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
 inline void Assembler::lhz(  Register d, int si16,    Register s1) { emit_int32(LHZ_OPCODE  | rt(d) | d1(si16)   | ra0mem(s1));}
 inline void Assembler::lhzu( Register d, int si16,    Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LHZU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));}
 
+inline void Assembler::lhbrx( Register d, Register s1, Register s2) { emit_int32(LHBRX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
+
 inline void Assembler::lhax( Register d, Register s1, Register s2) { emit_int32(LHAX_OPCODE | rt(d) | ra0mem(s1) | rb(s2));}
 inline void Assembler::lha(  Register d, int si16,    Register s1) { emit_int32(LHA_OPCODE  | rt(d) | d1(si16)   | ra0mem(s1));}
 inline void Assembler::lhau( Register d, int si16,    Register s1) { assert(d != s1, "according to ibm manual"); emit_int32(LHAU_OPCODE | rt(d) | d1(si16) | rta0mem(s1));}
@@ -742,10 +746,12 @@
 inline void Assembler::lwz(  Register d, int si16   ) { emit_int32( LWZ_OPCODE  | rt(d) | d1(si16));}
 inline void Assembler::lwax( Register d, Register s2) { emit_int32( LWAX_OPCODE | rt(d) | rb(s2));}
 inline void Assembler::lwa(  Register d, int si16   ) { emit_int32( LWA_OPCODE  | rt(d) | ds(si16));}
+inline void Assembler::lwbrx(Register d, Register s2) { emit_int32( LWBRX_OPCODE| rt(d) | rb(s2));}
 inline void Assembler::lhzx( Register d, Register s2) { emit_int32( LHZX_OPCODE | rt(d) | rb(s2));}
 inline void Assembler::lhz(  Register d, int si16   ) { emit_int32( LHZ_OPCODE  | rt(d) | d1(si16));}
 inline void Assembler::lhax( Register d, Register s2) { emit_int32( LHAX_OPCODE | rt(d) | rb(s2));}
 inline void Assembler::lha(  Register d, int si16   ) { emit_int32( LHA_OPCODE  | rt(d) | d1(si16));}
+inline void Assembler::lhbrx(Register d, Register s2) { emit_int32( LHBRX_OPCODE| rt(d) | rb(s2));}
 inline void Assembler::lbzx( Register d, Register s2) { emit_int32( LBZX_OPCODE | rt(d) | rb(s2));}
 inline void Assembler::lbz(  Register d, int si16   ) { emit_int32( LBZ_OPCODE  | rt(d) | d1(si16));}
 inline void Assembler::ld(   Register d, int si16   ) { emit_int32( LD_OPCODE   | rt(d) | ds(si16));}
--- a/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Thu Jul 17 10:21:31 2014 +0200
+++ b/src/cpu/ppc/vm/interp_masm_ppc_64.cpp	Mon Jul 28 10:19:55 2014 +0200
@@ -119,9 +119,15 @@
     // Call the Interpreter::remove_activation_preserving_args_entry()
     // func to get the address of the same-named entrypoint in the
     // generated interpreter code.
+#if defined(ABI_ELFv2)
+    call_c(CAST_FROM_FN_PTR(address,
+                            Interpreter::remove_activation_preserving_args_entry),
+           relocInfo::none);
+#else
     call_c(CAST_FROM_FN_PTR(FunctionDescriptor*,
                             Interpreter::remove_activation_preserving_args_entry),
            relocInfo::none);
+#endif
 
     // Jump to Interpreter::_remove_activation_preserving_args_entry.
     mtctr(R3_RET);
@@ -331,29 +337,40 @@
 void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(int         bcp_offset,
                                                           Register    Rdst,
                                                           signedOrNot is_signed) {
+#if defined(VM_LITTLE_ENDIAN)
+  if (bcp_offset) {
+    load_const_optimized(Rdst, bcp_offset);
+    lhbrx(Rdst, R14_bcp, Rdst);
+  } else {
+    lhbrx(Rdst, R14_bcp);
+  }
+  if (is_signed == Signed) {
+    extsh(Rdst, Rdst);
+  }
+#else
   // Read Java big endian format.
   if (is_signed == Signed) {
     lha(Rdst, bcp_offset, R14_bcp);
   } else {
     lhz(Rdst, bcp_offset, R14_bcp);
   }
-#if 0
-  assert(Rtmp != Rdst, "need separate temp register");
-  Register Rfirst = Rtmp;
-  lbz(Rfirst, bcp_offset, R14_bcp); // first byte
-  lbz(Rdst, bcp_offset+1, R14_bcp); // second byte
-
-  // Rdst = ((Rfirst<<8) & 0xFF00) | (Rdst &~ 0xFF00)
-  rldimi(/*RA=*/Rdst, /*RS=*/Rfirst, /*sh=*/8, /*mb=*/48);
-  if (is_signed == Signed) {
-    extsh(Rdst, Rdst);
-  }
 #endif
 }
 
 void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(int         bcp_offset,
                                                           Register    Rdst,
                                                           signedOrNot is_signed) {
+#if defined(VM_LITTLE_ENDIAN)
+  if (bcp_offset) {
+    load_const_optimized(Rdst, bcp_offset);
+    lwbrx(Rdst, R14_bcp, Rdst);
+  } else {
+    lwbrx(Rdst, R14_bcp);
+  }
+  if (is_signed == Signed) {
+    extsw(Rdst, Rdst);
+  }
+#else
   // Read Java big endian format.
   if (bcp_offset & 3) { // Offset unaligned?
     load_const_optimized(Rdst, bcp_offset);
@@ -369,6 +386,7 @@
       lwz(Rdst, bcp_offset, R14_bcp);
     }
   }
+#endif
 }
 
 // Load the constant pool cache index from the bytecode stream.
@@ -377,11 +395,17 @@
 //   - Rdst, Rscratch
 void InterpreterMacroAssembler::get_cache_index_at_bcp(Register Rdst, int bcp_offset, size_t index_size) {
   assert(bcp_offset > 0, "bcp is still pointing to start of bytecode");
+  // Cache index is always in the native format, courtesy of Rewriter.
   if (index_size == sizeof(u2)) {
-    get_2_byte_integer_at_bcp(bcp_offset, Rdst, Unsigned);
+    lhz(Rdst, bcp_offset, R14_bcp);
   } else if (index_size == sizeof(u4)) {
     assert(EnableInvokeDynamic, "giant index used only for JSR 292");
-    get_4_byte_integer_at_bcp(bcp_offset, Rdst, Signed);
+    if (bcp_offset & 3) {
+      load_const_optimized(Rdst, bcp_offset);
+      lwax(Rdst, R14_bcp, Rdst);
+    } else {
+      lwa(Rdst, bcp_offset, R14_bcp);
+    }
     assert(constantPoolCacheOopDesc::decode_secondary_index(~123) == 123, "else change next line");
     nand(Rdst, Rdst, Rdst); // convert to plain index
   } else if (index_size == sizeof(u1)) {
@@ -397,6 +421,30 @@
   sldi(cache, cache, exact_log2(in_words(ConstantPoolCacheEntry::size()) * BytesPerWord));
   add(cache, R27_constPoolCache, cache);
 }
+
+// Load 4-byte signed or unsigned integer in Java format (that is, big-endian format)
+// from (Rsrc)+offset.
+void InterpreterMacroAssembler::get_u4(Register Rdst, Register Rsrc, int offset,
+                                       signedOrNot is_signed) {
+#if defined(VM_LITTLE_ENDIAN)
+  if (offset) {
+    load_const_optimized(Rdst, offset);
+    lwbrx(Rdst, Rdst, Rsrc);
+  } else {
+    lwbrx(Rdst, Rsrc);
+  }
+  if (is_signed == Signed) {
+    extsw(Rdst, Rdst);
+  }
+#else
+  if (is_signed == Signed) {
+    lwa(Rdst, offset, Rsrc);
+  } else {
+    lwz(Rdst, offset, Rsrc);
+  }
+#endif
+}
+
 #if 0
 // Load object from cpool->resolved_references(index).
 void InterpreterMacroAssembler::load_resolved_reference_at_index(Register result, Register index) {
--- a/src/cpu/ppc/vm/interp_masm_ppc_64.hpp	Thu Jul 17 10:21:31 2014 +0200
+++ b/src/cpu/ppc/vm/interp_masm_ppc_64.hpp	Mon Jul 28 10:19:55 2014 +0200
@@ -130,6 +130,7 @@
 
   void get_cache_and_index_at_bcp(Register cache, int bcp_offset, size_t index_size = sizeof(u2));
 
+  void get_u4(Register Rdst, Register Rsrc, int offset, signedOrNot is_signed);
 
   // common code
 
--- a/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Thu Jul 17 10:21:31 2014 +0200
+++ b/src/cpu/ppc/vm/templateInterpreter_ppc.cpp	Mon Jul 28 10:19:55 2014 +0200
@@ -187,8 +187,13 @@
   __ get_cache_and_index_at_bcp(cache, 1);
 
   __ bind(Lgot_cache);
-  // Big Endian (get least significant byte of 64 bit value):
+  // Get least significant byte of 64 bit value:
+#if defined(VM_LITTLE_ENDIAN)
+  __ lbz(size, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::flags_offset()), cache);
+#else
   __ lbz(size, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::flags_offset()) + 7, cache);
+#endif
+
   __ sldi(size, size, Interpreter::logStackElementSize);
   __ add(R15_esp, R15_esp, size);
   __ dispatch_next(state, step);
@@ -874,7 +879,9 @@
   // Our signature handlers copy required arguments to the C stack
   // (outgoing C args), R3_ARG1 to R10_ARG8, and FARG1 to FARG13.
   __ mr(R3_ARG1, R18_locals);
+#if !defined(ABI_ELFv2)
   __ ld(signature_handler_fd, 0, signature_handler_fd);
+#endif
 
   __ call_stub(signature_handler_fd);
   // Reload method, it may have moved.
@@ -1039,8 +1046,13 @@
   // native result across the call. No oop is present.
 
   __ mr(R3_ARG1, R16_thread);
+#if defined(ABI_ELFv2)
+  __ call_c(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans),
+            relocInfo::none);
+#else
   __ call_c(CAST_FROM_FN_PTR(FunctionDescriptor*, JavaThread::check_special_condition_for_native_trans),
             relocInfo::none);
+#endif
 
   __ bind(sync_check_done);
 
--- a/src/cpu/ppc/vm/templateTable_ppc_64.cpp	Thu Jul 17 10:21:31 2014 +0200
+++ b/src/cpu/ppc/vm/templateTable_ppc_64.cpp	Mon Jul 28 10:19:55 2014 +0200
@@ -190,8 +190,12 @@
       assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
       assert(load_bc_into_bc_reg, "we use bc_reg as temp");
       __ get_cache_and_index_at_bcp(Rtemp /* dst = cache */, 1);
-      // Big Endian: ((*(cache+indices))>>((1+byte_no)*8))&0xFF
+      // ((*(cache+indices))>>((1+byte_no)*8))&0xFF:
+#if defined(VM_LITTLE_ENDIAN)
+      __ lbz(Rnew_bc, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 1 + byte_no, Rtemp);
+#else
       __ lbz(Rnew_bc, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (1 + byte_no), Rtemp);
+#endif
       __ cmpwi(CCR0, Rnew_bc, 0);
       __ li(Rnew_bc, (unsigned int)(unsigned char)new_bc);
       __ beq(CCR0, L_patch_done);
@@ -211,7 +215,7 @@
     __ lbz(Rtemp, 0, R14_bcp);
     __ cmpwi(CCR0, Rtemp, (unsigned int)(unsigned char)Bytecodes::_breakpoint);
     __ bne(CCR0, L_fast_patch);
-    // Perform the quickening, slowly, in the bowels of the breakpoint table.
+   // Perform the quickening, slowly, in the bowels of the breakpoint table.
     __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), R19_method, R14_bcp, Rnew_bc);
     __ b(L_patch_done);
     __ bind(L_fast_patch);
@@ -1877,8 +1881,8 @@
   __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt));
 
   // Load lo & hi.
-  __ lwz(Rlow_byte, BytesPerInt, Rdef_offset_addr);
-  __ lwz(Rhigh_byte, BytesPerInt * 2, Rdef_offset_addr);
+  __ get_u4(Rlow_byte, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned);
+  __ get_u4(Rhigh_byte, Rdef_offset_addr, 2 *BytesPerInt, InterpreterMacroAssembler::Unsigned);
 
   // Check for default case (=index outside [low,high]).
   __ cmpw(CCR0, R17_tos, Rlow_byte);
@@ -1892,12 +1896,17 @@
   __ profile_switch_case(Rindex, Rhigh_byte /* scratch */, Rscratch1, Rscratch2);
   __ sldi(Rindex, Rindex, LogBytesPerInt);
   __ addi(Rindex, Rindex, 3 * BytesPerInt);
+#if defined(VM_LITTLE_ENDIAN)
+  __ lwbrx(Roffset, Rdef_offset_addr, Rindex);
+  __ extsw(Roffset, Roffset);
+#else
   __ lwax(Roffset, Rdef_offset_addr, Rindex);
+#endif
   __ b(Ldispatch);
 
   __ bind(Ldefault_case);
   __ profile_switch_default(Rhigh_byte, Rscratch1);
-  __ lwa(Roffset, 0, Rdef_offset_addr);
+  __ get_u4(Roffset, Rdef_offset_addr, 0, InterpreterMacroAssembler::Signed);
 
   __ bind(Ldispatch);
 
@@ -1913,12 +1922,11 @@
 // Table switch using linear search through cases.
 // Bytecode stream format:
 // Bytecode (1) | 4-byte padding | default offset (4) | count (4) | value/offset pair1 (8) | value/offset pair2 (8) | ...
-// Note: Everything is big-endian format here. So on little endian machines, we have to revers offset and count and cmp value.
+// Note: Everything is big-endian format here.
 void TemplateTable::fast_linearswitch() {
   transition(itos, vtos);
 
-  Label Lloop_entry, Lsearch_loop, Lfound, Lcontinue_execution, Ldefault_case;
-
+  Label Lloop_entry, Lsearch_loop, Lcontinue_execution, Ldefault_case;
   Register Rcount           = R3_ARG1,
            Rcurrent_pair    = R4_ARG2,
            Rdef_offset_addr = R5_ARG3, // Is going to contain address of default offset.
@@ -1932,47 +1940,40 @@
   __ clrrdi(Rdef_offset_addr, Rdef_offset_addr, log2_long((jlong)BytesPerInt));
 
   // Setup loop counter and limit.
-  __ lwz(Rcount, BytesPerInt, Rdef_offset_addr);    // Load count.
+  __ get_u4(Rcount, Rdef_offset_addr, BytesPerInt, InterpreterMacroAssembler::Unsigned);
   __ addi(Rcurrent_pair, Rdef_offset_addr, 2 * BytesPerInt); // Rcurrent_pair now points to first pair.
 
-  // Set up search loop.
+  __ mtctr(Rcount);
   __ cmpwi(CCR0, Rcount, 0);
-  __ beq(CCR0, Ldefault_case);
-
-  __ mtctr(Rcount);
-
-  // linear table search
-  __ bind(Lsearch_loop);
-
-  __ lwz(Rvalue, 0, Rcurrent_pair);
-  __ lwa(Roffset, 1 * BytesPerInt, Rcurrent_pair);
-
-  __ cmpw(CCR0, Rvalue, Rcmp_value);
-  __ beq(CCR0, Lfound);
-
-  __ addi(Rcurrent_pair, Rcurrent_pair, 2 * BytesPerInt);
-  __ bdnz(Lsearch_loop);
-
-  // default case
+  __ bne(CCR0, Lloop_entry);
+
+  // Default case
   __ bind(Ldefault_case);
-
-  __ lwa(Roffset, 0, Rdef_offset_addr);
+  __ get_u4(Roffset, Rdef_offset_addr, 0, InterpreterMacroAssembler::Signed);
   if (ProfileInterpreter) {
     __ profile_switch_default(Rdef_offset_addr, Rcount/* scratch */);
-    __ b(Lcontinue_execution);
   }
-
-  // Entry found, skip Roffset bytecodes and continue.
-  __ bind(Lfound);
+  __ b(Lcontinue_execution);
+
+  // Next iteration
+  __ bind(Lsearch_loop);
+  __ bdz(Ldefault_case);
+  __ addi(Rcurrent_pair, Rcurrent_pair, 2 * BytesPerInt);
+  __ bind(Lloop_entry);
+  __ get_u4(Rvalue, Rcurrent_pair, 0, InterpreterMacroAssembler::Unsigned);
+  __ cmpw(CCR0, Rvalue, Rcmp_value);
+  __ bne(CCR0, Lsearch_loop);
+
+  // Found, load offset.
+  __ get_u4(Roffset, Rcurrent_pair, BytesPerInt, InterpreterMacroAssembler::Signed);
+  // Calculate case index and profile
+  __ mfctr(Rcurrent_pair);
   if (ProfileInterpreter) {
-    // Calc the num of the pair we hit. Careful, Rcurrent_pair points 2 ints
-    // beyond the actual current pair due to the auto update load above!
-    __ sub(Rcurrent_pair, Rcurrent_pair, Rdef_offset_addr);
-    __ addi(Rcurrent_pair, Rcurrent_pair, - 2 * BytesPerInt);
-    __ srdi(Rcurrent_pair, Rcurrent_pair, LogBytesPerInt + 1);
+    __ sub(Rcurrent_pair, Rcount, Rcurrent_pair);
     __ profile_switch_case(Rcurrent_pair, Rcount /*scratch*/, Rdef_offset_addr/*scratch*/, Rscratch);
-    __ bind(Lcontinue_execution);
   }
+
+  __ bind(Lcontinue_execution);
   __ add(R14_bcp, Roffset, R14_bcp);
   __ dispatch_next(vtos);
 }
@@ -2028,7 +2029,7 @@
 
   // ... initialize i & j ...
   __ li(Ri,0);
-  __ lwz(Rj, -BytesPerInt, Rarray);
+  __ get_u4(Rj, Rarray, -BytesPerInt, InterpreterMacroAssembler::Unsigned);
 
   // ... and start.
   Label entry;
@@ -2045,7 +2046,11 @@
     //   i = h;
     // }
     __ sldi(Rscratch, Rh, log_entry_size);
+#if defined(VM_LITTLE_ENDIAN)
+    __ lwbrx(Rscratch, Rscratch, Rarray);
+#else
     __ lwzx(Rscratch, Rscratch, Rarray);
+#endif
 
     // if (key < current value)
     //   Rh = Rj
@@ -2077,20 +2082,20 @@
   // Ri = value offset
   __ sldi(Ri, Ri, log_entry_size);
   __ add(Ri, Ri, Rarray);
-  __ lwz(Rscratch, 0, Ri);
+  __ get_u4(Rscratch, Ri, 0, InterpreterMacroAssembler::Unsigned);
 
   Label not_found;
   // Ri = offset offset
   __ cmpw(CCR0, Rkey, Rscratch);
   __ beq(CCR0, not_found);
   // entry not found -> j = default offset
-  __ lwz(Rj, -2 * BytesPerInt, Rarray);
+  __ get_u4(Rj, Rarray, -2 * BytesPerInt, InterpreterMacroAssembler::Unsigned);
   __ b(default_case);
 
   __ bind(not_found);
   // entry found -> j = offset
   __ profile_switch_case(Rh, Rj, Rscratch, Rkey);
-  __ lwz(Rj, BytesPerInt, Ri);
+  __ get_u4(Rj, Ri, BytesPerInt, InterpreterMacroAssembler::Unsigned);
 
   if (ProfileInterpreter) {
     __ b(continue_execution);
@@ -2194,8 +2199,11 @@
   } else {
     assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range");
     // We are resolved if the indices offset contains the current bytecode.
-    // Big Endian:
-    __ lbz(Rscratch, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (byte_no + 1), Rcache);
+#if defined(VM_LITTLE_ENDIAN)
+  __ lbz(Rscratch, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()) + byte_no + 1, Rcache);
+#else
+  __ lbz(Rscratch, in_bytes(constantPoolCacheOopDesc::base_offset() + ConstantPoolCacheEntry::indices_offset()) + 7 - (byte_no + 1), Rcache);
+#endif
     // Acquire by cmp-br-isync (see below).
     __ cmpdi(CCR0, Rscratch, (int)bytecode());
     __ beq(CCR0, Lresolved);