--- a/indy.compiler.patch Mon Sep 14 15:50:53 2009 +0200
+++ b/indy.compiler.patch Mon Sep 14 16:46:23 2009 +0200
@@ -53,6 +53,29 @@ diff --git a/src/cpu/x86/vm/frame_x86.cp
}
frame frame::sender(RegisterMap* map) const {
+diff --git a/src/cpu/x86/vm/methodHandles_x86.cpp b/src/cpu/x86/vm/methodHandles_x86.cpp
+--- a/src/cpu/x86/vm/methodHandles_x86.cpp
++++ b/src/cpu/x86/vm/methodHandles_x86.cpp
+@@ -678,7 +678,7 @@
+
+ // original 32-bit vmdata word must be of this form:
+ // | MBZ:16 | signBitCount:8 | srcDstTypes:8 | conversionOp:8 |
+- __ xchgl(rcx, rbx_vminfo); // free rcx for shifts
++ __ xchgptr(rcx, rbx_vminfo); // free rcx for shifts
+ __ shll(rdx_temp /*, rcx*/);
+ Label zero_extend, done;
+ __ testl(rcx, CONV_VMINFO_SIGN_FLAG);
+@@ -693,8 +693,8 @@
+ __ shrl(rdx_temp /*, rcx*/);
+
+ __ bind(done);
+- __ movptr(vmarg, rdx_temp);
+- __ xchgl(rcx, rbx_vminfo); // restore rcx_recv
++ __ movl(vmarg, rdx_temp);
++ __ xchgptr(rcx, rbx_vminfo); // restore rcx_recv
+
+ __ jump_to_method_handle_entry(rcx_recv, rdx_temp);
+ }
diff --git a/src/cpu/x86/vm/runtime_x86_32.cpp b/src/cpu/x86/vm/runtime_x86_32.cpp
--- a/src/cpu/x86/vm/runtime_x86_32.cpp
+++ b/src/cpu/x86/vm/runtime_x86_32.cpp
@@ -101,6 +124,105 @@ diff --git a/src/cpu/x86/vm/runtime_x86_
// rcx: exception handler
// rdx: exception pc
__ jmp (rcx);
+diff --git a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
+--- a/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
++++ b/src/cpu/x86/vm/sharedRuntime_x86_64.cpp
+@@ -638,6 +638,10 @@
+
+ __ movptr(rax, Address(rsp, 0));
+
++ // Must preserve original SP for loading incoming arguments because
++ // we need to align the outgoing SP for compiled code.
++ __ movptr(r11, rsp);
++
+ // Cut-out for having no stack args. Since up to 2 int/oop args are passed
+ // in registers, we will occasionally have no stack args.
+ int comp_words_on_stack = 0;
+@@ -661,6 +665,10 @@
+ // as far as the placement of the call instruction
+ __ push(rax);
+
++ // Put saved SP in another register
++ const Register saved_sp = rax;
++ __ movptr(saved_sp, r11);
++
+ // Will jump to the compiled code just as if compiled code was doing it.
+ // Pre-load the register-jump target early, to schedule it better.
+ __ movptr(r11, Address(rbx, in_bytes(methodOopDesc::from_compiled_offset())));
+@@ -680,11 +688,7 @@
+ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(),
+ "scrambled load targets?");
+ // Load in argument order going down.
+- // int ld_off = (total_args_passed + comp_words_on_stack -i)*wordSize;
+- // base ld_off on r13 (sender_sp) as the stack alignment makes offsets from rsp
+- // unpredictable
+- int ld_off = ((total_args_passed - 1) - i)*Interpreter::stackElementSize();
+-
++ int ld_off = (total_args_passed - i)*Interpreter::stackElementSize() + Interpreter::value_offset_in_bytes();
+ // Point to interpreter value (vs. tag)
+ int next_off = ld_off - Interpreter::stackElementSize();
+ //
+@@ -699,10 +703,14 @@
+ if (r_1->is_stack()) {
+ // Convert stack slot to an SP offset (+ wordSize to account for return address )
+ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size + wordSize;
++
++ // We can use r13 as a temp here because compiled code doesn't need r13 as an input,
++ // and if we end up going through a c2i because of a miss, a reasonable value of r13
++ // will be generated.
+ if (!r_2->is_valid()) {
+ // sign extend???
+- __ movl(rax, Address(r13, ld_off));
+- __ movptr(Address(rsp, st_off), rax);
++ __ movl(r13, Address(saved_sp, ld_off));
++ __ movptr(Address(rsp, st_off), r13);
+ } else {
+ //
+ // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE
+@@ -715,9 +723,9 @@
+ // ld_off is MSW so get LSW
+ const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)?
+ next_off : ld_off;
+- __ movq(rax, Address(r13, offset));
++ __ movq(r13, Address(saved_sp, offset));
+ // st_off is LSW (i.e. reg.first())
+- __ movq(Address(rsp, st_off), rax);
++ __ movq(Address(rsp, st_off), r13);
+ }
+ } else if (r_1->is_Register()) { // Register argument
+ Register r = r_1->as_Register();
+@@ -732,16 +740,16 @@
+ next_off : ld_off;
+
+ // this can be a misaligned move
+- __ movq(r, Address(r13, offset));
++ __ movq(r, Address(saved_sp, offset));
+ } else {
+ // sign extend and use a full word?
+- __ movl(r, Address(r13, ld_off));
++ __ movl(r, Address(saved_sp, ld_off));
+ }
+ } else {
+ if (!r_2->is_valid()) {
+- __ movflt(r_1->as_XMMRegister(), Address(r13, ld_off));
++ __ movflt(r_1->as_XMMRegister(), Address(saved_sp, ld_off));
+ } else {
+- __ movdbl(r_1->as_XMMRegister(), Address(r13, next_off));
++ __ movdbl(r_1->as_XMMRegister(), Address(saved_sp, next_off));
+ }
+ }
+ }
+@@ -3319,6 +3327,10 @@
+
+ // rax: exception handler
+
++ // Restore SP from BP if the exception PC is a MethodHandle call.
++ __ cmpl(Address(r15_thread, JavaThread::is_method_handle_exception_offset()), 0);
++ __ cmovptr(Assembler::notEqual, rsp, rbp);
++
+ // We have a handler in rax (could be deopt blob).
+ __ mov(r8, rax);
+
diff --git a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
--- a/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
+++ b/src/cpu/x86/vm/templateInterpreter_x86_32.cpp
@@ -109,6 +231,36 @@ diff --git a/src/cpu/x86/vm/templateInte
if (interpreter_frame != NULL) {
#ifdef ASSERT
- assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable");
++ if (!EnableMethodHandles)
++ // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences?
++ // Probably, since deoptimization doesn't work yet.
++ assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(), "Frame not properly walkable");
+ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)");
+ #endif
+
+diff --git a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
+--- a/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
++++ b/src/cpu/x86/vm/templateInterpreter_x86_64.cpp
+@@ -449,8 +449,12 @@
+ __ addptr(rax, stack_base);
+ __ subptr(rax, stack_size);
+
++ // Use the maximum number of pages we might bang.
++ const int max_pages = StackShadowPages > (StackRedPages+StackYellowPages) ? StackShadowPages :
++ (StackRedPages+StackYellowPages);
++
+ // add in the red and yellow zone sizes
+- __ addptr(rax, (StackRedPages + StackYellowPages) * page_size);
++ __ addptr(rax, max_pages * page_size);
+
+ // check against the current stack bottom
+ __ cmpptr(rsp, rax);
+@@ -1502,8 +1506,10 @@
+ tempcount* Interpreter::stackElementWords() + popframe_extra_args;
+ if (interpreter_frame != NULL) {
+ #ifdef ASSERT
+- assert(caller->unextended_sp() == interpreter_frame->interpreter_frame_sender_sp(),
+- "Frame not properly walkable");
+ if (!EnableMethodHandles)
+ // @@@ FIXME: Should we correct interpreter_frame_sender_sp in the calling sequences?
+ // Probably, since deoptimization doesn't work yet.
@@ -256,6 +408,109 @@ diff --git a/src/cpu/x86/vm/x86_32.ad b/
+ call_epilog,
+ post_call_FPU );
+ ins_pipe( pipe_slow );
++ ins_pc_relative(1);
++ ins_alignment(4);
++%}
++
+ // Call Java Dynamic Instruction
+ // Note: If this code changes, the corresponding ret_addr_offset() and
+ // compute_padding() functions will have to be adjusted.
+diff --git a/src/cpu/x86/vm/x86_64.ad b/src/cpu/x86/vm/x86_64.ad
+--- a/src/cpu/x86/vm/x86_64.ad
++++ b/src/cpu/x86/vm/x86_64.ad
+@@ -551,12 +551,19 @@
+
+ #define __ _masm.
+
++static int preserve_SP_size() {
++ return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg)
++}
++
+ // !!!!! Special hack to get all types of calls to specify the byte offset
+ // from the start of the call to the point where the return address
+ // will point.
+ int MachCallStaticJavaNode::ret_addr_offset()
+ {
+- return 5; // 5 bytes from start of call to where return address points
++ int offset = 5; // 5 bytes from start of call to where return address points
++ if (_method_handle_invoke)
++ offset += preserve_SP_size();
++ return offset;
+ }
+
+ int MachCallDynamicJavaNode::ret_addr_offset()
+@@ -589,6 +596,15 @@
+
+ // The address of the call instruction needs to be 4-byte aligned to
+ // ensure that it does not span a cache line so that it can be patched.
++int CallStaticJavaHandleNode::compute_padding(int current_offset) const
++{
++ current_offset += preserve_SP_size(); // skip mov rbp, rsp
++ current_offset += 1; // skip call opcode byte
++ return round_to(current_offset, alignment_required()) - current_offset;
++}
++
++// The address of the call instruction needs to be 4-byte aligned to
++// ensure that it does not span a cache line so that it can be patched.
+ int CallDynamicJavaDirectNode::compute_padding(int current_offset) const
+ {
+ current_offset += 11; // skip movq instruction + call opcode byte
+@@ -2608,6 +2624,21 @@
+ RELOC_DISP32);
+ %}
+
++ enc_class preserve_SP %{
++ debug_only(int off0 = cbuf.code_size());
++ MacroAssembler _masm(&cbuf);
++ // RBP is preserved across all calls, even compiled calls.
++ // Use it to preserve RSP in places where the callee might change the SP.
++ __ movptr(rbp, rsp);
++ debug_only(int off1 = cbuf.code_size());
++ assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
++ %}
++
++ enc_class restore_SP %{
++ MacroAssembler _masm(&cbuf);
++ __ movptr(rsp, rbp);
++ %}
++
+ enc_class Java_Static_Call(method meth)
+ %{
+ // JAVA STATIC CALL
+@@ -12953,9 +12984,9 @@
+ // Call Java Static Instruction
+ // Note: If this code changes, the corresponding ret_addr_offset() and
+ // compute_padding() functions will have to be adjusted.
+-instruct CallStaticJavaDirect(method meth)
+-%{
++instruct CallStaticJavaDirect(method meth) %{
+ match(CallStaticJava);
++ predicate(!((CallStaticJavaNode*) n)->is_method_handle_invoke());
+ effect(USE meth);
+
+ ins_cost(300);
+@@ -12967,6 +12998,28 @@
+ ins_alignment(4);
+ %}
+
++// Call Java Static Instruction (method handle version)
++// Note: If this code changes, the corresponding ret_addr_offset() and
++// compute_padding() functions will have to be adjusted.
++instruct CallStaticJavaHandle(method meth, rbp_RegP rbp) %{
++ match(CallStaticJava);
++ predicate(((CallStaticJavaNode*) n)->is_method_handle_invoke());
++ effect(USE meth, KILL rbp);
++ // RBP is saved by all callees (for interpreter stack correction).
++ // We use it here for a similar purpose, in {preserve,restore}_SP.
++
++ ins_cost(300);
++ format %{ "call,static/MethodHandle " %}
++ opcode(0xE8); /* E8 cd */
++ ins_encode(preserve_SP,
++ Java_Static_Call(meth),
++ restore_SP,
++ call_epilog);
++ ins_pipe(pipe_slow);
+ ins_pc_relative(1);
+ ins_alignment(4);
+%}