annotate src/cpu/x86/vm/x86_32.ad @ 747:93c14e5562c4

6823354: Add intrinsics for {Integer,Long}.{numberOfLeadingZeros,numberOfTrailingZeros}()
Summary: These methods can be intrinsified by using bit scan, bit test, and population count instructions.
Reviewed-by: kvn, never
author twisti
date Wed, 06 May 2009 00:27:52 -0700
parents fbde8ec322d0
children 2056494941db
rev   line source
duke@0 1 //
twisti@603 2 // Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved.
duke@0 3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
duke@0 4 //
duke@0 5 // This code is free software; you can redistribute it and/or modify it
duke@0 6 // under the terms of the GNU General Public License version 2 only, as
duke@0 7 // published by the Free Software Foundation.
duke@0 8 //
duke@0 9 // This code is distributed in the hope that it will be useful, but WITHOUT
duke@0 10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
duke@0 11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
duke@0 12 // version 2 for more details (a copy is included in the LICENSE file that
duke@0 13 // accompanied this code).
duke@0 14 //
duke@0 15 // You should have received a copy of the GNU General Public License version
duke@0 16 // 2 along with this work; if not, write to the Free Software Foundation,
duke@0 17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
duke@0 18 //
duke@0 19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
duke@0 20 // CA 95054 USA or visit www.sun.com if you need additional information or
duke@0 21 // have any questions.
duke@0 22 //
duke@0 23 //
duke@0 24
duke@0 25 // X86 Architecture Description File
duke@0 26
duke@0 27 //----------REGISTER DEFINITION BLOCK------------------------------------------
duke@0 28 // This information is used by the matcher and the register allocator to
duke@0 29 // describe individual registers and classes of registers within the target
duke@0 30 // architecture.
duke@0 31
duke@0 32 register %{
duke@0 33 //----------Architecture Description Register Definitions----------------------
duke@0 34 // General Registers
duke@0 35 // "reg_def" name ( register save type, C convention save type,
duke@0 36 // ideal register type, encoding );
duke@0 37 // Register Save Types:
duke@0 38 //
duke@0 39 // NS = No-Save: The register allocator assumes that these registers
duke@0 40 // can be used without saving upon entry to the method, &
duke@0 41 // that they do not need to be saved at call sites.
duke@0 42 //
duke@0 43 // SOC = Save-On-Call: The register allocator assumes that these registers
duke@0 44 // can be used without saving upon entry to the method,
duke@0 45 // but that they must be saved at call sites.
duke@0 46 //
duke@0 47 // SOE = Save-On-Entry: The register allocator assumes that these registers
duke@0 48 // must be saved before using them upon entry to the
duke@0 49 // method, but they do not need to be saved at call
duke@0 50 // sites.
duke@0 51 //
duke@0 52 // AS = Always-Save: The register allocator assumes that these registers
duke@0 53 // must be saved before using them upon entry to the
duke@0 54 // method, & that they must be saved at call sites.
duke@0 55 //
duke@0 56 // Ideal Register Type is used to determine how to save & restore a
duke@0 57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
duke@0 58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
duke@0 59 //
duke@0 60 // The encoding number is the actual bit-pattern placed into the opcodes.
duke@0 61
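// For example, the ECX definition below, reg_def ECX(SOC, SOC, Op_RegI, 1,
// rcx->as_VMReg()), reads: caller-saved in both the compiled-Java and C
// conventions (SOC, SOC), spilled with LoadI/StoreI (Op_RegI), and encoded
// as 1 in ModRM reg/rm fields.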
duke@0 62 // General Registers
duke@0 63 // We previously set EBX, ESI, and EDI as save-on-entry for Java code,
duke@0 64 // then turned SOE off because of the frequent use of uncommon traps.
duke@0 65 // Now that the allocator is better, ESI and EDI are SOE registers again.
duke@0 66
duke@0 67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
duke@0 68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
duke@0 69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
duke@0 70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
duke@0 71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
duke@0 72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
duke@0 73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
duke@0 74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
duke@0 75 reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
duke@0 76
duke@0 77 // Special Registers
duke@0 78 reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
duke@0 79
duke@0 80 // Float registers. We treat TOS/FPR0 specially. It is invisible to the
duke@0 81 // allocator, and only shows up in the encodings.
duke@0 82 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
duke@0 83 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
duke@0 84 // Ok, so here's the trick: FPR1 is really st(0), except in the midst
duke@0 85 // of emission of assembly for a machnode. During the emission the FPU stack
duke@0 86 // is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
duke@0 87 // the stack will not have this element, so FPR1 == st(0) from the
duke@0 88 // oopMap viewpoint. This same weirdness with numbering forces the
duke@0 89 // instruction encoding to play games with the register
duke@0 90 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation,
duke@0 91 // where it does flt->flt moves, for an example.
duke@0 92 //
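// Concretely: FPR2 has register encode 2, but while a machnode is being
// emitted the FPU stack has been pushed, so FPR2 is st(1).  An FLD of FPR2
// therefore emits 0xD9 followed by 0xC0 + encode - 1 = 0xC1, i.e. FLD st(1),
// as MachSpillCopyNode::implementation does below.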
duke@0 93 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
duke@0 94 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
duke@0 95 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
duke@0 96 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
duke@0 97 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
duke@0 98 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
duke@0 99 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
duke@0 100 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
duke@0 101 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
duke@0 102 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
duke@0 103 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
duke@0 104 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
duke@0 105 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
duke@0 106 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
duke@0 107
duke@0 108 // XMM registers. 128-bit registers of 4 words each, labeled a-d.
duke@0 109 // Word a in each register holds a Float, words ab hold a Double.
duke@0 110 // We currently do not use the SIMD capabilities, so registers cd
duke@0 111 // are unused at the moment.
duke@0 112 reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
duke@0 113 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
duke@0 114 reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
duke@0 115 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
duke@0 116 reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
duke@0 117 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
duke@0 118 reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
duke@0 119 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
duke@0 120 reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
duke@0 121 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
duke@0 122 reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
duke@0 123 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
duke@0 124 reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
duke@0 125 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
duke@0 126 reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
duke@0 127 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
duke@0 128
duke@0 129 // Specify priority of register selection within phases of register
duke@0 130 // allocation. Highest priority is first. A useful heuristic is to
duke@0 131 // give registers a low priority when they are required by machine
duke@0 132 // instructions, like EAX and EDX. Registers which are used as
twisti@580 133 // pairs must fall on an even boundary (witness the FPR#L's in this list).
duke@0 134 // For the Intel integer registers, the equivalent Long pairs are
duke@0 135 // EDX:EAX, EBX:ECX, and EDI:EBP.
duke@0 136 alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
duke@0 137 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
duke@0 138 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
duke@0 139 FPR6L, FPR6H, FPR7L, FPR7H );
duke@0 140
duke@0 141 alloc_class chunk1( XMM0a, XMM0b,
duke@0 142 XMM1a, XMM1b,
duke@0 143 XMM2a, XMM2b,
duke@0 144 XMM3a, XMM3b,
duke@0 145 XMM4a, XMM4b,
duke@0 146 XMM5a, XMM5b,
duke@0 147 XMM6a, XMM6b,
duke@0 148 XMM7a, XMM7b, EFLAGS);
duke@0 149
duke@0 150
duke@0 151 //----------Architecture Description Register Classes--------------------------
duke@0 152 // Several register classes are automatically defined based upon information in
duke@0 153 // this architecture description.
duke@0 154 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
duke@0 155 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
duke@0 156 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
duke@0 157 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
duke@0 158 //
duke@0 159 // Class for all registers
duke@0 160 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
duke@0 161 // Class for general registers
duke@0 162 reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
duke@0 163 // Class for general registers which may be used for implicit null checks on win95
duke@0 164 // Also safe for use by tailjump. We don't want to allocate in rbp.
duke@0 165 reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
duke@0 166 // Class of "X" registers
duke@0 167 reg_class x_reg(EBX, ECX, EDX, EAX);
duke@0 168 // Class of registers that can appear in an address with no offset.
duke@0 169 // EBP and ESP require an extra instruction byte for zero offset.
duke@0 170 // Used in fast-unlock
duke@0 171 reg_class p_reg(EDX, EDI, ESI, EBX);
duke@0 172 // Class for general registers not including ECX
duke@0 173 reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
duke@0 174 // Class for general registers not including EAX
duke@0 175 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
duke@0 176 // Class for general registers not including EAX or EBX.
duke@0 177 reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
duke@0 178 // Class of EAX (for multiply and divide operations)
duke@0 179 reg_class eax_reg(EAX);
duke@0 180 // Class of EBX (for atomic add)
duke@0 181 reg_class ebx_reg(EBX);
duke@0 182 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
duke@0 183 reg_class ecx_reg(ECX);
duke@0 184 // Class of EDX (for multiply and divide operations)
duke@0 185 reg_class edx_reg(EDX);
duke@0 186 // Class of EDI (for synchronization)
duke@0 187 reg_class edi_reg(EDI);
duke@0 188 // Class of ESI (for synchronization)
duke@0 189 reg_class esi_reg(ESI);
duke@0 190 // Singleton class for interpreter's stack pointer
duke@0 191 reg_class ebp_reg(EBP);
duke@0 192 // Singleton class for stack pointer
duke@0 193 reg_class sp_reg(ESP);
duke@0 194 // Singleton class for instruction pointer
duke@0 195 // reg_class ip_reg(EIP);
duke@0 196 // Singleton class for condition codes
duke@0 197 reg_class int_flags(EFLAGS);
duke@0 198 // Class of integer register pairs
duke@0 199 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
duke@0 200 // Class of integer register pairs that aligns with calling convention
duke@0 201 reg_class eadx_reg( EAX,EDX );
duke@0 202 reg_class ebcx_reg( ECX,EBX );
duke@0 203 // Not AX or DX, used in divides
duke@0 204 reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
duke@0 205
duke@0 206 // Floating point registers. Notice FPR0 is not a choice.
duke@0 207 // FPR0 is never allocated; we use clever encodings to fake
duke@0 208 // 2-address instructions out of Intel's FP stack.
duke@0 209 reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
duke@0 210
duke@0 211 // make a register class for SSE registers
duke@0 212 reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
duke@0 213
duke@0 214 // make a double register class for SSE2 registers
duke@0 215 reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
duke@0 216 XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
duke@0 217
duke@0 218 reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
duke@0 219 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
duke@0 220 FPR7L,FPR7H );
duke@0 221
duke@0 222 reg_class flt_reg0( FPR1L );
duke@0 223 reg_class dbl_reg0( FPR1L,FPR1H );
duke@0 224 reg_class dbl_reg1( FPR2L,FPR2H );
duke@0 225 reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
duke@0 226 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
duke@0 227
duke@0 228 // XMM6 and XMM7 could be used as temporary registers for long, float and
duke@0 229 // double values for SSE2.
duke@0 230 reg_class xdb_reg6( XMM6a,XMM6b );
duke@0 231 reg_class xdb_reg7( XMM7a,XMM7b );
duke@0 232 %}
duke@0 233
duke@0 234
duke@0 235 //----------SOURCE BLOCK-------------------------------------------------------
duke@0 236 // This is a block of C++ code which provides values, functions, and
duke@0 237 // definitions necessary in the rest of the architecture description
duke@0 238 source %{
never@297 239 #define RELOC_IMM32 Assembler::imm_operand
duke@0 240 #define RELOC_DISP32 Assembler::disp32_operand
duke@0 241
duke@0 242 #define __ _masm.
duke@0 243
duke@0 244 // How to find the high register of a Long pair, given the low register
duke@0 245 #define HIGH_FROM_LOW(x) ((x)+2)
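// For example, EAX has encoding 0, so HIGH_FROM_LOW yields 2, the encoding
// of EDX -- matching the long pair EDX:EAX (and likewise EBX:ECX, EDI:EBP).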
duke@0 246
duke@0 247 // These masks are used to provide 128-bit aligned bitmasks to the XMM
duke@0 248 // instructions, to allow sign-masking or sign-bit flipping. They allow
duke@0 249 // fast versions of NegF/NegD and AbsF/AbsD.
duke@0 250
duke@0 251 // Note: 'double' and 'long long' have 32-bit alignment on x86.
duke@0 252 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
duke@0 253 // Use the expression (adr)&(~0xF) to provide the 128-bit-aligned address
duke@0 254 // of a 128-bit operand for SSE instructions.
duke@0 255 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
duke@0 256 // Store the value to a 128-bit operand.
duke@0 257 operand[0] = lo;
duke@0 258 operand[1] = hi;
duke@0 259 return operand;
duke@0 260 }
duke@0 261
duke@0 262 // Buffer for 128-bit masks used by SSE instructions.
duke@0 263 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
duke@0 264
duke@0 265 // Static initialization during VM startup.
duke@0 266 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
duke@0 267 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
duke@0 268 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
duke@0 269 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
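A scalar C++ model (illustrative only, not VM code; the lane values are taken
from the pools above) of what the float masks achieve when the SSE AND/XOR-
style instructions consume them as 16-byte memory operands:

  #include <cstdint>
  #include <cstring>

  // One 32-bit lane of float_signmask_pool: AND clears the sign bit (AbsF).
  static float abs_via_mask(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    bits &= 0x7FFFFFFFu;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
  }

  // One 32-bit lane of float_signflip_pool: XOR flips the sign bit (NegF).
  static float neg_via_mask(float f) {
    uint32_t bits;
    std::memcpy(&bits, &f, sizeof(bits));
    bits ^= 0x80000000u;
    std::memcpy(&f, &bits, sizeof(f));
    return f;
  }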
duke@0 270
duke@0 271 // !!!!! Special hack to get all types of calls to specify the byte offset
duke@0 272 // from the start of the call to the point where the return address
duke@0 273 // will point.
duke@0 274 int MachCallStaticJavaNode::ret_addr_offset() {
duke@0 275 return 5 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0); // 5 bytes from start of call to where return address points
duke@0 276 }
duke@0 277
duke@0 278 int MachCallDynamicJavaNode::ret_addr_offset() {
duke@0 279 return 10 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0); // 10 bytes from start of call to where return address points
duke@0 280 }
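// A sketch of where these byte counts come from (the exact encodings live in
// the encode section elsewhere in this file): a static call is CALL rel32
// (opcode E8 plus a 4-byte displacement, 5 bytes); a dynamic call is preceded
// by a 5-byte MOV EAX,imm32 loading the inline-cache oop, for 10 bytes total;
// and the optional fldcw emitted by pre_call_FPU adds 6 bytes in 24-bit FP
// mode.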
duke@0 281
duke@0 282 static int sizeof_FFree_Float_Stack_All = -1;
duke@0 283
duke@0 284 int MachCallRuntimeNode::ret_addr_offset() {
duke@0 285 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
duke@0 286 return sizeof_FFree_Float_Stack_All + 5 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0);
duke@0 287 }
duke@0 288
duke@0 289 // Indicate if the safepoint node needs the polling page as an input.
duke@0 290 // Since x86 has absolute addressing, it doesn't.
duke@0 291 bool SafePointNode::needs_polling_address_input() {
duke@0 292 return false;
duke@0 293 }
duke@0 294
duke@0 295 //
duke@0 296 // Compute padding required for nodes which need alignment
duke@0 297 //
duke@0 298
duke@0 299 // The address of the call instruction needs to be 4-byte aligned to
duke@0 300 // ensure that it does not span a cache line so that it can be patched.
duke@0 301 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
duke@0 302 if (Compile::current()->in_24_bit_fp_mode())
duke@0 303 current_offset += 6; // skip fldcw in pre_call_FPU, if any
duke@0 304 current_offset += 1; // skip call opcode byte
duke@0 305 return round_to(current_offset, alignment_required()) - current_offset;
duke@0 306 }
duke@0 307
duke@0 308 // The address of the call instruction needs to be 4-byte aligned to
duke@0 309 // ensure that it does not span a cache line so that it can be patched.
duke@0 310 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
duke@0 311 if (Compile::current()->in_24_bit_fp_mode())
duke@0 312 current_offset += 6; // skip fldcw in pre_call_FPU, if any
duke@0 313 current_offset += 5; // skip MOV instruction
duke@0 314 current_offset += 1; // skip call opcode byte
duke@0 315 return round_to(current_offset, alignment_required()) - current_offset;
duke@0 316 }
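// Worked example (assuming alignment_required() == 4): if the dynamic call
// node would start at offset 9, its displacement begins at 9 + 5 + 1 = 15;
// round_to(15, 4) - 15 = 1, so one byte of padding is emitted first.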
duke@0 317
duke@0 318 #ifndef PRODUCT
duke@0 319 void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
duke@0 320 st->print("INT3");
duke@0 321 }
duke@0 322 #endif
duke@0 323
duke@0 324 // EMIT_RM()
duke@0 325 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
duke@0 326 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
duke@0 327 *(cbuf.code_end()) = c;
duke@0 328 cbuf.set_code_end(cbuf.code_end() + 1);
duke@0 329 }
duke@0 330
duke@0 331 // EMIT_CC()
duke@0 332 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
duke@0 333 unsigned char c = (unsigned char)( f1 | f2 );
duke@0 334 *(cbuf.code_end()) = c;
duke@0 335 cbuf.set_code_end(cbuf.code_end() + 1);
duke@0 336 }
duke@0 337
duke@0 338 // EMIT_OPCODE()
duke@0 339 void emit_opcode(CodeBuffer &cbuf, int code) {
duke@0 340 *(cbuf.code_end()) = (unsigned char)code;
duke@0 341 cbuf.set_code_end(cbuf.code_end() + 1);
duke@0 342 }
duke@0 343
duke@0 344 // EMIT_OPCODE() w/ relocation information
duke@0 345 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
duke@0 346 cbuf.relocate(cbuf.inst_mark() + offset, reloc);
duke@0 347 emit_opcode(cbuf, code);
duke@0 348 }
duke@0 349
duke@0 350 // EMIT_D8()
duke@0 351 void emit_d8(CodeBuffer &cbuf, int d8) {
duke@0 352 *(cbuf.code_end()) = (unsigned char)d8;
duke@0 353 cbuf.set_code_end(cbuf.code_end() + 1);
duke@0 354 }
duke@0 355
duke@0 356 // EMIT_D16()
duke@0 357 void emit_d16(CodeBuffer &cbuf, int d16) {
duke@0 358 *((short *)(cbuf.code_end())) = d16;
duke@0 359 cbuf.set_code_end(cbuf.code_end() + 2);
duke@0 360 }
duke@0 361
duke@0 362 // EMIT_D32()
duke@0 363 void emit_d32(CodeBuffer &cbuf, int d32) {
duke@0 364 *((int *)(cbuf.code_end())) = d32;
duke@0 365 cbuf.set_code_end(cbuf.code_end() + 4);
duke@0 366 }
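// These helpers compose instructions a byte at a time.  For example, a
// register-to-register MOV EAX,ECX (opcode 8B /r) could be emitted as:
//
//   emit_opcode(cbuf, 0x8B);                 // MOV r32, r/m32
//   emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);    // (3<<6)|(0<<3)|1 = 0xC1
//
// which is exactly the pattern encode_Copy uses below for reg-reg copies.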
duke@0 367
duke@0 368 // emit a 32-bit value and construct a relocation entry from a relocInfo::relocType
duke@0 369 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
duke@0 370 int format) {
duke@0 371 cbuf.relocate(cbuf.inst_mark(), reloc, format);
duke@0 372
duke@0 373 *((int *)(cbuf.code_end())) = d32;
duke@0 374 cbuf.set_code_end(cbuf.code_end() + 4);
duke@0 375 }
duke@0 376
duke@0 377 // emit a 32-bit value and construct a relocation entry from a RelocationHolder
duke@0 378 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
duke@0 379 int format) {
duke@0 380 #ifdef ASSERT
duke@0 381 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
duke@0 382 assert(oop(d32)->is_oop() && oop(d32)->is_perm(), "cannot embed non-perm oops in code");
duke@0 383 }
duke@0 384 #endif
duke@0 385 cbuf.relocate(cbuf.inst_mark(), rspec, format);
duke@0 386
duke@0 387 *((int *)(cbuf.code_end())) = d32;
duke@0 388 cbuf.set_code_end(cbuf.code_end() + 4);
duke@0 389 }
duke@0 390
duke@0 391 // Access stack slot for load or store
duke@0 392 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
duke@0 393 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
duke@0 394 if( -128 <= disp && disp <= 127 ) {
duke@0 395 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
duke@0 396 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
duke@0 397 emit_d8 (cbuf, disp); // Displacement // R/M byte
duke@0 398 } else {
duke@0 399 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
duke@0 400 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
duke@0 401 emit_d32(cbuf, disp); // Displacement // R/M byte
duke@0 402 }
duke@0 403 }
duke@0 404
duke@0 405 // Emit a register-memory operand (cf. encoding class: eRegI ereg, memory mem) -- emit_reg_mem
duke@0 406 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
duke@0 407 // If there is no index and no scale, use the form without a SIB byte
duke@0 408 if ((index == 0x4) &&
duke@0 409 (scale == 0) && (base != ESP_enc)) {
duke@0 410 // If no displacement, mode is 0x0; unless base is [EBP]
duke@0 411 if ( (displace == 0) && (base != EBP_enc) ) {
duke@0 412 emit_rm(cbuf, 0x0, reg_encoding, base);
duke@0 413 }
duke@0 414 else { // If 8-bit displacement, mode 0x1
duke@0 415 if ((displace >= -128) && (displace <= 127)
duke@0 416 && !(displace_is_oop) ) {
duke@0 417 emit_rm(cbuf, 0x1, reg_encoding, base);
duke@0 418 emit_d8(cbuf, displace);
duke@0 419 }
duke@0 420 else { // If 32-bit displacement
duke@0 421 if (base == -1) { // Special flag for absolute address
duke@0 422 emit_rm(cbuf, 0x0, reg_encoding, 0x5);
duke@0 423 // (manual lies; no SIB needed here)
duke@0 424 if ( displace_is_oop ) {
duke@0 425 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
duke@0 426 } else {
duke@0 427 emit_d32 (cbuf, displace);
duke@0 428 }
duke@0 429 }
duke@0 430 else { // Normal base + offset
duke@0 431 emit_rm(cbuf, 0x2, reg_encoding, base);
duke@0 432 if ( displace_is_oop ) {
duke@0 433 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
duke@0 434 } else {
duke@0 435 emit_d32 (cbuf, displace);
duke@0 436 }
duke@0 437 }
duke@0 438 }
duke@0 439 }
duke@0 440 }
duke@0 441 else { // Else, encode with the SIB byte
duke@0 442 // If no displacement, mode is 0x0; unless base is [EBP]
duke@0 443 if (displace == 0 && (base != EBP_enc)) { // If no displacement
duke@0 444 emit_rm(cbuf, 0x0, reg_encoding, 0x4);
duke@0 445 emit_rm(cbuf, scale, index, base);
duke@0 446 }
duke@0 447 else { // If 8-bit displacement, mode 0x1
duke@0 448 if ((displace >= -128) && (displace <= 127)
duke@0 449 && !(displace_is_oop) ) {
duke@0 450 emit_rm(cbuf, 0x1, reg_encoding, 0x4);
duke@0 451 emit_rm(cbuf, scale, index, base);
duke@0 452 emit_d8(cbuf, displace);
duke@0 453 }
duke@0 454 else { // If 32-bit displacement
duke@0 455 if (base == 0x04 ) {
duke@0 456 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
duke@0 457 emit_rm(cbuf, scale, index, 0x04);
duke@0 458 } else {
duke@0 459 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
duke@0 460 emit_rm(cbuf, scale, index, base);
duke@0 461 }
duke@0 462 if ( displace_is_oop ) {
duke@0 463 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
duke@0 464 } else {
duke@0 465 emit_d32 (cbuf, displace);
duke@0 466 }
duke@0 467 }
duke@0 468 }
duke@0 469 }
duke@0 470 }
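// Example of the stack-slot form used by the spill helpers later in this
// file: MOV EAX,[ESP+8] is produced by encode_RegMem(cbuf, EAX_enc, ESP_enc,
// 0x4, 0, 8, false) as the bytes 44 24 08 following the 8B opcode -- ModRM
// mode 0x1 with rm=0x4 selecting a SIB byte, SIB = (scale 0, no index, base
// ESP), then the 8-bit displacement.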
duke@0 471
duke@0 472
duke@0 473 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
duke@0 474 if( dst_encoding == src_encoding ) {
duke@0 475 // reg-reg copy, use an empty encoding
duke@0 476 } else {
duke@0 477 emit_opcode( cbuf, 0x8B );
duke@0 478 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
duke@0 479 }
duke@0 480 }
duke@0 481
duke@0 482 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
duke@0 483 if( dst_encoding == src_encoding ) {
duke@0 484 // reg-reg copy, use an empty encoding
duke@0 485 } else {
duke@0 486 MacroAssembler _masm(&cbuf);
duke@0 487
duke@0 488 __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
duke@0 489 }
duke@0 490 }
duke@0 491
duke@0 492
duke@0 493 //=============================================================================
duke@0 494 #ifndef PRODUCT
duke@0 495 void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
duke@0 496 Compile* C = ra_->C;
duke@0 497 if( C->in_24_bit_fp_mode() ) {
never@406 498 st->print("FLDCW 24 bit fpu control word");
never@406 499 st->print_cr(""); st->print("\t");
duke@0 500 }
duke@0 501
duke@0 502 int framesize = C->frame_slots() << LogBytesPerInt;
duke@0 503 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
duke@0 504 // Remove two words for return addr and rbp.
duke@0 505 framesize -= 2*wordSize;
duke@0 506
duke@0 507 // Calls to C2R adapters often do not accept exceptional returns.
duke@0 508 // We require that their callers bang for them. But be careful, because
duke@0 509 // some VM calls (such as call site linkage) can use several kilobytes of
duke@0 510 // stack; the stack safety zone should account for that.
duke@0 511 // See bugs 4446381, 4468289, 4497237.
duke@0 512 if (C->need_stack_bang(framesize)) {
never@406 513 st->print_cr("# stack bang"); st->print("\t");
duke@0 514 }
never@406 515 st->print_cr("PUSHL EBP"); st->print("\t");
duke@0 516
duke@0 517 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
never@406 518 st->print("PUSH 0xBADB100D\t# Majik cookie for stack depth check");
never@406 519 st->print_cr(""); st->print("\t");
duke@0 520 framesize -= wordSize;
duke@0 521 }
duke@0 522
duke@0 523 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
duke@0 524 if (framesize) {
never@406 525 st->print("SUB ESP,%d\t# Create frame",framesize);
duke@0 526 }
duke@0 527 } else {
never@406 528 st->print("SUB ESP,%d\t# Create frame",framesize);
duke@0 529 }
duke@0 530 }
duke@0 531 #endif
duke@0 532
duke@0 533
duke@0 534 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
duke@0 535 Compile* C = ra_->C;
duke@0 536
duke@0 537 if (UseSSE >= 2 && VerifyFPU) {
duke@0 538 MacroAssembler masm(&cbuf);
duke@0 539 masm.verify_FPU(0, "FPU stack must be clean on entry");
duke@0 540 }
duke@0 541
duke@0 542 // WARNING: Initial instruction MUST be 5 bytes or longer so that
duke@0 543 // NativeJump::patch_verified_entry will be able to patch out the entry
duke@0 544 // code safely. The fldcw is ok at 6 bytes, the push to verify stack
duke@0 545 // depth is ok at 5 bytes, the frame allocation can be either 3 or
duke@0 546 // 6 bytes. So if we don't do the fldcw or the push then we must
duke@0 547 // use the 6 byte frame allocation even if we have no frame. :-(
duke@0 548 // If method sets FPU control word do it now
duke@0 549 if( C->in_24_bit_fp_mode() ) {
duke@0 550 MacroAssembler masm(&cbuf);
duke@0 551 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
duke@0 552 }
duke@0 553
duke@0 554 int framesize = C->frame_slots() << LogBytesPerInt;
duke@0 555 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
duke@0 556 // Remove two words for return addr and rbp.
duke@0 557 framesize -= 2*wordSize;
duke@0 558
duke@0 559 // Calls to C2R adapters often do not accept exceptional returns.
duke@0 560 // We require that their callers bang for them. But be careful, because
duke@0 561 // some VM calls (such as call site linkage) can use several kilobytes of
duke@0 562 // stack; the stack safety zone should account for that.
duke@0 563 // See bugs 4446381, 4468289, 4497237.
duke@0 564 if (C->need_stack_bang(framesize)) {
duke@0 565 MacroAssembler masm(&cbuf);
duke@0 566 masm.generate_stack_overflow_check(framesize);
duke@0 567 }
duke@0 568
duke@0 569 // We always push rbp so that on return to the interpreter rbp will be
duke@0 570 // restored correctly and we can correct the stack.
duke@0 571 emit_opcode(cbuf, 0x50 | EBP_enc);
duke@0 572
duke@0 573 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
duke@0 574 emit_opcode(cbuf, 0x68); // push 0xbadb100d
duke@0 575 emit_d32(cbuf, 0xbadb100d);
duke@0 576 framesize -= wordSize;
duke@0 577 }
duke@0 578
duke@0 579 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
duke@0 580 if (framesize) {
duke@0 581 emit_opcode(cbuf, 0x83); // sub SP,#framesize
duke@0 582 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
duke@0 583 emit_d8(cbuf, framesize);
duke@0 584 }
duke@0 585 } else {
duke@0 586 emit_opcode(cbuf, 0x81); // sub SP,#framesize
duke@0 587 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
duke@0 588 emit_d32(cbuf, framesize);
duke@0 589 }
duke@0 590 C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
duke@0 591
duke@0 592 #ifdef ASSERT
duke@0 593 if (VerifyStackAtCalls) {
duke@0 594 Label L;
duke@0 595 MacroAssembler masm(&cbuf);
never@297 596 masm.push(rax);
never@297 597 masm.mov(rax, rsp);
never@297 598 masm.andptr(rax, StackAlignmentInBytes-1);
never@297 599 masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
never@297 600 masm.pop(rax);
duke@0 601 masm.jcc(Assembler::equal, L);
duke@0 602 masm.stop("Stack is not properly aligned!");
duke@0 603 masm.bind(L);
duke@0 604 }
duke@0 605 #endif
duke@0 606
duke@0 607 }
duke@0 608
duke@0 609 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
duke@0 610 return MachNode::size(ra_); // too many variables; just compute it the hard way
duke@0 611 }
duke@0 612
duke@0 613 int MachPrologNode::reloc() const {
duke@0 614 return 0; // a large enough number
duke@0 615 }
duke@0 616
duke@0 617 //=============================================================================
duke@0 618 #ifndef PRODUCT
duke@0 619 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
duke@0 620 Compile *C = ra_->C;
duke@0 621 int framesize = C->frame_slots() << LogBytesPerInt;
duke@0 622 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
duke@0 623 // Remove two words for return addr and rbp.
duke@0 624 framesize -= 2*wordSize;
duke@0 625
duke@0 626 if( C->in_24_bit_fp_mode() ) {
duke@0 627 st->print("FLDCW standard control word");
duke@0 628 st->cr(); st->print("\t");
duke@0 629 }
duke@0 630 if( framesize ) {
duke@0 631 st->print("ADD ESP,%d\t# Destroy frame",framesize);
duke@0 632 st->cr(); st->print("\t");
duke@0 633 }
duke@0 634 st->print_cr("POPL EBP"); st->print("\t");
duke@0 635 if( do_polling() && C->is_method_compilation() ) {
duke@0 636 st->print("TEST PollPage,EAX\t! Poll Safepoint");
duke@0 637 st->cr(); st->print("\t");
duke@0 638 }
duke@0 639 }
duke@0 640 #endif
duke@0 641
duke@0 642 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
duke@0 643 Compile *C = ra_->C;
duke@0 644
duke@0 645 // If method set FPU control word, restore to standard control word
duke@0 646 if( C->in_24_bit_fp_mode() ) {
duke@0 647 MacroAssembler masm(&cbuf);
duke@0 648 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
duke@0 649 }
duke@0 650
duke@0 651 int framesize = C->frame_slots() << LogBytesPerInt;
duke@0 652 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
duke@0 653 // Remove two words for return addr and rbp.
duke@0 654 framesize -= 2*wordSize;
duke@0 655
duke@0 656 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
duke@0 657
duke@0 658 if( framesize >= 128 ) {
duke@0 659 emit_opcode(cbuf, 0x81); // add SP, #framesize
duke@0 660 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
duke@0 661 emit_d32(cbuf, framesize);
duke@0 662 }
duke@0 663 else if( framesize ) {
duke@0 664 emit_opcode(cbuf, 0x83); // add SP, #framesize
duke@0 665 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
duke@0 666 emit_d8(cbuf, framesize);
duke@0 667 }
duke@0 668
duke@0 669 emit_opcode(cbuf, 0x58 | EBP_enc);
duke@0 670
duke@0 671 if( do_polling() && C->is_method_compilation() ) {
duke@0 672 cbuf.relocate(cbuf.code_end(), relocInfo::poll_return_type, 0);
duke@0 673 emit_opcode(cbuf,0x85);
duke@0 674 emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
duke@0 675 emit_d32(cbuf, (intptr_t)os::get_polling_page());
duke@0 676 }
duke@0 677 }
duke@0 678
duke@0 679 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
duke@0 680 Compile *C = ra_->C;
duke@0 681 // If method set FPU control word, restore to standard control word
duke@0 682 int size = C->in_24_bit_fp_mode() ? 6 : 0;
duke@0 683 if( do_polling() && C->is_method_compilation() ) size += 6;
duke@0 684
duke@0 685 int framesize = C->frame_slots() << LogBytesPerInt;
duke@0 686 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
duke@0 687 // Remove two words for return addr and rbp.
duke@0 688 framesize -= 2*wordSize;
duke@0 689
duke@0 690 size++; // popl rbp
duke@0 691
duke@0 692 if( framesize >= 128 ) {
duke@0 693 size += 6;
duke@0 694 } else {
duke@0 695 size += framesize ? 3 : 0;
duke@0 696 }
duke@0 697 return size;
duke@0 698 }
duke@0 699
duke@0 700 int MachEpilogNode::reloc() const {
duke@0 701 return 0; // a large enough number
duke@0 702 }
duke@0 703
duke@0 704 const Pipeline * MachEpilogNode::pipeline() const {
duke@0 705 return MachNode::pipeline_class();
duke@0 706 }
duke@0 707
duke@0 708 int MachEpilogNode::safepoint_offset() const { return 0; }
duke@0 709
duke@0 710 //=============================================================================
duke@0 711
duke@0 712 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
duke@0 713 static enum RC rc_class( OptoReg::Name reg ) {
duke@0 714
duke@0 715 if( !OptoReg::is_valid(reg) ) return rc_bad;
duke@0 716 if (OptoReg::is_stack(reg)) return rc_stack;
duke@0 717
duke@0 718 VMReg r = OptoReg::as_VMReg(reg);
duke@0 719 if (r->is_Register()) return rc_int;
duke@0 720 if (r->is_FloatRegister()) {
duke@0 721 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
duke@0 722 return rc_float;
duke@0 723 }
duke@0 724 assert(r->is_XMMRegister(), "must be");
duke@0 725 return rc_xmm;
duke@0 726 }
duke@0 727
never@406 728 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
never@406 729 int opcode, const char *op_str, int size, outputStream* st ) {
duke@0 730 if( cbuf ) {
duke@0 731 emit_opcode (*cbuf, opcode );
duke@0 732 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
duke@0 733 #ifndef PRODUCT
duke@0 734 } else if( !do_size ) {
never@406 735 if( size != 0 ) st->print("\n\t");
duke@0 736 if( opcode == 0x8B || opcode == 0x89 ) { // MOV
never@406 737 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
never@406 738 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
duke@0 739 } else { // FLD, FST, PUSH, POP
never@406 740 st->print("%s [ESP + #%d]",op_str,offset);
duke@0 741 }
duke@0 742 #endif
duke@0 743 }
duke@0 744 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
duke@0 745 return size+3+offset_size;
duke@0 746 }
duke@0 747
duke@0 748 // Helper for XMM registers. Extra opcode bits, limited syntax.
duke@0 749 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
never@406 750 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
duke@0 751 if( cbuf ) {
duke@0 752 if( reg_lo+1 == reg_hi ) { // double move?
duke@0 753 if( is_load && !UseXmmLoadAndClearUpper )
duke@0 754 emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
duke@0 755 else
duke@0 756 emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
duke@0 757 } else {
duke@0 758 emit_opcode(*cbuf, 0xF3 );
duke@0 759 }
duke@0 760 emit_opcode(*cbuf, 0x0F );
duke@0 761 if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
duke@0 762 emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load
duke@0 763 else
duke@0 764 emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
duke@0 765 encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
duke@0 766 #ifndef PRODUCT
duke@0 767 } else if( !do_size ) {
never@406 768 if( size != 0 ) st->print("\n\t");
duke@0 769 if( reg_lo+1 == reg_hi ) { // double move?
never@406 770 if( is_load ) st->print("%s %s,[ESP + #%d]",
duke@0 771 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
duke@0 772 Matcher::regName[reg_lo], offset);
never@406 773 else st->print("MOVSD [ESP + #%d],%s",
duke@0 774 offset, Matcher::regName[reg_lo]);
duke@0 775 } else {
never@406 776 if( is_load ) st->print("MOVSS %s,[ESP + #%d]",
duke@0 777 Matcher::regName[reg_lo], offset);
never@406 778 else st->print("MOVSS [ESP + #%d],%s",
duke@0 779 offset, Matcher::regName[reg_lo]);
duke@0 780 }
duke@0 781 #endif
duke@0 782 }
duke@0 783 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
duke@0 784 return size+5+offset_size;
duke@0 785 }
duke@0 786
duke@0 787
duke@0 788 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
never@406 789 int src_hi, int dst_hi, int size, outputStream* st ) {
duke@0 790 if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
duke@0 791 if( cbuf ) {
duke@0 792 if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
duke@0 793 emit_opcode(*cbuf, 0x66 );
duke@0 794 }
duke@0 795 emit_opcode(*cbuf, 0x0F );
duke@0 796 emit_opcode(*cbuf, 0x28 );
duke@0 797 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
duke@0 798 #ifndef PRODUCT
duke@0 799 } else if( !do_size ) {
never@406 800 if( size != 0 ) st->print("\n\t");
duke@0 801 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
never@406 802 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
duke@0 803 } else {
never@406 804 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
duke@0 805 }
duke@0 806 #endif
duke@0 807 }
duke@0 808 return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
duke@0 809 } else {
duke@0 810 if( cbuf ) {
duke@0 811 emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
duke@0 812 emit_opcode(*cbuf, 0x0F );
duke@0 813 emit_opcode(*cbuf, 0x10 );
duke@0 814 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
duke@0 815 #ifndef PRODUCT
duke@0 816 } else if( !do_size ) {
never@406 817 if( size != 0 ) st->print("\n\t");
duke@0 818 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
never@406 819 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
duke@0 820 } else {
never@406 821 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
duke@0 822 }
duke@0 823 #endif
duke@0 824 }
duke@0 825 return size+4;
duke@0 826 }
duke@0 827 }
duke@0 828
never@406 829 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
duke@0 830 if( cbuf ) {
duke@0 831 emit_opcode(*cbuf, 0x8B );
duke@0 832 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
duke@0 833 #ifndef PRODUCT
duke@0 834 } else if( !do_size ) {
never@406 835 if( size != 0 ) st->print("\n\t");
never@406 836 st->print("MOV %s,%s",Matcher::regName[dst],Matcher::regName[src]);
duke@0 837 #endif
duke@0 838 }
duke@0 839 return size+2;
duke@0 840 }
duke@0 841
never@406 842 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
never@406 843 int offset, int size, outputStream* st ) {
duke@0 844 if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
duke@0 845 if( cbuf ) {
duke@0 846 emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
duke@0 847 emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
duke@0 848 #ifndef PRODUCT
duke@0 849 } else if( !do_size ) {
never@406 850 if( size != 0 ) st->print("\n\t");
never@406 851 st->print("FLD %s",Matcher::regName[src_lo]);
duke@0 852 #endif
duke@0 853 }
duke@0 854 size += 2;
duke@0 855 }
duke@0 856
duke@0 857 int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
duke@0 858 const char *op_str;
duke@0 859 int op;
duke@0 860 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
duke@0 861 op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
duke@0 862 op = 0xDD;
duke@0 863 } else { // 32-bit store
duke@0 864 op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
duke@0 865 op = 0xD9;
duke@0 866 assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
duke@0 867 }
duke@0 868
never@406 869 return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
duke@0 870 }
duke@0 871
duke@0 872 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
duke@0 873 // Get registers to move
duke@0 874 OptoReg::Name src_second = ra_->get_reg_second(in(1));
duke@0 875 OptoReg::Name src_first = ra_->get_reg_first(in(1));
duke@0 876 OptoReg::Name dst_second = ra_->get_reg_second(this );
duke@0 877 OptoReg::Name dst_first = ra_->get_reg_first(this );
duke@0 878
duke@0 879 enum RC src_second_rc = rc_class(src_second);
duke@0 880 enum RC src_first_rc = rc_class(src_first);
duke@0 881 enum RC dst_second_rc = rc_class(dst_second);
duke@0 882 enum RC dst_first_rc = rc_class(dst_first);
duke@0 883
duke@0 884 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
duke@0 885
duke@0 886 // Generate spill code!
duke@0 887 int size = 0;
duke@0 888
duke@0 889 if( src_first == dst_first && src_second == dst_second )
duke@0 890 return size; // Self copy, no move
duke@0 891
duke@0 892 // --------------------------------------
duke@0 893 // Check for mem-mem move. push/pop to move.
duke@0 894 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
duke@0 895 if( src_second == dst_first ) { // overlapping stack copy ranges
duke@0 896 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
never@406 897 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
never@406 898 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
duke@0 899 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
duke@0 900 }
duke@0 901 // move low bits
never@406 902 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
never@406 903 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
duke@0 904 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
never@406 905 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
never@406 906 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
duke@0 907 }
duke@0 908 return size;
duke@0 909 }
duke@0 910
duke@0 911 // --------------------------------------
duke@0 912 // Check for integer reg-reg copy
duke@0 913 if( src_first_rc == rc_int && dst_first_rc == rc_int )
never@406 914 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
duke@0 915
duke@0 916 // Check for integer store
duke@0 917 if( src_first_rc == rc_int && dst_first_rc == rc_stack )
never@406 918 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
duke@0 919
duke@0 920 // Check for integer load
duke@0 921 if( dst_first_rc == rc_int && src_first_rc == rc_stack )
never@406 922 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
duke@0 923
duke@0 924 // --------------------------------------
duke@0 925 // Check for float reg-reg copy
duke@0 926 if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
duke@0 927 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
duke@0 928 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
duke@0 929 if( cbuf ) {
duke@0 930
duke@0 931 // Note the mucking with the register encode to compensate for the 0/1
duke@0 932 // indexing issue mentioned in a comment in the reg_def sections
duke@0 933 // for FPR registers many lines above here.
duke@0 934
duke@0 935 if( src_first != FPR1L_num ) {
duke@0 936 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
duke@0 937 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
duke@0 938 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
duke@0 939 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
duke@0 940 } else {
duke@0 941 emit_opcode (*cbuf, 0xDD ); // FST ST(i)
duke@0 942 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
duke@0 943 }
duke@0 944 #ifndef PRODUCT
duke@0 945 } else if( !do_size ) {
duke@0 946 if( size != 0 ) st->print("\n\t");
duke@0 947 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
duke@0 948 else st->print( "FST %s", Matcher::regName[dst_first]);
duke@0 949 #endif
duke@0 950 }
duke@0 951 return size + ((src_first != FPR1L_num) ? 2+2 : 2);
duke@0 952 }
duke@0 953
duke@0 954 // Check for float store
duke@0 955 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
never@406 956 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
duke@0 957 }
duke@0 958
duke@0 959 // Check for float load
duke@0 960 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
duke@0 961 int offset = ra_->reg2offset(src_first);
duke@0 962 const char *op_str;
duke@0 963 int op;
duke@0 964 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
duke@0 965 op_str = "FLD_D";
duke@0 966 op = 0xDD;
duke@0 967 } else { // 32-bit load
duke@0 968 op_str = "FLD_S";
duke@0 969 op = 0xD9;
duke@0 970 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
duke@0 971 }
duke@0 972 if( cbuf ) {
duke@0 973 emit_opcode (*cbuf, op );
duke@0 974 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
duke@0 975 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
duke@0 976 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
duke@0 977 #ifndef PRODUCT
duke@0 978 } else if( !do_size ) {
duke@0 979 if( size != 0 ) st->print("\n\t");
duke@0 980 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
duke@0 981 #endif
duke@0 982 }
duke@0 983 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
duke@0 984 return size + 3+offset_size+2;
duke@0 985 }
duke@0 986
duke@0 987 // Check for xmm reg-reg copy
duke@0 988 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
duke@0 989 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
duke@0 990 (src_first+1 == src_second && dst_first+1 == dst_second),
duke@0 991 "no non-adjacent float-moves" );
never@406 992 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
duke@0 993 }
duke@0 994
duke@0 995 // Check for xmm store
duke@0 996 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
never@406 997 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
duke@0 998 }
duke@0 999
duke@0 1000 // Check for float xmm load
duke@0 1001 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
never@406 1002 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
duke@0 1003 }
duke@0 1004
duke@0 1005 // Copy from float reg to xmm reg
duke@0 1006 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
duke@0 1007 // copy to the top of stack from floating point reg
duke@0 1008 // and use LEA to preserve flags
duke@0 1009 if( cbuf ) {
duke@0 1010 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
duke@0 1011 emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
duke@0 1012 emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
duke@0 1013 emit_d8(*cbuf,0xF8);
duke@0 1014 #ifndef PRODUCT
duke@0 1015 } else if( !do_size ) {
duke@0 1016 if( size != 0 ) st->print("\n\t");
duke@0 1017 st->print("LEA ESP,[ESP-8]");
duke@0 1018 #endif
duke@0 1019 }
duke@0 1020 size += 4;
duke@0 1021
never@406 1022 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
duke@0 1023
duke@0 1024 // Copy from the temp memory to the xmm reg.
never@406 1025 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
duke@0 1026
duke@0 1027 if( cbuf ) {
duke@0 1028 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
duke@0 1029 emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
duke@0 1030 emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
duke@0 1031 emit_d8(*cbuf,0x08);
duke@0 1032 #ifndef PRODUCT
duke@0 1033 } else if( !do_size ) {
duke@0 1034 if( size != 0 ) st->print("\n\t");
duke@0 1035 st->print("LEA ESP,[ESP+8]");
duke@0 1036 #endif
duke@0 1037 }
duke@0 1038 size += 4;
duke@0 1039 return size;
duke@0 1040 }
duke@0 1041
duke@0 1042 assert( size > 0, "missed a case" );
duke@0 1043
duke@0 1044 // --------------------------------------------------------------------
duke@0 1045 // Check for second bits still needing moving.
duke@0 1046 if( src_second == dst_second )
duke@0 1047 return size; // Self copy; no move
duke@0 1048 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
duke@0 1049
duke@0 1050 // Check for second word int-int move
duke@0 1051 if( src_second_rc == rc_int && dst_second_rc == rc_int )
never@406 1052 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
duke@0 1053
duke@0 1054 // Check for second word integer store
duke@0 1055 if( src_second_rc == rc_int && dst_second_rc == rc_stack )
never@406 1056 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
duke@0 1057
duke@0 1058 // Check for second word integer load
duke@0 1059 if( dst_second_rc == rc_int && src_second_rc == rc_stack )
never@406 1060 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
duke@0 1061
duke@0 1062
duke@0 1063 Unimplemented();
duke@0 1064 }
duke@0 1065
duke@0 1066 #ifndef PRODUCT
duke@0 1067 void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
duke@0 1068 implementation( NULL, ra_, false, st );
duke@0 1069 }
duke@0 1070 #endif
duke@0 1071
duke@0 1072 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
duke@0 1073 implementation( &cbuf, ra_, false, NULL );
duke@0 1074 }
duke@0 1075
duke@0 1076 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
duke@0 1077 return implementation( NULL, ra_, true, NULL );
duke@0 1078 }
duke@0 1079
duke@0 1080 //=============================================================================
duke@0 1081 #ifndef PRODUCT
duke@0 1082 void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
duke@0 1083 st->print("NOP \t# %d bytes pad for loops and calls", _count);
duke@0 1084 }
duke@0 1085 #endif
duke@0 1086
duke@0 1087 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
duke@0 1088 MacroAssembler _masm(&cbuf);
duke@0 1089 __ nop(_count);
duke@0 1090 }
duke@0 1091
duke@0 1092 uint MachNopNode::size(PhaseRegAlloc *) const {
duke@0 1093 return _count;
duke@0 1094 }
duke@0 1095
duke@0 1096
duke@0 1097 //=============================================================================
duke@0 1098 #ifndef PRODUCT
duke@0 1099 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
duke@0 1100 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
duke@0 1101 int reg = ra_->get_reg_first(this);
duke@0 1102 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
duke@0 1103 }
duke@0 1104 #endif
duke@0 1105
duke@0 1106 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
duke@0 1107 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
duke@0 1108 int reg = ra_->get_encode(this);
duke@0 1109 if( offset >= 128 ) {
duke@0 1110 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
duke@0 1111 emit_rm(cbuf, 0x2, reg, 0x04);
duke@0 1112 emit_rm(cbuf, 0x0, 0x04, ESP_enc);
duke@0 1113 emit_d32(cbuf, offset);
duke@0 1114 }
duke@0 1115 else {
duke@0 1116 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
duke@0 1117 emit_rm(cbuf, 0x1, reg, 0x04);
duke@0 1118 emit_rm(cbuf, 0x0, 0x04, ESP_enc);
duke@0 1119 emit_d8(cbuf, offset);
duke@0 1120 }
duke@0 1121 }
duke@0 1122
duke@0 1123 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
duke@0 1124 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
duke@0 1125 if( offset >= 128 ) {
duke@0 1126 return 7;
duke@0 1127 }
duke@0 1128 else {
duke@0 1129 return 4;
duke@0 1130 }
duke@0 1131 }
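// These sizes follow from the encoding above: LEA (8D) + ModRM + SIB is
// 3 bytes, plus a 4-byte displacement when offset >= 128 (7 total) or a
// 1-byte displacement otherwise (4 total).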
duke@0 1132
duke@0 1133 //=============================================================================
duke@0 1134
duke@0 1135 // emit call stub, compiled java to interpreter
duke@0 1136 void emit_java_to_interp(CodeBuffer &cbuf ) {
duke@0 1137 // Stub is fixed up when the corresponding call is converted from calling
duke@0 1138 // compiled code to calling interpreted code.
duke@0 1139 // mov rbx,0
duke@0 1140 // jmp -1
duke@0 1141
duke@0 1142 address mark = cbuf.inst_mark(); // get mark within main instrs section
duke@0 1143
duke@0 1144 // Note that the code buffer's inst_mark is always relative to insts.
duke@0 1145 // That's why we must use the macroassembler to generate a stub.
duke@0 1146 MacroAssembler _masm(&cbuf);
duke@0 1147
duke@0 1148 address base =
duke@0 1149 __ start_a_stub(Compile::MAX_stubs_size);
duke@0 1150 if (base == NULL) return; // CodeBuffer::expand failed
duke@0 1151 // static stub relocation stores the instruction address of the call
duke@0 1152 __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
duke@0 1153 // static stub relocation also tags the methodOop in the code-stream.
duke@0 1154 __ movoop(rbx, (jobject)NULL); // method is zapped till fixup time
never@297 1155 // This is recognized as unresolved by relocs/nativeInst/ic code
never@297 1156 __ jump(RuntimeAddress(__ pc()));
duke@0 1157
duke@0 1158 __ end_a_stub();
duke@0 1159 // Update current stubs pointer and restore code_end.
duke@0 1160 }
duke@0 1161 // size of call stub, compiled java to interpreter
duke@0 1162 uint size_java_to_interp() {
duke@0 1163 return 10; // movl; jmp
duke@0 1164 }
duke@0 1165 // relocation entries for call stub, compiled java to interpreter
duke@0 1166 uint reloc_java_to_interp() {
duke@0 1167 return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
duke@0 1168 }
duke@0 1169
duke@0 1170 //=============================================================================
duke@0 1171 #ifndef PRODUCT
duke@0 1172 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
duke@0 1173 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
duke@0 1174 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
duke@0 1175 st->print_cr("\tNOP");
duke@0 1176 st->print_cr("\tNOP");
duke@0 1177 if( !OptoBreakpoint )
duke@0 1178 st->print_cr("\tNOP");
duke@0 1179 }
duke@0 1180 #endif
duke@0 1181
duke@0 1182 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
duke@0 1183 MacroAssembler masm(&cbuf);
duke@0 1184 #ifdef ASSERT
duke@0 1185 uint code_size = cbuf.code_size();
duke@0 1186 #endif
never@297 1187 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
duke@0 1188 masm.jump_cc(Assembler::notEqual,
duke@0 1189 RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
duke@0 1190 /* WARNING these NOPs are critical so that verified entry point is properly
duke@0 1191 aligned for patching by NativeJump::patch_verified_entry() */
duke@0 1192 int nops_cnt = 2;
duke@0 1193 if( !OptoBreakpoint ) // Leave space for int3
duke@0 1194 nops_cnt += 1;
duke@0 1195 masm.nop(nops_cnt);
duke@0 1196
duke@0 1197 assert(cbuf.code_size() - code_size == size(ra_), "checking code size of inline cache node");
duke@0 1198 }
duke@0 1199
duke@0 1200 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
duke@0 1201 return OptoBreakpoint ? 11 : 12;
duke@0 1202 }
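// A sketch of the byte count: CMP EAX,[ECX+4] is 3 bytes, JNE with a 32-bit
// displacement is 6 bytes, and the 2 or 3 NOPs bring the total to 11 (when
// OptoBreakpoint supplies the trailing int3) or 12.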
duke@0 1203
duke@0 1204
duke@0 1205 //=============================================================================
duke@0 1206 uint size_exception_handler() {
duke@0 1207 // NativeCall instruction size is the same as NativeJump.
duke@0 1208 // exception handler starts out as a jump and can be patched to
duke@0 1209 // a call by deoptimization. (4932387)
duke@0 1210 // Note that this value is also credited (in output.cpp) to
duke@0 1211 // the size of the code section.
duke@0 1212 return NativeJump::instruction_size;
duke@0 1213 }
duke@0 1214
duke@0 1215 // Emit exception handler code. Stuff framesize into a register
duke@0 1216 // and call a VM stub routine.
duke@0 1217 int emit_exception_handler(CodeBuffer& cbuf) {
duke@0 1218
duke@0 1219 // Note that the code buffer's inst_mark is always relative to insts.
duke@0 1220 // That's why we must use the macroassembler to generate a handler.
duke@0 1221 MacroAssembler _masm(&cbuf);
duke@0 1222 address base =
duke@0 1223 __ start_a_stub(size_exception_handler());
duke@0 1224 if (base == NULL) return 0; // CodeBuffer::expand failed
duke@0 1225 int offset = __ offset();
duke@0 1226 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
duke@0 1227 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
duke@0 1228 __ end_a_stub();
duke@0 1229 return offset;
duke@0 1230 }
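// For reference: the jump above encodes as E9 rel32 (5 bytes), which is
// NativeJump::instruction_size, and a call (E8 rel32) is the same size --
// which is what lets deoptimization patch one into the other in place,
// per the (4932387) note above.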
duke@0 1231
duke@0 1232 uint size_deopt_handler() {
duke@0 1233 // NativeCall instruction size is the same as NativeJump.
duke@0 1234 // deopt handler starts out as a pointer push followed by a jump
duke@0 1235 // to the unpack blob; see emit_deopt_handler below. (4932387)
duke@0 1236 // Note that this value is also credited (in output.cpp) to
duke@0 1237 // the size of the code section.
duke@0 1238 return 5 + NativeJump::instruction_size; // pushl(); jmp;
duke@0 1239 }
duke@0 1240
duke@0 1241 // Emit deopt handler code.
duke@0 1242 int emit_deopt_handler(CodeBuffer& cbuf) {
duke@0 1243
duke@0 1244 // Note that the code buffer's inst_mark is always relative to insts.
duke@0 1245 // That's why we must use the macroassembler to generate a handler.
duke@0 1246 MacroAssembler _masm(&cbuf);
duke@0 1247 address base =
duke@0 1248 __ start_a_stub(size_deopt_handler());
duke@0 1249 if (base == NULL) return 0; // CodeBuffer::expand failed
duke@0 1250 int offset = __ offset();
duke@0 1251 InternalAddress here(__ pc());
duke@0 1252 __ pushptr(here.addr());
duke@0 1253
duke@0 1254 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
duke@0 1255 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
duke@0 1256 __ end_a_stub();
duke@0 1257 return offset;
duke@0 1258 }
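// A sketch of the handler laid down above, assuming pushptr of a literal
// address emits the 5-byte push-imm32 form (consistent with the
// "pushl(); jmp;" size comment):
//
//   68 xx xx xx xx   push imm32   ; address of this push ("here")
//   E9 xx xx xx xx   jmp rel32    ; SharedRuntime::deopt_blob()->unpack()
//
// 5 + NativeJump::instruction_size bytes, as budgeted by
// size_deopt_handler().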
duke@0 1259
duke@0 1260
duke@0 1261 static void emit_double_constant(CodeBuffer& cbuf, double x) {
duke@0 1262 int mark = cbuf.insts()->mark_off();
duke@0 1263 MacroAssembler _masm(&cbuf);
duke@0 1264 address double_address = __ double_constant(x);
duke@0 1265 cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
duke@0 1266 emit_d32_reloc(cbuf,
duke@0 1267 (int)double_address,
duke@0 1268 internal_word_Relocation::spec(double_address),
duke@0 1269 RELOC_DISP32);
duke@0 1270 }
duke@0 1271
duke@0 1272 static void emit_float_constant(CodeBuffer& cbuf, float x) {
duke@0 1273 int mark = cbuf.insts()->mark_off();
duke@0 1274 MacroAssembler _masm(&cbuf);
duke@0 1275 address float_address = __ float_constant(x);
duke@0 1276 cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
duke@0 1277 emit_d32_reloc(cbuf,
duke@0 1278 (int)float_address,
duke@0 1279 internal_word_Relocation::spec(float_address),
duke@0 1280 RELOC_DISP32);
duke@0 1281 }
duke@0 1282
duke@0 1283
twisti@747 1284 const bool Matcher::match_rule_supported(int opcode) {
twisti@747 1285 if (!has_match_rule(opcode))
twisti@747 1286 return false;
twisti@747 1287
twisti@747 1288 return true; // By default, match rules are supported.
twisti@747 1289 }
twisti@747 1290
duke@0 1291 int Matcher::regnum_to_fpu_offset(int regnum) {
duke@0 1292 return regnum - 32; // The FP registers are in the second chunk
duke@0 1293 }
duke@0 1294
duke@0 1295 bool is_positive_zero_float(jfloat f) {
duke@0 1296 return jint_cast(f) == jint_cast(0.0F);
duke@0 1297 }
duke@0 1298
duke@0 1299 bool is_positive_one_float(jfloat f) {
duke@0 1300 return jint_cast(f) == jint_cast(1.0F);
duke@0 1301 }
duke@0 1302
duke@0 1303 bool is_positive_zero_double(jdouble d) {
duke@0 1304 return jlong_cast(d) == jlong_cast(0.0);
duke@0 1305 }
duke@0 1306
duke@0 1307 bool is_positive_one_double(jdouble d) {
duke@0 1308 return jlong_cast(d) == jlong_cast(1.0);
duke@0 1309 }
duke@0 1310
duke@0 1311 // This flag exists for UltraSPARC; on x86 returning true just means l2f conversion is fast
duke@0 1312 const bool Matcher::convL2FSupported(void) {
duke@0 1313 return true;
duke@0 1314 }
duke@0 1315
duke@0 1316 // Vector width in bytes
duke@0 1317 const uint Matcher::vector_width_in_bytes(void) {
duke@0 1318 return UseSSE >= 2 ? 8 : 0;
duke@0 1319 }
duke@0 1320
duke@0 1321 // Vector ideal reg
duke@0 1322 const uint Matcher::vector_ideal_reg(void) {
duke@0 1323 return Op_RegD;
duke@0 1324 }
duke@0 1325
duke@0 1326 // Is this branch offset short enough that a short branch can be used?
duke@0 1327 //
duke@0 1328 // NOTE: If the platform does not provide any short branch variants, then
duke@0 1329 // this method should return false for offset 0.
never@406 1330 bool Matcher::is_short_branch_offset(int rule, int offset) {
never@406 1331 // the short version of jmpConUCF2 contains multiple branches,
never@406 1332 // making the reach slightly less
never@406 1333 if (rule == jmpConUCF2_rule)
never@406 1334 return (-126 <= offset && offset <= 125);
duke@0 1335 return (-128 <= offset && offset <= 127);
duke@0 1336 }
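// Worked example of the reduced reach: a disp8 branch reaches [-128, 127]
// from its own end, but the short jmpConUCF2 expands to two short (2-byte)
// branches, so the first one's end lies up to 2 bytes past the point the
// offset was computed from; [-126, 125] keeps both displacements encodable.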
duke@0 1337
duke@0 1338 const bool Matcher::isSimpleConstant64(jlong value) {
duke@0 1339 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
duke@0 1340 return false;
duke@0 1341 }
duke@0 1342
duke@0 1343 // The ecx parameter to rep stos for the ClearArray node is in dwords.
duke@0 1344 const bool Matcher::init_array_count_is_in_bytes = false;
duke@0 1345
duke@0 1346 // Threshold size for cleararray.
duke@0 1347 const int Matcher::init_array_short_size = 8 * BytesPerLong;
duke@0 1348
duke@0 1349 // Should the Matcher clone shifts on addressing modes, expecting them to
duke@0 1350 // be subsumed into complex addressing expressions or compute them into
duke@0 1351 // registers? True for Intel but false for most RISCs
duke@0 1352 const bool Matcher::clone_shift_expressions = true;
duke@0 1353
duke@0 1354 // Is it better to copy float constants, or load them directly from memory?
duke@0 1355 // Intel can load a float constant from a direct address, requiring no
duke@0 1356 // extra registers. Most RISCs will have to materialize an address into a
duke@0 1357 // register first, so they would do better to copy the constant from stack.
duke@0 1358 const bool Matcher::rematerialize_float_constants = true;
duke@0 1359
duke@0 1360 // If CPU can load and store mis-aligned doubles directly then no fixup is
duke@0 1361 // needed. Else we split the double into 2 integer pieces and move it
duke@0 1362 // piece-by-piece. Only happens when passing doubles into C code as the
duke@0 1363 // Java calling convention forces doubles to be aligned.
duke@0 1364 const bool Matcher::misaligned_doubles_ok = true;
duke@0 1365
duke@0 1366
duke@0 1367 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
duke@0 1368 // Get the memory operand from the node
duke@0 1369 uint numopnds = node->num_opnds(); // Virtual call for number of operands
duke@0 1370 uint skipped = node->oper_input_base(); // Sum of leaves skipped so far
duke@0 1371 assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
duke@0 1372 uint opcnt = 1; // First operand
duke@0 1373 uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
duke@0 1374 while( idx >= skipped+num_edges ) {
duke@0 1375 skipped += num_edges;
duke@0 1376 opcnt++; // Bump operand count
duke@0 1377 assert( opcnt < numopnds, "Accessing non-existent operand" );
duke@0 1378 num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
duke@0 1379 }
duke@0 1380
duke@0 1381 MachOper *memory = node->_opnds[opcnt];
duke@0 1382 MachOper *new_memory = NULL;
duke@0 1383 switch (memory->opcode()) {
duke@0 1384 case DIRECT:
duke@0 1385 case INDOFFSET32X:
duke@0 1386 // No transformation necessary.
duke@0 1387 return;
duke@0 1388 case INDIRECT:
duke@0 1389 new_memory = new (C) indirect_win95_safeOper( );
duke@0 1390 break;
duke@0 1391 case INDOFFSET8:
duke@0 1392 new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
duke@0 1393 break;
duke@0 1394 case INDOFFSET32:
duke@0 1395 new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
duke@0 1396 break;
duke@0 1397 case INDINDEXOFFSET:
duke@0 1398 new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
duke@0 1399 break;
duke@0 1400 case INDINDEXSCALE:
duke@0 1401 new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
duke@0 1402 break;
duke@0 1403 case INDINDEXSCALEOFFSET:
duke@0 1404 new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
duke@0 1405 break;
duke@0 1406 case LOAD_LONG_INDIRECT:
duke@0 1407 case LOAD_LONG_INDOFFSET32:
duke@0 1408 // Does not use EBP as an address register; uses {EDX, EBX, EDI, ESI}
duke@0 1409 return;
duke@0 1410 default:
duke@0 1411 assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
duke@0 1412 return;
duke@0 1413 }
duke@0 1414 node->_opnds[opcnt] = new_memory;
duke@0 1415 }
duke@0 1416
duke@0 1417 // Advertise here if the CPU requires explicit rounding operations
duke@0 1418 // to implement the UseStrictFP mode.
duke@0 1419 const bool Matcher::strict_fp_requires_explicit_rounding = true;
duke@0 1420
duke@0 1421 // Do floats take an entire double register or just half?
duke@0 1422 const bool Matcher::float_in_double = true;
duke@0 1423 // Do ints take an entire long register or just half?
duke@0 1424 const bool Matcher::int_in_long = false;
duke@0 1425
duke@0 1426 // Return whether or not this register is ever used as an argument. This
duke@0 1427 // function is used on startup to build the trampoline stubs in generateOptoStub.
duke@0 1428 // Registers not mentioned will be killed by the VM call in the trampoline, and
duke@0 1429 // arguments in those registers will not be available to the callee.
duke@0 1430 bool Matcher::can_be_java_arg( int reg ) {
duke@0 1431 if( reg == ECX_num || reg == EDX_num ) return true;
duke@0 1432 if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
duke@0 1433 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
duke@0 1434 return false;
duke@0 1435 }
duke@0 1436
duke@0 1437 bool Matcher::is_spillable_arg( int reg ) {
duke@0 1438 return can_be_java_arg(reg);
duke@0 1439 }
duke@0 1440
duke@0 1441 // Register for DIVI projection of divmodI
duke@0 1442 RegMask Matcher::divI_proj_mask() {
duke@0 1443 return EAX_REG_mask;
duke@0 1444 }
duke@0 1445
duke@0 1446 // Register for MODI projection of divmodI
duke@0 1447 RegMask Matcher::modI_proj_mask() {
duke@0 1448 return EDX_REG_mask;
duke@0 1449 }
duke@0 1450
duke@0 1451 // Register for DIVL projection of divmodL
duke@0 1452 RegMask Matcher::divL_proj_mask() {
duke@0 1453 ShouldNotReachHere();
duke@0 1454 return RegMask();
duke@0 1455 }
duke@0 1456
duke@0 1457 // Register for MODL projection of divmodL
duke@0 1458 RegMask Matcher::modL_proj_mask() {
duke@0 1459 ShouldNotReachHere();
duke@0 1460 return RegMask();
duke@0 1461 }
duke@0 1462
duke@0 1463 %}
duke@0 1464
duke@0 1465 //----------ENCODING BLOCK-----------------------------------------------------
duke@0 1466 // This block specifies the encoding classes used by the compiler to output
duke@0 1467 // byte streams. Encoding classes generate functions which are called by
duke@0 1468 // Machine Instruction Nodes in order to generate the bit encoding of the
duke@0 1469 // instruction. Operands specify their base encoding interface with the
duke@0 1470 // interface keyword. Four interfaces are currently supported:
duke@0 1471 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
duke@0 1472 // operand to generate a function which returns its register number when
duke@0 1473 // queried. CONST_INTER causes an operand to generate a function which
duke@0 1474 // returns the value of the constant when queried. MEMORY_INTER causes an
duke@0 1475 // operand to generate four functions which return the Base Register, the
duke@0 1476 // Index Register, the Scale Value, and the Offset Value of the operand when
duke@0 1477 // queried. COND_INTER causes an operand to generate six functions which
duke@0 1478 // return the encoding code (i.e. the encoding bits for the instruction)
duke@0 1479 // associated with each basic boolean condition for a conditional instruction.
duke@0 1480 // Instructions specify two basic values for encoding. They use the
duke@0 1481 // ins_encode keyword to specify their encoding class (which must be one of
duke@0 1482 // the class names specified in the encoding block), and they use the
duke@0 1483 // opcode keyword to specify, in order, their primary, secondary, and
duke@0 1484 // tertiary opcode. Only the opcode sections which a particular instruction
duke@0 1485 // needs for encoding need to be specified.
duke@0 1486 encode %{
duke@0 1487 // Build emit functions for each basic byte or larger field in the intel
duke@0 1488 // encoding scheme (opcode, rm, sib, immediate), and call them from C++
duke@0 1489 // code in the enc_class source block. Emit functions will live in the
duke@0 1490 // main source block for now. In the future, we can generalize this by
duke@0 1491 // adding a syntax that specifies the sizes of fields in order,
duke@0 1492 // so that the adlc can build the emit functions automagically
twisti@620 1493
twisti@620 1494 // Emit primary opcode
twisti@620 1495 enc_class OpcP %{
twisti@620 1496 emit_opcode(cbuf, $primary);
twisti@620 1497 %}
twisti@620 1498
twisti@620 1499 // Emit secondary opcode
twisti@620 1500 enc_class OpcS %{
twisti@620 1501 emit_opcode(cbuf, $secondary);
twisti@620 1502 %}
twisti@620 1503
twisti@620 1504 // Emit opcode directly
twisti@620 1505 enc_class Opcode(immI d8) %{
twisti@620 1506 emit_opcode(cbuf, $d8$$constant);
duke@0 1507 %}
duke@0 1508
duke@0 1509 enc_class SizePrefix %{
duke@0 1510 emit_opcode(cbuf,0x66);
duke@0 1511 %}
duke@0 1512
duke@0 1513 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
duke@0 1514 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
duke@0 1515 %}
duke@0 1516
duke@0 1517 enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{ // OpcRegReg(Many)
duke@0 1518 emit_opcode(cbuf,$opcode$$constant);
duke@0 1519 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
duke@0 1520 %}
duke@0 1521
duke@0 1522 enc_class mov_r32_imm0( eRegI dst ) %{
duke@0 1523 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
duke@0 1524 emit_d32 ( cbuf, 0x0 ); // imm32==0x0
duke@0 1525 %}
duke@0 1526
duke@0 1527 enc_class cdq_enc %{
duke@0 1528 // Full implementation of Java idiv and irem; checks for
duke@0 1529 // special case as described in JVM spec., p.243 & p.271.
duke@0 1530 //
duke@0 1531 // normal case special case
duke@0 1532 //
duke@0 1533 // input : rax: dividend min_int
duke@0 1534 // reg: divisor -1
duke@0 1535 //
duke@0 1536 // output: rax: quotient (= rax idiv reg) min_int
duke@0 1537 // rdx: remainder (= rax irem reg) 0
duke@0 1538 //
duke@0 1539 // Code sequence:
duke@0 1540 //
duke@0 1541 // 81 F8 00 00 00 80 cmp rax,80000000h
duke@0 1542 // 0F 85 0B 00 00 00 jne normal_case
duke@0 1543 // 33 D2 xor rdx,rdx
duke@0 1544 // 83 F9 FF cmp rcx,-1
duke@0 1545 // 0F 84 03 00 00 00 je done
duke@0 1546 // normal_case:
duke@0 1547 // 99 cdq
duke@0 1548 // F7 F9 idiv rcx
duke@0 1549 // done:
duke@0 1550 //
duke@0 1551 emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
duke@0 1552 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
duke@0 1553 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
duke@0 1554 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
duke@0 1555 emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
duke@0 1556 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
duke@0 1557 emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,rdx
duke@0 1558 emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,-1
duke@0 1559 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
duke@0 1560 emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
duke@0 1561 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
duke@0 1562 // normal_case:
duke@0 1563 emit_opcode(cbuf,0x99); // cdq
duke@0 1564 // idiv (note: must be emitted by the user of this rule)
duke@0 1565 // done:
duke@0 1566 %}
duke@0 1567
duke@0 1568 // Dense encoding for older common ops
duke@0 1569 enc_class Opc_plus(immI opcode, eRegI reg) %{
duke@0 1570 emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
duke@0 1571 %}
duke@0 1572
duke@0 1573
duke@0 1574 // Opcode enc_class for 8/32-bit immediate instructions with sign-extension
duke@0 1575 enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
duke@0 1576 // Check for 8-bit immediate, and set sign extend bit in opcode
duke@0 1577 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
duke@0 1578 emit_opcode(cbuf, $primary | 0x02);
duke@0 1579 }
duke@0 1580 else { // If 32-bit immediate
duke@0 1581 emit_opcode(cbuf, $primary);
duke@0 1582 }
duke@0 1583 %}
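// Concrete instance of the s-bit trick, for the x86 "group 1" ALU ops:
// with $primary == 0x81 (imm32 form), OR-ing in 0x02 yields 0x83, the
// sign-extended-imm8 form of the same operation:
//
//   81 C0 05 00 00 00   add eax, 5   ; imm32 form, 6 bytes
//   83 C0 05            add eax, 5   ; sign-extended imm8 form, 3 bytes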
duke@0 1584
duke@0 1585 enc_class OpcSErm (eRegI dst, immI imm) %{ // OpcSEr/m
duke@0 1586 // Emit primary opcode and set sign-extend bit
duke@0 1587 // Check for 8-bit immediate, and set sign extend bit in opcode
duke@0 1588 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
duke@0 1589 emit_opcode(cbuf, $primary | 0x02);
duke@0 1590 } else { // If 32-bit immediate
duke@0 1591 emit_opcode(cbuf, $primary);
duke@0 1592 }
duke@0 1593 // Emit r/m byte with secondary opcode, after primary opcode.
duke@0 1594 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
duke@0 1595 %}
duke@0 1596
duke@0 1597 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits
duke@0 1598 // Check for 8-bit immediate, and set sign extend bit in opcode
duke@0 1599 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
duke@0 1600 $$$emit8$imm$$constant;
duke@0 1601 }
duke@0 1602 else { // If 32-bit immediate
duke@0 1603 // Output immediate
duke@0 1604 $$$emit32$imm$$constant;
duke@0 1605 }
duke@0 1606 %}
duke@0 1607
duke@0 1608 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
duke@0 1609 // Emit primary opcode and set sign-extend bit
duke@0 1610 // Check for 8-bit immediate, and set sign extend bit in opcode
duke@0 1611 int con = (int)$imm$$constant; // Throw away top bits
duke@0 1612 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
duke@0 1613 // Emit r/m byte with secondary opcode, after primary opcode.
duke@0 1614 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
duke@0 1615 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
duke@0 1616 else emit_d32(cbuf,con);
duke@0 1617 %}
duke@0 1618
duke@0 1619 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
duke@0 1620 // Emit primary opcode and set sign-extend bit
duke@0 1621 // Check for 8-bit immediate, and set sign extend bit in opcode
duke@0 1622 int con = (int)($imm$$constant >> 32); // Throw away bottom bits
duke@0 1623 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
duke@0 1624 // Emit r/m byte with tertiary opcode, after primary opcode.
duke@0 1625 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
duke@0 1626 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
duke@0 1627 else emit_d32(cbuf,con);
duke@0 1628 %}
duke@0 1629
duke@0 1630 enc_class Lbl (label labl) %{ // JMP, CALL
duke@0 1631 Label *l = $labl$$label;
duke@0 1632 emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size()+4)) : 0);
duke@0 1633 %}
duke@0 1634
duke@0 1635 enc_class LblShort (label labl) %{ // JMP, CALL
duke@0 1636 Label *l = $labl$$label;
duke@0 1637 int disp = l ? (l->loc_pos() - (cbuf.code_size()+1)) : 0;
duke@0 1638 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
duke@0 1639 emit_d8(cbuf, disp);
duke@0 1640 %}
duke@0 1641
duke@0 1642 enc_class OpcSReg (eRegI dst) %{ // BSWAP
duke@0 1643 emit_cc(cbuf, $secondary, $dst$$reg );
duke@0 1644 %}
duke@0 1645
duke@0 1646 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
duke@0 1647 int destlo = $dst$$reg;
duke@0 1648 int desthi = HIGH_FROM_LOW(destlo);
duke@0 1649 // bswap lo
duke@0 1650 emit_opcode(cbuf, 0x0F);
duke@0 1651 emit_cc(cbuf, 0xC8, destlo);
duke@0 1652 // bswap hi
duke@0 1653 emit_opcode(cbuf, 0x0F);
duke@0 1654 emit_cc(cbuf, 0xC8, desthi);
duke@0 1655 // xchg lo and hi
duke@0 1656 emit_opcode(cbuf, 0x87);
duke@0 1657 emit_rm(cbuf, 0x3, destlo, desthi);
duke@0 1658 %}
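// Worked example of the sequence above, for dst = EDX:EAX holding
// 0x1122334455667788 (EDX = 0x11223344, EAX = 0x55667788):
//
//   bswap eax       ; EAX = 0x88776655
//   bswap edx       ; EDX = 0x44332211
//   xchg  eax, edx  ; EDX:EAX = 0x8877665544332211
//
// i.e. a full 64-bit byte swap built from 32-bit instructions.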
duke@0 1659
duke@0 1660 enc_class RegOpc (eRegI div) %{ // IDIV, IMOD, JMP indirect, ...
duke@0 1661 emit_rm(cbuf, 0x3, $secondary, $div$$reg );
duke@0 1662 %}
duke@0 1663
duke@0 1664 enc_class Jcc (cmpOp cop, label labl) %{ // JCC
duke@0 1665 Label *l = $labl$$label;
duke@0 1666 $$$emit8$primary;
duke@0 1667 emit_cc(cbuf, $secondary, $cop$$cmpcode);
duke@0 1668 emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size()+4)) : 0);
duke@0 1669 %}
duke@0 1670
duke@0 1671 enc_class JccShort (cmpOp cop, label labl) %{ // JCC
duke@0 1672 Label *l = $labl$$label;
duke@0 1673 emit_cc(cbuf, $primary, $cop$$cmpcode);
duke@0 1674 int disp = l ? (l->loc_pos() - (cbuf.code_size()+1)) : 0;
duke@0 1675 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
duke@0 1676 emit_d8(cbuf, disp);
duke@0 1677 %}
duke@0 1678
duke@0 1679 enc_class enc_cmov(cmpOp cop ) %{ // CMOV
duke@0 1680 $$$emit8$primary;
duke@0 1681 emit_cc(cbuf, $secondary, $cop$$cmpcode);
duke@0 1682 %}
duke@0 1683
duke@0 1684 enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
duke@0 1685 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
duke@0 1686 emit_d8(cbuf, op >> 8 );
duke@0 1687 emit_d8(cbuf, op & 255);
duke@0 1688 %}
duke@0 1689
duke@0 1690 // emulate a CMOV with a conditional branch around a MOV
duke@0 1691 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
duke@0 1692 // Invert sense of branch from sense of CMOV
duke@0 1693 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
duke@0 1694 emit_d8( cbuf, $brOffs$$constant );
duke@0 1695 %}
duke@0 1696
duke@0 1697 enc_class enc_PartialSubtypeCheck( ) %{
duke@0 1698 Register Redi = as_Register(EDI_enc); // result register
duke@0 1699 Register Reax = as_Register(EAX_enc); // super class
duke@0 1700 Register Recx = as_Register(ECX_enc); // killed
duke@0 1701 Register Resi = as_Register(ESI_enc); // sub class
jrose@621 1702 Label miss;
duke@0 1703
duke@0 1704 MacroAssembler _masm(&cbuf);
jrose@621 1705 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
jrose@621 1706 NULL, &miss,
jrose@621 1707 /*set_cond_codes:*/ true);
jrose@621 1708 if ($primary) {
jrose@621 1709 __ xorptr(Redi, Redi);
jrose@621 1710 }
duke@0 1711 __ bind(miss);
duke@0 1712 %}
duke@0 1713
duke@0 1714 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
duke@0 1715 MacroAssembler masm(&cbuf);
duke@0 1716 int start = masm.offset();
duke@0 1717 if (UseSSE >= 2) {
duke@0 1718 if (VerifyFPU) {
duke@0 1719 masm.verify_FPU(0, "must be empty in SSE2+ mode");
duke@0 1720 }
duke@0 1721 } else {
duke@0 1722 // External c_calling_convention expects the FPU stack to be 'clean'.
duke@0 1723 // Compiled code leaves it dirty. Do cleanup now.
duke@0 1724 masm.empty_FPU_stack();
duke@0 1725 }
duke@0 1726 if (sizeof_FFree_Float_Stack_All == -1) {
duke@0 1727 sizeof_FFree_Float_Stack_All = masm.offset() - start;
duke@0 1728 } else {
duke@0 1729 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
duke@0 1730 }
duke@0 1731 %}
duke@0 1732
duke@0 1733 enc_class Verify_FPU_For_Leaf %{
duke@0 1734 if( VerifyFPU ) {
duke@0 1735 MacroAssembler masm(&cbuf);
duke@0 1736 masm.verify_FPU( -3, "Returning from Runtime Leaf call");
duke@0 1737 }
duke@0 1738 %}
duke@0 1739
duke@0 1740 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf
duke@0 1741 // This is the instruction starting address for relocation info.
duke@0 1742 cbuf.set_inst_mark();
duke@0 1743 $$$emit8$primary;
duke@0 1744 // CALL directly to the runtime
duke@0 1745 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
duke@0 1746 runtime_call_Relocation::spec(), RELOC_IMM32 );
duke@0 1747
duke@0 1748 if (UseSSE >= 2) {
duke@0 1749 MacroAssembler _masm(&cbuf);
duke@0 1750 BasicType rt = tf()->return_type();
duke@0 1751
duke@0 1752 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
duke@0 1753 // A C runtime call where the return value is unused. In SSE2+
duke@0 1754 // mode the result needs to be removed from the FPU stack. It's
duke@0 1755 // likely that this function call could be removed by the
duke@0 1756 // optimizer if the C function is a pure function.
duke@0 1757 __ ffree(0);
duke@0 1758 } else if (rt == T_FLOAT) {
never@297 1759 __ lea(rsp, Address(rsp, -4));
duke@0 1760 __ fstp_s(Address(rsp, 0));
duke@0 1761 __ movflt(xmm0, Address(rsp, 0));
never@297 1762 __ lea(rsp, Address(rsp, 4));
duke@0 1763 } else if (rt == T_DOUBLE) {
never@297 1764 __ lea(rsp, Address(rsp, -8));
duke@0 1765 __ fstp_d(Address(rsp, 0));
duke@0 1766 __ movdbl(xmm0, Address(rsp, 0));
never@297 1767 __ lea(rsp, Address(rsp, 8));
duke@0 1768 }
duke@0 1769 }
duke@0 1770 %}
duke@0 1771
duke@0 1772
duke@0 1773 enc_class pre_call_FPU %{
duke@0 1774 // If method sets FPU control word restore it here
duke@0 1775 if( Compile::current()->in_24_bit_fp_mode() ) {
duke@0 1776 MacroAssembler masm(&cbuf);
duke@0 1777 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
duke@0 1778 }
duke@0 1779 %}
duke@0 1780
duke@0 1781 enc_class post_call_FPU %{
duke@0 1782 // If method sets FPU control word do it here also
duke@0 1783 if( Compile::current()->in_24_bit_fp_mode() ) {
duke@0 1784 MacroAssembler masm(&cbuf);
duke@0 1785 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
duke@0 1786 }
duke@0 1787 %}
duke@0 1788
duke@0 1789 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL
duke@0 1790 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
duke@0 1791 // who we intended to call.
duke@0 1792 cbuf.set_inst_mark();
duke@0 1793 $$$emit8$primary;
duke@0 1794 if ( !_method ) {
duke@0 1795 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
duke@0 1796 runtime_call_Relocation::spec(), RELOC_IMM32 );
duke@0 1797 } else if(_optimized_virtual) {
duke@0 1798 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
duke@0 1799 opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
duke@0 1800 } else {
duke@0 1801 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
duke@0 1802 static_call_Relocation::spec(), RELOC_IMM32 );
duke@0 1803 }
duke@0 1804 if( _method ) { // Emit stub for static call
duke@0 1805 emit_java_to_interp(cbuf);
duke@0 1806 }
duke@0 1807 %}
duke@0 1808
duke@0 1809 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL
duke@0 1810 // !!!!!
duke@0 1811 // Generate "Mov EAX,0x00", placeholder instruction to load oop-info
duke@0 1812 // emit_call_dynamic_prologue( cbuf );
duke@0 1813 cbuf.set_inst_mark();
duke@0 1814 emit_opcode(cbuf, 0xB8 + EAX_enc); // mov EAX,-1
duke@0 1815 emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
duke@0 1816 address virtual_call_oop_addr = cbuf.inst_mark();
duke@0 1817 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
duke@0 1818 // who we intended to call.
duke@0 1819 cbuf.set_inst_mark();
duke@0 1820 $$$emit8$primary;
duke@0 1821 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
duke@0 1822 virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
duke@0 1823 %}
duke@0 1824
duke@0 1825 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
duke@0 1826 int disp = in_bytes(methodOopDesc::from_compiled_offset());
duke@0 1827 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
duke@0 1828
duke@0 1829 // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
duke@0 1830 cbuf.set_inst_mark();
duke@0 1831 $$$emit8$primary;
duke@0 1832 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte
duke@0 1833 emit_d8(cbuf, disp); // Displacement
duke@0 1834
duke@0 1835 %}
duke@0 1836
duke@0 1837 enc_class Xor_Reg (eRegI dst) %{
duke@0 1838 emit_opcode(cbuf, 0x33);
duke@0 1839 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
duke@0 1840 %}
duke@0 1841
duke@0 1842 // Following encoding is no longer used, but may be restored if calling
duke@0 1843 // convention changes significantly.
duke@0 1844 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
duke@0 1845 //
duke@0 1846 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
duke@0 1847 // // int ic_reg = Matcher::inline_cache_reg();
duke@0 1848 // // int ic_encode = Matcher::_regEncode[ic_reg];
duke@0 1849 // // int imo_reg = Matcher::interpreter_method_oop_reg();
duke@0 1850 // // int imo_encode = Matcher::_regEncode[imo_reg];
duke@0 1851 //
duke@0 1852 // // // Interpreter expects method_oop in EBX, currently a callee-saved register,
duke@0 1853 // // // so we load it immediately before the call
duke@0 1854 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop
duke@0 1855 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
duke@0 1856 //
duke@0 1857 // // xor rbp,ebp
duke@0 1858 // emit_opcode(cbuf, 0x33);
duke@0 1859 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
duke@0 1860 //
duke@0 1861 // // CALL to interpreter.
duke@0 1862 // cbuf.set_inst_mark();
duke@0 1863 // $$$emit8$primary;
duke@0 1864 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.code_end()) - 4),
duke@0 1865 // runtime_call_Relocation::spec(), RELOC_IMM32 );
duke@0 1866 // %}
duke@0 1867
duke@0 1868 enc_class RegOpcImm (eRegI dst, immI8 shift) %{ // SHL, SAR, SHR
duke@0 1869 $$$emit8$primary;
duke@0 1870 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
duke@0 1871 $$$emit8$shift$$constant;
duke@0 1872 %}
duke@0 1873
duke@0 1874 enc_class LdImmI (eRegI dst, immI src) %{ // Load Immediate
duke@0 1875 // Load immediate does not have a zero or sign extended version
duke@0 1876 // for 8-bit immediates
duke@0 1877 emit_opcode(cbuf, 0xB8 + $dst$$reg);
duke@0 1878 $$$emit32$src$$constant;
duke@0 1879 %}
duke@0 1880
duke@0 1881 enc_class LdImmP (eRegI dst, immI src) %{ // Load Immediate
duke@0 1882 // Load immediate does not have a zero or sign extended version
duke@0 1883 // for 8-bit immediates
duke@0 1884 emit_opcode(cbuf, $primary + $dst$$reg);
duke@0 1885 $$$emit32$src$$constant;
duke@0 1886 %}
duke@0 1887
duke@0 1888 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate
duke@0 1889 // Load immediate does not have a zero or sign extended version
duke@0 1890 // for 8-bit immediates
duke@0 1891 int dst_enc = $dst$$reg;
duke@0 1892 int src_con = $src$$constant & 0x0FFFFFFFFL;
duke@0 1893 if (src_con == 0) {
duke@0 1894 // xor dst, dst
duke@0 1895 emit_opcode(cbuf, 0x33);
duke@0 1896 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
duke@0 1897 } else {
duke@0 1898 emit_opcode(cbuf, $primary + dst_enc);
duke@0 1899 emit_d32(cbuf, src_con);
duke@0 1900 }
duke@0 1901 %}
duke@0 1902
duke@0 1903 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate
duke@0 1904 // Load immediate does not have a zero or sign extended version
duke@0 1905 // for 8-bit immediates
duke@0 1906 int dst_enc = $dst$$reg + 2;
duke@0 1907 int src_con = ((julong)($src$$constant)) >> 32;
duke@0 1908 if (src_con == 0) {
duke@0 1909 // xor dst, dst
duke@0 1910 emit_opcode(cbuf, 0x33);
duke@0 1911 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
duke@0 1912 } else {
duke@0 1913 emit_opcode(cbuf, $primary + dst_enc);
duke@0 1914 emit_d32(cbuf, src_con);
duke@0 1915 }
duke@0 1916 %}
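// The zero special case in both halves above is the standard x86 idiom:
//
//   33 C0            xor eax, eax   ; 2 bytes
//   B8 00 00 00 00   mov eax, 0     ; 5 bytes, same register result
//
// XOR-with-self is shorter and breaks the dependence on the register's
// old value; unlike MOV it also writes EFLAGS.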
duke@0 1917
duke@0 1918
duke@0 1919 enc_class LdImmD (immD src) %{ // Load Immediate
duke@0 1920 if( is_positive_zero_double($src$$constant)) {
duke@0 1921 // FLDZ
duke@0 1922 emit_opcode(cbuf,0xD9);
duke@0 1923 emit_opcode(cbuf,0xEE);
duke@0 1924 } else if( is_positive_one_double($src$$constant)) {
duke@0 1925 // FLD1
duke@0 1926 emit_opcode(cbuf,0xD9);
duke@0 1927 emit_opcode(cbuf,0xE8);
duke@0 1928 } else {
duke@0 1929 emit_opcode(cbuf,0xDD);
duke@0 1930 emit_rm(cbuf, 0x0, 0x0, 0x5);
duke@0 1931 emit_double_constant(cbuf, $src$$constant);
duke@0 1932 }
duke@0 1933 %}
duke@0 1934
duke@0 1935
duke@0 1936 enc_class LdImmF (immF src) %{ // Load Immediate
duke@0 1937 if( is_positive_zero_float($src$$constant)) {
duke@0 1938 emit_opcode(cbuf,0xD9);
duke@0 1939 emit_opcode(cbuf,0xEE);
duke@0 1940 } else if( is_positive_one_float($src$$constant)) {
duke@0 1941 emit_opcode(cbuf,0xD9);
duke@0 1942 emit_opcode(cbuf,0xE8);
duke@0 1943 } else {
duke@0 1944 $$$emit8$primary;
duke@0 1945 // Load immediate does not have a zero or sign extended version
duke@0 1946 // for 8-bit immediates
duke@0 1947 // First load to TOS, then move to dst
duke@0 1948 emit_rm(cbuf, 0x0, 0x0, 0x5);
duke@0 1949 emit_float_constant(cbuf, $src$$constant);
duke@0 1950 }
duke@0 1951 %}
duke@0 1952
duke@0 1953 enc_class LdImmX (regX dst, immXF con) %{ // Load Immediate
duke@0 1954 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
duke@0 1955 emit_float_constant(cbuf, $con$$constant);
duke@0 1956 %}
duke@0 1957
duke@0 1958 enc_class LdImmXD (regXD dst, immXD con) %{ // Load Immediate
duke@0 1959 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
duke@0 1960 emit_double_constant(cbuf, $con$$constant);
duke@0 1961 %}
duke@0 1962
duke@0 1963 enc_class load_conXD (regXD dst, immXD con) %{ // Load double constant
duke@0 1964 // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
duke@0 1965 emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
duke@0 1966 emit_opcode(cbuf, 0x0F);
duke@0 1967 emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
duke@0 1968 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
duke@0 1969 emit_double_constant(cbuf, $con$$constant);
duke@0 1970 %}
duke@0 1971
duke@0 1972 enc_class Opc_MemImm_F(immF src) %{
duke@0 1973 cbuf.set_inst_mark();
duke@0 1974 $$$emit8$primary;
duke@0 1975 emit_rm(cbuf, 0x0, $secondary, 0x5);
duke@0 1976 emit_float_constant(cbuf, $src$$constant);
duke@0 1977 %}
duke@0 1978
duke@0 1979
duke@0 1980 enc_class MovI2X_reg(regX dst, eRegI src) %{
duke@0 1981 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
duke@0 1982 emit_opcode(cbuf, 0x0F );
duke@0 1983 emit_opcode(cbuf, 0x6E );
duke@0 1984 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
duke@0 1985 %}
duke@0 1986
duke@0 1987 enc_class MovX2I_reg(eRegI dst, regX src) %{
duke@0 1988 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
duke@0 1989 emit_opcode(cbuf, 0x0F );
duke@0 1990 emit_opcode(cbuf, 0x7E );
duke@0 1991 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
duke@0 1992 %}
duke@0 1993
duke@0 1994 enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
duke@0 1995 { // MOVD $dst,$src.lo
duke@0 1996 emit_opcode(cbuf,0x66);
duke@0 1997 emit_opcode(cbuf,0x0F);
duke@0 1998 emit_opcode(cbuf,0x6E);
duke@0 1999 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
duke@0 2000 }
duke@0 2001 { // MOVD $tmp,$src.hi
duke@0 2002 emit_opcode(cbuf,0x66);
duke@0 2003 emit_opcode(cbuf,0x0F);
duke@0 2004 emit_opcode(cbuf,0x6E);
duke@0 2005 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
duke@0 2006 }
duke@0 2007 { // PUNPCKLDQ $dst,$tmp
duke@0 2008 emit_opcode(cbuf,0x66);
duke@0 2009 emit_opcode(cbuf,0x0F);
duke@0 2010 emit_opcode(cbuf,0x62);
duke@0 2011 emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
duke@0 2012 }
duke@0 2013 %}
duke@0 2014
duke@0 2015 enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
duke@0 2016 { // MOVD $dst.lo,$src
duke@0 2017 emit_opcode(cbuf,0x66);
duke@0 2018 emit_opcode(cbuf,0x0F);
duke@0 2019 emit_opcode(cbuf,0x7E);
duke@0 2020 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
duke@0 2021 }
duke@0 2022 { // PSHUFLW $tmp,$src,0x4E (01001110b)
duke@0 2023 emit_opcode(cbuf,0xF2);
duke@0 2024 emit_opcode(cbuf,0x0F);
duke@0 2025 emit_opcode(cbuf,0x70);
duke@0 2026 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
duke@0 2027 emit_d8(cbuf, 0x4E);
duke@0 2028 }
duke@0 2029 { // MOVD $dst.hi,$tmp
duke@0 2030 emit_opcode(cbuf,0x66);
duke@0 2031 emit_opcode(cbuf,0x0F);
duke@0 2032 emit_opcode(cbuf,0x7E);
duke@0 2033 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
duke@0 2034 }
duke@0 2035 %}
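// Why the 0x4E immediate works: PSHUFLW picks 16-bit words of the low
// quadword, two immediate bits per result word, and 0x4E = 01 00 11 10b
// routes source words 2,3,0,1 into result words 0,1,2,3. That swaps the
// two 32-bit halves, so the following MOVD reads what was originally
// bits 63:32 of $src.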
duke@0 2036
duke@0 2037
duke@0 2038 // Encode a reg-reg copy. If it is useless, then empty encoding.
duke@0 2039 enc_class enc_Copy( eRegI dst, eRegI src ) %{
duke@0 2040 encode_Copy( cbuf, $dst$$reg, $src$$reg );
duke@0 2041 %}
duke@0 2042
duke@0 2043 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
duke@0 2044 encode_Copy( cbuf, $dst$$reg, $src$$reg );
duke@0 2045 %}
duke@0 2046
duke@0 2047 // Encode xmm reg-reg copy. If it is useless, then empty encoding.
duke@0 2048 enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
duke@0 2049 encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
duke@0 2050 %}
duke@0 2051
duke@0 2052 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
duke@0 2053 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
duke@0 2054 %}
duke@0 2055
duke@0 2056 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many)
duke@0 2057 $$$emit8$primary;
duke@0 2058 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
duke@0 2059 %}
duke@0 2060
duke@0 2061 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many)
duke@0 2062 $$$emit8$secondary;
duke@0 2063 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
duke@0 2064 %}
duke@0 2065
duke@0 2066 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
duke@0 2067 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
duke@0 2068 %}
duke@0 2069
duke@0 2070 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
duke@0 2071 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
duke@0 2072 %}
duke@0 2073
duke@0 2074 enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
duke@0 2075 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
duke@0 2076 %}
duke@0 2077
duke@0 2078 enc_class Con32 (immI src) %{ // Con32(storeImmI)
duke@0 2079 // Output immediate
duke@0 2080 $$$emit32$src$$constant;
duke@0 2081 %}
duke@0 2082
duke@0 2083 enc_class Con32F_as_bits(immF src) %{ // storeF_imm
duke@0 2084 // Output Float immediate bits
duke@0 2085 jfloat jf = $src$$constant;
duke@0 2086 int jf_as_bits = jint_cast( jf );
duke@0 2087 emit_d32(cbuf, jf_as_bits);
duke@0 2088 %}
duke@0 2089
duke@0 2090 enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm
duke@0 2091 // Output Float immediate bits
duke@0 2092 jfloat jf = $src$$constant;
duke@0 2093 int jf_as_bits = jint_cast( jf );
duke@0 2094 emit_d32(cbuf, jf_as_bits);
duke@0 2095 %}
duke@0 2096
duke@0 2097 enc_class Con16 (immI src) %{ // Con16(storeImmI)
duke@0 2098 // Output immediate
duke@0 2099 $$$emit16$src$$constant;
duke@0 2100 %}
duke@0 2101
duke@0 2102 enc_class Con_d32(immI src) %{
duke@0 2103 emit_d32(cbuf,$src$$constant);
duke@0 2104 %}
duke@0 2105
duke@0 2106 enc_class conmemref (eRegP t1) %{ // Con32(storeImmI)
duke@0 2107 // Output immediate memory reference
duke@0 2108 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
duke@0 2109 emit_d32(cbuf, 0x00);
duke@0 2110 %}
duke@0 2111
duke@0 2112 enc_class lock_prefix( ) %{
duke@0 2113 if( os::is_MP() )
duke@0 2114 emit_opcode(cbuf,0xF0); // [Lock]
duke@0 2115 %}
duke@0 2116
duke@0 2117 // Cmp-xchg long value.
duke@0 2118 // Note: we need to swap rbx and rcx before and after the
duke@0 2119 // cmpxchg8 instruction because the instruction uses
duke@0 2120 // rcx as the high order word of the new value to store but
duke@0 2121 // our register encoding uses rbx.
duke@0 2122 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
duke@0 2123
duke@0 2124 // XCHG rbx,ecx
duke@0 2125 emit_opcode(cbuf,0x87);
duke@0 2126 emit_opcode(cbuf,0xD9);
duke@0 2127 // [Lock]
duke@0 2128 if( os::is_MP() )
duke@0 2129 emit_opcode(cbuf,0xF0);
duke@0 2130 // CMPXCHG8 [Eptr]
duke@0 2131 emit_opcode(cbuf,0x0F);
duke@0 2132 emit_opcode(cbuf,0xC7);
duke@0 2133 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
duke@0 2134 // XCHG rbx,ecx
duke@0 2135 emit_opcode(cbuf,0x87);
duke@0 2136 emit_opcode(cbuf,0xD9);
duke@0 2137 %}
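// For reference, CMPXCHG8B [mem] compares EDX:EAX against the 8 bytes at
// [mem]; if they match it stores ECX:EBX there and sets ZF, otherwise it
// loads the current memory value into EDX:EAX and clears ZF. The XCHGs
// above let the allocator keep the new value's high word in rbx even
// though the instruction itself insists on rcx.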
duke@0 2138
duke@0 2139 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
duke@0 2140 // [Lock]
duke@0 2141 if( os::is_MP() )
duke@0 2142 emit_opcode(cbuf,0xF0);
duke@0 2143
duke@0 2144 // CMPXCHG [Eptr]
duke@0 2145 emit_opcode(cbuf,0x0F);
duke@0 2146 emit_opcode(cbuf,0xB1);
duke@0 2147 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
duke@0 2148 %}
duke@0 2149
duke@0 2150 enc_class enc_flags_ne_to_boolean( iRegI res ) %{
duke@0 2151 int res_encoding = $res$$reg;
duke@0 2152
duke@0 2153 // MOV res,0
duke@0 2154 emit_opcode( cbuf, 0xB8 + res_encoding);
duke@0 2155 emit_d32( cbuf, 0 );
duke@0 2156 // JNE,s fail
duke@0 2157 emit_opcode(cbuf,0x75);
duke@0 2158 emit_d8(cbuf, 5 );
duke@0 2159 // MOV res,1
duke@0 2160 emit_opcode( cbuf, 0xB8 + res_encoding);
duke@0 2161 emit_d32( cbuf, 1 );
duke@0 2162 // fail:
duke@0 2163 %}
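// Trace of the sequence above on the "not equal" path:
//
//   mov res, 0   ; B8+rd imm32; MOV leaves EFLAGS untouched
//   jne fail     ; taken, skips the next 5-byte instruction
//   mov res, 1   ; skipped
// fail:          ; res == 0 (res == 1 on the equal path)
//
// Preloading res works only because MOV does not disturb the flags
// being tested.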
duke@0 2164
duke@0 2165 enc_class set_instruction_start( ) %{
duke@0 2166 cbuf.set_inst_mark(); // Mark start of opcode for reloc info in mem operand
duke@0 2167 %}
duke@0 2168
duke@0 2169 enc_class RegMem (eRegI ereg, memory mem) %{ // emit_reg_mem
duke@0 2170 int reg_encoding = $ereg$$reg;
duke@0 2171 int base = $mem$$base;
duke@0 2172 int index = $mem$$index;
duke@0 2173 int scale = $mem$$scale;
duke@0 2174 int displace = $mem$$disp;
duke@0 2175 bool disp_is_oop = $mem->disp_is_oop();
duke@0 2176 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
duke@0 2177 %}
duke@0 2178
duke@0 2179 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem
duke@0 2180 int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, computed from lo
duke@0 2181 int base = $mem$$base;
duke@0 2182 int index = $mem$$index;
duke@0 2183 int scale = $mem$$scale;
duke@0 2184 int displace = $mem$$disp + 4; // Offset is 4 further in memory
duke@0 2185 assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
duke@0 2186 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
duke@0 2187 %}
duke@0 2188
duke@0 2189 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
duke@0 2190 int r1, r2;
duke@0 2191 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); }
duke@0 2192 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); }
duke@0 2193 emit_opcode(cbuf,0x0F);
duke@0 2194 emit_opcode(cbuf,$tertiary);
duke@0 2195 emit_rm(cbuf, 0x3, r1, r2);
duke@0 2196 emit_d8(cbuf,$cnt$$constant);
duke@0 2197 emit_d8(cbuf,$primary);
duke@0 2198 emit_rm(cbuf, 0x3, $secondary, r1);
duke@0 2199 emit_d8(cbuf,$cnt$$constant);
duke@0 2200 %}
duke@0 2201
duke@0 2202 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
duke@0 2203 emit_opcode( cbuf, 0x8B ); // Move
duke@0 2204 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
duke@0 2205 emit_d8(cbuf,$primary);
duke@0 2206 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
duke@0 2207 emit_d8(cbuf,$cnt$$constant-32);
duke@0 2208 emit_d8(cbuf,$primary);
duke@0 2209 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
duke@0 2210 emit_d8(cbuf,31);
duke@0 2211 %}
duke@0 2212
duke@0 2213 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
duke@0 2214 int r1, r2;
duke@0 2215 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); }
duke@0 2216 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); }
duke@0 2217
duke@0 2218 emit_opcode( cbuf, 0x8B ); // Move r1,r2
duke@0 2219 emit_rm(cbuf, 0x3, r1, r2);
duke@0 2220 if( $cnt$$constant > 32 ) { // Shift, if not by zero
duke@0 2221 emit_opcode(cbuf,$primary);
duke@0 2222 emit_rm(cbuf, 0x3, $secondary, r1);
duke@0 2223 emit_d8(cbuf,$cnt$$constant-32);
duke@0 2224 }
duke@0 2225 emit_opcode(cbuf,0x33); // XOR r2,r2
duke@0 2226 emit_rm(cbuf, 0x3, r2, r2);
duke@0 2227 %}
duke@0 2228
duke@0 2229 // Clone of RegMem but accepts an extra parameter to access each
duke@0 2230 // half of a double in memory; it never needs relocation info.
duke@0 2231 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
duke@0 2232 emit_opcode(cbuf,$opcode$$constant);
duke@0 2233 int reg_encoding = $rm_reg$$reg;
duke@0 2234 int base = $mem$$base;
duke@0 2235 int index = $mem$$index;
duke@0 2236 int scale = $mem$$scale;
duke@0 2237 int displace = $mem$$disp + $disp_for_half$$constant;
duke@0 2238 bool disp_is_oop = false;
duke@0 2239 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
duke@0 2240 %}
duke@0 2241
duke@0 2242 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
duke@0 2243 //
duke@0 2244 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
duke@0 2245 // and it never needs relocation information.
duke@0 2246 // Frequently used to move data between FPU's Stack Top and memory.
duke@0 2247 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
duke@0 2248 int rm_byte_opcode = $rm_opcode$$constant;
duke@0 2249 int base = $mem$$base;
duke@0 2250 int index = $mem$$index;
duke@0 2251 int scale = $mem$$scale;
duke@0 2252 int displace = $mem$$disp;
duke@0 2253 assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
duke@0 2254 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
duke@0 2255 %}
duke@0 2256
duke@0 2257 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
duke@0 2258 int rm_byte_opcode = $rm_opcode$$constant;
duke@0 2259 int base = $mem$$base;
duke@0 2260 int index = $mem$$index;
duke@0 2261 int scale = $mem$$scale;
duke@0 2262 int displace = $mem$$disp;
duke@0 2263 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
duke@0 2264 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
duke@0 2265 %}
duke@0 2266
duke@0 2267 enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{ // emit_reg_lea
duke@0 2268 int reg_encoding = $dst$$reg;
duke@0 2269 int base = $src0$$reg; // 0xFFFFFFFF indicates no base
duke@0 2270 int index = 0x04; // 0x04 indicates no index
duke@0 2271 int scale = 0x00; // 0x00 indicates no scale
duke@0 2272 int displace = $src1$$constant; // 0x00 indicates no displacement
duke@0 2273 bool disp_is_oop = false;
duke@0 2274 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
duke@0 2275 %}
duke@0 2276
duke@0 2277 enc_class min_enc (eRegI dst, eRegI src) %{ // MIN
duke@0 2278 // Compare dst,src
duke@0 2279 emit_opcode(cbuf,0x3B);
duke@0 2280 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
duke@0 2281 // if dst < src, jump (JL,s) around the move
duke@0 2282 emit_opcode(cbuf,0x7C);
duke@0 2283 emit_d8(cbuf,2);
duke@0 2284 // move dst,src
duke@0 2285 emit_opcode(cbuf,0x8B);
duke@0 2286 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
duke@0 2287 %}
duke@0 2288
duke@0 2289 enc_class max_enc (eRegI dst, eRegI src) %{ // MAX
duke@0 2290 // Compare dst,src
duke@0 2291 emit_opcode(cbuf,0x3B);
duke@0 2292 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
duke@0 2293 // if dst > src, jump (JG,s) around the move
duke@0 2294 emit_opcode(cbuf,0x7F);
duke@0 2295 emit_d8(cbuf,2);
duke@0 2296 // move dst,src
duke@0 2297 emit_opcode(cbuf,0x8B);
duke@0 2298 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
duke@0 2299 %}
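// Both helpers above emulate a conditional move without CMOV: for MIN,
// the JL,s (0x7C) skips the move when dst < src, leaving dst as the
// minimum; otherwise the MOV copies src into dst. The disp8 of 2 is
// exactly the length of the 2-byte reg-reg MOV (8B /r) being skipped.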
duke@0 2300
duke@0 2301 enc_class enc_FP_store(memory mem, regD src) %{
duke@0 2302 // If src is FPR1, we can just FST to store it.
duke@0 2303 // Else we need to FLD it to FPR1, then FSTP to store/pop it.
duke@0 2304 int reg_encoding = 0x2; // Just store
duke@0 2305 int base = $mem$$base;
duke@0 2306 int index = $mem$$index;
duke@0 2307 int scale = $mem$$scale;
duke@0 2308 int displace = $mem$$disp;
duke@0 2309 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
duke@0 2310 if( $src$$reg != FPR1L_enc ) {
duke@0 2311 reg_encoding = 0x3; // Store & pop
duke@0 2312 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
duke@0 2313 emit_d8( cbuf, 0xC0-1+$src$$reg );
duke@0 2314 }
duke@0 2315 cbuf.set_inst_mark(); // Mark start of opcode for reloc info in mem operand
duke@0 2316 emit_opcode(cbuf,$primary);
duke@0 2317 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
duke@0 2318 %}
duke@0 2319
duke@0 2320 enc_class neg_reg(eRegI dst) %{
duke@0 2321 // NEG $dst
duke@0 2322 emit_opcode(cbuf,0xF7);
duke@0 2323 emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
duke@0 2324 %}
duke@0 2325
duke@0 2326 enc_class setLT_reg(eCXRegI dst) %{
duke@0 2327 // SETLT $dst
duke@0 2328 emit_opcode(cbuf,0x0F);
duke@0 2329 emit_opcode(cbuf,0x9C);
duke@0 2330 emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
duke@0 2331 %}
duke@0 2332
duke@0 2333 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT
duke@0 2334 int tmpReg = $tmp$$reg;
duke@0 2335
duke@0 2336 // SUB $p,$q
duke@0 2337 emit_opcode(cbuf,0x2B);
duke@0 2338 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
duke@0 2339 // SBB $tmp,$tmp
duke@0 2340 emit_opcode(cbuf,0x1B);
duke@0 2341 emit_rm(cbuf, 0x3, tmpReg, tmpReg);
duke@0 2342 // AND $tmp,$y
duke@0 2343 emit_opcode(cbuf,0x23);
duke@0 2344 emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
duke@0 2345 // ADD $p,$tmp
duke@0 2346 emit_opcode(cbuf,0x03);
duke@0 2347 emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
duke@0 2348 %}
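// Branchless trace of the cadd_cmpLT idiom above, for p=3, q=5, y=10:
//
//   sub p,q       ; p = -2, CF = 1 (3 - 5 borrows)
//   sbb tmp,tmp   ; tmp = tmp - tmp - CF = -CF = 0xFFFFFFFF
//   and tmp,y     ; tmp = y = 10
//   add p,tmp     ; p = 8, i.e. (p - q) + y
//
// When there is no borrow (p >= q), tmp collapses to 0 and p is left
// as p - q.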
duke@0 2349
duke@0 2350 enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{ // cadd_cmpLT
duke@0 2351 int tmpReg = $tmp$$reg;
duke@0 2352
duke@0 2353 // SUB $p,$q
duke@0 2354 emit_opcode(cbuf,0x2B);
duke@0 2355 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
duke@0 2356 // SBB $tmp,$tmp
duke@0 2357 emit_opcode(cbuf,0x1B);
duke@0 2358 emit_rm(cbuf, 0x3, tmpReg, tmpReg);
duke@0 2359 // AND $tmp,$mem
duke@0 2360 cbuf.set_inst_mark(); // Mark start of opcode for reloc info in mem operand
duke@0 2361 emit_opcode(cbuf,0x23);
duke@0 2362 int reg_encoding = tmpReg;
duke@0 2363 int base = $mem$$base;
duke@0 2364 int index = $mem$$index;
duke@0 2365 int scale = $mem$$scale;
duke@0 2366 int displace = $mem$$disp;
duke@0 2367 bool disp_is_oop = $mem->disp_is_oop();
duke@0 2368 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
duke@0 2369 // ADD $p,$tmp
duke@0 2370 emit_opcode(cbuf,0x03);
duke@0 2371 emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
duke@0 2372 %}
duke@0 2373
duke@0 2374 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
duke@0 2375 // TEST shift,32
duke@0 2376 emit_opcode(cbuf,0xF7);
duke@0 2377 emit_rm(cbuf, 0x3, 0, ECX_enc);
duke@0 2378 emit_d32(cbuf,0x20);
duke@0 2379 // JEQ,s small
duke@0 2380 emit_opcode(cbuf, 0x74);
duke@0 2381 emit_d8(cbuf, 0x04);
duke@0 2382 // MOV $dst.hi,$dst.lo
duke@0 2383 emit_opcode( cbuf, 0x8B );
duke@0 2384 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
duke@0 2385 // CLR $dst.lo
duke@0 2386 emit_opcode(cbuf, 0x33);
duke@0 2387 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
duke@0 2388 // small:
duke@0 2389 // SHLD $dst.hi,$dst.lo,$shift
duke@0 2390 emit_opcode(cbuf,0x0F);
duke@0 2391 emit_opcode(cbuf,0xA5);
duke@0 2392 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
duke@0 2393 // SHL $dst.lo,$shift
duke@0 2394 emit_opcode(cbuf,0xD3);
duke@0 2395 emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
duke@0 2396 %}
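// Worked example of the sequence above, for shift = 40 (bit 5 set):
//
//   test ecx,32      ; 40 & 32 != 0, so the JEQ,s is not taken
//   mov  hi,lo       ; the low word becomes the new high word
//   xor  lo,lo       ; low word cleared
//   shld hi,lo,cl    ; count is masked mod 32: hi <<= 8, fill bits from lo = 0
//   shl  lo,cl       ; 0 << 8 is still 0
//
// Net effect: value << 40 == move low into high, then shift by 8.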
duke@0 2397
duke@0 2398 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
duke@0 2399 // TEST shift,32
duke@0 2400 emit_opcode(cbuf,0xF7);
duke@0 2401 emit_rm(cbuf, 0x3, 0, ECX_enc);
duke@0 2402 emit_d32(cbuf,0x20);
duke@0 2403 // JEQ,s small
duke@0 2404 emit_opcode(cbuf, 0x74);
duke@0 2405 emit_d8(cbuf, 0x04);
duke@0 2406 // MOV $dst.lo,$dst.hi
duke@0 2407 emit_opcode( cbuf, 0x8B );
duke@0 2408 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
duke@0 2409 // CLR $dst.hi
duke@0 2410 emit_opcode(cbuf, 0x33);
duke@0 2411 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
duke@0 2412 // small:
duke@0 2413 // SHRD $dst.lo,$dst.hi,$shift
duke@0 2414 emit_opcode(cbuf,0x0F);
duke@0 2415 emit_opcode(cbuf,0xAD);
duke@0 2416 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
duke@0 2417 // SHR $dst.hi,$shift
duke@0 2418 emit_opcode(cbuf,0xD3);
duke@0 2419 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
duke@0 2420 %}
duke@0 2421
duke@0 2422 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
duke@0 2423 // TEST shift,32
duke@0 2424 emit_opcode(cbuf,0xF7);
duke@0 2425 emit_rm(cbuf, 0x3, 0, ECX_enc);
duke@0 2426 emit_d32(cbuf,0x20);
duke@0 2427 // JEQ,s small
duke@0 2428 emit_opcode(cbuf, 0x74);
duke@0 2429 emit_d8(cbuf, 0x05);
duke@0 2430 // MOV $dst.lo,$dst.hi
duke@0 2431 emit_opcode( cbuf, 0x8B );
duke@0 2432 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
duke@0 2433 // SAR $dst.hi,31
duke@0 2434 emit_opcode(cbuf, 0xC1);
duke@0 2435 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
duke@0 2436 emit_d8(cbuf, 0x1F );
duke@0 2437 // small:
duke@0 2438 // SHRD $dst.lo,$dst.hi,$shift
duke@0 2439 emit_opcode(cbuf,0x0F);
duke@0 2440 emit_opcode(cbuf,0xAD);
duke@0 2441 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
duke@0 2442 // SAR $dst.hi,$shift
duke@0 2443 emit_opcode(cbuf,0xD3);
duke@0 2444 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
duke@0 2445 %}
duke@0 2446
duke@0 2447
duke@0 2448 // ----------------- Encodings for floating point unit -----------------
duke@0 2449 // May leave result in FPU-TOS or FPU reg depending on opcodes
duke@0 2450 enc_class OpcReg_F (regF src) %{ // FMUL, FDIV
duke@0 2451 $$$emit8$primary;
duke@0 2452 emit_rm(cbuf, 0x3, $secondary, $src$$reg );
duke@0 2453 %}
duke@0 2454
duke@0 2455 // Pop argument in FPR0 with FSTP ST(0)
duke@0 2456 enc_class PopFPU() %{
duke@0 2457 emit_opcode( cbuf, 0xDD );
duke@0 2458 emit_d8( cbuf, 0xD8 );
duke@0 2459 %}
duke@0 2460
duke@0 2461 // !!!!! equivalent to Pop_Reg_F
duke@0 2462 enc_class Pop_Reg_D( regD dst ) %{
duke@0 2463 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
duke@0 2464 emit_d8( cbuf, 0xD8+$dst$$reg );
duke@0 2465 %}
duke@0 2466
duke@0 2467 enc_class Push_Reg_D( regD dst ) %{
duke@0 2468 emit_opcode( cbuf, 0xD9 );
duke@0 2469 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
duke@0 2470 %}
duke@0 2471
duke@0 2472 enc_class strictfp_bias1( regD dst ) %{
duke@0 2473 emit_opcode( cbuf, 0xDB ); // FLD m80real
duke@0 2474 emit_opcode( cbuf, 0x2D );
duke@0 2475 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
duke@0 2476 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
duke@0 2477 emit_opcode( cbuf, 0xC8+$dst$$reg );
duke@0 2478 %}
duke@0 2479
duke@0 2480 enc_class strictfp_bias2( regD dst ) %{
duke@0 2481 emit_opcode( cbuf, 0xDB ); // FLD m80real
duke@0 2482 emit_opcode( cbuf, 0x2D );
duke@0 2483 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
duke@0 2484 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
duke@0 2485 emit_opcode( cbuf, 0xC8+$dst$$reg );
duke@0 2486 %}
duke@0 2487
duke@0 2488 // Special case for moving an integer register to a stack slot.
duke@0 2489 enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
duke@0 2490 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
duke@0 2491 %}
duke@0 2492
duke@0 2493 // Special case for moving a register to a stack slot.
duke@0 2494 enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
duke@0 2495 // Opcode already emitted
duke@0 2496 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte
duke@0 2497 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
duke@0 2498 emit_d32(cbuf, $dst$$disp); // Displacement
duke@0 2499 %}
duke@0 2500
duke@0 2501 // Push the integer in stackSlot 'src' onto FP-stack
duke@0 2502 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src]
duke@0 2503 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
duke@0 2504 %}
duke@0 2505
duke@0 2506 // Push the float in stackSlot 'src' onto FP-stack
duke@0 2507 enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src]
duke@0 2508 store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
duke@0 2509 %}
duke@0 2510
duke@0 2511 // Push the double in stackSlot 'src' onto FP-stack
duke@0 2512 enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src]
duke@0 2513 store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
duke@0 2514 %}
duke@0 2515
duke@0 2516 // Push FPU's TOS float to a stack-slot, and pop FPU-stack
duke@0 2517 enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
duke@0 2518 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
duke@0 2519 %}
duke@0 2520
duke@0 2521 // Same as Pop_Mem_F except for opcode
duke@0 2522 // Push FPU's TOS double to a stack-slot, and pop FPU-stack
duke@0 2523 enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
duke@0 2524 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
duke@0 2525 %}
duke@0 2526
duke@0 2527 enc_class Pop_Reg_F( regF dst ) %{
duke@0 2528 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
duke@0 2529 emit_d8( cbuf, 0xD8+$dst$$reg );
duke@0 2530 %}
duke@0 2531
duke@0 2532 enc_class Push_Reg_F( regF dst ) %{
duke@0 2533 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
duke@0 2534 emit_d8( cbuf, 0xC0-1+$dst$$reg );
duke@0 2535 %}
duke@0 2536
duke@0 2537 // Push FPU's float to a stack-slot, and pop FPU-stack
duke@0 2538 enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
duke@0 2539 int pop = 0x02;
duke@0 2540 if ($src$$reg != FPR1L_enc) {
duke@0 2541 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
duke@0 2542 emit_d8( cbuf, 0xC0-1+$src$$reg );
duke@0 2543 pop = 0x03;
duke@0 2544 }
duke@0 2545 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
duke@0 2546 %}
duke@0 2547
duke@0 2548 // Push FPU's double to a stack-slot, and pop FPU-stack
duke@0 2549 enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
duke@0 2550 int pop = 0x02;
duke@0 2551 if ($src$$reg != FPR1L_enc) {
duke@0 2552 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
duke@0 2553 emit_d8( cbuf, 0xC0-1+$src$$reg );
duke@0 2554 pop = 0x03;
duke@0 2555 }
duke@0 2556 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
duke@0 2557 %}
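// In the two encodings above, the reg field selects the FPU store flavor:
// /2 is FST (store, no pop) and /3 is FSTP (store and pop). When src is
// not already ST(0), the FLD first pushes a copy of it on top and the
// FSTP then pops that copy, so the physical FPU stack depth is unchanged
// on either path.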
duke@0 2558
duke@0 2559 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
duke@0 2560 enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
duke@0 2561 int pop = 0xD0 - 1; // -1 since we skip FLD
duke@0 2562 if ($src$$reg != FPR1L_enc) {
duke@0 2563 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1)
duke@0 2564 emit_d8( cbuf, 0xC0-1+$src$$reg );
duke@0 2565 pop = 0xD8;
duke@0 2566 }
duke@0 2567 emit_opcode( cbuf, 0xDD );
duke@0 2568 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i)
duke@0 2569 %}
duke@0 2570
duke@0 2571
duke@0 2572 enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
duke@0 2573 MacroAssembler masm(&cbuf);
duke@0 2574 masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg
duke@0 2575 masm.fmul( $src2$$reg+0); // value at TOS
duke@0 2576 masm.fadd( $src$$reg+0); // value at TOS
duke@0 2577 masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store
duke@0 2578 %}
duke@0 2579
duke@0 2580
duke@0 2581 enc_class Push_Reg_Mod_D( regD dst, regD src) %{
duke@0 2582 // load dst in FPR0
duke@0 2583 emit_opcode( cbuf, 0xD9 );
duke@0 2584 emit_d8( cbuf, 0xC0-1+$dst$$reg );
duke@0 2585 if ($src$$reg != FPR1L_enc) {
duke@0 2586 // fincstp
duke@0 2587 emit_opcode (cbuf, 0xD9);
duke@0 2588 emit_opcode (cbuf, 0xF7);
duke@0 2589 // swap src with FPR1:
duke@0 2590 // FXCH FPR1 with src
duke@0 2591 emit_opcode(cbuf, 0xD9);
duke@0 2592 emit_d8(cbuf, 0xC8-1+$src$$reg );
duke@0 2593 // fdecstp
duke@0 2594 emit_opcode (cbuf, 0xD9);
duke@0 2595 emit_opcode (cbuf, 0xF6);
duke@0 2596 }
duke@0 2597 %}
duke@0 2598
duke@0 2599 enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
duke@0 2600 // Allocate two words (8 bytes) for a double
duke@0 2601 emit_opcode(cbuf,0x83); // SUB ESP,8
duke@0 2602 emit_opcode(cbuf,0xEC);
duke@0 2603 emit_d8(cbuf,0x08);
duke@0 2604
duke@0 2605 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1
duke@0 2606 emit_opcode (cbuf, 0x0F );
duke@0 2607 emit_opcode (cbuf, 0x11 );
duke@0 2608 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
duke@0 2609
duke@0 2610 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
duke@0 2611 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
duke@0 2612
duke@0 2613 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0
duke@0 2614 emit_opcode (cbuf, 0x0F );
duke@0 2615 emit_opcode (cbuf, 0x11 );
duke@0 2616 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
duke@0 2617
duke@0 2618 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
duke@0 2619 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
duke@0 2620
duke@0 2621 %}
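// The byte sequence above disassembles roughly as follows (sketch; the
// /r operands come from $src0/$src1):
//
//   sub   esp, 8             ; 83 EC 08
//   movsd [esp], xmm(src1)   ; F2 0F 11 /r
//   fld   qword ptr [esp]    ; DD /0
//   movsd [esp], xmm(src0)   ; F2 0F 11 /r
//   fld   qword ptr [esp]    ; DD /0
//
// Both XMM doubles are bounced through memory onto the FPU stack, src1
// first, so that src0 ends up in ST(0) and src1 in ST(1) for the
// FPREM-based remainder code that consumes this encoding.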
duke@0 2622
duke@0 2623 enc_class Push_ModX_encoding( regX src0, regX src1) %{
duke@0 2624 // Allocate a word
duke@0 2625 emit_opcode(cbuf,0x83); // SUB ESP,4
duke@0 2626 emit_opcode(cbuf,0xEC);
duke@0 2627 emit_d8(cbuf,0x04);
duke@0 2628
duke@0 2629 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1
duke@0 2630 emit_opcode (cbuf, 0x0F );
duke@0 2631 emit_opcode (cbuf, 0x11 );
duke@0 2632 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
duke@0 2633
duke@0 2634 emit_opcode(cbuf,0xD9 ); // FLD [ESP]
duke@0 2635 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
duke@0 2636
duke@0 2637 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0
duke@0 2638 emit_opcode (cbuf, 0x0F );
duke@0 2639 emit_opcode (cbuf, 0x11 );
duke@0 2640 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
duke@0 2641
duke@0 2642 emit_opcode(cbuf,0xD9 ); // FLD [ESP]
duke@0 2643 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
duke@0 2644
duke@0 2645 %}
duke@0 2646
duke@0 2647 enc_class Push_ResultXD(regXD dst) %{
duke@0 2648 store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]
duke@0 2649
duke@0 2650 // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
duke@0 2651 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
duke@0 2652 emit_opcode (cbuf, 0x0F );
duke@0 2653 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
duke@0 2654 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
duke@0 2655
duke@0 2656 emit_opcode(cbuf,0x83); // ADD ESP,8
duke@0 2657 emit_opcode(cbuf,0xC4);
duke@0 2658 emit_d8(cbuf,0x08);
duke@0 2659 %}
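// On the UseXmmLoadAndClearUpper choice above: MOVSD (F2 0F 10) writes
// the whole XMM register (upper half cleared), while MOVLPD (66 0F 12)
// merges into the low half and therefore carries a dependency on the
// register's previous contents. Which form is cheaper is
// micro-architecture dependent, hence the flag.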
duke@0 2660
duke@0 2661 enc_class Push_ResultX(regX dst, immI d8) %{
duke@0 2662 store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]
duke@0 2663
duke@0 2664 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
duke@0 2665 emit_opcode (cbuf, 0x0F );
duke@0 2666 emit_opcode (cbuf, 0x10 );
duke@0 2667 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
duke@0 2668
duke@0 2669 emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8)
duke@0 2670 emit_opcode(cbuf,0xC4);
duke@0 2671 emit_d8(cbuf,$d8$$constant);
duke@0 2672 %}
duke@0 2673
duke@0 2674 enc_class Push_SrcXD(regXD src) %{
duke@0 2675 // Allocate two words (8 bytes) for a double
duke@0 2676 emit_opcode(cbuf,0x83); // SUB ESP,8
duke@0 2677 emit_opcode(cbuf,0xEC);
duke@0 2678 emit_d8(cbuf,0x08);
duke@0 2679
duke@0 2680 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
duke@0 2681 emit_opcode (cbuf, 0x0F );
duke@0 2682 emit_opcode (cbuf, 0x11 );
duke@0 2683 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
duke@0 2684
duke@0 2685 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
duke@0 2686 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
duke@0 2687 %}
duke@0 2688
duke@0 2689 enc_class push_stack_temp_qword() %{
duke@0 2690 emit_opcode(cbuf,0x83); // SUB ESP,8
duke@0 2691 emit_opcode(cbuf,0xEC);
duke@0 2692 emit_d8 (cbuf,0x08);
duke@0 2693 %}
duke@0 2694
duke@0 2695 enc_class pop_stack_temp_qword() %{
duke@0 2696 emit_opcode(cbuf,0x83); // ADD ESP,8
duke@0 2697 emit_opcode(cbuf,0xC4);
duke@0 2698 emit_d8 (cbuf,0x08);
duke@0 2699 %}
duke@0 2700
duke@0 2701 enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
duke@0 2702 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src
duke@0 2703 emit_opcode (cbuf, 0x0F );
duke@0 2704 emit_opcode (cbuf, 0x11 );
duke@0 2705 encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);
duke@0 2706
duke@0 2707 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
duke@0 2708 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
duke@0 2709 %}
duke@0 2710
duke@0 2711 // Compute X^Y using Intel's fast hardware instructions, if possible.
duke@0 2712 // Otherwise return a NaN.
duke@0 2713 enc_class pow_exp_core_encoding %{
duke@0 2714 // FPR1 holds Y*ln2(X). Compute FPR1 = 2^(Y*ln2(X))
duke@0 2715 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0); // fdup = fld st(0) Q Q
duke@0 2716 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC); // frndint int(Q) Q
duke@0 2717 emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9); // fsub st(1) -= st(0); int(Q) frac(Q)
duke@0 2718 emit_opcode(cbuf,0xDB); // FISTP [ESP] frac(Q)
duke@0 2719 emit_opcode(cbuf,0x1C);
duke@0 2720 emit_d8(cbuf,0x24);
duke@0 2721 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0); // f2xm1 2^frac(Q)-1
duke@0 2722 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8); // fld1 1 2^frac(Q)-1
duke@0 2723 emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1); // faddp 2^frac(Q)
duke@0 2724 emit_opcode(cbuf,0x8B); // mov rax,[esp+0]=int(Q)
duke@0 2725 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
duke@0 2726 emit_opcode(cbuf,0xC7); // mov rcx,0xFFFFF800 - overflow mask
duke@0 2727 emit_rm(cbuf, 0x3, 0x0, ECX_enc);
duke@0 2728 emit_d32(cbuf,0xFFFFF800);
duke@0 2729 emit_opcode(cbuf,0x81); // add rax,1023 - the double exponent bias
duke@0 2730 emit_rm(cbuf, 0x3, 0x0, EAX_enc);
duke@0 2731 emit_d32(cbuf,1023);
duke@0 2732 emit_opcode(cbuf,0x8B); // mov rbx,eax
duke@0 2733 emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
duke@0 2734 emit_opcode(cbuf,0xC1); // shl rax,20 - Slide to exponent position
duke@0 2735 emit_rm(cbuf,0x3,0x4,EAX_enc);
duke@0 2736 emit_d8(cbuf,20);
duke@0 2737 emit_opcode(cbuf,0x85); // test rbx,ecx - check for overflow
duke@0 2738 emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
duke@0 2739 emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45); // CMOVne rax,ecx - overflow; stuff NAN into EAX
duke@0 2740 emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
duke@0 2741 emit_opcode(cbuf,0x89); // mov [esp+4],eax - Store as part of double word
duke@0 2742 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
duke@0 2743 emit_opcode(cbuf,0xC7); // mov [esp+0],0 - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
duke@0 2744 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
duke@0 2745 emit_d32(cbuf,0);
duke@0 2746 emit_opcode(cbuf,0xDC); // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
duke@0 2747 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
duke@0 2748 %}
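// A rough C sketch of the 2^int(Q) construction above (illustrative
// only, not HotSpot code; assumes IEEE-754 doubles and the little-endian
// [esp+0]/[esp+4] layout used by the stores):
//
//   #include <stdint.h>
//   #include <string.h>
//   static double two_to_int_q(int q) {
//     uint32_t biased = (uint32_t)(q + 1023);  // ADD of the exponent bias
//     uint32_t hi = biased << 20;              // SHL slides it to bits 30..20
//     if (biased & 0xFFFFF800) {               // TEST against the overflow mask
//       hi = 0xFFFFF800;                       // CMOVne: force a NaN pattern
//     }
//     uint64_t bits = (uint64_t)hi << 32;      // low word is stored as 0
//     double d;
//     memcpy(&d, &bits, sizeof(d));
//     return d;                                // 1.0 * 2^q, or NaN on overflow
//   }
//
// The emitted code then multiplies this 2^int(Q) by the 2^frac(Q) built
// with F2XM1/FLD1/FADDP, giving 2^Q overall.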
duke@0 2749
duke@0 2750 // enc_class Pop_Reg_Mod_D( regD dst, regD src)
duke@0 2751 // was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
duke@0 2752
duke@0 2753 enc_class Push_Result_Mod_D( regD src) %{
duke@0 2754 if ($src$$reg != FPR1L_enc) {
duke@0 2755 // fincstp
duke@0 2756 emit_opcode (cbuf, 0xD9);
duke@0 2757 emit_opcode (cbuf, 0xF7);
duke@0 2758 // FXCH FPR1 with src
duke@0 2759 emit_opcode(cbuf, 0xD9);
duke@0 2760 emit_d8(cbuf, 0xC8-1+$src$$reg );
duke@0 2761 // fdecstp
duke@0 2762 emit_opcode (cbuf, 0xD9);
duke@0 2763 emit_opcode (cbuf, 0xF6);
duke@0 2764 }
duke@0 2765 // // following asm replaced with Pop_Reg_F or Pop_Mem_F
duke@0 2766 // // FSTP FPR$dst$$reg
duke@0 2767 // emit_opcode( cbuf, 0xDD );
duke@0 2768 // emit_d8( cbuf, 0xD8+$dst$$reg );
duke@0 2769 %}
duke@0 2770
duke@0 2771 enc_class fnstsw_sahf_skip_parity() %{
duke@0 2772 // fnstsw ax
duke@0 2773 emit_opcode( cbuf, 0xDF );
duke@0 2774 emit_opcode( cbuf, 0xE0 );
duke@0 2775 // sahf
duke@0 2776 emit_opcode( cbuf, 0x9E );
duke@0 2777 // jnp ::skip
duke@0 2778 emit_opcode( cbuf, 0x7B );
duke@0 2779 emit_opcode( cbuf, 0x05 );
duke@0 2780 %}
duke@0 2781
duke@0 2782 enc_class emitModD() %{
duke@0 2783 // fprem must be iterative
duke@0 2784 // :: loop
duke@0 2785 // fprem
duke@0 2786 emit_opcode( cbuf, 0xD9 );
duke@0 2787 emit_opcode( cbuf, 0xF8 );
duke@0 2788 // wait
duke@0 2789 emit_opcode( cbuf, 0x9b );
duke@0 2790 // fnstsw ax
duke@0 2791 emit_opcode( cbuf, 0xDF );
duke@0 2792 emit_opcode( cbuf, 0xE0 );
duke@0 2793 // sahf
duke@0 2794 emit_opcode( cbuf, 0x9E );
duke@0 2795 // jp ::loop
duke@0 2796 emit_opcode( cbuf, 0x0F );
duke@0 2797 emit_opcode( cbuf, 0x8A );
duke@0 2798 emit_opcode( cbuf, 0xF4 );
duke@0 2799 emit_opcode( cbuf, 0xFF );
duke@0 2800 emit_opcode( cbuf, 0xFF );
duke@0 2801 emit_opcode( cbuf, 0xFF );
duke@0 2802 %}
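// FPREM computes only a partial remainder: each pass reduces the exponent
// difference by a bounded amount and sets the C2 condition code while the
// reduction is incomplete. FNSTSW/SAHF copies C2 into PF, so the JP above
// simply re-runs FPREM until the remainder is exact.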
duke@0 2803
duke@0 2804 enc_class fpu_flags() %{
duke@0 2805 // fnstsw_ax
duke@0 2806 emit_opcode( cbuf, 0xDF);
duke@0 2807 emit_opcode( cbuf, 0xE0);
duke@0 2808 // test ax,0x0400
duke@0 2809 emit_opcode( cbuf, 0x66 ); // operand-size prefix for 16-bit immediate
duke@0 2810 emit_opcode( cbuf, 0xA9 );
duke@0 2811 emit_d16 ( cbuf, 0x0400 );
duke@0 2812 // // // This sequence works, but stalls for 12-16 cycles on PPro
duke@0 2813 // // test rax,0x0400
duke@0 2814 // emit_opcode( cbuf, 0xA9 );
duke@0 2815 // emit_d32 ( cbuf, 0x00000400 );
duke@0 2816 //
duke@0 2817 // jz exit (no unordered comparison)
duke@0 2818 emit_opcode( cbuf, 0x74 );
duke@0 2819 emit_d8 ( cbuf, 0x02 );
duke@0 2820 // mov ah,1 - treat as LT case (set carry flag)
duke@0 2821 emit_opcode( cbuf, 0xB4 );
duke@0 2822 emit_d8 ( cbuf, 0x01 );
duke@0 2823 // sahf
duke@0 2824 emit_opcode( cbuf, 0x9E);
duke@0 2825 %}
duke@0 2826
duke@0 2827 enc_class cmpF_P6_fixup() %{
duke@0 2828 // Fixup the integer flags in case comparison involved a NaN
duke@0 2829 //
duke@0 2830 // JNP exit (no unordered comparison, P-flag is set by NaN)
duke@0 2831 emit_opcode( cbuf, 0x7B );
duke@0 2832 emit_d8 ( cbuf, 0x03 );
duke@0 2833 // MOV AH,1 - treat as LT case (set carry flag)
duke@0 2834 emit_opcode( cbuf, 0xB4 );
duke@0 2835 emit_d8 ( cbuf, 0x01 );
duke@0 2836 // SAHF
duke@0 2837 emit_opcode( cbuf, 0x9E);
duke@0 2838 // NOP // target for branch to avoid branch to branch
duke@0 2839 emit_opcode( cbuf, 0x90);
duke@0 2840 %}
duke@0 2841
duke@0 2842 // fnstsw_ax();
duke@0 2843 // sahf();
duke@0 2844 // movl(dst, nan_result);
duke@0 2845 // jcc(Assembler::parity, exit);
duke@0 2846 // movl(dst, less_result);
duke@0 2847 // jcc(Assembler::below, exit);
duke@0 2848 // movl(dst, equal_result);
duke@0 2849 // jcc(Assembler::equal, exit);
duke@0 2850 // movl(dst, greater_result);
duke@0 2851
duke@0 2852 // less_result = -1;
duke@0 2853 // greater_result = 1;
duke@0 2854 // equal_result = 0;
duke@0 2855 // nan_result = -1;
duke@0 2856
duke@0 2857 enc_class CmpF_Result(eRegI dst) %{
duke@0 2858 // fnstsw_ax();
duke@0 2859 emit_opcode( cbuf, 0xDF);
duke@0 2860 emit_opcode( cbuf, 0xE0);
duke@0 2861 // sahf
duke@0 2862 emit_opcode( cbuf, 0x9E);
duke@0 2863 // movl(dst, nan_result);
duke@0 2864 emit_opcode( cbuf, 0xB8 + $dst$$reg);
duke@0 2865 emit_d32( cbuf, -1 );
duke@0 2866 // jcc(Assembler::parity, exit);
duke@0 2867 emit_opcode( cbuf, 0x7A );
duke@0 2868 emit_d8 ( cbuf, 0x13 );
duke@0 2869 // movl(dst, less_result);
duke@0 2870 emit_opcode( cbuf, 0xB8 + $dst$$reg);
duke@0 2871 emit_d32( cbuf, -1 );
duke@0 2872 // jcc(Assembler::below, exit);
duke@0 2873 emit_opcode( cbuf, 0x72 );
duke@0 2874 emit_d8 ( cbuf, 0x0C );
duke@0 2875 // movl(dst, equal_result);
duke@0 2876 emit_opcode( cbuf, 0xB8 + $dst$$reg);
duke@0 2877 emit_d32( cbuf, 0 );
duke@0 2878 // jcc(Assembler::equal, exit);
duke@0 2879 emit_opcode( cbuf, 0x74 );
duke@0 2880 emit_d8 ( cbuf, 0x05 );
duke@0 2881 // movl(dst, greater_result);
duke@0 2882 emit_opcode( cbuf, 0xB8 + $dst$$reg);
duke@0 2883 emit_d32( cbuf, 1 );
duke@0 2884 %}
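// What the branch ladder above computes, as a C sketch (illustrative
// only; assumes the preceding FPU compare ordered its operands so that
// "below" means a < b):
//
//   int cmpf_result(float a, float b) {
//     if (a != a || b != b) return -1;  // PF set (unordered) -> nan_result
//     if (a < b)            return -1;  // CF set after SAHF  -> less_result
//     if (a == b)           return  0;  // ZF set             -> equal_result
//     return 1;                         //                       greater_result
//   }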
duke@0 2885
duke@0 2886
duke@0 2887 // XMM version of CmpF_Result. Because the XMM compare
duke@0 2888 // instructions set EFLAGS directly, this is simpler than
duke@0 2889 // the FPU version above.
duke@0 2890 enc_class CmpX_Result(eRegI dst) %{
duke@0 2891 MacroAssembler _masm(&cbuf);
duke@0 2892 Label nan, inc, done;
duke@0 2893
duke@0 2894 __ jccb(Assembler::parity, nan);
duke@0 2895 __ jccb(Assembler::equal, done);
duke@0 2896 __ jccb(Assembler::above, inc);
duke@0 2897 __ bind(nan);
never@297 2898 __ decrement(as_Register($dst$$reg)); // NO L qqq
duke@0 2899 __ jmpb(done);
duke@0 2900 __ bind(inc);
never@297 2901 __ increment(as_Register($dst$$reg)); // NO L qqq
duke@0 2902 __ bind(done);
duke@0 2903 %}
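// Sketch of the mapping above: assuming the consuming instruct preloads
// dst with 0 before this encoding runs (an assumption; see the users of
// CmpX_Result), the paths resolve to equal -> 0, above -> +1, and below
// or unordered (parity) -> -1.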
duke@0 2904
duke@0 2905 // Compare the longs and set flags
duke@0 2906 // BROKEN! Do not use as-is: the flags come from whichever compare ran last, so only EQ/NE tests would be meaningful
duke@0 2907 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
duke@0 2908 // CMP $src1.hi,$src2.hi
duke@0 2909 emit_opcode( cbuf, 0x3B );
duke@0 2910 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
duke@0 2911 // JNE,s done
duke@0 2912 emit_opcode(cbuf,0x75);
duke@0 2913 emit_d8(cbuf, 2 );
duke@0 2914 // CMP $src1.lo,$src2.lo
duke@0 2915 emit_opcode( cbuf, 0x3B );
duke@0 2916 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
duke@0 2917 // done:
duke@0 2918 %}
duke@0 2919
duke@0 2920 enc_class convert_int_long( regL dst, eRegI src ) %{
duke@0 2921 // mov $dst.lo,$src
duke@0 2922 int dst_encoding = $dst$$reg;
duke@0 2923 int src_encoding = $src$$reg;
duke@0 2924 encode_Copy( cbuf, dst_encoding , src_encoding );
duke@0 2925 // mov $dst.hi,$src
duke@0 2926 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
duke@0 2927 // sar $dst.hi,31
duke@0 2928 emit_opcode( cbuf, 0xC1 );
duke@0 2929 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
duke@0 2930 emit_d8(cbuf, 0x1F );
duke@0 2931 %}
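// C equivalent of the widening above (sketch):
//
//   #include <stdint.h>
//   int64_t int_to_long(int32_t src) {
//     int32_t lo = src;
//     int32_t hi = src >> 31;      // SAR $dst.hi,31: replicate the sign bit
//     return ((int64_t)hi << 32) | (uint32_t)lo;
//   }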
duke@0 2932
duke@0 2933 enc_class convert_long_double( eRegL src ) %{
duke@0 2934 // push $src.hi
duke@0 2935 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
duke@0 2936 // push $src.lo
duke@0 2937 emit_opcode(cbuf, 0x50+$src$$reg );
duke@0 2938 // fild 64-bits at [SP]
duke@0 2939 emit_opcode(cbuf,0xdf);
duke@0 2940 emit_d8(cbuf, 0x6C);
duke@0 2941 emit_d8(cbuf, 0x24);
duke@0 2942 emit_d8(cbuf, 0x00);
duke@0 2943 // pop stack
duke@0 2944 emit_opcode(cbuf, 0x83); // add SP, #8
duke@0 2945 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
duke@0 2946 emit_d8(cbuf, 0x8);
duke@0 2947 %}
duke@0 2948
duke@0 2949 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
duke@0 2950 // IMUL EDX:EAX,$src1
duke@0 2951 emit_opcode( cbuf, 0xF7 );
duke@0 2952 emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
duke@0 2953 // SAR EDX,$cnt-32
duke@0 2954 int shift_count = ((int)$cnt$$constant) - 32;
duke@0 2955 if (shift_count > 0) {
duke@0 2956 emit_opcode(cbuf, 0xC1);
duke@0 2957 emit_rm(cbuf, 0x3, 7, $dst$$reg );
duke@0 2958 emit_d8(cbuf, shift_count);
duke@0 2959 }
duke@0 2960 %}
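// The one-operand IMUL leaves the full 64-bit product in EDX:EAX, so for
// a shift count of 32..63 the high half is already in EDX and only a
// further SAR of (cnt - 32) is needed (none at all when cnt == 32).
// C sketch (illustrative only):
//
//   #include <stdint.h>
//   int32_t mul_shift_high(int32_t x, int32_t y, int cnt /* 32..63 */) {
//     return (int32_t)(((int64_t)x * y) >> cnt);
//   }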
duke@0 2961
duke@0 2962 // this version doesn't have add sp, 8
duke@0 2963 enc_class convert_long_double2( eRegL src ) %{
duke@0 2964 // push $src.hi
duke@0 2965 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
duke@0 2966 // push $src.lo
duke@0 2967 emit_opcode(cbuf, 0x50+$src$$reg );
duke@0 2968 // fild 64-bits at [SP]
duke@0 2969 emit_opcode(cbuf,0xdf);
duke@0 2970 emit_d8(cbuf, 0x6C);
duke@0 2971 emit_d8(cbuf, 0x24);
duke@0 2972 emit_d8(cbuf, 0x00);
duke@0 2973 %}
duke@0 2974
duke@0 2975 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
duke@0 2976 // Basic idea: long = (long)int * (long)int
duke@0 2977 // IMUL EDX:EAX, src
duke@0 2978 emit_opcode( cbuf, 0xF7 );
duke@0 2979 emit_rm( cbuf, 0x3, 0x5, $src$$reg);
duke@0 2980 %}
duke@0 2981
duke@0 2982 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
duke@0 2983 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
duke@0 2984 // MUL EDX:EAX, src
duke@0 2985 emit_opcode( cbuf, 0xF7 );
duke@0 2986 emit_rm( cbuf, 0x3, 0x4, $src$$reg);
duke@0 2987 %}
duke@0 2988
duke@0 2989 enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
duke@0 2990 // Basic idea: lo(result) = lo(x_lo * y_lo)
duke@0 2991 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
duke@0 2992 // MOV $tmp,$src.lo
duke@0 2993 encode_Copy( cbuf, $tmp$$reg, $src$$reg );
duke@0 2994 // IMUL $tmp,EDX
duke@0 2995 emit_opcode( cbuf, 0x0F );
duke@0 2996 emit_opcode( cbuf, 0xAF );
duke@0 2997 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
duke@0 2998 // MOV EDX,$src.hi
duke@0 2999 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
duke@0 3000 // IMUL EDX,EAX
duke@0 3001 emit_opcode( cbuf, 0x0F );
duke@0 3002 emit_opcode( cbuf, 0xAF );
duke@0 3003 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
duke@0 3004 // ADD $tmp,EDX
duke@0 3005 emit_opcode( cbuf, 0x03 );
duke@0 3006 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
duke@0 3007 // MUL EDX:EAX,$src.lo
duke@0 3008 emit_opcode( cbuf, 0xF7 );
duke@0 3009 emit_rm( cbuf, 0x3, 0x4, $src$$reg );
duke@0 3010 // ADD EDX,$tmp
duke@0 3011 emit_opcode( cbuf, 0x03 );
duke@0 3012 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
duke@0 3013 %}
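// C sketch of the schoolbook decomposition above (illustrative only; the
// x_hi * y_hi term is dropped because it only affects bits >= 64):
//
//   #include <stdint.h>
//   uint64_t long_multiply(uint64_t x, uint64_t y) {
//     uint32_t x_lo = (uint32_t)x, x_hi = (uint32_t)(x >> 32);
//     uint32_t y_lo = (uint32_t)y, y_hi = (uint32_t)(y >> 32);
//     uint64_t p  = (uint64_t)x_lo * y_lo;        // the widening MUL
//     uint32_t hi = (uint32_t)(p >> 32)
//                 + x_hi * y_lo + x_lo * y_hi;    // the two IMULs + ADDs
//     return ((uint64_t)hi << 32) | (uint32_t)p;
//   }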
duke@0 3014
duke@0 3015 enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
duke@0 3016 // Basic idea: lo(result) = lo(src * y_lo)
duke@0 3017 // hi(result) = hi(src * y_lo) + lo(src * y_hi)
duke@0 3018 // IMUL $tmp,EDX,$src
duke@0 3019 emit_opcode( cbuf, 0x6B );
duke@0 3020 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
duke@0 3021 emit_d8( cbuf, (int)$src$$constant );
duke@0 3022 // MOV EDX,$src
duke@0 3023 emit_opcode(cbuf, 0xB8 + EDX_enc);
duke@0 3024 emit_d32( cbuf, (int)$src$$constant );
duke@0 3025 // MUL EDX:EAX,EDX
duke@0 3026 emit_opcode( cbuf, 0xF7 );
duke@0 3027 emit_rm( cbuf, 0x3, 0x4, EDX_enc );
duke@0 3028 // ADD EDX,$tmp
duke@0 3029 emit_opcode( cbuf, 0x03 );
duke@0 3030 emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
duke@0 3031 %}
duke@0 3032
duke@0 3033 enc_class long_div( eRegL src1, eRegL src2 ) %{
duke@0 3034 // PUSH src1.hi
duke@0 3035 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
duke@0 3036 // PUSH src1.lo
duke@0 3037 emit_opcode(cbuf, 0x50+$src1$$reg );
duke@0 3038 // PUSH src2.hi
duke@0 3039 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
duke@0 3040 // PUSH src2.lo
duke@0 3041 emit_opcode(cbuf, 0x50+$src2$$reg );
duke@0 3042 // CALL directly to the runtime
duke@0 3043 cbuf.set_inst_mark();
duke@0 3044 emit_opcode(cbuf,0xE8); // Call into runtime
duke@0 3045 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
duke@0 3046 // Restore stack
duke@0 3047 emit_opcode(cbuf, 0x83); // add SP, #framesize
duke@0 3048 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
duke@0 3049 emit_d8(cbuf, 4*4);
duke@0 3050 %}
duke@0 3051
duke@0 3052 enc_class long_mod( eRegL src1, eRegL src2 ) %{
duke@0 3053 // PUSH src1.hi
duke@0 3054 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
duke@0 3055 // PUSH src1.lo
duke@0 3056 emit_opcode(cbuf, 0x50+$src1$$reg );
duke@0 3057 // PUSH src2.hi
duke@0 3058 emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
duke@0 3059 // PUSH src2.lo
duke@0 3060 emit_opcode(cbuf, 0x50+$src2$$reg );
duke@0 3061 // CALL directly to the runtime
duke@0 3062 cbuf.set_inst_mark();
duke@0 3063 emit_opcode(cbuf,0xE8); // Call into runtime
duke@0 3064 emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
duke@0 3065 // Restore stack
duke@0 3066 emit_opcode(cbuf, 0x83); // add SP, #framesize
duke@0 3067 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
duke@0 3068 emit_d8(cbuf, 4*4);
duke@0 3069 %}
duke@0 3070
duke@0 3071 enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
duke@0 3072 // MOV $tmp,$src.lo
duke@0 3073 emit_opcode(cbuf, 0x8B);
duke@0 3074 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
duke@0 3075 // OR $tmp,$src.hi
duke@0 3076 emit_opcode(cbuf, 0x0B);
duke@0 3077 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
duke@0 3078 %}
duke@0 3079
duke@0 3080 enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
duke@0 3081 // CMP $src1.lo,$src2.lo
duke@0 3082 emit_opcode( cbuf, 0x3B );
duke@0 3083 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
duke@0 3084 // JNE,s skip
duke@0 3085 emit_cc(cbuf, 0x70, 0x5);
duke@0 3086 emit_d8(cbuf,2);
duke@0 3087 // CMP $src1.hi,$src2.hi
duke@0 3088 emit_opcode( cbuf, 0x3B );
duke@0 3089 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
duke@0 3090 %}
duke@0 3091
duke@0 3092 enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
duke@0 3093 // CMP $src1.lo,$src2.lo\t! Long compare; set flags for low bits
duke@0 3094 emit_opcode( cbuf, 0x3B );
duke@0 3095 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
duke@0 3096 // MOV $tmp,$src1.hi
duke@0 3097 emit_opcode( cbuf, 0x8B );
duke@0 3098 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
duke@0 3099 // SBB $tmp,$src2.hi\t! Compute flags for long compare
duke@0 3100 emit_opcode( cbuf, 0x1B );
duke@0 3101 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
duke@0 3102 %}
duke@0 3103
duke@0 3104 enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
duke@0 3105 // XOR $tmp,$tmp
duke@0 3106 emit_opcode(cbuf,0x33); // XOR
duke@0 3107 emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
duke@0 3108 // CMP $tmp,$src.lo
duke@0 3109 emit_opcode( cbuf, 0x3B );
duke@0 3110 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
duke@0 3111 // SBB $tmp,$src.hi
duke@0 3112 emit_opcode( cbuf, 0x1B );
duke@0 3113 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
duke@0 3114 %}
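// The CMP/SBB pairs in long_cmp_flags2/3 work because SBB folds the
// borrow of the low-word compare into the high-word subtract, so SF, OF
// and CF afterwards describe the full 64-bit signed difference (ZF does
// not, which is why equality goes through long_cmp_flags1). C sketch:
//
//   #include <stdint.h>
//   int long_less(int32_t a_hi, uint32_t a_lo, int32_t b_hi, uint32_t b_lo) {
//     int borrow = (a_lo < b_lo);                  // CF from CMP lo,lo
//     int64_t d  = (int64_t)a_hi - b_hi - borrow;  // SBB hi (no wrap here)
//     return d < 0;                                // i.e. SF ^ OF of the SBB
//   }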
duke@0 3115
duke@0 3116 // Sniff, sniff... smells like Gnu Superoptimizer
duke@0 3117 enc_class neg_long( eRegL dst ) %{
duke@0 3118 emit_opcode(cbuf,0xF7); // NEG hi
duke@0 3119 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
duke@0 3120 emit_opcode(cbuf,0xF7); // NEG lo
duke@0 3121 emit_rm (cbuf,0x3, 0x3, $dst$$reg );
duke@0 3122 emit_opcode(cbuf,0x83); // SBB hi,0
duke@0 3123 emit_rm (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
duke@0 3124 emit_d8 (cbuf,0 );
duke@0 3125 %}
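// C sketch of the three-instruction negate above (illustrative only):
//
//   #include <stdint.h>
//   void neg64(uint32_t* lo, uint32_t* hi) {
//     *hi = 0u - *hi;                  // NEG hi
//     uint32_t borrow = (*lo != 0);    // NEG lo sets CF iff lo was non-zero
//     *lo = 0u - *lo;
//     *hi -= borrow;                   // SBB hi,0
//   }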
duke@0 3126
duke@0 3127 enc_class movq_ld(regXD dst, memory mem) %{
duke@0 3128 MacroAssembler _masm(&cbuf);
twisti@603 3129 __ movq($dst$$XMMRegister, $mem$$Address);
duke@0 3130 %}
duke@0 3131
duke@0 3132 enc_class movq_st(memory mem, regXD src) %{
duke@0 3133 MacroAssembler _masm(&cbuf);
twisti@603 3134 __ movq($mem$$Address, $src$$XMMRegister);
duke@0 3135 %}
duke@0 3136
duke@0 3137 enc_class pshufd_8x8(regX dst, regX src) %{
duke@0 3138 MacroAssembler _masm(&cbuf);
duke@0 3139
duke@0 3140 encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
duke@0 3141 __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
duke@0 3142 __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
duke@0 3143 %}
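// Example of the byte broadcast above: with the low byte of dst equal to
// 0xAB, PUNPCKLBW dst,dst interleaves dst with itself, doubling each low
// byte, and PSHUFLW with imm8 = 0x00 then replicates word 0 across the
// low quadword, leaving eight copies: AB AB AB AB AB AB AB AB.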
duke@0 3144
duke@0 3145 enc_class pshufd_4x16(regX dst, regX src) %{
duke@0 3146 MacroAssembler _masm(&cbuf);
duke@0 3147
duke@0 3148 __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
duke@0 3149 %}
duke@0 3150
duke@0 3151 enc_class pshufd(regXD dst, regXD src, int mode) %{
duke@0 3152 MacroAssembler _masm(&cbuf);
duke@0 3153
duke@0 3154 __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
duke@0 3155 %}
duke@0 3156
duke@0 3157 enc_class pxor(regXD dst, regXD src) %{
duke@0 3158 MacroAssembler _masm(&cbuf);
duke@0 3159
duke@0 3160 __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
duke@0 3161 %}
duke@0 3162
duke@0 3163 enc_class mov_i2x(regXD dst, eRegI src) %{
duke@0 3164 MacroAssembler _masm(&cbuf);
duke@0 3165
never@297 3166 __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
duke@0 3167 %}
duke@0 3168
duke@0 3169
duke@0 3170 // Because the transitions from emitted code to the runtime
duke@0 3171 // monitorenter/exit helper stubs are so slow it's critical that
duke@0 3172 // we inline both the stack-locking fast-path and the inflated fast path.
duke@0 3173 //
duke@0 3174 // See also: cmpFastLock and cmpFastUnlock.
duke@0 3175 //
duke@0 3176 // What follows is a specialized inline transliteration of the code
duke@0 3177 // in slow_enter() and slow_exit(). If we're concerned about I$ bloat,
duke@0 3178 // another option would be to emit TrySlowEnter and TrySlowExit methods
duke@0 3179 // at startup-time. These methods would accept arguments as
duke@0 3180 // (rax=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
duke@0 3181 // indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply
duke@0 3182 // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
duke@0 3183 // In practice, however, the # of lock sites is bounded and is usually small.
duke@0 3184 // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
duke@0 3185 // if the processor uses simple bimodal branch predictors keyed by EIP,
duke@0 3186 // since the helper routines would be called from multiple synchronization
duke@0 3187 // sites.
duke@0 3188 //
duke@0 3189 // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
duke@0 3190 // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
duke@0 3191 // to those specialized methods. That'd give us a mostly platform-independent
duke@0 3192 // implementation that the JITs could optimize and inline at their pleasure.
duke@0 3193 // Done correctly, the only time we'd need to cross to native code would be
duke@0 3194 // to park() or unpark() threads. We'd also need a few more unsafe operators
duke@0 3195 // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
duke@0 3196 // (b) issue explicit barriers or fence operations.
duke@0 3197 //
duke@0 3198 // TODO:
duke@0 3199 //
duke@0 3200 // * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
duke@0 3201 // This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
duke@0 3202 // Given TLAB allocation, Self is usually manifested in a register, so passing it into
duke@0 3203 // the lock operators would typically be faster than reifying Self.
duke@0 3204 //
duke@0 3205 // * Ideally I'd define the primitives as:
duke@0 3206 // fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
duke@0 3207 // fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
duke@0 3208 // Unfortunately ADLC bugs prevent us from expressing the ideal form.
duke@0 3209 // Instead, we're stuck with the rather awkward and brittle register assignments below.
duke@0 3210 // Furthermore the register assignments are overconstrained, possibly resulting in
duke@0 3211 // sub-optimal code near the synchronization site.
duke@0 3212 //
duke@0 3213 // * Eliminate the sp-proximity tests and just use "== Self" tests instead.
duke@0 3214 // Alternatively, use a better sp-proximity test.
duke@0 3215 //
duke@0 3216 // * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
duke@0 3217 // Either one is sufficient to uniquely identify a thread.
duke@0 3218 // TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
duke@0 3219 //
duke@0 3220 // * Intrinsify notify() and notifyAll() for the common cases where the
duke@0 3221 // object is locked by the calling thread but the waitlist is empty.
duke@0 3222 // This avoids the expensive JNI calls to JVM_Notify() and JVM_NotifyAll().
duke@0 3223 //
duke@0 3224 // * use jccb and jmpb instead of jcc and jmp to improve code density.
duke@0 3225 // But beware of excessive branch density on AMD Opterons.
duke@0 3226 //
duke@0 3227 // * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
duke@0 3228 // or failure of the fast-path. If the fast-path fails then we pass
duke@0 3229 // control to the slow-path, typically in C. In Fast_Lock and
duke@0 3230 // Fast_Unlock we often branch to DONE_LABEL, just to find that C2
duke@0 3231 // will emit a conditional branch immediately after the node.
duke@0 3232 // So we have branches to branches and lots of ICC.ZF games.
duke@0 3233 // Instead, it might be better to have C2 pass a "FailureLabel"
duke@0 3234 // into Fast_Lock and Fast_Unlock. In the case of success, control
duke@0 3235 // will drop through the node. ICC.ZF is undefined at exit.
duke@0 3236 // In the case of failure, the node will branch directly to the
duke@0 3237 // FailureLabel
duke@0 3238
duke@0 3239
duke@0 3240 // obj: object to lock
duke@0 3241 // box: on-stack box address (displaced header location) - KILLED
duke@0 3242 // rax,: tmp -- KILLED
duke@0 3243 // scr: tmp -- KILLED
duke@0 3244 enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
duke@0 3245
duke@0 3246 Register objReg = as_Register($obj$$reg);
duke@0 3247 Register boxReg = as_Register($box$$reg);
duke@0 3248 Register tmpReg = as_Register($tmp$$reg);
duke@0 3249 Register scrReg = as_Register($scr$$reg);
duke@0 3250
duke@0 3251 // Ensure the register assignments are disjoint
duke@0 3252 guarantee (objReg != boxReg, "") ;
duke@0 3253 guarantee (objReg != tmpReg, "") ;
duke@0 3254 guarantee (objReg != scrReg, "") ;
duke@0 3255 guarantee (boxReg != tmpReg, "") ;
duke@0 3256 guarantee (boxReg != scrReg, "") ;
duke@0 3257 guarantee (tmpReg == as_Register(EAX_enc), "") ;
duke@0 3258
duke@0 3259 MacroAssembler masm(&cbuf);
duke@0 3260
duke@0 3261 if (_counters != NULL) {
duke@0 3262 masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
duke@0 3263 }
duke@0 3264 if (EmitSync & 1) {
duke@0 3265 // set box->dhw = unused_mark (3)
never@297 3266 // Force all sync thru slow-path: slow_enter() and slow_exit()
never@297 3267 masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
never@297 3268 masm.cmpptr (rsp, (int32_t)0) ;
never@297 3269 } else
never@297 3270 if (EmitSync & 2) {
never@297 3271 Label DONE_LABEL ;
duke@0 3272 if (UseBiasedLocking) {
duke@0 3273 // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
duke@0 3274 masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
duke@0 3275 }
duke@0 3276
never@297 3277 masm.movptr(tmpReg, Address(objReg, 0)) ; // fetch markword
never@297 3278 masm.orptr (tmpReg, 0x1);
never@297 3279 masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
duke@0 3280 if (os::is_MP()) { masm.lock(); }
never@297 3281 masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
duke@0 3282 masm.jcc(Assembler::equal, DONE_LABEL);
duke@0 3283 // Recursive locking
never@297 3284 masm.subptr(tmpReg, rsp);
never@297 3285 masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
never@297 3286 masm.movptr(Address(boxReg, 0), tmpReg);
never@297 3287 masm.bind(DONE_LABEL) ;
never@297 3288 } else {
never@297 3289 // Possible cases that we'll encounter in fast_lock
duke@0 3290 // ------------------------------------------------
duke@0 3291 // * Inflated
duke@0 3292 // -- unlocked
duke@0 3293 // -- Locked
duke@0 3294 // = by self
duke@0 3295 // = by other
duke@0 3296 // * biased
duke@0 3297 // -- by Self
duke@0 3298 // -- by other
duke@0 3299 // * neutral
duke@0 3300 // * stack-locked
duke@0 3301 // -- by self
duke@0 3302 // = sp-proximity test hits
duke@0 3303 // = sp-proximity test generates false-negative
duke@0 3304 // -- by other
duke@0 3305 //
duke@0 3306
duke@0 3307 Label IsInflated, DONE_LABEL, PopDone ;
duke@0 3308
duke@0 3309 // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
duke@0 3310 // order to reduce the number of conditional branches in the most common cases.
duke@0 3311 // Beware -- there's a subtle invariant that fetch of the markword
duke@0 3312 // at [FETCH], below, will never observe a biased encoding (*101b).
duke@0 3313 // If this invariant is not held we risk exclusion (safety) failure.
kvn@411 3314 if (UseBiasedLocking && !UseOptoBiasInlining) {
duke@0 3315 masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
duke@0 3316 }
duke@0 3317
never@297 3318 masm.movptr(tmpReg, Address(objReg, 0)) ; // [FETCH]
never@297 3319 masm.testptr(tmpReg, 0x02) ; // Inflated v (Stack-locked or neutral)
duke@0 3320 masm.jccb (Assembler::notZero, IsInflated) ;
duke@0 3321
duke@0 3322 // Attempt stack-locking ...
never@297 3323 masm.orptr (tmpReg, 0x1);
never@297 3324 masm.movptr(Address(boxReg, 0), tmpReg); // Anticipate successful CAS
duke@0 3325 if (os::is_MP()) { masm.lock(); }
never@297 3326 masm.cmpxchgptr(boxReg, Address(objReg, 0)); // Updates tmpReg
duke@0 3327 if (_counters != NULL) {
duke@0 3328 masm.cond_inc32(Assembler::equal,
duke@0 3329 ExternalAddress((address)_counters->fast_path_entry_count_addr()));
duke@0 3330 }
duke@0 3331 masm.jccb (Assembler::equal, DONE_LABEL);
duke@0 3332
duke@0 3333 // Recursive locking
never@297 3334 masm.subptr(tmpReg, rsp);
never@297 3335 masm.andptr(tmpReg, 0xFFFFF003 );
never@297 3336 masm.movptr(Address(boxReg, 0), tmpReg);
duke@0 3337 if (_counters != NULL) {
duke@0 3338 masm.cond_inc32(Assembler::equal,
duke@0 3339 ExternalAddress((address)_counters->fast_path_entry_count_addr()));
duke@0 3340 }
duke@0 3341 masm.jmp (DONE_LABEL) ;
duke@0 3342
duke@0 3343 masm.bind (IsInflated) ;
duke@0 3344
duke@0 3345 // The object is inflated.
duke@0 3346 //
duke@0 3347 // TODO-FIXME: eliminate the ugly use of manifest constants:
duke@0 3348 // Use markOopDesc::monitor_value instead of "2".
duke@0 3349 // use markOop::unused_mark() instead of "3".
duke@0 3350 // The tmpReg value is an objectMonitor reference ORed with
duke@0 3351 // markOopDesc::monitor_value (2). We can either convert tmpReg to an
duke@0 3352 // objectmonitor pointer by masking off the "2" bit or we can just
duke@0 3353 // use tmpReg as an objectmonitor pointer but bias the objectmonitor
duke@0 3354 // field offsets with "-2" to compensate for and annul the low-order tag bit.
duke@0 3355 //
duke@0 3356 // I use the latter as it avoids AGI stalls.
duke@0 3357 // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
duke@0 3358 // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
duke@0 3359 //
duke@0 3360 #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
duke@0 3361
duke@0 3362 // boxReg refers to the on-stack BasicLock in the current frame.
duke@0 3363 // We'd like to write:
duke@0 3364 // set box->_displaced_header = markOop::unused_mark(). Any non-0 value suffices.
duke@0 3365 // This is convenient but results a ST-before-CAS penalty. The following CAS suffers
duke@0 3366 // additional latency as we have another ST in the store buffer that must drain.
duke@0 3367
never@297 3368 if (EmitSync & 8192) {
never@297 3369 masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
never@297 3370 masm.get_thread (scrReg) ;
never@297 3371 masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
xlu@528 3372 masm.movptr(tmpReg, NULL_WORD); // consider: xor vs mov
never@297 3373 if (os::is_MP()) { masm.lock(); }
never@297 3374 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
never@297 3375 } else
duke@0 3376 if ((EmitSync & 128) == 0) { // avoid ST-before-CAS
never@297 3377 masm.movptr(scrReg, boxReg) ;
never@297 3378 masm.movptr(boxReg, tmpReg); // consider: LEA box, [tmp-2]
duke@0 3379
duke@0 3380 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
duke@0 3381 if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
duke@0 3382 // prefetchw [eax + Offset(_owner)-2]
never@297 3383 masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
duke@0 3384 }
duke@0 3385
duke@0 3386 if ((EmitSync & 64) == 0) {
duke@0 3387 // Optimistic form: consider XORL tmpReg,tmpReg
xlu@528 3388 masm.movptr(tmpReg, NULL_WORD) ;
never@297 3389 } else {
duke@0 3390 // Can suffer RTS->RTO upgrades on shared or cold $ lines
duke@0 3391 // Test-And-CAS instead of CAS
never@297 3392 masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
never@297 3393 masm.testptr(tmpReg, tmpReg) ; // Locked ?
never@297 3394 masm.jccb (Assembler::notZero, DONE_LABEL) ;
duke@0 3395 }
duke@0 3396
duke@0 3397 // Appears unlocked - try to swing _owner from null to non-null.
duke@0 3398 // Ideally, I'd manifest "Self" with get_thread and then attempt
duke@0 3399 // to CAS the register containing Self into m->Owner.
duke@0 3400 // But we don't have enough registers, so instead we can either try to CAS
duke@0 3401 // rsp or the address of the box (in scr) into &m->owner. If the CAS succeeds
duke@0 3402 // we later store "Self" into m->Owner. Transiently storing a stack address
duke@0 3403 // (rsp or the address of the box) into m->owner is harmless.
duke@0 3404 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
duke@0 3405 if (os::is_MP()) { masm.lock(); }
never@297 3406 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
never@297 3407 masm.movptr(Address(scrReg, 0), 3) ; // box->_displaced_header = 3
never@297 3408 masm.jccb (Assembler::notZero, DONE_LABEL) ;
duke@0 3409 masm.get_thread (scrReg) ; // beware: clobbers ICCs
never@297 3410 masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
never@297 3411 masm.xorptr(boxReg, boxReg) ; // set icc.ZFlag = 1 to indicate success
never@297 3412
never@297 3413 // If the CAS fails we can either retry or pass control to the slow-path.
never@297 3414 // We use the latter tactic.
duke@0 3415 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
duke@0 3416 // If the CAS was successful ...
duke@0 3417 // Self has acquired the lock
duke@0 3418 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
duke@0 3419 // Intentional fall-through into DONE_LABEL ...
duke@0 3420 } else {
never@297 3421 masm.movptr(Address(boxReg, 0), 3) ; // results in ST-before-CAS penalty
never@297 3422 masm.movptr(boxReg, tmpReg) ;
duke@0 3423
duke@0 3424 // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
duke@0 3425 if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
duke@0 3426 // prefetchw [eax + Offset(_owner)-2]
never@297 3427 masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
duke@0 3428 }
duke@0 3429
duke@0 3430 if ((EmitSync & 64) == 0) {
duke@0 3431 // Optimistic form
never@297 3432 masm.xorptr (tmpReg, tmpReg) ;
never@297 3433 } else {
duke@0 3434 // Can suffer RTS->RTO upgrades on shared or cold $ lines
never@297 3435 masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; // rax, = m->_owner
never@297 3436 masm.testptr(tmpReg, tmpReg) ; // Locked ?
never@297 3437 masm.jccb (Assembler::notZero, DONE_LABEL) ;
duke@0 3438 }
duke@0 3439
duke@0 3440 // Appears unlocked - try to swing _owner from null to non-null.
duke@0 3441 // Use either "Self" (in scr) or rsp as thread identity in _owner.
duke@0 3442 // Invariant: tmpReg == 0. tmpReg is EAX which is the implicit cmpxchg comparand.
duke@0 3443 masm.get_thread (scrReg) ;
duke@0 3444 if (os::is_MP()) { masm.lock(); }
never@297 3445 masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
duke@0 3446
duke@0 3447 // If the CAS fails we can either retry or pass control to the slow-path.
duke@0 3448 // We use the latter tactic.
duke@0 3449 // Pass the CAS result in the icc.ZFlag into DONE_LABEL
duke@0 3450 // If the CAS was successful ...
duke@0 3451 // Self has acquired the lock
duke@0 3452 // Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
duke@0 3453 // Intentional fall-through into DONE_LABEL ...
duke@0 3454 }
duke@0 3455
duke@0 3456 // DONE_LABEL is a hot target - we'd really like to place it at the
duke@0 3457 // start of cache line by padding with NOPs.
duke@0 3458 // See the AMD and Intel software optimization manuals for the
duke@0 3459 // most efficient "long" NOP encodings.
duke@0 3460 // Unfortunately none of our alignment mechanisms suffice.
duke@0 3461 masm.bind(DONE_LABEL);
duke@0 3462
duke@0 3463 // Avoid branch-to-branch on AMD processors
duke@0 3464 // This appears to be superstition.
duke@0 3465 if (EmitSync & 32) masm.nop() ;
duke@0 3466
duke@0 3467
duke@0 3468 // At DONE_LABEL the icc ZFlag is set as follows ...
duke@0 3469 // Fast_Unlock uses the same protocol.
duke@0 3470 // ZFlag == 1 -> Success
duke@0 3471 // ZFlag == 0 -> Failure - force control through the slow-path
duke@0 3472 }
duke@0 3473 %}
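// A condensed sketch of the stack-locking fast path above, in pseudo-C
// (illustrative only; helper names are hypothetical and biased locking
// is omitted):
//
//   mark = obj->mark;                                     // [FETCH]
//   if ((mark & 2) == 0) {                                // not inflated
//     box->dhw = mark | 1;                                // anticipate a neutral mark
//     if (CAS(&obj->mark, /*expected*/ mark | 1, /*new*/ box)) return SUCCESS;
//     mark = obj->mark;                                   // failed CAS reloaded EAX
//     if (((mark - esp) & 0xFFFFF003) == 0) {             // our own stack, within a page?
//       box->dhw = 0;                                     // flag recursive stack-lock
//       return SUCCESS;                                   // ZF set by the AND
//     }
//     return FAILURE;                                     // ZF clear from the AND
//   }
//   // else: inflated -- try to CAS a thread identity into monitor->_owner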
duke@0 3474
duke@0 3475 // obj: object to unlock
duke@0 3476 // box: box address (displaced header location), killed. Must be EAX.
duke@0 3477 // rbx,: killed tmp; cannot be obj nor box.
duke@0 3478 //
duke@0 3479 // Some commentary on balanced locking:
duke@0 3480 //
duke@0 3481 // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
duke@0 3482 // Methods that don't have provably balanced locking are forced to run in the
duke@0 3483 // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
duke@0 3484 // The interpreter provides two properties:
duke@0 3485 // I1: At return-time the interpreter automatically and quietly unlocks any
duke@0 3486 // objects acquired in the current activation (frame). Recall that the
duke@0 3487 // interpreter maintains an on-stack list of locks currently held by
duke@0 3488 // a frame.
duke@0 3489 // I2: If a method attempts to unlock an object that is not held by the
duke@0 3490 // frame, the interpreter throws IMSX.
duke@0 3491 //
duke@0 3492 // Let's say A(), which has provably balanced locking, acquires O and then calls B().
duke@0 3493 // B() doesn't have provably balanced locking so it runs in the interpreter.
duke@0 3494 // Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O
duke@0 3495 // is still locked by A().
duke@0 3496 //
duke@0 3497 // The only other source of unbalanced locking would be JNI. The "Java Native Interface:
duke@0 3498 // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
duke@0 3499 // should not be unlocked by "normal" java-level locking and vice-versa. The specification
duke@0 3500 // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
duke@0 3501
duke@0 3502 enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
duke@0 3503
duke@0 3504 Register objReg = as_Register($obj$$reg);
duke@0 3505 Register boxReg = as_Register($box$$reg);
duke@0 3506 Register tmpReg = as_Register($tmp$$reg);
duke@0 3507
duke@0 3508 guarantee (objReg != boxReg, "") ;
duke@0 3509 guarantee (objReg != tmpReg, "") ;
duke@0 3510 guarantee (boxReg != tmpReg, "") ;
duke@0 3511 guarantee (boxReg == as_Register(EAX_enc), "") ;
duke@0 3512 MacroAssembler masm(&cbuf);
duke@0 3513
duke@0 3514 if (EmitSync & 4) {
duke@0 3515 // Disable - inhibit all inlining. Force control through the slow-path
never@297 3516 masm.cmpptr (rsp, 0) ;
never@297 3517 } else
duke@0 3518 if (EmitSync & 8) {
duke@0 3519 Label DONE_LABEL ;
duke@0 3520 if (UseBiasedLocking) {
duke@0 3521 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
duke@0 3522 }
duke@0 3523 // classic stack-locking code ...
never@297 3524 masm.movptr(tmpReg, Address(boxReg, 0)) ;
never@297 3525 masm.testptr(tmpReg, tmpReg) ;
duke@0 3526 masm.jcc (Assembler::zero, DONE_LABEL) ;
duke@0 3527 if (os::is_MP()) { masm.lock(); }
never@297 3528 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
duke@0 3529 masm.bind(DONE_LABEL);
duke@0 3530 } else {
duke@0 3531 Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
duke@0 3532
duke@0 3533 // Critically, the biased locking test must have precedence over
duke@0 3534 // and appear before the (box->dhw == 0) recursive stack-lock test.
kvn@411 3535 if (UseBiasedLocking && !UseOptoBiasInlining) {
duke@0 3536 masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
duke@0 3537 }
never@297 3538
never@297 3539 masm.cmpptr(Address(boxReg, 0), 0) ; // Examine the displaced header
never@297 3540 masm.movptr(tmpReg, Address(objReg, 0)) ; // Examine the object's markword
duke@0 3541 masm.jccb (Assembler::zero, DONE_LABEL) ; // 0 indicates recursive stack-lock
duke@0 3542
never@297 3543 masm.testptr(tmpReg, 0x02) ; // Inflated?
duke@0 3544 masm.jccb (Assembler::zero, Stacked) ;
duke@0 3545
duke@0 3546 masm.bind (Inflated) ;
duke@0 3547 // It's inflated.
duke@0 3548 // Despite our balanced locking property we still check that m->_owner == Self
duke@0 3549 // as java routines or native JNI code called by this thread might
duke@0 3550 // have released the lock.
duke@0 3551 // Refer to the comments in synchronizer.cpp for how we might encode extra
duke@0 3552 // state in _succ so we can avoid fetching EntryList|cxq.
duke@0 3553 //
duke@0 3554 // I'd like to add more cases in fast_lock() and fast_unlock() --
duke@0 3555 // such as recursive enter and exit -- but we have to be wary of
duke@0 3556 // I$ bloat, T$ effects and BP$ effects.
duke@0 3557 //
duke@0 3558 // If there's no contention try a 1-0 exit. That is, exit without
duke@0 3559 // a costly MEMBAR or CAS. See synchronizer.cpp for details on how
duke@0 3560 // we detect and recover from the race that the 1-0 exit admits.
duke@0 3561 //
duke@0 3562 // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
duke@0 3563 // before it STs null into _owner, releasing the lock. Updates
duke@0 3564 // to data protected by the critical section must be visible before
duke@0 3565 // we drop the lock (and thus before any other thread could acquire
duke@0 3566 // the lock and observe the fields protected by the lock).
duke@0 3567 // IA32's memory-model is SPO, so STs are ordered with respect to
duke@0 3568 // each other and there's no need for an explicit barrier (fence).
duke@0 3569 // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
duke@0 3570
duke@0 3571 masm.get_thread (boxReg) ;
duke@0 3572 if ((EmitSync & 4096) && VM_Version::supports_3dnow() && os::is_MP()) {
never@297 3573 // prefetchw [ebx + Offset(_owner)-2]
never@297 3574 masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
duke@0 3575 }
duke@0 3576
duke@0 3577 // Note that we could employ various encoding schemes to reduce
duke@0 3578 // the number of loads below (currently 4) to just 2 or 3.
duke@0 3579 // Refer to the comments in synchronizer.cpp.
duke@0 3580 // In practice the chain of fetches doesn't seem to impact performance, however.
duke@0 3581 if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
duke@0 3582 // Attempt to reduce branch density - AMD's branch predictor.
never@297 3583 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
never@297 3584 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
never@297 3585 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
never@297 3586 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
never@297 3587 masm.jccb (Assembler::notZero, DONE_LABEL) ;
xlu@528 3588 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
never@297 3589 masm.jmpb (DONE_LABEL) ;
never@297 3590 } else {
never@297 3591 masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
never@297 3592 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
never@297 3593 masm.jccb (Assembler::notZero, DONE_LABEL) ;
never@297 3594 masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
never@297 3595 masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
never@297 3596 masm.jccb (Assembler::notZero, CheckSucc) ;
xlu@528 3597 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
never@297 3598 masm.jmpb (DONE_LABEL) ;
duke@0 3599 }
duke@0 3600
duke@0 3601 // The following code fragment (EmitSync & 65536) improves the performance of
duke@0 3602 // contended applications and contended synchronization microbenchmarks.
duke@0 3603 // Unfortunately the emission of the code - even though not executed - causes regressions
duke@0 3604 // in scimark and jetstream, evidently because of $ effects. Replacing the code
duke@0 3605 // with an equal number of never-executed NOPs results in the same regression.
duke@0 3606 // We leave it off by default.
duke@0 3607
duke@0 3608 if ((EmitSync & 65536) != 0) {
duke@0 3609 Label LSuccess, LGoSlowPath ;
duke@0 3610
duke@0 3611 masm.bind (CheckSucc) ;
duke@0 3612
duke@0 3613 // Optional pre-test ... it's safe to elide this
never@297 3614 if ((EmitSync & 16) == 0) {
never@297 3615 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
never@297 3616 masm.jccb (Assembler::zero, LGoSlowPath) ;
duke@0 3617 }
duke@0 3618
duke@0 3619 // We have a classic Dekker-style idiom:
duke@0 3620 // ST m->_owner = 0 ; MEMBAR; LD m->_succ
duke@0 3621 // There are a number of ways to implement the barrier:
duke@0 3622 // (1) lock:andl &m->_owner, 0
duke@0 3623 // is fast, but masm doesn't currently support the "ANDL M,IMM32" form.
duke@0 3624 // LOCK: ANDL [ebx+Offset(_Owner)-2], 0
duke@0 3625 // Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
duke@0 3626 // (2) If supported, an explicit MFENCE is appealing.
duke@0 3627 // In older IA32 processors MFENCE is slower than lock:add or xchg
duke@0 3628 // particularly if the write-buffer is full, as might be the case if
duke@0 3629 // stores closely precede the fence or fence-equivalent instruction.
duke@0 3630 // In more modern implementations MFENCE appears faster, however.
duke@0 3631 // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
duke@0 3632 // The $lines underlying the top-of-stack should be in M-state.
duke@0 3633 // The locked add instruction is serializing, of course.
duke@0 3634 // (4) Use xchg, which is serializing
duke@0 3635 // mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
duke@0 3636 // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
duke@0 3637 // The integer condition codes will tell us if succ was 0.
duke@0 3638 // Since _succ and _owner should reside in the same $line and
duke@0 3639 // we just stored into _owner, it's likely that the $line
duke@0 3640 // remains in M-state for the lock:orl.
duke@0 3641 //
duke@0 3642 // We currently use (3), although it's likely that switching to (2)
duke@0 3643 // is correct for the future.
never@297 3644
xlu@528 3645 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
never@297 3646 if (os::is_MP()) {
never@297 3647 if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
never@297 3648 masm.mfence();
never@297 3649 } else {
never@297 3650 masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
duke@0 3651 }
duke@0 3652 }
duke@0 3653 // Ratify _succ remains non-null
never@297 3654 masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
never@297 3655 masm.jccb (Assembler::notZero, LSuccess) ;
never@297 3656
never@297 3657 masm.xorptr(boxReg, boxReg) ; // box is really EAX
duke@0 3658 if (os::is_MP()) { masm.lock(); }
never@297 3659 masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
duke@0 3660 masm.jccb (Assembler::notEqual, LSuccess) ;
duke@0 3661 // Since we're low on registers we installed rsp as a placeholder in _owner.
duke@0 3662 // Now install Self over rsp. This is safe as we're transitioning from
duke@0 3663 // non-null to non-null
duke@0 3664 masm.get_thread (boxReg) ;
never@297 3665 masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
duke@0 3666 // Intentional fall-through into LGoSlowPath ...
duke@0 3667
never@297 3668 masm.bind (LGoSlowPath) ;
never@297 3669 masm.orptr(boxReg, 1) ; // set ICC.ZF=0 to indicate failure
never@297 3670 masm.jmpb (DONE_LABEL) ;
never@297 3671
never@297 3672 masm.bind (LSuccess) ;
never@297 3673 masm.xorptr(boxReg, boxReg) ; // set ICC.ZF=1 to indicate success
never@297 3674 masm.jmpb (DONE_LABEL) ;
duke@0 3675 }
duke@0 3676
duke@0 3677 masm.bind (Stacked) ;
duke@0 3678 // It's not inflated and it's not recursively stack-locked and it's not biased.
duke@0 3679 // It must be stack-locked.
duke@0 3680 // Try to reset the header to displaced header.
duke@0 3681 // The "box" value on the stack is stable, so we can reload
duke@0 3682 // and be assured we observe the same value as above.
never@297 3683 masm.movptr(tmpReg, Address(boxReg, 0)) ;
duke@0 3684 if (os::is_MP()) { masm.lock(); }
never@297 3685 masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
duke@0 3686 // Intentional fall-through into DONE_LABEL
duke@0 3687
duke@0 3688
duke@0 3689 // DONE_LABEL is a hot target - we'd really like to place it at the
duke@0 3690 // start of cache line by padding with NOPs.
duke@0 3691 // See the AMD and Intel software optimization manuals for the
duke@0 3692 // most efficient "long" NOP encodings.
duke@0 3693 // Unfortunately none of our alignment mechanisms suffice.
duke@0 3694 if ((EmitSync & 65536) == 0) {
duke@0 3695 masm.bind (CheckSucc) ;
duke@0 3696 }
duke@0 3697 masm.bind(DONE_LABEL);
duke@0 3698
duke@0 3699 // Avoid branch to branch on AMD processors
duke@0 3700 if (EmitSync & 32768) { masm.nop() ; }
duke@0 3701 }