2 // Copyright 1997-2009 Sun Microsystems, Inc. All Rights Reserved.
3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 // This code is free software; you can redistribute it and/or modify it
6 // under the terms of the GNU General Public License version 2 only, as
7 // published by the Free Software Foundation.
9 // This code is distributed in the hope that it will be useful, but WITHOUT
10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 // FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 // version 2 for more details (a copy is included in the LICENSE file that
13 // accompanied this code).
15 // You should have received a copy of the GNU General Public License version
16 // 2 along with this work; if not, write to the Free Software Foundation,
17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 // CA 95054 USA or visit www.sun.com if you need additional information or
21 // have any questions.
25 // X86 Architecture Description File
27 //----------REGISTER DEFINITION BLOCK------------------------------------------
28 // This information is used by the matcher and the register allocator to
29 // describe individual registers and classes of registers within the target
33 //----------Architecture Description Register Definitions----------------------
35 // "reg_def" name ( register save type, C convention save type,
36 // ideal register type, encoding );
37 // Register Save Types:
39 // NS = No-Save: The register allocator assumes that these registers
40 // can be used without saving upon entry to the method, &
41 // that they do not need to be saved at call sites.
43 // SOC = Save-On-Call: The register allocator assumes that these registers
44 // can be used without saving upon entry to the method,
45 // but that they must be saved at call sites.
47 // SOE = Save-On-Entry: The register allocator assumes that these registers
48 // must be saved before using them upon entry to the
49 // method, but they do not need to be saved at call
52 // AS = Always-Save: The register allocator assumes that these registers
53 // must be saved before using them upon entry to the
54 // method, & that they must be saved at call sites.
56 // Ideal Register Type is used to determine how to save & restore a
57 // register. Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58 // spilled with LoadP/StoreP. If the register supports both, use Op_RegI.
60 // The encoding number is the actual bit-pattern placed into the opcodes.
63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
// Integer registers.  Following the reg_def signature described above, the
// arguments are: register save type, C convention save type, ideal register
// type and hardware encoding; the trailing argument names the VMReg the
// definition corresponds to.
// EBX, ESI and EDI are declared SOC in the first column and SOE in the
// C-convention column; ECX, EDX and EAX are SOC in both.
67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
// The stack pointer is No-Save under both conventions.
75 reg_def ESP( NS, NS, Op_RegI, 4, rsp->as_VMReg());
// EFLAGS is given encoding 8, an ideal register type of 0 and no VMReg
// (VMRegImpl::Bad()).
78 reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
80 // Float registers. We treat TOS/FPR0 special. It is invisible to the
81 // allocator, and only shows up in the encodings.
// FPR0L/FPR0H use encoding 0 and have no backing VMReg (VMRegImpl::Bad()).
82 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
83 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
84 // Ok so here's the trick FPR1 is really st(0) except in the midst
85 // of emission of assembly for a machnode. During the emission the fpu stack
86 // is pushed making FPR1 == st(1) temporarily. However at any safepoint
87 // the stack will not have this element so FPR1 == st(0) from the
88 // oopMap viewpoint. This same weirdness with numbering causes
89 // instruction encoding to have to play games with the register
90 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
91 // where it does flt->flt moves to see an example
// Each FPRn below carries encoding n but maps to as_FloatRegister(n-1):
// the L half is that register's VMReg and the H half is the VMReg's next()
// slot.
93 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
94 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
95 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
96 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
97 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
98 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
99 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
100 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
101 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
102 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
103 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
104 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
105 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
106 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
108 // XMM registers. 128-bit registers or 4 words each, labeled a-d.
109 // Word a in each register holds a Float, words ab hold a Double.
110 // We currently do not use the SIMD capabilities, so registers cd
111 // are unused at the moment.
// XMMna is the VMReg of xmmN (word a); XMMnb is that VMReg's next() slot
// (word b).  Both halves of a register share the same encoding n.
112 reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
113 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
114 reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
115 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
116 reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
117 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
118 reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
119 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
120 reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
121 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
122 reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
123 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
124 reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
125 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
126 reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
127 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
129 // Specify priority of register selection within phases of register
130 // allocation. Highest priority is first. A useful heuristic is to
131 // give registers a low priority when they are required by machine
132 // instructions, like EAX and EDX. Registers which are used as
133 // pairs must fall on an even boundary (witness the FPR#L's in this list).
134 // For the Intel integer registers, the equivalent Long pairs are
135 // EDX:EAX, EBX:ECX, and EDI:EBP.
// chunk0: the eight integer registers, then the x87 FPR L/H halves in
// register order (each L immediately followed by its H).
136 alloc_class chunk0( ECX, EBX, EBP, EDI, EAX, EDX, ESI, ESP,
137 FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
138 FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
139 FPR6L, FPR6H, FPR7L, FPR7H );
// chunk1: the XMM a/b halves, with EFLAGS listed last.
141 alloc_class chunk1( XMM0a, XMM0b,
148 XMM7a, XMM7b, EFLAGS);
151 //----------Architecture Description Register Classes--------------------------
152 // Several register classes are automatically defined based upon information in
153 // this architecture description.
154 // 1) reg_class inline_cache_reg ( /* as def'd in frame section */ )
155 // 2) reg_class compiler_method_oop_reg ( /* as def'd in frame section */ )
156 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
157 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
159 // Class for all registers
// Integer register classes.  Each reg_class names a set of the registers
// defined by the reg_def entries above.
160 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
161 // Class for general registers
162 reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
163 // Class for general registers which may be used for implicit null checks on win95
164 // Also safe for use by tailjump. We don't want to allocate in rbp,
165 reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
166 // Class of "X" registers
167 reg_class x_reg(EBX, ECX, EDX, EAX);
168 // Class of registers that can appear in an address with no offset.
169 // EBP and ESP require an extra instruction byte for zero offset.
170 // Used in fast-unlock
171 reg_class p_reg(EDX, EDI, ESI, EBX);
172 // Class for general registers not including ECX
173 reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
174 // Class for general registers not including EAX
175 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
176 // Class for general registers not including EAX or EBX.
177 reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
178 // Class of EAX (for multiply and divide operations)
179 reg_class eax_reg(EAX);
180 // Class of EBX (for atomic add)
181 reg_class ebx_reg(EBX);
182 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
183 reg_class ecx_reg(ECX);
184 // Class of EDX (for multiply and divide operations)
185 reg_class edx_reg(EDX);
186 // Class of EDI (for synchronization)
187 reg_class edi_reg(EDI);
188 // Class of ESI (for synchronization)
189 reg_class esi_reg(ESI);
190 // Singleton class for interpreter's stack pointer
191 reg_class ebp_reg(EBP);
192 // Singleton class for stack pointer
193 reg_class sp_reg(ESP);
194 // Singleton class for instruction pointer
195 // reg_class ip_reg(EIP);
196 // Singleton class for condition codes
197 reg_class int_flags(EFLAGS);
198 // Class of integer register pairs
// Pairs are listed low half first: EAX,EDX / ECX,EBX / EBP,EDI, i.e. the
// EDX:EAX, EBX:ECX and EDI:EBP long pairs.  Within each pair the encodings
// differ by 2 (0/2, 1/3, 5/7).
199 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
200 // Class of integer register pairs that aligns with calling convention
201 reg_class eadx_reg( EAX,EDX );
202 reg_class ebcx_reg( ECX,EBX );
203 // Not AX or DX, used in divides
204 reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
206 // Floating point registers. Notice FPR0 is not a choice.
207 // FPR0 is never allocated; we use clever encodings to fake
208 // 2-address instructions out of Intel's FP stack.
// Single-precision x87 class: only the L halves of FPR1..FPR7.
209 reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
211 // make a register class for SSE registers
212 reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
214 // make a double register class for SSE2 registers
215 reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
216 XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
// x87 class made of L/H register pairs.
218 reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
219 FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
// Singleton classes pinning a value to FPR1 (flt_reg0, dbl_reg0) or FPR2
// (dbl_reg1); dbl_notreg0 is every L/H pair from FPR2 through FPR7.
222 reg_class flt_reg0( FPR1L );
223 reg_class dbl_reg0( FPR1L,FPR1H );
224 reg_class dbl_reg1( FPR2L,FPR2H );
225 reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
226 FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
228 // XMM6 and XMM7 could be used as temporary registers for long, float and
229 // double values for SSE2.
230 reg_class xdb_reg6( XMM6a,XMM6b );
231 reg_class xdb_reg7( XMM7a,XMM7b );
235 //----------SOURCE BLOCK-------------------------------------------------------
236 // This is a block of C++ code which provides values, functions, and
237 // definitions necessary in the rest of the architecture description
239 // Must be visible to the DFA in dfa_x86_32.cpp
240 extern bool is_operand_hi32_zero(Node* n);
// Short aliases for Assembler::imm_operand and Assembler::disp32_operand.
244 #define RELOC_IMM32 Assembler::imm_operand
245 #define RELOC_DISP32 Assembler::disp32_operand
249 // How to find the high register of a Long pair, given the low register
// Adding 2 works because the long pairs are EAX(0)/EDX(2), ECX(1)/EBX(3)
// and EBP(5)/EDI(7) by encoding.
250 #define HIGH_FROM_LOW(x) ((x)+2)
252 // These masks are used to provide 128-bit aligned bitmasks to the XMM
253 // instructions, to allow sign-masking or sign-bit flipping. They allow
254 // fast versions of NegF/NegD and AbsF/AbsD.
256 // Note: 'double' and 'long long' have 32-bit alignment on x86.
// Computes a 16-byte-aligned operand address at or below 'adr' by clearing
// the low four address bits.
//   adr    - address inside a buffer that has room below it for the
//            round-down
//   lo, hi - presumably the two 64-bit halves of the 128-bit value written
//            to the aligned operand (TODO confirm against the full function)
257 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
258 // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
259 // of 128-bits operands for SSE instructions.
260 jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
261 // Store the value to a 128-bits operand.
267 // Buffer for 128-bits masks used by SSE instructions.
// Backing storage: five 128-bit slots (ten jlongs).  The extra slot absorbs
// the round-down done by double_quadword(), which is why the addresses
// passed below start at index 1*2 rather than 0.
268 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
270 // Static initialization during VM startup.
// 0x7FFF... patterns have every bit set except the sign bit of each
// float/double lane; 0x8000... patterns have only the sign bit set.
271 static jlong *float_signmask_pool = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
272 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
273 static jlong *float_signflip_pool = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
274 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
276 // Offset hacking within calls.
// Byte count that callers add to account for FPU-control code placed ahead
// of a call; the result depends on whether the current compilation is in
// 24-bit FP mode.
277 static int pre_call_FPU_size() {
278 if (Compile::current()->in_24_bit_fp_mode())
// Size in bytes of the register-to-register move counted before a
// method-handle call: opcode + ModRM, plus one more byte where LP64_ONLY
// applies.
283 static int preserve_SP_size() {
284 return LP64_ONLY(1 +) 2; // [rex,] op, rm(reg/reg)
287 // !!!!! Special hack to get all type of calls to specify the byte offset
288 // from the start of the call to the point where the return address
// Offset from the start of this node's code to the call's return address:
// the 5-byte call plus any pre-call FPU code, plus the SP-preserving move
// when this is a method-handle invoke.
290 int MachCallStaticJavaNode::ret_addr_offset() {
291 int offset = 5 + pre_call_FPU_size(); // 5 bytes from start of call to where return address points
292 if (_method_handle_invoke)
293 offset += preserve_SP_size();
// Offset from the start of this node's code to the call's return address.
// The constant 10 presumably covers a 5-byte MOV ahead of the 5-byte call
// (compare CallDynamicJavaDirectNode::compute_padding) - TODO confirm.
297 int MachCallDynamicJavaNode::ret_addr_offset() {
298 return 10 + pre_call_FPU_size(); // 10 bytes from start of call to where return address points
// -1 means "not yet known"; ret_addr_offset() asserts that it has been set
// before it is used.
301 static int sizeof_FFree_Float_Stack_All = -1;
// Offset to the return address of a runtime call: the recorded
// sizeof_FFree_Float_Stack_All, the 5-byte call, and any pre-call FPU code.
303 int MachCallRuntimeNode::ret_addr_offset() {
304 assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
305 return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size();
308 // Indicate if the safepoint node needs the polling page as an input.
309 // Since x86 does have absolute addressing, it doesn't.
310 bool SafePointNode::needs_polling_address_input() {
315 // Compute padding required for nodes which need alignment
318 // The address of the call instruction needs to be 4-byte aligned to
319 // ensure that it does not span a cache line so that it can be patched.
// Returns the number of padding bytes needed in front of this node.
// The offset is first advanced past everything emitted before the call's
// operand (pre-call FPU code and the call opcode byte); the result is the
// distance from that position to round_to(position, alignment_required()).
320 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
321 current_offset += pre_call_FPU_size(); // skip fldcw, if any
322 current_offset += 1; // skip call opcode byte
323 return round_to(current_offset, alignment_required()) - current_offset;
326 // The address of the call instruction needs to be 4-byte aligned to
327 // ensure that it does not span a cache line so that it can be patched.
// Returns the number of padding bytes needed in front of this node.
// Same computation as the direct static call, with the SP-preserving move
// also skipped before the call opcode byte.
328 int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
329 current_offset += pre_call_FPU_size(); // skip fldcw, if any
330 current_offset += preserve_SP_size(); // skip mov rbp, rsp
331 current_offset += 1; // skip call opcode byte
332 return round_to(current_offset, alignment_required()) - current_offset;
335 // The address of the call instruction needs to be 4-byte aligned to
336 // ensure that it does not span a cache line so that it can be patched.
// Returns the number of padding bytes needed in front of this node.
// Skips the pre-call FPU code, the 5-byte MOV and the call opcode byte,
// then measures the distance to round_to(position, alignment_required()).
337 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
338 current_offset += pre_call_FPU_size(); // skip fldcw, if any
339 current_offset += 5; // skip MOV instruction
340 current_offset += 1; // skip call opcode byte
341 return round_to(current_offset, alignment_required()) - current_offset;
345 void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
// Appends one byte laid out as (f1 << 6) | (f2 << 3) | f3 at the buffer's
// code end and advances the code end by one.  Callers use it for both
// ModRM bytes and SIB bytes.
351 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
352 unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
353 *(cbuf.code_end()) = c;
354 cbuf.set_code_end(cbuf.code_end() + 1);
// Appends the single byte (f1 | f2) at the buffer's code end and advances
// the code end by one.
358 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
359 unsigned char c = (unsigned char)( f1 | f2 );
360 *(cbuf.code_end()) = c;
361 cbuf.set_code_end(cbuf.code_end() + 1);
// Appends 'code', truncated to one byte, at the buffer's code end and
// advances the code end by one.
365 void emit_opcode(CodeBuffer &cbuf, int code) {
366 *(cbuf.code_end()) = (unsigned char)code;
367 cbuf.set_code_end(cbuf.code_end() + 1);
370 // EMIT_OPCODE() w/ relocation information
// Records a relocation of type 'reloc' at inst_mark() + offset, then emits
// the opcode byte through the plain emit_opcode().
371 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
372 cbuf.relocate(cbuf.inst_mark() + offset, reloc);
373 emit_opcode(cbuf, code);
// Appends 'd8', truncated to one byte, and advances the code end by one.
377 void emit_d8(CodeBuffer &cbuf, int d8) {
378 *(cbuf.code_end()) = (unsigned char)d8;
379 cbuf.set_code_end(cbuf.code_end() + 1);
// Stores 'd16' as a short directly at the code end (no alignment
// adjustment is made) and advances the code end by two bytes.
383 void emit_d16(CodeBuffer &cbuf, int d16) {
384 *((short *)(cbuf.code_end())) = d16;
385 cbuf.set_code_end(cbuf.code_end() + 2);
// Stores 'd32' as an int directly at the code end (no alignment adjustment
// is made) and advances the code end by four bytes.
389 void emit_d32(CodeBuffer &cbuf, int d32) {
390 *((int *)(cbuf.code_end())) = d32;
391 cbuf.set_code_end(cbuf.code_end() + 4);
394 // emit 32 bit value and construct relocation entry from relocInfo::relocType
// Registers a relocation of type 'reloc' (with the given format) at the
// current instruction mark, then writes the 4-byte value at the code end.
395 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
397 cbuf.relocate(cbuf.inst_mark(), reloc, format);
399 *((int *)(cbuf.code_end())) = d32;
400 cbuf.set_code_end(cbuf.code_end() + 4);
403 // emit 32 bit value and construct relocation entry from RelocationHolder
// Same as the relocType overload but takes a prepared RelocationHolder.
// For oop relocations whose value is neither 0 nor the non-oop word, an
// assert checks that the value is a valid oop that may be embedded in code.
404 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
407 if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
408 assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
411 cbuf.relocate(cbuf.inst_mark(), rspec, format);
413 *((int *)(cbuf.code_end())) = d32;
414 cbuf.set_code_end(cbuf.code_end() + 4);
417 // Access stack slot for load or store
// Emits 'opcode' followed by an [ESP + disp] memory operand.
//   rm_field - value placed in the middle (reg) field of the ModRM byte
//   disp     - displacement from ESP; encoded in one byte (mode 0x01) when
//              it lies in [-128, 127], otherwise in four bytes (mode 0x02)
// A SIB byte is emitted in both forms because the base is ESP.
418 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
419 emit_opcode( cbuf, opcode ); // (e.g., FILD [ESP+src])
420 if( -128 <= disp && disp <= 127 ) {
421 emit_rm( cbuf, 0x01, rm_field, ESP_enc ); // R/M byte
422 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
423 emit_d8 (cbuf, disp); // Displacement // R/M byte
425 emit_rm( cbuf, 0x02, rm_field, ESP_enc ); // R/M byte
426 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
427 emit_d32(cbuf, disp); // Displacement // R/M byte
431 // eRegI ereg, memory mem) %{ // emit_reg_mem
// Emits the ModRM byte, optional SIB byte and optional displacement for a
// register/memory operand.  The opcode itself is emitted by the caller.
//   reg_encoding    - value for the reg field of the ModRM byte
//   base            - base register encoding; -1 selects an absolute
//                     address (mode 0, r/m 0x5, 32-bit displacement)
//   index, scale    - index == 0x4 together with scale == 0 means "no
//                     index"; with a base other than ESP the SIB byte is
//                     then omitted
//   displace        - displacement; the shortest of none / 8-bit / 32-bit
//                     is chosen, except that EBP as base always gets a
//                     displacement
//   displace_is_oop - forces the 32-bit form and emits the displacement
//                     with an oop_type relocation
432 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
433 // There is no index & no scale, use form without SIB byte
434 if ((index == 0x4) &&
435 (scale == 0) && (base != ESP_enc)) {
436 // If no displacement, mode is 0x0; unless base is [EBP]
437 if ( (displace == 0) && (base != EBP_enc) ) {
438 emit_rm(cbuf, 0x0, reg_encoding, base);
440 else { // If 8-bit displacement, mode 0x1
441 if ((displace >= -128) && (displace <= 127)
442 && !(displace_is_oop) ) {
443 emit_rm(cbuf, 0x1, reg_encoding, base);
444 emit_d8(cbuf, displace);
446 else { // If 32-bit displacement
447 if (base == -1) { // Special flag for absolute address
448 emit_rm(cbuf, 0x0, reg_encoding, 0x5);
449 // (manual lies; no SIB needed here)
450 if ( displace_is_oop ) {
451 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
453 emit_d32 (cbuf, displace);
456 else { // Normal base + offset
457 emit_rm(cbuf, 0x2, reg_encoding, base);
458 if ( displace_is_oop ) {
459 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
461 emit_d32 (cbuf, displace);
467 else { // Else, encode with the SIB byte
468 // If no displacement, mode is 0x0; unless base is [EBP]
469 if (displace == 0 && (base != EBP_enc)) { // If no displacement
// r/m == 0x4 in the ModRM byte announces that a SIB byte follows.
470 emit_rm(cbuf, 0x0, reg_encoding, 0x4);
471 emit_rm(cbuf, scale, index, base);
473 else { // If 8-bit displacement, mode 0x1
474 if ((displace >= -128) && (displace <= 127)
475 && !(displace_is_oop) ) {
476 emit_rm(cbuf, 0x1, reg_encoding, 0x4);
477 emit_rm(cbuf, scale, index, base);
478 emit_d8(cbuf, displace);
480 else { // If 32-bit displacement
482 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
483 emit_rm(cbuf, scale, index, 0x04);
485 emit_rm(cbuf, 0x2, reg_encoding, 0x4);
486 emit_rm(cbuf, scale, index, base);
488 if ( displace_is_oop ) {
489 emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
491 emit_d32 (cbuf, displace);
// Emits a register-to-register MOV (opcode 0x8B, ModRM mode 3) from
// src_encoding to dst_encoding; when both encodings are equal the copy is
// a no-op.
499 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
500 if( dst_encoding == src_encoding ) {
501 // reg-reg copy, use an empty encoding
503 emit_opcode( cbuf, 0x8B );
504 emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
// XMM counterpart of encode_Copy: uses the MacroAssembler to emit a movdqa
// between the two XMM registers; equal encodings are a no-op.
508 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
509 if( dst_encoding == src_encoding ) {
510 // reg-reg copy, use an empty encoding
512 MacroAssembler _masm(&cbuf);
514 __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
519 //=============================================================================
// Prints a textual rendering of the method prolog to 'st', mirroring the
// steps of MachPrologNode::emit(): optional FLDCW, optional stack bang,
// PUSHL EBP, optional stack-depth cookie, and the frame-creating SUB.
521 void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
523 if( C->in_24_bit_fp_mode() ) {
524 st->print("FLDCW 24 bit fpu control word");
525 st->print_cr(""); st->print("\t");
528 int framesize = C->frame_slots() << LogBytesPerInt;
529 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
530 // Remove two words for return addr and rbp,
531 framesize -= 2*wordSize;
533 // Calls to C2R adapters often do not accept exceptional returns.
534 // We require that their callers must bang for them. But be careful, because
535 // some VM calls (such as call site linkage) can use several kilobytes of
536 // stack. But the stack safety zone should account for that.
537 // See bugs 4446381, 4468289, 4497237.
538 if (C->need_stack_bang(framesize)) {
539 st->print_cr("# stack bang"); st->print("\t");
541 st->print_cr("PUSHL EBP"); st->print("\t");
543 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
544 st->print("PUSH 0xBADB100D\t# Majik cookie for stack depth check");
545 st->print_cr(""); st->print("\t");
// The pushed cookie takes one word out of the frame still to be allocated.
546 framesize -= wordSize;
549 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
551 st->print("SUB ESP,%d\t# Create frame",framesize);
554 st->print("SUB ESP,%d\t# Create frame",framesize);
// Emits the method prolog into 'cbuf': optional FPU verification, optional
// FLDCW for 24-bit FP mode, optional stack-overflow check, PUSH EBP, the
// optional stack-depth cookie, and the SUB that allocates the frame.  It
// then records the frame-complete offset and, under VerifyStackAtCalls,
// emits a stack-alignment check.
560 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
563 if (UseSSE >= 2 && VerifyFPU) {
564 MacroAssembler masm(&cbuf);
565 masm.verify_FPU(0, "FPU stack must be clean on entry");
568 // WARNING: Initial instruction MUST be 5 bytes or longer so that
569 // NativeJump::patch_verified_entry will be able to patch out the entry
570 // code safely. The fldcw is ok at 6 bytes, the push to verify stack
571 // depth is ok at 5 bytes, the frame allocation can be either 3 or
572 // 6 bytes. So if we don't do the fldcw or the push then we must
573 // use the 6 byte frame allocation even if we have no frame. :-(
574 // If method sets FPU control word do it now
575 if( C->in_24_bit_fp_mode() ) {
576 MacroAssembler masm(&cbuf);
577 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
580 int framesize = C->frame_slots() << LogBytesPerInt;
581 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
582 // Remove two words for return addr and rbp,
583 framesize -= 2*wordSize;
585 // Calls to C2R adapters often do not accept exceptional returns.
586 // We require that their callers must bang for them. But be careful, because
587 // some VM calls (such as call site linkage) can use several kilobytes of
588 // stack. But the stack safety zone should account for that.
589 // See bugs 4446381, 4468289, 4497237.
590 if (C->need_stack_bang(framesize)) {
591 MacroAssembler masm(&cbuf);
592 masm.generate_stack_overflow_check(framesize);
595 // We always push rbp, so that on return to interpreter rbp, will be
596 // restored correctly and we can correct the stack.
597 emit_opcode(cbuf, 0x50 | EBP_enc);
599 if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
600 emit_opcode(cbuf, 0x68); // push 0xbadb100d
601 emit_d32(cbuf, 0xbadb100d);
602 framesize -= wordSize;
// The short (imm8) SUB is only allowed when an earlier fldcw or cookie push
// already provided the long first instruction required above.
605 if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
607 emit_opcode(cbuf, 0x83); // sub SP,#framesize
608 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
609 emit_d8(cbuf, framesize);
612 emit_opcode(cbuf, 0x81); // sub SP,#framesize
613 emit_rm(cbuf, 0x3, 0x05, ESP_enc);
614 emit_d32(cbuf, framesize);
// Record how many code bytes precede the point where the frame is set up.
616 C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
619 if (VerifyStackAtCalls) {
621 MacroAssembler masm(&cbuf);
// Stop unless the masked value in rax equals
// StackAlignmentInBytes - wordSize.
624 masm.andptr(rax, StackAlignmentInBytes-1);
625 masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
627 masm.jcc(Assembler::equal, L);
628 masm.stop("Stack is not properly aligned!");
// The prolog's size is not computed analytically here; it is delegated to
// the generic MachNode::size().
635 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
636 return MachNode::size(ra_); // too many variables; just compute it the hard way
// Relocation count reported for the prolog.
// NOTE(review): the trailing comment says "a large enough number" while the
// value returned is 0 - confirm which one is intended.
639 int MachPrologNode::reloc() const {
640 return 0; // a large enough number
643 //=============================================================================
// Prints a textual rendering of the method epilog to 'st': optional FLDCW
// back to the standard control word, the frame-destroying ADD, POPL EBP,
// and, for polling method compilations, the safepoint poll.
645 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
647 int framesize = C->frame_slots() << LogBytesPerInt;
648 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
649 // Remove two words for return addr and rbp,
650 framesize -= 2*wordSize;
652 if( C->in_24_bit_fp_mode() ) {
653 st->print("FLDCW standard control word");
654 st->cr(); st->print("\t");
657 st->print("ADD ESP,%d\t# Destroy frame",framesize);
658 st->cr(); st->print("\t");
660 st->print_cr("POPL EBP"); st->print("\t");
661 if( do_polling() && C->is_method_compilation() ) {
662 st->print("TEST PollPage,EAX\t! Poll Safepoint");
663 st->cr(); st->print("\t");
// Emits the method epilog into 'cbuf': optional FLDCW restoring the
// standard control word, ADD ESP,#framesize (imm32 or imm8 form, omitted
// when the frame size is zero), POP EBP, and, for polling method
// compilations, a safepoint poll.
668 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
671 // If method set FPU control word, restore to standard control word
672 if( C->in_24_bit_fp_mode() ) {
673 MacroAssembler masm(&cbuf);
674 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
677 int framesize = C->frame_slots() << LogBytesPerInt;
678 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
679 // Remove two words for return addr and rbp,
680 framesize -= 2*wordSize;
682 // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
684 if( framesize >= 128 ) {
685 emit_opcode(cbuf, 0x81); // add SP, #framesize
686 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
687 emit_d32(cbuf, framesize);
689 else if( framesize ) {
690 emit_opcode(cbuf, 0x83); // add SP, #framesize
691 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
692 emit_d8(cbuf, framesize);
// POP EBP: one-byte opcode 0x58 with the register encoding folded in.
695 emit_opcode(cbuf, 0x58 | EBP_enc);
697 if( do_polling() && C->is_method_compilation() ) {
// The poll is a TEST (0x85) of EAX against the absolute address of the
// polling page, tagged with a poll_return_type relocation.
698 cbuf.relocate(cbuf.code_end(), relocInfo::poll_return_type, 0);
699 emit_opcode(cbuf,0x85);
700 emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
701 emit_d32(cbuf, (intptr_t)os::get_polling_page());
// Computes the epilog's size in bytes from the same conditions that
// MachEpilogNode::emit() tests.
705 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
707 // If method set FPU control word, restore to standard control word
// 6 bytes are counted for the fldcw when in 24-bit FP mode.
708 int size = C->in_24_bit_fp_mode() ? 6 : 0;
// 6 bytes for the poll: opcode, ModRM and a 32-bit address.
709 if( do_polling() && C->is_method_compilation() ) size += 6;
711 int framesize = C->frame_slots() << LogBytesPerInt;
712 assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
713 // Remove two words for return addr and rbp,
714 framesize -= 2*wordSize;
718 if( framesize >= 128 ) {
// Short ADD form: opcode, ModRM and imm8, or nothing for an empty frame.
721 size += framesize ? 3 : 0;
// Relocation count reported for the epilog.
// NOTE(review): the trailing comment says "a large enough number" while the
// value returned is 0 - confirm which one is intended.
726 int MachEpilogNode::reloc() const {
727 return 0; // a large enough number
// The epilog uses the generic pipeline class supplied by MachNode.
730 const Pipeline * MachEpilogNode::pipeline() const {
731 return MachNode::pipeline_class();
// Always reports a safepoint offset of 0.
734 int MachEpilogNode::safepoint_offset() const { return 0; }
736 //=============================================================================
// Coarse register categories used when deciding how to move a value:
// invalid, general-purpose integer, x87 float, XMM, or stack slot.
738 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
// Classifies an OptoReg name: invalid names give rc_bad, stack slots give
// rc_stack, and machine registers are classified through their VMReg
// (general register, x87 float register, or XMM register).
739 static enum RC rc_class( OptoReg::Name reg ) {
741 if( !OptoReg::is_valid(reg) ) return rc_bad;
742 if (OptoReg::is_stack(reg)) return rc_stack;
744 VMReg r = OptoReg::as_VMReg(reg);
745 if (r->is_Register()) return rc_int;
746 if (r->is_FloatRegister()) {
747 assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
750 assert(r->is_XMMRegister(), "must be");
// Emits, prints, or merely sizes one instruction with an [ESP + offset]
// memory operand.
//   cbuf    - code buffer the instruction is emitted into
//   do_size - when set, nothing is printed
//   is_load - selects the operand order in the printed MOV form
//   offset  - displacement from ESP
//   reg     - register number; its Matcher::_regEncode value goes into the
//             ModRM reg field (some callers pass a register only to select
//             an opcode extension)
//   opcode, op_str - opcode byte and its mnemonic for printing
//   size    - running byte count
// Returns size plus 3 bytes (opcode, ModRM, SIB) plus the displacement
// bytes.
754 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
755 int opcode, const char *op_str, int size, outputStream* st ) {
757 emit_opcode (*cbuf, opcode );
758 encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
760 } else if( !do_size ) {
761 if( size != 0 ) st->print("\n\t");
762 if( opcode == 0x8B || opcode == 0x89 ) { // MOV
763 if( is_load ) st->print("%s %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
764 else st->print("%s [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
765 } else { // FLD, FST, PUSH, POP
766 st->print("%s [ESP + #%d]",op_str,offset);
// Displacement bytes: none for 0, one up to 127, four otherwise.
770 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
771 return size+3+offset_size;
774 // Helper for XMM registers. Extra opcode bits, limited syntax.
// Emits, prints, or sizes an XMM load/store against [ESP + offset].
// A value counts as a double when reg_lo + 1 == reg_hi, otherwise as a
// float.  Encodings used:
//   double load without UseXmmLoadAndClearUpper: 66 0F 12 (MOVLPD)
//   other double moves:                          F2 0F 10/11 (MOVSD)
//   float moves:                                 F3 0F 10/11 (MOVSS)
// Returns size plus 5 bytes (prefix, 0x0F, opcode, ModRM, SIB) plus the
// displacement bytes.
775 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
776 int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
778 if( reg_lo+1 == reg_hi ) { // double move?
779 if( is_load && !UseXmmLoadAndClearUpper )
780 emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
782 emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
784 emit_opcode(*cbuf, 0xF3 );
786 emit_opcode(*cbuf, 0x0F );
787 if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
788 emit_opcode(*cbuf, 0x12 ); // use 'movlpd' for load
790 emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
791 encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
793 } else if( !do_size ) {
794 if( size != 0 ) st->print("\n\t");
795 if( reg_lo+1 == reg_hi ) { // double move?
796 if( is_load ) st->print("%s %s,[ESP + #%d]",
797 UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
798 Matcher::regName[reg_lo], offset);
799 else st->print("MOVSD [ESP + #%d],%s",
800 offset, Matcher::regName[reg_lo]);
802 if( is_load ) st->print("MOVSS %s,[ESP + #%d]",
803 Matcher::regName[reg_lo], offset);
804 else st->print("MOVSS [ESP + #%d],%s",
805 offset, Matcher::regName[reg_lo]);
// Displacement bytes: none for 0, one up to 127, four otherwise.
809 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
810 return size+5+offset_size;
// Emits, prints, or sizes an XMM register-to-register move.
// With UseXmmRegToRegMoveAll the move is 0F 28 (MOVAPS, 3 bytes) or, for a
// double (both src and dst are lo/hi pairs), 66 0F 28 (MOVAPD, 4 bytes).
// Otherwise it is F2 0F 10 (MOVSD) for doubles and F3 0F 10 (MOVSS) for
// floats.
814 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
815 int src_hi, int dst_hi, int size, outputStream* st ) {
816 if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
818 if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
819 emit_opcode(*cbuf, 0x66 );
821 emit_opcode(*cbuf, 0x0F );
822 emit_opcode(*cbuf, 0x28 );
823 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
825 } else if( !do_size ) {
826 if( size != 0 ) st->print("\n\t");
827 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
828 st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
830 st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
834 return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
837 emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
838 emit_opcode(*cbuf, 0x0F );
839 emit_opcode(*cbuf, 0x10 );
840 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
842 } else if( !do_size ) {
843 if( size != 0 ) st->print("\n\t");
844 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
845 st->print("MOVSD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
847 st->print("MOVSS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
// Emits, prints, or sizes an integer register-to-register MOV dst,src
// (opcode 0x8B followed by a mode-3 ModRM byte).
855 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
857 emit_opcode(*cbuf, 0x8B );
858 emit_rm (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
860 } else if( !do_size ) {
861 if( size != 0 ) st->print("\n\t");
862 printf_placeholder_removed
// Emits, prints, or sizes the store of an x87 register to [ESP + offset].
// If the source is not FPR1L it is first pushed with FLD, and the store
// then uses the popping form so the FP stack depth is unchanged.
868 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
869 int offset, int size, outputStream* st ) {
870 if( src_lo != FPR1L_num ) { // Move value to top of FP stack, if not already there
872 emit_opcode( *cbuf, 0xD9 ); // FLD (i.e., push it)
// The -1 converts the FPRn encoding into the st(n-1) index.
873 emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
875 } else if( !do_size ) {
876 if( size != 0 ) st->print("\n\t");
877 st->print("FLD %s",Matcher::regName[src_lo]);
// EBX_num / EDX_num are passed to impl_helper only for their encodings
// (3 and 2), which select store-and-pop versus plain store.
883 int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
886 if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
887 op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
889 } else { // 32-bit store
890 op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
892 assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
895 return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
// Implements a register-allocator spill copy from in(1) to this node.
//
// One routine serves three callers in this file: format() passes a NULL cbuf
// with do_size == false and a stream, emit() passes the code buffer with a
// NULL stream, and size() passes do_size == true with both pointers NULL.
//
// Source and destination are each split into a "first" and a "second"
// OptoReg half and classified with rc_class().  The copy is dispatched on the
// class pair (stack, int, float, xmm); branches that handle both halves
// return directly, the integer branches fall through to the "second bits"
// section at the end.  The returned value is the accumulated byte size.
//
// NOTE(review): several guard lines, `#ifndef PRODUCT` markers, else-arms and
// closing braces are not visible in this listing, so the exact nesting below
// cannot be confirmed from here.
898 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
899 // Get registers to move
900 OptoReg::Name src_second = ra_->get_reg_second(in(1));
901 OptoReg::Name src_first = ra_->get_reg_first(in(1));
902 OptoReg::Name dst_second = ra_->get_reg_second(this );
903 OptoReg::Name dst_first = ra_->get_reg_first(this );
905 enum RC src_second_rc = rc_class(src_second);
906 enum RC src_first_rc = rc_class(src_first);
907 enum RC dst_second_rc = rc_class(dst_second);
908 enum RC dst_first_rc = rc_class(dst_first);
910 assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
912 // Generate spill code!
915 if( src_first == dst_first && src_second == dst_second )
916 return size; // Self copy, no move
918 // --------------------------------------
919 // Check for mem-mem move. push/pop to move.
920 if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
// When the source's second slot is the destination's first slot, the second
// halves are moved first so the first-half copy does not overwrite them.
921 if( src_second == dst_first ) { // overlapping stack copy ranges
922 assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
923 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
924 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
925 src_second_rc = dst_second_rc = rc_bad; // flag as already moved the second bits
928 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH ",size, st);
929 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP ",size, st);
930 if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
931 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH ",size, st);
932 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP ",size, st);
937 // --------------------------------------
938 // Check for integer reg-reg copy
939 if( src_first_rc == rc_int && dst_first_rc == rc_int )
940 size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
942 // Check for integer store
943 if( src_first_rc == rc_int && dst_first_rc == rc_stack )
944 size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
946 // Check for integer load
947 if( dst_first_rc == rc_int && src_first_rc == rc_stack )
948 size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
950 // --------------------------------------
951 // Check for float reg-reg copy
952 if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
953 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
954 (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
957 // Note the mucking with the register encode to compensate for the 0/1
958 // indexing issue mentioned in a comment in the reg_def sections
959 // for FPR registers many lines above here.
// Source not on top of the FP stack: push it with FLD, then store-and-pop
// into the destination.  Source already on top: plain FST.
961 if( src_first != FPR1L_num ) {
962 emit_opcode (*cbuf, 0xD9 ); // FLD ST(i)
963 emit_d8 (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
964 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
965 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
967 emit_opcode (*cbuf, 0xDD ); // FST ST(i)
968 emit_d8 (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
971 } else if( !do_size ) {
972 if( size != 0 ) st->print("\n\t");
973 if( src_first != FPR1L_num ) st->print("FLD %s\n\tFSTP %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
974 else st->print( "FST %s", Matcher::regName[dst_first]);
// Two 2-byte instructions (FLD + FSTP) or a single 2-byte FST.
977 return size + ((src_first != FPR1L_num) ? 2+2 : 2);
980 // Check for float store
981 if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
982 return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
985 // Check for float load
986 if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
987 int offset = ra_->reg2offset(src_first);
990 if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
993 } else { // 32-bit load
996 assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
999 emit_opcode (*cbuf, op );
1000 encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
1001 emit_opcode (*cbuf, 0xDD ); // FSTP ST(i)
1002 emit_d8 (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1004 } else if( !do_size ) {
1005 if( size != 0 ) st->print("\n\t");
1006 st->print("%s ST,[ESP + #%d]\n\tFSTP %s",op_str, offset,Matcher::regName[dst_first]);
// Displacement bytes: none for offset 0, one up to 127, otherwise four.
1009 int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1010 return size + 3+offset_size+2;
1013 // Check for xmm reg-reg copy
1014 if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1015 assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1016 (src_first+1 == src_second && dst_first+1 == dst_second),
1017 "no non-adjacent float-moves" );
1018 return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1021 // Check for xmm store
1022 if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1023 return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1026 // Check for float xmm load
1027 if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1028 return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1031 // Copy from float reg to xmm reg
1032 if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1033 // copy to the top of stack from floating point reg
1034 // and use LEA to preserve flags
// 0xF8 is the 8-bit displacement -8: open an 8-byte scratch slot on the stack.
1036 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP-8]
1037 emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1038 emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1039 emit_d8(*cbuf,0xF8);
1041 } else if( !do_size ) {
1042 if( size != 0 ) st->print("\n\t");
1043 st->print("LEA ESP,[ESP-8]");
// Store the FP register into the scratch slot at [ESP+0].
1048 size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1050 // Copy from the temp memory to the xmm reg.
1051 size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
// Release the scratch slot again (displacement +8).
1054 emit_opcode(*cbuf,0x8D); // LEA ESP,[ESP+8]
1055 emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1056 emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1057 emit_d8(*cbuf,0x08);
1059 } else if( !do_size ) {
1060 if( size != 0 ) st->print("\n\t");
1061 st->print("LEA ESP,[ESP+8]");
// Reaching this point with size == 0 means no branch above handled the copy.
1068 assert( size > 0, "missed a case" );
1070 // --------------------------------------------------------------------
1071 // Check for second bits still needing moving.
1072 if( src_second == dst_second )
1073 return size; // Self copy; no move
1074 assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1076 // Check for second word int-int move
1077 if( src_second_rc == rc_int && dst_second_rc == rc_int )
1078 return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1080 // Check for second word integer store
1081 if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1082 return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1084 // Check for second word integer load
1085 if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1086 return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
// Prints the spill copy to st by running implementation() with no code
// buffer and do_size == false.
1093 void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1094 implementation( NULL, ra_, false, st );
// Emits the spill copy into cbuf by running implementation() with a NULL
// output stream and do_size == false.
1098 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1099 implementation( &cbuf, ra_, false, NULL );
// Returns the byte size of the spill copy by running implementation() with
// do_size == true and neither a code buffer nor an output stream.
1103 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1104 return implementation( NULL, ra_, true, NULL );
1106 //=============================================================================
1108 void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
1109 st->print("NOP \t# %d bytes pad for loops and calls", _count);
1113 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
1114 MacroAssembler _masm(&cbuf);
1118 uint MachNopNode::size(PhaseRegAlloc *) const {
1123 //=============================================================================
// Prints the node as "LEA reg,[ESP + #offset]", where offset is the stack
// offset of the first element of input 0's register mask and reg is the
// first register assigned to this node.
1125 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1126 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1127 int reg = ra_->get_reg_first(this);
1128 st->print("LEA %s,[ESP + #%d]",Matcher::regName[reg],offset);
// Emits LEA reg,[ESP+offset] for the box's stack slot.  Offsets of 128 or
// more use the 32-bit displacement form (mod = 2, emit_d32); smaller offsets
// use the 8-bit displacement form (mod = 1, emit_d8).
// NOTE(review): the `else` line between the two forms is not visible in this
// listing.
1132 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1133 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1134 int reg = ra_->get_encode(this);
1135 if( offset >= 128 ) {
1136 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
// r/m = 0x04 announces a following byte that names ESP as the base.
1137 emit_rm(cbuf, 0x2, reg, 0x04);
1138 emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1139 emit_d32(cbuf, offset);
1142 emit_opcode(cbuf, 0x8D); // LEA reg,[SP+offset]
1143 emit_rm(cbuf, 0x1, reg, 0x04);
1144 emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1145 emit_d8(cbuf, offset);
1149 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1150 int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1151 if( offset >= 128 ) {
1159 //=============================================================================
1161 // emit call stub, compiled java to interpreter
// The stub loads a NULL method oop into rbx and jumps to its own pc; both are
// placeholders until the call is fixed up.  Returns early, emitting nothing,
// when `base` is NULL.
// NOTE(review): the line that assigns `base` and the end of the function are
// not visible in this listing.
1162 void emit_java_to_interp(CodeBuffer &cbuf ) {
1163 // Stub is fixed up when the corresponding call is converted from calling
1164 // compiled code to calling interpreted code.
1168 address mark = cbuf.inst_mark(); // get mark within main instrs section
1170 // Note that the code buffer's inst_mark is always relative to insts.
1171 // That's why we must use the macroassembler to generate a stub.
1172 MacroAssembler _masm(&cbuf);
1175 __ start_a_stub(Compile::MAX_stubs_size);
1176 if (base == NULL) return; // CodeBuffer::expand failed
1177 // static stub relocation stores the instruction address of the call
1178 __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
1179 // static stub relocation also tags the methodOop in the code-stream.
1180 __ movoop(rbx, (jobject)NULL); // method is zapped till fixup time
1181 // This is recognized as unresolved by relocs/nativeInst/ic code
1182 __ jump(RuntimeAddress(__ pc()));
1185 // Update current stubs pointer and restore code_end.
1187 // size of call stub, compiled java to interpreter
1188 uint size_java_to_interp() {
1189 return 10; // movl; jmp
1191 // relocation entries for call stub, compiled java to interpreter
1192 uint reloc_java_to_interp() {
1193 return 4; // 3 in emit_java_to_interp + 1 in Java_Static_Call
1196 //=============================================================================
// Prints the inline-cache check: a CMP, a JNE to the IC-miss stub, and two
// NOPs, plus a third NOP when OptoBreakpoint is off.
1198 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1199 st->print_cr( "CMP EAX,[ECX+4]\t# Inline cache check");
1200 st->print_cr("\tJNE SharedRuntime::handle_ic_miss_stub");
1201 st->print_cr("\tNOP");
1202 st->print_cr("\tNOP");
1203 if( !OptoBreakpoint )
1204 st->print_cr("\tNOP");
// Emits the inline-cache check: compare rax with the klass field of the
// object in rcx and jump to the IC-miss stub when they differ.  The final
// assert verifies that the number of bytes emitted equals size(ra_).
// NOTE(review): the NOP-emitting statements are not visible in this listing.
1208 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1209 MacroAssembler masm(&cbuf);
// Remember the starting size so the emitted length can be checked below.
1211 uint code_size = cbuf.code_size();
1213 masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1214 masm.jump_cc(Assembler::notEqual,
1215 RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1216 /* WARNING these NOPs are critical so that verified entry point is properly
1217 aligned for patching by NativeJump::patch_verified_entry() */
1219 if( !OptoBreakpoint ) // Leave space for int3
1223 assert(cbuf.code_size() - code_size == size(ra_), "checking code size of inline cache node");
// Byte size of the unverified entry point: 11 with OptoBreakpoint, 12 without.
1226 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1227 return OptoBreakpoint ? 11 : 12;
1231 //=============================================================================
// Returns the number of bytes reserved for the exception handler stub.
1232 uint size_exception_handler() {
1233 // NativeCall instruction size is the same as NativeJump.
1234 // exception handler starts out as jump and can be patched to
1235 // a call by deoptimization. (4932387)
1236 // Note that this value is also credited (in output.cpp) to
1237 // the size of the code section.
1238 return NativeJump::instruction_size;
1241 // Emit exception handler code. Stuff framesize into a register
1242 // and call a VM stub routine.
// Returns 0 when `base` is NULL.  The handler is a single jump to the start
// of OptoRuntime's exception blob; the assert checks that it fits within
// size_exception_handler() bytes.
// NOTE(review): the line that assigns `base` and the final return are not
// visible in this listing.
1243 int emit_exception_handler(CodeBuffer& cbuf) {
1245 // Note that the code buffer's inst_mark is always relative to insts.
1246 // That's why we must use the macroassembler to generate a handler.
1247 MacroAssembler _masm(&cbuf);
1249 __ start_a_stub(size_exception_handler());
1250 if (base == NULL) return 0; // CodeBuffer::expand failed
1251 int offset = __ offset();
1252 __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
1253 assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
// Returns the number of bytes reserved for the deopt handler stub: 5 bytes
// for the push plus one NativeJump.
1258 uint size_deopt_handler() {
1259 // NativeCall instruction size is the same as NativeJump.
1260 // exception handler starts out as jump and can be patched to
1261 // a call by deoptimization. (4932387)
1262 // Note that this value is also credited (in output.cpp) to
1263 // the size of the code section.
1264 return 5 + NativeJump::instruction_size; // pushl(); jmp;
1267 // Emit deopt handler code.
// The handler pushes its own pc and jumps to the unpack entry of the deopt
// blob.  Returns 0 when `base` is NULL.
// NOTE(review): the line that assigns `base` and the final return are not
// visible in this listing.
1268 int emit_deopt_handler(CodeBuffer& cbuf) {
1270 // Note that the code buffer's inst_mark is always relative to insts.
1271 // That's why we must use the macroassembler to generate a handler.
1272 MacroAssembler _masm(&cbuf);
// Reserve room for the deopt handler itself (push + jmp).  The exception
// handler is only a single jump, so reserving size_exception_handler() here
// would under-reserve relative to the overflow assert below, which measures
// against size_deopt_handler().
1274 __ start_a_stub(size_deopt_handler());
1275 if (base == NULL) return 0; // CodeBuffer::expand failed
1276 int offset = __ offset();
1277 InternalAddress here(__ pc());
1278 __ pushptr(here.addr());
1280 __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1281 assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
// Places the double constant x via the macro assembler and emits its address
// as a relocated 32-bit word.  The instruction mark offset is saved before
// and restored after the constant is created.
// NOTE(review): the last argument of emit_d32_reloc is not visible in this
// listing.
1287 static void emit_double_constant(CodeBuffer& cbuf, double x) {
1288 int mark = cbuf.insts()->mark_off();
1289 MacroAssembler _masm(&cbuf);
1290 address double_address = __ double_constant(x);
1291 cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
1292 emit_d32_reloc(cbuf,
1293 (int)double_address,
1294 internal_word_Relocation::spec(double_address),
// Places the float constant x via the macro assembler and emits a relocated
// 32-bit word for it.  The instruction mark offset is saved before and
// restored after the constant is created.
// NOTE(review): the value argument and the last argument of emit_d32_reloc
// are not visible in this listing.
1298 static void emit_float_constant(CodeBuffer& cbuf, float x) {
1299 int mark = cbuf.insts()->mark_off();
1300 MacroAssembler _masm(&cbuf);
1301 address float_address = __ float_constant(x);
1302 cbuf.insts()->set_mark_off(mark); // preserve mark across masm shift
1303 emit_d32_reloc(cbuf,
1305 internal_word_Relocation::spec(float_address),
1310 const bool Matcher::match_rule_supported(int opcode) {
1311 if (!has_match_rule(opcode))
1314 return true; // Per default match rules are supported.
// Converts a register number into an FPU offset by subtracting 32.
1317 int Matcher::regnum_to_fpu_offset(int regnum) {
1318 return regnum - 32; // The FP registers are in the second chunk
// The four predicates below compare via jint_cast / jlong_cast rather than
// with a floating-point ==.
// NOTE(review): presumably these casts reinterpret the raw bits, so that
// -0.0 is not reported as positive zero — confirm against their definitions.

// True when f equals +0.0f under the jint_cast comparison.
1321 bool is_positive_zero_float(jfloat f) {
1322 return jint_cast(f) == jint_cast(0.0F);
// True when f equals 1.0f under the jint_cast comparison.
1325 bool is_positive_one_float(jfloat f) {
1326 return jint_cast(f) == jint_cast(1.0F);
// True when d equals +0.0 under the jlong_cast comparison.
1329 bool is_positive_zero_double(jdouble d) {
1330 return jlong_cast(d) == jlong_cast(0.0);
// True when d equals 1.0 under the jlong_cast comparison.
1333 bool is_positive_one_double(jdouble d) {
1334 return jlong_cast(d) == jlong_cast(1.0);
1337 // This is UltraSparc specific, true just means we have fast l2f conversion
1338 const bool Matcher::convL2FSupported(void) {
1342 // Vector width in bytes
1343 const uint Matcher::vector_width_in_bytes(void) {
1344 return UseSSE >= 2 ? 8 : 0;
1348 const uint Matcher::vector_ideal_reg(void) {
1352 // Is this branch offset short enough that a short branch can be used?
1354 // NOTE: If the platform does not provide any short branch variants, then
1355 // this method should return false for offset 0.
//   rule    matcher rule of the branch; jmpConUCF2_rule gets a reduced range
//   offset  branch displacement to test
// Returns true for offsets in [-128, 127], or [-126, 125] for jmpConUCF2_rule.
1356 bool Matcher::is_short_branch_offset(int rule, int offset) {
1357 // the short version of jmpConUCF2 contains multiple branches,
1358 // making the reach slightly less
1359 if (rule == jmpConUCF2_rule)
1360 return (-126 <= offset && offset <= 125);
1361 return (-128 <= offset && offset <= 127);
1364 const bool Matcher::isSimpleConstant64(jlong value) {
1365 // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1369 // The ecx parameter to rep stos for the ClearArray node is in dwords.
1370 const bool Matcher::init_array_count_is_in_bytes = false;
1372 // Threshold size for cleararray.
1373 const int Matcher::init_array_short_size = 8 * BytesPerLong;
1375 // Should the Matcher clone shifts on addressing modes, expecting them to
1376 // be subsumed into complex addressing expressions or compute them into
1377 // registers? True for Intel but false for most RISCs
1378 const bool Matcher::clone_shift_expressions = true;
1380 // Is it better to copy float constants, or load them directly from memory?
1381 // Intel can load a float constant from a direct address, requiring no
1382 // extra registers. Most RISCs will have to materialize an address into a
1383 // register first, so they would do better to copy the constant from stack.
1384 const bool Matcher::rematerialize_float_constants = true;
1386 // If CPU can load and store mis-aligned doubles directly then no fixup is
1387 // needed. Else we split the double into 2 integer pieces and move it
1388 // piece-by-piece. Only happens when passing doubles into C code as the
1389 // Java calling convention forces doubles to be aligned.
1390 const bool Matcher::misaligned_doubles_ok = true;
// Replaces the memory operand of `node` that covers input edge `idx` with
// its *_win95_safe counterpart.
//
// The operand is found by walking node->_opnds from index 1 and summing each
// operand's edge count until the operand containing idx is reached.  The
// replacement is chosen by the operand's opcode, carrying over its
// displacement and/or scale; an unexpected opcode trips the assert.
// NOTE(review): most `case` labels, the `break`s and the `default:` label are
// not visible in this listing.
1393 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1394 // Get the memory operand from the node
1395 uint numopnds = node->num_opnds(); // Virtual call for number of operands
1396 uint skipped = node->oper_input_base(); // Sum of leaves skipped so far
1397 assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1398 uint opcnt = 1; // First operand
1399 uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
// Advance operand by operand until idx falls inside the current operand's
// range of input edges.
1400 while( idx >= skipped+num_edges ) {
1401 skipped += num_edges;
1402 opcnt++; // Bump operand count
1403 assert( opcnt < numopnds, "Accessing non-existent operand" );
1404 num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1407 MachOper *memory = node->_opnds[opcnt];
1408 MachOper *new_memory = NULL;
1409 switch (memory->opcode()) {
1412 // No transformation necessary.
1415 new_memory = new (C) indirect_win95_safeOper( );
1418 new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1421 new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1423 case INDINDEXOFFSET:
1424 new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1427 new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
1429 case INDINDEXSCALEOFFSET:
1430 new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1432 case LOAD_LONG_INDIRECT:
1433 case LOAD_LONG_INDOFFSET32:
1434 // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1437 assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
// Install the replacement operand in place of the original.
1440 node->_opnds[opcnt] = new_memory;
1443 // Advertise here if the CPU requires explicit rounding operations
1444 // to implement the UseStrictFP mode.
1445 const bool Matcher::strict_fp_requires_explicit_rounding = true;
1447 // Are floats converted to double when stored to stack during deoptimization?
1448 // On x32 it is stored with conversion only when FPU is used for floats.
1449 bool Matcher::float_in_double() { return (UseSSE == 0); }
1451 // Do ints take an entire long register or just half?
1452 const bool Matcher::int_in_long = false;
1454 // Return whether or not this register is ever used as an argument. This
1455 // function is used on startup to build the trampoline stubs in generateOptoStub.
1456 // Registers not mentioned will be killed by the VM call in the trampoline, and
1457 // arguments in those registers will not be available to the callee.
// ECX and EDX always qualify; the low halves of XMM0/XMM1 qualify with
// UseSSE >= 1 and the high halves with UseSSE >= 2.
// NOTE(review): the fall-through return is not visible in this listing.
1458 bool Matcher::can_be_java_arg( int reg ) {
1459 if( reg == ECX_num || reg == EDX_num ) return true;
1460 if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
1461 if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1465 bool Matcher::is_spillable_arg( int reg ) {
1466 return can_be_java_arg(reg);
1469 // Register for DIVI projection of divmodI
1470 RegMask Matcher::divI_proj_mask() {
1471 return EAX_REG_mask;
1474 // Register for MODI projection of divmodI
1475 RegMask Matcher::modI_proj_mask() {
1476 return EDX_REG_mask;
1479 // Register for DIVL projection of divmodL
1480 RegMask Matcher::divL_proj_mask() {
1481 ShouldNotReachHere();
1485 // Register for MODL projection of divmodL
1486 RegMask Matcher::modL_proj_mask() {
1487 ShouldNotReachHere();
1491 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1492 return EBP_REG_mask;
1495 // Returns true if the high 32 bits of the value is known to be zero.
// Two node shapes are examined: an Op_LoadUI2L node, and an Op_AndL node
// whose second input is a constant with no bits set in its upper 32 bits.
// NOTE(review): the return statements are not visible in this listing.
1496 bool is_operand_hi32_zero(Node* n) {
1497 int opc = n->Opcode();
1498 if (opc == Op_LoadUI2L) {
1501 if (opc == Op_AndL) {
1502 Node* o2 = n->in(2);
1503 if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1512 //----------ENCODING BLOCK-----------------------------------------------------
1513 // This block specifies the encoding classes used by the compiler to output
1514 // byte streams. Encoding classes generate functions which are called by
1515 // Machine Instruction Nodes in order to generate the bit encoding of the
1516 // instruction. Operands specify their base encoding interface with the
1517 // interface keyword. Four interfaces are currently supported:
1518 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an
1519 // operand to generate a function which returns its register number when
1520 // queried. CONST_INTER causes an operand to generate a function which
1521 // returns the value of the constant when queried. MEMORY_INTER causes an
1522 // operand to generate four functions which return the Base Register, the
1523 // Index Register, the Scale Value, and the Offset Value of the operand when
1524 // queried. COND_INTER causes an operand to generate six functions which
1525 // return the encoding code (ie - encoding bits for the instruction)
1526 // associated with each basic boolean condition for a conditional instruction.
1527 // Instructions specify two basic values for encoding. They use the
1528 // ins_encode keyword to specify their encoding class (which must be one of
1529 // the class names specified in the encoding block), and they use the
1530 // opcode keyword to specify, in order, their primary, secondary, and
1531 // tertiary opcode. Only the opcode sections which a particular instruction
1532 // needs for encoding need to be specified.
1534 // Build emit functions for each basic byte or larger field in the intel
1535 // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1536 // code in the enc_class source block. Emit functions will live in the
1537 // main source block for now. In future, we can generalize this by
1538 // adding a syntax that specifies the sizes of fields in an order,
1539 // so that the adlc can build the emit functions automagically
1541 // Emit primary opcode
1543 emit_opcode(cbuf, $primary);
1546 // Emit secondary opcode
1548 emit_opcode(cbuf, $secondary);
1551 // Emit opcode directly
1552 enc_class Opcode(immI d8) %{
1553 emit_opcode(cbuf, $d8$$constant);
1556 enc_class SizePrefix %{
1557 emit_opcode(cbuf,0x66);
1560 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
1561 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1564 enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{ // OpcRegReg(Many)
1565 emit_opcode(cbuf,$opcode$$constant);
1566 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1569 enc_class mov_r32_imm0( eRegI dst ) %{
1570 emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd -- MOV r32 ,imm32
1571 emit_d32 ( cbuf, 0x0 ); // imm32==0x0
1574 enc_class cdq_enc %{
1575 // Full implementation of Java idiv and irem; checks for
1576 // special case as described in JVM spec., p.243 & p.271.
1578 // normal case special case
1580 // input : rax,: dividend min_int
1583 // output: rax,: quotient (= rax, idiv reg) min_int
1584 // rdx: remainder (= rax, irem reg) 0
1588 // 81 F8 00 00 00 80 cmp rax,80000000h
1589 // 0F 85 0B 00 00 00 jne normal_case
1590 // 33 D2 xor rdx,edx
1591 // 83 F9 FF cmp rcx,0FFh
1592 // 0F 84 03 00 00 00 je done
1595 // F7 F9 idiv rax,ecx
// The immediate in "83 F9 FF" is an 8-bit value that the CPU sign-extends,
// so the comparison is against -1 (the divisor of the special case).
// The byte sequence below is emitted verbatim, two bytes per line.
1598 emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1599 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1600 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80); // cmp rax,80000000h
1601 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
// Displacement 0x0B skips the xor (2) + cmp (3) + je (6) bytes that follow.
1602 emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1603 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // jne normal_case
1604 emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2); // xor rdx,edx
1605 emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1606 emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
// Displacement 0x03 skips the cdq (1) and the 2-byte idiv the caller emits.
1607 emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1608 emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00); // je done
1610 emit_opcode(cbuf,0x99); // cdq
1611 // idiv (note: must be emitted by the user of this rule)
1615 // Dense encoding for older common ops
1616 enc_class Opc_plus(immI opcode, eRegI reg) %{
1617 emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1621 // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
1622 enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1623 // Check for 8-bit immediate, and set sign extend bit in opcode
1624 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1625 emit_opcode(cbuf, $primary | 0x02);
1627 else { // If 32-bit immediate
1628 emit_opcode(cbuf, $primary);
1632 enc_class OpcSErm (eRegI dst, immI imm) %{ // OpcSEr/m
1633 // Emit primary opcode and set sign-extend bit
1634 // Check for 8-bit immediate, and set sign extend bit in opcode
1635 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1636 emit_opcode(cbuf, $primary | 0x02); }
1637 else { // If 32-bit immediate
1638 emit_opcode(cbuf, $primary);
1640 // Emit r/m byte with secondary opcode, after primary opcode.
1641 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1644 enc_class Con8or32 (immI imm) %{ // Con8or32(storeImmI), 8 or 32 bits
1645 // Check for 8-bit immediate, and set sign extend bit in opcode
1646 if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1647 $$$emit8$imm$$constant;
1649 else { // If 32-bit immediate
1651 $$$emit32$imm$$constant;
// Applies an immediate ALU operation to the low word of a long register
// pair.  The low 32 bits of the long immediate are used; when they fit in a
// signed byte the opcode gets the sign-extend bit (0x02) and an 8-bit
// immediate follows, otherwise a 32-bit immediate follows.
1655 enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1656 // Emit primary opcode and set sign-extend bit
1657 // Check for 8-bit immediate, and set sign extend bit in opcode
1658 int con = (int)$imm$$constant; // Throw away top bits
1659 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1660 // Emit r/m byte with secondary opcode, after primary opcode.
1661 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1662 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1663 else emit_d32(cbuf,con);
// Applies an immediate ALU operation to the high word of a long register
// pair.  The upper 32 bits of the long immediate are used and the target is
// HIGH_FROM_LOW(dst), with the tertiary opcode in the reg field.  An 8-bit
// immediate with the sign-extend opcode bit (0x02) is chosen when the value
// fits in a signed byte, otherwise a 32-bit immediate.
1666 enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1667 // Emit primary opcode and set sign-extend bit
1668 // Check for 8-bit immediate, and set sign extend bit in opcode
1669 int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1670 emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1671 // Emit r/m byte with tertiary opcode, after primary opcode.
1672 emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1673 if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1674 else emit_d32(cbuf,con);
1677 enc_class Lbl (label labl) %{ // JMP, CALL
1678 Label *l = $labl$$label;
1679 emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size()+4)) : 0);
// Emits the 8-bit displacement of a short branch to `labl`.  The
// displacement is measured from the byte after the displacement itself
// (code_size()+1); 0 is emitted when no label is supplied.
1682 enc_class LblShort (label labl) %{ // JMP, CALL
1683 Label *l = $labl$$label;
1684 int disp = l ? (l->loc_pos() - (cbuf.code_size()+1)) : 0;
1685 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
1686 emit_d8(cbuf, disp);
1689 enc_class OpcSReg (eRegI dst) %{ // BSWAP
1690 emit_cc(cbuf, $secondary, $dst$$reg );
// Byte-reverses a 64-bit value held in a register pair: BSWAP (0F C8+r) on
// the low register, BSWAP on the high register, then opcode 0x87 with a
// register-direct ModRM to exchange the two halves.
1693 enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1694 int destlo = $dst$$reg;
1695 int desthi = HIGH_FROM_LOW(destlo);
1697 emit_opcode(cbuf, 0x0F);
1698 emit_cc(cbuf, 0xC8, destlo);
1700 emit_opcode(cbuf, 0x0F);
1701 emit_cc(cbuf, 0xC8, desthi);
1703 emit_opcode(cbuf, 0x87);
1704 emit_rm(cbuf, 0x3, destlo, desthi);
1707 enc_class RegOpc (eRegI div) %{ // IDIV, IMOD, JMP indirect, ...
1708 emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1711 enc_class Jcc (cmpOp cop, label labl) %{ // JCC
1712 Label *l = $labl$$label;
1714 emit_cc(cbuf, $secondary, $cop$$cmpcode);
1715 emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size()+4)) : 0);
// Emits a short conditional branch: the primary opcode combined with the
// condition code, followed by an 8-bit displacement to `labl` measured from
// the byte after the displacement (code_size()+1).  0 is emitted when no
// label is supplied.
1718 enc_class JccShort (cmpOp cop, label labl) %{ // JCC
1719 Label *l = $labl$$label;
1720 emit_cc(cbuf, $primary, $cop$$cmpcode);
1721 int disp = l ? (l->loc_pos() - (cbuf.code_size()+1)) : 0;
1722 assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
1723 emit_d8(cbuf, disp);
1726 enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1728 emit_cc(cbuf, $secondary, $cop$$cmpcode);
1731 enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
1732 int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1733 emit_d8(cbuf, op >> 8 );
1734 emit_d8(cbuf, op & 255);
1737 // emulate a CMOV with a conditional branch around a MOV
1738 enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1739 // Invert sense of branch from sense of CMOV
1740 emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1741 emit_d8( cbuf, $brOffs$$constant );
1744 enc_class enc_PartialSubtypeCheck( ) %{
1745 Register Redi = as_Register(EDI_enc); // result register
1746 Register Reax = as_Register(EAX_enc); // super class
1747 Register Recx = as_Register(ECX_enc); // killed
1748 Register Resi = as_Register(ESI_enc); // sub class
1751 MacroAssembler _masm(&cbuf);
1752 __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1754 /*set_cond_codes:*/ true);
1756 __ xorptr(Redi, Redi);
1761 enc_class FFree_Float_Stack_All %{ // Free_Float_Stack_All
1762 MacroAssembler masm(&cbuf);
1763 int start = masm.offset();
1766 masm.verify_FPU(0, "must be empty in SSE2+ mode");
1769 // External c_calling_convention expects the FPU stack to be 'clean'.
1770 // Compiled code leaves it dirty. Do cleanup now.
1771 masm.empty_FPU_stack();
1773 if (sizeof_FFree_Float_Stack_All == -1) {
1774 sizeof_FFree_Float_Stack_All = masm.offset() - start;
1776 assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1780 enc_class Verify_FPU_For_Leaf %{
1782 MacroAssembler masm(&cbuf);
1783 masm.verify_FPU( -3, "Returning from Runtime Leaf call");
// Emits the 32-bit PC-relative displacement of a direct call into the runtime
// (with a runtime_call relocation) and then, for float/double returns, code
// that moves the x87 result from the FPU stack into xmm0 via a temporary
// stack slot.
// NOTE(review): lines 1790, 1794-1795, 1798, 1804 and 1815+ are missing from
// this listing; the call opcode byte, any guard around the FP fix-up, and the
// body of the "unused result" branch are not visible here.
1787 enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1788 // This is the instruction starting address for relocation info.
1789 cbuf.set_inst_mark();
1791 // CALL directly to the runtime
// Displacement is relative to the end of the 4-byte immediate, hence "- 4".
1792 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
1793 runtime_call_Relocation::spec(), RELOC_IMM32 );
1796 MacroAssembler _masm(&cbuf);
1797 BasicType rt = tf()->return_type();
1799 if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1800 // A C runtime call where the return value is unused. In SSE2+
1801 // mode the result needs to be removed from the FPU stack. It's
1802 // likely that this function call could be removed by the
1803 // optimizer if the C function is a pure function.
1805 } else if (rt == T_FLOAT) {
// Reserve 4 bytes, spill ST(0) as a float, reload into xmm0, release.
1806 __ lea(rsp, Address(rsp, -4));
1807 __ fstp_s(Address(rsp, 0));
1808 __ movflt(xmm0, Address(rsp, 0));
1809 __ lea(rsp, Address(rsp, 4));
1810 } else if (rt == T_DOUBLE) {
// Same as above with an 8-byte slot for a double.
1811 __ lea(rsp, Address(rsp, -8));
1812 __ fstp_d(Address(rsp, 0));
1813 __ movdbl(xmm0, Address(rsp, 0));
1814 __ lea(rsp, Address(rsp, 8));
// Before a call: when the current compile is in 24-bit FP mode, emit FLDCW
// loading the control word at StubRoutines::addr_fpu_cntrl_wrd_std().
// In debug builds the emitted size is checked against pre_call_FPU_size().
1820 enc_class pre_call_FPU %{
1821 // If method sets FPU control word restore it here
1822 debug_only(int off0 = cbuf.code_size());
1823 if( Compile::current()->in_24_bit_fp_mode() ) {
1824 MacroAssembler masm(&cbuf);
1825 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1827 debug_only(int off1 = cbuf.code_size());
1828 assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction");
// After a call: when the current compile is in 24-bit FP mode, emit FLDCW
// loading the control word at StubRoutines::addr_fpu_cntrl_wrd_24().
1831 enc_class post_call_FPU %{
1832 // If method sets FPU control word do it here also
1833 if( Compile::current()->in_24_bit_fp_mode() ) {
1834 MacroAssembler masm(&cbuf);
1835 masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
// Saves the stack pointer in RBP (movptr rbp, rsp) so restore_SP can recover
// it. In debug builds the emitted size is checked against preserve_SP_size().
1839 enc_class preserve_SP %{
1840 debug_only(int off0 = cbuf.code_size());
1841 MacroAssembler _masm(&cbuf);
1842 // RBP is preserved across all calls, even compiled calls.
1843 // Use it to preserve RSP in places where the callee might change the SP.
1844 __ movptr(rbp, rsp);
1845 debug_only(int off1 = cbuf.code_size());
1846 assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
// Counterpart of preserve_SP: copies RBP back into RSP (movptr rsp, rbp).
1849 enc_class restore_SP %{
1850 MacroAssembler _masm(&cbuf);
1851 __ movptr(rsp, rbp);
// Emits the 32-bit PC-relative displacement for a Java static call, choosing
// among runtime_call, opt_virtual_call and static_call relocations, and emits
// a java-to-interpreter stub when _method is set.
// NOTE(review): lines 1858-1859, 1865 and 1868 are missing from this listing;
// the opcode byte and the first condition of the if/else chain are not
// visible here.
1854 enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL
1855 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
1856 // who we intended to call.
1857 cbuf.set_inst_mark();
1860 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
1861 runtime_call_Relocation::spec(), RELOC_IMM32 );
1862 } else if(_optimized_virtual) {
1863 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
1864 opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
1866 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
1867 static_call_Relocation::spec(), RELOC_IMM32 );
1869 if( _method ) { // Emit stub for static call
1870 emit_java_to_interp(cbuf);
// Emits a Java dynamic (virtual) call: first a MOV EAX,imm32 placeholder
// whose immediate is Universe::non_oop_word() with an oop relocation, then
// the call displacement with a virtual_call relocation that refers back to
// the address of that MOV.
// NOTE(review): lines 1875 and 1885 are missing from this listing; the call
// opcode byte is presumably emitted on 1885.
1874 enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL
1876 // Generate "Mov EAX,0x00", placeholder instruction to load oop-info
1877 // emit_call_dynamic_prologue( cbuf );
1878 cbuf.set_inst_mark();
1879 emit_opcode(cbuf, 0xB8 + EAX_enc); // mov EAX,-1
1880 emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
// Capture the mark set above (start of the MOV) for the relocation below.
1881 address virtual_call_oop_addr = cbuf.inst_mark();
1882 // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine
1883 // who we intended to call.
1884 cbuf.set_inst_mark();
1886 emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
1887 virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
// Emits the ModRM byte and 8-bit displacement of an indirect call through
// [EAX + methodOopDesc::from_compiled_offset()]. The assert guarantees the
// offset fits the signed 8-bit displacement selected by mod=01.
// NOTE(review): line 1896 is missing from this listing; the opcode byte is
// presumably emitted there.
1890 enc_class Java_Compiled_Call (method meth) %{ // JAVA COMPILED CALL
1891 int disp = in_bytes(methodOopDesc::from_compiled_offset());
1892 assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1894 // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
1895 cbuf.set_inst_mark();
1897 emit_rm(cbuf, 0x01, $secondary, EAX_enc ); // R/M byte
1898 emit_d8(cbuf, disp); // Displacement
// XOR dst,dst (opcode 0x33, register-direct ModRM): clears the register.
1902 enc_class Xor_Reg (eRegI dst) %{
1903 emit_opcode(cbuf, 0x33);
1904 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
1907 // Following encoding is no longer used, but may be restored if calling
1908 // convention changes significantly.
1909 // Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1911 // enc_class Java_Interpreter_Call (label labl) %{ // JAVA INTERPRETER CALL
1912 // // int ic_reg = Matcher::inline_cache_reg();
1913 // // int ic_encode = Matcher::_regEncode[ic_reg];
1914 // // int imo_reg = Matcher::interpreter_method_oop_reg();
1915 // // int imo_encode = Matcher::_regEncode[imo_reg];
1917 // // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1918 // // // so we load it immediately before the call
1919 // // emit_opcode(cbuf, 0x8B); // MOV imo_reg,ic_reg # method_oop
1920 // // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1923 // emit_opcode(cbuf, 0x33);
1924 // emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1926 // // CALL to interpreter.
1927 // cbuf.set_inst_mark();
1928 // $$$emit8$primary;
1929 // emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.code_end()) - 4),
1930 // runtime_call_Relocation::spec(), RELOC_IMM32 );
// Register-direct ModRM (reg field = $secondary opcode extension, rm = dst)
// followed by the shift count constant.
// NOTE(review): line 1934 is missing from this listing; the opcode byte is
// presumably emitted there.
1933 enc_class RegOpcImm (eRegI dst, immI8 shift) %{ // SHL, SAR, SHR
1935 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1936 $$$emit8$shift$$constant;
// MOV r32,imm32: opcode 0xB8+dst followed by the 32-bit constant.
1939 enc_class LdImmI (eRegI dst, immI src) %{ // Load Immediate
1940 // Load immediate does not have a zero or sign extended version
1941 // for 8-bit immediates
1942 emit_opcode(cbuf, 0xB8 + $dst$$reg);
1943 $$$emit32$src$$constant;
// Same shape as LdImmI, but the base opcode comes from the instruction's
// $primary opcode instead of a literal 0xB8.
1946 enc_class LdImmP (eRegI dst, immI src) %{ // Load Immediate
1947 // Load immediate does not have a zero or sign extended version
1948 // for 8-bit immediates
1949 emit_opcode(cbuf, $primary + $dst$$reg);
1950 $$$emit32$src$$constant;
// Loads the low 32 bits of a long constant into the low register of dst:
// either XOR dst,dst (0x33) or "$primary+dst imm32".
// NOTE(review): the if/else choosing between the two forms is on lines
// missing from this listing (1958-1959, 1962); presumably the XOR form is
// used when the masked constant is zero.
1953 enc_class LdImmL_Lo( eRegL dst, immL src) %{ // Load Immediate
1954 // Load immediate does not have a zero or sign extended version
1955 // for 8-bit immediates
1956 int dst_enc = $dst$$reg;
// Keep only the low 32 bits of the 64-bit constant.
1957 int src_con = $src$$constant & 0x0FFFFFFFFL;
1960 emit_opcode(cbuf, 0x33);
1961 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1963 emit_opcode(cbuf, $primary + dst_enc);
1964 emit_d32(cbuf, src_con);
// Loads the high 32 bits of a long constant into the register encoded as
// dst+2: either XOR (0x33) of that register with itself or
// "$primary+reg imm32".
// NOTE(review): the if/else choosing between the two forms is on lines
// missing from this listing (1973-1974, 1977); presumably the XOR form is
// used when the high word is zero.
1968 enc_class LdImmL_Hi( eRegL dst, immL src) %{ // Load Immediate
1969 // Load immediate does not have a zero or sign extended version
1970 // for 8-bit immediates
1971 int dst_enc = $dst$$reg + 2;
// Unsigned shift so the upper word is extracted without sign extension.
1972 int src_con = ((julong)($src$$constant)) >> 32;
1975 emit_opcode(cbuf, 0x33);
1976 emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1978 emit_opcode(cbuf, $primary + dst_enc);
1979 emit_d32(cbuf, src_con);
// Pushes a double constant onto the x87 stack: D9 EE (FLDZ) for +0.0,
// D9 E8 (FLD1) for +1.0, otherwise opcode 0xDD with a disp32-only ModRM
// followed by the constant emitted through emit_double_constant.
// NOTE(review): lines 1986, 1990 and 1993 are missing from this listing; the
// final "else" line is among them.
1984 enc_class LdImmD (immD src) %{ // Load Immediate
1985 if( is_positive_zero_double($src$$constant)) {
1987 emit_opcode(cbuf,0xD9);
1988 emit_opcode(cbuf,0xEE);
1989 } else if( is_positive_one_double($src$$constant)) {
1991 emit_opcode(cbuf,0xD9);
1992 emit_opcode(cbuf,0xE8);
1994 emit_opcode(cbuf,0xDD);
// mod=00, rm=101: absolute 32-bit displacement, no base register.
1995 emit_rm(cbuf, 0x0, 0x0, 0x5);
1996 emit_double_constant(cbuf, $src$$constant);
// Float counterpart of LdImmD: D9 EE (FLDZ) for +0.0f, D9 E8 (FLD1) for
// +1.0f, otherwise a disp32-only ModRM followed by the constant emitted
// through emit_float_constant.
// NOTE(review): lines 2008-2009 are missing from this listing; the "else"
// and the opcode byte of the memory form are presumably there.
2001 enc_class LdImmF (immF src) %{ // Load Immediate
2002 if( is_positive_zero_float($src$$constant)) {
2003 emit_opcode(cbuf,0xD9);
2004 emit_opcode(cbuf,0xEE);
2005 } else if( is_positive_one_float($src$$constant)) {
2006 emit_opcode(cbuf,0xD9);
2007 emit_opcode(cbuf,0xE8);
2010 // Load immediate does not have a zero or sign extended version
2011 // for 8-bit immediates
2012 // First load to TOS, then move to dst
2013 emit_rm(cbuf, 0x0, 0x0, 0x5);
2014 emit_float_constant(cbuf, $src$$constant);
// Operand bytes only: disp32-only ModRM with reg = dst, followed by the
// float constant via emit_float_constant. No opcode bytes are emitted here.
2018 enc_class LdImmX (regX dst, immXF con) %{ // Load Immediate
2019 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2020 emit_float_constant(cbuf, $con$$constant);
// Operand bytes only: disp32-only ModRM with reg = dst, followed by the
// double constant via emit_double_constant. No opcode bytes are emitted here.
2023 enc_class LdImmXD (regXD dst, immXD con) %{ // Load Immediate
2024 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2025 emit_double_constant(cbuf, $con$$constant);
// Full encoding of a double-constant load into an XMM register. The prefix
// and opcode depend on UseXmmLoadAndClearUpper: F2 0F 10 (MOVSD) when set,
// 66 0F 12 (MOVLPD) otherwise. The operand is a disp32-only ModRM plus the
// constant via emit_double_constant.
2028 enc_class load_conXD (regXD dst, immXD con) %{ // Load double constant
2029 // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
2030 emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2031 emit_opcode(cbuf, 0x0F);
2032 emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2033 emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
2034 emit_double_constant(cbuf, $con$$constant);
// Marks the instruction start, then emits a disp32-only ModRM whose reg field
// is the $secondary opcode extension, followed by the float constant.
// NOTE(review): line 2039 is missing from this listing; the opcode byte is
// presumably emitted there.
2037 enc_class Opc_MemImm_F(immF src) %{
2038 cbuf.set_inst_mark();
2040 emit_rm(cbuf, 0x0, $secondary, 0x5);
2041 emit_float_constant(cbuf, $src$$constant);
// 66 0F 6E /r: MOVD xmm,r32 -- copies a general register into an XMM register.
2045 enc_class MovI2X_reg(regX dst, eRegI src) %{
2046 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2047 emit_opcode(cbuf, 0x0F );
2048 emit_opcode(cbuf, 0x6E );
2049 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
// 66 0F 7E /r: MOVD r32,xmm -- copies an XMM register into a general register.
// For this opcode the XMM source sits in the ModRM reg field and the
// destination in rm, hence the swapped emit_rm arguments.
2052 enc_class MovX2I_reg(eRegI dst, regX src) %{
2053 emit_opcode(cbuf, 0x66 ); // MOVD dst,src
2054 emit_opcode(cbuf, 0x0F );
2055 emit_opcode(cbuf, 0x7E );
2056 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
// Moves a long held in a register pair into an XMM register: MOVD the low
// half into dst, MOVD the high half into tmp, then PUNPCKLDQ dst,tmp to
// interleave the two 32-bit values.
2059 enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
2060 { // MOVD $dst,$src.lo
2061 emit_opcode(cbuf,0x66);
2062 emit_opcode(cbuf,0x0F);
2063 emit_opcode(cbuf,0x6E);
2064 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2066 { // MOVD $tmp,$src.hi
2067 emit_opcode(cbuf,0x66);
2068 emit_opcode(cbuf,0x0F);
2069 emit_opcode(cbuf,0x6E);
2070 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2072 { // PUNPCKLDQ $dst,$tmp
2073 emit_opcode(cbuf,0x66);
2074 emit_opcode(cbuf,0x0F);
2075 emit_opcode(cbuf,0x62);
2076 emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
// Moves the low 64 bits of an XMM register into a register pair: MOVD the low
// dword into dst.lo, PSHUFLW with 0x4E to swap the two low dwords into tmp,
// then MOVD tmp's low dword into dst.hi.
2080 enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
2081 { // MOVD $dst.lo,$src
2082 emit_opcode(cbuf,0x66);
2083 emit_opcode(cbuf,0x0F);
2084 emit_opcode(cbuf,0x7E);
2085 emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
2087 { // PSHUFLW $tmp,$src,0x4E (01001110b)
2088 emit_opcode(cbuf,0xF2);
2089 emit_opcode(cbuf,0x0F);
2090 emit_opcode(cbuf,0x70);
2091 emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2092 emit_d8(cbuf, 0x4E);
2094 { // MOVD $dst.hi,$tmp
2095 emit_opcode(cbuf,0x66);
2096 emit_opcode(cbuf,0x0F);
2097 emit_opcode(cbuf,0x7E);
2098 emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
2103 // Encode a reg-reg copy. If it is useless, then empty encoding.
// Thin wrapper: forwards the dst/src register encodings to encode_Copy.
2104 enc_class enc_Copy( eRegI dst, eRegI src ) %{
2105 encode_Copy( cbuf, $dst$$reg, $src$$reg );
// Copies the low register of a long pair into an int register via encode_Copy
// ($src$$reg is the encoding used for the low half elsewhere in this file).
2108 enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
2109 encode_Copy( cbuf, $dst$$reg, $src$$reg );
2112 // Encode xmm reg-reg copy. If it is useless, then empty encoding.
// Thin wrapper: forwards the dst/src XMM register encodings to encode_CopyXD.
2113 enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
2114 encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
// Emits only a register-direct ModRM byte (reg = dst, rm = src); the opcode
// must be emitted by another encoding.
2117 enc_class RegReg (eRegI dst, eRegI src) %{ // RegReg(Many)
2118 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
// Register-direct ModRM for the low halves of two long register pairs.
// NOTE(review): line 2122 is missing from this listing; an opcode byte is
// presumably emitted there.
2121 enc_class RegReg_Lo(eRegL dst, eRegL src) %{ // RegReg(Many)
2123 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
// Register-direct ModRM for the high halves (HIGH_FROM_LOW) of two long
// register pairs.
// NOTE(review): line 2127 is missing from this listing; an opcode byte is
// presumably emitted there.
2126 enc_class RegReg_Hi(eRegL dst, eRegL src) %{ // RegReg(Many)
2128 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
// ModRM-only variant for the low halves of two long register pairs; no
// opcode byte is emitted here.
2131 enc_class RegReg_Lo2(eRegL dst, eRegL src) %{ // RegReg(Many)
2132 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
// ModRM-only variant for the high halves of two long register pairs; no
// opcode byte is emitted here.
2135 enc_class RegReg_Hi2(eRegL dst, eRegL src) %{ // RegReg(Many)
2136 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
// ModRM-only: reg = int register dst, rm = high half of the long pair src.
// Note the parameter order is (src, dst).
2139 enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
2140 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
// Emits the integer constant as a 32-bit immediate.
2143 enc_class Con32 (immI src) %{ // Con32(storeImmI)
2145 $$$emit32$src$$constant;
// Emits the raw 32-bit pattern of a float constant (via jint_cast) as an
// immediate.
2148 enc_class Con32F_as_bits(immF src) %{ // storeF_imm
2149 // Output Float immediate bits
2150 jfloat jf = $src$$constant;
2151 int jf_as_bits = jint_cast( jf );
2152 emit_d32(cbuf, jf_as_bits);
// Same as Con32F_as_bits but for the immXF operand class: emits the raw
// 32-bit pattern of the float constant.
2155 enc_class Con32XF_as_bits(immXF src) %{ // storeX_imm
2156 // Output Float immediate bits
2157 jfloat jf = $src$$constant;
2158 int jf_as_bits = jint_cast( jf );
2159 emit_d32(cbuf, jf_as_bits);
// Emits the integer constant as a 16-bit immediate.
2162 enc_class Con16 (immI src) %{ // Con16(storeImmI)
2164 $$$emit16$src$$constant;
// Emits the integer constant as a 32-bit value through emit_d32.
2167 enc_class Con_d32(immI src) %{
2168 emit_d32(cbuf,$src$$constant);
// Emits a disp32-only ModRM (reg = t1) followed by a zero 32-bit displacement.
2171 enc_class conmemref (eRegP t1) %{ // absolute [disp32] memory operand
2172 // Output immediate memory reference
2173 emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2174 emit_d32(cbuf, 0x00);
// Emits the x86 LOCK prefix byte (0xF0).
// NOTE(review): line 2178 is missing from this listing; the emission may be
// conditional.
2177 enc_class lock_prefix( ) %{
2179 emit_opcode(cbuf,0xF0); // [Lock]
2182 // Cmp-xchg long value.
2183 // Note: we need to swap rbx, and rcx before and after the
2184 // cmpxchg8 instruction because the instruction uses
2185 // rcx as the high order word of the new value to store but
2186 // our register encoding uses rbx.
// 64-bit compare-and-exchange: XCHG EBX,ECX; LOCK prefix; CMPXCHG8B
// [mem_ptr] (0F C7 /1); XCHG EBX,ECX again to undo the swap.
// NOTE(review): lines 2188-2189, 2192-2193, 2195 and 2199 are missing from
// this listing; the LOCK prefix may be emitted conditionally.
2187 enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
// 87 D9: XCHG with ModRM reg=EBX, rm=ECX.
2190 emit_opcode(cbuf,0x87);
2191 emit_opcode(cbuf,0xD9);
2194 emit_opcode(cbuf,0xF0);
2196 emit_opcode(cbuf,0x0F);
2197 emit_opcode(cbuf,0xC7);
// mod=00 with /1 opcode extension: memory operand [mem_ptr].
2198 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2200 emit_opcode(cbuf,0x87);
2201 emit_opcode(cbuf,0xD9);
// 32-bit compare-and-exchange: LOCK prefix, then CMPXCHG [mem_ptr],ECX
// (0F B1 /r with reg field 1 = ECX).
// NOTE(review): lines 2205-2206 and 2208-2209 are missing from this listing;
// the LOCK prefix may be emitted conditionally.
2204 enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2207 emit_opcode(cbuf,0xF0);
2210 emit_opcode(cbuf,0x0F);
2211 emit_opcode(cbuf,0xB1);
2212 emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
// Materializes a flag test into res: MOV res,0; JNE (0x75) over the next
// instruction; MOV res,1. MOV does not modify flags, so the JNE still sees
// the flags set before this sequence.
// NOTE(review): line 2223 is missing from this listing; the branch
// displacement byte is presumably emitted there.
2215 enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2216 int res_encoding = $res$$reg;
2219 emit_opcode( cbuf, 0xB8 + res_encoding);
2220 emit_d32( cbuf, 0 );
2222 emit_opcode(cbuf,0x75);
2225 emit_opcode( cbuf, 0xB8 + res_encoding);
2226 emit_d32( cbuf, 1 );
// Emits no bytes; only records the current position as the instruction mark.
2230 enc_class set_instruction_start( ) %{
2231 cbuf.set_inst_mark(); // Mark start of opcode for reloc info in mem operand
// Emits the register + memory operand bytes for an instruction by unpacking
// the memory operand (base, index, scale, disp, disp_is_oop) and delegating
// to encode_RegMem. The opcode itself is not emitted here.
2234 enc_class RegMem (eRegI ereg, memory mem) %{ // emit_reg_mem
2235 int reg_encoding = $ereg$$reg;
2236 int base = $mem$$base;
2237 int index = $mem$$index;
2238 int scale = $mem$$scale;
2239 int displace = $mem$$disp;
2240 bool disp_is_oop = $mem->disp_is_oop();
2241 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
// RegMem variant for the high half of a long: uses the high register of the
// pair and addresses memory 4 bytes past the operand's displacement. Asserts
// that the displacement is not an oop, since it is adjusted arithmetically.
2244 enc_class RegMem_Hi(eRegL ereg, memory mem) %{ // emit_reg_mem
2245 int reg_encoding = HIGH_FROM_LOW($ereg$$reg); // Hi register of pair, computed from lo
2246 int base = $mem$$base;
2247 int index = $mem$$index;
2248 int scale = $mem$$scale;
2249 int displace = $mem$$disp + 4; // Offset is 4 further in memory
2250 assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
2251 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
// Long shift by a constant 1..31 as two instructions: a double-precision
// shift "0F $tertiary r1,r2,cnt" followed by a plain shift
// "$primary /$secondary r1,cnt". When $tertiary is 0xA4, r1 is the low half
// and r2 the high half; otherwise the roles are swapped.
// NOTE(review): line 2255 is missing from this listing; r1 and r2 are
// presumably declared there.
2254 enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2256 if( $tertiary == 0xA4 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); }
2257 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); }
2258 emit_opcode(cbuf,0x0F);
2259 emit_opcode(cbuf,$tertiary);
2260 emit_rm(cbuf, 0x3, r1, r2);
2261 emit_d8(cbuf,$cnt$$constant);
2262 emit_d8(cbuf,$primary);
2263 emit_rm(cbuf, 0x3, $secondary, r1);
2264 emit_d8(cbuf,$cnt$$constant);
// Long shift by a constant 32..63 that keeps the sign: MOV lo,hi; shift lo by
// (cnt-32) using $primary /$secondary; then the same shift opcode on hi.
// NOTE(review): line 2275 is missing from this listing; the immediate of the
// final shift on hi is presumably emitted there.
2267 enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2268 emit_opcode( cbuf, 0x8B ); // Move
2269 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2270 emit_d8(cbuf,$primary);
2271 emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2272 emit_d8(cbuf,$cnt$$constant-32);
2273 emit_d8(cbuf,$primary);
2274 emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
// Long shift by a constant 32..63 that clears the vacated half: MOV r1,r2;
// if cnt > 32, shift r1 by (cnt-32); XOR r2,r2. When $secondary is 0x5, r1 is
// the low half and r2 the high half; otherwise the roles are swapped.
// NOTE(review): line 2279 is missing from this listing; r1 and r2 are
// presumably declared there.
2278 enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2280 if( $secondary == 0x5 ) { r1 = $dst$$reg; r2 = HIGH_FROM_LOW($dst$$reg); }
2281 else { r2 = $dst$$reg; r1 = HIGH_FROM_LOW($dst$$reg); }
2283 emit_opcode( cbuf, 0x8B ); // Move r1,r2
2284 emit_rm(cbuf, 0x3, r1, r2);
2285 if( $cnt$$constant > 32 ) { // Shift, if not by zero
2286 emit_opcode(cbuf,$primary);
2287 emit_rm(cbuf, 0x3, $secondary, r1);
2288 emit_d8(cbuf,$cnt$$constant-32);
2290 emit_opcode(cbuf,0x33); // XOR r2,r2
2291 emit_rm(cbuf, 0x3, r2, r2);
2294 // Clone of RegMem but accepts an extra parameter to access each
2295 // half of a double in memory; it never needs relocation info.
// Emits the opcode given as a constant operand, then a register + memory
// operand whose displacement is the operand's displacement plus
// disp_for_half. Relocation is never requested (disp_is_oop is false).
2296 enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
2297 emit_opcode(cbuf,$opcode$$constant);
2298 int reg_encoding = $rm_reg$$reg;
2299 int base = $mem$$base;
2300 int index = $mem$$index;
2301 int scale = $mem$$scale;
2302 int displace = $mem$$disp + $disp_for_half$$constant;
2303 bool disp_is_oop = false;
2304 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2307 // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2309 // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2310 // and it never needs relocation information.
2311 // Frequently used to move data between FPU's Stack Top and memory.
// Emits a memory operand whose ModRM reg field is the constant rm_opcode.
// Asserts that the displacement is not an oop and passes false for
// disp_is_oop to encode_RegMem.
2312 enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2313 int rm_byte_opcode = $rm_opcode$$constant;
2314 int base = $mem$$base;
2315 int index = $mem$$index;
2316 int scale = $mem$$scale;
2317 int displace = $mem$$disp;
2318 assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
2319 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
// Like RMopc_Mem_no_oop, but forwards the operand's disp_is_oop flag to
// encode_RegMem instead of forcing it to false.
2322 enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2323 int rm_byte_opcode = $rm_opcode$$constant;
2324 int base = $mem$$base;
2325 int index = $mem$$index;
2326 int scale = $mem$$scale;
2327 int displace = $mem$$disp;
2328 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
2329 encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
// Emits the operand bytes for a "[src0 + src1]" address with dst in the reg
// field: base = src0, no index, no scale, displacement = src1 constant.
// The trailing comments on the assignments describe sentinel values, not the
// values assigned here.
2332 enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{ // emit_reg_lea
2333 int reg_encoding = $dst$$reg;
2334 int base = $src0$$reg; // 0xFFFFFFFF indicates no base
2335 int index = 0x04; // 0x04 indicates no index
2336 int scale = 0x00; // 0x00 indicates no scale
2337 int displace = $src1$$constant; // 0x00 indicates no displacement
2338 bool disp_is_oop = false;
2339 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
// dst = min(dst, src): CMP dst,src (0x3B); JL (0x7C) over the move;
// MOV dst,src (0x8B).
// NOTE(review): lines 2343 and 2348-2349 are missing from this listing; the
// branch displacement byte is presumably emitted there.
2342 enc_class min_enc (eRegI dst, eRegI src) %{ // MIN
2344 emit_opcode(cbuf,0x3B);
2345 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2346 // jmp dst < src around move
2347 emit_opcode(cbuf,0x7C);
2350 emit_opcode(cbuf,0x8B);
2351 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
// dst = max(dst, src): CMP dst,src (0x3B); JG (0x7F) over the move;
// MOV dst,src (0x8B).
// NOTE(review): lines 2355 and 2360-2361 are missing from this listing; the
// branch displacement byte is presumably emitted there.
2354 enc_class max_enc (eRegI dst, eRegI src) %{ // MAX
2356 emit_opcode(cbuf,0x3B);
2357 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2358 // jmp dst > src around move
2359 emit_opcode(cbuf,0x7F);
2362 emit_opcode(cbuf,0x8B);
2363 emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
// Stores an x87 register to memory using the instruction's $primary opcode.
// If src is already FPR1 (top of stack), the /2 form (store, no pop) is used;
// otherwise src is first pushed with FLD ST(i) and the /3 form (store and
// pop) is used.
2366 enc_class enc_FP_store(memory mem, regD src) %{
2367 // If src is FPR1, we can just FST to store it.
2368 // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2369 int reg_encoding = 0x2; // Just store
2370 int base = $mem$$base;
2371 int index = $mem$$index;
2372 int scale = $mem$$scale;
2373 int displace = $mem$$disp;
2374 bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
2375 if( $src$$reg != FPR1L_enc ) {
2376 reg_encoding = 0x3; // Store & pop
2377 emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2378 emit_d8( cbuf, 0xC0-1+$src$$reg );
2380 cbuf.set_inst_mark(); // Mark start of opcode for reloc info in mem operand
2381 emit_opcode(cbuf,$primary);
2382 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
// NEG dst: opcode 0xF7 with /3 opcode extension, register-direct.
2385 enc_class neg_reg(eRegI dst) %{
2387 emit_opcode(cbuf,0xF7);
2388 emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
// SETL dst: 0F 9C with a register-direct ModRM (reg field 4, rm = dst).
2391 enc_class setLT_reg(eCXRegI dst) %{
2393 emit_opcode(cbuf,0x0F);
2394 emit_opcode(cbuf,0x9C);
2395 emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
// Branch-free conditional add: SUB p,q; SBB tmp,tmp (tmp becomes all ones if
// the subtract borrowed, else zero); AND tmp,y; ADD p,tmp.
2398 enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{ // cadd_cmpLT
2399 int tmpReg = $tmp$$reg;
// SUB p,q
2402 emit_opcode(cbuf,0x2B);
2403 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
// SBB tmp,tmp
2405 emit_opcode(cbuf,0x1B);
2406 emit_rm(cbuf, 0x3, tmpReg, tmpReg);
// AND tmp,y
2408 emit_opcode(cbuf,0x23);
2409 emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
// ADD p,tmp
2411 emit_opcode(cbuf,0x03);
2412 emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
// Same sequence as enc_cmpLTP, except the AND takes its second operand from
// memory: SUB p,q; SBB tmp,tmp; AND tmp,[mem]; ADD p,tmp.
2415 enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{ // cadd_cmpLT
2416 int tmpReg = $tmp$$reg;
// SUB p,q
2419 emit_opcode(cbuf,0x2B);
2420 emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
// SBB tmp,tmp
2422 emit_opcode(cbuf,0x1B);
2423 emit_rm(cbuf, 0x3, tmpReg, tmpReg);
// AND tmp,[mem]
2425 cbuf.set_inst_mark(); // Mark start of opcode for reloc info in mem operand
2426 emit_opcode(cbuf,0x23);
2427 int reg_encoding = tmpReg;
2428 int base = $mem$$base;
2429 int index = $mem$$index;
2430 int scale = $mem$$scale;
2431 int displace = $mem$$disp;
2432 bool disp_is_oop = $mem->disp_is_oop();
2433 encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
// ADD p,tmp
2435 emit_opcode(cbuf,0x03);
2436 emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
// Long left shift by the variable count in ECX: TEST ECX,0x20; JE skips the
// next 4 bytes when bit 5 is clear; otherwise MOV hi,lo and XOR lo,lo
// pre-shift by 32. Then SHLD hi,lo,CL and SHL lo,CL.
2439 enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
// TEST ECX,0x20 (F7 /0 imm32)
2441 emit_opcode(cbuf,0xF7);
2442 emit_rm(cbuf, 0x3, 0, ECX_enc);
2443 emit_d32(cbuf,0x20);
// JE +4: skip the MOV and XOR below (2 bytes each).
2445 emit_opcode(cbuf, 0x74);
2446 emit_d8(cbuf, 0x04);
2447 // MOV $dst.hi,$dst.lo
2448 emit_opcode( cbuf, 0x8B );
2449 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
// XOR $dst.lo,$dst.lo
2451 emit_opcode(cbuf, 0x33);
2452 emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2454 // SHLD $dst.hi,$dst.lo,$shift
2455 emit_opcode(cbuf,0x0F);
2456 emit_opcode(cbuf,0xA5);
2457 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2458 // SHL $dst.lo,$shift
2459 emit_opcode(cbuf,0xD3);
2460 emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
// Long logical right shift by the variable count in ECX: TEST ECX,0x20; JE
// skips the next 4 bytes when bit 5 is clear; otherwise MOV lo,hi and
// XOR hi,hi pre-shift by 32. Then SHRD lo,hi,CL and SHR hi,CL.
2463 enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
// TEST ECX,0x20 (F7 /0 imm32)
2465 emit_opcode(cbuf,0xF7);
2466 emit_rm(cbuf, 0x3, 0, ECX_enc);
2467 emit_d32(cbuf,0x20);
// JE +4: skip the MOV and XOR below (2 bytes each).
2469 emit_opcode(cbuf, 0x74);
2470 emit_d8(cbuf, 0x04);
2471 // MOV $dst.lo,$dst.hi
2472 emit_opcode( cbuf, 0x8B );
2473 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
// XOR $dst.hi,$dst.hi
2475 emit_opcode(cbuf, 0x33);
2476 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2478 // SHRD $dst.lo,$dst.hi,$shift
2479 emit_opcode(cbuf,0x0F);
2480 emit_opcode(cbuf,0xAD);
2481 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2482 // SHR $dst.hi,$shift
2483 emit_opcode(cbuf,0xD3);
2484 emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
// Long arithmetic right shift by the variable count in ECX: TEST ECX,0x20; JE
// skips the next 5 bytes when bit 5 is clear; otherwise MOV lo,hi and
// SAR hi,31 pre-shift by 32 while replicating the sign. Then SHRD lo,hi,CL
// and SAR hi,CL.
2487 enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
// TEST ECX,0x20 (F7 /0 imm32)
2489 emit_opcode(cbuf,0xF7);
2490 emit_rm(cbuf, 0x3, 0, ECX_enc);
2491 emit_d32(cbuf,0x20);
// JE +5: skip the MOV (2 bytes) and SAR imm8 (3 bytes) below.
2493 emit_opcode(cbuf, 0x74);
2494 emit_d8(cbuf, 0x05);
2495 // MOV $dst.lo,$dst.hi
2496 emit_opcode( cbuf, 0x8B );
2497 emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
// SAR $dst.hi,31 (C1 /7 imm8)
2499 emit_opcode(cbuf, 0xC1);
2500 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2501 emit_d8(cbuf, 0x1F );
2503 // SHRD $dst.lo,$dst.hi,$shift
2504 emit_opcode(cbuf,0x0F);
2505 emit_opcode(cbuf,0xAD);
2506 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2507 // SAR $dst.hi,$shift
2508 emit_opcode(cbuf,0xD3);
2509 emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2513 // ----------------- Encodings for floating point unit -----------------
2514 // May leave result in FPU-TOS or FPU reg depending on opcodes
// Register-direct ModRM with reg = $secondary and rm = the FPU register src.
// NOTE(review): line 2516 is missing from this listing; the opcode byte is
// presumably emitted there.
2515 enc_class OpcReg_F (regF src) %{ // FMUL, FDIV
2517 emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2520 // Pop argument in FPR0 with FSTP ST(0)
// DD D8: FSTP ST(0) -- pops the x87 stack, discarding the top value.
2521 enc_class PopFPU() %{
2522 emit_opcode( cbuf, 0xDD );
2523 emit_d8( cbuf, 0xD8 );
2526 // !!!!! equivalent to Pop_Reg_F
// DD D8+i: FSTP ST(i) with i = dst encoding -- store top of stack to dst, pop.
2527 enc_class Pop_Reg_D( regD dst ) %{
2528 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
2529 emit_d8( cbuf, 0xD8+$dst$$reg );
// D9 C0+(i-1): FLD ST(i-1) with i = dst encoding -- pushes a copy of that
// register onto the x87 stack.
2532 enc_class Push_Reg_D( regD dst ) %{
2533 emit_opcode( cbuf, 0xD9 );
2534 emit_d8( cbuf, 0xC0-1+$dst$$reg ); // FLD ST(i-1)
// Loads the 80-bit constant at StubRoutines::addr_fpu_subnormal_bias1()
// (DB /5 with absolute disp32) and multiplies it into dst with FMULP, which
// also pops the loaded value.
2537 enc_class strictfp_bias1( regD dst ) %{
2538 emit_opcode( cbuf, 0xDB ); // FLD m80real
2539 emit_opcode( cbuf, 0x2D );
2540 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2541 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
2542 emit_opcode( cbuf, 0xC8+$dst$$reg );
// Same sequence as strictfp_bias1, using the constant at
// StubRoutines::addr_fpu_subnormal_bias2().
2545 enc_class strictfp_bias2( regD dst ) %{
2546 emit_opcode( cbuf, 0xDB ); // FLD m80real
2547 emit_opcode( cbuf, 0x2D );
2548 emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2549 emit_opcode( cbuf, 0xDE ); // FMULP ST(dst), ST0
2550 emit_opcode( cbuf, 0xC8+$dst$$reg );
2553 // Special case for moving an integer register to a stack slot.
// Delegates to store_to_stackslot with the $primary opcode, the src register
// encoding and the stack slot's displacement.
2554 enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
2555 store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2558 // Special case for moving a register to a stack slot.
// Emits the operand bytes for "[ESP + disp32]" with src in the reg field:
// ModRM (mod=10, rm=ESP selects a SIB byte), SIB, then the 32-bit
// displacement.
2559 enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
2560 // Opcode already emitted
2561 emit_rm( cbuf, 0x02, $src$$reg, ESP_enc ); // R/M byte
2562 emit_rm( cbuf, 0x00, ESP_enc, ESP_enc); // SIB byte
2563 emit_d32(cbuf, $dst$$disp); // Displacement
2566 // Push the integer in stackSlot 'src' onto FP-stack
// Delegates to store_to_stackslot with the instruction's $primary opcode and
// $secondary opcode extension at the operand's displacement.
2567 enc_class Push_Mem_I( memory src ) %{ // FILD [ESP+src]
2568 store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2571 // Push the float in stackSlot 'src' onto FP-stack
// D9 /0 (FLD m32) on the stack slot, emitted through store_to_stackslot.
2572 enc_class Push_Mem_F( memory src ) %{ // FLD_S [ESP+src]
2573 store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
2576 // Push the double in stackSlot 'src' onto FP-stack
// DD /0 (FLD m64) on the stack slot, emitted through store_to_stackslot.
2577 enc_class Push_Mem_D( memory src ) %{ // FLD_D [ESP+src]
2578 store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
2581 // Push FPU's TOS float to a stack-slot, and pop FPU-stack
// D9 /3 (FSTP m32) on the stack slot, emitted through store_to_stackslot.
2582 enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2583 store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2586 // Same as Pop_Mem_F except for opcode
2587 // Push FPU's TOS double to a stack-slot, and pop FPU-stack
// DD /3 (FSTP m64) on the stack slot, emitted through store_to_stackslot.
2588 enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2589 store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
// DD D8+i: FSTP ST(i) with i = dst encoding (same bytes as Pop_Reg_D).
2592 enc_class Pop_Reg_F( regF dst ) %{
2593 emit_opcode( cbuf, 0xDD ); // FSTP ST(i)
2594 emit_d8( cbuf, 0xD8+$dst$$reg );
// D9 C0+(i-1): FLD ST(i-1) with i = dst encoding (same bytes as Push_Reg_D).
2597 enc_class Push_Reg_F( regF dst ) %{
2598 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2599 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2602 // Push FPU's float to a stack-slot, and pop FPU-stack
// Stores the float in FPU register src to a stack slot. When src is not FPR1
// it is first pushed with FLD ST(i-1); the store then uses opcode 0xD9 with
// the opcode extension held in 'pop'.
// NOTE(review): lines 2604 and 2608-2609 are missing from this listing; the
// declaration of 'pop' and its update inside the if are presumably there.
2603 enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
2605 if ($src$$reg != FPR1L_enc) {
2606 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2607 emit_d8( cbuf, 0xC0-1+$src$$reg );
2610 store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S [ESP+dst]
2613 // Push FPU's double to a stack-slot, and pop FPU-stack
// Double counterpart of Pop_Mem_Reg_F: optional FLD ST(i-1) when src is not
// FPR1, then a store with opcode 0xDD and the opcode extension held in 'pop'.
// NOTE(review): lines 2615 and 2619-2620 are missing from this listing; the
// declaration of 'pop' and its update inside the if are presumably there.
2614 enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
2616 if ($src$$reg != FPR1L_enc) {
2617 emit_opcode( cbuf, 0xD9 ); // FLD ST(i-1)
2618 emit_d8( cbuf, 0xC0-1+$src$$reg );
2621 store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D [ESP+dst]
2624 // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
// Copies FPU register src into FPU register dst. 'pop' starts as 0xD0-1, the
// FST ST(i) base adjusted by one because no value was pushed. When src is not
// FPR1 it is pushed with FLD first.
// NOTE(review): lines 2630-2631 are missing from this listing; 'pop' is
// presumably switched to the popping form inside the if.
2625 enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
2626 int pop = 0xD0 - 1; // -1 since we skip FLD
2627 if ($src$$reg != FPR1L_enc) {
2628 emit_opcode( cbuf, 0xD9 ); // FLD ST(src-1)
2629 emit_d8( cbuf, 0xC0-1+$src$$reg );
2632 emit_opcode( cbuf, 0xDD );
2633 emit_d8( cbuf, pop+$dst$$reg ); // FST<P> ST(i)
// Computes dst = src1 * src2 + src on the x87 stack through MacroAssembler:
// push src1, multiply by src2, add src, then store to dst and pop.
// The register indices are adjusted by hand: src1 uses reg-1 because nothing
// has been pushed yet, and the later operands use reg+0 because the push
// shifted the stack by one.
2637 enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
2638 MacroAssembler masm(&cbuf);
2639 masm.fld_s( $src1$$reg-1); // nothing at TOS, load TOS from src1.reg
2640 masm.fmul( $src2$$reg+0); // value at TOS
2641 masm.fadd( $src$$reg+0); // value at TOS
2642 masm.fstp_d( $dst$$reg+0); // value at TOS, popped off after store
// Prepares the x87 stack for a modulo: pushes dst with FLD ST(dst-1); then,
// if src is not FPR1, emits FINCSTP (D9 F7), FXCH with src (D9 C8+i) and
// FDECSTP (D9 F6), so the exchange happens one level below the pushed value.
// NOTE(review): lines 2647, 2651 and 2658 are missing from this listing
// (probably comments).
2646 enc_class Push_Reg_Mod_D( regD dst, regD src) %{
2648 emit_opcode( cbuf, 0xD9 );
2649 emit_d8( cbuf, 0xC0-1+$dst$$reg );
2650 if ($src$$reg != FPR1L_enc) {
// FINCSTP
2652 emit_opcode (cbuf, 0xD9);
2653 emit_opcode (cbuf, 0xF7);
2654 // swap src with FPR1:
2655 // FXCH FPR1 with src
2656 emit_opcode(cbuf, 0xD9);
2657 emit_d8(cbuf, 0xC8-1+$src$$reg );
// FDECSTP
2659 emit_opcode (cbuf, 0xD9);
2660 emit_opcode (cbuf, 0xF6);
// Transfers two XMM doubles onto the x87 stack through a temporary at [ESP]:
// SUB ESP,imm8; MOVSD [ESP],src1; FLD qword [ESP]; MOVSD [ESP],src0;
// FLD qword [ESP]. src0 ends up on top of the x87 stack.
// NOTE(review): lines 2665 and 2668-2669 are missing from this listing; the
// SUB immediate is presumably emitted there. The reserved stack space is not
// released in the visible lines.
2664 enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
2666 emit_opcode(cbuf,0x83); // SUB ESP,8
2667 emit_opcode(cbuf,0xEC);
2670 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src1
2671 emit_opcode (cbuf, 0x0F );
2672 emit_opcode (cbuf, 0x11 );
2673 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
2675 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2676 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2678 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src0
2679 emit_opcode (cbuf, 0x0F );
2680 emit_opcode (cbuf, 0x11 );
2681 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
2683 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2684 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
// Float counterpart of Push_ModD_encoding: SUB ESP,imm8; MOVSS [ESP],src1;
// FLD dword [ESP]; MOVSS [ESP],src0; FLD dword [ESP]. src0 ends up on top of
// the x87 stack.
// NOTE(review): lines 2689 and 2692-2693 are missing from this listing; the
// SUB immediate is presumably emitted there.
2688 enc_class Push_ModX_encoding( regX src0, regX src1) %{
2690 emit_opcode(cbuf,0x83); // SUB ESP,4
2691 emit_opcode(cbuf,0xEC);
2694 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src1
2695 emit_opcode (cbuf, 0x0F );
2696 emit_opcode (cbuf, 0x11 );
2697 encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
2699 emit_opcode(cbuf,0xD9 ); // FLD [ESP]
2700 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2702 emit_opcode (cbuf, 0xF3 ); // MOVSS [ESP], src0
2703 emit_opcode (cbuf, 0x0F );
2704 emit_opcode (cbuf, 0x11 );
2705 encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
2707 emit_opcode(cbuf,0xD9 ); // FLD [ESP]
2708 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
// Moves the x87 top-of-stack double into an XMM register through [ESP]:
// FSTP qword [ESP]; MOVSD or MOVLPD dst,[ESP] (chosen by
// UseXmmLoadAndClearUpper); ADD ESP,imm8 to release the temporary.
// NOTE(review): line 2723 is missing from this listing; the ADD immediate is
// presumably emitted there.
2712 enc_class Push_ResultXD(regXD dst) %{
2713 store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]
2715 // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
2716 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
2717 emit_opcode (cbuf, 0x0F );
2718 emit_opcode (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
2719 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
2721 emit_opcode(cbuf,0x83); // ADD ESP,8
2722 emit_opcode(cbuf,0xC4);
// Moves the x87 top-of-stack float into an XMM register through [ESP]:
// FSTP dword [ESP]; MOVSS dst,[ESP]; ADD ESP,d8. The caller supplies d8, the
// number of bytes to release.
2726 enc_class Push_ResultX(regX dst, immI d8) %{
2727 store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]
2729 emit_opcode (cbuf, 0xF3 ); // MOVSS dst(xmm), [ESP]
2730 emit_opcode (cbuf, 0x0F );
2731 emit_opcode (cbuf, 0x10 );
2732 encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
2734 emit_opcode(cbuf,0x83); // ADD ESP,d8 (4 or 8)
2735 emit_opcode(cbuf,0xC4);
2736 emit_d8(cbuf,$d8$$constant);
// Pushes an XMM double onto the x87 stack through a temporary at [ESP]:
// SUB ESP,imm8; MOVSD [ESP],src; FLD qword [ESP].
// NOTE(review): lines 2740 and 2743-2744 are missing from this listing; the
// SUB immediate is presumably emitted there. The reserved stack space is not
// released in the visible lines.
2739 enc_class Push_SrcXD(regXD src) %{
2741 emit_opcode(cbuf,0x83); // SUB ESP,8
2742 emit_opcode(cbuf,0xEC);
2745 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], src
2746 emit_opcode (cbuf, 0x0F );
2747 emit_opcode (cbuf, 0x11 );
2748 encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
2750 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2751 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
// Reserves an 8-byte temporary on the stack: 83 EC 08 = SUB ESP,8.
2754 enc_class push_stack_temp_qword() %{
2755 emit_opcode(cbuf,0x83); // SUB ESP,8
2756 emit_opcode(cbuf,0xEC);
2757 emit_d8 (cbuf,0x08);
// Releases the 8-byte temporary: 83 C4 08 = ADD ESP,8.
2760 enc_class pop_stack_temp_qword() %{
2761 emit_opcode(cbuf,0x83); // ADD ESP,8
2762 emit_opcode(cbuf,0xC4);
2763 emit_d8 (cbuf,0x08);
// Copies an XMM double to the x87 top of stack through [ESP]:
// MOVSD [ESP],xmm_src; FLD qword [ESP]. ESP is not adjusted here, so the
// 8 bytes at [ESP] must already be reserved by the caller.
2766 enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
2767 emit_opcode (cbuf, 0xF2 ); // MOVSD [ESP], xmm_src
2768 emit_opcode (cbuf, 0x0F );
2769 emit_opcode (cbuf, 0x11 );
2770 encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);
2772 emit_opcode(cbuf,0xDD ); // FLD_D [ESP]
2773 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2776 // Compute X^Y using Intel's fast hardware instructions, if possible.
2777 // Otherwise return a NaN.
// Core of pow/exp: with Q = Y*log2(X) in FPR1, splits Q into integer and
// fractional parts, computes 2^frac(Q) with F2XM1, builds the double
// 2^int(Q) in memory by placing the biased exponent in the high word, and
// multiplies the two. Uses EAX, EBX and ECX as scratch and writes 8 bytes at
// [ESP]. On exponent overflow the mask value in ECX is stored as the high
// word instead.
// NOTE(review): lines 2785, 2801 and 2810 are missing from this listing; the
// SIB byte of the FISTP, the SHL count and the immediate of the final MOV are
// presumably emitted there.
2778 enc_class pow_exp_core_encoding %{
2779 // FPR1 holds Y*ln2(X). Compute FPR1 = 2^(Y*ln2(X))
2780 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0); // fdup = fld st(0) Q Q
2781 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC); // frndint int(Q) Q
2782 emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9); // fsub st(1) -= st(0); int(Q) frac(Q)
2783 emit_opcode(cbuf,0xDB); // FISTP [ESP] frac(Q)
2784 emit_opcode(cbuf,0x1C);
2786 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0); // f2xm1 2^frac(Q)-1
2787 emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8); // fld1 1 2^frac(Q)-1
2788 emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1); // faddp 2^frac(Q)
2789 emit_opcode(cbuf,0x8B); // mov rax,[esp+0]=int(Q)
2790 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
2791 emit_opcode(cbuf,0xC7); // mov rcx,0xFFFFF800 - overflow mask
2792 emit_rm(cbuf, 0x3, 0x0, ECX_enc);
2793 emit_d32(cbuf,0xFFFFF800);
2794 emit_opcode(cbuf,0x81); // add rax,1023 - the double exponent bias
2795 emit_rm(cbuf, 0x3, 0x0, EAX_enc);
2796 emit_d32(cbuf,1023);
2797 emit_opcode(cbuf,0x8B); // mov rbx,eax
2798 emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
2799 emit_opcode(cbuf,0xC1); // shl rax,20 - Slide to exponent position
2800 emit_rm(cbuf,0x3,0x4,EAX_enc);
2802 emit_opcode(cbuf,0x85); // test rbx,ecx - check for overflow
2803 emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
2804 emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45); // CMOVne rax,ecx - overflow; stuff NAN into EAX
2805 emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
2806 emit_opcode(cbuf,0x89); // mov [esp+4],eax - Store as part of double word
2807 encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
2808 emit_opcode(cbuf,0xC7); // mov [esp+0],0 - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
2809 encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2811 emit_opcode(cbuf,0xDC); // fmul qword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
2812 encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
2815 // enc_class Pop_Reg_Mod_D( regD dst, regD src)
2816 // was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
// After a modulo: if src is not FPR1, emits FINCSTP (D9 F7), FXCH with src
// (D9 C8+i) and FDECSTP (D9 F6), mirroring the swap done by Push_Reg_Mod_D.
// No store is emitted here; see the commented-out FSTP at the end.
// NOTE(review): lines 2820, 2826 and 2829 are missing from this listing
// (probably comments and the closing brace of the if).
2818 enc_class Push_Result_Mod_D( regD src) %{
2819 if ($src$$reg != FPR1L_enc) {
// FINCSTP
2821 emit_opcode (cbuf, 0xD9);
2822 emit_opcode (cbuf, 0xF7);
2823 // FXCH FPR1 with src
2824 emit_opcode(cbuf, 0xD9);
2825 emit_d8(cbuf, 0xC8-1+$src$$reg );
// FDECSTP
2827 emit_opcode (cbuf, 0xD9);
2828 emit_opcode (cbuf, 0xF6);
2830 // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2831 // // FSTP FPR$dst$$reg
2832 // emit_opcode( cbuf, 0xDD );
2833 // emit_d8( cbuf, 0xD8+$dst$$reg );
// Transfers the x87 status-word condition codes into EFLAGS (FNSTSW AX then
// SAHF) and emits a short JNP with a fixed displacement of 5 bytes.
2836 enc_class fnstsw_sahf_skip_parity() %{
2838 emit_opcode( cbuf, 0xDF ); // DF E0 = FNSTSW AX
2839 emit_opcode( cbuf, 0xE0 );
2841 emit_opcode( cbuf, 0x9E ); // SAHF: load AH into the low byte of EFLAGS
2843 emit_opcode( cbuf, 0x7B ); // 7B 05 = JNP +5: taken when PF is clear
2844 emit_opcode( cbuf, 0x05 );
// Emits an FPREM loop: FPREM, WAIT, FNSTSW AX, SAHF, then a near JP that
// branches backwards while PF (the x87 C2 flag after FNSTSW/SAHF) is set.
2847 enc_class emitModD() %{
2848 // fprem must be iterative
// (FPREM may deliver only a partial remainder; it reports that through C2)
2851 emit_opcode( cbuf, 0xD9 ); // D9 F8 = FPREM
2852 emit_opcode( cbuf, 0xF8 );
2854 emit_opcode( cbuf, 0x9b ); // WAIT
2856 emit_opcode( cbuf, 0xDF ); // DF E0 = FNSTSW AX
2857 emit_opcode( cbuf, 0xE0 );
2859 emit_opcode( cbuf, 0x9E ); // SAHF
2861 emit_opcode( cbuf, 0x0F ); // 0F 8A rel32 = JP (near)
2862 emit_opcode( cbuf, 0x8A );
// rel32 = 0xFFFFFFF4 = -12, the combined length of the five instructions
// emitted here (2+1+2+1+6 bytes), so the branch re-executes the FPREM.
2863 emit_opcode( cbuf, 0xF4 );
2864 emit_opcode( cbuf, 0xFF );
2865 emit_opcode( cbuf, 0xFF );
2866 emit_opcode( cbuf, 0xFF );
// Moves the x87 compare result into EFLAGS. If status-word bit 0x0400 (C2)
// is set, AH is first overwritten with 1 so that SAHF produces CF=1 only.
2869 enc_class fpu_flags() %{
2871 emit_opcode( cbuf, 0xDF); // DF E0 = FNSTSW AX
2872 emit_opcode( cbuf, 0xE0);
// 66 A9 imm16 = TEST AX,0x0400
2874 emit_opcode( cbuf, 0x66 ); // operand-size prefix for 16-bit immediate
2875 emit_opcode( cbuf, 0xA9 );
2876 emit_d16 ( cbuf, 0x0400 );
2877 // // // This sequence works, but stalls for 12-16 cycles on PPro
2878 // // test eax,0x0400
2879 // emit_opcode( cbuf, 0xA9 );
2880 // emit_d32 ( cbuf, 0x00000400 );
2882 // jz exit (no unordered comparison)
// 74 02 = JZ +2: skips the 2-byte MOV AH,1 below and lands on the SAHF
2883 emit_opcode( cbuf, 0x74 );
2884 emit_d8 ( cbuf, 0x02 );
2885 // mov ah,1 - treat as LT case (set carry flag)
2886 emit_opcode( cbuf, 0xB4 );
2887 emit_d8 ( cbuf, 0x01 );
2889 emit_opcode( cbuf, 0x9E); // SAHF: load AH into the low byte of EFLAGS
// Post-compare fixup: when PF is set, rewrites EFLAGS through AH/SAHF so that
// only CF is set; otherwise leaves the flags untouched. Takes no operands and
// always emits the same 6 bytes.
2892 enc_class cmpF_P6_fixup() %{
2893 // Fixup the integer flags in case comparison involved a NaN
2895 // JNP exit (no unordered comparison, P-flag is set by NaN)
// 7B 03 = JNP +3: skips MOV AH,1 (2 bytes) and SAHF (1 byte), landing on the NOP
2896 emit_opcode( cbuf, 0x7B );
2897 emit_d8 ( cbuf, 0x03 );
2898 // MOV AH,1 - treat as LT case (set carry flag)
2899 emit_opcode( cbuf, 0xB4 );
2900 emit_d8 ( cbuf, 0x01 );
2902 emit_opcode( cbuf, 0x9E); // SAHF: load AH into the low byte of EFLAGS
2903 // NOP // target for branch to avoid branch to branch
2904 emit_opcode( cbuf, 0x90);
2909 // movl(dst, nan_result);
2910 // jcc(Assembler::parity, exit);
2911 // movl(dst, less_result);
2912 // jcc(Assembler::below, exit);
2913 // movl(dst, equal_result);
2914 // jcc(Assembler::equal, exit);
2915 // movl(dst, greater_result);
2918 // greater_result = 1;
2919 // equal_result = 0;
// Materializes an x87 compare result in $dst: -1 when PF is set (unordered),
// -1 when CF is set (below), 0 when ZF is set (equal), otherwise 1.
// The flags come from FNSTSW AX / SAHF; the MOVs between the branches do not
// modify EFLAGS, so every Jcc tests the same flag values. All branch
// displacements are hard-coded to land just past the final MOV.
2922 enc_class CmpF_Result(eRegI dst) %{
2924 emit_opcode( cbuf, 0xDF); // DF E0 = FNSTSW AX
2925 emit_opcode( cbuf, 0xE0);
2927 emit_opcode( cbuf, 0x9E); // SAHF
2928 // movl(dst, nan_result);
2929 emit_opcode( cbuf, 0xB8 + $dst$$reg); // B8+r imm32 = MOV r32,imm32 (5 bytes)
2930 emit_d32( cbuf, -1 );
2931 // jcc(Assembler::parity, exit);
2932 emit_opcode( cbuf, 0x7A );
2933 emit_d8 ( cbuf, 0x13 ); // 0x13 = 19 = 5+2+5+2+5 bytes remaining
2934 // movl(dst, less_result);
2935 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2936 emit_d32( cbuf, -1 );
2937 // jcc(Assembler::below, exit);
2938 emit_opcode( cbuf, 0x72 );
2939 emit_d8 ( cbuf, 0x0C ); // 0x0C = 12 = 5+2+5 bytes remaining
2940 // movl(dst, equal_result);
2941 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2942 emit_d32( cbuf, 0 );
2943 // jcc(Assembler::equal, exit);
2944 emit_opcode( cbuf, 0x74 );
2945 emit_d8 ( cbuf, 0x05 ); // 0x05 = the final 5-byte MOV
2946 // movl(dst, greater_result);
2947 emit_opcode( cbuf, 0xB8 + $dst$$reg);
2948 emit_d32( cbuf, 1 );
2952 // XMM version of CmpF_Result. Because the XMM compare
2953 // instructions set EFLAGS directly, this encoding is simpler than
2954 // the float version above.
// Branches on the current EFLAGS and adjusts $dst in place: the fall-through
// path (not parity, not equal, not above) reaches the decrement, and `inc`
// is the branch target for the "above" case.
// NOTE(review): presumably $dst holds 0 on entry so the result is -1/0/+1;
// confirm against the instruct that uses this encoding.
2955 enc_class CmpX_Result(eRegI dst) %{
2956 MacroAssembler _masm(&cbuf);
2957 Label nan, inc, done;
2959 __ jccb(Assembler::parity, nan); // PF set: unordered operands
2960 __ jccb(Assembler::equal, done); // ZF set: leave $dst unchanged
2961 __ jccb(Assembler::above, inc);
2963 __ decrement(as_Register($dst$$reg)); // NO L qqq
2966 __ increment(as_Register($dst$$reg)); // NO L qqq
2970 // Compare the longs and set flags
2971 // BROKEN! Do Not use as-is
// Emits CMP on the high halves, a short JNE opcode, then CMP on the low halves.
// NOTE(review): likely broken because the low-word CMP leaves flags for a
// 32-bit compare of the low halves only, which signed long conditions cannot
// consume directly; confirm before reviving this encoding.
2972 enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2973 // CMP $src1.hi,$src2.hi
2974 emit_opcode( cbuf, 0x3B );
2975 emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2977 emit_opcode(cbuf,0x75); // 0x75 = JNE rel8 opcode
2979 // CMP $src1.lo,$src2.lo
2980 emit_opcode( cbuf, 0x3B );
2981 emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
// Sign-extends the 32-bit $src into the register pair $dst: both halves of
// $dst receive a copy of $src, then the high half is shifted right
// arithmetically by 31 so it holds only copies of the sign bit.
2985 enc_class convert_int_long( regL dst, eRegI src ) %{
2987 int dst_encoding = $dst$$reg;
2988 int src_encoding = $src$$reg;
// low half of dst = src
2989 encode_Copy( cbuf, dst_encoding , src_encoding );
// high half of dst = src
2991 encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
// C1 /7 ib = SAR r/m32,imm8: SAR $dst.hi,31
2993 emit_opcode( cbuf, 0xC1 );
2994 emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2995 emit_d8(cbuf, 0x1F );
// Loads the 64-bit integer held in the register pair $src onto the x87 stack:
// pushes the high then the low half, FILDs the resulting 64-bit value at
// [ESP], and then adjusts ESP to release the temporary stack slot.
2998 enc_class convert_long_double( eRegL src ) %{
3000 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); // 50+r = PUSH r32: push $src.hi
3002 emit_opcode(cbuf, 0x50+$src$$reg ); // push $src.lo (ends up at the lower address)
3003 // fild 64-bits at [SP]
// DF /5 = FILD m64int; ModRM 0x6C + SIB 0x24 + disp8 0x00 address [ESP+0]
3004 emit_opcode(cbuf,0xdf);
3005 emit_d8(cbuf, 0x6C);
3006 emit_d8(cbuf, 0x24);
3007 emit_d8(cbuf, 0x00);
3009 emit_opcode(cbuf, 0x83); // add SP, #8
3010 emit_rm(cbuf, 0x3, 0x00, ESP_enc);
// Emits a one-operand signed IMUL by $src1 (64-bit product in EDX:EAX) and,
// when $cnt is greater than 32, an arithmetic right shift of $dst by
// ($cnt - 32). $src2 and $cr are not referenced by the statements below.
// NOTE(review): presumably $dst is EDX and $src2 supplies EAX, as the operand
// class names suggest; confirm against the operand definitions.
3014 enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
3015 // IMUL EDX:EAX,$src1
3016 emit_opcode( cbuf, 0xF7 );
3017 emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
// The first 32 bits of the shift are covered by taking the high word of the
// product; only the remainder needs an explicit shift.
3019 int shift_count = ((int)$cnt$$constant) - 32;
3020 if (shift_count > 0) {
// C1 /7 ib = SAR r/m32,imm8: SAR $dst,shift_count
3021 emit_opcode(cbuf, 0xC1);
3022 emit_rm(cbuf, 0x3, 7, $dst$$reg );
3023 emit_d8(cbuf, shift_count);
3027 // this version doesn't have add sp, 8
// Pushes the high then the low half of $src and FILDs the 64-bit integer at
// [ESP]; the two pushed words are left on the stack by this encoding.
3028 enc_class convert_long_double2( eRegL src ) %{
3030 emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg)); // 50+r = PUSH r32: push $src.hi
3032 emit_opcode(cbuf, 0x50+$src$$reg ); // push $src.lo
3033 // fild 64-bits at [SP]
// DF /5 = FILD m64int; ModRM 0x6C + SIB 0x24 + disp8 0x00 address [ESP+0]
3034 emit_opcode(cbuf,0xdf);
3035 emit_d8(cbuf, 0x6C);
3036 emit_d8(cbuf, 0x24);
3037 emit_d8(cbuf, 0x00);
// Signed 32x32 -> 64-bit multiply: emits the one-operand IMUL (F7 /5) with
// $src as the explicit operand. $dst is not referenced by the emitted bytes;
// the instruction writes EDX:EAX implicitly.
3040 enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
3041 // Basic idea: long = (long)int * (long)int
3042 // IMUL EDX:EAX, src
3043 emit_opcode( cbuf, 0xF7 );
3044 emit_rm( cbuf, 0x3, 0x5, $src$$reg);
// Unsigned 32x32 -> 64-bit multiply: emits the one-operand MUL (F7 /4) with
// $src as the explicit operand. $dst is not referenced by the emitted bytes;
// the instruction writes EDX:EAX implicitly.
3047 enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
3048 // Basic Idea: long = (int & 0xffffffffL) * (int & 0xffffffffL)
// MUL EDX:EAX, src
3050 emit_opcode( cbuf, 0xF7 );
3051 emit_rm( cbuf, 0x3, 0x4, $src$$reg);
// 64x64 -> 64-bit multiply of the register pair $dst by the register pair
// $src, using $tmp to accumulate the two cross products before the widening
// low-by-low MUL overwrites EDX:EAX.
3054 enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
3055 // Basic idea: lo(result) = lo(x_lo * y_lo)
3056 // hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
// MOV $tmp,$src.lo
3058 encode_Copy( cbuf, $tmp$$reg, $src$$reg );
// 0F AF /r = IMUL r32,r/m32: IMUL $tmp,$dst.hi
3060 emit_opcode( cbuf, 0x0F );
3061 emit_opcode( cbuf, 0xAF );
3062 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
// MOV $dst.hi,$src.hi
3064 encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
// IMUL $dst.hi,$dst.lo
3066 emit_opcode( cbuf, 0x0F );
3067 emit_opcode( cbuf, 0xAF );
3068 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
// 03 /r = ADD r32,r/m32: ADD $tmp,$dst.hi (sum of the two cross products)
3070 emit_opcode( cbuf, 0x03 );
3071 emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
3072 // MUL EDX:EAX,$src.lo
3073 emit_opcode( cbuf, 0xF7 );
3074 emit_rm( cbuf, 0x3, 0x4, $src$$reg );
// ADD $dst.hi,$tmp (fold the cross products into the high word)
3076 emit_opcode( cbuf, 0x03 );
3077 emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );