src/cpu/x86/vm/x86_32.ad
author kvn
Fri Feb 19 10:04:16 2010 -0800 (3 weeks ago)
changeset 1243 2883969d09e7
parent 1204e8443c7be117
permissions -rw-r--r--
6910664: C2: java/util/Arrays/Sorting.java fails with DeoptimizeALot flag
Summary: Matcher::float_in_double should be true only when FPU is used for floats.
Reviewed-by: never, twisti
        1 //
        2 // Copyright 1997-2009 Sun Microsystems, Inc.  All Rights Reserved.
        3 // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
        4 //
        5 // This code is free software; you can redistribute it and/or modify it
        6 // under the terms of the GNU General Public License version 2 only, as
        7 // published by the Free Software Foundation.
        8 //
        9 // This code is distributed in the hope that it will be useful, but WITHOUT
       10 // ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
       11 // FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
       12 // version 2 for more details (a copy is included in the LICENSE file that
       13 // accompanied this code).
       14 //
       15 // You should have received a copy of the GNU General Public License version
       16 // 2 along with this work; if not, write to the Free Software Foundation,
       17 // Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
       18 //
       19 // Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
       20 // CA 95054 USA or visit www.sun.com if you need additional information or
       21 // have any questions.
       22 //
       23 //
       24 
       25 // X86 Architecture Description File
       26 
       27 //----------REGISTER DEFINITION BLOCK------------------------------------------
       28 // This information is used by the matcher and the register allocator to
       29 // describe individual registers and classes of registers within the target
       30 // architecture.
       31 
       32 register %{
       33 //----------Architecture Description Register Definitions----------------------
       34 // General Registers
       35 // "reg_def"  name ( register save type, C convention save type,
       36 //                   ideal register type, encoding );
       37 // Register Save Types:
       38 //
       39 // NS  = No-Save:       The register allocator assumes that these registers
       40 //                      can be used without saving upon entry to the method, &
       41 //                      that they do not need to be saved at call sites.
       42 //
       43 // SOC = Save-On-Call:  The register allocator assumes that these registers
       44 //                      can be used without saving upon entry to the method,
       45 //                      but that they must be saved at call sites.
       46 //
       47 // SOE = Save-On-Entry: The register allocator assumes that these registers
       48 //                      must be saved before using them upon entry to the
       49 //                      method, but they do not need to be saved at call
       50 //                      sites.
       51 //
       52 // AS  = Always-Save:   The register allocator assumes that these registers
       53 //                      must be saved before using them upon entry to the
       54 //                      method, & that they must be saved at call sites.
       55 //
       56 // Ideal Register Type is used to determine how to save & restore a
       57 // register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
       58 // spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
       59 //
       60 // The encoding number is the actual bit-pattern placed into the opcodes.
       61 
       62 // General Registers
       63 // Previously set EBX, ESI, and EDI as save-on-entry for java code
       64 // Turn off SOE in java-code due to frequent use of uncommon-traps.
       65 // Now that allocator is better, turn on ESI and EDI as SOE registers.
          // NOTE(review): in the definitions below ESI and EDI carry SOC in the first
          // (register save type) column and SOE only in the C convention column, so
          // the last sentence above does not describe the current definitions - confirm.
          //
          // Each reg_def below passes a fifth argument (a VMReg expression, or
          // VMRegImpl::Bad()) in addition to the four fields listed in the
          // "reg_def" signature comment near the top of this register block.
       66 
       67 reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
       68 reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
       69 reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
       70 reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
       71 // now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
       72 reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
       73 reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
       74 reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
       75 reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
       76 
       77 // Special Registers
          // EFLAGS is defined with ideal register type 0 and no backing VMReg.
       78 reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
       79 
       80 // Float registers.  We treat TOS/FPR0 special.  It is invisible to the
       81 // allocator, and only shows up in the encodings.
          // Every FPRn is defined as an L/H pair that shares one encoding number.
       82 reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
       83 reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
       84 // OK, so here's the trick: FPR1 is really st(0), except in the midst
       85 // of emission of assembly for a machnode. During the emission the FPU stack
       86 // is pushed, making FPR1 == st(1) temporarily. However, at any safepoint
       87 // the stack will not have this element, so FPR1 == st(0) from the
       88 // oopMap viewpoint. This same weirdness with numbering causes
       89 // instruction encoding to have to play games with the register
       90 // encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation,
       91 // where it does flt->flt moves, to see an example.
       92 //
          // Accordingly FPRn (encoding n) is bound to as_FloatRegister(n-1) below; the
          // H half of each pair is the ->next() VMReg slot after the L half.
       93 reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
       94 reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
       95 reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
       96 reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
       97 reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
       98 reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
       99 reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
      100 reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
      101 reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
      102 reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
      103 reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
      104 reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
      105 reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
      106 reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
      107 
      108 // XMM registers.  128-bit registers or 4 words each, labeled a-d.
      109 // Word a in each register holds a Float, words ab hold a Double.
      110 // We currently do not use the SIMD capabilities, so registers cd
      111 // are unused at the moment.
          // Only the a and b words are defined here.  Both words of a register share
          // that register's encoding; the b word is the ->next() VMReg slot after a.
      112 reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
      113 reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
      114 reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
      115 reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
      116 reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
      117 reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
      118 reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
      119 reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
      120 reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
      121 reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
      122 reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
      123 reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
      124 reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
      125 reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
      126 reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
      127 reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
      128 
      129 // Specify priority of register selection within phases of register
      130 // allocation.  Highest priority is first.  A useful heuristic is to
      131 // give registers a low priority when they are required by machine
      132 // instructions, like EAX and EDX.  Registers which are used as
      133 // pairs must fall on an even boundary (witness the FPR#L's in this list).
      134 // For the Intel integer registers, the equivalent Long pairs are
      135 // EDX:EAX, EBX:ECX, and EDI:EBP.
          // chunk0 lists the eight integer registers (ESP last) followed by the
          // FPR L/H pairs, FPR0 included.
      136 alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
      137                     FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
      138                     FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
      139                     FPR6L, FPR6H, FPR7L, FPR7H );
      140 
          // chunk1 lists the XMM a/b word pairs; EFLAGS is appended after them.
      141 alloc_class chunk1( XMM0a, XMM0b,
      142                     XMM1a, XMM1b,
      143                     XMM2a, XMM2b,
      144                     XMM3a, XMM3b,
      145                     XMM4a, XMM4b,
      146                     XMM5a, XMM5b,
      147                     XMM6a, XMM6b,
      148                     XMM7a, XMM7b, EFLAGS);
      149 
      150 
      151 //----------Architecture Description Register Classes--------------------------
      152 // Several register classes are automatically defined based upon information in
      153 // this architecture description.
      154 // 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
      155 // 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
      156 // 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
      157 // 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
      158 //
      159 // Class for all general (integer) registers, including ESP
      160 reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
      161 // Class for general registers (any_reg without ESP)
      162 reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
      163 // Class for general registers which may be used for implicit null checks on win95
      164 // Also safe for use by tailjump. We don't want to allocate in rbp, so EBP is
          // left out of this class.
      165 reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
      166 // Class of "X" registers
      167 reg_class x_reg(EBX, ECX, EDX, EAX);
      168 // Class of registers that can appear in an address with no offset.
      169 // EBP and ESP require an extra instruction byte for zero offset.
      170 // Used in fast-unlock
      171 reg_class p_reg(EDX, EDI, ESI, EBX);
      172 // Class for general registers not including ECX
      173 reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
      174 // Class for general registers not including EAX (EBP is not in this list either)
      175 reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
      176 // Class for general registers not including EAX or EBX.
      177 reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
      178 // Class of EAX (for multiply and divide operations)
      179 reg_class eax_reg(EAX);
      180 // Class of EBX (for atomic add)
      181 reg_class ebx_reg(EBX);
      182 // Class of ECX (for shift and JCXZ operations and cmpLTMask)
      183 reg_class ecx_reg(ECX);
      184 // Class of EDX (for multiply and divide operations)
      185 reg_class edx_reg(EDX);
      186 // Class of EDI (for synchronization)
      187 reg_class edi_reg(EDI);
      188 // Class of ESI (for synchronization)
      189 reg_class esi_reg(ESI);
      190 // Singleton class for interpreter's stack pointer
      191 reg_class ebp_reg(EBP);
      192 // Singleton class for stack pointer
      193 reg_class sp_reg(ESP);
      194 // Singleton class for instruction pointer
      195 // reg_class ip_reg(EIP);
      196 // Singleton class for condition codes
      197 reg_class int_flags(EFLAGS);
      198 // Class of integer register pairs (listed low register first: EAX,EDX  ECX,EBX  EBP,EDI)
      199 reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
      200 // Class of integer register pairs that aligns with calling convention
      201 reg_class eadx_reg( EAX,EDX );
      202 reg_class ebcx_reg( ECX,EBX );
      203 // Not AX or DX, used in divides
      204 reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
      205 
      206 // Floating point registers.  Notice FPR0 is not a choice.
      207 // FPR0 is never allocated; we use clever encodings to fake
      208 // 2-address instructions out of Intel's FP stack.
          // Float classes name only the L halves; double classes name L/H pairs.
      209 reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
      210 
      211 // make a register class for SSE registers (the a words only)
      212 reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
      213 
      214 // make a double register class for SSE2 registers (a/b word pairs)
      215 reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
      216                   XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
      217 
          // x87 double class: every FPR pair except FPR0.
      218 reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
      219                    FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
      220                    FPR7L,FPR7H );
      221 
          // Fixed-register classes: "0" classes pin FPR1, dbl_reg1 pins FPR2, and
          // dbl_notreg0 is dbl_reg without the FPR1 pair.
      222 reg_class flt_reg0( FPR1L );
      223 reg_class dbl_reg0( FPR1L,FPR1H );
      224 reg_class dbl_reg1( FPR2L,FPR2H );
      225 reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
      226                        FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
      227 
      228 // XMM6 and XMM7 could be used as temporary registers for long, float and
      229 // double values for SSE2.
      230 reg_class xdb_reg6( XMM6a,XMM6b );
      231 reg_class xdb_reg7( XMM7a,XMM7b );
      232 %}
      233 
      234 
      235 //----------SOURCE BLOCK-------------------------------------------------------
      236 // This is a block of C++ code which provides values, functions, and
      237 // definitions necessary in the rest of the architecture description
      238 source_hpp %{
      239 // Must be visible to the DFA in dfa_x86_32.cpp
          // Declaration only; the definition is not visible in this part of the
          // file - presumably it lives in a later source block (TODO confirm).
      240 extern bool is_operand_hi32_zero(Node* n);
      241 %}
      242 
      243 source %{
          // Short aliases for two Assembler constants.
      244 #define   RELOC_IMM32    Assembler::imm_operand
      245 #define   RELOC_DISP32   Assembler::disp32_operand
      246 
          // Shorthand: "__ foo(...)" expands to "_masm.foo(...)", so a local named
          // _masm must be in scope wherever it is used (see encode_CopyXD).
      247 #define __ _masm.
      248 
      249 // How to find the high register of a Long pair, given the low register
          // encoding: add 2.  With the reg_def encodings in the register block this
          // maps EAX(0)->EDX(2), ECX(1)->EBX(3) and EBP(5)->EDI(7), i.e. the pairs
          // listed in long_reg.
      250 #define   HIGH_FROM_LOW(x) ((x)+2)
      251 
      252 // The 128-bit masks built below let the XMM instructions mask off or flip
      253 // sign bits, which allows fast versions of NegF/NegD and AbsF/AbsD.
      254 // The XMM instructions need those masks to be 128-bit aligned.
      255 
      256 // Rounds adr DOWN to a 16-byte boundary, stores lo and hi there as the two
      257 // 64-bit halves of one 128-bit operand, and returns the aligned address.
      258 // Needed because 'double' and 'long long' have 32-bits alignment on x86.
      259 static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
      260   const uintptr_t low_bits_clear = (uintptr_t)(~0xF);
      261   jlong *slot = (jlong*)(((uintptr_t)adr) & low_bits_clear);
      262   slot[0] = lo;
      263   slot[1] = hi;
      264   return slot;
      265 }
      266 
      267 // Buffer for 128-bits masks used by SSE instructions.
          // Ten jlongs: four 2-jlong masks plus one spare pair.  The masks are requested
          // at indices 2, 4, 6 and 8; double_quadword rounds each address down to a
          // 16-byte boundary, and the spare pair at the front absorbs that round-down.
      268 static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
      269 
      270 // Static initialization during VM startup.
          // signmask values clear the sign bit(s) of each float/double lane;
          // signflip values have only the sign bit(s) set.
      271 static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
      272 static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
      273 static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
      274 static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
      275 
      276 // Offset hacking within calls.
      277 // Byte size of the fldcw that precedes a call: 6 when the current
      278 // compilation is in 24-bit FP mode, otherwise 0.
      279 static int pre_call_FPU_size() {
      280   return Compile::current()->in_24_bit_fp_mode() ? 6 /* fldcw */ : 0;
      281 }
      282 
      283 static int preserve_SP_size() {
      284   return 2 LP64_ONLY(+ 1);  // op, rm(reg/reg) [, rex on LP64]
      285 }
      286 
      287 // Special hack so that every kind of call can report the byte offset from
      288 // the start of the call sequence to the point where the return address
      289 // will point.
      290 int MachCallStaticJavaNode::ret_addr_offset() {
      291   // 5 bytes from start of call to where return address points, plus any fldcw
      292   const int call_bytes = 5 + pre_call_FPU_size();
      293   const int sp_save_bytes = _method_handle_invoke ? preserve_SP_size() : 0;
      294   return call_bytes + sp_save_bytes;
      295 }
      296 
      297 int MachCallDynamicJavaNode::ret_addr_offset() {
      298   return pre_call_FPU_size() + 10;  // any fldcw, then 10 bytes from start of call to where return address points
      299 }
      300 
      301 static int sizeof_FFree_Float_Stack_All = -1;
      302 
      303 int MachCallRuntimeNode::ret_addr_offset() {
      304   assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
      305   return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size();
      306 }
      307 
      308 // Indicate if the safepoint node needs the polling page as an input.
      309 // Since x86 does have absolute addressing, it doesn't: always returns false.
      310 bool SafePointNode::needs_polling_address_input() {
      311   return false;
      312 }
      313 
      314 //
      315 // Compute padding required for nodes which need alignment
      316 //
      317 
      318 // Returns the padding to insert so that the bytes following the call opcode
      319 // start at a multiple of alignment_required().  The call must not span a
      320 // cache line so that it can be patched.
      321 int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
      322   const int after_opcode = current_offset + pre_call_FPU_size() + 1;  // fldcw (if any) + call opcode byte
      323   return round_to(after_opcode, alignment_required()) - after_opcode;
      324 }
      325 
      326 // Returns the padding to insert so that the bytes following the call opcode
      327 // start at a multiple of alignment_required().  The call must not span a
      328 // cache line so that it can be patched.
      329 int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
      330   // Skipped ahead of the aligned bytes: fldcw (if any), mov rbp, rsp, call opcode byte.
      331   const int after_opcode = current_offset + pre_call_FPU_size() + preserve_SP_size() + 1;
      332   return round_to(after_opcode, alignment_required()) - after_opcode;
      333 }
      334 
      335 // Returns the padding to insert so that the bytes following the call opcode
      336 // start at a multiple of alignment_required().  The call must not span a
      337 // cache line so that it can be patched.
      338 int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
      339   // Skipped ahead of the aligned bytes: fldcw (if any), 5-byte MOV, call opcode byte.
      340   const int after_opcode = current_offset + pre_call_FPU_size() + 5 + 1;
      341   return round_to(after_opcode, alignment_required()) - after_opcode;
      342 }
      343 
      344 #ifndef PRODUCT
          // Non-product builds only: prints the mnemonic "INT3" for a breakpoint node.
          // The register-allocator argument is unused.
      345 void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
      346   st->print("INT3");
      347 }
      348 #endif
      349 
      350 // EMIT_RM(): appends one byte laid out as f1:f2:f3 (f1 shifted left 6, f2 shifted left 3).
      351 void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
      352   const int packed = (f1 << 6) | (f2 << 3) | f3;
      353   *(cbuf.code_end()) = (unsigned char)packed;
      354   cbuf.set_code_end(cbuf.code_end() + 1);
      355 }
      356 
      357 // EMIT_CC(): appends one byte holding the bitwise OR of f1 and f2.
      358 void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
      359   const int merged = f1 | f2;
      360   *(cbuf.code_end()) = (unsigned char)merged;
      361   cbuf.set_code_end(cbuf.code_end() + 1);
      362 }
      363 
      364 // EMIT_OPCODE(): appends the low byte of code and advances the code end by one.
      365 void emit_opcode(CodeBuffer &cbuf, int code) {
      366   cbuf.code_end()[0] = (unsigned char)code;
      367   cbuf.set_code_end(&cbuf.code_end()[1]);
      368 }
      369 
      370 // EMIT_OPCODE() w/ relocation information
          // Records a relocation of type reloc at (instruction mark + offset) first,
          // then emits the opcode byte through the two-argument overload.
      371 void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
      372   cbuf.relocate(cbuf.inst_mark() + offset, reloc);
      373   emit_opcode(cbuf, code);
      374 }
      375 
      376 // EMIT_D8(): appends the low byte of d8 and advances the code end by one.
      377 void emit_d8(CodeBuffer &cbuf, int d8) {
      378   cbuf.code_end()[0] = (unsigned char)d8;
      379   cbuf.set_code_end(&cbuf.code_end()[1]);
      380 }
      381 
      382 // EMIT_D16(): stores d16 as a short at the code end and advances by two bytes.
      383 void emit_d16(CodeBuffer &cbuf, int d16) {
      384   ((short *)(cbuf.code_end()))[0] = d16;
      385   cbuf.set_code_end(&cbuf.code_end()[2]);
      386 }
      387 
      388 // EMIT_D32(): stores d32 as an int at the code end and advances by four bytes.
      389 void emit_d32(CodeBuffer &cbuf, int d32) {
      390   ((int *)(cbuf.code_end()))[0] = d32;
      391   cbuf.set_code_end(&cbuf.code_end()[4]);
      392 }
      393 
      394 // Records a relocation of type reloc (with the given format) at the current
      395 // instruction mark, then emits d32 as the next four bytes of code.
      396 void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
      397         int format) {
      398   cbuf.relocate(cbuf.inst_mark(), reloc, format);
      399   emit_d32(cbuf, d32);  // same int store + 4-byte advance as before
      400 }
      402 
      403 // Records the relocation described by rspec (with the given format) at the
      404 // current instruction mark, then emits d32 as the next four bytes of code.
      405 void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
      406         int format) {
      407 #ifdef ASSERT
      408   // Debug-only sanity check on oop relocations; 0 and the non-oop word are exempt.
      409   if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
      410     assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
      411   }
      412 #endif
      413   cbuf.relocate(cbuf.inst_mark(), rspec, format);
      414   emit_d32(cbuf, d32);  // same int store + 4-byte advance as before
      415 }
      416 
      417 // Access stack slot for load or store: emits opcode, an R/M byte with ESP
      418 // as r/m, a SIB byte, then disp as 8 bits when it fits in [-128,127], else 32.
      419 void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
      420   emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
      421   const bool fits_in_byte = ( -128 <= disp && disp <= 127 );
      422   emit_rm( cbuf, fits_in_byte ? 0x01 : 0x02, rm_field, ESP_enc );  // R/M byte
      423   emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
      424   if( fits_in_byte ) {
      425     emit_d8 (cbuf, disp);     // 8-bit displacement
      426   } else {
      427     emit_d32(cbuf, disp);     // 32-bit displacement
      428   }
      429 }
      430 
      431    // eRegI ereg, memory mem) %{    // emit_reg_mem
          // Emits the addressing bytes (R/M byte, optional SIB byte, optional
          // displacement) for a register + memory operand.  The opcode itself is not
          // emitted here.
          //   reg_encoding    - value placed in the middle field of the R/M byte
          //   base            - base register encoding; -1 is a special flag for an
          //                     absolute address
          //   index, scale    - index == 0x4 with scale == 0 is treated as "no index"
          //   displace        - displacement; emitted as 8 bits when it fits in
          //                     [-128,127] and is not an oop, otherwise as 32 bits
          //   displace_is_oop - when a 32-bit displacement is emitted, it gets an
          //                     oop_type relocation
          // NOTE(review): the "no displacement" tests below do not look at
          // displace_is_oop, so a zero displacement emits no bytes and no relocation
          // even when displace_is_oop is set - confirm callers never rely on that.
      432 void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
      433   // There is no index & no scale, use form without SIB byte
      434   if ((index == 0x4) &&
      435       (scale == 0) && (base != ESP_enc)) {
      436     // If no displacement, mode is 0x0; unless base is [EBP]
      437     if ( (displace == 0) && (base != EBP_enc) ) {
      438       emit_rm(cbuf, 0x0, reg_encoding, base);
      439     }
      440     else {                    // If 8-bit displacement, mode 0x1
      441       if ((displace >= -128) && (displace <= 127)
      442           && !(displace_is_oop) ) {
      443         emit_rm(cbuf, 0x1, reg_encoding, base);
      444         emit_d8(cbuf, displace);
      445       }
      446       else {                  // If 32-bit displacement
      447         if (base == -1) { // Special flag for absolute address
      448           emit_rm(cbuf, 0x0, reg_encoding, 0x5);
      449           // (manual lies; no SIB needed here)
      450           if ( displace_is_oop ) {
      451             emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
      452           } else {
      453             emit_d32      (cbuf, displace);
      454           }
      455         }
      456         else {                // Normal base + offset
      457           emit_rm(cbuf, 0x2, reg_encoding, base);
      458           if ( displace_is_oop ) {
      459             emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
      460           } else {
      461             emit_d32      (cbuf, displace);
      462           }
      463         }
      464       }
      465     }
      466   }
      467   else {                      // Else, encode with the SIB byte
      468     // If no displacement, mode is 0x0; unless base is [EBP]
      469     if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      470       emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      471       emit_rm(cbuf, scale, index, base);
      472     }
      473     else {                    // If 8-bit displacement, mode 0x1
      474       if ((displace >= -128) && (displace <= 127)
      475           && !(displace_is_oop) ) {
      476         emit_rm(cbuf, 0x1, reg_encoding, 0x4);
      477         emit_rm(cbuf, scale, index, base);
      478         emit_d8(cbuf, displace);
      479       }
      480       else {                  // If 32-bit displacement
          // NOTE(review): both arms of the test below emit the same two bytes
          // (in the first arm base is 0x04, which is what gets passed anyway).
      481         if (base == 0x04 ) {
      482           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
      483           emit_rm(cbuf, scale, index, 0x04);
      484         } else {
      485           emit_rm(cbuf, 0x2, reg_encoding, 0x4);
      486           emit_rm(cbuf, scale, index, base);
      487         }
      488         if ( displace_is_oop ) {
      489           emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
      490         } else {
      491           emit_d32      (cbuf, displace);
      492         }
      493       }
      494     }
      495   }
      496 }
      497 
      498 
      499 void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
      500   if( dst_encoding == src_encoding ) {
      501     // reg-reg copy, use an empty encoding
      502   } else {
      503     emit_opcode( cbuf, 0x8B );
      504     emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
      505   }
      506 }
      507 
      508 void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
      509   if( dst_encoding == src_encoding ) {
      510     // reg-reg copy, use an empty encoding
      511   } else {
      512     MacroAssembler _masm(&cbuf);
      513 
      514     __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
      515   }
      516 }
      517 
      518 
      519 //=============================================================================
      520 #ifndef PRODUCT
          // Non-product builds only: prints a textual listing of the method prolog
          // (optional FLDCW, optional stack bang, PUSHL EBP, optional cookie push,
          // frame allocation).
      521 void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
      522   Compile* C = ra_->C;
      523   if( C->in_24_bit_fp_mode() ) {
      524     st->print("FLDCW  24 bit fpu control word");
      525     st->print_cr(""); st->print("\t");
      526   }
      527 
      528   const int total_bytes = C->frame_slots() << LogBytesPerInt;
      529   assert((total_bytes & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
      530   // Remove two words for return addr and rbp,
      531   int framesize = total_bytes - 2*wordSize;
      532 
      533   // Calls to C2R adapters often do not accept exceptional returns.
      534   // We require that their callers must bang for them.  But be careful, because
      535   // some VM calls (such as call site linkage) can use several kilobytes of
      536   // stack.  But the stack safety zone should account for that.
      537   // See bugs 4446381, 4468289, 4497237.
      538   if (C->need_stack_bang(framesize)) {
      539     st->print_cr("# stack bang"); st->print("\t");
      540   }
      541   st->print_cr("PUSHL  EBP"); st->print("\t");
      542 
      543   if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
      544     st->print("PUSH   0xBADB100D\t# Majik cookie for stack depth check");
      545     st->print_cr(""); st->print("\t");
      546     framesize -= wordSize;
      547   }
      548 
      549   // The SUB is left out only for an empty frame that is already preceded by
      550   // a fldcw or the cookie push; every other case prints it.
      551   const bool sub_elided = (C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize == 0;
      552   if (!sub_elided) {
      553     st->print("SUB    ESP,%d\t# Create frame",framesize);
      554   }
      555 }
      556 #endif
      558 
      559 
          // Emits the method prolog into cbuf, in this order: optional FPU-stack
          // verification, optional fldcw (24-bit FP mode), optional stack overflow
          // check, push of EBP, optional stack-depth cookie push, frame allocation.
          // It then records the frame-complete offset and, in ASSERT builds with
          // VerifyStackAtCalls, emits a stack alignment check.
      560 void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
      561   Compile* C = ra_->C;
      562 
      563   if (UseSSE >= 2 && VerifyFPU) {
      564     MacroAssembler masm(&cbuf);
      565     masm.verify_FPU(0, "FPU stack must be clean on entry");
      566   }
      567 
      568   // WARNING: Initial instruction MUST be 5 bytes or longer so that
      569   // NativeJump::patch_verified_entry will be able to patch out the entry
      570   // code safely. The fldcw is ok at 6 bytes, the push to verify stack
      571   // depth is ok at 5 bytes, the frame allocation can be either 3 or
      572   // 6 bytes. So if we don't do the fldcw or the push then we must
      573   // use the 6 byte frame allocation even if we have no frame. :-(
          // NOTE(review): the one-byte push of EBP below is emitted ahead of the
          // cookie push and the frame allocation - confirm how the 5-byte
          // requirement above is meant to account for it.
      574   // If method sets FPU control word do it now
      575   if( C->in_24_bit_fp_mode() ) {
      576     MacroAssembler masm(&cbuf);
      577     masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
      578   }
      579 
      580   int framesize = C->frame_slots() << LogBytesPerInt;
      581   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
      582   // Remove two words for return addr and rbp,
      583   framesize -= 2*wordSize;
      584 
      585   // Calls to C2R adapters often do not accept exceptional returns.
      586   // We require that their callers must bang for them.  But be careful, because
      587   // some VM calls (such as call site linkage) can use several kilobytes of
      588   // stack.  But the stack safety zone should account for that.
      589   // See bugs 4446381, 4468289, 4497237.
      590   if (C->need_stack_bang(framesize)) {
      591     MacroAssembler masm(&cbuf);
      592     masm.generate_stack_overflow_check(framesize);
      593   }
      594 
      595   // We always push rbp, so that on return to interpreter rbp, will be
      596   // restored correctly and we can correct the stack.
      597   emit_opcode(cbuf, 0x50 | EBP_enc);
      598 
      599   if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
      600     emit_opcode(cbuf, 0x68); // push 0xbadb100d
      601     emit_d32(cbuf, 0xbadb100d);
          // the cookie occupies one word of the frame
      602     framesize -= wordSize;
      603   }
      604 
          // Short form (3 bytes: opcode, R/M byte, 8-bit immediate) is used only when a
          // fldcw or cookie push was emitted and framesize < 128; it is skipped
          // entirely for an empty frame.  Otherwise the 6-byte form (32-bit immediate)
          // is always emitted, even for framesize == 0.
      605   if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
      606     if (framesize) {
      607       emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
      608       emit_rm(cbuf, 0x3, 0x05, ESP_enc);
      609       emit_d8(cbuf, framesize);
      610     }
      611   } else {
      612     emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
      613     emit_rm(cbuf, 0x3, 0x05, ESP_enc);
      614     emit_d32(cbuf, framesize);
      615   }
          // Record the code offset at which the frame has been set up.
      616   C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());
      617 
      618 #ifdef ASSERT
          // Debug check: with rax pushed, the low bits of rsp must equal
          // StackAlignmentInBytes-wordSize; otherwise stop with a message.
      619   if (VerifyStackAtCalls) {
      620     Label L;
      621     MacroAssembler masm(&cbuf);
      622     masm.push(rax);
      623     masm.mov(rax, rsp);
      624     masm.andptr(rax, StackAlignmentInBytes-1);
      625     masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
      626     masm.pop(rax);
      627     masm.jcc(Assembler::equal, L);
      628     masm.stop("Stack is not properly aligned!");
      629     masm.bind(L);
      630   }
      631 #endif
      632 
      633 }
      634 
      635 uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
      636   return MachNode::size(ra_); // too many variables; just compute it the hard way
      637 }
      638 
      639 int MachPrologNode::reloc() const {
      640   return 0; // a large enough number
      641 }
      642 
      643 //=============================================================================
      644 #ifndef PRODUCT
      645 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
      646   Compile *C = ra_->C;
      647   int framesize = C->frame_slots() << LogBytesPerInt;
      648   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
      649   // Remove two words for return addr and rbp,
      650   framesize -= 2*wordSize;
      651 
      652   if( C->in_24_bit_fp_mode() ) {
      653     st->print("FLDCW  standard control word");
      654     st->cr(); st->print("\t");
      655   }
      656   if( framesize ) {
      657     st->print("ADD    ESP,%d\t# Destroy frame",framesize);
      658     st->cr(); st->print("\t");
      659   }
      660   st->print_cr("POPL   EBP"); st->print("\t");
      661   if( do_polling() && C->is_method_compilation() ) {
      662     st->print("TEST   PollPage,EAX\t! Poll Safepoint");
      663     st->cr(); st->print("\t");
      664   }
      665 }
      666 #endif
      667 
      668 void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
      669   Compile *C = ra_->C;
      670 
      671   // If method set FPU control word, restore to standard control word
      672   if( C->in_24_bit_fp_mode() ) {
      673     MacroAssembler masm(&cbuf);
      674     masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
      675   }
      676 
      677   int framesize = C->frame_slots() << LogBytesPerInt;
      678   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
      679   // Remove two words for return addr and rbp,
      680   framesize -= 2*wordSize;
      681 
      682   // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
      683 
      684   if( framesize >= 128 ) {
      685     emit_opcode(cbuf, 0x81); // add  SP, #framesize
      686     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
      687     emit_d32(cbuf, framesize);
      688   }
      689   else if( framesize ) {
      690     emit_opcode(cbuf, 0x83); // add  SP, #framesize
      691     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
      692     emit_d8(cbuf, framesize);
      693   }
      694 
      695   emit_opcode(cbuf, 0x58 | EBP_enc);
      696 
      697   if( do_polling() && C->is_method_compilation() ) {
      698     cbuf.relocate(cbuf.code_end(), relocInfo::poll_return_type, 0);
      699     emit_opcode(cbuf,0x85);
      700     emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
      701     emit_d32(cbuf, (intptr_t)os::get_polling_page());
      702   }
      703 }
      704 
      705 uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
      706   Compile *C = ra_->C;
      707   // If method set FPU control word, restore to standard control word
      708   int size = C->in_24_bit_fp_mode() ? 6 : 0;
      709   if( do_polling() && C->is_method_compilation() ) size += 6;
      710 
      711   int framesize = C->frame_slots() << LogBytesPerInt;
      712   assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
      713   // Remove two words for return addr and rbp,
      714   framesize -= 2*wordSize;
      715 
      716   size++; // popl rbp,
      717 
      718   if( framesize >= 128 ) {
      719     size += 6;
      720   } else {
      721     size += framesize ? 3 : 0;
      722   }
      723   return size;
      724 }
      725 
      726 int MachEpilogNode::reloc() const {
      727   return 0; // a large enough number
      728 }
      729 
      730 const Pipeline * MachEpilogNode::pipeline() const {
      731   return MachNode::pipeline_class();
      732 }
      733 
      734 int MachEpilogNode::safepoint_offset() const { return 0; }
      735 
      736 //=============================================================================
      737 
      738 enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
      739 static enum RC rc_class( OptoReg::Name reg ) {
      740 
      741   if( !OptoReg::is_valid(reg)  ) return rc_bad;
      742   if (OptoReg::is_stack(reg)) return rc_stack;
      743 
      744   VMReg r = OptoReg::as_VMReg(reg);
      745   if (r->is_Register()) return rc_int;
      746   if (r->is_FloatRegister()) {
      747     assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
      748     return rc_float;
      749   }
      750   assert(r->is_XMMRegister(), "must be");
      751   return rc_xmm;
      752 }
      753 
      754 static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
      755                         int opcode, const char *op_str, int size, outputStream* st ) {
      756   if( cbuf ) {
      757     emit_opcode  (*cbuf, opcode );
      758     encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
      759 #ifndef PRODUCT
      760   } else if( !do_size ) {
      761     if( size != 0 ) st->print("\n\t");
      762     if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      763       if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      764       else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
      765     } else { // FLD, FST, PUSH, POP
      766       st->print("%s [ESP + #%d]",op_str,offset);
      767     }
      768 #endif
      769   }
      770   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
      771   return size+3+offset_size;
      772 }
      773 
      774 // Helper for XMM registers.  Extra opcode bits, limited syntax.
      775 static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
      776                          int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
      777   if( cbuf ) {
      778     if( reg_lo+1 == reg_hi ) { // double move?
      779       if( is_load && !UseXmmLoadAndClearUpper )
      780         emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
      781       else
      782         emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
      783     } else {
      784       emit_opcode(*cbuf, 0xF3 );
      785     }
      786     emit_opcode(*cbuf, 0x0F );
      787     if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
      788       emit_opcode(*cbuf, 0x12 );   // use 'movlpd' for load
      789     else
      790       emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
      791     encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
      792 #ifndef PRODUCT
      793   } else if( !do_size ) {
      794     if( size != 0 ) st->print("\n\t");
      795     if( reg_lo+1 == reg_hi ) { // double move?
      796       if( is_load ) st->print("%s %s,[ESP + #%d]",
      797                                UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
      798                                Matcher::regName[reg_lo], offset);
      799       else          st->print("MOVSD  [ESP + #%d],%s",
      800                                offset, Matcher::regName[reg_lo]);
      801     } else {
      802       if( is_load ) st->print("MOVSS  %s,[ESP + #%d]",
      803                                Matcher::regName[reg_lo], offset);
      804       else          st->print("MOVSS  [ESP + #%d],%s",
      805                                offset, Matcher::regName[reg_lo]);
      806     }
      807 #endif
      808   }
      809   int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
      810   return size+5+offset_size;
      811 }
      812 
      813 
      814 static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
      815                             int src_hi, int dst_hi, int size, outputStream* st ) {
      816   if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
      817     if( cbuf ) {
      818       if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
      819         emit_opcode(*cbuf, 0x66 );
      820       }
      821       emit_opcode(*cbuf, 0x0F );
      822       emit_opcode(*cbuf, 0x28 );
      823       emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
      824 #ifndef PRODUCT
      825     } else if( !do_size ) {
      826       if( size != 0 ) st->print("\n\t");
      827       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
      828         st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      829       } else {
      830         st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      831       }
      832 #endif
      833     }
      834     return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
      835   } else {
      836     if( cbuf ) {
      837       emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
      838       emit_opcode(*cbuf, 0x0F );
      839       emit_opcode(*cbuf, 0x10 );
      840       emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
      841 #ifndef PRODUCT
      842     } else if( !do_size ) {
      843       if( size != 0 ) st->print("\n\t");
      844       if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
      845         st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      846       } else {
      847         st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      848       }
      849 #endif
      850     }
      851     return size+4;
      852   }
      853 }
      854 
      855 static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
      856   if( cbuf ) {
      857     emit_opcode(*cbuf, 0x8B );
      858     emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
      859 #ifndef PRODUCT
      860   } else if( !do_size ) {
      861     if( size != 0 ) st->print("\n\t");
      862     st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
      863 #endif
      864   }
      865   return size+2;
      866 }
      867 
      868 static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
      869                                  int offset, int size, outputStream* st ) {
      870   if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
      871     if( cbuf ) {
      872       emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      873       emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
      874 #ifndef PRODUCT
      875     } else if( !do_size ) {
      876       if( size != 0 ) st->print("\n\t");
      877       st->print("FLD    %s",Matcher::regName[src_lo]);
      878 #endif
      879     }
      880     size += 2;
      881   }
      882 
      883   int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
      884   const char *op_str;
      885   int op;
      886   if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
      887     op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
      888     op = 0xDD;
      889   } else {                   // 32-bit store
      890     op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
      891     op = 0xD9;
      892     assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
      893   }
      894 
      895   return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
      896 }
      897 
      898 uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
      899   // Get registers to move
      900   OptoReg::Name src_second = ra_->get_reg_second(in(1));
      901   OptoReg::Name src_first = ra_->get_reg_first(in(1));
      902   OptoReg::Name dst_second = ra_->get_reg_second(this );
      903   OptoReg::Name dst_first = ra_->get_reg_first(this );
      904 
      905   enum RC src_second_rc = rc_class(src_second);
      906   enum RC src_first_rc = rc_class(src_first);
      907   enum RC dst_second_rc = rc_class(dst_second);
      908   enum RC dst_first_rc = rc_class(dst_first);
      909 
      910   assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
      911 
      912   // Generate spill code!
      913   int size = 0;
      914 
      915   if( src_first == dst_first && src_second == dst_second )
      916     return size;            // Self copy, no move
      917 
      918   // --------------------------------------
      919   // Check for mem-mem move.  push/pop to move.
      920   if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      921     if( src_second == dst_first ) { // overlapping stack copy ranges
      922       assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      923       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      924       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
      925       src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
      926     }
      927     // move low bits
      928     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
      929     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
      930     if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      931       size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      932       size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
      933     }
      934     return size;
      935   }
      936 
      937   // --------------------------------------
      938   // Check for integer reg-reg copy
      939   if( src_first_rc == rc_int && dst_first_rc == rc_int )
      940     size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
      941 
      942   // Check for integer store
      943   if( src_first_rc == rc_int && dst_first_rc == rc_stack )
      944     size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
      945 
      946   // Check for integer load
      947   if( dst_first_rc == rc_int && src_first_rc == rc_stack )
      948     size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
      949 
      950   // --------------------------------------
      951   // Check for float reg-reg copy
      952   if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
      953     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
      954             (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
      955     if( cbuf ) {
      956 
      957       // Note the mucking with the register encode to compensate for the 0/1
      958       // indexing issue mentioned in a comment in the reg_def sections
      959       // for FPR registers many lines above here.
      960 
      961       if( src_first != FPR1L_num ) {
      962         emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
      963         emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
      964         emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      965         emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
      966      } else {
      967         emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
      968         emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
      969      }
      970 #ifndef PRODUCT
      971     } else if( !do_size ) {
      972       if( size != 0 ) st->print("\n\t");
      973       if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      974       else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
      975 #endif
      976     }
      977     return size + ((src_first != FPR1L_num) ? 2+2 : 2);
      978   }
      979 
      980   // Check for float store
      981   if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
      982     return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
      983   }
      984 
      985   // Check for float load
      986   if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
      987     int offset = ra_->reg2offset(src_first);
      988     const char *op_str;
      989     int op;
      990     if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      991       op_str = "FLD_D";
      992       op = 0xDD;
      993     } else {                   // 32-bit load
      994       op_str = "FLD_S";
      995       op = 0xD9;
      996       assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
      997     }
      998     if( cbuf ) {
      999       emit_opcode  (*cbuf, op );
     1000       encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
     1001       emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
     1002       emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
     1003 #ifndef PRODUCT
     1004     } else if( !do_size ) {
     1005       if( size != 0 ) st->print("\n\t");
     1006       st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
     1007 #endif
     1008     }
     1009     int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
     1010     return size + 3+offset_size+2;
     1011   }
     1012 
     1013   // Check for xmm reg-reg copy
     1014   if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
     1015     assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
     1016             (src_first+1 == src_second && dst_first+1 == dst_second),
     1017             "no non-adjacent float-moves" );
     1018     return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
     1019   }
     1020 
     1021   // Check for xmm store
     1022   if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
     1023     return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
     1024   }
     1025 
     1026   // Check for float xmm load
     1027   if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
     1028     return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
     1029   }
     1030 
     1031   // Copy from float reg to xmm reg
     1032   if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
     1033     // copy to the top of stack from floating point reg
     1034     // and use LEA to preserve flags
     1035     if( cbuf ) {
     1036       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
     1037       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
     1038       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
     1039       emit_d8(*cbuf,0xF8);
     1040 #ifndef PRODUCT
     1041     } else if( !do_size ) {
     1042       if( size != 0 ) st->print("\n\t");
     1043       st->print("LEA    ESP,[ESP-8]");
     1044 #endif
     1045     }
     1046     size += 4;
     1047 
     1048     size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
     1049 
     1050     // Copy from the temp memory to the xmm reg.
     1051     size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
     1052 
     1053     if( cbuf ) {
     1054       emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
     1055       emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
     1056       emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
     1057       emit_d8(*cbuf,0x08);
     1058 #ifndef PRODUCT
     1059     } else if( !do_size ) {
     1060       if( size != 0 ) st->print("\n\t");
     1061       st->print("LEA    ESP,[ESP+8]");
     1062 #endif
     1063     }
     1064     size += 4;
     1065     return size;
     1066   }
     1067 
     1068   assert( size > 0, "missed a case" );
     1069 
     1070   // --------------------------------------------------------------------
     1071   // Check for second bits still needing moving.
     1072   if( src_second == dst_second )
     1073     return size;               // Self copy; no move
     1074   assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
     1075 
     1076   // Check for second word int-int move
     1077   if( src_second_rc == rc_int && dst_second_rc == rc_int )
     1078     return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
     1079 
     1080   // Check for second word integer store
     1081   if( src_second_rc == rc_int && dst_second_rc == rc_stack )
     1082     return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
     1083 
     1084   // Check for second word integer load
     1085   if( dst_second_rc == rc_int && src_second_rc == rc_stack )
     1086     return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
     1087 
     1088 
     1089   Unimplemented();
     1090 }
     1091 
     1092 #ifndef PRODUCT
     1093 void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
     1094   implementation( NULL, ra_, false, st );
     1095 }
     1096 #endif
     1097 
     1098 void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
     1099   implementation( &cbuf, ra_, false, NULL );
     1100 }
     1101 
     1102 uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
     1103   return implementation( NULL, ra_, true, NULL );
     1104 }
     1105 
     1106 //=============================================================================
     1107 #ifndef PRODUCT
     1108 void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
     1109   st->print("NOP \t# %d bytes pad for loops and calls", _count);
     1110 }
     1111 #endif
     1112 
     1113 void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
     1114   MacroAssembler _masm(&cbuf);
     1115   __ nop(_count);
     1116 }
     1117 
     1118 uint MachNopNode::size(PhaseRegAlloc *) const {
     1119   return _count;
     1120 }
     1121 
     1122 
     1123 //=============================================================================
     1124 #ifndef PRODUCT
     1125 void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
     1126   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
     1127   int reg = ra_->get_reg_first(this);
     1128   st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
     1129 }
     1130 #endif
     1131 
     1132 void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
     1133   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
     1134   int reg = ra_->get_encode(this);
     1135   if( offset >= 128 ) {
     1136     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
     1137     emit_rm(cbuf, 0x2, reg, 0x04);
     1138     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
     1139     emit_d32(cbuf, offset);
     1140   }
     1141   else {
     1142     emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
     1143     emit_rm(cbuf, 0x1, reg, 0x04);
     1144     emit_rm(cbuf, 0x0, 0x04, ESP_enc);
     1145     emit_d8(cbuf, offset);
     1146   }
     1147 }
     1148 
     1149 uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
     1150   int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
     1151   if( offset >= 128 ) {
     1152     return 7;
     1153   }
     1154   else {
     1155     return 4;
     1156   }
     1157 }
     1158 
     1159 //=============================================================================
     1160 
     1161 // emit call stub, compiled java to interpreter
     1162 void emit_java_to_interp(CodeBuffer &cbuf ) {
     1163   // Stub is fixed up when the corresponding call is converted from calling
     1164   // compiled code to calling interpreted code.
     1165   // mov rbx,0
     1166   // jmp -1
     1167 
     1168   address mark = cbuf.inst_mark();  // get mark within main instrs section
     1169 
     1170   // Note that the code buffer's inst_mark is always relative to insts.
     1171   // That's why we must use the macroassembler to generate a stub.
     1172   MacroAssembler _masm(&cbuf);
     1173 
     1174   address base =
     1175   __ start_a_stub(Compile::MAX_stubs_size);
     1176   if (base == NULL)  return;  // CodeBuffer::expand failed
     1177   // static stub relocation stores the instruction address of the call
     1178   __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
     1179   // static stub relocation also tags the methodOop in the code-stream.
     1180   __ movoop(rbx, (jobject)NULL);  // method is zapped till fixup time
     1181   // This is recognized as unresolved by relocs/nativeInst/ic code
     1182   __ jump(RuntimeAddress(__ pc()));
     1183 
     1184   __ end_a_stub();
     1185   // Update current stubs pointer and restore code_end.
     1186 }
     1187 // size of call stub, compiled java to interpretor
     1188 uint size_java_to_interp() {
     1189   return 10;  // movl; jmp
     1190 }
     1191 // relocation entries for call stub, compiled java to interpretor
     1192 uint reloc_java_to_interp() {
     1193   return 4;  // 3 in emit_java_to_interp + 1 in Java_Static_Call
     1194 }
     1195 
     1196 //=============================================================================
     1197 #ifndef PRODUCT
     1198 void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
     1199   st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
     1200   st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
     1201   st->print_cr("\tNOP");
     1202   st->print_cr("\tNOP");
     1203   if( !OptoBreakpoint )
     1204     st->print_cr("\tNOP");
     1205 }
     1206 #endif
     1207 
     1208 void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
     1209   MacroAssembler masm(&cbuf);
     1210 #ifdef ASSERT
     1211   uint code_size = cbuf.code_size();
     1212 #endif
     1213   masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
     1214   masm.jump_cc(Assembler::notEqual,
     1215                RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
     1216   /* WARNING these NOPs are critical so that verified entry point is properly
     1217      aligned for patching by NativeJump::patch_verified_entry() */
     1218   int nops_cnt = 2;
     1219   if( !OptoBreakpoint ) // Leave space for int3
     1220      nops_cnt += 1;
     1221   masm.nop(nops_cnt);
     1222 
     1223   assert(cbuf.code_size() - code_size == size(ra_), "checking code size of inline cache node");
     1224 }
     1225 
     1226 uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
     1227   return OptoBreakpoint ? 11 : 12;
     1228 }
     1229 
     1230 
     1231 //=============================================================================
     1232 uint size_exception_handler() {
     1233   // NativeCall instruction size is the same as NativeJump.
     1234   // exception handler starts out as jump and can be patched to
     1235   // a call be deoptimization.  (4932387)
     1236   // Note that this value is also credited (in output.cpp) to
     1237   // the size of the code section.
     1238   return NativeJump::instruction_size;
     1239 }
     1240 
     1241 // Emit exception handler code.  Stuff framesize into a register
     1242 // and call a VM stub routine.
     1243 int emit_exception_handler(CodeBuffer& cbuf) {
     1244 
     1245   // Note that the code buffer's inst_mark is always relative to insts.
     1246   // That's why we must use the macroassembler to generate a handler.
     1247   MacroAssembler _masm(&cbuf);
     1248   address base =
     1249   __ start_a_stub(size_exception_handler());
     1250   if (base == NULL)  return 0;  // CodeBuffer::expand failed
     1251   int offset = __ offset();
     1252   __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
     1253   assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
     1254   __ end_a_stub();
     1255   return offset;
     1256 }
     1257 
     1258 uint size_deopt_handler() {
     1259   // NativeCall instruction size is the same as NativeJump.
     1260   // exception handler starts out as jump and can be patched to
     1261   // a call be deoptimization.  (4932387)
     1262   // Note that this value is also credited (in output.cpp) to
     1263   // the size of the code section.
     1264   return 5 + NativeJump::instruction_size; // pushl(); jmp;
     1265 }
     1266 
     1267 // Emit deopt handler code.
     1268 int emit_deopt_handler(CodeBuffer& cbuf) {
     1269 
     1270   // Note that the code buffer's inst_mark is always relative to insts.
     1271   // That's why we must use the macroassembler to generate a handler.
     1272   MacroAssembler _masm(&cbuf);
     1273   address base =
     1274   __ start_a_stub(size_exception_handler());
     1275   if (base == NULL)  return 0;  // CodeBuffer::expand failed
     1276   int offset = __ offset();
     1277   InternalAddress here(__ pc());
     1278   __ pushptr(here.addr());
     1279 
     1280   __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
     1281   assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
     1282   __ end_a_stub();
     1283   return offset;
     1284 }
     1285 
     1286 
     1287 static void emit_double_constant(CodeBuffer& cbuf, double x) {
     1288   int mark = cbuf.insts()->mark_off();
     1289   MacroAssembler _masm(&cbuf);
     1290   address double_address = __ double_constant(x);
     1291   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
     1292   emit_d32_reloc(cbuf,
     1293                  (int)double_address,
     1294                  internal_word_Relocation::spec(double_address),
     1295                  RELOC_DISP32);
     1296 }
     1297 
     1298 static void emit_float_constant(CodeBuffer& cbuf, float x) {
     1299   int mark = cbuf.insts()->mark_off();
     1300   MacroAssembler _masm(&cbuf);
     1301   address float_address = __ float_constant(x);
     1302   cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
     1303   emit_d32_reloc(cbuf,
     1304                  (int)float_address,
     1305                  internal_word_Relocation::spec(float_address),
     1306                  RELOC_DISP32);
     1307 }
     1308 
     1309 
     1310 const bool Matcher::match_rule_supported(int opcode) {
     1311   if (!has_match_rule(opcode))
     1312     return false;
     1313 
     1314   return true;  // Per default match rules are supported.
     1315 }
     1316 
     1317 int Matcher::regnum_to_fpu_offset(int regnum) {
            // Converts an allocator register number into an FPU register offset by
            // subtracting the size (32) of the first register chunk.
     1318   return regnum - 32; // The FP registers are in the second chunk
     1319 }
     1320 
     1321 bool is_positive_zero_float(jfloat f) {
            // True when f equals +0.0F under jint_cast.  The comparison goes through
            // jint_cast instead of ==, presumably so that -0.0F (numerically equal to
            // 0.0F) is rejected -- TODO confirm jint_cast is a bit-pattern cast.
     1322   return jint_cast(f) == jint_cast(0.0F);
     1323 }
     1324 
     1325 bool is_positive_one_float(jfloat f) {
            // True when f equals 1.0F under jint_cast (presumably a bit-pattern
            // comparison -- TODO confirm).
     1326   return jint_cast(f) == jint_cast(1.0F);
     1327 }
     1328 
     1329 bool is_positive_zero_double(jdouble d) {
            // True when d equals +0.0 under jlong_cast.  The comparison goes through
            // jlong_cast instead of ==, presumably so that -0.0 is rejected -- TODO
            // confirm jlong_cast is a bit-pattern cast.
     1330   return jlong_cast(d) == jlong_cast(0.0);
     1331 }
     1332 
     1333 bool is_positive_one_double(jdouble d) {
            // True when d equals 1.0 under jlong_cast (presumably a bit-pattern
            // comparison -- TODO confirm).
     1334   return jlong_cast(d) == jlong_cast(1.0);
     1335 }
     1336 
     1337 // This query is UltraSparc specific; returning true just means we have a
          // fast l2f (long-to-float) conversion on this platform.
     1338 const bool Matcher::convL2FSupported(void) {
     1339   return true;
     1340 }
     1341 
     1342 // Vector width in bytes: 8 when UseSSE >= 2, otherwise 0.
     1343 const uint Matcher::vector_width_in_bytes(void) {
     1344   return UseSSE >= 2 ? 8 : 0;
     1345 }
     1346 
     1347 // Vector ideal reg: vectors are always reported as Op_RegD here.
     1348 const uint Matcher::vector_ideal_reg(void) {
     1349   return Op_RegD;
     1350 }
     1351 
     1352 // Is this branch offset short enough that a short branch can be used?
     1353 //
     1354 // NOTE: If the platform does not provide any short branch variants, then
     1355 //       this method should return false for offset 0.
          //
          // rule   - the match rule of the branch being considered
          // offset - the branch displacement to test
          // Returns true when offset fits the signed 8-bit range [-128, 127]; for
          // jmpConUCF2_rule the accepted range is narrowed to [-126, 125].
     1356 bool Matcher::is_short_branch_offset(int rule, int offset) {
     1357   // the short version of jmpConUCF2 contains multiple branches,
     1358   // making the reach slightly less
     1359   if (rule == jmpConUCF2_rule)
     1360     return (-126 <= offset && offset <= 125);
     1361   return (-128 <= offset && offset <= 127);
     1362 }
     1363 
     1364 const bool Matcher::isSimpleConstant64(jlong value) {
     1365   // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
            // Always answered with false here, regardless of value.
     1366   return false;
     1367 }
     1368 
     1369 // The ecx parameter to rep stos for the ClearArray node is in dwords.
     1370 const bool Matcher::init_array_count_is_in_bytes = false;
     1371 
     1372 // Threshold size for cleararray.
     1373 const int Matcher::init_array_short_size = 8 * BytesPerLong;
     1374 
     1375 // Should the Matcher clone shifts on addressing modes, expecting them to
     1376 // be subsumed into complex addressing expressions or compute them into
     1377 // registers?  True for Intel but false for most RISCs
     1378 const bool Matcher::clone_shift_expressions = true;
     1379 
     1380 // Is it better to copy float constants, or load them directly from memory?
     1381 // Intel can load a float constant from a direct address, requiring no
     1382 // extra registers.  Most RISCs will have to materialize an address into a
     1383 // register first, so they would do better to copy the constant from stack.
     1384 const bool Matcher::rematerialize_float_constants = true;
     1385 
     1386 // If CPU can load and store mis-aligned doubles directly then no fixup is
     1387 // needed.  Else we split the double into 2 integer pieces and move it
     1388 // piece-by-piece.  Only happens when passing doubles into C code as the
     1389 // Java calling convention forces doubles to be aligned.
     1390 const bool Matcher::misaligned_doubles_ok = true;
     1391 
     1392 
     1393 void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
            // Replaces the memory operand of 'node' that owns input edge 'idx' with
            // the corresponding "*_win95_safe" operand variant.  Operand kinds listed
            // as needing no transformation are left untouched.
            //
            // node - machine node whose memory operand may be rewritten
            // idx  - input edge index; must be >= node->oper_input_base()
     1394   // Get the memory operand from the node
     1395   uint numopnds = node->num_opnds();        // Virtual call for number of operands
     1396   uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
     1397   assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
     1398   uint opcnt     = 1;                 // First operand
     1399   uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
            // Walk the operands, accumulating their edge counts, until the operand
            // whose edge range contains idx is found.
     1400   while( idx >= skipped+num_edges ) {
     1401     skipped += num_edges;
     1402     opcnt++;                          // Bump operand count
     1403     assert( opcnt < numopnds, "Accessing non-existent operand" );
     1404     num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
     1405   }
     1406
     1407   MachOper *memory = node->_opnds[opcnt];
     1408   MachOper *new_memory = NULL;
            // Build the replacement operand, carrying over the displacement and/or
            // scale of the original where the operand kind has them.
     1409   switch (memory->opcode()) {
     1410   case DIRECT:
     1411   case INDOFFSET32X:
     1412     // No transformation necessary.
     1413     return;
     1414   case INDIRECT:
     1415     new_memory = new (C) indirect_win95_safeOper( );
     1416     break;
     1417   case INDOFFSET8:
     1418     new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
     1419     break;
     1420   case INDOFFSET32:
     1421     new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
     1422     break;
     1423   case INDINDEXOFFSET:
     1424     new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
     1425     break;
     1426   case INDINDEXSCALE:
     1427     new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
     1428     break;
     1429   case INDINDEXSCALEOFFSET:
     1430     new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
     1431     break;
     1432   case LOAD_LONG_INDIRECT:
     1433   case LOAD_LONG_INDOFFSET32:
     1434     // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
     1435     return;
     1436   default:
            // Any other operand kind is a programming error in debug builds and is
            // left unchanged.
     1437     assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
     1438     return;
     1439   }
     1440   node->_opnds[opcnt] = new_memory;   // install the replacement operand
     1441 }
     1442 
     1443 // Advertise here if the CPU requires explicit rounding operations
     1444 // to implement the UseStrictFP mode.
     1445 const bool Matcher::strict_fp_requires_explicit_rounding = true;
     1446 
     1447 // Are floats converted to double when stored to stack during deoptimization?
     1448 // On x32 it is stored with conversion only when FPU is used for floats.
          // That is the case exactly when UseSSE == 0.
     1449 bool Matcher::float_in_double() { return (UseSSE == 0); }
     1450 
     1451 // Do ints take an entire long register or just half?
     1452 const bool Matcher::int_in_long = false;
     1453 
     1454 // Return whether or not this register is ever used as an argument.  This
     1455 // function is used on startup to build the trampoline stubs in generateOptoStub.
     1456 // Registers not mentioned will be killed by the VM call in the trampoline, and
     1457 // arguments in those registers will not be available to the callee.
          //
          // Accepted registers: ECX and EDX always; the XMM0a/XMM1a halves when
          // UseSSE >= 1; the XMM0b/XMM1b halves when UseSSE >= 2.
     1458 bool Matcher::can_be_java_arg( int reg ) {
     1459   if(  reg == ECX_num   || reg == EDX_num   ) return true;
     1460   if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
     1461   if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
     1462   return false;
     1463 }
     1464 
     1465 bool Matcher::is_spillable_arg( int reg ) {
            // On this platform the spillable argument registers are exactly the
            // registers accepted by can_be_java_arg().
     1466   return can_be_java_arg(reg);
     1467 }
     1468 
     1469 // Register for DIVI projection of divmodI: the quotient is taken from EAX.
     1470 RegMask Matcher::divI_proj_mask() {
     1471   return EAX_REG_mask;
     1472 }
     1473 
     1474 // Register for MODI projection of divmodI: the remainder is taken from EDX.
     1475 RegMask Matcher::modI_proj_mask() {
     1476   return EDX_REG_mask;
     1477 }
     1478 
     1479 // Register for DIVL projection of divmodL
          // Not expected to be called on this platform: it hits ShouldNotReachHere();
          // the empty RegMask only satisfies the return type.
     1480 RegMask Matcher::divL_proj_mask() {
     1481   ShouldNotReachHere();
     1482   return RegMask();
     1483 }
     1484 
     1485 // Register for MODL projection of divmodL
          // Not expected to be called on this platform: it hits ShouldNotReachHere();
          // the empty RegMask only satisfies the return type.
     1486 RegMask Matcher::modL_proj_mask() {
     1487   ShouldNotReachHere();
     1488   return RegMask();
     1489 }
     1490 
     1491 const RegMask Matcher::method_handle_invoke_SP_save_mask() {
            // EBP is the register used to save SP around method handle invokes
            // (the preserve_SP / restore_SP encodings copy between the stack
            // pointer and rbp).
     1492   return EBP_REG_mask;
     1493 }
     1494 
     1495 // Returns true if the high 32 bits of the value is known to be zero.
          // Only two shapes are recognized: a LoadUI2L node, and an AndL whose second
          // input is a constant with no bits set in its upper 32 bits.  Any other
          // node yields false, i.e. "not known", not "known to be non-zero".
     1496 bool is_operand_hi32_zero(Node* n) {
     1497   int opc = n->Opcode();
     1498   if (opc == Op_LoadUI2L) {
     1499     return true;
     1500   }
     1501   if (opc == Op_AndL) {
     1502     Node* o2 = n->in(2);
              // The mask constant must have all of its upper 32 bits clear.
     1503     if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
     1504       return true;
     1505     }
     1506   }
     1507   return false;
     1508 }
     1509 
     1510 %}
     1511 
     1512 //----------ENCODING BLOCK-----------------------------------------------------
     1513 // This block specifies the encoding classes used by the compiler to output
     1514 // byte streams.  Encoding classes generate functions which are called by
     1515 // Machine Instruction Nodes in order to generate the bit encoding of the
     1516 // instruction.  Operands specify their base encoding interface with the
     1517 // interface keyword.  Four interfaces are currently supported:
     1518 // REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
     1519 // operand to generate a function which returns its register number when
     1520 // queried.   CONST_INTER causes an operand to generate a function which
     1521 // returns the value of the constant when queried.  MEMORY_INTER causes an
     1522 // operand to generate four functions which return the Base Register, the
     1523 // Index Register, the Scale Value, and the Offset Value of the operand when
     1524 // queried.  COND_INTER causes an operand to generate six functions which
     1525 // return the encoding code (ie - encoding bits for the instruction)
     1526 // associated with each basic boolean condition for a conditional instruction.
     1527 // Instructions specify two basic values for encoding.  They use the
     1528 // ins_encode keyword to specify their encoding class (which must be one of
     1529 // the class names specified in the encoding block), and they use the
     1530 // opcode keyword to specify, in order, their primary, secondary, and
     1531 // tertiary opcode.  Only the opcode sections which a particular instruction
     1532 // needs for encoding need to be specified.
     1533 encode %{
     1534   // Build emit functions for each basic byte or larger field in the intel
     1535   // encoding scheme (opcode, rm, sib, immediate), and call them from C++
     1536   // code in the enc_class source block.  Emit functions will live in the
     1537   // main source block for now.  In future, we can generalize this by
     1538   // adding a syntax that specifies the sizes of fields in an order,
     1539   // so that the adlc can build the emit functions automagically
     1540 
     1541   // Emit primary opcode (the first value of the instruction's opcode keyword)
     1542   enc_class OpcP %{
     1543     emit_opcode(cbuf, $primary);
     1544   %}
     1545 
     1546   // Emit secondary opcode (the second value of the instruction's opcode keyword)
     1547   enc_class OpcS %{
     1548     emit_opcode(cbuf, $secondary);
     1549   %}
     1550 
     1551   // Emit opcode directly: the opcode byte is the constant of operand d8
     1552   enc_class Opcode(immI d8) %{
     1553     emit_opcode(cbuf, $d8$$constant);
     1554   %}
     1555 
     1556   enc_class SizePrefix %{
     1557     emit_opcode(cbuf,0x66);   // 0x66 is the x86 operand-size override prefix
     1558   %}
     1559 
     1560   enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
              // Register-to-register r/m byte: mode 0x3 with dst and src encodings.
              // The opcode itself is emitted separately.
     1561     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
     1562   %}
     1563 
     1564   enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{    // OpcRegReg(Many)
              // Same as RegReg, but first emits the opcode byte given as an operand.
     1565     emit_opcode(cbuf,$opcode$$constant);
     1566     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
     1567   %}
     1568 
     1569   enc_class mov_r32_imm0( eRegI dst ) %{
              // Loads the constant zero into dst using the full 5-byte MOV r32,imm32 form.
     1570     emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
     1571     emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
     1572   %}
     1573 
     1574   enc_class cdq_enc %{
     1575     // Full implementation of Java idiv and irem; checks for
     1576     // special case as described in JVM spec., p.243 & p.271.
     1577     //
     1578     //         normal case                           special case
     1579     //
     1580     // input : eax: dividend                         min_int
     1581     //         reg: divisor                          -1
     1582     //
     1583     // output: eax: quotient  (= eax idiv reg)       min_int
     1584     //         edx: remainder (= eax irem reg)       0
     1585     //
     1586     //  Code sequence:
     1587     //
     1588     //  81 F8 00 00 00 80    cmp         eax,80000000h
     1589     //  0F 85 0B 00 00 00    jne         normal_case
     1590     //  33 D2                xor         edx,edx
     1591     //  83 F9 FF             cmp         ecx,0FFFFFFFFh   (imm8 FF, sign-extended)
     1592     //  0F 84 03 00 00 00    je          done
     1593     //                  normal_case:
     1594     //  99                   cdq
     1595     //  F7 F9                idiv        ecx
     1596     //                  done:
     1597     //
              // NOTE: the byte sequence below hard-codes the registers: the dividend
              // compare uses EAX (81 F8) and the divisor compare uses ECX (83 F9).
              // The jne displacement 0x0B skips xor (2) + cmp (3) + je (6) bytes; the
              // je displacement 0x03 skips cdq (1) plus a 2-byte idiv, so the idiv
              // emitted by the user of this encoding must be exactly 2 bytes long.
     1598     emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
     1599     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
     1600     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
     1601     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
     1602     emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
     1603     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
     1604     emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
     1605     emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,-1
     1606     emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
     1607     emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
     1608     emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
     1609     // normal_case:
     1610     emit_opcode(cbuf,0x99);                                         // cdq
     1611     // idiv (note: must be emitted by the user of this rule)
     1612     // done: (the je above lands here, just past the user-emitted idiv)
     1613   %}
     1614 
     1615   // Dense encoding for older common ops: a single opcode byte formed by
            // adding the register encoding to the base opcode.
     1616   enc_class Opc_plus(immI opcode, eRegI reg) %{
     1617     emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
     1618   %}
     1619 
     1620 
     1621   // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
            // Only the opcode byte is emitted here; the immediate itself is emitted
            // by a separate encoding (see Con8or32, which applies the same range test).
     1622   enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
     1623     // Check for 8-bit immediate, and set sign extend bit in opcode
     1624     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
     1625       emit_opcode(cbuf, $primary | 0x02);
     1626     }
     1627     else {                          // If 32-bit immediate
     1628       emit_opcode(cbuf, $primary);
     1629     }
     1630   %}
     1631 
     1632   enc_class OpcSErm (eRegI dst, immI imm) %{    // OpcSEr/m
     1633     // Emit primary opcode and set sign-extend bit
     1634     // Check for 8-bit immediate, and set sign extend bit in opcode
              // (bit 0x02 is set when the immediate fits in a signed byte)
     1635     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
     1636       emit_opcode(cbuf, $primary | 0x02);    }
     1637     else {                          // If 32-bit immediate
     1638       emit_opcode(cbuf, $primary);
     1639     }
     1640     // Emit r/m byte with secondary opcode, after primary opcode.
              // The immediate itself is not emitted by this encoding.
     1641     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
     1642   %}
     1643 
     1644   enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
     1645     // Emit the immediate as one byte when it fits in a signed 8-bit value
              // (the same range test OpcSE/OpcSErm use to pick the opcode form) ...
     1646     if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
     1647       $$$emit8$imm$$constant;
     1648     }
     1649     else {                          // If 32-bit immediate
     1650       // Output immediate
     1651       $$$emit32$imm$$constant;
     1652     }
     1653   %}
     1654 
     1655   enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
              // Low-word half of a long op with an immediate: operates on dst's low
              // register with the low 32 bits of imm, using the secondary opcode in
              // the r/m byte.  Emits opcode, r/m byte and the 8- or 32-bit immediate.
     1656     // Emit primary opcode and set sign-extend bit
     1657     // Check for 8-bit immediate, and set sign extend bit in opcode
     1658     int con = (int)$imm$$constant; // Throw away top bits
     1659     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
     1660     // Emit r/m byte with secondary opcode, after primary opcode.
     1661     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
     1662     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
     1663     else                               emit_d32(cbuf,con);
     1664   %}
     1665 
     1666   enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
              // High-word half of a long op with an immediate: operates on
              // HIGH_FROM_LOW(dst) with the upper 32 bits of imm, using the tertiary
              // opcode in the r/m byte.  Emits opcode, r/m byte and the immediate.
     1667     // Emit primary opcode and set sign-extend bit
     1668     // Check for 8-bit immediate, and set sign extend bit in opcode
     1669     int con = (int)($imm$$constant >> 32); // Throw away bottom bits
     1670     emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
     1671     // Emit r/m byte with tertiary opcode, after primary opcode.
     1672     emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
     1673     if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
     1674     else                               emit_d32(cbuf,con);
     1675   %}
     1676 
     1677   enc_class Lbl (label labl) %{ // JMP, CALL
              // Emits a 32-bit displacement to the label, computed relative to the
              // end of the 4-byte field being emitted (code_size()+4).  A NULL label
              // yields a displacement of 0.
     1678     Label *l = $labl$$label;
     1679     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size()+4)) : 0);
     1680   %}
     1681 
     1682   enc_class LblShort (label labl) %{ // JMP, CALL
              // Emits an 8-bit displacement to the label, computed relative to the
              // end of the 1-byte field being emitted (code_size()+1).  A NULL label
              // yields 0.  Debug builds assert that the value fits a signed byte.
     1683     Label *l = $labl$$label;
     1684     int disp = l ? (l->loc_pos() - (cbuf.code_size()+1)) : 0;
     1685     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
     1686     emit_d8(cbuf, disp);
     1687   %}
     1688 
     1689   enc_class OpcSReg (eRegI dst) %{    // BSWAP
              // Emits one byte built by emit_cc from the secondary opcode and dst's
              // register encoding (compare the 0xC8 + register use in bswap_long_bytes).
     1690     emit_cc(cbuf, $secondary, $dst$$reg );
     1691   %}
     1692 
     1693   enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
              // Byte-reverses a 64-bit value held in a register pair: each 32-bit half
              // is byte-swapped, then the two halves are exchanged.
     1694     int destlo = $dst$$reg;
     1695     int desthi = HIGH_FROM_LOW(destlo);
     1696     // bswap lo
     1697     emit_opcode(cbuf, 0x0F);
     1698     emit_cc(cbuf, 0xC8, destlo);
     1699     // bswap hi
     1700     emit_opcode(cbuf, 0x0F);
     1701     emit_cc(cbuf, 0xC8, desthi);
     1702     // xchg lo and hi
     1703     emit_opcode(cbuf, 0x87);
     1704     emit_rm(cbuf, 0x3, destlo, desthi);
     1705   %}
     1706 
     1707   enc_class RegOpc (eRegI div) %{    // IDIV, IMOD, JMP indirect, ...
              // Emits only the r/m byte: mode 0x3, the secondary opcode as the opcode
              // extension, and the register operand.  The opcode byte comes from
              // another encoding.
     1708     emit_rm(cbuf, 0x3, $secondary, $div$$reg );
     1709   %}
     1710 
     1711   enc_class Jcc (cmpOp cop, label labl) %{    // JCC
              // Long conditional jump: primary opcode byte, then a byte combining the
              // secondary opcode with the condition code, then a 32-bit displacement
              // relative to the end of the instruction (0 when the label is NULL).
     1712     Label *l = $labl$$label;
     1713     $$$emit8$primary;
     1714     emit_cc(cbuf, $secondary, $cop$$cmpcode);
     1715     emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size()+4)) : 0);
     1716   %}
     1717 
     1718   enc_class JccShort (cmpOp cop, label labl) %{    // JCC
              // Short conditional jump: one byte combining the primary opcode with the
              // condition code, then an 8-bit displacement relative to the end of the
              // instruction (0 when the label is NULL).  Debug builds assert the range.
     1719     Label *l = $labl$$label;
     1720     emit_cc(cbuf, $primary, $cop$$cmpcode);
     1721     int disp = l ? (l->loc_pos() - (cbuf.code_size()+1)) : 0;
     1722     assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
     1723     emit_d8(cbuf, disp);
     1724   %}
     1725 
     1726   enc_class enc_cmov(cmpOp cop ) %{ // CMOV
              // Emits the two opcode bytes of a conditional move: the primary opcode,
              // then the secondary opcode combined with the condition code.  The r/m
              // byte is left to another encoding.
     1727     $$$emit8$primary;
     1728     emit_cc(cbuf, $secondary, $cop$$cmpcode);
     1729   %}
     1730 
     1731   enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
              // Builds a two-byte opcode as 0xDA00 + condition code + (src register
              // encoding - 1) and emits it high byte first.
              // NOTE(review): presumably an x87 FCMOVcc from src into the stack top;
              // this relies on the cmpcode layout of the cmpOp operand used -- confirm.
     1732     int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
     1733     emit_d8(cbuf, op >> 8 );
     1734     emit_d8(cbuf, op & 255);
     1735   %}
     1736 
     1737   // emulate a CMOV with a conditional branch around a MOV
            // (brOffs is the 8-bit branch displacement, i.e. the size of the MOV that
            // the caller emits right after this encoding)
     1738   enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
     1739     // Invert sense of branch from sense of CMOV
              // (flipping the low bit of the condition code; 0x70 is the short Jcc base)
     1740     emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
     1741     emit_d8( cbuf, $brOffs$$constant );
     1742   %}
     1743 
     1744   enc_class enc_PartialSubtypeCheck( ) %{
              // Emits the slow-path subtype check through the MacroAssembler, using a
              // fixed register assignment (see the per-register comments below).
     1745     Register Redi = as_Register(EDI_enc); // result register
     1746     Register Reax = as_Register(EAX_enc); // super class
     1747     Register Recx = as_Register(ECX_enc); // killed
     1748     Register Resi = as_Register(ESI_enc); // sub class
     1749     Label miss;
     1750
     1751     MacroAssembler _masm(&cbuf);
              // No success label is supplied (NULL); failure jumps to 'miss'.
     1752     __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
     1753                                      NULL, &miss,
     1754                                      /*set_cond_codes:*/ true);
              // When the instruction's primary opcode is non-zero, the result register
              // is cleared on the path that does not jump to 'miss'.
     1755     if ($primary) {
     1756       __ xorptr(Redi, Redi);
     1757     }
     1758     __ bind(miss);
     1759   %}
     1760 
     1761   enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
              // With UseSSE >= 2 only an optional FPU verification is emitted;
              // otherwise the FPU stack is emptied.  The emitted size is recorded in
              // sizeof_FFree_Float_Stack_All on first use and asserted to be the same
              // on every later emission.
     1762     MacroAssembler masm(&cbuf);
     1763     int start = masm.offset();
     1764     if (UseSSE >= 2) {
     1765       if (VerifyFPU) {
     1766         masm.verify_FPU(0, "must be empty in SSE2+ mode");
     1767       }
     1768     } else {
     1769       // External c_calling_convention expects the FPU stack to be 'clean'.
     1770       // Compiled code leaves it dirty.  Do cleanup now.
     1771       masm.empty_FPU_stack();
     1772     }
              // -1 marks "size not measured yet".
     1773     if (sizeof_FFree_Float_Stack_All == -1) {
     1774       sizeof_FFree_Float_Stack_All = masm.offset() - start;
     1775     } else {
     1776       assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
     1777     }
     1778   %}
     1779 
     1780   enc_class Verify_FPU_For_Leaf %{
              // Emits an FPU verification after a runtime leaf call, but only when
              // the VerifyFPU flag is set; otherwise emits nothing.
     1781     if( VerifyFPU ) {
     1782       MacroAssembler masm(&cbuf);
     1783       masm.verify_FPU( -3, "Returning from Runtime Leaf call");
     1784     }
     1785   %}
     1786 
     1787   enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
              // Emits a direct call to a runtime entry point and, with UseSSE >= 2,
              // the fix-up code for a float/double result.
     1788     // This is the instruction starting address for relocation info.
     1789     cbuf.set_inst_mark();
     1790     $$$emit8$primary;
     1791     // CALL directly to the runtime
              // (displacement is relative to the end of the 4-byte field: code_end()+4)
     1792     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
     1793                 runtime_call_Relocation::spec(), RELOC_IMM32 );
     1794
     1795     if (UseSSE >= 2) {
     1796       MacroAssembler _masm(&cbuf);
     1797       BasicType rt = tf()->return_type();
     1798
     1799       if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
     1800         // A C runtime call where the return value is unused.  In SSE2+
     1801         // mode the result needs to be removed from the FPU stack.  It's
     1802         // likely that this function call could be removed by the
     1803         // optimizer if the C function is a pure function.
     1804         __ ffree(0);
     1805       } else if (rt == T_FLOAT) {
                // Move the float result from the FPU stack to xmm0 through a 4-byte
                // stack temporary; lea is used to adjust the stack pointer.
     1806         __ lea(rsp, Address(rsp, -4));
     1807         __ fstp_s(Address(rsp, 0));
     1808         __ movflt(xmm0, Address(rsp, 0));
     1809         __ lea(rsp, Address(rsp,  4));
     1810       } else if (rt == T_DOUBLE) {
                // Same transfer for a double result, through an 8-byte temporary.
     1811         __ lea(rsp, Address(rsp, -8));
     1812         __ fstp_d(Address(rsp, 0));
     1813         __ movdbl(xmm0, Address(rsp, 0));
     1814         __ lea(rsp, Address(rsp,  8));
     1815       }
     1816     }
     1817   %}
     1818 
     1819 
     1820   enc_class pre_call_FPU %{
     1821     // If method sets FPU control word restore it here
              // (loads the standard control word when compiling in 24-bit FP mode).
              // off0/off1 exist only in debug builds and check that the emitted size
              // matches pre_call_FPU_size().
     1822     debug_only(int off0 = cbuf.code_size());
     1823     if( Compile::current()->in_24_bit_fp_mode() ) {
     1824       MacroAssembler masm(&cbuf);
     1825       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
     1826     }
     1827     debug_only(int off1 = cbuf.code_size());
     1828     assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction");
     1829   %}
     1830 
     1831   enc_class post_call_FPU %{
     1832     // If method sets FPU control word do it here also
              // (reloads the 24-bit control word after the call when compiling in
              // 24-bit FP mode; the counterpart of pre_call_FPU)
     1833     if( Compile::current()->in_24_bit_fp_mode() ) {
     1834       MacroAssembler masm(&cbuf);
     1835       masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
     1836     }
     1837   %}
     1838 
     1839   enc_class preserve_SP %{
              // Saves the stack pointer in rbp.  In debug builds the emitted size is
              // checked against preserve_SP_size().
     1840     debug_only(int off0 = cbuf.code_size());
     1841     MacroAssembler _masm(&cbuf);
     1842     // RBP is preserved across all calls, even compiled calls.
     1843     // Use it to preserve RSP in places where the callee might change the SP.
     1844     __ movptr(rbp, rsp);
     1845     debug_only(int off1 = cbuf.code_size());
     1846     assert(off1 - off0 == preserve_SP_size(), "correct size prediction");
     1847   %}
     1848 
     1849   enc_class restore_SP %{
              // Counterpart of preserve_SP: copies rbp back into the stack pointer.
     1850     MacroAssembler _masm(&cbuf);
     1851     __ movptr(rsp, rbp);
     1852   %}
     1853 
     1854   enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
     1855     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
     1856     // who we intended to call.
              // The relocation type attached to the 32-bit displacement depends on the
              // call: runtime call when there is no _method, optimized-virtual call
              // when _optimized_virtual is set, static call otherwise.
     1857     cbuf.set_inst_mark();
     1858     $$$emit8$primary;
     1859     if ( !_method ) {
     1860       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
     1861                      runtime_call_Relocation::spec(), RELOC_IMM32 );
     1862     } else if(_optimized_virtual) {
     1863       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
     1864                      opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
     1865     } else {
     1866       emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
     1867                      static_call_Relocation::spec(), RELOC_IMM32 );
     1868     }
     1869     if( _method ) {  // Emit stub for static call
     1870       emit_java_to_interp(cbuf);
     1871     }
     1872   %}
     1873 
     1874   enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
     1875     // !!!!!
     1876     // Generate  "Mov EAX,0x00", placeholder instruction to load oop-info
     1877     // emit_call_dynamic_prologue( cbuf );
              // The placeholder immediate is Universe::non_oop_word(), tagged with an
              // oop relocation.
     1878     cbuf.set_inst_mark();
     1879     emit_opcode(cbuf, 0xB8 + EAX_enc);        // mov    EAX,-1
     1880     emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
              // Remember where the MOV starts; the call's relocation refers back to it.
     1881     address  virtual_call_oop_addr = cbuf.inst_mark();
     1882     // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
     1883     // who we intended to call.
     1884     cbuf.set_inst_mark();
     1885     $$$emit8$primary;
     1886     emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
     1887                 virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
     1888   %}
     1889 
     1890   enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
              // Indirect call through the from_compiled field of the method held in
              // EAX.  The field offset must fit in a signed byte because the
              // 8-bit-displacement addressing form is used.
     1891     int disp = in_bytes(methodOopDesc::from_compiled_offset());
     1892     assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
     1893
     1894     // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
     1895     cbuf.set_inst_mark();
     1896     $$$emit8$primary;
     1897     emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
     1898     emit_d8(cbuf, disp);             // Displacement
     1899
     1900   %}
     1901 
     1902   enc_class Xor_Reg (eRegI dst) %{
              // XOR dst,dst (opcode 0x33 with dst in both register fields): zeroes dst.
     1903     emit_opcode(cbuf, 0x33);
     1904     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
     1905   %}
     1906 
     1907 //   Following encoding is no longer used, but may be restored if calling
     1908 //   convention changes significantly.
     1909 //   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
     1910 //
     1911 //   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
     1912 //     // int ic_reg     = Matcher::inline_cache_reg();
     1913 //     // int ic_encode  = Matcher::_regEncode[ic_reg];
     1914 //     // int imo_reg    = Matcher::interpreter_method_oop_reg();
     1915 //     // int imo_encode = Matcher::_regEncode[imo_reg];
     1916 //
     1917 //     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
     1918 //     // // so we load it immediately before the call
     1919 //     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
     1920 //     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
     1921 //
     1922 //     // xor rbp,ebp
     1923 //     emit_opcode(cbuf, 0x33);
     1924 //     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
     1925 //
     1926 //     // CALL to interpreter.
     1927 //     cbuf.set_inst_mark();
     1928 //     $$$emit8$primary;
     1929 //     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.code_end()) - 4),
     1930 //                 runtime_call_Relocation::spec(), RELOC_IMM32 );
     1931 //   %}
     1932 
     1933   enc_class RegOpcImm (eRegI dst, immI8 shift) %{    // SHL, SAR, SHR
              // Shift-by-immediate form: primary opcode, r/m byte carrying the
              // secondary opcode and dst, then the 8-bit shift count.
     1934     $$$emit8$primary;
     1935     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
     1936     $$$emit8$shift$$constant;
     1937   %}
     1938 
     1939   enc_class LdImmI (eRegI dst, immI src) %{    // Load Immediate
     1940     // Load immediate does not have a zero or sign extended version
     1941     // for 8-bit immediates
              // so the full 32-bit constant is always emitted after opcode 0xB8+reg.
     1942     emit_opcode(cbuf, 0xB8 + $dst$$reg);
     1943     $$$emit32$src$$constant;
     1944   %}
     1945 
     1946   enc_class LdImmP (eRegI dst, immI src) %{    // Load Immediate
     1947     // Load immediate does not have a zero or sign extended version
     1948     // for 8-bit immediates
              // Same shape as LdImmI, except that the base opcode comes from the
              // instruction's primary opcode instead of a hard-coded 0xB8.
     1949     emit_opcode(cbuf, $primary + $dst$$reg);
     1950     $$$emit32$src$$constant;
     1951   %}
     1952 
     1953   enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
     1954     // Load immediate does not have a zero or sign extended version
     1955     // for 8-bit immediates
              // Loads the low 32 bits of the long constant into dst's low register.
              // A zero half is materialized with the 2-byte XOR instead of a 5-byte
              // load.  NOTE(review): unlike MOV, XOR writes the flags; presumably the
              // instructions using this encoding account for that -- confirm.
     1956     int dst_enc = $dst$$reg;
     1957     int src_con = $src$$constant & 0x0FFFFFFFFL;
     1958     if (src_con == 0) {
     1959       // xor dst, dst
     1960       emit_opcode(cbuf, 0x33);
     1961       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
     1962     } else {
     1963       emit_opcode(cbuf, $primary + dst_enc);
     1964       emit_d32(cbuf, src_con);
     1965     }
     1966   %}
     1967 
     1968   enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
     1969     // Load immediate does not have a zero or sign extended version
     1970     // for 8-bit immediates
              // Loads the upper 32 bits of the long constant into the high register of
              // dst.  A zero half is materialized with XOR, as in LdImmL_Lo.
              // NOTE(review): "+ 2" selects the high register of the pair; sibling
              // encodings spell this HIGH_FROM_LOW(...) -- presumably equivalent.
     1971     int dst_enc = $dst$$reg + 2;
     1972     int src_con = ((julong)($src$$constant)) >> 32;
     1973     if (src_con == 0) {
     1974       // xor dst, dst
     1975       emit_opcode(cbuf, 0x33);
     1976       emit_rm(cbuf, 0x3, dst_enc, dst_enc);
     1977     } else {
     1978       emit_opcode(cbuf, $primary + dst_enc);
     1979       emit_d32(cbuf, src_con);
     1980     }
     1981   %}
     1982 
     1983 
     1984   enc_class LdImmD (immD src) %{    // Load Immediate
              // Loads a double constant: +0.0 and 1.0 use the dedicated two-byte
              // instructions; any other value is loaded from memory.
     1985     if( is_positive_zero_double($src$$constant)) {
     1986       // FLDZ
     1987       emit_opcode(cbuf,0xD9);
     1988       emit_opcode(cbuf,0xEE);
     1989     } else if( is_positive_one_double($src$$constant)) {
     1990       // FLD1
     1991       emit_opcode(cbuf,0xD9);
     1992       emit_opcode(cbuf,0xE8);
     1993     } else {
                // Opcode 0xDD with an r/m byte of (0x0, 0x0, 0x5), followed by the
                // relocated 32-bit address of the constant.
     1994       emit_opcode(cbuf,0xDD);
     1995       emit_rm(cbuf, 0x0, 0x0, 0x5);
     1996       emit_double_constant(cbuf, $src$$constant);
     1997     }
     1998   %}
     1999 
     2000 
     2001   enc_class LdImmF (immF src) %{    // Load Immediate
              // Float counterpart of LdImmD: +0.0F and 1.0F use the same two-byte
              // sequences that LdImmD labels FLDZ (D9 EE) and FLD1 (D9 E8); any other
              // value is loaded from memory using the instruction's primary opcode.
     2002     if( is_positive_zero_float($src$$constant)) {
     2003       emit_opcode(cbuf,0xD9);
     2004       emit_opcode(cbuf,0xEE);
     2005     } else if( is_positive_one_float($src$$constant)) {
     2006       emit_opcode(cbuf,0xD9);
     2007       emit_opcode(cbuf,0xE8);
     2008     } else {
     2009       $$$emit8$primary;
     2010       // Load immediate does not have a zero or sign extended version
     2011       // for 8-bit immediates
     2012       // First load to TOS, then move to dst
     2013       emit_rm(cbuf, 0x0, 0x0, 0x5);
     2014       emit_float_constant(cbuf, $src$$constant);
     2015     }
     2016   %}
     2017 
     2018   enc_class LdImmX (regX dst, immXF con) %{    // Load Immediate
              // Emits only the operand bytes: ModRM (mod=00, reg=$dst, r/m=101, i.e. an
              // absolute disp32 operand) followed by the float constant reference.
              // No opcode is emitted here; presumably an earlier encoding of the same
              // instruction supplies it -- confirm at the use site.
     2019     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
     2020     emit_float_constant(cbuf, $con$$constant);
     2021   %}
     2022 
     2023   enc_class LdImmXD (regXD dst, immXD con) %{    // Load Immediate
              // Double counterpart of LdImmX: emits ModRM (mod=00, reg=$dst, r/m=101)
              // followed by the double constant reference.  No opcode is emitted here.
     2024     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
     2025     emit_double_constant(cbuf, $con$$constant);
     2026   %}
     2027 
     2028   enc_class load_conXD (regXD dst, immXD con) %{ // Load double constant
              // Complete instruction (prefix, 0F, opcode, ModRM, constant reference).
              // The flag picks between F2 0F 10 (MOVSD) and 66 0F 12 (MOVLPD).
     2029     // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
     2030     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
     2031     emit_opcode(cbuf, 0x0F);
     2032     emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
              // ModRM mod=00, r/m=101: absolute disp32 memory operand.
     2033     emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
     2034     emit_double_constant(cbuf, $con$$constant);
     2035   %}
     2036 
     2037   enc_class Opc_MemImm_F(immF src) %{
              // Emits "$primary /$secondary" applied to a float constant in memory:
              // marks the instruction start, then opcode, ModRM (mod=00, reg=$secondary,
              // r/m=101 = absolute disp32) and the float constant reference.
     2038     cbuf.set_inst_mark();
     2039     $$$emit8$primary;
     2040     emit_rm(cbuf, 0x0, $secondary, 0x5);
     2041     emit_float_constant(cbuf, $src$$constant);
     2042   %}
     2043 
     2044 
     2045   enc_class MovI2X_reg(regX dst, eRegI src) %{
              // Copies a 32-bit general register into an XMM register (66 0F 6E /r).
              // ModRM reg field = XMM destination, r/m field = integer source.
     2046     emit_opcode(cbuf, 0x66 );     // MOVD dst,src
     2047     emit_opcode(cbuf, 0x0F );
     2048     emit_opcode(cbuf, 0x6E );
     2049     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
     2050   %}
     2051 
     2052   enc_class MovX2I_reg(eRegI dst, regX src) %{
              // Copies the low 32 bits of an XMM register into a general register
              // (66 0F 7E /r).  For this opcode the ModRM reg field carries the XMM
              // source and the r/m field the integer destination, hence the swapped
              // argument order in emit_rm below.
     2053     emit_opcode(cbuf, 0x66 );     // MOVD dst,src
     2054     emit_opcode(cbuf, 0x0F );
     2055     emit_opcode(cbuf, 0x7E );
     2056     emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
     2057   %}
     2058 
     2059   enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
              // Moves a 64-bit value held in an integer register pair into an XMM
              // register: low half goes to $dst, high half to $tmp, then PUNPCKLDQ
              // interleaves them so $dst holds lo in bits 0..31 and hi in bits 32..63.
              // $tmp is overwritten.
     2060     { // MOVD $dst,$src.lo
     2061       emit_opcode(cbuf,0x66);
     2062       emit_opcode(cbuf,0x0F);
     2063       emit_opcode(cbuf,0x6E);
     2064       emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
     2065     }
     2066     { // MOVD $tmp,$src.hi
     2067       emit_opcode(cbuf,0x66);
     2068       emit_opcode(cbuf,0x0F);
     2069       emit_opcode(cbuf,0x6E);
     2070       emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
     2071     }
     2072     { // PUNPCKLDQ $dst,$tmp
     2073       emit_opcode(cbuf,0x66);
     2074       emit_opcode(cbuf,0x0F);
     2075       emit_opcode(cbuf,0x62);
     2076       emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
     2077      }
     2078   %}
     2079 
     2080   enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
              // Moves the low 64 bits of an XMM register into an integer register pair.
              // The low dword is read directly; the high dword is first shuffled into
              // the low dword of $tmp and read from there.  $tmp is overwritten.
     2081     { // MOVD $dst.lo,$src
     2082       emit_opcode(cbuf,0x66);
     2083       emit_opcode(cbuf,0x0F);
     2084       emit_opcode(cbuf,0x7E);
     2085       emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
     2086     }
     2087     { // PSHUFLW $tmp,$src,0x4E  (01001110b)
                // Word order 2,3,0,1: swaps the two low dwords, so bits 32..63 of $src
                // end up in bits 0..31 of $tmp.
     2088       emit_opcode(cbuf,0xF2);
     2089       emit_opcode(cbuf,0x0F);
     2090       emit_opcode(cbuf,0x70);
     2091       emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
     2092       emit_d8(cbuf, 0x4E);
     2093     }
     2094     { // MOVD $dst.hi,$tmp
     2095       emit_opcode(cbuf,0x66);
     2096       emit_opcode(cbuf,0x0F);
     2097       emit_opcode(cbuf,0x7E);
     2098       emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
     2099     }
     2100   %}
     2101 
     2102 
     2103   // Encode a reg-reg copy.  If it is useless, then empty encoding.
            // All work is delegated to encode_Copy, which receives the two register
            // encodings (its definition is not in this section of the file).
     2104   enc_class enc_Copy( eRegI dst, eRegI src ) %{
     2105     encode_Copy( cbuf, $dst$$reg, $src$$reg );
     2106   %}
     2107 
     2108   enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
              // Same call as enc_Copy, but $src is a long operand: $src$$reg is used
              // as-is, which other encodings here treat as the low half of the pair.
     2109     encode_Copy( cbuf, $dst$$reg, $src$$reg );
     2110   %}
     2111 
     2112   // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
            // All work is delegated to encode_CopyXD (defined outside this section).
     2113   enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
     2114     encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
     2115   %}
     2116 
     2117   enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
              // Emits only the register-direct ModRM byte (mod=11, reg=$dst, r/m=$src).
              // The opcode must already have been emitted by an earlier encoding.
     2118     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
     2119   %}
     2120 
     2121   enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
              // Emits the $primary opcode and a register-direct ModRM byte for the
              // low halves ($dst$$reg / $src$$reg) of two long operands.
     2122     $$$emit8$primary;
     2123     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
     2124   %}
     2125 
     2126   enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
              // Emits the $secondary opcode and a register-direct ModRM byte for the
              // high halves (HIGH_FROM_LOW of each register) of two long operands.
     2127     $$$emit8$secondary;
     2128     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
     2129   %}
     2130 
     2131   enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
              // Like RegReg_Lo but without the opcode: ModRM byte only, low halves.
     2132     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
     2133   %}
     2134 
     2135   enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
              // Like RegReg_Hi but without the opcode: ModRM byte only, high halves.
     2136     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
     2137   %}
     2138 
     2139   enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
              // ModRM byte only: reg = $dst, r/m = high half of the long $src.
              // Note the parameter order (src first) differs from the other RegReg_*.
     2140     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
     2141   %}
     2142 
     2143   enc_class Con32 (immI src) %{    // Con32(storeImmI)
              // Appends the constant $src to the instruction as a 32-bit immediate.
     2144     // Output immediate
     2145     $$$emit32$src$$constant;
     2146   %}
     2147 
     2148   enc_class Con32F_as_bits(immF src) %{        // storeF_imm
              // Appends the float constant as a 32-bit immediate: jint_cast converts
              // the jfloat to an int, which emit_d32 then writes.
     2149     // Output Float immediate bits
     2150     jfloat jf = $src$$constant;
     2151     int    jf_as_bits = jint_cast( jf );
     2152     emit_d32(cbuf, jf_as_bits);
     2153   %}
     2154 
     2155   enc_class Con32XF_as_bits(immXF src) %{      // storeX_imm
              // Same body as Con32F_as_bits; only the operand class (immXF) differs.
     2156     // Output Float immediate bits
     2157     jfloat jf = $src$$constant;
     2158     int    jf_as_bits = jint_cast( jf );
     2159     emit_d32(cbuf, jf_as_bits);
     2160   %}
     2161 
     2162   enc_class Con16 (immI src) %{    // Con16(storeImmI)
              // Appends the constant $src to the instruction as a 16-bit immediate.
     2163     // Output immediate
     2164     $$$emit16$src$$constant;
     2165   %}
     2166 
     2167   enc_class Con_d32(immI src) %{
              // Appends the constant $src as a 32-bit value via emit_d32.
     2168     emit_d32(cbuf,$src$$constant);
     2169   %}
     2170 
     2171   enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
              // Emits ModRM (mod=00, reg=$t1, r/m=101 = absolute disp32) followed by a
              // 32-bit displacement of zero, i.e. a memory operand at address 0.
              // NOTE(review): the "Con32(storeImmI)" tag above looks copy-pasted from
              // Con32 and does not describe this encoding.
     2172     // Output immediate memory reference
     2173     emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
     2174     emit_d32(cbuf, 0x00);
     2175   %}
     2176 
     2177   enc_class lock_prefix( ) %{
              // Emits the LOCK prefix byte (F0) only when os::is_MP() is true;
              // otherwise emits nothing.
     2178     if( os::is_MP() )
     2179       emit_opcode(cbuf,0xF0);         // [Lock]
     2180   %}
     2181 
     2182   // Cmp-xchg long value.
     2183   // Note: we need to swap EBX and ECX before and after the
     2184   //       CMPXCHG8B instruction because the instruction uses
     2185   //       ECX as the high order word of the new value to store but
     2186   //       our register encoding uses EBX.
     2187   enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
     2188
     2189     // XCHG  EBX,ECX   (87 D9: ModRM mod=11, reg=3, r/m=1)
     2190     emit_opcode(cbuf,0x87);
     2191     emit_opcode(cbuf,0xD9);
     2192     // [Lock]  -- only emitted when os::is_MP()
     2193     if( os::is_MP() )
     2194       emit_opcode(cbuf,0xF0);
     2195     // CMPXCHG8B [Eptr]   (0F C7 /1, register-indirect through $mem_ptr)
     2196     emit_opcode(cbuf,0x0F);
     2197     emit_opcode(cbuf,0xC7);
     2198     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
     2199     // XCHG  EBX,ECX   -- undo the swap; XCHG does not modify flags
     2200     emit_opcode(cbuf,0x87);
     2201     emit_opcode(cbuf,0xD9);
     2202   %}
     2203 
     2204   enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
              // 32-bit compare-and-exchange on the word addressed by $mem_ptr.
              // The ModRM reg field is hard-coded to 1, so the new value always comes
              // from register encoding 1 (ECX in x86 numbering).
     2205     // [Lock]
     2206     if( os::is_MP() )
     2207       emit_opcode(cbuf,0xF0);
     2208
     2209     // CMPXCHG [Eptr]
     2210     emit_opcode(cbuf,0x0F);
     2211     emit_opcode(cbuf,0xB1);
     2212     emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
     2213   %}
     2214 
     2215   enc_class enc_flags_ne_to_boolean( iRegI res ) %{
              // Materializes the current flags as 0/1 in $res: 0 when the "not equal"
              // condition holds, 1 otherwise.  MOV r32,imm32 is used (rather than
              // XOR) for the zero so the incoming flags survive until the JNE.
     2216     int res_encoding = $res$$reg;
     2217
     2218     // MOV  res,0
     2219     emit_opcode( cbuf, 0xB8 + res_encoding);
     2220     emit_d32( cbuf, 0 );
     2221     // JNE,s  fail
              // Displacement 5 = size of the MOV below (1 opcode byte + 4 immediate).
     2222     emit_opcode(cbuf,0x75);
     2223     emit_d8(cbuf, 5 );
     2224     // MOV  res,1
     2225     emit_opcode( cbuf, 0xB8 + res_encoding);
     2226     emit_d32( cbuf, 1 );
     2227     // fail:
     2228   %}
     2229 
     2230   enc_class set_instruction_start( ) %{
              // Emits no bytes; only records the current position in cbuf.
     2231     cbuf.set_inst_mark();            // Mark start of opcode for reloc info in mem operand
     2232   %}
     2233 
     2234   enc_class RegMem (eRegI ereg, memory mem) %{    // emit_reg_mem
              // Emits the operand bytes for "reg, [mem]": unpacks the memory operand's
              // base/index/scale/displacement and hands them, with the register
              // encoding, to encode_RegMem.  The opcode is emitted elsewhere.
     2235     int reg_encoding = $ereg$$reg;
     2236     int base  = $mem$$base;
     2237     int index = $mem$$index;
     2238     int scale = $mem$$scale;
     2239     int displace = $mem$$disp;
     2240     bool disp_is_oop = $mem->disp_is_oop();
     2241     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
     2242   %}
     2243 
     2244   enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
              // High-half variant of RegMem: uses the high register of the long pair
              // and addresses the word 4 bytes past $mem.  The displacement must not be
              // an oop (asserted), so no relocation is requested.
     2245     int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
     2246     int base  = $mem$$base;
     2247     int index = $mem$$index;
     2248     int scale = $mem$$scale;
     2249     int displace = $mem$$disp + 4;      // Offset is 4 further in memory
     2250     assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
     2251     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
     2252   %}
     2253 
     2254   enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
              // 64-bit shift of a register pair by a constant (operand class
              // immI_1_31), emitted as two instructions:
              //   0F $tertiary  r2, r1, cnt   -- double-precision shift into r2
              //   $primary /$secondary  r1, cnt
              // With $tertiary == 0xA4 (SHLD) r1 is the low half and r2 the high half;
              // for any other value the roles are swapped (presumably SHRD, 0xAC).
     2255     int r1, r2;
     2256     if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
     2257     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
     2258     emit_opcode(cbuf,0x0F);
     2259     emit_opcode(cbuf,$tertiary);
     2260     emit_rm(cbuf, 0x3, r1, r2);
     2261     emit_d8(cbuf,$cnt$$constant);
              // $primary is an opcode byte here even though it is written with emit_d8.
     2262     emit_d8(cbuf,$primary);
     2263     emit_rm(cbuf, 0x3, $secondary, r1);
     2264     emit_d8(cbuf,$cnt$$constant);
     2265   %}
     2266 
     2267   enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
              // 64-bit shift of a register pair by a constant (operand class
              // immI_32_63), keeping the sign:
              //   MOV lo,hi ; <shift> lo,cnt-32 ; <shift> hi,31
              // where <shift> is "$primary /$secondary" (presumably SAR -- confirm
              // against the instruct that uses this encoding).
     2268     emit_opcode( cbuf, 0x8B ); // Move
     2269     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
     2270     emit_d8(cbuf,$primary);
     2271     emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
     2272     emit_d8(cbuf,$cnt$$constant-32);
     2273     emit_d8(cbuf,$primary);
     2274     emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
     2275     emit_d8(cbuf,31);
     2276   %}
     2277 
     2278   enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
              // 64-bit shift of a register pair by a constant (operand class
              // immI_32_63), zero filling:
              //   MOV r1,r2 ; <shift> r1,cnt-32 (skipped when cnt == 32) ; XOR r2,r2
              // With $secondary == 0x5 r1 is the low half and r2 the high half (a right
              // shift); otherwise the roles are swapped (a left shift).
     2279     int r1, r2;
     2280     if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
     2281     else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
     2282
     2283     emit_opcode( cbuf, 0x8B ); // Move r1,r2
     2284     emit_rm(cbuf, 0x3, r1, r2);
     2285     if( $cnt$$constant > 32 ) { // Shift, if not by zero
     2286       emit_opcode(cbuf,$primary);
     2287       emit_rm(cbuf, 0x3, $secondary, r1);
     2288       emit_d8(cbuf,$cnt$$constant-32);
     2289     }
     2290     emit_opcode(cbuf,0x33);  // XOR r2,r2
     2291     emit_rm(cbuf, 0x3, r2, r2);
     2292   %}
     2293 
     2294   // Clone of RegMem but accepts an extra parameter to access each
     2295   // half of a double in memory; it never needs relocation info.
            // Unlike RegMem it also emits the opcode itself ($opcode), and the
            // displacement passed on is $mem's displacement plus $disp_for_half.
     2296   enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
     2297     emit_opcode(cbuf,$opcode$$constant);
     2298     int reg_encoding = $rm_reg$$reg;
     2299     int base     = $mem$$base;
     2300     int index    = $mem$$index;
     2301     int scale    = $mem$$scale;
     2302     int displace = $mem$$disp + $disp_for_half$$constant;
     2303     bool disp_is_oop = false;
     2304     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
     2305   %}
     2306 
     2307   // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
     2308   //
     2309   // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
     2310   // and it never needs relocation information.
     2311   // Frequently used to move data between FPU's Stack Top and memory.
            // Asserts that the memory operand's displacement is not an oop.
     2312   enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
     2313     int rm_byte_opcode = $rm_opcode$$constant;
     2314     int base     = $mem$$base;
     2315     int index    = $mem$$index;
     2316     int scale    = $mem$$scale;
     2317     int displace = $mem$$disp;
     2318     assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
     2319     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
     2320   %}
     2321 
     2322   enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
              // Like RMopc_Mem_no_oop (constant reg/opcode field in the ModRM byte),
              // but forwards the operand's disp_is_oop flag to encode_RegMem instead of
              // asserting that it is false.
     2323     int rm_byte_opcode = $rm_opcode$$constant;
     2324     int base     = $mem$$base;
     2325     int index    = $mem$$index;
     2326     int scale    = $mem$$scale;
     2327     int displace = $mem$$disp;
     2328     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
     2329     encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
     2330   %}
     2331 
     2332   enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{    // emit_reg_lea
              // Emits the operand bytes for "dst, [src0 + src1]": a base register plus
              // constant displacement, with no index and no scale.  The trailing
              // comments below describe the sentinel values encode_RegMem expects.
     2333     int reg_encoding = $dst$$reg;
     2334     int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
     2335     int index        = 0x04;            // 0x04 indicates no index
     2336     int scale        = 0x00;            // 0x00 indicates no scale
     2337     int displace     = $src1$$constant; // 0x00 indicates no displacement
     2338     bool disp_is_oop = false;
     2339     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
     2340   %}
     2341 
     2342   enc_class min_enc (eRegI dst, eRegI src) %{    // MIN
              // dst = min(dst, src), signed: compare, then skip the copy when dst is
              // already the smaller value.
     2343     // Compare dst,src
     2344     emit_opcode(cbuf,0x3B);
     2345     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
     2346     // jmp dst < src around move
              // 0x7C = JL rel8; displacement 2 = size of the two-byte MOV below.
     2347     emit_opcode(cbuf,0x7C);
     2348     emit_d8(cbuf,2);
     2349     // move dst,src
     2350     emit_opcode(cbuf,0x8B);
     2351     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
     2352   %}
     2353 
     2354   enc_class max_enc (eRegI dst, eRegI src) %{    // MAX
              // dst = max(dst, src), signed: compare, then skip the copy when dst is
              // already the larger value.
     2355     // Compare dst,src
     2356     emit_opcode(cbuf,0x3B);
     2357     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
     2358     // jmp dst > src around move
              // 0x7F = JG rel8; displacement 2 = size of the two-byte MOV below.
     2359     emit_opcode(cbuf,0x7F);
     2360     emit_d8(cbuf,2);
     2361     // move dst,src
     2362     emit_opcode(cbuf,0x8B);
     2363     emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
     2364   %}
     2365 
     2366   enc_class enc_FP_store(memory mem, regD src) %{
              // Stores x87 register $src to $mem using opcode $primary.  The ModRM
              // reg/opcode field is 2 (store) or 3 (store and pop), chosen below.
     2367     // If src is FPR1, we can just FST to store it.
     2368     // Else we need to FLD it to FPR1, then FSTP to store/pop it.
     2369     int reg_encoding = 0x2; // Just store
     2370     int base  = $mem$$base;
     2371     int index = $mem$$index;
     2372     int scale = $mem$$scale;
     2373     int displace = $mem$$disp;
     2374     bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
     2375     if( $src$$reg != FPR1L_enc ) {
     2376       reg_encoding = 0x3;  // Store & pop
     2377       emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
     2378       emit_d8( cbuf, 0xC0-1+$src$$reg );
     2379     }
              // The mark is set after the optional FLD so it points at the store itself.
     2380     cbuf.set_inst_mark();       // Mark start of opcode for reloc info in mem operand
     2381     emit_opcode(cbuf,$primary);
     2382     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
     2383   %}
     2384 
     2385   enc_class neg_reg(eRegI dst) %{
              // Two's-complement negate of $dst in place (F7 /3, register-direct).
     2386     // NEG $dst
     2387     emit_opcode(cbuf,0xF7);
     2388     emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
     2389   %}
     2390 
     2391   enc_class setLT_reg(eCXRegI dst) %{
              // Emits 0F 9C (SETL r/m8) with a register-direct ModRM whose r/m field is
              // $dst, so the low byte of $dst is set from the current flags.
              // The ModRM reg field is emitted as 4; SETcc does not use that field.
     2392     // SETLT $dst
     2393     emit_opcode(cbuf,0x0F);
     2394     emit_opcode(cbuf,0x9C);
     2395     emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
     2396   %}
     2397 
     2398   enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
              // Branch-free conditional add: p = (p - q) + (borrow ? y : 0).
              // SBB of a register with itself yields all ones when the preceding SUB
              // set the carry flag and zero otherwise; that mask selects $y or 0.
              // Overwrites $p and $tmp.
              // NOTE(review): the mask is derived from CF (unsigned borrow) while the
              // name says "LT" -- confirm the operand ranges make these equivalent.
     2399     int tmpReg = $tmp$$reg;
     2400
     2401     // SUB $p,$q
     2402     emit_opcode(cbuf,0x2B);
     2403     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
     2404     // SBB $tmp,$tmp
     2405     emit_opcode(cbuf,0x1B);
     2406     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
     2407     // AND $tmp,$y
     2408     emit_opcode(cbuf,0x23);
     2409     emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
     2410     // ADD $p,$tmp
     2411     emit_opcode(cbuf,0x03);
     2412     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
     2413   %}
     2414 
     2415   enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{    // cadd_cmpLT
              // Same sequence as enc_cmpLTP, except the value ANDed with the mask is
              // read from memory ($mem) instead of from a register.
     2416     int tmpReg = $tmp$$reg;
     2417
     2418     // SUB $p,$q
     2419     emit_opcode(cbuf,0x2B);
     2420     emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
     2421     // SBB $tmp,$tmp
     2422     emit_opcode(cbuf,0x1B);
     2423     emit_rm(cbuf, 0x3, tmpReg, tmpReg);
     2424     // AND $tmp,[$mem]
     2425     cbuf.set_inst_mark();       // Mark start of opcode for reloc info in mem operand
     2426     emit_opcode(cbuf,0x23);
     2427     int reg_encoding = tmpReg;
     2428     int base  = $mem$$base;
     2429     int index = $mem$$index;
     2430     int scale = $mem$$scale;
     2431     int displace = $mem$$disp;
     2432     bool disp_is_oop = $mem->disp_is_oop();
     2433     encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
     2434     // ADD $p,$tmp
     2435     emit_opcode(cbuf,0x03);
     2436     emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
     2437   %}
     2438 
     2439   enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
              // 64-bit left shift of a register pair by the count in ECX.
              // If bit 5 of the count is set, the low word is first moved into the high
              // word and cleared; SHLD/SHL then apply the count through CL.
              // The TEST is hard-wired to ECX_enc rather than using $shift$$reg.
     2440     // TEST shift,32
     2441     emit_opcode(cbuf,0xF7);
     2442     emit_rm(cbuf, 0x3, 0, ECX_enc);
     2443     emit_d32(cbuf,0x20);
     2444     // JEQ,s small
              // Displacement 4 = the two-byte MOV plus the two-byte XOR skipped below.
     2445     emit_opcode(cbuf, 0x74);
     2446     emit_d8(cbuf, 0x04);
     2447     // MOV    $dst.hi,$dst.lo
     2448     emit_opcode( cbuf, 0x8B );
     2449     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
     2450     // CLR    $dst.lo
     2451     emit_opcode(cbuf, 0x33);
     2452     emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
     2453 // small:
     2454     // SHLD   $dst.hi,$dst.lo,$shift
     2455     emit_opcode(cbuf,0x0F);
     2456     emit_opcode(cbuf,0xA5);
     2457     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
     2458     // SHL    $dst.lo,$shift
     2459     emit_opcode(cbuf,0xD3);
     2460     emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
     2461   %}
     2462 
     2463   enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
              // 64-bit logical right shift of a register pair by the count in ECX.
              // If bit 5 of the count is set, the high word is first moved into the low
              // word and cleared; SHRD/SHR then apply the count through CL.
     2464     // TEST shift,32
     2465     emit_opcode(cbuf,0xF7);
     2466     emit_rm(cbuf, 0x3, 0, ECX_enc);
     2467     emit_d32(cbuf,0x20);
     2468     // JEQ,s small
              // Displacement 4 = the two-byte MOV plus the two-byte XOR skipped below.
     2469     emit_opcode(cbuf, 0x74);
     2470     emit_d8(cbuf, 0x04);
     2471     // MOV    $dst.lo,$dst.hi
     2472     emit_opcode( cbuf, 0x8B );
     2473     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
     2474     // CLR    $dst.hi
     2475     emit_opcode(cbuf, 0x33);
     2476     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
     2477 // small:
     2478     // SHRD   $dst.lo,$dst.hi,$shift
     2479     emit_opcode(cbuf,0x0F);
     2480     emit_opcode(cbuf,0xAD);
     2481     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
     2482     // SHR    $dst.hi,$shift
     2483     emit_opcode(cbuf,0xD3);
     2484     emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
     2485   %}
     2486 
     2487   enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
              // 64-bit arithmetic right shift of a register pair by the count in ECX.
              // If bit 5 of the count is set, the high word is first moved into the low
              // word and replaced by its sign (SAR by 31); SHRD/SAR then apply the
              // count through CL.
     2488     // TEST shift,32
     2489     emit_opcode(cbuf,0xF7);
     2490     emit_rm(cbuf, 0x3, 0, ECX_enc);
     2491     emit_d32(cbuf,0x20);
     2492     // JEQ,s small
              // Displacement 5 = the two-byte MOV plus the three-byte SAR-by-immediate.
     2493     emit_opcode(cbuf, 0x74);
     2494     emit_d8(cbuf, 0x05);
     2495     // MOV    $dst.lo,$dst.hi
     2496     emit_opcode( cbuf, 0x8B );
     2497     emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
     2498     // SAR    $dst.hi,31
     2499     emit_opcode(cbuf, 0xC1);
     2500     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
     2501     emit_d8(cbuf, 0x1F );
     2502 // small:
     2503     // SHRD   $dst.lo,$dst.hi,$shift
     2504     emit_opcode(cbuf,0x0F);
     2505     emit_opcode(cbuf,0xAD);
     2506     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
     2507     // SAR    $dst.hi,$shift
     2508     emit_opcode(cbuf,0xD3);
     2509     emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
     2510   %}
     2511 
     2512 
     2513   // ----------------- Encodings for floating point unit -----------------
     2514   // May leave result in FPU-TOS or FPU reg depending on opcodes
            // Emits $primary followed by a register-direct ModRM byte whose reg/opcode
            // field is $secondary and whose r/m field is the x87 register $src.
     2515   enc_class OpcReg_F (regF src) %{    // FMUL, FDIV
     2516     $$$emit8$primary;
     2517     emit_rm(cbuf, 0x3, $secondary, $src$$reg );
     2518   %}
     2519 
     2520   // Pop argument in FPR0 with FSTP ST(0)
            // Emits the fixed two-byte sequence DD D8.
     2521   enc_class PopFPU() %{
     2522     emit_opcode( cbuf, 0xDD );
     2523     emit_d8( cbuf, 0xD8 );
     2524   %}
     2525 
     2526   // !!!!! equivalent to Pop_Reg_F
            // Stores the x87 top-of-stack into ST($dst$$reg) and pops (DD D8+i).
     2527   enc_class Pop_Reg_D( regD dst ) %{
     2528     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
     2529     emit_d8( cbuf, 0xD8+$dst$$reg );
     2530   %}
     2531 
     2532   enc_class Push_Reg_D( regD dst ) %{
              // Pushes a copy of an x87 register onto the FPU stack (D9 C0+i).  The
              // stack index used is $dst$$reg - 1.
     2533     emit_opcode( cbuf, 0xD9 );
     2534     emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
     2535   %}
     2536 
     2537   enc_class strictfp_bias1( regD dst ) %{
              // Multiplies x87 register $dst by the 80-bit constant stored at
              // StubRoutines::addr_fpu_subnormal_bias1(): the constant is pushed with
              // FLD m80 (DB /5, absolute disp32), then FMULP multiplies it into
              // ST($dst) and pops it again.
              // The address is embedded as a raw 32-bit value; no relocation is
              // requested in this encoding.
     2538     emit_opcode( cbuf, 0xDB );           // FLD m80real
     2539     emit_opcode( cbuf, 0x2D );
     2540     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
     2541     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
     2542     emit_opcode( cbuf, 0xC8+$dst$$reg );
     2543   %}
     2544 
     2545   enc_class strictfp_bias2( regD dst ) %{
              // Identical to strictfp_bias1 except that the 80-bit multiplier is read
              // from StubRoutines::addr_fpu_subnormal_bias2().
     2546     emit_opcode( cbuf, 0xDB );           // FLD m80real
     2547     emit_opcode( cbuf, 0x2D );
     2548     emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
     2549     emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
     2550     emit_opcode( cbuf, 0xC8+$dst$$reg );
     2551   %}
     2552 
     2553   // Special case for moving an integer register to a stack slot.
            // Delegates to store_to_stackslot with the instruction's $primary opcode,
            // the register encoding of $src and the slot's displacement.
     2554   enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
     2555     store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
     2556   %}
     2557 
     2558   // Special case for moving a register to a stack slot.
            // Emits only the operand bytes for [ESP + disp32]: a ModRM byte with
            // mod=10 and r/m=ESP_enc, a SIB byte with ESP_enc as both index and base,
            // and always a full 32-bit displacement.
     2559   enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
     2560     // Opcode already emitted
     2561     emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
     2562     emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
     2563     emit_d32(cbuf, $dst$$disp);   // Displacement
     2564   %}
     2565 
     2566   // Push the integer in stackSlot 'src' onto FP-stack
            // The opcode and ModRM reg/opcode field come from the instruction's
            // $primary and $secondary; store_to_stackslot emits the operand.
     2567   enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
     2568     store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
     2569   %}
     2570 
     2571   // Push the float in stackSlot 'src' onto FP-stack
            // Fixed opcode D9 with reg/opcode field 0, passed to store_to_stackslot.
     2572   enc_class Push_Mem_F( memory src ) %{    // FLD_S   [ESP+src]
     2573     store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
     2574   %}
     2575 
     2576   // Push the double in stackSlot 'src' onto FP-stack
            // Fixed opcode DD with reg/opcode field 0, passed to store_to_stackslot.
     2577   enc_class Push_Mem_D( memory src ) %{    // FLD_D   [ESP+src]
     2578     store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
     2579   %}
     2580 
     2581   // Store FPU's TOS float to a stack-slot, and pop FPU-stack
            // Fixed opcode D9 with reg/opcode field 3, passed to store_to_stackslot.
     2582   enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
     2583     store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
     2584   %}
     2585 
     2586   // Same as Pop_Mem_F except for opcode
     2587   // Store FPU's TOS double to a stack-slot, and pop FPU-stack
            // Fixed opcode DD with reg/opcode field 3, passed to store_to_stackslot.
     2588   enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
     2589     store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
     2590   %}
     2591 
     2592   enc_class Pop_Reg_F( regF dst ) %{
              // Stores the x87 top-of-stack into ST($dst$$reg) and pops (DD D8+i).
              // Emits the same bytes as Pop_Reg_D.
     2593     emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
     2594     emit_d8( cbuf, 0xD8+$dst$$reg );
     2595   %}
     2596 
     2597   enc_class Push_Reg_F( regF dst ) %{
              // Pushes a copy of an x87 register onto the FPU stack (D9 C0+i).
              // Emits the same bytes as Push_Reg_D.
     2598     emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
     2599     emit_d8( cbuf, 0xC0-1+$dst$$reg );
     2600   %}
     2601 
     2602   // Store FPU's float to a stack-slot, and pop FPU-stack
            // When $src is already FPR1 a plain store (reg/opcode field 2, no pop) is
            // used.  Otherwise $src is pushed first and the store pops it (field 3),
            // so the FPU stack depth is unchanged either way.
     2603   enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
     2604     int pop = 0x02;
     2605     if ($src$$reg != FPR1L_enc) {
     2606       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
     2607       emit_d8( cbuf, 0xC0-1+$src$$reg );
     2608       pop = 0x03;
     2609     }
     2610     store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
     2611   %}
     2612 
     2613   // Store FPU's double to a stack-slot, and pop FPU-stack
            // Same logic as Pop_Mem_Reg_F, with store opcode DD instead of D9.
     2614   enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
     2615     int pop = 0x02;
     2616     if ($src$$reg != FPR1L_enc) {
     2617       emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
     2618       emit_d8( cbuf, 0xC0-1+$src$$reg );
     2619       pop = 0x03;
     2620     }
     2621     store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
     2622   %}
     2623 
     2624   // Copy FPU's double to a FPU-stack-slot, and pop FPU-stack
            // When $src is already FPR1: emits DD (D0-1+dst), a non-popping FST whose
            // stack index is dst-1 because nothing was pushed.
            // Otherwise: pushes $src with FLD, then emits DD (D8+dst), a popping FSTP.
            // NOTE(review): $src is declared regF although this is the _D variant.
     2625   enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
     2626     int pop = 0xD0 - 1; // -1 since we skip FLD
     2627     if ($src$$reg != FPR1L_enc) {
     2628       emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
     2629       emit_d8( cbuf, 0xC0-1+$src$$reg );
     2630       pop = 0xD8;
     2631     }
     2632     emit_opcode( cbuf, 0xDD );
     2633     emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
     2634   %}
     2635 
     2636 
     2637   enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
              // Computes dst = src1 * src2 + src on the x87 stack, using a
              // MacroAssembler over cbuf instead of raw byte emission.
              // src1 is addressed as reg-1 before the push; after the push the other
              // registers are addressed with their encoding unchanged (+0).
     2638     MacroAssembler masm(&cbuf);
     2639     masm.fld_s(  $src1$$reg-1);   // nothing at TOS, load TOS from src1.reg
     2640     masm.fmul(   $src2$$reg+0);   // value at TOS
     2641     masm.fadd(   $src$$reg+0);    // value at TOS
     2642     masm.fstp_d( $dst$$reg+0);    // value at TOS, popped off after store
     2643   %}
     2644 
     2645 
     2646   enc_class Push_Reg_Mod_D( regD dst, regD src) %{
              // Pushes a copy of $dst onto the x87 stack.  If $src is not FPR1, it then
              // exchanges $src with the register just below the new top: FINCSTP
              // steps past the pushed value, FXCH swaps, FDECSTP steps back.
     2647     // load dst in FPR0
     2648     emit_opcode( cbuf, 0xD9 );
     2649     emit_d8( cbuf, 0xC0-1+$dst$$reg );
     2650     if ($src$$reg != FPR1L_enc) {
     2651       // fincstp
     2652       emit_opcode (cbuf, 0xD9);
     2653       emit_opcode (cbuf, 0xF7);
     2654       // swap src with FPR1:
     2655       // FXCH FPR1 with src
     2656       emit_opcode(cbuf, 0xD9);
     2657       emit_d8(cbuf, 0xC8-1+$src$$reg );
     2658       // fdecstp
     2659       emit_opcode (cbuf, 0xD9);
     2660       emit_opcode (cbuf, 0xF6);
     2661     }
     2662   %}
     2663 
     2664   enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
              // Transfers two XMM doubles onto the x87 stack through an 8-byte stack
              // temporary: src1 is pushed first, then src0, so src0 ends up on top.
              // ESP is left lowered by 8; a later encoding must restore it
              // (Push_ResultXD in this file ends with ADD ESP,8).
     2665     // Allocate 8 bytes of stack
     2666     emit_opcode(cbuf,0x83);            // SUB ESP,8
     2667     emit_opcode(cbuf,0xEC);
     2668     emit_d8(cbuf,0x08);
     2669
     2670     emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src1
     2671     emit_opcode  (cbuf, 0x0F );
     2672     emit_opcode  (cbuf, 0x11 );
     2673     encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
     2674
     2675     emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
     2676     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
     2677
     2678     emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src0
     2679     emit_opcode  (cbuf, 0x0F );
     2680     emit_opcode  (cbuf, 0x11 );
     2681     encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
     2682
     2683     emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
     2684     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
     2685
     2686   %}
     2687 
     2688   enc_class Push_ModX_encoding( regX src0, regX src1) %{
              // Float counterpart of Push_ModD_encoding: transfers two XMM floats onto
              // the x87 stack through a 4-byte stack temporary, src1 first and then
              // src0.  ESP is left lowered by 4 for a later encoding to restore.
     2689     // Allocate a word
     2690     emit_opcode(cbuf,0x83);            // SUB ESP,4
     2691     emit_opcode(cbuf,0xEC);
     2692     emit_d8(cbuf,0x04);
     2693
     2694     emit_opcode  (cbuf, 0xF3 );     // MOVSS [ESP], src1
     2695     emit_opcode  (cbuf, 0x0F );
     2696     emit_opcode  (cbuf, 0x11 );
     2697     encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);
     2698
     2699     emit_opcode(cbuf,0xD9 );      // FLD [ESP]
     2700     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
     2701
     2702     emit_opcode  (cbuf, 0xF3 );     // MOVSS [ESP], src0
     2703     emit_opcode  (cbuf, 0x0F );
     2704     emit_opcode  (cbuf, 0x11 );
     2705     encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);
     2706
     2707     emit_opcode(cbuf,0xD9 );      // FLD [ESP]
     2708     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
     2709
     2710   %}
     2711 
     2712   enc_class Push_ResultXD(regXD dst) %{
              // Moves a double result from the x87 top-of-stack into XMM register $dst
              // through the stack temporary at [ESP], then releases 8 bytes of stack.
              // Assumes the 8 bytes at [ESP] were reserved by an earlier encoding.
     2713     store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]
     2714
     2715     // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
     2716     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
     2717     emit_opcode  (cbuf, 0x0F );
     2718     emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
     2719     encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
     2720
     2721     emit_opcode(cbuf,0x83);    // ADD ESP,8
     2722     emit_opcode(cbuf,0xC4);
     2723     emit_d8(cbuf,0x08);
     2724   %}
     2725 
     2726   enc_class Push_ResultX(regX dst, immI d8) %{
              // Moves a float result from the x87 top-of-stack into XMM register $dst
              // through the stack temporary at [ESP], then releases $d8 bytes of
              // stack.  The caller passes the amount that was reserved earlier.
     2727     store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]
     2728
     2729     emit_opcode  (cbuf, 0xF3 );     // MOVSS dst(xmm), [ESP]
     2730     emit_opcode  (cbuf, 0x0F );
     2731     emit_opcode  (cbuf, 0x10 );
     2732     encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
     2733
     2734     emit_opcode(cbuf,0x83);    // ADD ESP,d8 (4 or 8)
     2735     emit_opcode(cbuf,0xC4);
     2736     emit_d8(cbuf,$d8$$constant);
     2737   %}
     2738 
     2739   enc_class Push_SrcXD(regXD src) %{
              // Transfers one XMM double onto the x87 stack through an 8-byte stack
              // temporary.  ESP is left lowered by 8 for a later encoding to restore.
     2740     // Allocate 8 bytes of stack
     2741     emit_opcode(cbuf,0x83);            // SUB ESP,8
     2742     emit_opcode(cbuf,0xEC);
     2743     emit_d8(cbuf,0x08);
     2744
     2745     emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src
     2746     emit_opcode  (cbuf, 0x0F );
     2747     emit_opcode  (cbuf, 0x11 );
     2748     encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
     2749
     2750     emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
     2751     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
     2752   %}
     2753 
     2754   enc_class push_stack_temp_qword() %{
              // Reserves an 8-byte temporary on the stack (83 EC 08).
     2755     emit_opcode(cbuf,0x83);     // SUB ESP,8
     2756     emit_opcode(cbuf,0xEC);
     2757     emit_d8    (cbuf,0x08);
     2758   %}
     2759 
     2760   enc_class pop_stack_temp_qword() %{
              // Releases the 8-byte stack temporary (83 C4 08); the inverse of
              // push_stack_temp_qword.
     2761     emit_opcode(cbuf,0x83);     // ADD ESP,8
     2762     emit_opcode(cbuf,0xC4);
     2763     emit_d8    (cbuf,0x08);
     2764   %}
     2765 
     2766   enc_class push_xmm_to_fpr1( regXD xmm_src ) %{  // Copy the double in xmm_src onto the x87 stack through [ESP]
     2767     emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], xmm_src
     2768     emit_opcode  (cbuf, 0x0F );
     2769     emit_opcode  (cbuf, 0x11 );
     2770     encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);
     2771     // NOTE(review): ESP is not adjusted here; presumably the caller reserved the 8 bytes at [ESP] beforehand - confirm.
     2772     emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
     2773     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
     2774   %}
     2775 
     2776   // Compute X^Y using Intel's fast hardware instructions, if possible.
     2777   // Otherwise return a NaN.
     2778   enc_class pow_exp_core_encoding %{  // Uses [ESP]..[ESP+7] as scratch and overwrites EAX, EBX and ECX
     2779     // FPR1 holds Y*ln2(X).  Compute FPR1 = 2^(Y*ln2(X))
     2780     emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0);  // fdup = fld st(0)          Q       Q
     2781     emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC);  // frndint               int(Q)      Q
     2782     emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9);  // fsub st(1) -= st(0);  int(Q) frac(Q)
     2783     emit_opcode(cbuf,0xDB);                          // FISTP [ESP]           frac(Q)
     2784     emit_opcode(cbuf,0x1C);
     2785     emit_d8(cbuf,0x24);
     2786     emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0);  // f2xm1                 2^frac(Q)-1
     2787     emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8);  // fld1                  1 2^frac(Q)-1
     2788     emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1);  // faddp                 2^frac(Q)
     2789     emit_opcode(cbuf,0x8B);                          // mov eax,[esp+0]=int(Q)
     2790     encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
     2791     emit_opcode(cbuf,0xC7);                          // mov ecx,0xFFFFF800 - overflow mask
     2792     emit_rm(cbuf, 0x3, 0x0, ECX_enc);
     2793     emit_d32(cbuf,0xFFFFF800);
     2794     emit_opcode(cbuf,0x81);                          // add eax,1023 - the double exponent bias
     2795     emit_rm(cbuf, 0x3, 0x0, EAX_enc);
     2796     emit_d32(cbuf,1023);
     2797     emit_opcode(cbuf,0x8B);                          // mov ebx,eax - keep the unshifted biased exponent for the range test
     2798     emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
     2799     emit_opcode(cbuf,0xC1);                          // shl eax,20 - Slide to exponent position
     2800     emit_rm(cbuf,0x3,0x4,EAX_enc);
     2801     emit_d8(cbuf,20);
     2802     emit_opcode(cbuf,0x85);                          // test ebx,ecx - check for overflow (any bit above the low 11 set)
     2803     emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
     2804     emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45);  // CMOVne eax,ecx - overflow; stuff NAN into EAX
     2805     emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
     2806     emit_opcode(cbuf,0x89);                          // mov [esp+4],eax - Store as the high 32 bits of the double
     2807     encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
     2808     emit_opcode(cbuf,0xC7);                          // mov [esp+0],0   - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
     2809     encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
     2810     emit_d32(cbuf,0);
     2811     emit_opcode(cbuf,0xDC);                          // fmul qword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
     2812     encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
     2813   %}
     2814 
     2815 //   enc_class Pop_Reg_Mod_D( regD dst, regD src)
     2816 //   was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
     2817 
     2818   enc_class Push_Result_Mod_D( regD src) %{  // Emits an fincstp / FXCH / fdecstp triple unless src is already FPR1
     2819     if ($src$$reg != FPR1L_enc) {
     2820       // fincstp
     2821       emit_opcode (cbuf, 0xD9);
     2822       emit_opcode (cbuf, 0xF7);
     2823       // FXCH FPR1 with src (the -1 in the operand byte accounts for the preceding fincstp)
     2824       emit_opcode(cbuf, 0xD9);
     2825       emit_d8(cbuf, 0xC8-1+$src$$reg );
     2826       // fdecstp
     2827       emit_opcode (cbuf, 0xD9);
     2828       emit_opcode (cbuf, 0xF6);
     2829     }
     2830     // // following asm replaced with Pop_Reg_F or Pop_Mem_F
     2831     // // FSTP   FPR$dst$$reg
     2832     // emit_opcode( cbuf, 0xDD );
     2833     // emit_d8( cbuf, 0xD8+$dst$$reg );
     2834   %}
     2835 
     2836   enc_class fnstsw_sahf_skip_parity() %{  // Copy x87 status into EFLAGS, then branch over the next 5 bytes if PF is clear
     2837     // fnstsw ax
     2838     emit_opcode( cbuf, 0xDF );
     2839     emit_opcode( cbuf, 0xE0 );
     2840     // sahf
     2841     emit_opcode( cbuf, 0x9E );
     2842     // jnp  ::skip  (rel8 = 5: the target lies 5 bytes past this jump, in code emitted after this encoding)
     2843     emit_opcode( cbuf, 0x7B );
     2844     emit_opcode( cbuf, 0x05 );
     2845   %}
     2846 
     2847   enc_class emitModD() %{  // Emit a 12-byte loop that repeats fprem while the parity flag is set after fnstsw/sahf
     2848     // fprem must be iterative
     2849     // :: loop
     2850     // fprem
     2851     emit_opcode( cbuf, 0xD9 );
     2852     emit_opcode( cbuf, 0xF8 );
     2853     // wait
     2854     emit_opcode( cbuf, 0x9b );
     2855     // fnstsw ax
     2856     emit_opcode( cbuf, 0xDF );
     2857     emit_opcode( cbuf, 0xE0 );
     2858     // sahf
     2859     emit_opcode( cbuf, 0x9E );
     2860     // jp  ::loop  (near form 0F 8A with rel32 = 0xFFFFFFF4 = -12: fprem 2 + wait 1 + fnstsw 2 + sahf 1 + jp 6 bytes)
     2861     emit_opcode( cbuf, 0x0F );
     2862     emit_opcode( cbuf, 0x8A );
     2863     emit_opcode( cbuf, 0xF4 );
     2864     emit_opcode( cbuf, 0xFF );
     2865     emit_opcode( cbuf, 0xFF );
     2866     emit_opcode( cbuf, 0xFF );
     2867   %}
     2868 
     2869   enc_class fpu_flags() %{  // Move x87 compare status into EFLAGS, forcing the LT outcome for unordered operands
     2870     // fnstsw_ax
     2871     emit_opcode( cbuf, 0xDF);
     2872     emit_opcode( cbuf, 0xE0);
     2873     // test ax,0x0400  (bit 10 of the status word just stored in AX)
     2874     emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
     2875     emit_opcode( cbuf, 0xA9 );
     2876     emit_d16   ( cbuf, 0x0400 );
     2877     // // // This sequence works, but stalls for 12-16 cycles on PPro
     2878     // // test eax,0x0400
     2879     // emit_opcode( cbuf, 0xA9 );
     2880     // emit_d32   ( cbuf, 0x00000400 );
     2881     //
     2882     // jz exit (no unordered comparison); rel8 = 2 skips only the 2-byte mov below, so sahf always runs
     2883     emit_opcode( cbuf, 0x74 );
     2884     emit_d8    ( cbuf, 0x02 );
     2885     // mov ah,1 - treat as LT case (set carry flag)
     2886     emit_opcode( cbuf, 0xB4 );
     2887     emit_d8    ( cbuf, 0x01 );
     2888     // sahf
     2889     emit_opcode( cbuf, 0x9E);
     2890   %}
     2891 
     2892   enc_class cmpF_P6_fixup() %{  // If PF is set on entry, rewrite EFLAGS so the compare reads as "less than"
     2893     // Fixup the integer flags in case comparison involved a NaN
     2894     //
     2895     // JNP exit (no unordered comparison, P-flag is set by NaN); rel8 = 3 skips MOV (2 bytes) + SAHF (1 byte)
     2896     emit_opcode( cbuf, 0x7B );
     2897     emit_d8    ( cbuf, 0x03 );
     2898     // MOV AH,1 - treat as LT case (set carry flag)
     2899     emit_opcode( cbuf, 0xB4 );
     2900     emit_d8    ( cbuf, 0x01 );
     2901     // SAHF
     2902     emit_opcode( cbuf, 0x9E);
     2903     // NOP     // target for branch to avoid branch to branch
     2904     emit_opcode( cbuf, 0x90);
     2905   %}
     2906 
     2907 //     fnstsw_ax();
     2908 //     sahf();
     2909 //     movl(dst, nan_result);
     2910 //     jcc(Assembler::parity, exit);
     2911 //     movl(dst, less_result);
     2912 //     jcc(Assembler::below, exit);
     2913 //     movl(dst, equal_result);
     2914 //     jcc(Assembler::equal, exit);
     2915 //     movl(dst, greater_result);
     2916 
     2917 // less_result     = -1;
     2918 // greater_result  =  1;
     2919 // equal_result    = 0;
     2920 // nan_result      = -1;
     2921 
     2922   enc_class CmpF_Result(eRegI dst) %{  // dst = -1 if unordered or less, 0 if equal, 1 if greater (from the x87 status word)
     2923     // fnstsw_ax();
     2924     emit_opcode( cbuf, 0xDF);
     2925     emit_opcode( cbuf, 0xE0);
     2926     // sahf
     2927     emit_opcode( cbuf, 0x9E);
     2928     // movl(dst, nan_result);  each MOV r32,imm32 below is 5 bytes (opcode 0xB8+reg, then a 32-bit immediate)
     2929     emit_opcode( cbuf, 0xB8 + $dst$$reg);
     2930     emit_d32( cbuf, -1 );
     2931     // jcc(Assembler::parity, exit);  rel8 = 0x13 = 19 = three 5-byte MOVs + two 2-byte Jcc
     2932     emit_opcode( cbuf, 0x7A );
     2933     emit_d8    ( cbuf, 0x13 );
     2934     // movl(dst, less_result);
     2935     emit_opcode( cbuf, 0xB8 + $dst$$reg);
     2936     emit_d32( cbuf, -1 );
     2937     // jcc(Assembler::below, exit);  rel8 = 0x0C = 12 = two 5-byte MOVs + one 2-byte Jcc
     2938     emit_opcode( cbuf, 0x72 );
     2939     emit_d8    ( cbuf, 0x0C );
     2940     // movl(dst, equal_result);
     2941     emit_opcode( cbuf, 0xB8 + $dst$$reg);
     2942     emit_d32( cbuf, 0 );
     2943     // jcc(Assembler::equal, exit);  rel8 = 0x05 skips the final 5-byte MOV
     2944     emit_opcode( cbuf, 0x74 );
     2945     emit_d8    ( cbuf, 0x05 );
     2946     // movl(dst, greater_result);
     2947     emit_opcode( cbuf, 0xB8 + $dst$$reg);
     2948     emit_d32( cbuf, 1 );
     2949   %}
     2950 
     2951 
     2952   // XMM version of CmpF_Result. Because the XMM compare
     2953   // instructions set the EFLAGS directly, this encoding is simpler
     2954   // than the float version above.
     2955   enc_class CmpX_Result(eRegI dst) %{  // Adjust dst by -1, 0 or +1 according to the flags already in EFLAGS
     2956     MacroAssembler _masm(&cbuf);
     2957     Label nan, inc, done;
     2958     // NOTE(review): dst is only decremented/incremented here, never loaded; presumably it is preset by the caller - confirm.
     2959     __ jccb(Assembler::parity, nan);
     2960     __ jccb(Assembler::equal,  done);
     2961     __ jccb(Assembler::above,  inc);
     2962     __ bind(nan);
     2963     __ decrement(as_Register($dst$$reg)); // reached on parity (unordered) and by fall-through when none of the branches is taken
     2964     __ jmpb(done);
     2965     __ bind(inc);
     2966     __ increment(as_Register($dst$$reg)); // reached only through the "above" branch
     2967     __ bind(done);
     2968   %}
     2969 
     2970   // Compare the longs and set flags
     2971   // BROKEN!  Do Not use as-is
     2972   enc_class cmpl_test( eRegL src1, eRegL src2 ) %{  // Compare high halves; compare low halves only if the high halves are equal
     2973     // CMP    $src1.hi,$src2.hi
     2974     emit_opcode( cbuf, 0x3B );
     2975     emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
     2976     // JNE,s  done  (rel8 = 2 skips the 2-byte CMP of the low halves below)
     2977     emit_opcode(cbuf,0x75);
     2978     emit_d8(cbuf, 2 );
     2979     // CMP    $src1.lo,$src2.lo
     2980     emit_opcode( cbuf, 0x3B );
     2981     emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
     2982 // done:
     2983   %}
     2984 
     2985   enc_class convert_int_long( regL dst, eRegI src ) %{  // Sign-extend the 32-bit src into the dst register pair
     2986     // mov $dst.lo,$src
     2987     int dst_encoding = $dst$$reg;
     2988     int src_encoding = $src$$reg;
     2989     encode_Copy( cbuf, dst_encoding  , src_encoding );
     2990     // mov $dst.hi,$src
     2991     encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
     2992     // sar $dst.hi,31  (leaves the high half as 32 copies of the sign bit of src)
     2993     emit_opcode( cbuf, 0xC1 );
     2994     emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
     2995     emit_d8(cbuf, 0x1F );
     2996   %}
     2997 
     2998   enc_class convert_long_double( eRegL src ) %{  // Push the long in src, FILD it onto the x87 stack, then pop the 8 bytes
     2999     // push $src.hi
     3000     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
     3001     // push $src.lo  (pushed second, so the low half ends up at [SP] and the high half at [SP+4])
     3002     emit_opcode(cbuf, 0x50+$src$$reg  );
     3003     // fild 64-bits at [SP]
     3004     emit_opcode(cbuf,0xdf);
     3005     emit_d8(cbuf, 0x6C);
     3006     emit_d8(cbuf, 0x24);
     3007     emit_d8(cbuf, 0x00);
     3008     // pop stack
     3009     emit_opcode(cbuf, 0x83); // add  SP, #8
     3010     emit_rm(cbuf, 0x3, 0x00, ESP_enc);
     3011     emit_d8(cbuf, 0x8);
     3012   %}
     3013 
     3014   enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
     3015     // IMUL   EDX:EAX,$src1  (one-operand signed multiply; src2 and cr are not referenced by the emitted bytes)
     3016     emit_opcode( cbuf, 0xF7 );
     3017     emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
     3018     // SAR    EDX,$cnt-32  (omitted entirely when cnt is exactly 32, i.e. the shift count is 0)
     3019     int shift_count = ((int)$cnt$$constant) - 32;
     3020     if (shift_count > 0) {
     3021       emit_opcode(cbuf, 0xC1);
     3022       emit_rm(cbuf, 0x3, 7, $dst$$reg );
     3023       emit_d8(cbuf, shift_count);
     3024     }
     3025   %}
     3026 
     3027   // this version doesn't have add sp, 8
     3028   enc_class convert_long_double2( eRegL src ) %{  // Push the long in src and FILD it; the 8 pushed bytes stay on the stack
     3029     // push $src.hi
     3030     emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
     3031     // push $src.lo  (pushed second, so the low half ends up at [SP] and the high half at [SP+4])
     3032     emit_opcode(cbuf, 0x50+$src$$reg  );
     3033     // fild 64-bits at [SP]
     3034     emit_opcode(cbuf,0xdf);
     3035     emit_d8(cbuf, 0x6C);
     3036     emit_d8(cbuf, 0x24);
     3037     emit_d8(cbuf, 0x00);
     3038   %}
     3039 
     3040   enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{  // Signed 32x32 -> 64-bit multiply; dst is not referenced by the emitted bytes
     3041     // Basic idea: long = (long)int * (long)int
     3042     // IMUL EDX:EAX, src  (opcode 0xF7 with /5 in the ModRM reg field)
     3043     emit_opcode( cbuf, 0xF7 );
     3044     emit_rm( cbuf, 0x3, 0x5, $src$$reg);
     3045   %}
     3046 
     3047   enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{  // Unsigned 32x32 -> 64-bit multiply; dst is not referenced by the emitted bytes
     3048     // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
     3049     // MUL EDX:EAX, src  (opcode 0xF7 with /4 in the ModRM reg field)
     3050     emit_opcode( cbuf, 0xF7 );
     3051     emit_rm( cbuf, 0x3, 0x4, $src$$reg);
     3052   %}
     3053 
     3054   enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
     3055     // Basic idea: lo(result) = lo(x_lo * y_lo)
     3056     //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
     3057     // MOV    $tmp,$src.lo
     3058     encode_Copy( cbuf, $tmp$$reg, $src$$reg );
     3059     // IMUL   $tmp,EDX
     3060     emit_opcode( cbuf, 0x0F );
     3061     emit_opcode( cbuf, 0xAF );
     3062     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
     3063     // MOV    EDX,$src.hi
     3064     encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
     3065     // IMUL   EDX,EAX
     3066     emit_opcode( cbuf, 0x0F );
     3067     emit_opcode( cbuf, 0xAF );
     3068     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
     3069     // ADD    $tmp,EDX
     3070     emit_opcode( cbuf, 0x03 );
     3071     emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
     3072     // MUL   EDX:EAX,$src.lo
     3073     emit_opcode( cbuf, 0xF7 );
     3074     emit_rm( cbuf, 0x3, 0x4, $src$$reg );
     3075     // ADD    EDX,ESI
     3076     emit_opcode( cbuf, 0x03 );
     3077     emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
     3078