view src/cpu/sparc/vm/sparc.ad @ 3429:c6f9c897ea33

opto: specify offset of IC load in java_to_interp stub. If a compiled static call calls the interpreter, it jumps past a java_to_interp stub in the compiled code. Patching this call must find the load of the IC. So far the code assumed that this load is the first instruction in the stub. This might not be the case if, for example, the base of the constant table (toc) must be loaded first. Extend CompiledStaticCall to take an offset from its beginning at which to search for the load of the IC.
author Goetz
date Wed, 14 Nov 2012 10:43:39 +0100
parents b49d275b362d
children 43ccc18e9d22
line wrap: on
line source
//
// Copyright (c) 1998, 2011, Oracle and/or its affiliates. All rights reserved.
// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
//
// This code is free software; you can redistribute it and/or modify it
// under the terms of the GNU General Public License version 2 only, as
// published by the Free Software Foundation.
//
// This code is distributed in the hope that it will be useful, but WITHOUT
// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
// version 2 for more details (a copy is included in the LICENSE file that
// accompanied this code).
//
// You should have received a copy of the GNU General Public License version
// 2 along with this work; if not, write to the Free Software Foundation,
// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
//
// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
// or visit www.oracle.com if you need additional information or have any
// questions.
//
//

// SPARC Architecture Description File

//----------REGISTER DEFINITION BLOCK------------------------------------------
// This information is used by the matcher and the register allocator to
// describe individual registers and classes of registers within the target
// architecture.
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding, vm name );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.


// ----------------------------
// Integer/Long Registers
// ----------------------------

// Need to expose the hi/lo aspect of 64-bit registers
// This register set is used for both the 64-bit build and
// the 32-bit build with 1-register longs.

// Global Registers 0-7
reg_def R_G0H( NS,  NS, Op_RegI,128, G0->as_VMReg()->next());
reg_def R_G0 ( NS,  NS, Op_RegI,  0, G0->as_VMReg());
reg_def R_G1H(SOC, SOC, Op_RegI,129, G1->as_VMReg()->next());
reg_def R_G1 (SOC, SOC, Op_RegI,  1, G1->as_VMReg());
reg_def R_G2H( NS,  NS, Op_RegI,130, G2->as_VMReg()->next());
reg_def R_G2 ( NS,  NS, Op_RegI,  2, G2->as_VMReg());
reg_def R_G3H(SOC, SOC, Op_RegI,131, G3->as_VMReg()->next());
reg_def R_G3 (SOC, SOC, Op_RegI,  3, G3->as_VMReg());
reg_def R_G4H(SOC, SOC, Op_RegI,132, G4->as_VMReg()->next());
reg_def R_G4 (SOC, SOC, Op_RegI,  4, G4->as_VMReg());
reg_def R_G5H(SOC, SOC, Op_RegI,133, G5->as_VMReg()->next());
reg_def R_G5 (SOC, SOC, Op_RegI,  5, G5->as_VMReg());
reg_def R_G6H( NS,  NS, Op_RegI,134, G6->as_VMReg()->next());
reg_def R_G6 ( NS,  NS, Op_RegI,  6, G6->as_VMReg());
reg_def R_G7H( NS,  NS, Op_RegI,135, G7->as_VMReg()->next());
reg_def R_G7 ( NS,  NS, Op_RegI,  7, G7->as_VMReg());

// Output Registers 0-7
reg_def R_O0H(SOC, SOC, Op_RegI,136, O0->as_VMReg()->next());
reg_def R_O0 (SOC, SOC, Op_RegI,  8, O0->as_VMReg());
reg_def R_O1H(SOC, SOC, Op_RegI,137, O1->as_VMReg()->next());
reg_def R_O1 (SOC, SOC, Op_RegI,  9, O1->as_VMReg());
reg_def R_O2H(SOC, SOC, Op_RegI,138, O2->as_VMReg()->next());
reg_def R_O2 (SOC, SOC, Op_RegI, 10, O2->as_VMReg());
reg_def R_O3H(SOC, SOC, Op_RegI,139, O3->as_VMReg()->next());
reg_def R_O3 (SOC, SOC, Op_RegI, 11, O3->as_VMReg());
reg_def R_O4H(SOC, SOC, Op_RegI,140, O4->as_VMReg()->next());
reg_def R_O4 (SOC, SOC, Op_RegI, 12, O4->as_VMReg());
reg_def R_O5H(SOC, SOC, Op_RegI,141, O5->as_VMReg()->next());
reg_def R_O5 (SOC, SOC, Op_RegI, 13, O5->as_VMReg());
reg_def R_SPH( NS,  NS, Op_RegI,142, SP->as_VMReg()->next());
reg_def R_SP ( NS,  NS, Op_RegI, 14, SP->as_VMReg());
reg_def R_O7H(SOC, SOC, Op_RegI,143, O7->as_VMReg()->next());
reg_def R_O7 (SOC, SOC, Op_RegI, 15, O7->as_VMReg());

// Local Registers 0-7
reg_def R_L0H( NS,  NS, Op_RegI,144, L0->as_VMReg()->next());
reg_def R_L0 ( NS,  NS, Op_RegI, 16, L0->as_VMReg());
reg_def R_L1H( NS,  NS, Op_RegI,145, L1->as_VMReg()->next());
reg_def R_L1 ( NS,  NS, Op_RegI, 17, L1->as_VMReg());
reg_def R_L2H( NS,  NS, Op_RegI,146, L2->as_VMReg()->next());
reg_def R_L2 ( NS,  NS, Op_RegI, 18, L2->as_VMReg());
reg_def R_L3H( NS,  NS, Op_RegI,147, L3->as_VMReg()->next());
reg_def R_L3 ( NS,  NS, Op_RegI, 19, L3->as_VMReg());
reg_def R_L4H( NS,  NS, Op_RegI,148, L4->as_VMReg()->next());
reg_def R_L4 ( NS,  NS, Op_RegI, 20, L4->as_VMReg());
reg_def R_L5H( NS,  NS, Op_RegI,149, L5->as_VMReg()->next());
reg_def R_L5 ( NS,  NS, Op_RegI, 21, L5->as_VMReg());
reg_def R_L6H( NS,  NS, Op_RegI,150, L6->as_VMReg()->next());
reg_def R_L6 ( NS,  NS, Op_RegI, 22, L6->as_VMReg());
reg_def R_L7H( NS,  NS, Op_RegI,151, L7->as_VMReg()->next());
reg_def R_L7 ( NS,  NS, Op_RegI, 23, L7->as_VMReg());

// Input Registers 0-7
reg_def R_I0H( NS,  NS, Op_RegI,152, I0->as_VMReg()->next());
reg_def R_I0 ( NS,  NS, Op_RegI, 24, I0->as_VMReg());
reg_def R_I1H( NS,  NS, Op_RegI,153, I1->as_VMReg()->next());
reg_def R_I1 ( NS,  NS, Op_RegI, 25, I1->as_VMReg());
reg_def R_I2H( NS,  NS, Op_RegI,154, I2->as_VMReg()->next());
reg_def R_I2 ( NS,  NS, Op_RegI, 26, I2->as_VMReg());
reg_def R_I3H( NS,  NS, Op_RegI,155, I3->as_VMReg()->next());
reg_def R_I3 ( NS,  NS, Op_RegI, 27, I3->as_VMReg());
reg_def R_I4H( NS,  NS, Op_RegI,156, I4->as_VMReg()->next());
reg_def R_I4 ( NS,  NS, Op_RegI, 28, I4->as_VMReg());
reg_def R_I5H( NS,  NS, Op_RegI,157, I5->as_VMReg()->next());
reg_def R_I5 ( NS,  NS, Op_RegI, 29, I5->as_VMReg());
reg_def R_FPH( NS,  NS, Op_RegI,158, FP->as_VMReg()->next());
reg_def R_FP ( NS,  NS, Op_RegI, 30, FP->as_VMReg());
reg_def R_I7H( NS,  NS, Op_RegI,159, I7->as_VMReg()->next());
reg_def R_I7 ( NS,  NS, Op_RegI, 31, I7->as_VMReg());

// ----------------------------
// Float/Double Registers
// ----------------------------

// Float Registers
reg_def R_F0 ( SOC, SOC, Op_RegF,  0, F0->as_VMReg());
reg_def R_F1 ( SOC, SOC, Op_RegF,  1, F1->as_VMReg());
reg_def R_F2 ( SOC, SOC, Op_RegF,  2, F2->as_VMReg());
reg_def R_F3 ( SOC, SOC, Op_RegF,  3, F3->as_VMReg());
reg_def R_F4 ( SOC, SOC, Op_RegF,  4, F4->as_VMReg());
reg_def R_F5 ( SOC, SOC, Op_RegF,  5, F5->as_VMReg());
reg_def R_F6 ( SOC, SOC, Op_RegF,  6, F6->as_VMReg());
reg_def R_F7 ( SOC, SOC, Op_RegF,  7, F7->as_VMReg());
reg_def R_F8 ( SOC, SOC, Op_RegF,  8, F8->as_VMReg());
reg_def R_F9 ( SOC, SOC, Op_RegF,  9, F9->as_VMReg());
reg_def R_F10( SOC, SOC, Op_RegF, 10, F10->as_VMReg());
reg_def R_F11( SOC, SOC, Op_RegF, 11, F11->as_VMReg());
reg_def R_F12( SOC, SOC, Op_RegF, 12, F12->as_VMReg());
reg_def R_F13( SOC, SOC, Op_RegF, 13, F13->as_VMReg());
reg_def R_F14( SOC, SOC, Op_RegF, 14, F14->as_VMReg());
reg_def R_F15( SOC, SOC, Op_RegF, 15, F15->as_VMReg());
reg_def R_F16( SOC, SOC, Op_RegF, 16, F16->as_VMReg());
reg_def R_F17( SOC, SOC, Op_RegF, 17, F17->as_VMReg());
reg_def R_F18( SOC, SOC, Op_RegF, 18, F18->as_VMReg());
reg_def R_F19( SOC, SOC, Op_RegF, 19, F19->as_VMReg());
reg_def R_F20( SOC, SOC, Op_RegF, 20, F20->as_VMReg());
reg_def R_F21( SOC, SOC, Op_RegF, 21, F21->as_VMReg());
reg_def R_F22( SOC, SOC, Op_RegF, 22, F22->as_VMReg());
reg_def R_F23( SOC, SOC, Op_RegF, 23, F23->as_VMReg());
reg_def R_F24( SOC, SOC, Op_RegF, 24, F24->as_VMReg());
reg_def R_F25( SOC, SOC, Op_RegF, 25, F25->as_VMReg());
reg_def R_F26( SOC, SOC, Op_RegF, 26, F26->as_VMReg());
reg_def R_F27( SOC, SOC, Op_RegF, 27, F27->as_VMReg());
reg_def R_F28( SOC, SOC, Op_RegF, 28, F28->as_VMReg());
reg_def R_F29( SOC, SOC, Op_RegF, 29, F29->as_VMReg());
reg_def R_F30( SOC, SOC, Op_RegF, 30, F30->as_VMReg());
reg_def R_F31( SOC, SOC, Op_RegF, 31, F31->as_VMReg());

// Double Registers
// The rules of ADL require that double registers be defined in pairs.
// Each pair must be two 32-bit values, but not necessarily a pair of
// single float registers.  In each pair, ADLC-assigned register numbers
// must be adjacent, with the lower number even.  Finally, when the
// CPU stores such a register pair to memory, the word associated with
// the lower ADLC-assigned number must be stored to the lower address.

// These definitions specify the actual bit encodings of the sparc
// double fp register numbers.  FloatRegisterImpl in register_sparc.hpp
// wants 0-63, so we have to convert every time we want to use fp regs
// with the macroassembler, using reg_to_DoubleFloatRegister_object().
// 255 is a flag meaning "don't go here".
// I believe we can't handle callee-save doubles D32 and up until
// the place in the sparc stack crawler that asserts on the 255 is
// fixed up.
reg_def R_D32 (SOC, SOC, Op_RegD,  1, F32->as_VMReg());
reg_def R_D32x(SOC, SOC, Op_RegD,255, F32->as_VMReg()->next());
reg_def R_D34 (SOC, SOC, Op_RegD,  3, F34->as_VMReg());
reg_def R_D34x(SOC, SOC, Op_RegD,255, F34->as_VMReg()->next());
reg_def R_D36 (SOC, SOC, Op_RegD,  5, F36->as_VMReg());
reg_def R_D36x(SOC, SOC, Op_RegD,255, F36->as_VMReg()->next());
reg_def R_D38 (SOC, SOC, Op_RegD,  7, F38->as_VMReg());
reg_def R_D38x(SOC, SOC, Op_RegD,255, F38->as_VMReg()->next());
reg_def R_D40 (SOC, SOC, Op_RegD,  9, F40->as_VMReg());
reg_def R_D40x(SOC, SOC, Op_RegD,255, F40->as_VMReg()->next());
reg_def R_D42 (SOC, SOC, Op_RegD, 11, F42->as_VMReg());
reg_def R_D42x(SOC, SOC, Op_RegD,255, F42->as_VMReg()->next());
reg_def R_D44 (SOC, SOC, Op_RegD, 13, F44->as_VMReg());
reg_def R_D44x(SOC, SOC, Op_RegD,255, F44->as_VMReg()->next());
reg_def R_D46 (SOC, SOC, Op_RegD, 15, F46->as_VMReg());
reg_def R_D46x(SOC, SOC, Op_RegD,255, F46->as_VMReg()->next());
reg_def R_D48 (SOC, SOC, Op_RegD, 17, F48->as_VMReg());
reg_def R_D48x(SOC, SOC, Op_RegD,255, F48->as_VMReg()->next());
reg_def R_D50 (SOC, SOC, Op_RegD, 19, F50->as_VMReg());
reg_def R_D50x(SOC, SOC, Op_RegD,255, F50->as_VMReg()->next());
reg_def R_D52 (SOC, SOC, Op_RegD, 21, F52->as_VMReg());
reg_def R_D52x(SOC, SOC, Op_RegD,255, F52->as_VMReg()->next());
reg_def R_D54 (SOC, SOC, Op_RegD, 23, F54->as_VMReg());
reg_def R_D54x(SOC, SOC, Op_RegD,255, F54->as_VMReg()->next());
reg_def R_D56 (SOC, SOC, Op_RegD, 25, F56->as_VMReg());
reg_def R_D56x(SOC, SOC, Op_RegD,255, F56->as_VMReg()->next());
reg_def R_D58 (SOC, SOC, Op_RegD, 27, F58->as_VMReg());
reg_def R_D58x(SOC, SOC, Op_RegD,255, F58->as_VMReg()->next());
reg_def R_D60 (SOC, SOC, Op_RegD, 29, F60->as_VMReg());
reg_def R_D60x(SOC, SOC, Op_RegD,255, F60->as_VMReg()->next());
reg_def R_D62 (SOC, SOC, Op_RegD, 31, F62->as_VMReg());
reg_def R_D62x(SOC, SOC, Op_RegD,255, F62->as_VMReg()->next());


// ----------------------------
// Special Registers
// Condition Codes Flag Registers
// I tried to break out ICC and XCC but it's not very pretty.
// Every Sparc instruction which defs/kills one also kills the other.
// Hence every compare instruction which defs one kind of flags ends
// up needing a kill of the other.
reg_def CCR (SOC, SOC,  Op_RegFlags, 0, VMRegImpl::Bad());

reg_def FCC0(SOC, SOC,  Op_RegFlags, 0, VMRegImpl::Bad());
reg_def FCC1(SOC, SOC,  Op_RegFlags, 1, VMRegImpl::Bad());
reg_def FCC2(SOC, SOC,  Op_RegFlags, 2, VMRegImpl::Bad());
reg_def FCC3(SOC, SOC,  Op_RegFlags, 3, VMRegImpl::Bad());

// ----------------------------
// Specify the enum values for the registers.  These enums are only used by the
// OptoReg "class". We can convert these enum values at will to VMReg when needed
// for visibility to the rest of the vm. The order of this enum influences the
// register allocator so having the freedom to set this order and not be stuck
// with the order that is natural for the rest of the vm is worth it.
alloc_class chunk0(
  R_L0,R_L0H, R_L1,R_L1H, R_L2,R_L2H, R_L3,R_L3H, R_L4,R_L4H, R_L5,R_L5H, R_L6,R_L6H, R_L7,R_L7H,
  R_G0,R_G0H, R_G1,R_G1H, R_G2,R_G2H, R_G3,R_G3H, R_G4,R_G4H, R_G5,R_G5H, R_G6,R_G6H, R_G7,R_G7H,
  R_O7,R_O7H, R_SP,R_SPH, R_O0,R_O0H, R_O1,R_O1H, R_O2,R_O2H, R_O3,R_O3H, R_O4,R_O4H, R_O5,R_O5H,
  R_I0,R_I0H, R_I1,R_I1H, R_I2,R_I2H, R_I3,R_I3H, R_I4,R_I4H, R_I5,R_I5H, R_FP,R_FPH, R_I7,R_I7H);

// Note that a register is not allocatable unless it is also mentioned
// in a widely-used reg_class below.  Thus, R_G7 and R_G0 are outside int_reg.

alloc_class chunk1(
  // The first registers listed here are those most likely to be used
  // as temporaries.  We move F0..F7 away from the front of the list,
  // to reduce the likelihood of interferences with parameters and
  // return values.  Likewise, we avoid using F0/F1 for parameters,
  // since they are used for return values.
  // This FPU fine-tuning is worth about 1% on the SPEC geomean.
  R_F8 ,R_F9 ,R_F10,R_F11,R_F12,R_F13,R_F14,R_F15,
  R_F16,R_F17,R_F18,R_F19,R_F20,R_F21,R_F22,R_F23,
  R_F24,R_F25,R_F26,R_F27,R_F28,R_F29,R_F30,R_F31,
  R_F0 ,R_F1 ,R_F2 ,R_F3 ,R_F4 ,R_F5 ,R_F6 ,R_F7 , // used for arguments and return values
  R_D32,R_D32x,R_D34,R_D34x,R_D36,R_D36x,R_D38,R_D38x,
  R_D40,R_D40x,R_D42,R_D42x,R_D44,R_D44x,R_D46,R_D46x,
  R_D48,R_D48x,R_D50,R_D50x,R_D52,R_D52x,R_D54,R_D54x,
  R_D56,R_D56x,R_D58,R_D58x,R_D60,R_D60x,R_D62,R_D62x);

alloc_class chunk2(CCR, FCC0, FCC1, FCC2, FCC3);

//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( as defined in frame section )
// 2) reg_class interpreter_method_oop_reg ( as defined in frame section )
// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//

// G0 is not included in integer class since it has special meaning.
reg_class g0_reg(R_G0);

// ----------------------------
// Integer Register Classes
// ----------------------------
// Exclusions from int_reg:
// R_G0: hardwired zero
// R_G2: reserved by HotSpot to the TLS register (invariant within Java)
// R_G6: reserved by Solaris ABI to tools
// R_G7: reserved by Solaris ABI to libthread
// R_O7: Used as a temp in many encodings
reg_class int_reg(R_G1,R_G3,R_G4,R_G5,R_O0,R_O1,R_O2,R_O3,R_O4,R_O5,R_L0,R_L1,R_L2,R_L3,R_L4,R_L5,R_L6,R_L7,R_I0,R_I1,R_I2,R_I3,R_I4,R_I5);

// Class for all integer registers, except the G registers.  This is used for
// encodings which use G registers as temps.  The regular inputs to such
// instructions use a "notemp_" prefix, as a hack to ensure that the allocator
// will not put an input into a temp register.
reg_class notemp_int_reg(R_O0,R_O1,R_O2,R_O3,R_O4,R_O5,R_L0,R_L1,R_L2,R_L3,R_L4,R_L5,R_L6,R_L7,R_I0,R_I1,R_I2,R_I3,R_I4,R_I5);

reg_class g1_regI(R_G1);
reg_class g3_regI(R_G3);
reg_class g4_regI(R_G4);
reg_class o0_regI(R_O0);
reg_class o7_regI(R_O7);

// ----------------------------
// Pointer Register Classes
// ----------------------------
#ifdef _LP64
// 64-bit build means 64-bit pointers means hi/lo pairs
reg_class ptr_reg(            R_G1H,R_G1,             R_G3H,R_G3, R_G4H,R_G4, R_G5H,R_G5,
                  R_O0H,R_O0, R_O1H,R_O1, R_O2H,R_O2, R_O3H,R_O3, R_O4H,R_O4, R_O5H,R_O5,
                  R_L0H,R_L0, R_L1H,R_L1, R_L2H,R_L2, R_L3H,R_L3, R_L4H,R_L4, R_L5H,R_L5, R_L6H,R_L6, R_L7H,R_L7,
                  R_I0H,R_I0, R_I1H,R_I1, R_I2H,R_I2, R_I3H,R_I3, R_I4H,R_I4, R_I5H,R_I5 );
// Lock encodings use G3 and G4 internally
reg_class lock_ptr_reg(       R_G1H,R_G1,                                     R_G5H,R_G5,
                  R_O0H,R_O0, R_O1H,R_O1, R_O2H,R_O2, R_O3H,R_O3, R_O4H,R_O4, R_O5H,R_O5,
                  R_L0H,R_L0, R_L1H,R_L1, R_L2H,R_L2, R_L3H,R_L3, R_L4H,R_L4, R_L5H,R_L5, R_L6H,R_L6, R_L7H,R_L7,
                  R_I0H,R_I0, R_I1H,R_I1, R_I2H,R_I2, R_I3H,R_I3, R_I4H,R_I4, R_I5H,R_I5 );
// Special class for storeP instructions, which can store SP or RPC to TLS.
// It is also used for memory addressing, allowing direct TLS addressing.
reg_class sp_ptr_reg(         R_G1H,R_G1, R_G2H,R_G2, R_G3H,R_G3, R_G4H,R_G4, R_G5H,R_G5,
                  R_O0H,R_O0, R_O1H,R_O1, R_O2H,R_O2, R_O3H,R_O3, R_O4H,R_O4, R_O5H,R_O5, R_SPH,R_SP,
                  R_L0H,R_L0, R_L1H,R_L1, R_L2H,R_L2, R_L3H,R_L3, R_L4H,R_L4, R_L5H,R_L5, R_L6H,R_L6, R_L7H,R_L7,
                  R_I0H,R_I0, R_I1H,R_I1, R_I2H,R_I2, R_I3H,R_I3, R_I4H,R_I4, R_I5H,R_I5, R_FPH,R_FP );
// R_L7 is the lowest-priority callee-save (i.e., NS) register
// We use it to save R_G2 across calls out of Java.
reg_class l7_regP(R_L7H,R_L7);

// Other special pointer regs
reg_class g1_regP(R_G1H,R_G1);
reg_class g2_regP(R_G2H,R_G2);
reg_class g3_regP(R_G3H,R_G3);
reg_class g4_regP(R_G4H,R_G4);
reg_class g5_regP(R_G5H,R_G5);
reg_class i0_regP(R_I0H,R_I0);
reg_class o0_regP(R_O0H,R_O0);
reg_class o1_regP(R_O1H,R_O1);
reg_class o2_regP(R_O2H,R_O2);
reg_class o7_regP(R_O7H,R_O7);

#else // _LP64
// 32-bit build means 32-bit pointers means 1 register.
reg_class ptr_reg(     R_G1,     R_G3,R_G4,R_G5,
                  R_O0,R_O1,R_O2,R_O3,R_O4,R_O5,
                  R_L0,R_L1,R_L2,R_L3,R_L4,R_L5,R_L6,R_L7,
                  R_I0,R_I1,R_I2,R_I3,R_I4,R_I5);
// Lock encodings use G3 and G4 internally
reg_class lock_ptr_reg(R_G1,               R_G5,
                  R_O0,R_O1,R_O2,R_O3,R_O4,R_O5,
                  R_L0,R_L1,R_L2,R_L3,R_L4,R_L5,R_L6,R_L7,
                  R_I0,R_I1,R_I2,R_I3,R_I4,R_I5);
// Special class for storeP instructions, which can store SP or RPC to TLS.
// It is also used for memory addressing, allowing direct TLS addressing.
reg_class sp_ptr_reg(  R_G1,R_G2,R_G3,R_G4,R_G5,
                  R_O0,R_O1,R_O2,R_O3,R_O4,R_O5,R_SP,
                  R_L0,R_L1,R_L2,R_L3,R_L4,R_L5,R_L6,R_L7,
                  R_I0,R_I1,R_I2,R_I3,R_I4,R_I5,R_FP);
// R_L7 is the lowest-priority callee-save (i.e., NS) register
// We use it to save R_G2 across calls out of Java.
reg_class l7_regP(R_L7);

// Other special pointer regs
reg_class g1_regP(R_G1);
reg_class g2_regP(R_G2);
reg_class g3_regP(R_G3);
reg_class g4_regP(R_G4);
reg_class g5_regP(R_G5);
reg_class i0_regP(R_I0);
reg_class o0_regP(R_O0);
reg_class o1_regP(R_O1);
reg_class o2_regP(R_O2);
reg_class o7_regP(R_O7);
#endif // _LP64


// ----------------------------
// Long Register Classes
// ----------------------------
// Longs in 1 register.  Aligned adjacent hi/lo pairs.
// Note:  O7 is never in this class; it is sometimes used as an encoding temp.
reg_class long_reg(             R_G1H,R_G1,             R_G3H,R_G3, R_G4H,R_G4, R_G5H,R_G5
                   ,R_O0H,R_O0, R_O1H,R_O1, R_O2H,R_O2, R_O3H,R_O3, R_O4H,R_O4, R_O5H,R_O5
#ifdef _LP64
// 64-bit, longs in 1 register: use all 64-bit integer registers
// 32-bit, longs in 1 register: cannot use I's and L's.  Restrict to O's and G's.
                   ,R_L0H,R_L0, R_L1H,R_L1, R_L2H,R_L2, R_L3H,R_L3, R_L4H,R_L4, R_L5H,R_L5, R_L6H,R_L6, R_L7H,R_L7
                   ,R_I0H,R_I0, R_I1H,R_I1, R_I2H,R_I2, R_I3H,R_I3, R_I4H,R_I4, R_I5H,R_I5
#endif // _LP64
                  );

reg_class g1_regL(R_G1H,R_G1);
reg_class g3_regL(R_G3H,R_G3);
reg_class o2_regL(R_O2H,R_O2);
reg_class o7_regL(R_O7H,R_O7);

// ----------------------------
// Special Class for Condition Code Flags Register
reg_class int_flags(CCR);
reg_class float_flags(FCC0,FCC1,FCC2,FCC3);
reg_class float_flag0(FCC0);


// ----------------------------
// Floating Point Register Classes
// ----------------------------
// Skip F30/F31, they are reserved for mem-mem copies
reg_class sflt_reg(R_F0,R_F1,R_F2,R_F3,R_F4,R_F5,R_F6,R_F7,R_F8,R_F9,R_F10,R_F11,R_F12,R_F13,R_F14,R_F15,R_F16,R_F17,R_F18,R_F19,R_F20,R_F21,R_F22,R_F23,R_F24,R_F25,R_F26,R_F27,R_F28,R_F29);

// Paired floating point registers--they show up in the same order as the floats,
// but they are used with the "Op_RegD" type, and always occur in even/odd pairs.
reg_class dflt_reg(R_F0, R_F1, R_F2, R_F3, R_F4, R_F5, R_F6, R_F7, R_F8, R_F9, R_F10,R_F11,R_F12,R_F13,R_F14,R_F15,
                   R_F16,R_F17,R_F18,R_F19,R_F20,R_F21,R_F22,R_F23,R_F24,R_F25,R_F26,R_F27,R_F28,R_F29,
                   /* Use extra V9 double registers; this AD file does not support V8 */
                   R_D32,R_D32x,R_D34,R_D34x,R_D36,R_D36x,R_D38,R_D38x,R_D40,R_D40x,R_D42,R_D42x,R_D44,R_D44x,R_D46,R_D46x,
                   R_D48,R_D48x,R_D50,R_D50x,R_D52,R_D52x,R_D54,R_D54x,R_D56,R_D56x,R_D58,R_D58x,R_D60,R_D60x,R_D62,R_D62x
                   );

// Paired floating point registers--they show up in the same order as the floats,
// but they are used with the "Op_RegD" type, and always occur in even/odd pairs.
// This class is usable for mis-aligned loads as happen in I2C adapters.
reg_class dflt_low_reg(R_F0, R_F1, R_F2, R_F3, R_F4, R_F5, R_F6, R_F7, R_F8, R_F9, R_F10,R_F11,R_F12,R_F13,R_F14,R_F15,
                   R_F16,R_F17,R_F18,R_F19,R_F20,R_F21,R_F22,R_F23,R_F24,R_F25,R_F26,R_F27,R_F28,R_F29);
%}

//----------DEFINITION BLOCK---------------------------------------------------
// Define name --> value mappings to inform the ADLC of an integer valued name
// Current support includes integer values in the range [0, 0x7FFFFFFF]
// Format:
//        int_def  <name>         ( <int_value>, <expression>);
// Generated Code in ad_<arch>.hpp
//        #define  <name>   (<expression>)
//        // value == <int_value>
// Generated code in ad_<arch>.cpp adlc_verification()
//        assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
//
definitions %{
// Instruction cost table.  Each int_def names a cost, gives its expected
// integer value and the expression that must evaluate to that value.

// The default cost (of an ALU instruction).
  int_def DEFAULT_COST      (    100,     100);
// A cost several orders of magnitude above every other cost defined here.
  int_def HUGE_COST         (1000000, 1000000);

// Memory refs are twice as expensive as run-of-the-mill.
  int_def MEMORY_REF_COST   (    200, DEFAULT_COST * 2);

// Branches are even more expensive.
  int_def BRANCH_COST       (    300, DEFAULT_COST * 3);
// Calls are costed the same as branches.
  int_def CALL_COST         (    300, DEFAULT_COST * 3);
%}


//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source_hpp %{
// Must be visible to the DFA in dfa_sparc.cpp
// Returns whether the given Bool/compare pair may be matched as a
// branch-on-register.
extern bool can_branch_register( Node *bol, Node *cmp );

// Returns whether block zeroing should be used for the given count node.
extern bool use_block_zeroing(Node* count);

// Macros to extract hi & lo halves from a long pair.
// NOTE(review): both macros are currently identity mappings.  An earlier
// remark here said that G0 is not part of any long pair and that the macros
// assert on that to prevent accidentally using G1 instead of G0; no such
// assertion exists in these definitions.
#define LONG_HI_REG(x) (x)
#define LONG_LO_REG(x) (x)

%}

source %{
// Assembler shorthand: "__ nop();" expands to "_masm.nop();", so every user
// of "__" needs a MacroAssembler named _masm in scope.
#define __ _masm.

// tertiary op of a LoadP or StoreP encoding
#define REGP_OP true

// Forward declarations of the helpers that turn a register encoding into a
// register object; their definitions are not in this part of the file.
static FloatRegister reg_to_SingleFloatRegister_object(int register_encoding);
static FloatRegister reg_to_DoubleFloatRegister_object(int register_encoding);
static Register reg_to_register_object(int register_encoding);

// Used by the DFA in dfa_sparc.cpp.
// Tells whether a compare feeding a branch may use a V9 branch-on-register.
// That form requires a compare-vs-zero, equal/not-equal, of a value which
// was zero- or sign-extended.  It doesn't work following an integer ADD, for
// example, because of overflow (-1 incremented yields 0 plus a carry in the
// high-order word).  On 32-bit V9 systems, interrupts currently blow away
// the high-order 32 bits and replace them with zero, which could become
// sign-extension in a different OS release.  There's no obvious reason why
// an interrupt will ever fill these bits with non-zero junk (the registers
// are reloaded with standard LD instructions which either zero-fill or
// sign-fill).
bool can_branch_register( Node *bol, Node *cmp ) {
  if (!BranchOnRegister) {
    return false;
  }

  const int cmp_opcode = cmp->Opcode();
#ifdef _LP64
  if (cmp_opcode == Op_CmpP) {
    return true;  // No problems with pointer compares
  }
#endif
  if (cmp_opcode == Op_CmpL) {
    return true;  // No problems with long compares
  }

  if (!SparcV9RegsHiBitsZero) {
    return false;
  }

  // Only equal / not-equal tests qualify.
  switch (bol->as_Bool()->_test._test) {
    case BoolTest::ne:
    case BoolTest::eq:
      break;
    default:
      return false;
  }

  // The compared value must be 'safe', i.e. known to have a cleared high
  // word.  Loads, certain shifts and non-negative constants clear it;
  // phis and bitwise booleans preserve it when all of their inputs are
  // safe.  Only loads (and phis made up solely of loads) are recognized
  // here: constants should fold away, and shifts & logicals can use the
  // 'cc' forms.
  Node *value = cmp->in(1);
  if (value->is_Load()) {
    return true;
  }
  if (!value->is_Phi()) {
    return false;
  }
  for (uint idx = 1; idx < value->req(); idx++) {
    if (!value->in(idx)->is_Load()) {
      return false;
    }
  }
  return true;
}

// Decides whether BIS block zeroing is used for a clear of "count" units:
// block zeroing must be enabled, and the count must either be non-constant
// or be at least BlockZeroingLowLimit.
bool use_block_zeroing(Node* count) {
  if (!UseBlockZeroing) {
    return false;
  }
  // The limit is passed as the fallback value, so that a count which is not
  // a constant satisfies the comparison as well.
  return count->find_intptr_t_con(BlockZeroingLowLimit) >= BlockZeroingLowLimit;
}

// ****************************************************************************

// REQUIRED FUNCTIONALITY

// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
//       The "return address" is the address of the call instruction, plus 8.

// Byte offset from the start of a static Java call to its return address:
// the call and its delay slot, plus one extra instruction (restore SP)
// when the call is a method handle invoke.
int MachCallStaticJavaNode::ret_addr_offset() {
  const int restore_sp_size = _method_handle_invoke ? 4 : 0;  // restore SP
  return NativeCall::instruction_size + restore_sp_size;      // call; delay slot
}

// Byte offset from the start of a dynamic Java call to its return address.
// A negative vtable index uses a constant move followed by the call.
// Otherwise the sequence is a klass load, the loads that fetch the method
// from the vtable, and the call.
int MachCallDynamicJavaNode::ret_addr_offset() {
  const int vtable_index = this->_vtable_index;

  if (vtable_index < 0) {
    // must be invalid_vtable_index, not nonvirtual_vtable_index
    assert(vtable_index == methodOopDesc::invalid_vtable_index, "correct sentinel value");
    // sethi; setlo; call; delay slot
    return (NativeMovConstReg::instruction_size +
            NativeCall::instruction_size);
  }

  assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
  const int entry_offset = instanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();
  const int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes();

  // Size of the klass load; see MacroAssembler::load_klass()
  int klass_load_size = 1*BytesPerInstWord;
  if (UseCompressedOops) {
    assert(Universe::heap() != NULL, "java heap should be initialized");
    if (Universe::narrow_oop_base() == NULL) {
      klass_load_size = 2*BytesPerInstWord;
    } else {
      klass_load_size = 3*BytesPerInstWord;
    }
  }

  // v_off fits in simm13:  ld_ptr, ld_ptr
  // otherwise:             set_hi, set, ld_ptr, ld_ptr
  const int method_load_insts = Assembler::is_simm13(v_off) ? 2 : 4;
  return klass_load_size +
         (method_load_insts*BytesPerInstWord +
          NativeCall::instruction_size);  // call; delay slot
}

// Byte offset from the start of a runtime call to its return address.
// On 64-bit builds a far target takes the size of a NativeFarCall; every
// other case is a plain call plus its delay slot.
int MachCallRuntimeNode::ret_addr_offset() {
  int call_size = NativeCall::instruction_size;  // call; delay slot
#ifdef _LP64
  if (MacroAssembler::is_far_target(entry_point())) {
    call_size = NativeFarCall::instruction_size;
  }
#endif
  return call_size;
}

// Reports whether a safepoint node takes the polling page address as an
// input.  Sparc lacks absolute addressing, so the answer is always yes.
bool SafePointNode::needs_polling_address_input() {
  const bool polling_address_is_input = true;
  return polling_address_is_input;
}

// Emits a breakpoint trap into cbuf; the trap is meant to be caught by a
// debugger (used for debugging the compiler).
void emit_break(CodeBuffer &cbuf) {
  MacroAssembler masm(&cbuf);
  masm.breakpoint_trap();
}

#ifndef PRODUCT
// Prints the textual form of a breakpoint node, "TA", to st.  Compiled only
// when PRODUCT is not defined; the register allocator argument is unused.
void MachBreakpointNode::format( PhaseRegAlloc *, outputStream *st ) const {
  st->print("TA");
}
#endif

// Emits the code for a breakpoint node by delegating to emit_break();
// ra_ is not used.
void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  emit_break(cbuf);
}

// Size of a breakpoint node: no special-casing here, the generic
// MachNode::size() result is returned unchanged.
uint MachBreakpointNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

// Traceable jump: jumps to the address held in the register whose encoding
// is jump_target (offset 0) and fills the delay slot with a nop.
void  emit_jmpl(CodeBuffer &cbuf, int jump_target) {
  MacroAssembler masm(&cbuf);
  const Register target = reg_to_register_object(jump_target);
  masm.JMP(target, 0);
  masm.delayed()->nop();
}

// Traceable jump and set exception pc: jumps to the address held in the
// register whose encoding is jump_target (offset 0); the delay slot
// computes O7 + frame::pc_return_offset into Oissuing_pc.
void  emit_jmpl_set_exception_pc(CodeBuffer &cbuf, int jump_target) {
  MacroAssembler masm(&cbuf);
  const Register target = reg_to_register_object(jump_target);
  masm.JMP(target, 0);
  masm.delayed()->add(O7, frame::pc_return_offset, Oissuing_pc );
}

// Emits a single nop into cbuf.
void emit_nop(CodeBuffer &cbuf) {
  MacroAssembler masm(&cbuf);
  masm.nop();
}

// Emits an illtrap instruction with a constant field of 0 into cbuf.
void emit_illtrap(CodeBuffer &cbuf) {
  MacroAssembler masm(&cbuf);
  masm.illtrap(0);
}


// Returns the offset carried by atype after the displacement of n's memory
// operand has been added to it.  Debug builds additionally check that the
// operand has a plain base (no base==RegI, disp==immP form), that the
// base's type is atype, and that disp32 equals the displacement reported
// by the node.
intptr_t get_offset_from_base(const MachNode* n, const TypePtr* atype, int disp32) {
  assert(n->rule() != loadUB_rule, "");

  intptr_t disp = 0;
  const TypePtr *base_adr_type = TYPE_PTR_SENTINAL;  // Check for base==RegI, disp==immP
  const Node* base = n->get_base_and_disp(disp, base_adr_type);
  assert(base_adr_type == (const TypePtr*)-1, "VerifyOops: no support for sparc operands with base==RegI, disp==immP");
  assert(base != NULL && base != (Node*)-1, "invalid addr");
  assert(base->bottom_type()->isa_oopptr() == atype, "");
  assert(disp32 == disp, "wrong disp32");

  const TypePtr* displaced = atype->add_offset(disp);
  return displaced->_offset;
}


// Variant of get_offset_from_base() that inspects input 2 of n directly.
// When that input is a Mach node matched from an AddP, the type is rebuilt
// from the AddP's address input plus its offset input (Type::OffsetBot if
// the offset is not a constant).  disp32 is added to the resulting offset
// unless that offset is Type::OffsetBot.
intptr_t get_offset_from_base_2(const MachNode* n, const TypePtr* atype, int disp32) {
  assert(n->rule() != loadUB_rule, "");

  Node* addr = n->in(2);
  assert(addr->bottom_type()->isa_oopptr() == atype, "");

  if (addr->is_Mach() && addr->as_Mach()->ideal_Opcode() == Op_AddP) {
    Node* base_adr = addr->in(2/*AddPNode::Address*/);
    Node* off_node = addr->in(3/*AddPNode::Offset*/);
    intptr_t add_off = Type::OffsetBot;
    if (off_node->is_Con()) {
      add_off = off_node->bottom_type()->is_intptr_t()->get_con();
    }
    atype = base_adr->bottom_type()->is_ptr()->add_offset(add_off);
    assert(atype->isa_oop_ptr(), "still an oop");
  }

  intptr_t result = atype->is_ptr()->_offset;
  if (result != Type::OffsetBot) {
    result += disp32;
  }
  return result;
}

// Builds a 64-bit pattern holding the low "width" bytes of "con" replicated
// "count" times, and returns that pattern reinterpreted as a jdouble.
//
//   con    value to replicate; only its low width*8 bits are kept
//   count  number of copies, packed from the low end upwards
//   width  element width in bytes
//
// All shifting is done on an unsigned value so that set bits moving into
// the top of the word stay well defined.  An element that already fills the
// whole 64 bits (width >= 8) is returned as is: shifting by the full word
// width would be undefined, and further copies would not fit anyway.
static inline jdouble replicate_immI(int con, int count, int width) {
  const int bit_width = width * 8;

  // Mask selecting one element; built without ever shifting by >= 64.
  unsigned long long elt_mask;
  if (bit_width >= 64) {
    elt_mask = ~(unsigned long long) 0;
  } else if (bit_width <= 0) {
    elt_mask = 0;
  } else {
    elt_mask = (((unsigned long long) 1) << bit_width) - 1;
  }

  // Sign-extend con to 64 bits first, then mask off the sign bits.
  const unsigned long long elt_bits = ((unsigned long long) (jlong) con) & elt_mask;

  unsigned long long bits = elt_bits;
  if (bit_width > 0 && bit_width < 64) {
    for (int i = 0; i < count - 1; i++) {
      bits = (bits << bit_width) | elt_bits;
    }
  }

  // Coerce to double type through a union instead of dereferencing a
  // jlong object through a jdouble pointer.
  union {
    jlong   l;
    jdouble d;
  } pun;
  pun.l = (jlong) bits;
  return pun.d;
}

// Standard Sparc opcode form2 field breakdown, 19-bit displacement variant.
// Each fNN argument is placed with its lowest bit at bit NN of the
// instruction word; f0 is the displacement and is truncated to 19 bits.
static inline void emit2_19(CodeBuffer &cbuf, int f30, int f29, int f25, int f22, int f20, int f19, int f0 ) {
  const int disp19 = f0 & ((1<<19)-1);  // Mask displacement to 19 bits
  int insn = disp19;
  insn |= (f19 << 19);
  insn |= (f20 << 20);
  insn |= (f22 << 22);
  insn |= (f25 << 25);
  insn |= (f29 << 29);
  insn |= (f30 << 30);
  cbuf.insts()->emit_int32(insn);
}

// Standard Sparc opcode form2 field breakdown (22-bit immediate variant).
//
// f0 is shifted right by 10 and truncated to 22 bits before being placed in
// the low bits of the word; f30, f25 and f22 are OR-ed in at the bit position
// given by their names.  The finished 32-bit word is appended to the
// instruction section of cbuf.
static inline void emit2_22(CodeBuffer &cbuf, int f30, int f25, int f22, int f0 ) {
  f0 >>= 10;           // Drop 10 bits
  f0 &= (1<<22)-1;     // Mask displacement to 22 bits
  // Assemble in unsigned arithmetic: left-shifting a signed int into or past
  // the sign bit (e.g. f30 >= 2) is undefined behaviour.
  unsigned int op = ((unsigned int)f30 << 30) |
                    ((unsigned int)f25 << 25) |
                    ((unsigned int)f22 << 22) |
                    ((unsigned int)f0  <<  0);
  cbuf.insts()->emit_int32((int)op);
}

// Standard Sparc opcode form3 field breakdown (register-register variant).
//
// Each fNN argument is OR-ed into the instruction word at bit position NN
// without any masking, so callers must pass values that fit their field.
// The finished 32-bit word is appended to the instruction section of cbuf.
static inline void emit3(CodeBuffer &cbuf, int f30, int f25, int f19, int f14, int f5, int f0 ) {
  // Assemble in unsigned arithmetic: left-shifting a signed int into or past
  // the sign bit (e.g. f30 >= 2) is undefined behaviour.
  unsigned int op = ((unsigned int)f30 << 30) |
                    ((unsigned int)f25 << 25) |
                    ((unsigned int)f19 << 19) |
                    ((unsigned int)f14 << 14) |
                    ((unsigned int)f5  <<  5) |
                    ((unsigned int)f0  <<  0);
  cbuf.insts()->emit_int32((int)op);
}

// Standard Sparc opcode form3 field breakdown (13-bit immediate variant).
//
// simm13 is truncated to its low 13 bits and placed in the low bits of the
// word, bit 13 is set to select immediate mode, and f30/f25/f19/f14 are OR-ed
// in at the bit position given by their names.  The finished 32-bit word is
// appended to the instruction section of cbuf.
static inline void emit3_simm13(CodeBuffer &cbuf, int f30, int f25, int f19, int f14, int simm13 ) {
  simm13 &= (1<<13)-1; // Mask to 13 bits
  // Assemble in unsigned arithmetic: left-shifting a signed int into or past
  // the sign bit (e.g. f30 >= 2) is undefined behaviour.
  unsigned int op = ((unsigned int)f30 << 30) |
                    ((unsigned int)f25 << 25) |
                    ((unsigned int)f19 << 19) |
                    ((unsigned int)f14 << 14) |
                    (1u                << 13) | // bit to indicate immediate-mode
                    ((unsigned int)simm13 << 0);
  cbuf.insts()->emit_int32((int)op);
}

// Form3 immediate emitter for a 10-bit immediate: keeps only the low 10 bits
// of simm10 and hands the rest of the work to emit3_simm13.
static inline void emit3_simm10(CodeBuffer &cbuf, int f30, int f25, int f19, int f14, int simm10 ) {
  const int low10_mask = (1<<10)-1;
  emit3_simm13(cbuf, f30, f25, f19, f14, simm10 & low10_mask);
}

#ifdef ASSERT
// VerifyOops helper for emit_form3_mem_reg (ASSERT builds only).
// Reports a node whose ideal opcode does not agree with the memory opcode
// derived from the instruction encoding: prints a warning, dumps the node,
// then prints both opcode names.
void verify_oops_warning(const MachNode *n, int ideal_op, int mem_op) {
  warning("VerifyOops encountered unexpected instruction:");
  n->dump(2);

  const char* ideal_name = NodeClassNames[ideal_op];
  const char* mem_name   = NodeClassNames[mem_op];
  warning("Instruction has ideal_Opcode==Op_%s and op_ld==Op_%s \n", ideal_name, mem_name);
}
#endif


// Emit one SPARC load/store (ldst_op, format 3) instruction into cbuf.
//
//   n         node being emitted; only inspected by the VerifyOops code
//   primary   op3 field (one of the Assembler::*_op3 load/store codes)
//   tertiary  either -1 or REGP_OP; REGP_OP marks a pointer load/store for
//             the VerifyOops classification
//   src1_enc  base register encoding (rs1)
//   disp32    displacement; STACK_BIAS is added when the base is SP or FP
//   src2_enc  index register encoding (rs2); only encoded when the final
//             displacement is zero
//   dst_enc   data register encoding (rd)
//
// The final displacement must fit in simm13 (enforced by a guarantee).
//
// In ASSERT builds with VerifyOops enabled (and a base other than SP), the
// opcode is classified and cross-checked against the node's ideal opcode, and
// verify_oop calls are emitted after the memory instruction for the base
// register and/or the loaded or stored value when they are known to be oops.
void emit_form3_mem_reg(CodeBuffer &cbuf, const MachNode* n, int primary, int tertiary,
                        int src1_enc, int disp32, int src2_enc, int dst_enc) {

#ifdef ASSERT
  // The following code implements the +VerifyOops feature.
  // It verifies oop values which are loaded into or stored out of
  // the current method activation.  +VerifyOops complements techniques
  // like ScavengeALot, because it eagerly inspects oops in transit,
  // as they enter or leave the stack, as opposed to ScavengeALot,
  // which inspects oops "at rest", in the stack or heap, at safepoints.
  // For this reason, +VerifyOops can sometimes detect bugs very close
  // to their point of creation.  It can also serve as a cross-check
  // on the validity of oop maps, when used together with ScavengeALot.

  // It would be good to verify oops at other points, especially
  // when an oop is used as a base pointer for a load or store.
  // This is presently difficult, because it is hard to know when
  // a base address is biased or not.  (If we had such information,
  // it would be easy and useful to make a two-argument version of
  // verify_oop which unbiases the base, and performs verification.)

  assert((uint)tertiary == 0xFFFFFFFF || tertiary == REGP_OP, "valid tertiary");
  bool is_verified_oop_base  = false;
  bool is_verified_oop_load  = false;
  bool is_verified_oop_store = false;
  // When not -1: the original destination register of a load that was
  // redirected to O7 so that the base oop survives until it is verified.
  int tmp_enc = -1;
  if (VerifyOops && src1_enc != R_SP_enc) {
    // classify the op, mainly for an assert check
    int st_op = 0, ld_op = 0;
    switch (primary) {
    case Assembler::stb_op3:  st_op = Op_StoreB; break;
    case Assembler::sth_op3:  st_op = Op_StoreC; break;
    case Assembler::stx_op3:  // may become StoreP or stay StoreI or StoreD0
    case Assembler::stw_op3:  st_op = Op_StoreI; break;
    case Assembler::std_op3:  st_op = Op_StoreL; break;
    case Assembler::stf_op3:  st_op = Op_StoreF; break;
    case Assembler::stdf_op3: st_op = Op_StoreD; break;

    case Assembler::ldsb_op3: ld_op = Op_LoadB; break;
    case Assembler::lduh_op3: ld_op = Op_LoadUS; break;
    case Assembler::ldsh_op3: ld_op = Op_LoadS; break;
    case Assembler::ldx_op3:  // may become LoadP or stay LoadI
    case Assembler::ldsw_op3: // may become LoadP or stay LoadI
    case Assembler::lduw_op3: ld_op = Op_LoadI; break;
    case Assembler::ldd_op3:  ld_op = Op_LoadL; break;
    case Assembler::ldf_op3:  ld_op = Op_LoadF; break;
    case Assembler::lddf_op3: ld_op = Op_LoadD; break;
    case Assembler::ldub_op3: ld_op = Op_LoadB; break;
    case Assembler::prefetch_op3: ld_op = Op_LoadI; break;

    default: ShouldNotReachHere();
    }
    if (tertiary == REGP_OP) {
      // Pointer-sized access: promote the integer classification to the
      // pointer one and decide whether the transferred value can be verified.
      if      (st_op == Op_StoreI)  st_op = Op_StoreP;
      else if (ld_op == Op_LoadI)   ld_op = Op_LoadP;
      else                          ShouldNotReachHere();
      if (st_op) {
        // a store
        // inputs are (0:control, 1:memory, 2:address, 3:value)
        Node* n2 = n->in(3);
        if (n2 != NULL) {
          const Type* t = n2->bottom_type();
          is_verified_oop_store = t->isa_oop_ptr() ? (t->is_ptr()->_offset==0) : false;
        }
      } else {
        // a load
        const Type* t = n->bottom_type();
        is_verified_oop_load = t->isa_oop_ptr() ? (t->is_ptr()->_offset==0) : false;
      }
    }

    if (ld_op) {
      // a Load
      // inputs are (0:control, 1:memory, 2:address)
      if (!(n->ideal_Opcode()==ld_op)       && // Following are special cases
          !(n->ideal_Opcode()==Op_LoadLLocked && ld_op==Op_LoadI) &&
          !(n->ideal_Opcode()==Op_LoadPLocked && ld_op==Op_LoadP) &&
          !(n->ideal_Opcode()==Op_LoadI     && ld_op==Op_LoadF) &&
          !(n->ideal_Opcode()==Op_LoadF     && ld_op==Op_LoadI) &&
          !(n->ideal_Opcode()==Op_LoadRange && ld_op==Op_LoadI) &&
          !(n->ideal_Opcode()==Op_LoadKlass && ld_op==Op_LoadP) &&
          !(n->ideal_Opcode()==Op_LoadL     && ld_op==Op_LoadI) &&
          !(n->ideal_Opcode()==Op_LoadL_unaligned && ld_op==Op_LoadI) &&
          !(n->ideal_Opcode()==Op_LoadD_unaligned && ld_op==Op_LoadF) &&
          !(n->ideal_Opcode()==Op_ConvI2F   && ld_op==Op_LoadF) &&
          !(n->ideal_Opcode()==Op_ConvI2D   && ld_op==Op_LoadF) &&
          !(n->ideal_Opcode()==Op_PrefetchRead  && ld_op==Op_LoadI) &&
          !(n->ideal_Opcode()==Op_PrefetchWrite && ld_op==Op_LoadI) &&
          !(n->ideal_Opcode()==Op_PrefetchAllocation && ld_op==Op_LoadI) &&
          !(n->ideal_Opcode()==Op_Load2I    && ld_op==Op_LoadD) &&
          !(n->ideal_Opcode()==Op_Load4C    && ld_op==Op_LoadD) &&
          !(n->ideal_Opcode()==Op_Load4S    && ld_op==Op_LoadD) &&
          !(n->ideal_Opcode()==Op_Load8B    && ld_op==Op_LoadD) &&
          !(n->rule() == loadUB_rule)) {
        verify_oops_warning(n, n->ideal_Opcode(), ld_op);
      }
    } else if (st_op) {
      // a Store
      // inputs are (0:control, 1:memory, 2:address, 3:value)
      if (!(n->ideal_Opcode()==st_op)    && // Following are special cases
          !(n->ideal_Opcode()==Op_StoreCM && st_op==Op_StoreB) &&
          !(n->ideal_Opcode()==Op_StoreI && st_op==Op_StoreF) &&
          !(n->ideal_Opcode()==Op_StoreF && st_op==Op_StoreI) &&
          !(n->ideal_Opcode()==Op_StoreL && st_op==Op_StoreI) &&
          !(n->ideal_Opcode()==Op_Store2I && st_op==Op_StoreD) &&
          !(n->ideal_Opcode()==Op_Store4C && st_op==Op_StoreD) &&
          !(n->ideal_Opcode()==Op_Store8B && st_op==Op_StoreD) &&
          !(n->ideal_Opcode()==Op_StoreD && st_op==Op_StoreI && n->rule() == storeD0_rule)) {
        verify_oops_warning(n, n->ideal_Opcode(), st_op);
      }
    }

    // Base-register verification is only attempted for [reg + disp] forms
    // (index register is G0) whose address input is not itself an AddP.
    if (src2_enc == R_G0_enc && n->rule() != loadUB_rule && n->ideal_Opcode() != Op_StoreCM ) {
      Node* addr = n->in(2);
      if (!(addr->is_Mach() && addr->as_Mach()->ideal_Opcode() == Op_AddP)) {
        const TypeOopPtr* atype = addr->bottom_type()->isa_instptr();  // %%% oopptr?
        if (atype != NULL) {
          intptr_t offset = get_offset_from_base(n, atype, disp32);
          intptr_t offset_2 = get_offset_from_base_2(n, atype, disp32);
          if (offset != offset_2) {
            // The two computations disagree: repeat both calls (results are
            // discarded) before the assert below fires.
            // NOTE(review): presumably a convenience for stepping in a debugger.
            get_offset_from_base(n, atype, disp32);
            get_offset_from_base_2(n, atype, disp32);
          }
          assert(offset == offset_2, "different offsets");
          if (offset == disp32) {
            // we now know that src1 is a true oop pointer
            is_verified_oop_base = true;
            if (ld_op && src1_enc == dst_enc && ld_op != Op_LoadF && ld_op != Op_LoadD) {
              // The load would overwrite its own base register before the
              // base could be verified.
              if( primary == Assembler::ldd_op3 ) {
                is_verified_oop_base = false; // Cannot 'ldd' into O7
              } else {
                tmp_enc = dst_enc;
                dst_enc = R_O7_enc; // Load into O7; preserve source oop
                assert(src1_enc != dst_enc, "");
              }
            }
          }
          if (st_op && (( offset == oopDesc::klass_offset_in_bytes())
                       || offset == oopDesc::mark_offset_in_bytes())) {
                      // loading the mark should not be allowed either, but
                      // we don't check this since it conflicts with InlineObjectHash
                      // usage of LoadINode to get the mark. We could keep the
                      // check if we create a new LoadMarkNode
            // but do not verify the object before its header is initialized
            ShouldNotReachHere();
          }
        }
      }
    }
  }
#endif

  // Assemble the instruction word: op | rd | op3 | rs1, then either rs2 or
  // the immediate bit plus a 13-bit displacement.
  uint instr;
  instr = (Assembler::ldst_op << 30)
        | (dst_enc        << 25)
        | (primary        << 19)
        | (src1_enc       << 14);

  uint index = src2_enc;
  int disp = disp32;

  if (src1_enc == R_SP_enc || src1_enc == R_FP_enc)
    disp += STACK_BIAS;

  // We should have a compiler bailout here rather than a guarantee.
  // Better yet would be some mechanism to handle variable-size matches correctly.
  guarantee(Assembler::is_simm13(disp), "Do not match large constant offsets" );

  // NOTE: the index register is only encoded when the displacement is zero;
  // with a non-zero displacement src2_enc is not used.
  if( disp == 0 ) {
    // use reg-reg form
    // bit 13 is already zero
    instr |= index;
  } else {
    // use reg-imm form
    instr |= 0x00002000;          // set bit 13 to one
    instr |= disp & 0x1FFF;
  }

  cbuf.insts()->emit_int32(instr);

#ifdef ASSERT
  // Emit the verification code selected above, after the memory instruction.
  {
    MacroAssembler _masm(&cbuf);
    if (is_verified_oop_base) {
      __ verify_oop(reg_to_register_object(src1_enc));
    }
    if (is_verified_oop_store) {
      // For a store, dst_enc names the register holding the stored value.
      __ verify_oop(reg_to_register_object(dst_enc));
    }
    if (tmp_enc != -1) {
      // The load was redirected to O7; move the value to its real destination.
      __ mov(O7, reg_to_register_object(tmp_enc));
    }
    if (is_verified_oop_load) {
      __ verify_oop(reg_to_register_object(dst_enc));
    }
  }
#endif
}

// Emit a call to entry_point with relocation type rtype, followed by its
// delay slot.
//
// When preserve_g2 is true, G2 is copied to L7 in the delay slot and copied
// back from L7 right after the call; otherwise the delay slot holds a nop.
// In ASSERT builds, when preserve_g2 is set and VerifyCompiledCode or
// VerifyOops is on, G1/G5 and a range of stack slots above SP are overwritten
// with a recognizable junk pattern after the call.
void emit_call_reloc(CodeBuffer &cbuf, intptr_t entry_point, relocInfo::relocType rtype, bool preserve_g2 = false) {
  // The method which records debug information at every safepoint
  // expects the call to be the first instruction in the snippet as
  // it creates a PcDesc structure which tracks the offset of a call
  // from the start of the codeBlob. This offset is computed as
  // code_end() - code_begin() of the code which has been emitted
  // so far.
  // In this particular case we have skirted around the problem by
  // putting the "mov" instruction in the delay slot but the problem
  // may bite us again at some other point and a cleaner/generic
  // solution using relocations would be needed.
  MacroAssembler _masm(&cbuf);
  __ set_inst_mark();

  // We flush the current window just so that there is a valid stack copy
  // the fact that the current window becomes active again instantly is
  // not a problem there is nothing live in it.
  // NOTE(review): no window flush is emitted by the code below; this comment
  // may be stale -- confirm.

#ifdef ASSERT
  // NOTE(review): startpos is not referenced again in this function.
  int startpos = __ offset();
#endif /* ASSERT */

  __ call((address)entry_point, rtype);

  // Delay slot of the call.
  if (preserve_g2)   __ delayed()->mov(G2, L7);
  else __ delayed()->nop();

  // Restore G2 from the copy made in the delay slot.
  if (preserve_g2)   __ mov(L7, G2);

#ifdef ASSERT
  if (preserve_g2 && (VerifyCompiledCode || VerifyOops)) {
#ifdef _LP64
    // Trash argument dump slots.
    __ set(0xb0b8ac0db0b8ac0d, G1);
    __ mov(G1, G5);
    __ stx(G1, SP, STACK_BIAS + 0x80);
    __ stx(G1, SP, STACK_BIAS + 0x88);
    __ stx(G1, SP, STACK_BIAS + 0x90);
    __ stx(G1, SP, STACK_BIAS + 0x98);
    __ stx(G1, SP, STACK_BIAS + 0xA0);
    __ stx(G1, SP, STACK_BIAS + 0xA8);
#else // _LP64
    // this is also a native call, so smash the first 7 stack locations,
    // and the various registers

    // Note:  [SP+0x40] is sp[callee_aggregate_return_pointer_sp_offset],
    // while [SP+0x44..0x58] are the argument dump slots.
    // G1 is built as the 32-bit pattern replicated into both halves of a
    // 64-bit register so that each stx writes two junk words.
    __ set((intptr_t)0xbaadf00d, G1);
    __ mov(G1, G5);
    __ sllx(G1, 32, G1);
    __ or3(G1, G5, G1);
    __ mov(G1, G5);
    __ stx(G1, SP, 0x40);
    __ stx(G1, SP, 0x48);
    __ stx(G1, SP, 0x50);
    __ stw(G1, SP, 0x58); // Do not trash [SP+0x5C] which is a usable spill slot
#endif // _LP64
  }
#endif /*ASSERT*/
}

//=============================================================================
// REQUIRED FUNCTIONALITY for encoding
// Intentionally empty: nothing is emitted for the "lo" part on this platform.
void emit_lo(CodeBuffer &cbuf, int val) {
}

// Intentionally empty: nothing is emitted for the "hi" part on this platform.
void emit_hi(CodeBuffer &cbuf, int val) {
}


//=============================================================================
// Output register mask of the constant table base node: bound to the mask
// returned by PTR_REG_mask().
const RegMask& MachConstantBaseNode::_out_RegMask = PTR_REG_mask();

// Choose the (negative) offset of the table base relative to the start of the
// constant table.
//
// With RDPC-based addressing the most negative simm13 value is always used.
// Otherwise the base is placed in the middle of the table (-(size / 2)),
// falling back to the most negative simm13 value when that does not fit.
int Compile::ConstantTable::calculate_table_base_offset() const {
  if (UseRDPCForConstantTableBase) {
    // The table base offset might be less but then it fits into
    // simm13 anyway and we are good (cf. MachConstantBaseNode::emit).
    return Assembler::min_simm13();
  }

  const int centered = -(size() / 2);
  if (Assembler::is_simm13(centered)) {
    return centered;
  }
  return Assembler::min_simm13();
}

// The constant table base node is never late-expanded on this platform.
bool MachConstantBaseNode::requires_late_expand() const {
  return false;
}
// Must never be called: requires_late_expand() returns false for this node.
void MachConstantBaseNode::lateExpand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
  ShouldNotReachHere();
}

// Emit code that loads the constant table base address into the register
// allocated to this node.
//
// With UseRDPCForConstantTableBase the base is derived from the current PC
// (rdpc, plus a sub when the table is too far away for simm13), and the
// table base offset is recorded in the constant table here.  Otherwise the
// absolute base address is materialized with a relocated set, using the
// table base offset already stored in the constant table.
void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
  Compile* C = ra_->C;
  Compile::ConstantTable& constant_table = C->constant_table();
  MacroAssembler _masm(&cbuf);

  Register r = as_Register(ra_->get_encode(this));
  CodeSection* consts_section = __ code()->consts();
  // Size of the consts section rounded up to its start alignment.
  int consts_size = consts_section->align_at_start(consts_section->size());
  assert(constant_table.size() == consts_size, err_msg("must be: %d == %d", constant_table.size(), consts_size));

  if (UseRDPCForConstantTableBase) {
    // For the following RDPC logic to work correctly the consts
    // section must be allocated right before the insts section.  This
    // assert checks for that.  The layout and the SECT_* constants
    // are defined in src/share/vm/asm/codeBuffer.hpp.
    assert(CodeBuffer::SECT_CONSTS + 1 == CodeBuffer::SECT_INSTS, "must be");
    int insts_offset = __ offset();

    // Layout:
    //
    // |----------- consts section ------------|----------- insts section -----------...
    // |------ constant table -----|- padding -|------------------x----
    //                                                            \ current PC (RDPC instruction)
    // |<------------- consts_size ----------->|<- insts_offset ->|
    //                                                            \ table base
    // The table base offset is later added to the load displacement
    // so it has to be negative.
    int table_base_offset = -(consts_size + insts_offset);
    int disp;

    // If the displacement from the current PC to the constant table
    // base fits into simm13 we set the constant table base to the
    // current PC.
    if (Assembler::is_simm13(table_base_offset)) {
      constant_table.set_table_base_offset(table_base_offset);
      disp = 0;
    } else {
      // Otherwise we set the constant table base offset to the
      // maximum negative displacement of load instructions to keep
      // the disp as small as possible:
      //
      // |<------------- consts_size ----------->|<- insts_offset ->|
      // |<--------- min_simm13 --------->|<-------- disp --------->|
      //                                  \ table base
      table_base_offset = Assembler::min_simm13();
      constant_table.set_table_base_offset(table_base_offset);
      disp = (consts_size + insts_offset) + table_base_offset;
    }

    __ rdpc(r);

    if (disp != 0) {
      // O7 is handed to ensure_simm13_or_reg as the scratch register, so it
      // must not also be the destination.
      assert(r != O7, "need temporary");
      __ sub(r, __ ensure_simm13_or_reg(disp, O7), r);
    }
  }
  else {
    // Materialize the constant table base.
    // table_base_offset() is subtracted from the section start to obtain the
    // base address.
    address baseaddr = consts_section->start() + -(constant_table.table_base_offset());
    RelocationHolder rspec = internal_word_Relocation::spec(baseaddr);
    AddressLiteral base(baseaddr, rspec);
    __ set(base, r);
  }
}

// Upper bound, in bytes, on the code emitted for the constant table base.
uint MachConstantBaseNode::size(PhaseRegAlloc*) const {
  int worst_case_insts = MacroAssembler::worst_case_insts_for_set();
  if (UseRDPCForConstantTableBase) {
    // This is really the worst case but generally it's only 1 instruction.
    worst_case_insts = 1 /*rdpc*/ + 1 /*sub*/ + worst_case_insts;
  }
  return worst_case_insts * BytesPerInstWord;
}

#ifndef PRODUCT
// Print a one-line description of how the constant table base is produced,
// naming the register allocated to this node.
void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
  char reg_name[128];
  ra_->dump_register(this, reg_name);

  if (!UseRDPCForConstantTableBase) {
    st->print("SET    &constanttable,%s\t! constant table base", reg_name);
    return;
  }
  st->print("RDPC   %s\t! constant table base", reg_name);
}
#endif


//=============================================================================

#ifndef PRODUCT
// Print a textual rendering of the method prologue: the optional NOP padding,
// the thread verification marker (when VerifyThread is set), the stack bang
// marker (when the frame needs one) and the SAVE sequence that allocates the
// frame.
void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  Compile* C = ra_->C;

  for (int i = 0; i < OptoPrologueNops; i++) {
    st->print_cr("NOP"); st->print("\t");
  }

  if( VerifyThread ) {
    st->print_cr("Verify_Thread"); st->print("\t");
  }

  size_t framesize = C->frame_slots() << LogBytesPerInt;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them.  But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack.  But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->need_stack_bang(framesize)) {
    st->print_cr("! stack bang"); st->print("\t");
  }

  // The format strings below use %d, which expects an int.  Passing the
  // size_t directly is a vararg type mismatch, so convert explicitly.
  const int framesize_for_print = (int)framesize;

  if (Assembler::is_simm13(-framesize)) {
    st->print   ("SAVE   R_SP,-%d,R_SP",framesize_for_print);
  } else {
    // Frame too large for a simm13 immediate: build -framesize in G3 first.
    st->print_cr("SETHI  R_SP,hi%%(-%d),R_G3",framesize_for_print); st->print("\t");
    st->print_cr("ADD    R_G3,lo%%(-%d),R_G3",framesize_for_print); st->print("\t");
    st->print   ("SAVE   R_SP,R_G3,R_SP");
  }

}
#endif

// Emit the method prologue: optional NOP padding, thread verification, an
// optional stack overflow check, and the SAVE that allocates the frame.
// Records the frame-complete offset in the Compile object and, when the
// constant table base is not derived via RDPC, sets the table base offset.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;
  MacroAssembler _masm(&cbuf);

  for (int i = 0; i < OptoPrologueNops; i++) {
    __ nop();
  }

  // NOTE(review): called unconditionally here, while format() prints the
  // marker only under VerifyThread -- presumably verify_thread() checks the
  // flag itself; confirm.
  __ verify_thread();

  // Frame size in bytes (frame_slots scaled by LogBytesPerInt).
  size_t framesize = C->frame_slots() << LogBytesPerInt;
  assert(framesize >= 16*wordSize, "must have room for reg. save area");
  assert(framesize%(2*wordSize) == 0, "must preserve 2*wordSize alignment");

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them.  But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack.  But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->need_stack_bang(framesize)) {
    __ generate_stack_overflow_check(framesize);
  }

  if (Assembler::is_simm13(-framesize)) {
    __ save(SP, -framesize, SP);
  } else {
    // -framesize does not fit in simm13: build it in G3 from its upper bits
    // (sethi) and its low 10 bits (add), then save with a register operand.
    __ sethi(-framesize & ~0x3ff, G3);
    __ add(G3, -framesize & 0x3ff, G3);
    __ save(SP, G3, SP);
  }
  C->set_frame_complete( __ offset() );

  if (!UseRDPCForConstantTableBase && C->has_mach_constant_base_node()) {
    // NOTE: We set the table base offset here because users might be
    // emitted before MachConstantBaseNode.
    Compile::ConstantTable& constant_table = C->constant_table();
    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
  }
}

// Size of the prologue: delegated to the generic MachNode::size computation.
uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

// Relocation count reported for the prologue; a fixed, generous estimate.
int MachPrologNode::reloc() const {
  const int generous_reloc_estimate = 10; // a large enough number
  return generous_reloc_estimate;
}

//=============================================================================
#ifndef PRODUCT
// Print a textual rendering of the method epilogue: the safepoint poll (only
// for polling epilogs of method compilations), RET for polling epilogs, and
// the final RESTORE.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  Compile* C = ra_->C;

  if (do_polling() && C->is_method_compilation()) {
    st->print("SETHI  #PollAddr,L0\t! Load Polling address\n\t");
    // The poll is a pointer-sized load, so the mnemonic depends on word size.
#ifdef _LP64
    st->print("LDX    [L0],G0\t!Poll for Safepointing\n\t");
#else
    st->print("LDUW   [L0],G0\t!Poll for Safepointing\n\t");
#endif
  }

  if (do_polling()) {
    st->print("RET\n\t");
  }

  st->print("RESTORE");
}
#endif

// Emit the method epilogue: thread verification, an optional safepoint poll
// (polling epilogs of method compilations only), and then either
// "ret; restore" (polling epilog) or a bare restore.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  Compile* C = ra_->C;

  __ verify_thread();

  // Safepoint poll: load from the polling page, discarding the result in G0.
  if (do_polling() && C->is_method_compilation()) {
    AddressLiteral polling_page(os::get_polling_page());
    __ sethi(polling_page, L0);
    __ relocate(relocInfo::poll_return_type);
    __ ld_ptr( L0, 0, G0 );
  }

  if (!do_polling()) {
    // Not a return: just pop the register window.
    __ restore();
    return;
  }

  // A return: the restore rides in the delay slot of the ret.
  __ ret();
  __ delayed()->restore();
}

// Size of the epilogue: delegated to the generic MachNode::size computation.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_);
}

// Relocation count reported for the epilogue; a fixed, generous estimate.
int MachEpilogNode::reloc() const {
  const int generous_reloc_estimate = 16; // a large enough number
  return generous_reloc_estimate;
}

// The epilogue uses the generic MachNode pipeline class.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}

// Byte offset from the start of the epilogue to the safepoint poll
// instruction: the size of the sethi sequence that loads the polling page
// address.  Only valid for polling epilogs.
int MachEpilogNode::safepoint_offset() const {
  assert( do_polling(), "no return for this epilog node");
  const int sethi_insts = MacroAssembler::insts_for_sethi(os::get_polling_page());
  return sethi_insts * BytesPerInstWord;
}

//=============================================================================

// Register classes used by the spill-copy code: rc_bad for an invalid
// register, rc_stack for a stack slot, rc_int / rc_float for machine
// registers.
enum RC { rc_bad, rc_int, rc_float, rc_stack };

// Classify an allocator register name into one of the RC classes above.
static enum RC rc_class( OptoReg::Name reg ) {
  if (!OptoReg::is_valid(reg)) {
    return rc_bad;
  }
  if (OptoReg::is_stack(reg)) {
    return rc_stack;
  }

  VMReg r = OptoReg::as_VMReg(reg);
  if (!r->is_Register()) {
    // Anything that is neither a stack slot nor an integer register must be
    // a float register.
    assert(r->is_FloatRegister(), "must be");
    return rc_float;
  }
  return rc_int;
}

// Spill helper for one SP-relative load or store.
//
// With a code buffer: emits the instruction through emit_form3_mem_reg, or
// marks the method as not compilable when offset + STACK_BIAS does not fit in
// simm13.  Without a code buffer (non-PRODUCT builds only) and when not just
// sizing: prints the instruction to st.
// Always returns size + 4, i.e. the running size plus one instruction.
static int impl_helper( const MachNode *mach, CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, bool is_load, int offset, int reg, int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    // Better yet would be some mechanism to handle variable-size matches correctly
    if (Assembler::is_simm13(offset + STACK_BIAS)) {
      emit_form3_mem_reg(*cbuf, mach, opcode, -1, R_SP_enc, offset, 0, Matcher::_regEncode[reg]);
    } else {
      ra_->C->record_method_not_compilable("unable to handle large constant offsets");
    }
    return size+4;
  }
#ifndef PRODUCT
  if( !do_size ) {
    // Separate from any previously printed instruction.
    if( size != 0 ) {
      st->print("\n\t");
    }
    if( is_load ) {
      st->print("%s   [R_SP + #%d],R_%s\t! spill",op_str,offset,OptoReg::regname(reg));
    } else {
      st->print("%s   R_%s,[R_SP + #%d]\t! spill",op_str,OptoReg::regname(reg),offset);
    }
  }
#endif
  return size+4;
}

// Spill helper for one register-to-register move.
//
// With a code buffer: emits a single arith_op form3 instruction with dst as
// rd, op1 as op3, op2 in the field at bit 5 and src as rs2.  Without a code
// buffer (non-PRODUCT builds only) and when not just sizing: prints the
// instruction to st.
// Always returns size + 4, i.e. the running size plus one instruction.
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int op1, int op2, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit3( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst], op1, 0, op2, Matcher::_regEncode[src] );
    return size+4;
  }
#ifndef PRODUCT
  if( !do_size ) {
    // Separate from any previously printed instruction.
    if( size != 0 ) {
      st->print("\n\t");
    }
    st->print("%s  R_%s,R_%s\t! spill",op_str,OptoReg::regname(src),OptoReg::regname(dst));
  }
#endif
  return size+4;
}

uint MachSpillCopyNode::implementation( CodeBuffer *cbuf,
                                        PhaseRegAlloc *ra_,
                                        bool do_size,
                                        outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // --------------------------------------
  // Check for mem-mem move.  Load into unused float registers and fall into
  // the float-store case.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    // Further check for aligned-adjacent pair, so we can use a double load
    if( (src_first&1)==0 && src_first+1 == src_second ) {
      src_second    = OptoReg::Name(R_F31_num);
      src_second_rc = rc_float;
      size = impl_helper(this,cbuf,ra_,do_size,true,offset,R_F30_num,Assembler::lddf_op3,"LDDF",size, st);
    } else {
      size = impl_helper(this,cbuf,ra_,do_size,true,offset,R_F30_num,Assembler::ldf_op3 ,"LDF ",size, st);
    }
    src_first    = OptoReg::Name(R_F30_num);
    src_first_rc = rc_float;
  }

  if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_second);
    size = impl_helper(this,cbuf,ra_,do_size,true,offset,R_F31_num,Assembler::ldf_op3,"LDF ",size, st);
    src_second    = OptoReg::Name(R_F31_num);
    src_second_rc = rc_float;
  }

  // --------------------------------------
  // Check for float->int copy; requires a trip through memory
  if (src_first_rc == rc_float && dst_first_rc == rc_int && UseVIS < 3) {
    int offset = frame::register_save_words*wordSize;
    if (cbuf) {
      emit3_simm13( *cbuf, Assembler::arith_op, R_SP_enc, Assembler::sub_op3, R_SP_enc, 16 );
      impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stf_op3 ,"STF ",size, st);
      impl_helper(this,cbuf,ra_,do_size,true ,offset,dst_first,Assembler::lduw_op3,"LDUW",size, st);
      emit3_simm13( *cbuf, Assembler::arith_op, R_SP_enc, Assembler::add_op3, R_SP_enc, 16 );
    }
#ifndef PRODUCT
    else if (!do_size) {
      if (size != 0) st->print("\n\t");
      st->print(  "SUB    R_SP,16,R_SP\n");
      impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stf_op3 ,"STF ",size, st);
      impl_helper(this,cbuf,ra_,do_size,true ,offset,dst_first,Assembler::lduw_op3,"LDUW",size, st);
      st->print("\tADD    R_SP,16,R_SP\n");
    }
#endif
    size += 16;
  }

  // Check for float->int copy on T4
  if (src_first_rc == rc_float && dst_first_rc == rc_int && UseVIS >= 3) {
    // Further check for aligned-adjacent pair, so we can use a double move
    if ((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second)
      return impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mdtox_opf,"MOVDTOX",size, st);
    size  =  impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mstouw_opf,"MOVSTOUW",size, st);
  }
  // Check for int->float copy on T4
  if (src_first_rc == rc_int && dst_first_rc == rc_float && UseVIS >= 3) {
    // Further check for aligned-adjacent pair, so we can use a double move
    if ((src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second)
      return impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mxtod_opf,"MOVXTOD",size, st);
    size  =  impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::mftoi_op3,Assembler::mwtos_opf,"MOVWTOS",size, st);
  }

  // --------------------------------------
  // In the 32-bit 1-reg-longs build ONLY, I see mis-aligned long destinations.
  // In such cases, I have to do the big-endian swap.  For aligned targets, the
  // hardware does the flop for me.  Doubles are always aligned, so no problem
  // there.  Misaligned sources only come from native-long-returns (handled
  // special below).
#ifndef _LP64
  if( src_first_rc == rc_int &&     // source is already big-endian
      src_second_rc != rc_bad &&    // 64-bit move
      ((dst_first&1)!=0 || dst_second != dst_first+1) ) { // misaligned dst
    assert( (src_first&1)==0 && src_second == src_first+1, "source must be aligned" );
    // Do the big-endian flop.
    OptoReg::Name tmp    = dst_first   ; dst_first    = dst_second   ; dst_second    = tmp   ;
    enum RC       tmp_rc = dst_first_rc; dst_first_rc = dst_second_rc; dst_second_rc = tmp_rc;
  }
#endif

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int ) {
#ifndef _LP64
    if( src_first == R_O0_num && src_second == R_O1_num ) {  // Check for the evil O0/O1 native long-return case
      // Note: The _first and _second suffixes refer to the addresses of the 2 halves of the 64-bit value
      //       as stored in memory.  On a big-endian machine like SPARC, this means that the _second
      //       operand contains the least significant word of the 64-bit value and vice versa.
      OptoReg::Name tmp = OptoReg::Name(R_O7_num);
      assert( (dst_first&1)==0 && dst_second == dst_first+1, "return a native O0/O1 long to an aligned-adjacent 64-bit reg" );
      // Shift O0 left in-place, zero-extend O1, then OR them into the dst
      if( cbuf ) {
        emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[tmp], Assembler::sllx_op3, Matcher::_regEncode[src_first], 0x1020 );
        emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[src_second], Assembler::srl_op3, Matcher::_regEncode[src_second], 0x0000 );
        emit3       ( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_first], Assembler:: or_op3, Matcher::_regEncode[tmp], 0, Matcher::_regEncode[src_second] );
#ifndef PRODUCT
      } else if( !do_size ) {
        if( size != 0 ) st->print("\n\t");
        st->print("SLLX   R_%s,32,R_%s\t! Move O0-first to O7-high\n\t", OptoReg::regname(src_first), OptoReg::regname(tmp));
        st->print("SRL    R_%s, 0,R_%s\t! Zero-extend O1\n\t", OptoReg::regname(src_second), OptoReg::regname(src_second));
        st->print("OR     R_%s,R_%s,R_%s\t! spill",OptoReg::regname(tmp), OptoReg::regname(src_second), OptoReg::regname(dst_first));
#endif
      }
      return size+12;
    }
    else if( dst_first == R_I0_num && dst_second == R_I1_num ) {
      // returning a long value in I0/I1
      // a SpillCopy must be able to target a return instruction's reg_class
      // Note: The _first and _second suffixes refer to the addresses of the 2 halves of the 64-bit value
      //       as stored in memory.  On a big-endian machine like SPARC, this means that the _second
      //       operand contains the least significant word of the 64-bit value and vice versa.
      OptoReg::Name tdest = dst_first;

      if (src_first == dst_first) {
        tdest = OptoReg::Name(R_O7_num);
        size += 4;
      }

      if( cbuf ) {
        assert( (src_first&1) == 0 && (src_first+1) == src_second, "return value was in an aligned-adjacent 64-bit reg");
        // Shift value in upper 32-bits of src to lower 32-bits of I0; move lower 32-bits to I1
        // ShrL_reg_imm6
        emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[tdest], Assembler::srlx_op3, Matcher::_regEncode[src_second], 32 | 0x1000 );
        // ShrR_reg_imm6  src, 0, dst
        emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_second], Assembler::srl_op3, Matcher::_regEncode[src_first], 0x0000 );
        if (tdest != dst_first) {
          emit3     ( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_first], Assembler::or_op3, 0/*G0*/, 0/*op2*/, Matcher::_regEncode[tdest] );
        }
      }
#ifndef PRODUCT
      else if( !do_size ) {
        if( size != 0 ) st->print("\n\t");  // %%%%% !!!!!
        st->print("SRLX   R_%s,32,R_%s\t! Extract MSW\n\t",OptoReg::regname(src_second),OptoReg::regname(tdest));
        st->print("SRL    R_%s, 0,R_%s\t! Extract LSW\n\t",OptoReg::regname(src_first),OptoReg::regname(dst_second));
        if (tdest != dst_first) {
          st->print("MOV    R_%s,R_%s\t! spill\n\t", OptoReg::regname(tdest), OptoReg::regname(dst_first));
        }
      }
#endif // PRODUCT
      return size+8;
    }
#endif // !_LP64
    // Else normal reg-reg copy
    assert( src_second != dst_first, "smashed second before evacuating it" );
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::or_op3,0,"MOV  ",size, st);
    assert( (src_first&1) == 0 && (dst_first&1) == 0, "never move second-halves of int registers" );
    // This moves an aligned adjacent pair.
    // See if we are done.
    if( src_first+1 == src_second && dst_first+1 == dst_second )
      return size;
  }

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(dst_first);
    // Further check for aligned-adjacent pair, so we can use a double store
    if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
      return impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stx_op3,"STX ",size, st);
    size  =  impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stw_op3,"STW ",size, st);
  }

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    // Further check for aligned-adjacent pair, so we can use a double load
    if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
      return impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::ldx_op3 ,"LDX ",size, st);
    size  =  impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::lduw_op3,"LDUW",size, st);
  }

  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    // Further check for aligned-adjacent pair, so we can use a double move
    if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
      return impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::fpop1_op3,Assembler::fmovd_opf,"FMOVD",size, st);
    size  =  impl_mov_helper(cbuf,do_size,src_first,dst_first,Assembler::fpop1_op3,Assembler::fmovs_opf,"FMOVS",size, st);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(dst_first);
    // Further check for aligned-adjacent pair, so we can use a double store
    if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
      return impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stdf_op3,"STDF",size, st);
    size  =  impl_helper(this,cbuf,ra_,do_size,false,offset,src_first,Assembler::stf_op3 ,"STF ",size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    // Further check for aligned-adjacent pair, so we can use a double load
    if( (src_first&1)==0 && src_first+1 == src_second && (dst_first&1)==0 && dst_first+1 == dst_second )
      return impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::lddf_op3,"LDDF",size, st);
    size  =  impl_helper(this,cbuf,ra_,do_size,true,offset,dst_first,Assembler::ldf_op3 ,"LDF ",size, st);
  }

  // --------------------------------------------------------------------
  // Check for hi bits still needing moving.  Only happens for misaligned
  // arguments to native calls.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

#ifndef _LP64
  // In the LP64 build, all registers can be moved as aligned/adjacent
  // pairs, so there's never any need to move the high bits separately.
  // The 32-bit builds have to deal with the 32-bit ABI which can force
  // all sorts of silly alignment problems.

  // Check for integer reg-reg copy.  Hi bits are stuck up in the top
  // 32-bits of a 64-bit register, but are needed in low bits of another
  // register (else it's a hi-bits-to-hi-bits copy which should have
  // happened already as part of a 64-bit move)
  if( src_second_rc == rc_int && dst_second_rc == rc_int ) {
    assert( (src_second&1)==1, "its the evil O0/O1 native return case" );
    assert( (dst_second&1)==0, "should have moved with 1 64-bit move" );
    // Shift src_second down to dst_second's low bits.
    if( cbuf ) {
      emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[dst_second], Assembler::srlx_op3, Matcher::_regEncode[src_second-1], 0x1020 );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("SRLX   R_%s,32,R_%s\t! spill: Move high bits down low",OptoReg::regname(src_second-1),OptoReg::regname(dst_second));
#endif
    }
    return size+4;
  }

  // Check for high word integer store.  Must down-shift the hi bits
  // into a temp register, then fall into the case of storing int bits.
  if( src_second_rc == rc_int && dst_second_rc == rc_stack && (src_second&1)==1 ) {
    // Shift src_second down to dst_second's low bits.
    if( cbuf ) {
      emit3_simm13( *cbuf, Assembler::arith_op, Matcher::_regEncode[R_O7_num], Assembler::srlx_op3, Matcher::_regEncode[src_second-1], 0x1020 );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("SRLX   R_%s,32,R_%s\t! spill: Move high bits down low",OptoReg::regname(src_second-1),OptoReg::regname(R_O7_num));
#endif
    }
    size+=4;
    src_second = OptoReg::Name(R_O7_num); // Not R_O7H_num!
  }

  // Check for high word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(this,cbuf,ra_,do_size,true ,ra_->reg2offset(src_second),dst_second,Assembler::lduw_op3,"LDUW",size, st);

  // Check for high word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(this,cbuf,ra_,do_size,false,ra_->reg2offset(dst_second),src_second,Assembler::stw_op3 ,"STW ",size, st);

  // Check for high word float store
  if( src_second_rc == rc_float && dst_second_rc == rc_stack )
    return impl_helper(this,cbuf,ra_,do_size,false,ra_->reg2offset(dst_second),src_second,Assembler::stf_op3 ,"STF ",size, st);

#endif // !_LP64

  Unimplemented();
}

#ifndef PRODUCT
// Debug-only listing of the spill copy.  Runs the shared implementation()
// without a code buffer and with do_size == false, which is the combination
// that makes it print its instruction text to 'st'.
void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  CodeBuffer *const no_code   = NULL;   // nothing is emitted while formatting
  const bool        size_only = false;
  implementation( no_code, ra_, size_only, st );
}
#endif

// Emit the spill copy's instructions into 'cbuf' via the shared
// implementation(); no listing stream is supplied.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  outputStream *const no_listing = NULL;
  const bool          size_only  = false;
  implementation( &cbuf, ra_, size_only, no_listing );
}

// Report the size implementation() computes for this spill copy, without
// emitting code or printing anything (do_size == true).
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  const bool size_only = true;
  const uint copy_size = implementation( NULL, ra_, size_only, NULL );
  return copy_size;
}

//=============================================================================
#ifndef PRODUCT
// Debug-only listing: prints one NOP line with the total padding
// (4 bytes for each of the _count nops).
void MachNopNode::format( PhaseRegAlloc *, outputStream *st ) const {
  const int pad_bytes = 4 * _count;
  st->print("NOP \t# %d bytes pad for loops and calls", pad_bytes);
}
#endif

// Emit _count nop instructions into 'cbuf'.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
  MacroAssembler _masm(&cbuf);
  int remaining = _count;
  while (remaining > 0) {
    __ nop();
    remaining--;
  }
}

// Size of the padding: 4 bytes per nop.
uint MachNopNode::size(PhaseRegAlloc *ra_) const {
  const int pad_bytes = 4 * _count;
  return pad_bytes;
}


//=============================================================================
#ifndef PRODUCT
// Debug-only listing: prints the LEA that forms the box address from the
// stack offset of the node's input register mask and names the result register.
void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  const int   box_offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
  const int   dst_reg    = ra_->get_reg_first(this);
  const char *dst_name   = Matcher::regName[dst_reg];
  st->print("LEA    [R_SP+#%d+BIAS],%s", box_offset, dst_name);
}
#endif

// Compute SP + (box stack offset + STACK_BIAS) into this node's register.
// When the biased offset does not fit a 13-bit signed immediate it is first
// materialized in O7.
void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  const int biased_offset =
      ra_->reg2offset(in_RegMask(0).find_first_elem()) + STACK_BIAS;
  const Register box_reg = reg_to_register_object(ra_->get_encode(this));

  if (!Assembler::is_simm13(biased_offset)) {
    // Offset too wide for an immediate operand: build it in O7.
    __ set(biased_offset, O7);
    __ add(SP, O7, box_reg);
    return;
  }
  __ add(SP, biased_offset, box_reg);
}

// BoxLockNode is not a MachNode, so MachNode::size(ra_) is not available;
// the size is obtained by asking the Compile object for a scratch emission.
uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
  assert(ra_ == ra_->C->regalloc(), "sanity");
  const uint emitted_size = ra_->C->scratch_emit_size(this);
  return emitted_size;
}

//=============================================================================

// Offset, from the start of the compiled-java-to-interpreter stub, of the
// load-constant instruction that loads the inline cache (IC) (0 on sparc).
const int CompiledStaticCall::comp_to_int_load_offset = 0;

// Emit the call stub used when compiled java code calls the interpreter.
// Between start_a_stub() and end_a_stub() it records a static-stub
// relocation pointing at the call site, loads a NULL oop into the
// inline-cache register and jumps to the placeholder address -1.
// Returns early, emitting nothing, when start_a_stub() yields NULL.
void emit_java_to_interp(CodeBuffer &cbuf ) {

  // Stub is fixed up when the corresponding call is converted from calling
  // compiled code to calling interpreted code.
  // set (empty), G5
  // jmp -1

  address mark = cbuf.insts_mark();  // get mark within main instrs section

  MacroAssembler _masm(&cbuf);

  address base =
  __ start_a_stub(Compile::MAX_stubs_size);
  if (base == NULL)  return;  // CodeBuffer::expand failed

  // static stub relocation stores the instruction address of the call
  __ relocate(static_stub_Relocation::spec(mark));

  // Placeholder inline-cache value; this is the first instruction of the stub.
  __ set_oop(NULL, reg_to_register_object(Matcher::inline_cache_reg_encode()));

  // Placeholder jump target (-1); G3 is handed to JUMP as its temp register.
  __ set_inst_mark();
  AddressLiteral addrlit(-1);
  __ JUMP(addrlit, G3, 0);

  // Fill the jump's delay slot.
  __ delayed()->nop();

  // Update current stubs pointer and restore code_end.
  __ end_a_stub();
}

// Size of the call stub, compiled java to interpreter.
// The value need not be exact, but it must never be smaller than the real
// size of the stub.
uint size_java_to_interp() {
  uint stub_size = NativeMovConstReg::instruction_size;  // sethi/setlo
  stub_size += NativeJump::instruction_size;             // sethi; jmp; nop
  if (TraceJumps) {
    stub_size += 20 * BytesPerInstWord;
  }
  return stub_size;
}
// Number of relocation entries reserved for the call stub, compiled java to
// interpreter.
// NOTE(review): the original note accounts for 4 entries in
// emit_java_to_interp plus 1 in Java_Static_Call, yet 10 is returned;
// presumably deliberate headroom -- confirm.
uint reloc_java_to_interp() {
  const uint reserved_entries = 10;  // 4 in emit_java_to_interp + 1 in Java_Static_Call
  return reserved_entries;
}


//=============================================================================
#ifndef PRODUCT
// Debug-only listing of the unverified entry point (UEP) inline-cache check.
// It only prints text; the instructions themselves come from
// MachUEPNode::emit().
// NOTE(review): the text shows the klass being loaded into R_G5 and compared
// with R_G3, whereas emit() loads the klass into G3 and compares it with the
// inline-cache register -- the printed register roles look swapped; confirm.
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
  st->print_cr("\nUEP:");
#ifdef    _LP64
  if (UseCompressedOops) {
    // Compressed-klass variant: 32-bit load, shift, optional heap-base add.
    assert(Universe::heap() != NULL, "java heap should be initialized");
    st->print_cr("\tLDUW   [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check - compressed klass");
    st->print_cr("\tSLL    R_G5,3,R_G5");
    if (Universe::narrow_oop_base() != NULL)
      st->print_cr("\tADD    R_G5,R_G6_heap_base,R_G5");
  } else {
    st->print_cr("\tLDX    [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check");
  }
  st->print_cr("\tCMP    R_G5,R_G3" );
  // 64-bit build: the trap is printed against xcc.
  st->print   ("\tTne    xcc,R_G0+ST_RESERVED_FOR_USER_0+2");
#else  // _LP64
  st->print_cr("\tLDUW   [R_O0 + oopDesc::klass_offset_in_bytes],R_G5\t! Inline cache check");
  st->print_cr("\tCMP    R_G5,R_G3" );
  // 32-bit build: the trap is printed against icc.
  st->print   ("\tTne    icc,R_G0+ST_RESERVED_FOR_USER_0+2");
#endif // _LP64
}
#endif

// Emit the unverified-entry-point check: load the receiver's klass (receiver
// in O0), compare it with the inline-cache register and trap when they differ.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler _masm(&cbuf);
  const Register expected_klass_reg =
      reg_to_register_object(Matcher::inline_cache_reg_encode());
  const Register receiver_klass_reg = G3;
  assert( expected_klass_reg != receiver_klass_reg, "conflicting registers" );

  // Klass of the receiver.
  __ load_klass(O0, receiver_klass_reg);
  // Does it match the klass the caller expected?
  __ cmp(receiver_klass_reg, expected_klass_reg);
  // Miss: trap, checking xcc or icc depending on the pointer size.
  __ trap(Assembler::notEqual, Assembler::ptr_cc, G0, ST_RESERVED_FOR_USER_0+2);
}

// Size of the UEP check; delegates to the generic MachNode computation.
uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
  const uint uep_size = MachNode::size(ra_);
  return uep_size;
}


//=============================================================================

// Space reserved for the exception handler stub: a guess of 400 when
// TraceJumps is on, otherwise one NativeJump (sethi;jmp;nop).
uint size_exception_handler() {
  const uint handler_size = TraceJumps ? 400 : NativeJump::instruction_size;
  return handler_size;
}

// Space reserved for the deopt handler stub: a guess of 400 when TraceJumps
// is on, otherwise 4 plus one NativeJump (save;sethi;jmp;restore).
uint size_deopt_handler() {
  const uint handler_size = TraceJumps ? 400 : 4 + NativeJump::instruction_size;
  return handler_size;
}

// Emit the exception handler stub: a jump to the exception blob's entry
// point with a nop in the delay slot.  Returns the offset at which the
// handler starts, or 0 when no stub space could be obtained.
int emit_exception_handler(CodeBuffer& cbuf) {
  const Register scratch = G3;
  AddressLiteral blob_entry(OptoRuntime::exception_blob()->entry_point());
  MacroAssembler _masm(&cbuf);

  const uint stub_limit = size_exception_handler();
  address stub_start = __ start_a_stub(stub_limit);
  if (stub_start == NULL) {
    return 0;  // CodeBuffer::expand failed
  }

  const int handler_offset = __ offset();

  __ JUMP(blob_entry, scratch, 0); // sethi;jmp
  __ delayed()->nop();

  assert(__ offset() - handler_offset <= (int) stub_limit, "overflow");

  __ end_a_stub();

  return handler_offset;
}

// Emit the deopt handler stub: save a frame, jump to the deopt blob's unpack
// entry and restore in the delay slot.  Returns the offset at which the
// handler starts, or 0 when no stub space could be obtained.
int emit_deopt_handler(CodeBuffer& cbuf) {
  // None of the current frame's registers may be used: we may have deopted
  // at a poll, where everything (including G3) can be live.
  const Register scratch = L0;
  AddressLiteral unpack_entry(SharedRuntime::deopt_blob()->unpack());
  MacroAssembler _masm(&cbuf);

  const uint stub_limit = size_deopt_handler();
  address stub_start = __ start_a_stub(stub_limit);
  if (stub_start == NULL) {
    return 0;  // CodeBuffer::expand failed
  }

  const int handler_offset = __ offset();
  __ save_frame(0);
  __ JUMP(unpack_entry, scratch, 0); // sethi;jmp
  __ delayed()->restore();

  assert(__ offset() - handler_offset <= (int) stub_limit, "overflow");

  __ end_a_stub();
  return handler_offset;
}

// Map a register encoding to the corresponding integer Register object.
// The assert spot-checks that the assembler's encodings agree with the
// R_*_enc values for L5 and G1.
static Register reg_to_register_object(int register_encoding) {
  assert(L5->encoding() == R_L5_enc && G1->encoding() == R_G1_enc, "right coding");
  const Register decoded = as_Register(register_encoding);
  return decoded;
}

// Map a register encoding to the corresponding single-precision
// FloatRegister object.  The assert spot-checks the encodings of F5 and F12.
static FloatRegister reg_to_SingleFloatRegister_object(int register_encoding) {
  assert(F5->encoding(FloatRegisterImpl::S) == R_F5_enc && F12->encoding(FloatRegisterImpl::S) == R_F12_enc, "right coding");
  const FloatRegister decoded = as_SingleFloatRegister(register_encoding);
  return decoded;
}

// Map a register encoding to the corresponding double-precision
// FloatRegister object.  The asserts spot-check the encodings of F4 and F32.
static FloatRegister reg_to_DoubleFloatRegister_object(int register_encoding) {
  assert(F4->encoding(FloatRegisterImpl::D) == R_F4_enc, "right coding");
  assert(F32->encoding(FloatRegisterImpl::D) == R_D32_enc, "right coding");
  const FloatRegister decoded = as_DoubleFloatRegister(register_encoding);
  return decoded;
}

// Tell whether 'opcode' may be matched on this platform.  An opcode needs a
// match rule; the count-leading/trailing-zeros opcodes additionally require
// UsePopCountInstruction.  Every other opcode with a rule is supported.
const bool Matcher::match_rule_supported(int opcode) {
  if (!has_match_rule(opcode)) {
    return false;
  }

  const bool is_zero_count_op =
      opcode == Op_CountLeadingZerosI  ||
      opcode == Op_CountLeadingZerosL  ||
      opcode == Op_CountTrailingZerosI ||
      opcode == Op_CountTrailingZerosL;
  if (is_zero_count_op && !UsePopCountInstruction) {
    return false;
  }

  return true;  // Per default match rules are supported.
}

// Convert a register number to its offset among the FP registers, which
// live in the second chunk (starting at 32).
int Matcher::regnum_to_fpu_offset(int regnum) {
  const int fp_chunk_start = 32;
  return regnum - fp_chunk_start;
}

#ifdef ASSERT
address last_rethrow = NULL;  // debugging aid for Rethrow encoding
#endif

// Vector width in bytes (fixed at 8 here).
const uint Matcher::vector_width_in_bytes(void) {
  const uint width_in_bytes = 8;
  return width_in_bytes;
}

// Ideal register type used for vectors: Op_RegD.
const uint Matcher::vector_ideal_reg(void) {
  const uint ideal_reg = Op_RegD;
  return ideal_reg;
}

// USII supports fxtof over the whole range of numbers, USIII doesn't; the
// answer is whatever VM_Version reports for fast fxtof.
const bool Matcher::convL2FSupported(void) {
  const bool fast_fxtof = VM_Version::has_fast_fxtof();
  return fast_fxtof;
}

// Can a short branch be used for this offset?
//
// NOTE: If the platform does not provide any short branch variants, then
//       this method should return false for offset 0.
bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
  if (!UseCBCond) {
    return false;  // no short branch variant available
  }
  // The offset is already relative to the address of the branch, so it is
  // range-checked as is.
  return Assembler::is_simm12(offset);
}

// Will one (StoreL ConL) be cheaper than two (StoreI ConI)?
// Depends on optimizations in MacroAssembler::setx.  The constant counts as
// simple when its upper word is all zeros or all ones, or its lower word is 0.
const bool Matcher::isSimpleConstant64(jlong value) {
  const int upper = (int)(value >> 32);
  const int lower = (int)value;
  if (upper == 0 || upper == -1) {
    return true;
  }
  return lower == 0;
}

// No scaling for the parameter of the ClearArray node.
const bool Matcher::init_array_count_is_in_bytes = true;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// CMOVL carries no additional cost.
const int Matcher::long_cmove_cost() {
  const int extra_cost = 0;
  return extra_cost;
}

// CMOVF/CMOVD are expensive on T4 and on SPARC64: charge
// ConditionalMoveLimit there, nothing elsewhere.
const int Matcher::float_cmove_cost() {
  if (VM_Version::is_T4() || VM_Version::is_sparc64()) {
    return ConditionalMoveLimit;
  }
  return 0;
}

// Does the CPU require late expand (see block.cpp for description of late expand)?
const bool Matcher::require_late_expand = false;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = false;

// Do we need to mask the count passed to shift instructions or does
// the cpu only look at the lower 5/6 bits anyway?
const bool Matcher::need_masked_shift_count = false;

// Complex addresses are never used for narrow oops here.  Must not be called
// outside LP64 builds, and is only meaningful with UseCompressedOops.
bool Matcher::narrow_oop_use_complex_address() {
#ifndef _LP64
  ShouldNotCallThis();
#endif
  assert(UseCompressedOops, "only for compressed oops code");
  return false;
}

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = false;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
#ifdef _LP64
const bool Matcher::misaligned_doubles_ok = true;
#else
const bool Matcher::misaligned_doubles_ok = false;
#endif

// Platform hook for implicit null check fixups.
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // No-op on SPARC.
}

// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.
const bool Matcher::strict_fp_requires_explicit_rounding = false;

// Are floats converted to double when stored to stack during deoptimization?
// Sparc does not handle callee-save floats.
bool Matcher::float_in_double() {
  return false;
}

// Do ints take an entire long register or just half?
// Note that we if-def off of _LP64.
// The relevant question is how the int is callee-saved.  In _LP64
// the whole long is written but de-opt'ing will have to extract
// the relevant 32 bits, in not-_LP64 only the low 32 bits is written.
#ifdef _LP64
const bool Matcher::int_in_long = true;
#else
const bool Matcher::int_in_long = false;
#endif

// Return whether or not this register is ever used as an argument.  This
// function is used on startup to build the trampoline stubs in
// generateOptoStub.  Registers not mentioned will be killed by the VM call in
// the trampoline, and arguments in those registers will not be available to
// the callee.
bool Matcher::can_be_java_arg( int reg ) {
  // Standard sparc 6 args in registers
  const bool in_i_reg =
      reg == R_I0_num || reg == R_I1_num || reg == R_I2_num ||
      reg == R_I3_num || reg == R_I4_num || reg == R_I5_num;
  if (in_i_reg) {
    return true;
  }
#ifdef _LP64
  // 64-bit builds can pass 64-bit pointers and longs in
  // the high I registers
  const bool in_high_i_reg =
      reg == R_I0H_num || reg == R_I1H_num || reg == R_I2H_num ||
      reg == R_I3H_num || reg == R_I4H_num || reg == R_I5H_num;
  if (in_high_i_reg) {
    return true;
  }

  // G6 (both halves) counts as well when compressed oops are in use.
  if (UseCompressedOops && (reg == R_G6_num || reg == R_G6H_num)) {
    return true;
  }
#else
  // 32-bit builds with longs-in-one-entry pass longs in G1 & G4.
  // Longs cannot be passed in O regs, because O regs become I regs
  // after a 'save' and I regs get their high bits chopped off on
  // interrupt.
  const bool in_g1_or_g4 =
      reg == R_G1H_num || reg == R_G1_num ||
      reg == R_G4H_num || reg == R_G4_num;
  if (in_g1_or_g4) {
    return true;
  }
#endif
  // A few float args in registers
  return (reg >= R_F0_num) && (reg <= R_F7_num);
}

// A register is a spillable argument exactly when it can be a java argument.
bool Matcher::is_spillable_arg( int reg ) {
  const bool is_java_arg = can_be_java_arg(reg);
  return is_java_arg;
}

// Use the hardware SDIVX instruction when it is faster than code that uses
// a multiply; 'divisor' itself does not influence the decision.
bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
  const bool fast_idiv = VM_Version::has_fast_idiv();
  return fast_idiv;
}

// Register for DIVI projection of divmodI.
// Must never be reached here; the empty mask only satisfies the signature.
RegMask Matcher::divI_proj_mask() {
  ShouldNotReachHere();
  const RegMask empty_mask = RegMask();
  return empty_mask;
}

// Register for MODI projection of divmodI.
// Must never be reached here; the empty mask only satisfies the signature.
RegMask Matcher::modI_proj_mask() {
  ShouldNotReachHere();
  const RegMask empty_mask = RegMask();
  return empty_mask;
}

// Register for DIVL projection of divmodL.
// Must never be reached here; the empty mask only satisfies the signature.
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  const RegMask empty_mask = RegMask();
  return empty_mask;
}

// Register for MODL projection of divmodL.
// Must never be reached here; the empty mask only satisfies the signature.
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  const RegMask empty_mask = RegMask();
  return empty_mask;
}

// Register mask used to save SP across method handle invokes: the L7
// pointer-register mask.
const RegMask Matcher::method_handle_invoke_SP_save_mask() {
  const RegMask &sp_save_mask = L7_REGP_mask();
  return sp_save_mask;
}

%}


// The intptr_t operand types, defined by textual substitution.
// (Cf. opto/type.hpp.  This lets us avoid many, many other ifdefs.)
#ifdef _LP64
#define immX      immL
#define immX13    immL13
#define immX13m7  immL13m7
#define iRegX     iRegL
#define g1RegX    g1RegL
#else
#define immX      immI
#define immX13    immI13
#define immX13m7  immI13m7
#define iRegX     iRegI
#define g1RegX    g1RegI
#endif

//----------ENCODING BLOCK-----------------------------------------------------
// This block specifies the encoding classes used by the compiler to output
// byte streams.  Encoding classes are parameterized macros used by
// Machine Instruction Nodes in order to generate the bit encoding of the
// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently supported four interfaces,
// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
// operand to generate a function which returns its register number when
// queried.   CONST_INTER causes an operand to generate a function which
// returns the value of the constant when queried.  MEMORY_INTER causes an
// operand to generate four functions which return the Base Register, the
// Index Register, the Scale Value, and the Offset Value of the operand when
// queried.  COND_INTER causes an operand to generate six functions which
// return the encoding code (ie - encoding bits for the instruction)
// associated with each basic boolean condition for a conditional instruction.
//
// Instructions specify two basic values for encoding.  Again, a function
// is available to check if the constant displacement is an oop. They use the
// ins_encode keyword to specify their encoding classes (which must be
// a sequence of enc_class names, and their parameters, specified in
// the encoding block), and they use the
// opcode keyword to specify, in order, their primary, secondary, and
// tertiary opcode.  Only the opcode sections which a particular instruction
// needs for encoding need to be specified.
encode %{
  // Marker for encodings that have not been exercised: when ASSERT is
  // defined it emits a MacroAssembler untested("encoding") call; otherwise
  // the encoding is empty.
  enc_class enc_untested %{
#ifdef ASSERT
    MacroAssembler _masm(&cbuf);
    __ untested("encoding");
#endif
  %}

  // Memory access on $mem (base, displacement, index) with $dst as the
  // register field.  Both the primary and the tertiary opcode are handed to
  // emit_form3_mem_reg.
  enc_class form3_mem_reg( memory mem, iRegI dst ) %{
    emit_form3_mem_reg(cbuf, this, $primary, $tertiary,
                       $mem$$base, $mem$$disp, $mem$$index, $dst$$reg);
  %}

  // Same as form3_mem_reg, except that -1 is passed in place of the tertiary
  // opcode.
  enc_class simple_form3_mem_reg( memory mem, iRegI dst ) %{
    emit_form3_mem_reg(cbuf, this, $primary, -1,
                       $mem$$base, $mem$$disp, $mem$$index, $dst$$reg);
  %}

  // Prefetch of $mem for reading: the register field carries the prefetch
  // function code 0 (many-reads) instead of a register.
  enc_class form3_mem_prefetch_read( memory mem ) %{
    emit_form3_mem_reg(cbuf, this, $primary, -1,
                       $mem$$base, $mem$$disp, $mem$$index, 0/*prefetch function many-reads*/);
  %}

  // Prefetch of $mem for writing: the register field carries the prefetch
  // function code 2 (many-writes) instead of a register.
  enc_class form3_mem_prefetch_write( memory mem ) %{
    emit_form3_mem_reg(cbuf, this, $primary, -1,
                       $mem$$base, $mem$$disp, $mem$$index, 2/*prefetch function many-writes*/);
  %}

  // Assemble a long from two separate accesses at disp and disp+4: the word
  // at disp+4 goes to O7, the word at disp goes to $reg, which is then
  // shifted left by 32 and OR-ed with O7.  O7 is overwritten.
  // Only base+displacement operands are accepted (index must be G0) and both
  // displacements must fit a 13-bit signed immediate.
  // NOTE(review): presumably $primary is a 32-bit load opcode -- confirm at
  // the instruct definitions that use this class.
  enc_class form3_mem_reg_long_unaligned_marshal( memory mem, iRegL reg ) %{
    assert(Assembler::is_simm13($mem$$disp  ), "need disp and disp+4");
    assert(Assembler::is_simm13($mem$$disp+4), "need disp and disp+4");
    guarantee($mem$$index == R_G0_enc, "double index?");
    // Second word (disp+4) into the scratch register O7.
    emit_form3_mem_reg(cbuf, this, $primary, -1, $mem$$base, $mem$$disp+4, R_G0_enc, R_O7_enc );
    // First word (disp) into the destination.
    emit_form3_mem_reg(cbuf, this, $primary, -1, $mem$$base, $mem$$disp,   R_G0_enc, $reg$$reg );
    // sllx $reg,32,$reg  (0x1020 = 64-bit shift flag 0x1000 | count 32)
    emit3_simm13( cbuf, Assembler::arith_op, $reg$$reg, Assembler::sllx_op3, $reg$$reg, 0x1020 );
    // or $reg,O7,$reg
    emit3( cbuf, Assembler::arith_op, $reg$$reg, Assembler::or_op3, $reg$$reg, 0, R_O7_enc );
  %}

  // Access a double as two halves: the word at disp uses register $reg+0 and
  // the word at disp+4 uses register $reg+1.
  // Only base+displacement operands are accepted (index must be G0) and both
  // displacements must fit a 13-bit signed immediate.
  enc_class form3_mem_reg_double_unaligned( memory mem, RegD_low reg ) %{
    assert(Assembler::is_simm13($mem$$disp  ), "need disp and disp+4");
    assert(Assembler::is_simm13($mem$$disp+4), "need disp and disp+4");
    guarantee($mem$$index == R_G0_enc, "double index?");
    // Load long with 2 instructions
    emit_form3_mem_reg(cbuf, this, $primary, -1, $mem$$base, $mem$$disp,   R_G0_enc, $reg$$reg+0 );
    emit_form3_mem_reg(cbuf, this, $primary, -1, $mem$$base, $mem$$disp+4, R_G0_enc, $reg$$reg+1 );
  %}

  //%%% form3_mem_plus_4_reg is a hack--get rid of it
  // Memory access at $mem's address plus 4, with $dst as the register field.
  // NOTE(review): the guarantee tests the displacement for being non-zero as
  // a stand-in for "not a reg-reg operand", so a base+displacement operand
  // whose displacement is exactly 0 would trip it as well -- confirm that
  // case cannot occur.
  enc_class form3_mem_plus_4_reg( memory mem, iRegI dst ) %{
    guarantee($mem$$disp, "cannot offset a reg-reg operand by 4");
    emit_form3_mem_reg(cbuf, this, $primary, -1, $mem$$base, $mem$$disp + 4, $mem$$index, $dst$$reg);
  %}

  // Register-to-register copy $rs2 -> $rd, encoded as "or G0,$rs2,$rd".
  // Nothing is emitted when source and destination are the same register.
  enc_class form3_g0_rs2_rd_move( iRegI rs2, iRegI rd ) %{
    // Encode a reg-reg copy.  If it is useless, then empty encoding.
    if( $rs2$$reg != $rd$$reg )
      emit3( cbuf, Assembler::arith_op, $rd$$reg, Assembler::or_op3, 0, 0, $rs2$$reg );
  %}

  // Target lo half of long
  // Copies $rs2 into the register LONG_LO_REG selects for $rd, encoded as
  // "or G0,$rs2,lo($rd)"; nothing is emitted when they already coincide.
  enc_class form3_g0_rs2_rd_move_lo( iRegI rs2, iRegL rd ) %{
    // Encode a reg-reg copy.  If it is useless, then empty encoding.
    if( $rs2$$reg != LONG_LO_REG($rd$$reg) )
      emit3( cbuf, Assembler::arith_op, LONG_LO_REG($rd$$reg), Assembler::or_op3, 0, 0, $rs2$$reg );
  %}

  // Source lo half of long
  // Copies the register LONG_LO_REG selects for $rs2 into $rd, encoded as
  // "or G0,lo($rs2),$rd"; nothing is emitted when they already coincide.
  enc_class form3_g0_rs2_rd_move_lo2( iRegL rs2, iRegI rd ) %{
    // Encode a reg-reg copy.  If it is useless, then empty encoding.
    if( LONG_LO_REG($rs2$$reg) != $rd$$reg )
      emit3( cbuf, Assembler::arith_op, $rd$$reg, Assembler::or_op3, 0, 0, LONG_LO_REG($rs2$$reg) );
  %}

  // Target hi half of long
  // Emits "sra $rs1,31,$rd": an arithmetic right shift by 31, which leaves
  // $rd filled with copies of the sign bit of the 32-bit value in $rs1.
  enc_class form3_rs1_rd_copysign_hi( iRegI rs1, iRegL rd ) %{
    emit3_simm13( cbuf, Assembler::arith_op, $rd$$reg, Assembler::sra_op3, $rs1$$reg, 31 );
  %}

  // Source lo half of long, and leave it sign extended.
  // Emits "sra $rs1,G0,$rd": a 32-bit arithmetic shift with a zero count
  // taken from G0, used purely for its sign-extending effect.
  enc_class form3_rs1_rd_signextend_lo1( iRegL rs1, iRegI rd ) %{
    // Sign extend low half
    emit3( cbuf, Assembler::arith_op, $rd$$reg, Assembler::sra_op3, $rs1$$reg, 0, 0 );
  %}

  // Source hi half of long: move the upper 32 bits of $rs1 into the low half
  // of $rd with a 64-bit logical right shift by 32.
  // NOTE(review): srlx zero-fills the upper bits, so the result is zero
  // extended; the earlier "leave it sign extended" wording did not match a
  // logical shift.
  enc_class form3_rs1_rd_copy_hi1( iRegL rs1, iRegI rd ) %{
    // Shift high half to low half.  The 0x1000 bit selects the 64-bit form
    // of the shift, as in the other srlx/sllx encodings in this file; without
    // it only a 5-bit count is available and a count of 32 cannot be
    // expressed.
    emit3_simm13( cbuf, Assembler::arith_op, $rd$$reg, Assembler::srlx_op3, $rs1$$reg, 32 | 0x1000 );
  %}

  // Source hi half of long
  // Copies the register LONG_HI_REG selects for $rs2 into $rd, encoded as
  // "or G0,hi($rs2),$rd"; nothing is emitted when they already coincide.
  enc_class form3_g0_rs2_rd_move_hi2( iRegL rs2, iRegI rd ) %{
    // Encode a reg-reg copy.  If it is useless, then empty encoding.
    if( LONG_HI_REG($rs2$$reg) != $rd$$reg )
      emit3( cbuf, Assembler::arith_op, $rd$$reg, Assembler::or_op3, 0, 0, LONG_HI_REG($rs2$$reg) );
  %}

  // Generic three-register instruction: $secondary supplies the first
  // opcode field handed to emit3 and $primary the op3 field; operands are
  // $rs1 and $rs2, the result register is $rd.
  enc_class form3_rs1_rs2_rd( iRegI rs1, iRegI rs2, iRegI rd ) %{
    emit3( cbuf, $secondary, $rd$$reg, $primary, $rs1$$reg, 0, $rs2$$reg );
  %}

  // Convert $src to a boolean in $dst using the carry flag:
  //   subcc G0,$src,G0   -- compare 0 with $src, result discarded
  //   addc  G0,0,$dst    -- $dst = 0 + 0 + carry
  // NOTE(review): presumably the carry is set exactly when $src is non-zero,
  // giving $dst = ($src != 0) -- confirm against the SPARC condition codes.
  enc_class enc_to_bool( iRegI src, iRegI dst ) %{
    emit3       ( cbuf, Assembler::arith_op,         0, Assembler::subcc_op3, 0, 0, $src$$reg );
    emit3_simm13( cbuf, Assembler::arith_op, $dst$$reg, Assembler::addc_op3 , 0, 0 );
  %}

  // Less-than mask: $dst becomes -1 when $p < $q (signed, icc) and 0
  // otherwise.  $dst is cleared first; the annulled branch then runs the
  // "mov -1" in its delay slot only when the branch is taken.
  enc_class enc_ltmask( iRegI p, iRegI q, iRegI dst ) %{
    // subcc p,q,G0 -- compare, result discarded
    emit3       ( cbuf, Assembler::arith_op,         0, Assembler::subcc_op3, $p$$reg, 0, $q$$reg );
    // clear if nothing else is happening
    emit3_simm13( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3, 0,  0 );
    // blt,a,pn done
    emit2_19    ( cbuf, Assembler::branch_op, 1/*annul*/, Assembler::less, Assembler::bp_op2, Assembler::icc, 0/*predict not taken*/, 2 );
    // mov dst,-1 in delay slot
    emit3_simm13( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3, 0, -1 );
  %}

  // Register/immediate instruction whose immediate is limited to its low
  // 5 bits (masked with 0x1F); opcode fields come from $secondary/$primary.
  enc_class form3_rs1_imm5_rd( iRegI rs1, immU5 imm5, iRegI rd ) %{
    emit3_simm13( cbuf, $secondary, $rd$$reg, $primary, $rs1$$reg, $imm5$$constant & 0x1F );
  %}

  // Register/immediate instruction on longs: the immediate is limited to its
  // low 6 bits (masked with 0x3F) and the 0x1000 bit is set in the immediate
  // field (the flag the 64-bit shift encodings in this file use).
  enc_class form3_sd_rs1_imm6_rd( iRegL rs1, immU6 imm6, iRegL rd ) %{
    emit3_simm13( cbuf, $secondary, $rd$$reg, $primary, $rs1$$reg, ($imm6$$constant & 0x3F) | 0x1000 );
  %}

  // Three-register instruction on a long $rs1 with count/operand $rs2.
  // 0x80 is passed in the field between rs1 and rs2.
  // NOTE(review): presumably 0x80 in that field is the register-form
  // equivalent of the 0x1000 64-bit flag used by the immediate forms --
  // confirm against emit3's bit layout.
  enc_class form3_sd_rs1_rs2_rd( iRegL rs1, iRegI rs2, iRegL rd ) %{
    emit3( cbuf, $secondary, $rd$$reg, $primary, $rs1$$reg, 0x80, $rs2$$reg );
  %}

  // Generic register/immediate instruction: $rs1 combined with the constant
  // $simm13 into $rd; opcode fields come from $secondary/$primary.
  enc_class form3_rs1_simm13_rd( iRegI rs1, immI13 simm13, iRegI rd ) %{
    emit3_simm13( cbuf, $secondary, $rd$$reg, $primary, $rs1$$reg, $simm13$$constant );
  %}

  // Emits "add O7,frame::pc_return_offset,O1": O1 receives the call address
  // held in O7 adjusted by the return-pc offset.
  enc_class move_return_pc_to_o1() %{
    emit3_simm13( cbuf, Assembler::arith_op, R_O1_enc, Assembler::add_op3, R_O7_enc, frame::pc_return_offset );
  %}

#ifdef _LP64
  /* %%% merge with enc_to_bool */
  // Pointer-to-boolean for LP64 builds: a conditional move on register
  // contents writes 1 into $dst when $src is non-zero (rc_nz).
  // NOTE(review): $dst is not written when $src is zero, so presumably the
  // using instruct clears $dst beforehand -- confirm.
  enc_class enc_convP2B( iRegI dst, iRegP src ) %{
    MacroAssembler _masm(&cbuf);

    Register   src_reg = reg_to_register_object($src$$reg);
    Register   dst_reg = reg_to_register_object($dst$$reg);
    __ movr(Assembler::rc_nz, src_reg, 1, dst_reg);
  %}
#endif

  // Fused form of
  //   (Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)))
  // p receives p - q; when that subtraction reports less on icc, p is replaced
  // by (p - q) + y, which is staged in tmp.
  enc_class enc_cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, iRegI tmp ) %{
    MacroAssembler _masm(&cbuf);

    Register minuend    = reg_to_register_object($p$$reg);
    Register subtrahend = reg_to_register_object($q$$reg);
    Register addend     = reg_to_register_object($y$$reg);
    Register staged_sum = reg_to_register_object($tmp$$reg);

    // p = p - q, setting the condition codes
    __ subcc( minuend, subtrahend, minuend );
    // tmp = (p - q) + y
    __ add  ( minuend, addend, staged_sum );
    // keep tmp only when the subtraction reported less
    __ movcc( Assembler::less, false, Assembler::icc, staged_sum, minuend );
  %}

  // Double-to-int conversion with NaN handling.
  // src is compared with itself; when the compare is ordered the branch skips
  // the two fix-up instructions, and the conversion in the (non-annulled) delay
  // slot is the result. On the fall-through (NaN) path dst is converted back and
  // subtracted from itself. The displacement of 4 relies on the exact
  // five-instruction layout below.
  enc_class form_d2i_helper(regD src, regF dst) %{
    // fcmp %fcc0,$src,$src
    emit3( cbuf, Assembler::arith_op , Assembler::fcc0, Assembler::fpop2_op3, $src$$reg, Assembler::fcmpd_opf, $src$$reg );
    // branch %fcc0 not-nan, predict taken
    emit2_19( cbuf, Assembler::branch_op, 0/*annul*/, Assembler::f_ordered, Assembler::fbp_op2, Assembler::fcc0, 1/*predict taken*/, 4 );
    // fdtoi $src,$dst
    emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3,         0, Assembler::fdtoi_opf, $src$$reg );
    // fitos $dst,$dst (if nan)
    emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3,         0, Assembler::fitos_opf, $dst$$reg );
    // clear $dst (if nan)
    emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, $dst$$reg, Assembler::fsubs_opf, $dst$$reg );
    // carry on here...
  %}

  // Double-to-long conversion with NaN handling; same five-instruction shape as
  // form_d2i_helper but using fdtox / fxtod / fsubd. The branch displacement of 4
  // relies on that exact layout.
  enc_class form_d2l_helper(regD src, regD dst) %{
    // fcmp %fcc0,$src,$src  check for NAN
    emit3( cbuf, Assembler::arith_op , Assembler::fcc0, Assembler::fpop2_op3, $src$$reg, Assembler::fcmpd_opf, $src$$reg );
    // branch %fcc0 not-nan, predict taken
    emit2_19( cbuf, Assembler::branch_op, 0/*annul*/, Assembler::f_ordered, Assembler::fbp_op2, Assembler::fcc0, 1/*predict taken*/, 4 );
    // fdtox $src,$dst   convert in delay slot
    emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3,         0, Assembler::fdtox_opf, $src$$reg );
    // fxtod $dst,$dst  (if nan)
    emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3,         0, Assembler::fxtod_opf, $dst$$reg );
    // clear $dst (if nan)
    emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, $dst$$reg, Assembler::fsubd_opf, $dst$$reg );
    // carry on here...
  %}

  // Float-to-int conversion with NaN handling; single-precision compare (fcmps)
  // followed by fstoi in the delay slot and the fitos / fsubs fix-up on the
  // fall-through path. The branch displacement of 4 relies on this exact layout.
  enc_class form_f2i_helper(regF src, regF dst) %{
    // fcmps %fcc0,$src,$src
    emit3( cbuf, Assembler::arith_op , Assembler::fcc0, Assembler::fpop2_op3, $src$$reg, Assembler::fcmps_opf, $src$$reg );
    // branch %fcc0 not-nan, predict taken
    emit2_19( cbuf, Assembler::branch_op, 0/*annul*/, Assembler::f_ordered, Assembler::fbp_op2, Assembler::fcc0, 1/*predict taken*/, 4 );
    // fstoi $src,$dst
    emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3,         0, Assembler::fstoi_opf, $src$$reg );
    // fitos $dst,$dst (if nan)
    emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3,         0, Assembler::fitos_opf, $dst$$reg );
    // clear $dst (if nan)
    emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, $dst$$reg, Assembler::fsubs_opf, $dst$$reg );
    // carry on here...
  %}

  // Float-to-long conversion with NaN handling; single-precision compare (fcmps)
  // followed by fstox in the delay slot and the fxtod / fsubd fix-up on the
  // fall-through path. The branch displacement of 4 relies on this exact layout.
  enc_class form_f2l_helper(regF src, regD dst) %{
    // fcmps %fcc0,$src,$src
    emit3( cbuf, Assembler::arith_op , Assembler::fcc0, Assembler::fpop2_op3, $src$$reg, Assembler::fcmps_opf, $src$$reg );
    // branch %fcc0 not-nan, predict taken
    emit2_19( cbuf, Assembler::branch_op, 0/*annul*/, Assembler::f_ordered, Assembler::fbp_op2, Assembler::fcc0, 1/*predict taken*/, 4 );
    // fstox $src,$dst
    emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3,         0, Assembler::fstox_opf, $src$$reg );
    // fxtod $dst,$dst (if nan)
    emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3,         0, Assembler::fxtod_opf, $dst$$reg );
    // clear $dst (if nan)
    emit3( cbuf, Assembler::arith_op , $dst$$reg, Assembler::fpop1_op3, $dst$$reg, Assembler::fsubd_opf, $dst$$reg );
    // carry on here...
  %}

  // Two-operand floating-point encodings: rs1 is always 0, and the secondary,
  // primary and tertiary opcodes of the using instruction supply the remaining
  // opcode fields. The first four differ only in the declared operand classes.
  enc_class form3_opf_rs2F_rdF(regF rs2, regF rd) %{ emit3(cbuf,$secondary,$rd$$reg,$primary,0,$tertiary,$rs2$$reg); %}
  enc_class form3_opf_rs2F_rdD(regF rs2, regD rd) %{ emit3(cbuf,$secondary,$rd$$reg,$primary,0,$tertiary,$rs2$$reg); %}
  enc_class form3_opf_rs2D_rdF(regD rs2, regF rd) %{ emit3(cbuf,$secondary,$rd$$reg,$primary,0,$tertiary,$rs2$$reg); %}
  enc_class form3_opf_rs2D_rdD(regD rs2, regD rd) %{ emit3(cbuf,$secondary,$rd$$reg,$primary,0,$tertiary,$rs2$$reg); %}

  // Same, but the source is the register numbered one above the encoded rs2
  // (presumably the low half of the double, per the _lo suffix -- confirm).
  enc_class form3_opf_rs2D_lo_rdF(regD rs2, regF rd) %{ emit3(cbuf,$secondary,$rd$$reg,$primary,0,$tertiary,$rs2$$reg+1); %}

  // Half-at-a-time variants: _hi uses the encoded register numbers as they are,
  // _lo adds one to both the source and the destination number.
  enc_class form3_opf_rs2D_hi_rdD_hi(regD rs2, regD rd) %{ emit3(cbuf,$secondary,$rd$$reg,$primary,0,$tertiary,$rs2$$reg); %}
  enc_class form3_opf_rs2D_lo_rdD_lo(regD rs2, regD rd) %{ emit3(cbuf,$secondary,$rd$$reg+1,$primary,0,$tertiary,$rs2$$reg+1); %}

  // Three-operand single-precision encoding: rs1 and rs2 are sources, rd the
  // destination; opcode fields come from the secondary/primary/tertiary opcodes.
  enc_class form3_opf_rs1F_rs2F_rdF( regF rs1, regF rs2, regF rd ) %{
    emit3( cbuf, $secondary, $rd$$reg, $primary, $rs1$$reg, $tertiary, $rs2$$reg );
  %}

  // Three-operand double-precision encoding; identical call shape to
  // form3_opf_rs1F_rs2F_rdF, only the operand classes differ.
  enc_class form3_opf_rs1D_rs2D_rdD( regD rs1, regD rs2, regD rd ) %{
    emit3( cbuf, $secondary, $rd$$reg, $primary, $rs1$$reg, $tertiary, $rs2$$reg );
  %}

  // Single-precision encoding whose rd slot carries the floating-point
  // condition-code register (fcc) instead of a destination register.
  enc_class form3_opf_rs1F_rs2F_fcc( regF rs1, regF rs2, flagsRegF fcc ) %{
    emit3( cbuf, $secondary, $fcc$$reg, $primary, $rs1$$reg, $tertiary, $rs2$$reg );
  %}

  // Double-precision encoding whose rd slot carries the floating-point
  // condition-code register (fcc) instead of a destination register.
  enc_class form3_opf_rs1D_rs2D_fcc( regD rs1, regD rs2, flagsRegF fcc ) %{
    emit3( cbuf, $secondary, $fcc$$reg, $primary, $rs1$$reg, $tertiary, $rs2$$reg );
  %}

  // FPop1 conversion with fixed op (arith_op) and op3 (fpop1_op3); the opf
  // selecting the actual conversion is the secondary opcode of the using
  // instruction. rs1 is 0.
  enc_class form3_convI2F(regF rs2, regF rd) %{
    emit3(cbuf,Assembler::arith_op,$rd$$reg,Assembler::fpop1_op3,0,$secondary,$rs2$$reg);
  %}

  // Encoding class for traceable jumps.
  // Delegates to emit_jmpl with the encoded destination register.
  enc_class form_jmpl(g3RegP dest) %{
    emit_jmpl(cbuf, $dest$$reg);
  %}

  // Jump through dest via emit_jmpl_set_exception_pc (defined elsewhere in this
  // file); per its name that helper also records the exception pc.
  enc_class form_jmpl_set_exception_pc(g1RegP dest) %{
    emit_jmpl_set_exception_pc(cbuf, $dest$$reg);
  %}

  // Emits a nop via emit_nop.
  enc_class form2_nop() %{
    emit_nop(cbuf);
  %}

  // Emits an illtrap via emit_illtrap.
  enc_class form2_illtrap() %{
    emit_illtrap(cbuf);
  %}


  // Compare longs and convert into -1, 0, 1.
  // Six instructions: a compare, two annulled branches (on xcc) whose delay
  // slots load -1 or 1, and a final clear for the equal case. Both branch
  // displacements (5 and 3) target the instruction after the clear, so the
  // layout must not change.
  enc_class cmpl_flag( iRegL src1, iRegL src2, iRegI dst ) %{
    // CMP $src1,$src2
    emit3( cbuf, Assembler::arith_op, 0, Assembler::subcc_op3, $src1$$reg, 0, $src2$$reg );
    // blt,a,pn done
    emit2_19( cbuf, Assembler::branch_op, 1/*annul*/, Assembler::less   , Assembler::bp_op2, Assembler::xcc, 0/*predict not taken*/, 5 );
    // mov dst,-1 in delay slot
    emit3_simm13( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3, 0, -1 );
    // bgt,a,pn done
    emit2_19( cbuf, Assembler::branch_op, 1/*annul*/, Assembler::greater, Assembler::bp_op2, Assembler::xcc, 0/*predict not taken*/, 3 );
    // mov dst,1 in delay slot
    emit3_simm13( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3, 0,  1 );
    // CLR    $dst
    emit3( cbuf, Assembler::arith_op, $dst$$reg, Assembler::or_op3 , 0, 0, 0 );
  %}

  // Calls the partial_subtype_check stub with a runtime-call relocation and
  // fills the delay slot with a nop.
  enc_class enc_PartialSubtypeCheck() %{
    MacroAssembler _masm(&cbuf);
    __ call(StubRoutines::Sparc::partial_subtype_check(), relocInfo::runtime_call_type);
    __ delayed()->nop();
  %}

  // Conditional branch on icc to labl, followed by a nop in the delay slot.
  // The branch is not annulled. Backward branches are hinted as taken (pt),
  // forward branches as not taken (pn).
  enc_class enc_bp( label labl, cmpOp cmp, flagsReg cc ) %{
    MacroAssembler _masm(&cbuf);
    Label* target = $labl$$label;

    Assembler::Predict hint = Assembler::pn;
    if (cbuf.is_backward_branch(*target)) {
      hint = Assembler::pt;
    }

    __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, hint, *target);
    __ delayed()->nop();
  %}

  // Branch on the contents of register op1 (bpr) to labl, followed by a nop in
  // the delay slot. The branch is not annulled. Backward branches are hinted as
  // taken (pt), forward branches as not taken (pn).
  enc_class enc_bpr( label labl, cmpOp_reg cmp, iRegI op1 ) %{
    MacroAssembler _masm(&cbuf);
    Label* target = $labl$$label;

    Assembler::Predict hint = Assembler::pn;
    if (cbuf.is_backward_branch(*target)) {
      hint = Assembler::pt;
    }

    __ bpr( (Assembler::RCondition)($cmp$$cmpcode), false, hint, as_Register($op1$$reg), *target);
    __ delayed()->nop();
  %}

  // Integer conditional move, register source. The instruction word is put
  // together by hand: the condition comes from cmp, and pcc supplies the cc1/cc0
  // bits that pick icc or xcc.
  enc_class enc_cmov_reg( cmpOp cmp, iRegI dst, iRegI src, immI pcc) %{
    int insn = (Assembler::arith_op << 30)
             | ($dst$$reg << 25)              // destination register
             | (Assembler::movcc_op3 << 19)
             | (1 << 18)                      // cc2 bit for 'icc'
             | ($cmp$$cmpcode << 14)          // condition
             | (0 << 13)                      // select register move
             | ($pcc$$constant << 11)         // cc1, cc0 bits for 'icc' or 'xcc'
             | ($src$$reg << 0);              // source register
    cbuf.insts()->emit_int32(insn);
  %}

  // Integer conditional move, immediate source. Same layout as enc_cmov_reg but
  // with bit 13 set and the low 11 bits holding the truncated immediate.
  enc_class enc_cmov_imm( cmpOp cmp, iRegI dst, immI11 src, immI pcc ) %{
    // keep only the low 11 bits of the constant
    int imm_field = $src$$constant & 0x7FF;
    int insn = (Assembler::arith_op << 30)
             | ($dst$$reg << 25)              // destination register
             | (Assembler::movcc_op3 << 19)
             | (1 << 18)                      // cc2 bit for 'icc'
             | ($cmp$$cmpcode << 14)          // condition
             | (1 << 13)                      // select immediate move
             | ($pcc$$constant << 11)         // cc1, cc0 bits for 'icc'
             | (imm_field << 0);
    cbuf.insts()->emit_int32(insn);
  %}

  // Integer conditional move on a floating-point condition, register source.
  // cc2 is left clear and the encoded fcc register number fills the cc1/cc0 bits.
  enc_class enc_cmov_reg_f( cmpOpF cmp, iRegI dst, iRegI src, flagsRegF fcc ) %{
    int insn = (Assembler::arith_op << 30)
             | ($dst$$reg << 25)              // destination register
             | (Assembler::movcc_op3 << 19)
             | (0 << 18)                      // cc2 bit for 'fccX'
             | ($cmp$$cmpcode << 14)          // condition
             | (0 << 13)                      // select register move
             | ($fcc$$reg << 11)              // cc1, cc0 bits for fcc0-fcc3
             | ($src$$reg << 0);              // source register
    cbuf.insts()->emit_int32(insn);
  %}

  // Integer conditional move on a floating-point condition, immediate source.
  // Same layout as enc_cmov_reg_f but with bit 13 set and the low 11 bits
  // holding the truncated immediate.
  enc_class enc_cmov_imm_f( cmpOp cmp, iRegI dst, immI11 src, flagsRegF fcc ) %{
    // keep only the low 11 bits of the constant
    int imm_field = $src$$constant & 0x7FF;
    int insn = (Assembler::arith_op << 30)
             | ($dst$$reg << 25)              // destination register
             | (Assembler::movcc_op3 << 19)
             | (0 << 18)                      // cc2 bit for 'fccX'
             | ($cmp$$cmpcode << 14)          // condition
             | (1 << 13)                      // select immediate move
             | ($fcc$$reg << 11)              // cc1, cc0 bits for fcc0-fcc3
             | (imm_field << 0);
    cbuf.insts()->emit_int32(insn);
  %}

  // Floating-point conditional move on an integer condition. The word is built
  // by hand under fpop2_op3; the primary opcode of the using instruction is
  // placed at bit 5 and selects the operand width.
  // NOTE(review): bit 13 is set here and labelled as the register-move selector,
  // whereas the integer encoders above clear bit 13 for their register form --
  // confirm the meaning of this bit against the FMOVcc instruction format.
  enc_class enc_cmovf_reg( cmpOp cmp, regD dst, regD src, immI pcc ) %{
    int op = (Assembler::arith_op << 30) |
             ($dst$$reg << 25) |
             (Assembler::fpop2_op3 << 19) |
             (0 << 18) |
             ($cmp$$cmpcode << 14) |
             (1 << 13) |                    // select register move
             ($pcc$$constant << 11) |       // cc1-cc0 bits for 'icc' or 'xcc'
             ($primary << 5) |              // select single, double or quad
             ($src$$reg << 0);
    cbuf.insts()->emit_int32(op);
  %}

  // Floating-point conditional move on a floating-point condition. Same layout
  // as enc_cmovf_reg except that bit 13 is not set and the encoded fcc register
  // number is placed at bit 11.
  enc_class enc_cmovff_reg( cmpOpF cmp, flagsRegF fcc, regD dst, regD src ) %{
    int op = (Assembler::arith_op << 30) |
             ($dst$$reg << 25) |
             (Assembler::fpop2_op3 << 19) |
             (0 << 18) |
             ($cmp$$cmpcode << 14) |
             ($fcc$$reg << 11) |            // cc2-cc0 bits for 'fccX'
             ($primary << 5) |              // select single, double or quad
             ($src$$reg << 0);
    cbuf.insts()->emit_int32(op);
  %}

  // Used by the MIN/MAX encodings. Laid out like enc_cmov_reg, except that the
  // condition is the primary opcode of the using instruction rather than an
  // operand, and the cc1/cc0 bits are fixed at 0.
  enc_class enc_cmov_reg_minmax( iRegI dst, iRegI src ) %{
    int insn = (Assembler::arith_op << 30)
             | ($dst$$reg << 25)              // destination register
             | (Assembler::movcc_op3 << 19)
             | (1 << 18)                      // cc2 bit for 'icc'
             | ($primary << 14)               // condition from the opcode field
             | (0 << 13)                      // select register move
             | (0 << 11)                      // cc1, cc0 bits for 'icc'
             | ($src$$reg << 0);              // source register
    cbuf.insts()->emit_int32(insn);
  %}

  // Long-register variant of enc_cmov_reg_minmax; the condition again comes from
  // the primary opcode of the using instruction.
  // NOTE(review): unlike enc_cmov_reg_minmax this ORs in (6 << 16), which sets
  // bits 18 and 17 (bit 17 overlaps the field the primary opcode is shifted
  // into), and it leaves bits 12:11 at 0 although the inline comment mentions
  // xcc. Confirm the intended encoding before relying on this class.
  enc_class enc_cmov_reg_minmax_long( iRegL dst, iRegL src ) %{
    int op = (Assembler::arith_op << 30) |
             ($dst$$reg << 25) |
             (Assembler::movcc_op3 << 19) |
             (6 << 16) |                    // cc2 bit for 'xcc'
             ($primary << 14) |
             (0 << 13) |                    // select register move
             (0 << 11) |                    // cc1, cc0 bits for 'icc'
             ($src$$reg << 0);
    cbuf.insts()->emit_int32(op);
  %}

  // Loads a 13-bit signed constant into rd with a single or-immediate whose
  // first source register number is 0.
  enc_class Set13( immI13 src, iRegI rd ) %{
    emit3_simm13( cbuf, Assembler::arith_op, $rd$$reg, Assembler::or_op3, 0, $src$$constant );
  %}

  // Emits a sethi of the constant into rd through emit2_22; the constant is
  // passed to the helper unchanged.
  enc_class SetHi22( immI src, iRegI rd ) %{
    emit2_22( cbuf, Assembler::branch_op, $rd$$reg, Assembler::sethi_op2, $src$$constant );
  %}

  // Loads an arbitrary int constant into rd using the MacroAssembler set helper,
  // which chooses the instruction sequence.
  enc_class Set32( immI src, iRegI rd ) %{
    MacroAssembler _masm(&cbuf);
    __ set($src$$constant, reg_to_register_object($rd$$reg));
  %}

  // Post-call stack check, emitted only when VerifyStackAtCalls is set:
  // computes SP plus the frame size into G3, compares it with FP, and traps
  // when they differ. Emits nothing otherwise.
  enc_class call_epilog %{
    if( VerifyStackAtCalls ) {
      MacroAssembler _masm(&cbuf);
      // frame size in bytes: slot count scaled by LogBytesPerInt
      int frame_bytes = ra_->C->frame_slots() << LogBytesPerInt;
      Register expected_fp = G3;

      __ add(SP, frame_bytes, expected_fp);
      __ cmp(expected_fp, FP);
      __ breakpoint_trap(Assembler::notEqual, Assembler::ptr_cc);
    }
  %}

  // Long values come back from native calls in O0:O1 in the 32-bit VM, copy the value
  // to G1 so the register allocator will not have to deal with the misaligned register
  // pair.
  // Compiled to nothing when _LP64 is defined, and emits nothing when the call
  // does not return a long.
  enc_class adjust_long_from_native_call %{
#ifndef _LP64
    if (returns_long()) {
      //    sllx  O0,32,O0
      emit3_simm13( cbuf, Assembler::arith_op, R_O0_enc, Assembler::sllx_op3, R_O0_enc, 0x1020 );
      //    srl   O1,0,O1
      emit3_simm13( cbuf, Assembler::arith_op, R_O1_enc, Assembler::srl_op3, R_O1_enc, 0x0000 );
      //    or    O0,O1,G1
      emit3       ( cbuf, Assembler::arith_op, R_G1_enc, Assembler:: or_op3, R_O0_enc, 0, R_O1_enc );
    }
#endif
  %}

  // Direct call from compiled Java code into the runtime, emitted with a
  // runtime-call relocation and with preserve_g2 requested from emit_call_reloc.
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime
    // CALL directly to the runtime
    // The user of this is responsible for ensuring that R_L7 is empty (killed).
    emit_call_reloc(cbuf, $meth$$method, relocInfo::runtime_call_type,
                    /*preserve_g2=*/true);
  %}

  // Saves SP into L7_mh_SP_save; restore_SP is the inverse.
  enc_class preserve_SP %{
    MacroAssembler _masm(&cbuf);
    __ mov(SP, L7_mh_SP_save);
  %}

  // Restores SP from L7_mh_SP_save; preserve_SP is the inverse.
  enc_class restore_SP %{
    MacroAssembler _masm(&cbuf);
    __ mov(L7_mh_SP_save, SP);
  %}

  // Static (or optimized-virtual) Java call.
  // The call targets a fixup routine, which uses ScopeDesc info to determine
  // who we intended to call. The relocation type depends on the call kind:
  //   no _method          -> runtime_call_type, no stub
  //   _optimized_virtual  -> opt_virtual_call_type, plus java_to_interp stub
  //   otherwise           -> static_call_type, plus java_to_interp stub
  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    if ( _method ) {
      if (_optimized_virtual) {
        emit_call_reloc(cbuf, $meth$$method, relocInfo::opt_virtual_call_type);
      } else {
        emit_call_reloc(cbuf, $meth$$method, relocInfo::static_call_type);
      }
      // Emit stub for static call
      emit_java_to_interp(cbuf);
    } else {
      emit_call_reloc(cbuf, $meth$$method, relocInfo::runtime_call_type);
    }
  %}

  // Virtual Java call. Two shapes, selected by the node's _vtable_index:
  //  * negative index: inline-cache call -- a placeholder (non_oop_word) is
  //    loaded into the inline cache register, a virtual_call relocation is
  //    attached at the instruction mark, and a call is emitted;
  //  * otherwise: vtable dispatch -- load the receiver klass from O0, load the
  //    method from its vtable slot into G5_method, load the from_compiled entry
  //    and jump to it with the return address in O7.
  // The instruction counts here are mirrored by
  // MachCallDynamicJavaNode::ret_addr_offset, so sizes must stay in sync.
  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    MacroAssembler _masm(&cbuf);
    __ set_inst_mark();
    int vtable_index = this->_vtable_index;
    // MachCallDynamicJavaNode::ret_addr_offset uses this same test
    if (vtable_index < 0) {
      // must be invalid_vtable_index, not nonvirtual_vtable_index
      assert(vtable_index == methodOopDesc::invalid_vtable_index, "correct sentinel value");
      Register G5_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
      assert(G5_ic_reg == G5_inline_cache_reg, "G5_inline_cache_reg used in assemble_ic_buffer_code()");
      assert(G5_ic_reg == G5_megamorphic_method, "G5_megamorphic_method used in megamorphic call stub");
      // !!!!!
      // Generate  "set 0x01, R_G5", placeholder instruction to load oop-info
      // emit_call_dynamic_prologue( cbuf );
      __ set_oop((jobject)Universe::non_oop_word(), G5_ic_reg);

      address  virtual_call_oop_addr = __ inst_mark();
      // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
      // who we intended to call.
      __ relocate(virtual_call_Relocation::spec(virtual_call_oop_addr));
      emit_call_reloc(cbuf, $meth$$method, relocInfo::none);
    } else {
      assert(!UseInlineCaches, "expect vtable calls only if not using ICs");
      // Just go thru the vtable
      // get receiver klass (receiver already checked for non-null)
      // If we end up going thru a c2i adapter interpreter expects method in G5
      int off = __ offset();
      __ load_klass(O0, G3_scratch);
      // Expected size of the load_klass sequence above; only consulted by the
      // size assertion in the large-offset branch below.
      int klass_load_size;
      if (UseCompressedOops) {
        assert(Universe::heap() != NULL, "java heap should be initialized");
        if (Universe::narrow_oop_base() == NULL)
          klass_load_size = 2*BytesPerInstWord;
        else
          klass_load_size = 3*BytesPerInstWord;
      } else {
        klass_load_size = 1*BytesPerInstWord;
      }
      int entry_offset = instanceKlass::vtable_start_offset() + vtable_index*vtableEntry::size();
      int v_off = entry_offset*wordSize + vtableEntry::method_offset_in_bytes();
      if (Assembler::is_simm13(v_off)) {
        __ ld_ptr(G3, v_off, G5_method);
      } else {
        // Generate 2 instructions
        __ Assembler::sethi(v_off & ~0x3ff, G5_method);
        __ or3(G5_method, v_off & 0x3ff, G5_method);
        // ld_ptr, set_hi, set
        assert(__ offset() - off == klass_load_size + 2*BytesPerInstWord,
               "Unexpected instruction size(s)");
        __ ld_ptr(G3, G5_method, G5_method);
      }
      // NOTE: for vtable dispatches, the vtable entry will never be null.
      // However it may very well end up in handle_wrong_method if the
      // method is abstract for the particular class.
      __ ld_ptr(G5_method, in_bytes(methodOopDesc::from_compiled_offset()), G3_scratch);
      // jump to target (either compiled code or c2iadapter)
      __ jmpl(G3_scratch, G0, O7);
      __ delayed()->nop();
    }
  %}

  // Indirect call to compiled Java code: loads the from_compiled entry of the
  // method held in the inline cache register into G3 and calls through it, with
  // a nop in the delay slot. The instruction mark is set at the call, after the load.
  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    MacroAssembler _masm(&cbuf);

    Register G5_ic_reg = reg_to_register_object(Matcher::inline_cache_reg_encode());
    Register temp_reg = G3;   // caller must kill G3!  We cannot reuse G5_ic_reg here because
                              // we might be calling a C2I adapter which needs it.

    assert(temp_reg != G5_ic_reg, "conflicting registers");
    // Load nmethod
    __ ld_ptr(G5_ic_reg, in_bytes(methodOopDesc::from_compiled_offset()), temp_reg);

    // CALL to compiled java, indirect the contents of G3
    __ set_inst_mark();
    __ callr(temp_reg, G0);
    __ delayed()->nop();
  %}

// 32-bit signed division, register divisor, done with the 64-bit sdivx.
// Both inputs are first rewritten in place by an arithmetic shift right of 0,
// so the source registers are modified as well as dst.
enc_class idiv_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst) %{
    MacroAssembler _masm(&cbuf);
    Register numerator   = reg_to_register_object($src1$$reg);
    Register denominator = reg_to_register_object($src2$$reg);
    Register quotient    = reg_to_register_object($dst$$reg);

    // in-place sra by 0 on both operands, divisor first
    __ sra(denominator, 0, denominator);
    __ sra(numerator, 0, numerator);
    __ sdivx(numerator, denominator, quotient);
%}

// 32-bit signed division by a 13-bit immediate, done with the 64-bit sdivx.
// The dividend register is rewritten in place by an arithmetic shift right of 0.
enc_class idiv_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst) %{
    MacroAssembler _masm(&cbuf);

    Register numerator = reg_to_register_object($src1$$reg);
    Register quotient  = reg_to_register_object($dst$$reg);
    int denominator = $imm$$constant;

    __ sra(numerator, 0, numerator);
    __ sdivx(numerator, denominator, quotient);
%}

// Upper half of a 32x32 multiply: both inputs are rewritten in place by an
// arithmetic shift right of 0, multiplied with the 64-bit mulx, and the
// product is shifted right logically by 32 into dst.
enc_class enc_mul_hi(iRegIsafe dst, iRegIsafe src1, iRegIsafe src2) %{
    MacroAssembler _masm(&cbuf);
    Register lhs     = reg_to_register_object($src1$$reg);
    Register rhs     = reg_to_register_object($src2$$reg);
    Register product = reg_to_register_object($dst$$reg);

    __ sra( lhs, 0, lhs );
    __ sra( rhs, 0, rhs );
    __ mulx( lhs, rhs, product );
    // keep bits 63:32 of the product
    __ srlx( product, 32, product );
%}

// 32-bit signed remainder, register divisor:
//   dst = src1 - (src1 / src2) * src2
// computed with the 64-bit sdivx/mulx. Both inputs are rewritten in place by an
// arithmetic shift right of 0. The quotient and the product are staged in
// scratch, so scratch must not share a register with either input: both inputs
// are read again after scratch has been overwritten.
enc_class irem_reg(iRegIsafe src1, iRegIsafe src2, iRegIsafe dst, o7RegL scratch) %{
    MacroAssembler _masm(&cbuf);
    Register Rdividend = reg_to_register_object($src1$$reg);
    Register Rdivisor = reg_to_register_object($src2$$reg);
    Register Rresult = reg_to_register_object($dst$$reg);
    Register Rscratch = reg_to_register_object($scratch$$reg);

    assert(Rdividend != Rscratch, "dividend must not share a register with scratch");
    assert(Rdivisor  != Rscratch, "divisor must not share a register with scratch");

    __ sra(Rdividend, 0, Rdividend);
    __ sra(Rdivisor, 0, Rdivisor);
    // scratch = dividend / divisor
    __ sdivx(Rdividend, Rdivisor, Rscratch);
    // scratch = quotient * divisor
    __ mulx(Rscratch, Rdivisor, Rscratch);
    // result = dividend - quotient * divisor
    __ sub(Rdividend, Rscratch, Rresult);
%}

// 32-bit signed remainder by a 13-bit immediate:
//   dst = src1 - (src1 / imm) * imm
// computed with the 64-bit sdivx/mulx. The dividend is rewritten in place by an
// arithmetic shift right of 0. The quotient and the product are staged in
// scratch, so scratch must not share a register with the dividend, which is
// read again by the final subtraction.
enc_class irem_imm(iRegIsafe src1, immI13 imm, iRegIsafe dst, o7RegL scratch) %{
    MacroAssembler _masm(&cbuf);

    Register Rdividend = reg_to_register_object($src1$$reg);
    int divisor = $imm$$constant;
    Register Rresult = reg_to_register_object($dst$$reg);
    Register Rscratch = reg_to_register_object($scratch$$reg);

    assert(Rdividend != Rscratch, "dividend must not share a register with scratch");

    __ sra(Rdividend, 0, Rdividend);
    // scratch = dividend / divisor
    __ sdivx(Rdividend, divisor, Rscratch);
    // scratch = quotient * divisor
    __ mulx(Rscratch, divisor, Rscratch);
    // result = dividend - quotient * divisor
    __ sub(Rdividend, Rscratch, Rresult);
%}

// Single-precision fabs from src into dst via the MacroAssembler.
enc_class fabss (sflt_reg dst, sflt_reg src) %{
    MacroAssembler _masm(&cbuf);

    FloatRegister source = reg_to_SingleFloatRegister_object($src$$reg);
    FloatRegister target = reg_to_SingleFloatRegister_object($dst$$reg);
    __ fabs(FloatRegisterImpl::S, source, target);
%}

// Double-precision fabs from src into dst via the MacroAssembler.
enc_class fabsd (dflt_reg dst, dflt_reg src) %{
    MacroAssembler _masm(&cbuf);

    FloatRegister source = reg_to_DoubleFloatRegister_object($src$$reg);
    FloatRegister target = reg_to_DoubleFloatRegister_object($dst$$reg);
    __ fabs(FloatRegisterImpl::D, source, target);
%}

// Double-precision fneg from src into dst via the MacroAssembler.
enc_class fnegd (dflt_reg dst, dflt_reg src) %{
    MacroAssembler _masm(&cbuf);

    FloatRegister source = reg_to_DoubleFloatRegister_object($src$$reg);
    FloatRegister target = reg_to_DoubleFloatRegister_object($dst$$reg);
    __ fneg(FloatRegisterImpl::D, source, target);
%}

// Single-precision fsqrt from src into dst via the MacroAssembler.
enc_class fsqrts (sflt_reg dst, sflt_reg src) %{
    MacroAssembler _masm(&cbuf);

    FloatRegister source = reg_to_SingleFloatRegister_object($src$$reg);
    FloatRegister target = reg_to_SingleFloatRegister_object($dst$$reg);
    __ fsqrt(FloatRegisterImpl::S, source, target);
%}

// Double-precision fsqrt from src into dst via the MacroAssembler.
enc_class fsqrtd (dflt_reg dst, dflt_reg src) %{
    MacroAssembler _masm(&cbuf);

    FloatRegister source = reg_to_DoubleFloatRegister_object($src$$reg);
    FloatRegister target = reg_to_DoubleFloatRegister_object($dst$$reg);
    __ fsqrt(FloatRegisterImpl::D, source, target);
%}

// Single-precision fmov from src into dst via the MacroAssembler.
// NOTE(review): the operands are declared dflt_reg but are looked up with the
// single-float helper -- presumably intentional (moving one half of a double);
// confirm against the users of this class.
enc_class fmovs (dflt_reg dst, dflt_reg src) %{
    MacroAssembler _masm(&cbuf);

    FloatRegister source = reg_to_SingleFloatRegister_object($src$$reg);
    FloatRegister target = reg_to_SingleFloatRegister_object($dst$$reg);
    __ fmov(FloatRegisterImpl::S, source, target);
%}

// Double-precision fmov from src into dst via the MacroAssembler.
enc_class fmovd (dflt_reg dst, dflt_reg src) %{
    MacroAssembler _masm(&cbuf);

    FloatRegister source = reg_to_DoubleFloatRegister_object($src$$reg);
    FloatRegister target = reg_to_DoubleFloatRegister_object($dst$$reg);
    __ fmov(FloatRegisterImpl::D, source, target);
%}

// Inline monitor-enter fast path: hands the object, box and two temporaries to
// MacroAssembler::compiler_lock_object. The last argument is true only when
// UseBiasedLocking is on and UseOptoBiasInlining is off.
// The object and box registers must be distinct from both temporaries.
enc_class Fast_Lock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
    MacroAssembler _masm(&cbuf);

    Register Roop  = reg_to_register_object($oop$$reg);
    Register Rbox  = reg_to_register_object($box$$reg);
    Register Rscratch = reg_to_register_object($scratch$$reg);
    Register Rmark =    reg_to_register_object($scratch2$$reg);

    assert(Roop  != Rscratch, "oop and scratch must be different registers");
    assert(Roop  != Rmark, "oop and mark temp must be different registers");
    assert(Rbox  != Rscratch, "box and scratch must be different registers");
    assert(Rbox  != Rmark, "box and mark temp must be different registers");

    __ compiler_lock_object(Roop, Rmark, Rbox, Rscratch, _counters, UseBiasedLocking && !UseOptoBiasInlining);
%}

// Inline monitor-exit fast path: hands the object, box and two temporaries to
// MacroAssembler::compiler_unlock_object. The last argument is true only when
// UseBiasedLocking is on and UseOptoBiasInlining is off.
// The object and box registers must be distinct from both temporaries.
enc_class Fast_Unlock(iRegP oop, iRegP box, o7RegP scratch, iRegP scratch2) %{
    MacroAssembler _masm(&cbuf);

    Register Roop  = reg_to_register_object($oop$$reg);
    Register Rbox  = reg_to_register_object($box$$reg);
    Register Rscratch = reg_to_register_object($scratch$$reg);
    Register Rmark =    reg_to_register_object($scratch2$$reg);

    assert(Roop  != Rscratch, "oop and scratch must be different registers");
    assert(Roop  != Rmark, "oop and mark temp must be different registers");
    assert(Rbox  != Rscratch, "box and scratch must be different registers");
    assert(Rbox  != Rmark, "box and mark temp must be different registers");

    __ compiler_unlock_object(Roop, Rmark, Rbox, Rscratch, UseBiasedLocking && !UseOptoBiasInlining);
  %}

  // Pointer-sized compare-and-swap on mem, followed by a compare of old with new
  // so the condition codes reflect the outcome. The register holding new is
  // passed to casn as its third operand and is compared afterwards.
  enc_class enc_cas( iRegP mem, iRegP old, iRegP new ) %{
    MacroAssembler _masm(&cbuf);
    Register Rmem = reg_to_register_object($mem$$reg);
    Register Rold = reg_to_register_object($old$$reg);
    Register Rnew = reg_to_register_object($new$$reg);

    // NOTE(review): this comment used to name casx_under_lock; the code calls
    // casn, which presumably picks the encoding by pointer width:
    // For 32-bit pointers you get a 32-bit CAS
    // For 64-bit pointers you get a 64-bit CASX
    __ casn(Rmem, Rold, Rnew); // Swap(*Rmem,Rnew) if *Rmem == Rold
    __ cmp( Rold, Rnew );
  %}

  // 64-bit compare-and-swap. The new value is first copied into O7 so the
  // operand register holding it is not overwritten; casx then operates on mem
  // with old and O7, and the final compare of old against O7 sets the condition
  // codes for the caller.
  enc_class enc_casx( iRegP mem, iRegL old, iRegL new) %{
    MacroAssembler _masm(&cbuf);

    Register address   = reg_to_register_object($mem$$reg);
    Register expected  = reg_to_register_object($old$$reg);
    Register new_value = reg_to_register_object($new$$reg);

    __ mov(new_value, O7);
    __ casx(address, expected, O7);
    __ cmp( expected, O7 );
  %}

  // raw int cas, used for compareAndSwap
  // Same shape as enc_casx but with the cas instruction: the new value is copied
  // into O7, cas operates on mem with old and O7, and the final compare of old
  // against O7 sets the condition codes for the caller.
  enc_class enc_casi( iRegP mem, iRegL old, iRegL new) %{
    MacroAssembler _masm(&cbuf);

    Register address   = reg_to_register_object($mem$$reg);
    Register expected  = reg_to_register_object($old$$reg);
    Register new_value = reg_to_register_object($new$$reg);

    __ mov(new_value, O7);
    __ cas(address, expected, O7);
    __ cmp( expected, O7 );
  %}

  // Turns the 64-bit condition codes (xcc) into a boolean in res: res is set to
  // 1, then overwritten with G0 when the flags report notEqual.
  enc_class enc_lflags_ne_to_boolean( iRegI res ) %{
    MacroAssembler _masm(&cbuf);
    Register outcome = reg_to_register_object($res$$reg);

    __ mov(1, outcome);
    __ movcc( Assembler::notEqual, false, Assembler::xcc, G0, outcome );
  %}

  // Turns the 32-bit condition codes (icc) into a boolean in res: res is set to
  // 1, then overwritten with G0 when the flags report notEqual.
  enc_class enc_iflags_ne_to_boolean( iRegI res ) %{
    MacroAssembler _masm(&cbuf);
    Register outcome = reg_to_register_object($res$$reg);

    __ mov(1, outcome);
    __ movcc( Assembler::notEqual, false, Assembler::icc, G0, outcome );
  %}

  // Three-way floating-point compare into an integer register via
  // MacroAssembler::float_cmp. The primary opcode of the using instruction
  // selects how the source operands are interpreted: non-zero means single
  // float registers, zero means double float registers; it is also forwarded
  // as the first argument of float_cmp.
  enc_class floating_cmp ( iRegP dst, regF src1, regF src2 ) %{
    MacroAssembler _masm(&cbuf);
    Register Rdst = reg_to_register_object($dst$$reg);
    FloatRegister Fsrc1 = $primary ? reg_to_SingleFloatRegister_object($src1$$reg)
                                     : reg_to_DoubleFloatRegister_object($src1$$reg);
    FloatRegister Fsrc2 = $primary ? reg_to_SingleFloatRegister_object($src2$$reg)
                                     : reg_to_DoubleFloatRegister_object($src2$$reg);

    // Convert condition code fcc0 into -1,0,1; unordered reports less-than (-1)
    __ float_cmp( $primary, -1, Fsrc1, Fsrc2, Rdst);
  %}


  // Inline comparison of two char sequences.
  // Inputs: str1/str2 are the base pointers, cnt1/cnt2 the lengths in chars.
  // result receives the difference of the first mismatching chars, or the
  // length difference (cnt1 - cnt2) when one sequence is a prefix of the other.
  // Clobbers O7 (holds the length difference) and reuses cnt1, cnt2 and both
  // string registers as temporaries. Statement order matters throughout because
  // of the delay slots.
  enc_class enc_String_Compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result) %{
    Label Ldone, Lloop;
    MacroAssembler _masm(&cbuf);

    Register   str1_reg = reg_to_register_object($str1$$reg);
    Register   str2_reg = reg_to_register_object($str2$$reg);
    Register   cnt1_reg = reg_to_register_object($cnt1$$reg);
    Register   cnt2_reg = reg_to_register_object($cnt2$$reg);
    Register result_reg = reg_to_register_object($result$$reg);

    assert(result_reg != str1_reg &&
           result_reg != str2_reg &&
           result_reg != cnt1_reg &&
           result_reg != cnt2_reg ,
           "need different registers");

    // Compute the minimum of the string lengths (into cnt1_reg, scaled to
    // bytes) and the difference of the string lengths (into O7).

    // See if the lengths are different, and calculate min in cnt1_reg.
    // Stash diff in O7 in case we need it for a tie-breaker.
    Label Lskip;
    __ subcc(cnt1_reg, cnt2_reg, O7);
    __ sll(cnt1_reg, exact_log2(sizeof(jchar)), cnt1_reg); // scale the limit
    __ br(Assembler::greater, true, Assembler::pt, Lskip);
    // cnt2 is shorter, so use its count:
    __ delayed()->sll(cnt2_reg, exact_log2(sizeof(jchar)), cnt1_reg); // scale the limit
    __ bind(Lskip);

    // reallocate cnt1_reg, cnt2_reg, result_reg
    // Note:  limit_reg holds the string length pre-scaled by 2
    Register limit_reg =   cnt1_reg;
    Register  chr2_reg =   cnt2_reg;
    Register  chr1_reg = result_reg;
    // str{12} are the base pointers

    // Is the minimum length zero?
    __ cmp(limit_reg, (int)(0 * sizeof(jchar))); // use cast to resolve overloading ambiguity
    __ br(Assembler::equal, true, Assembler::pn, Ldone);
    __ delayed()->mov(O7, result_reg);  // result is difference in lengths

    // Load first characters
    __ lduh(str1_reg, 0, chr1_reg);
    __ lduh(str2_reg, 0, chr2_reg);

    // Compare first characters
    __ subcc(chr1_reg, chr2_reg, chr1_reg);
    __ br(Assembler::notZero, false, Assembler::pt,  Ldone);
    assert(chr1_reg == result_reg, "result must be pre-placed");
    __ delayed()->nop();

    {
      // Check after comparing first character to see if strings are equivalent
      Label LSkip2;
      // Check if the strings start at same location
      __ cmp(str1_reg, str2_reg);
      __ brx(Assembler::notEqual, true, Assembler::pt, LSkip2);
      __ delayed()->nop();

      // Check if the length difference is zero (in O7)
      __ cmp(G0, O7);
      __ br(Assembler::equal, true, Assembler::pn, Ldone);
      __ delayed()->mov(G0, result_reg);  // result is zero

      // Strings might not be equal
      __ bind(LSkip2);
    }

    __ subcc(limit_reg, 1 * sizeof(jchar), chr1_reg);
    __ br(Assembler::equal, true, Assembler::pn, Ldone);
    __ delayed()->mov(O7, result_reg);  // result is difference in lengths

    // Shift str1_reg and str2_reg to the end of the arrays, negate limit
    __ add(str1_reg, limit_reg, str1_reg);
    __ add(str2_reg, limit_reg, str2_reg);
    __ neg(chr1_reg, limit_reg);  // limit = -(limit-2)

    // Compare the rest of the characters
    __ lduh(str1_reg, limit_reg, chr1_reg);
    __ bind(Lloop);
    // __ lduh(str1_reg, limit_reg, chr1_reg); // hoisted
    __ lduh(str2_reg, limit_reg, chr2_reg);
    __ subcc(chr1_reg, chr2_reg, chr1_reg);
    __ br(Assembler::notZero, false, Assembler::pt, Ldone);
    assert(chr1_reg == result_reg, "result must be pre-placed");
    __ delayed()->inccc(limit_reg, sizeof(jchar));
    // annul LDUH if branch is not taken to prevent access past end of string
    __ br(Assembler::notZero, true, Assembler::pt, Lloop);
    __ delayed()->lduh(str1_reg, limit_reg, chr1_reg); // hoisted

    // If strings are equal up to min length, return the length difference.
    __ mov(O7, result_reg);

    // Otherwise, return the difference between the first mismatched chars.
    __ bind(Ldone);
  %}

// Inline equality test of two char sequences of the same length (cnt chars).
// result is 1 when they are equal (including identical pointers or a zero
// count) and 0 otherwise. When both pointers are 4-byte aligned the comparison
// is delegated to MacroAssembler::char_arrays_equals; otherwise a char-by-char
// loop is used. Clobbers O7 and reuses cnt and both string registers as
// temporaries. Statement order matters because of the delay slots.
enc_class enc_String_Equals(o0RegP str1, o1RegP str2, g3RegI cnt, notemp_iRegI result) %{
    Label Lword_loop, Lpost_word, Lchar, Lchar_loop, Ldone;
    MacroAssembler _masm(&cbuf);

    Register   str1_reg = reg_to_register_object($str1$$reg);
    Register   str2_reg = reg_to_register_object($str2$$reg);
    Register    cnt_reg = reg_to_register_object($cnt$$reg);
    Register   tmp1_reg = O7;
    Register result_reg = reg_to_register_object($result$$reg);

    assert(result_reg != str1_reg &&
           result_reg != str2_reg &&
           result_reg !=  cnt_reg &&
           result_reg != tmp1_reg ,
           "need different registers");

    __ cmp(str1_reg, str2_reg); //same char[] ?
    __ brx(Assembler::equal, true, Assembler::pn, Ldone);
    __ delayed()->add(G0, 1, result_reg);

    __ cmp_zero_and_br(Assembler::zero, cnt_reg, Ldone, true, Assembler::pn);
    __ delayed()->add(G0, 1, result_reg); // count == 0

    //rename registers
    Register limit_reg =    cnt_reg;
    Register  chr1_reg = result_reg;
    Register  chr2_reg =   tmp1_reg;

    //check for alignment and position the pointers to the ends
    __ or3(str1_reg, str2_reg, chr1_reg);
    __ andcc(chr1_reg, 0x3, chr1_reg);
    // notZero means at least one not 4-byte aligned.
    // We could optimize the case when both arrays are not aligned
    // but it is not frequent case and it requires additional checks.
    __ br(Assembler::notZero, false, Assembler::pn, Lchar); // char by char compare
    __ delayed()->sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg); // set byte count

    // Compare char[] arrays aligned to 4 bytes.
    __ char_arrays_equals(str1_reg, str2_reg, limit_reg, result_reg,
                          chr1_reg, chr2_reg, Ldone);
    __ ba(Ldone);
    __ delayed()->add(G0, 1, result_reg);

    // char by char compare
    __ bind(Lchar);
    __ add(str1_reg, limit_reg, str1_reg);
    __ add(str2_reg, limit_reg, str2_reg);
    __ neg(limit_reg); //negate count

    __ lduh(str1_reg, limit_reg, chr1_reg);
    // Lchar_loop
    __ bind(Lchar_loop);
    __ lduh(str2_reg, limit_reg, chr2_reg);
    __ cmp(chr1_reg, chr2_reg);
    __ br(Assembler::notEqual, true, Assembler::pt, Ldone);
    __ delayed()->mov(G0, result_reg); //not equal
    __ inccc(limit_reg, sizeof(jchar));
    // annul LDUH if branch is not taken to prevent access past end of string
    __ br(Assembler::notZero, true, Assembler::pt, Lchar_loop);
    __ delayed()->lduh(str1_reg, limit_reg, chr1_reg); // hoisted

    __ add(G0, 1, result_reg);  //equal

    __ bind(Ldone);
  %}

// Inline equality test of two char arrays.
// result is 1 when the references are identical, or when both are non-null,
// have the same length and (for non-zero length) char_arrays_equals finds no
// difference; it is 0 when either reference is null or the lengths differ.
// Clobbers O7 and tmp1, and advances ary1/ary2 past the array header.
// Statement order matters because of the delay slots.
enc_class enc_Array_Equals(o0RegP ary1, o1RegP ary2, g3RegP tmp1, notemp_iRegI result) %{
    Label Lvector, Ldone, Lloop;
    MacroAssembler _masm(&cbuf);

    Register   ary1_reg = reg_to_register_object($ary1$$reg);
    Register   ary2_reg = reg_to_register_object($ary2$$reg);
    Register   tmp1_reg = reg_to_register_object($tmp1$$reg);
    Register   tmp2_reg = O7;
    Register result_reg = reg_to_register_object($result$$reg);

    int length_offset  = arrayOopDesc::length_offset_in_bytes();
    int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);

    // return true if the same array
    __ cmp(ary1_reg, ary2_reg);
    __ brx(Assembler::equal, true, Assembler::pn, Ldone);
    __ delayed()->add(G0, 1, result_reg); // equal

    __ br_null(ary1_reg, true, Assembler::pn, Ldone);
    __ delayed()->mov(G0, result_reg);    // not equal

    __ br_null(ary2_reg, true, Assembler::pn, Ldone);
    __ delayed()->mov(G0, result_reg);    // not equal

    //load the lengths of arrays
    __ ld(Address(ary1_reg, length_offset), tmp1_reg);
    __ ld(Address(ary2_reg, length_offset), tmp2_reg);

    // return false if the two arrays are not equal length
    __ cmp(tmp1_reg, tmp2_reg);
    __ br(Assembler::notEqual, true, Assembler::pn, Ldone);
    __ delayed()->mov(G0, result_reg);     // not equal

    __ cmp_zero_and_br(Assembler::zero, tmp1_reg, Ldone, true, Assembler::pn);
    __ delayed()->add(G0, 1, result_reg); // zero-length arrays are equal

    // load array addresses
    __ add(ary1_reg, base_offset, ary1_reg);
    __ add(ary2_reg, base_offset, ary2_reg);

    // renaming registers
    Register chr1_reg  =  result_reg; // for characters in ary1
    Register chr2_reg  =  tmp2_reg;   // for characters in ary2
    Register limit_reg =  tmp1_reg;   // length

    // set byte count
    __ sll(limit_reg, exact_log2(sizeof(jchar)), limit_reg);

    // Compare char[] arrays aligned to 4 bytes.
    __ char_arrays_equals(ary1_reg, ary2_reg, limit_reg, result_reg,
                          chr1_reg, chr2_reg, Ldone);
    __ add(G0, 1, result_reg); // equals

    __ bind(Ldone);
  %}

  // Jumps to the OptoRuntime rethrow stub using G3 as the temporary, with a nop
  // in the delay slot. In ASSERT builds the address of the jump is first stored
  // into last_rethrow, inside a temporary register window (save_frame/restore)
  // so that L1 and L2 can be used freely.
  enc_class enc_rethrow() %{
    cbuf.set_insts_mark();
    Register temp_reg = G3;
    AddressLiteral rethrow_stub(OptoRuntime::rethrow_stub());
    assert(temp_reg != reg_to_register_object(R_I0_num), "temp must not break oop_reg");
    MacroAssembler _masm(&cbuf);
#ifdef ASSERT
    __ save_frame(0);
    AddressLiteral last_rethrow_addrlit(&last_rethrow);
    __ sethi(last_rethrow_addrlit, L1);
    Address addr(L1, last_rethrow_addrlit.low10());
    __ get_pc(L2);
    __ inc(L2, 3 * BytesPerInstWord);  // skip this & 2 more insns to point at jump_to
    __ st_ptr(L2, addr);
    __ restore();
#endif
    __ JUMP(rethrow_stub, temp_reg, 0); // sethi;jmp
    __ delayed()->nop();
  %}

  enc_class emit_mem_nop() %{
    // Raw instruction word for: LDUXA [o6,g0],#0x82,g0
    const unsigned int lduxa_word = 0xc0839040;
    cbuf.insts()->emit_int32(lduxa_word);
  %}

  enc_class emit_fadd_nop() %{
    // Raw instruction word for: FMOVS f31,f31
    const unsigned int fmovs_word = 0xbfa0003f;
    cbuf.insts()->emit_int32(fmovs_word);
  %}

  enc_class emit_br_nop() %{
    // Raw instruction word for: BPN,PN .
    const unsigned int bpn_word = 0x00400000;
    cbuf.insts()->emit_int32(bpn_word);
  %}

  enc_class enc_membar_acquire %{
    // Emit a membar with the LoadStore and LoadLoad mask bits set.
    MacroAssembler _masm(&cbuf);
    const Assembler::Membar_mask_bits acquire_mask =
      Assembler::Membar_mask_bits(Assembler::LoadStore | Assembler::LoadLoad);
    __ membar(acquire_mask);
  %}

  enc_class enc_membar_release %{
    // Emit a membar with the LoadStore and StoreStore mask bits set.
    MacroAssembler _masm(&cbuf);
    const Assembler::Membar_mask_bits release_mask =
      Assembler::Membar_mask_bits(Assembler::LoadStore | Assembler::StoreStore);
    __ membar(release_mask);
  %}

  enc_class enc_membar_volatile %{
    // Emit a membar with only the StoreLoad mask bit set.
    MacroAssembler _masm(&cbuf);
    const Assembler::Membar_mask_bits volatile_mask =
      Assembler::Membar_mask_bits(Assembler::StoreLoad);
    __ membar(volatile_mask);
  %}

  enc_class enc_repl8b( iRegI src, iRegL dst ) %{
    // Replicate the low byte of src into all eight bytes of dst.
    // O7 is used as a scratch register and is overwritten.
    MacroAssembler _masm(&cbuf);
    const Register Rsrc = reg_to_register_object($src$$reg);
    const Register Rdst = reg_to_register_object($dst$$reg);
    const Register Rtmp = O7;
    __ sllx(Rsrc, 56, Rdst);       // byte moves to bits 63..56
    __ srlx(Rdst,  8, Rtmp);
    __ or3 (Rdst, Rtmp, Rdst);     // top 2 bytes filled
    __ srlx(Rdst, 16, Rtmp);
    __ or3 (Rdst, Rtmp, Rdst);     // top 4 bytes filled
    __ srlx(Rdst, 32, Rtmp);
    __ or3 (Rdst, Rtmp, Rdst);     // all 8 bytes filled
  %}

  enc_class enc_repl4b( iRegI src, iRegL dst ) %{
    // Replicate the low byte of src across the four bytes of the low
    // 32 bits of dst, using 32-bit shifts.  O7 is scratch and is overwritten.
    MacroAssembler _masm(&cbuf);
    const Register Rsrc = reg_to_register_object($src$$reg);
    const Register Rdst = reg_to_register_object($dst$$reg);
    const Register Rtmp = O7;
    __ sll(Rsrc, 24, Rdst);        // byte moves to bits 31..24
    __ srl(Rdst,  8, Rtmp);
    __ or3(Rdst, Rtmp, Rdst);      // bytes 3 and 2 filled
    __ srl(Rdst, 16, Rtmp);
    __ or3(Rdst, Rtmp, Rdst);      // bytes 3..0 filled
  %}

  enc_class enc_repl4s( iRegI src, iRegL dst ) %{
    // Replicate the low 16 bits of src into all four 16-bit lanes of dst.
    // O7 is used as a scratch register and is overwritten.
    MacroAssembler _masm(&cbuf);
    const Register Rsrc = reg_to_register_object($src$$reg);
    const Register Rdst = reg_to_register_object($dst$$reg);
    const Register Rtmp = O7;
    __ sllx(Rsrc, 48, Rdst);       // halfword moves to bits 63..48
    __ srlx(Rdst, 16, Rtmp);
    __ or3 (Rdst, Rtmp, Rdst);     // top two halfwords filled
    __ srlx(Rdst, 32, Rtmp);
    __ or3 (Rdst, Rtmp, Rdst);     // all four halfwords filled
  %}

  enc_class enc_repl2i( iRegI src, iRegL dst ) %{
    // Replicate the low 32 bits of src into both 32-bit halves of dst.
    // O7 is used as a scratch register and is overwritten.
    MacroAssembler _masm(&cbuf);
    const Register Rsrc = reg_to_register_object($src$$reg);
    const Register Rdst = reg_to_register_object($dst$$reg);
    const Register Rtmp = O7;
    __ sllx(Rsrc, 32, Rdst);       // word moves to the upper half
    __ srlx(Rdst, 32, Rtmp);       // copy of the word in the lower half
    __ or3 (Rdst, Rtmp, Rdst);
  %}

%}

//----------FRAME--------------------------------------------------------------
// Definition of frame structure and management information.
//
//  S T A C K   L A Y O U T    Allocators stack-slot number
//                             |   (to get allocators register number
//  G  Owned by    |        |  v    add VMRegImpl::stack0)
//  r   CALLER     |        |
//  o     |        +--------+      pad to even-align allocators stack-slot
//  w     V        |  pad0  |        numbers; owned by CALLER
//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
//  h     ^        |   in   |  5
//        |        |  args  |  4   Holes in incoming args owned by SELF
//  |     |        |        |  3
//  |     |        +--------+
//  V     |        | old out|      Empty on Intel, window on Sparc
//        |    old |preserve|      Must be even aligned.
//        |     SP-+--------+----> Matcher::_old_SP, 8 (or 16 in LP64)-byte aligned
//        |        |   in   |  3   area for Intel ret address
//     Owned by    |preserve|      Empty on Sparc.
//       SELF      +--------+
//        |        |  pad2  |  2   pad to align old SP
//        |        +--------+  1
//        |        | locks  |  0
//        |        +--------+----> VMRegImpl::stack0, 8 (or 16 in LP64)-byte aligned
//        |        |  pad1  | 11   pad to align new SP
//        |        +--------+
//        |        |        | 10
//        |        | spills |  9   spills
//        V        |        |  8   (pad0 slot for callee)
//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
//        ^        |  out   |  7
//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
//     Owned by    +--------+
//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
//        |    new |preserve|      Must be even-aligned.
//        |     SP-+--------+----> Matcher::_new_SP, even aligned
//        |        |        |
//
// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
//         known from SELF's arguments and the Java calling convention.
//         Region 6-7 is determined per call site.
// Note 2: If the calling convention leaves holes in the incoming argument
//         area, those holes are owned by SELF.  Holes in the outgoing area
//         are owned by the CALLEE.  Holes should not be necessary in the
//         incoming area, as the Java calling convention is completely under
//         the control of the AD file.  Doubles can be sorted and packed to
//         avoid holes.  Holes in the outgoing arguments may be necessary for
//         varargs C calling conventions.
// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
//         even aligned with pad0 as needed.
//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
//         region 6-11 is even aligned; it may be padded out more so that
//         the region from SP to FP meets the minimum stack alignment.

// Frame description: stack growth direction, the registers shared between
// compiled code and the interpreter, stack-slot accounting, and the bodies of
// the calling-convention and return-value location functions.
frame %{
  // What direction does stack grow in (assumed to be same for native & Java)
  stack_direction(TOWARDS_LOW);

  // These two registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(R_G5);                // Inline Cache Register or methodOop for I2C
  interpreter_method_oop_reg(R_G5);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset);

  // Number of stack slots consumed by a Monitor enter
#ifdef _LP64
  sync_stack_slots(2);
#else
  sync_stack_slots(1);
#endif

  // Compiled code's Frame Pointer
  frame_pointer(R_SP);

  // Stack alignment requirement
  stack_alignment(StackAlignmentInBytes);
  //  LP64: Alignment size in bytes (128-bit -> 16 bytes)
  // !LP64: Alignment size in bytes (64-bit  ->  8 bytes)

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.
  in_preserve_stack_slots(0);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  // ADLC doesn't support parsing expressions, so I folded the math by hand.
#ifdef _LP64
  // (callee_register_argument_save_area_words (6) + callee_aggregate_return_pointer_words (0)) * 2-stack-slots-per-word
  varargs_C_out_slots_killed(12);
#else
  // (callee_register_argument_save_area_words (6) + callee_aggregate_return_pointer_words (1)) * 1-stack-slots-per-word
  varargs_C_out_slots_killed( 7);
#endif

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  return_addr(REG R_I7);          // Ret Addr is in register I7

  // Body of function which returns an OptoRegs array locating
  // arguments either in registers or in stack slots for calling
  // java
  calling_convention %{
    (void) SharedRuntime::java_calling_convention(sig_bt, regs, length, is_outgoing);

  %}

  // Body of function which returns an OptoRegs array locating
  // arguments either in registers or in stack slots for calling
  // C.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
  %}

  // Location of native (C/C++) and interpreter return values.  This is specified to
  // be the  same as Java.  In the 32-bit VM, long values are actually returned from
  // native calls in O0:O1 and returned to the interpreter in I0:I1.  The copying
  // to and from the register pairs is done by the appropriate call and epilog
  // opcodes.  This simplifies the register allocator.
  //
  // The four tables below are indexed by ideal_reg; is_outgoing selects the
  // *_out tables, otherwise the *_in tables are used.  The assert restricts
  // ideal_reg to the range [Op_RegI, Op_RegL].
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
#ifdef     _LP64
    static int lo_out[Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, R_O0_num,     R_O0_num,     R_O0_num,     R_F0_num,     R_F0_num, R_O0_num };
    static int hi_out[Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_O0H_num,    OptoReg::Bad, R_F1_num, R_O0H_num};
    static int lo_in [Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, R_I0_num,     R_I0_num,     R_I0_num,     R_F0_num,     R_F0_num, R_I0_num };
    static int hi_in [Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_I0H_num,    OptoReg::Bad, R_F1_num, R_I0H_num};
#else  // !_LP64
    static int lo_out[Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, R_O0_num,     R_O0_num,     R_O0_num,     R_F0_num,     R_F0_num, R_G1_num };
    static int hi_out[Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_F1_num, R_G1H_num };
    static int lo_in [Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, R_I0_num,     R_I0_num,     R_I0_num,     R_F0_num,     R_F0_num, R_G1_num };
    static int hi_in [Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_F1_num, R_G1H_num };
#endif
    return OptoRegPair( (is_outgoing?hi_out:hi_in)[ideal_reg],
                        (is_outgoing?lo_out:lo_in)[ideal_reg] );
  %}

  // Location of compiled Java return values.  Same as C
  // (the tables carry the same entries as those in c_return_value).
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
#ifdef     _LP64
    static int lo_out[Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, R_O0_num,     R_O0_num,     R_O0_num,     R_F0_num,     R_F0_num, R_O0_num };
    static int hi_out[Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_O0H_num,    OptoReg::Bad, R_F1_num, R_O0H_num};
    static int lo_in [Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, R_I0_num,     R_I0_num,     R_I0_num,     R_F0_num,     R_F0_num, R_I0_num };
    static int hi_in [Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_I0H_num,    OptoReg::Bad, R_F1_num, R_I0H_num};
#else  // !_LP64
    static int lo_out[Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, R_O0_num,     R_O0_num,     R_O0_num,     R_F0_num,     R_F0_num, R_G1_num };
    static int hi_out[Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_F1_num, R_G1H_num};
    static int lo_in [Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, R_I0_num,     R_I0_num,     R_I0_num,     R_F0_num,     R_F0_num, R_G1_num };
    static int hi_in [Op_RegL+1] = { OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, R_F1_num, R_G1H_num};
#endif
    return OptoRegPair( (is_outgoing?hi_out:hi_in)[ideal_reg],
                        (is_outgoing?lo_out:lo_in)[ideal_reg] );
  %}

%}


//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(1);          // Required cost attribute; declared here with value 1

//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute
ins_attrib ins_size(32);           // Required size attribute (in bits)
ins_attrib ins_avoid_back_to_back(0); // instruction should not be generated back to back
ins_attrib ins_short_branch(0);    // Required flag: is this instruction a
                                   // non-matching short branch variant of some
                                   // long branch?

//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate: 32-bit
// Matches any ConI node (there is no predicate); operand cost 0.
operand immI() %{
  match(ConI);

  op_cost(0);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 8-bit
// Matches a ConI whose value is accepted by Assembler::is_simm8().
operand immI8() %{
  predicate(Assembler::is_simm8(n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 13-bit
// Matches a ConI whose value is accepted by Assembler::is_simm13().
operand immI13() %{
  predicate(Assembler::is_simm13(n->get_int()));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 13-bit minus 7
// Matches an int constant c with -4096 < c and c + 7 <= 4095.
// The upper bound is written as c <= 4095 - 7 so that the check itself
// cannot overflow for constants close to the largest int value (with
// wrap-around, c + 7 would turn negative and such constants would be
// accepted by mistake).
operand immI13m7() %{
  predicate((-4096 < n->get_int()) && (n->get_int() <= (4095 - 7)));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 16-bit
// Matches a ConI whose value is accepted by Assembler::is_simm16().
operand immI16() %{
  predicate(Assembler::is_simm16(n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned (non-negative) Integer Immediate: 13-bit
// Matches a ConI that is >= 0 and is also accepted by Assembler::is_simm13().
operand immU13() %{
  predicate((0 <= n->get_int()) && Assembler::is_simm13(n->get_int()));
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Unsigned Integer Immediate: 6-bit (the values 0-63)
operand immU6() %{
  predicate(n->get_int() >= 0 && n->get_int() <= 63);
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 11-bit
// Matches a ConI whose value is accepted by Assembler::is_simm11().
operand immI11() %{
  predicate(Assembler::is_simm11(n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: 5-bit
// Matches a ConI whose value is accepted by Assembler::is_simm5().
operand immI5() %{
  predicate(Assembler::is_simm5(n->get_int()));
  match(ConI);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 0
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 10
// (the decimal constant ten; not a 10-bit range check)
operand immI10() %{
  predicate(n->get_int() == 10);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the values 0-31 (both bounds inclusive)
operand immU5() %{
  predicate(n->get_int() >= 0 && n->get_int() <= 31);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the values 1-31 (both bounds inclusive; 0 is excluded)
operand immI_1_31() %{
  predicate(n->get_int() >= 1 && n->get_int() <= 31);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the values 32-63 (both bounds inclusive)
operand immI_32_63() %{
  predicate(n->get_int() >= 32 && n->get_int() <= 63);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Immediates for special shifts (sign extend)

// Integer Immediate: the value 16
// (an exact-value match, unlike immI16 which is a range check)
operand immI_16() %{
  predicate(n->get_int() == 16);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 24 (exact-value match)
operand immI_24() %{
  predicate(n->get_int() == 24);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 255 (0xFF)
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer Immediate: the value 65535 (0xFFFF)
operand immI_65535() %{
  predicate(n->get_int() == 65535);
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: the value 0xFF
operand immL_FF() %{
  predicate( n->get_long() == 0xFFL );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: the value 0xFFFF
operand immL_FFFF() %{
  predicate( n->get_long() == 0xFFFFL );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate: 32 or 64-bit
// Matches any ConP node (there is no predicate); operand cost 5.
operand immP() %{
  match(ConP);

  op_cost(5);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

#ifdef _LP64
// Pointer Immediate: 64-bit
// Matches any ConP, but only when VM_Version::is_niagara_plus() is false.
operand immP_set() %{
  predicate(!VM_Version::is_niagara_plus());
  match(ConP);

  op_cost(5);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate: 64-bit
// From Niagara2 processors on a load should be better than materializing.
// Matches when VM_Version::is_niagara_plus() is true and the constant is
// either an oop pointer or one for which MacroAssembler::insts_for_set()
// reports more than 3.
operand immP_load() %{
  predicate(VM_Version::is_niagara_plus() && (n->bottom_type()->isa_oop_ptr() || (MacroAssembler::insts_for_set(n->get_ptr()) > 3)));
  match(ConP);

  op_cost(5);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate: 64-bit
// Matches when VM_Version::is_niagara_plus() is true, the constant is not an
// oop pointer, and MacroAssembler::insts_for_set() reports at most 3.
operand immP_no_oop_cheap() %{
  predicate(VM_Version::is_niagara_plus() && !n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set(n->get_ptr()) <= 3));
  match(ConP);

  op_cost(5);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}
#endif

// Pointer Immediate: 13-bit
// Matches a ConP whose value is greater than -4096 and at most 4095
// (note that -4096 itself is excluded by the strict comparison).
operand immP13() %{
  predicate((-4096 < n->get_ptr()) && (n->get_ptr() <= 4095));
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate: the value 0 (NULL)
operand immP0() %{
  predicate(n->get_ptr() == 0);
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Pointer Immediate: the polling page address
// Matches a non-zero ConP equal to os::get_polling_page().
// NOTE(review): no op_cost is given here, unlike the other immP operands;
// presumably the op_attrib default applies -- confirm.
operand immP_poll() %{
  predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page());
  match(ConP);

  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Narrow Pointer Immediate
// Matches any ConN node (there is no predicate); operand cost 10.
operand immN()
%{
  match(ConN);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
// Matches a ConN whose get_narrowcon() value is 0.
operand immN0()
%{
  predicate(n->get_narrowcon() == 0);
  match(ConN);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate
// Matches any ConL node (there is no predicate); operand cost 40.
operand immL() %{
  match(ConL);
  op_cost(40);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: the value 0
operand immL0() %{
  predicate(n->get_long() == 0L);
  match(ConL);
  op_cost(0);
  // formats are generated automatically for constants and base registers
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 5-bit
// Matches a ConL whose value survives a round trip through int and is
// accepted by Assembler::is_simm5().
operand immL5() %{
  predicate(n->get_long() == (int)n->get_long() && Assembler::is_simm5((int)n->get_long()));
  match(ConL);
  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 13-bit
// Matches a ConL whose value is greater than -4096 and at most 4095
// (note that -4096 itself is excluded by the strict comparison).
operand immL13() %{
  predicate((-4096L < n->get_long()) && (n->get_long() <= 4095L));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: 13-bit minus 7
// Matches a long constant c with -4096 < c and c + 7 <= 4095.
// The upper bound is written as c <= 4095 - 7 so that the check itself
// cannot overflow for constants close to the largest long value (with
// wrap-around, c + 7 would turn negative and such constants would be
// accepted by mistake).
operand immL13m7() %{
  predicate((-4096L < n->get_long()) && (n->get_long() <= (4095L - 7L)));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask (the value 0xFFFFFFFF)
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: cheap (materialize in <= 3 instructions)
// Also matches every ConL when VM_Version::is_niagara_plus() is false; the
// instruction count comes from MacroAssembler::insts_for_set64().
operand immL_cheap() %{
  predicate(!VM_Version::is_niagara_plus() || MacroAssembler::insts_for_set64(n->get_long()) <= 3);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: expensive (materialize in > 3 instructions)
// Only matches when VM_Version::is_niagara_plus() is true; the predicate is
// the exact negation of the one used by immL_cheap.
operand immL_expensive() %{
  predicate(VM_Version::is_niagara_plus() && MacroAssembler::insts_for_set64(n->get_long()) > 3);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate
// Matches any ConD node (there is no predicate); operand cost 40.
operand immD() %{
  match(ConD);

  op_cost(40);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate: positive zero
// The non-LP64 predicate requires the value to compare equal to 0 and to be
// classified FP_PZERO; the LP64 predicate tests jlong_cast() of the value
// against 0 instead (presumably a bit-pattern test -- confirm jlong_cast).
operand immD0() %{
#ifdef _LP64
  // on 64-bit architectures this comparison is faster
  predicate(jlong_cast(n->getd()) == 0);
#else
  predicate((n->getd() == 0) && (fpclass(n->getd()) == FP_PZERO));
#endif
  match(ConD);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate
// Matches any ConF node (there is no predicate); operand cost 20.
operand immF() %{
  match(ConF);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate: 0
// Positive zero only: the value must compare equal to 0 and be classified
// FP_PZERO by fpclass().
operand immF0() %{
  predicate((n->getf() == 0) && (fpclass(n->getf()) == FP_PZERO));
  match(ConF);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer Register Operands
// Integer Register
// Allocated from register class int_reg.  Besides RegI, the match list names
// the more specific integer register operands notemp_iRegI, g1RegI, o0RegI
// and iRegIsafe.
operand iRegI() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegI);

  match(notemp_iRegI);
  match(g1RegI);
  match(o0RegI);
  match(iRegIsafe);

  format %{ %}
  interface(REG_INTER);
%}

// Integer Register allocated from register class notemp_int_reg.
// The match list names RegI and o0RegI.
operand notemp_iRegI() %{
  constraint(ALLOC_IN_RC(notemp_int_reg));
  match(RegI);

  match(o0RegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer Register allocated from register class o0_regI; matches iRegI.
operand o0RegI() %{
  constraint(ALLOC_IN_RC(o0_regI));
  match(iRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register
// Allocated from register class ptr_reg.  Besides RegP, the match list names
// the more specific pointer register operands listed below.
operand iRegP() %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(RegP);

  match(lock_ptr_RegP);
  match(g1RegP);
  match(g2RegP);
  match(g3RegP);
  match(g4RegP);
  match(i0RegP);
  match(o0RegP);
  match(o1RegP);
  match(l7RegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register allocated from register class sp_ptr_reg.
// The match list names RegP and iRegP.
operand sp_ptr_RegP() %{
  constraint(ALLOC_IN_RC(sp_ptr_reg));
  match(RegP);
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register allocated from register class lock_ptr_reg.
// The match list names RegP plus i0RegP, o0RegP, o1RegP and l7RegP.
operand lock_ptr_RegP() %{
  constraint(ALLOC_IN_RC(lock_ptr_reg));
  match(RegP);
  match(i0RegP);
  match(o0RegP);
  match(o1RegP);
  match(l7RegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register allocated from register class g1_regP; matches iRegP.
operand g1RegP() %{
  constraint(ALLOC_IN_RC(g1_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register allocated from register class g2_regP; matches iRegP.
operand g2RegP() %{
  constraint(ALLOC_IN_RC(g2_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register allocated from register class g3_regP; matches iRegP.
operand g3RegP() %{
  constraint(ALLOC_IN_RC(g3_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Integer Register allocated from register class g1_regI; matches iRegI.
operand g1RegI() %{
  constraint(ALLOC_IN_RC(g1_regI));
  match(iRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer Register allocated from register class g3_regI; matches iRegI.
operand g3RegI() %{
  constraint(ALLOC_IN_RC(g3_regI));
  match(iRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer Register allocated from register class g4_regI; matches iRegI.
operand g4RegI() %{
  constraint(ALLOC_IN_RC(g4_regI));
  match(iRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register allocated from register class g4_regP; matches iRegP.
operand g4RegP() %{
  constraint(ALLOC_IN_RC(g4_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register allocated from register class i0_regP; matches iRegP.
operand i0RegP() %{
  constraint(ALLOC_IN_RC(i0_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register allocated from register class o0_regP; matches iRegP.
operand o0RegP() %{
  constraint(ALLOC_IN_RC(o0_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register allocated from register class o1_regP; matches iRegP.
operand o1RegP() %{
  constraint(ALLOC_IN_RC(o1_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register allocated from register class o2_regP; matches iRegP.
operand o2RegP() %{
  constraint(ALLOC_IN_RC(o2_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register allocated from register class o7_regP; matches iRegP.
operand o7RegP() %{
  constraint(ALLOC_IN_RC(o7_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer Register allocated from register class l7_regP; matches iRegP.
operand l7RegP() %{
  constraint(ALLOC_IN_RC(l7_regP));
  match(iRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Integer Register allocated from register class o7_regI; matches iRegI.
operand o7RegI() %{
  constraint(ALLOC_IN_RC(o7_regI));
  match(iRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Narrow Pointer Register: matches RegN and is allocated from the same
// register class (int_reg) as iRegI.
operand iRegN() %{
  constraint(ALLOC_IN_RC(int_reg));
  match(RegN);

  format %{ %}
  interface(REG_INTER);
%}

// Long Register
// Allocated from register class long_reg; matches RegL.
operand iRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long Register allocated from register class o2_regL; matches iRegL.
operand o2RegL() %{
  constraint(ALLOC_IN_RC(o2_regL));
  match(iRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long Register allocated from register class o7_regL; matches iRegL.
operand o7RegL() %{
  constraint(ALLOC_IN_RC(o7_regL));
  match(iRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long Register allocated from register class g1_regL; matches iRegL.
operand g1RegL() %{
  constraint(ALLOC_IN_RC(g1_regL));
  match(iRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long Register allocated from register class g3_regL; matches iRegL.
operand g3RegL() %{
  constraint(ALLOC_IN_RC(g3_regL));
  match(iRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Int Register safe
// This is 64bit safe
// An integer operand (matches iRegI) that is allocated from the long
// register class long_reg rather than int_reg.
operand iRegIsafe() %{
  constraint(ALLOC_IN_RC(long_reg));

  match(iRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Condition Code Flag Register
// Allocated from register class int_flags; printed as "ccr".
operand flagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "ccr" %} // both ICC and XCC
  interface(REG_INTER);
%}

// Condition Code Register, unsigned comparisons.
// Allocated from register class int_flags; printed as "icc_U".
operand flagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "icc_U" %}
  interface(REG_INTER);
%}

// Condition Code Register, pointer comparisons.
// Allocated from register class int_flags; printed as "xcc_P" under _LP64
// and as "icc_P" otherwise.
operand flagsRegP() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

#ifdef _LP64
  format %{ "xcc_P" %}
#else
  format %{ "icc_P" %}
#endif
  interface(REG_INTER);
%}

// Condition Code Register, long comparisons.
// Allocated from register class int_flags; printed as "xcc_L".
operand flagsRegL() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "xcc_L" %}
  interface(REG_INTER);
%}

// Condition Code Register, floating comparisons, unordered same as "less".
// Allocated from register class float_flags; the match list also names
// flagsRegF0.
operand flagsRegF() %{
  constraint(ALLOC_IN_RC(float_flags));
  match(RegFlags);
  match(flagsRegF0);

  format %{ %}
  interface(REG_INTER);
%}

// Floating condition code register allocated from register class float_flag0.
operand flagsRegF0() %{
  constraint(ALLOC_IN_RC(float_flag0));
  match(RegFlags);

  format %{ %}
  interface(REG_INTER);
%}


// Condition Code Flag Register used by long compare
// Allocated from register class int_flags; printed as "icc_LTGE".
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "icc_LTGE" %}
  interface(REG_INTER);
%}
// Flags register operand allocated from int_flags; printed as "icc_EQNE".
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "icc_EQNE" %}
  interface(REG_INTER);
%}
// Flags register operand allocated from int_flags; printed as "icc_LEGT".
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "icc_LEGT" %}
  interface(REG_INTER);
%}


// Double Register
// Allocated from register class dflt_reg; the match list names RegD and
// regD_low.
operand regD() %{
  constraint(ALLOC_IN_RC(dflt_reg));
  match(RegD);

  match(regD_low);

  format %{ %}
  interface(REG_INTER);
%}

// Float Register
// Allocated from register class sflt_reg; matches RegF.
operand regF() %{
  constraint(ALLOC_IN_RC(sflt_reg));
  match(RegF);

  format %{ %}
  interface(REG_INTER);
%}

// Double Register allocated from register class dflt_low_reg; matches regD.
operand regD_low() %{
  constraint(ALLOC_IN_RC(dflt_low_reg));
  match(regD);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers

// Method Register
// Pointer operand allocated from register class g5_regP; it matches its own
// iRegP parameter.
operand inline_cache_regP(iRegP reg) %{
  constraint(ALLOC_IN_RC(g5_regP)); // G5=inline_cache_reg but uses 2 bits instead of 1
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// Interpreter method oop register
// Pointer operand allocated from register class g5_regP (the same class used
// by inline_cache_regP); it matches its own iRegP parameter.
operand interpreter_method_oop_regP(iRegP reg) %{
  constraint(ALLOC_IN_RC(g5_regP)); // G5=interpreter_method_oop_reg but uses 2 bits instead of 1
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}


//----------Complex Operands---------------------------------------------------
// Indirect Memory Reference
// Address is the base register alone: index, scale and displacement are all 0.
operand indirect(sp_ptr_RegP reg) %{
  constraint(ALLOC_IN_RC(sp_ptr_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect with simm13 Offset
// Matches (AddP reg offset); the constant becomes the displacement and no
// index register is used.
operand indOffset13(sp_ptr_RegP reg, immX13 offset) %{
  constraint(ALLOC_IN_RC(sp_ptr_reg));
  match(AddP reg offset);

  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp($offset);
  %}
%}

// Indirect with simm13 Offset minus 7
// Same shape as indOffset13, but the offset operand type is immX13m7.
operand indOffset13m7(sp_ptr_RegP reg, immX13m7 offset) %{
  constraint(ALLOC_IN_RC(sp_ptr_reg));
  match(AddP reg offset);

  op_cost(100);
  format %{ "[$reg + $offset]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x0);
    scale(0x0);
    disp($offset);
  %}
%}

// Note:  Intel has a swapped version also, like this:
//operand indOffsetX(iRegI reg, immP offset) %{
//  constraint(ALLOC_IN_RC(int_reg));
//  match(AddP offset reg);
//
//  op_cost(100);
//  format %{ "[$reg + $offset]" %}
//  interface(MEMORY_INTER) %{
//    base($reg);
//    index(0x0);
//    scale(0x0);
//    disp($offset);
//  %}
//%}
// However, it doesn't make sense for SPARC, since
// we have no particularly good way to embed oops in
// single instructions.

// Indirect with Register Index
// Matches (AddP addr index); the address is base register plus index
// register, with scale and displacement both 0.
operand indIndex(iRegP addr, iRegX index) %{
  constraint(ALLOC_IN_RC(ptr_reg));
  match(AddP addr index);

  op_cost(100);
  format %{ "[$addr + $index]" %}
  interface(MEMORY_INTER) %{
    base($addr);
    index($index);
    scale(0x0);
    disp(0x0);
  %}
%}

//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
// Stack slot holding an int (sRegI): addressed as R_SP plus the slot's
// stack offset.  The match rule is commented out.
operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegI);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0xE);   // R_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a pointer (sRegP): addressed as R_SP plus the slot's
// stack offset.  The match rule is commented out.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegP);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0xE);   // R_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

// Stack slot holding a float (sRegF): addressed as R_SP plus the slot's
// stack offset.  The match rule is commented out.
operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegF);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0xE);   // R_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}
// Stack slot holding a double (sRegD): addressed as R_SP plus the slot's
// stack offset.  The match rule is commented out.
operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegD);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0xE);   // R_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}
// Stack slot holding a long (sRegL): addressed as R_SP plus the slot's
// stack offset.  The match rule is commented out.
operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  op_cost(100);
  //match(RegL);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0xE);   // R_SP
    index(0x0);
    scale(0x0);
    disp($reg);  // Stack Offset
  %}
%}

// Operands for expressing Control Flow
// NOTE:  Label is a predefined operand which should not be redefined in
//        the AD file.  It is generically handled within the ADLC.

//----------Conditional Branch Operands----------------------------------------
// Comparison Op  - This is the operation of the comparison, and is limited to
//                  the following set of codes:
//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
//
// Other attributes of the comparison, such as unsignedness, are specified
// by the comparison instruction that sets a condition code flags register.
// That result is represented by a flags operand whose subtype is appropriate
// to the unsignedness (etc.) of the comparison.
//
// Later, the instruction which matches both the Comparison Op (a Bool) and
// the flags (produced by the Cmp) specifies the coding of the comparison op
// by matching a specific subtype of Bool operand below, such as cmpOpU.

// Comparison Op
// Matches a Bool node and supplies one encoding value per comparison.
// NOTE(review): the values are presumably SPARC condition-field encodings --
// confirm against the Assembler condition enum before changing any of them.
operand cmpOp() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x1);
    not_equal(0x9);
    less(0x3);
    greater_equal(0xB);
    less_equal(0x2);
    greater(0xA);
  %}
%}

// Comparison Op, unsigned
// Shares the equal/not_equal values with cmpOp; the four ordering
// comparisons use different values.  Printed with the suffix "u".
operand cmpOpU() %{
  match(Bool);

  format %{ "u" %}
  interface(COND_INTER) %{
    equal(0x1);
    not_equal(0x9);
    less(0x5);
    greater_equal(0xD);
    less_equal(0x4);
    greater(0xC);
  %}
%}

// Comparison Op, pointer (same as unsigned)
// The encoding values are identical to those of cmpOpU; only the format
// suffix ("p") differs.
operand cmpOpP() %{
  match(Bool);

  format %{ "p" %}
  interface(COND_INTER) %{
    equal(0x1);
    not_equal(0x9);
    less(0x5);
    greater_equal(0xD);
    less_equal(0x4);
    greater(0xC);
  %}
%}

// Comparison Op, branch-register encoding
// Uses a different value set from cmpOp: every value here fits in 3 bits.
operand cmpOp_reg() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal        (0x1);
    not_equal    (0x5);
    less         (0x3);
    greater_equal(0x7);
    less_equal   (0x2);
    greater      (0x6);
  %}
%}

// Comparison Code, floating, unordered same as less
// Matches a Bool node; prints as "fl".  Note that equal/not_equal use 0x9/0x1
// here, the reverse of the 0x1/0x9 pair used by the integer comparison
// operands, so these values must not be mixed with the integer ones.
operand cmpOpF() %{
  match(Bool);

  format %{ "fl" %}
  interface(COND_INTER) %{
    equal(0x9);
    not_equal(0x1);
    less(0x3);
    greater_equal(0xB);
    less_equal(0xE);
    greater(0x6);
  %}
%}

// Used by long compare
// Same as cmpOp but with the ordering tests mirrored, i.e. the values a
// comparison needs when its two operands have been swapped:
//   less          here == greater       in cmpOp (0xA)
//   greater_equal here == less_equal    in cmpOp (0x2)
//   less_equal    here == greater_equal in cmpOp (0xB)
//   greater       here == less          in cmpOp (0x3)
// equal and not_equal are unchanged.
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  interface(COND_INTER) %{
    equal(0x1);
    not_equal(0x9);
    less(0xA);
    greater_equal(0x2);
    less_equal(0xB);
    greater(0x3);
  %}
%}

//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
// instruction definitions by not requiring the AD writer to specify separate
// instructions for every form of operand when the instruction accepts
// multiple operand types with the same basic encoding and format.  The classic
// case of this is memory operands.
// memory: any of the three addressing operands indirect, indOffset13 and
// indIndex.
opclass memory( indirect, indOffset13, indIndex );
// indIndexMemory: only the indIndex addressing operand.
opclass indIndexMemory( indIndex );

//----------PIPELINE-----------------------------------------------------------
pipeline %{

//----------ATTRIBUTES---------------------------------------------------------
// Global properties of the instruction stream and fetch unit on which the
// rest of the pipeline description is based.
attributes %{
  fixed_size_instructions;           // Fixed size instructions
  branch_has_delay_slot;             // Branch has delay slot following
  max_instructions_per_bundle = 4;   // Up to 4 instructions per bundle
  instruction_unit_size = 4;         // An instruction is 4 bytes long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  // Each name is a Nop_* instruct defined in this file; each of them uses a
  // pipe class that occupies exactly one functional unit (A0, A1, MS, FA, BR).
  nops( Nop_A0, Nop_A1, Nop_MS, Nop_FA, Nop_BR );
%}

//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine
// A0 and A1 are the two ALU units; IALU is an alias meaning "either A0 or A1".
// Per the Nop_* instructs in this file, MS is the memory unit, FA the
// floating add unit and BR the branch unit.
// NOTE(review): FM, IDIV and FDIV are presumably the floating multiply,
// integer divide and floating divide units -- confirm.
resources(A0, A1, MS, BR, FA, FM, IDIV, FDIV, IALU = A0 | A1);

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline
// The stage names are listed in pipeline order.  The pipe classes below refer
// to them by name, e.g. "src : R(read)" or "dst : E(write)"; the ones used in
// this file are R, E, C, M, W and X.

pipe_desc(A, P, F, B, I, J, S, R, E, C, M, W, X, T, D);

//----------PIPELINE CLASSES---------------------------------------------------
// Pipeline Classes describe the stages in which input and output are
// referenced by the hardware pipeline.

// Integer ALU reg-reg operation
// One instruction: both sources are read in stage R, dst is written in
// stage E, and one integer ALU (IALU = A0 | A1) is used in stage R.
pipe_class ialu_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
    single_instruction;
    dst   : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
%}

// Integer ALU reg-reg long operation
// Counted as two instructions.  Sources are read in stage R, dst is written
// in stage E, and IALU is claimed twice in stage R.
pipe_class ialu_reg_reg_2(iRegL dst, iRegL src1, iRegL src2) %{
    instruction_count(2);
    dst   : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
    IALU  : R;
%}

// Integer ALU reg-reg long dependent operation
// Counted as one instruction that may span multiple bundles.  Writes both dst
// and the condition codes (cr) in stage E, reads the sources in stage R and
// keeps an integer ALU busy for 2 cycles starting at stage R.
pipe_class ialu_reg_reg_2_dep(iRegL dst, iRegL src1, iRegL src2, flagsReg cr) %{
    instruction_count(1); multiple_bundles;
    dst   : E(write);
    src1  : R(read);
    src2  : R(read);
    cr    : E(write);
    IALU  : R(2);
%}

// Integer ALU reg-imm operation
// Like ialu_reg_reg, but src2 is a 13-bit immediate and therefore has no
// pipeline stage entry.
pipe_class ialu_reg_imm(iRegI dst, iRegI src1, immI13 src2) %{
    single_instruction;
    dst   : E(write);
    src1  : R(read);
    IALU  : R;
%}

// Integer ALU reg-reg operation with condition code
// One instruction that writes both dst and the condition codes (cr) in
// stage E; sources are read and IALU is used in stage R.
pipe_class ialu_cc_reg_reg(iRegI dst, iRegI src1, iRegI src2, flagsReg cr) %{
    single_instruction;
    dst   : E(write);
    cr    : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
%}

// Integer ALU reg-imm operation with condition code
// Immediate form of ialu_cc_reg_reg: src2 (immI13) has no stage entry.
pipe_class ialu_cc_reg_imm(iRegI dst, iRegI src1, immI13 src2, flagsReg cr) %{
    single_instruction;
    dst   : E(write);
    cr    : E(write);
    src1  : R(read);
    IALU  : R;
%}

// Integer ALU zero-reg operation
// The first source is the constant zero (immI0) and has no stage entry; only
// src2 is read (stage R).  dst is written in stage E.
pipe_class ialu_zero_reg(iRegI dst, immI0 zero, iRegI src2) %{
    single_instruction;
    dst   : E(write);
    src2  : R(read);
    IALU  : R;
%}

// Integer ALU zero-reg operation with condition code only
// No register result: only the condition codes (cr) are written, in stage E.
pipe_class ialu_cconly_zero_reg(flagsReg cr, iRegI src) %{
    single_instruction;
    cr    : E(write);
    src   : R(read);
    IALU  : R;
%}

// Integer ALU reg-reg operation with condition code only
// No register result: reads both sources in stage R and writes only the
// condition codes (cr) in stage E.
pipe_class ialu_cconly_reg_reg(flagsReg cr, iRegI src1, iRegI src2) %{
    single_instruction;
    cr    : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
%}

// Integer ALU reg-imm operation with condition code only
// Immediate form of ialu_cconly_reg_reg: src2 (immI13) has no stage entry.
pipe_class ialu_cconly_reg_imm(flagsReg cr, iRegI src1, immI13 src2) %{
    single_instruction;
    cr    : E(write);
    src1  : R(read);
    IALU  : R;
%}

// Integer ALU reg-reg-zero operation with condition code only
// Same timing as ialu_cconly_reg_reg; the extra immI0 operand (zero) has no
// stage entry.
pipe_class ialu_cconly_reg_reg_zero(flagsReg cr, iRegI src1, iRegI src2, immI0 zero) %{
    single_instruction;
    cr    : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
%}

// Integer ALU reg-imm-zero operation with condition code only
// Same timing as ialu_cconly_reg_imm; neither immediate operand (src2, zero)
// has a stage entry.
pipe_class ialu_cconly_reg_imm_zero(flagsReg cr, iRegI src1, immI13 src2, immI0 zero) %{
    single_instruction;
    cr    : E(write);
    src1  : R(read);
    IALU  : R;
%}

// Integer ALU reg-reg operation with condition code, src1 modified
// src1 is both an input and the result: it is read in stage R and written in
// stage E, which is why it appears twice below.
pipe_class ialu_cc_rwreg_reg(flagsReg cr, iRegI src1, iRegI src2) %{
    single_instruction;
    cr    : E(write);
    src1  : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
%}

// Integer ALU reg-imm operation with condition code, src1 modified
// Immediate form of ialu_cc_rwreg_reg: src1 is read in stage R and written in
// stage E; src2 (immI13) has no stage entry.
pipe_class ialu_cc_rwreg_imm(flagsReg cr, iRegI src1, immI13 src2) %{
    single_instruction;
    cr    : E(write);
    src1  : E(write);
    src1  : R(read);
    IALU  : R;
%}

// Compare of two long registers yielding an int result and condition codes.
// May span multiple bundles.  Both sources are read in stage R; cr is written
// in stage E and dst in stage E with a "+4" adjustment (presumably four extra
// cycles of latency -- confirm against ADLC).  IALU is busy for 3 cycles and
// BR for 2 cycles starting at stage R.
pipe_class cmpL_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg cr ) %{
    multiple_bundles;
    dst   : E(write)+4;
    cr    : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R(3);
    BR    : R(2);
%}

// Integer ALU operation
// No register inputs: dst is written in stage E using one integer ALU in
// stage R.
pipe_class ialu_none(iRegI dst) %{
    single_instruction;
    dst   : E(write);
    IALU  : R;
%}

// Integer ALU reg operation
// Single-source ALU operation.  Flagged may_have_no_code, i.e. an instruct
// using this class is allowed to emit nothing.
pipe_class ialu_reg(iRegI dst, iRegI src) %{
    single_instruction; may_have_no_code;
    dst   : E(write);
    src   : R(read);
    IALU  : R;
%}

// Integer ALU reg conditional operation
// This instruction has a 1 cycle stall, and cannot execute
// in the same cycle as the instruction setting the condition
// code. We kludge this by pretending to read the condition code
// 1 cycle earlier, and by marking the functional units as busy
// for 2 cycles with the result available 1 cycle later than
// is really the case.
// Concretely: cr is declared as read in stage R, the result op2_out is
// written in stage C, and both BR and MS are held for 2 cycles from stage R.
// op2_in is declared but has no stage entry.
pipe_class ialu_reg_flags( iRegI op2_out, iRegI op2_in, iRegI op1, flagsReg cr ) %{
    single_instruction;
    op2_out : C(write);
    op1     : R(read);
    cr      : R(read);       // This is really E, with a 1 cycle stall
    BR      : R(2);
    MS      : R(2);
%}

#ifdef _LP64
// Only present in _LP64 builds.  Int result computed from a pointer source.
// NOTE(review): the name suggests "clear dst, then conditionally move based
// on the register value" -- confirm against the instructs that use it.
// Counted as one instruction that may span multiple bundles.  src is read in
// stage R and dst written in stage C, each with a "+1" adjustment (presumably
// one extra cycle).  IALU is used for 1 cycle at stage R; BR and MS are each
// held for 2 cycles starting at stage E.
pipe_class ialu_clr_and_mover( iRegI dst, iRegP src ) %{
    instruction_count(1); multiple_bundles;
    dst     : C(write)+1;
    src     : R(read)+1;
    IALU    : R(1);
    BR      : E(2);
    MS      : E(2);
%}
#endif

// Integer ALU register move, long source to int destination
// Same timing as ialu_reg (src read in R, dst written in E, one IALU in R);
// may emit no code.
pipe_class ialu_move_reg_L_to_I(iRegI dst, iRegL src) %{
    single_instruction; may_have_no_code;
    dst   : E(write);
    src   : R(read);
    IALU  : R;
%}
// Integer ALU register move, int source to long destination
// Same timing as ialu_move_reg_L_to_I; may emit no code.
pipe_class ialu_move_reg_I_to_L(iRegL dst, iRegI src) %{
    single_instruction; may_have_no_code;
    dst   : E(write);
    src   : R(read);
    IALU  : R;
%}

// Two integer ALU reg operations
// Counted as two instructions.  Unlike the IALU-based classes, this one
// names both ALU units explicitly: A0 and A1 are each used in stage R.
pipe_class ialu_reg_2(iRegL dst, iRegL src) %{
    instruction_count(2);
    dst   : E(write);
    src   : R(read);
    A0    : R;
    A1    : R;
%}

// Two integer ALU reg operations (long-to-long register move)
// Same timing as ialu_reg_2 (A0 and A1 both used in stage R), but flagged
// may_have_no_code so the move is allowed to emit nothing.
pipe_class ialu_move_reg_L_to_L(iRegL dst, iRegL src) %{
    instruction_count(2); may_have_no_code;
    dst   : E(write);
    src   : R(read);
    A0    : R;
    A1    : R;
%}

// Integer ALU imm operation
// The only input is a 13-bit immediate (no stage entry); dst is written in
// stage E using one integer ALU in stage R.
pipe_class ialu_imm(iRegI dst, immI13 src) %{
    single_instruction;
    dst   : E(write);
    IALU  : R;
%}

// Integer ALU reg-reg with carry operation
// Timing is identical to ialu_reg_reg.  The carry operand cy is declared but
// has no stage entry.
pipe_class ialu_reg_reg_cy(iRegI dst, iRegI src1, iRegI src2, iRegI cy) %{
    single_instruction;
    dst   : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
%}

// Integer ALU cc operation
// Consumes the condition codes (read in stage R) and writes dst in stage E.
pipe_class ialu_cc(iRegI dst, flagsReg cc) %{
    single_instruction;
    dst   : E(write);
    cc    : R(read);
    IALU  : R;
%}

// Integer ALU cc / second IALU operation
// Counted as one instruction that may span multiple bundles.  src is read in
// stage R; dst is written in stage E with a "+1" adjustment (presumably one
// extra cycle of latency -- confirm against ADLC).
pipe_class ialu_reg_ialu( iRegI dst, iRegI src ) %{
    instruction_count(1); multiple_bundles;
    dst   : E(write)+1;
    src   : R(read);
    IALU  : R;
%}

// Integer ALU cc / second IALU operation, two register inputs
// Two-source form of ialu_reg_ialu: p and q are read in stage R, dst is
// written in stage E with a "+1" adjustment (presumably one extra cycle).
pipe_class ialu_reg_reg_ialu( iRegI dst, iRegI p, iRegI q ) %{
    instruction_count(1); multiple_bundles;
    dst   : E(write)+1;
    p     : R(read);
    q     : R(read);
    IALU  : R;
%}

// Integer ALU hi-lo-reg operation
// Materializes an int immediate (no stage entry) into dst.
// NOTE(review): "hi-lo" presumably refers to building the value from a high
// and a low part -- confirm against the instructs that use this class.
// IALU is busy for 2 cycles from stage R; dst is written in stage E "+1".
pipe_class ialu_hi_lo_reg(iRegI dst, immI src) %{
    instruction_count(1); multiple_bundles;
    dst   : E(write)+1;
    IALU  : R(2);
%}

// Float-destination hi-lo-reg operation (with temp)
// Same timing as ialu_hi_lo_reg (integer ALU busy for 2 cycles from stage R,
// dst written in stage E "+1"), but the destination is a float register and
// a g3RegP temp is declared.  Neither src nor tmp has a stage entry.
pipe_class ialu_hi_lo_reg_temp(regF dst, immF src, g3RegP tmp) %{
    instruction_count(1); multiple_bundles;
    dst   : E(write)+1;
    IALU  : R(2);
%}

// Long Constant
// Counted as two instructions that may span multiple bundles.  IALU is
// claimed twice, each time for 2 cycles from stage R; dst is written in
// stage E "+1".  The immediate src has no stage entry.
pipe_class loadConL( iRegL dst, immL src ) %{
    instruction_count(2); multiple_bundles;
    dst   : E(write)+1;
    IALU  : R(2);
    IALU  : R(2);
%}

// Pointer Constant
// Modelled only by a fixed latency of 6: the instruction count is 0 and no
// operand stages or functional units are declared.
pipe_class loadConP( iRegP dst, immP src ) %{
    instruction_count(0); multiple_bundles;
    fixed_latency(6);
%}

// Polling Address
// On _LP64 this is modelled exactly like loadConP (instruction count 0, fixed
// latency 6).  Otherwise it is a plain ALU operation: dst written in stage E
// with one integer ALU used in stage R.
pipe_class loadConP_poll( iRegP dst, immP_poll src ) %{
#ifdef _LP64
    instruction_count(0); multiple_bundles;
    fixed_latency(6);
#else
    dst   : E(write);
    IALU  : R;
#endif
%}

// Long Constant small
// Counted as two instructions.  In contrast to loadConL there is no
// multiple_bundles flag, dst has no "+1" adjustment, and each of the two IALU
// claims lasts a single cycle.
pipe_class loadConLlo( iRegL dst, immL src ) %{
    instruction_count(2);
    dst   : E(write);
    IALU  : R;
    IALU  : R;
%}

// Float/double constant load (with temp)
// [PHH] This is wrong for 64-bit.  See LdImmF/D.
// Counted as one instruction that may span multiple bundles.  src is declared
// as read in stage R, dst is written in stage M "+1"; one integer ALU is used
// in stage R and the memory unit (MS) in stage E.  tmp has no stage entry.
pipe_class loadConFD(regF dst, immF src, g3RegP tmp) %{
    instruction_count(1); multiple_bundles;
    src   : R(read);
    dst   : M(write)+1;
    IALU  : R;
    MS    : E;
%}

// Integer ALU nop operation
// Occupies either ALU unit (IALU = A0 | A1) in stage R; no operands.  This is
// the class assigned to MachNop in the define block of this pipeline.
pipe_class ialu_nop() %{
    single_instruction;
    IALU  : R;
%}

// Integer ALU nop operation, pinned to unit A0
// Used by the Nop_A0 instruct.
pipe_class ialu_nop_A0() %{
    single_instruction;
    A0    : R;
%}

// Integer ALU nop operation, pinned to unit A1
// Used by the Nop_A1 instruct.
pipe_class ialu_nop_A1() %{
    single_instruction;
    A1    : R;
%}

// Integer Multiply reg-reg operation
// Does not use an integer ALU: the multiply holds the MS unit for 5 cycles
// starting at stage R.  Sources are read in R, dst is written in E.
pipe_class imul_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
    single_instruction;
    dst   : E(write);
    src1  : R(read);
    src2  : R(read);
    MS    : R(5);
%}

// Integer Multiply reg-imm operation
// Immediate form of imul_reg_reg: src2 (immI13) has no stage entry; MS is
// held for 5 cycles from stage R.
pipe_class imul_reg_imm(iRegI dst, iRegI src1, immI13 src2) %{
    single_instruction;
    dst   : E(write);
    src1  : R(read);
    MS    : R(5);
%}

// Long Multiply reg-reg operation
// MS is held for 6 cycles from stage R (one more than the int multiply) and
// dst is written in stage E with a "+4" adjustment (presumably four extra
// cycles of latency -- confirm against ADLC).
pipe_class mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
    single_instruction;
    dst   : E(write)+4;
    src1  : R(read);
    src2  : R(read);
    MS    : R(6);
%}

// Long Multiply reg-imm operation
// Immediate form of mulL_reg_reg: src2 (immL13) has no stage entry; MS is
// held for 6 cycles from stage R and dst is written in stage E "+4".
pipe_class mulL_reg_imm(iRegL dst, iRegL src1, immL13 src2) %{
    single_instruction;
    dst   : E(write)+4;
    src1  : R(read);
    MS    : R(6);
%}

// Integer Divide reg-reg
// Counted as one instruction that may span multiple bundles.  The MS unit is
// held for 38 cycles from stage R.  temp is a scratch register that is both
// read (stage R) and written (stage E).  cr is declared but has no stage
// entry.
pipe_class sdiv_reg_reg(iRegI dst, iRegI src1, iRegI src2, iRegI temp, flagsReg cr) %{
    instruction_count(1); multiple_bundles;
    dst   : E(write);
    temp  : E(write);
    src1  : R(read);
    src2  : R(read);
    temp  : R(read);
    MS    : R(38);
%}

// Integer Divide reg-imm
// Immediate form of sdiv_reg_reg: src2 (immI13) has no stage entry.  MS is
// held for 38 cycles from stage R; temp is read in R and written in E; cr is
// declared but has no stage entry.
pipe_class sdiv_reg_imm(iRegI dst, iRegI src1, immI13 src2, iRegI temp, flagsReg cr) %{
    instruction_count(1); multiple_bundles;
    dst   : E(write);
    temp  : E(write);
    src1  : R(read);
    temp  : R(read);
    MS    : R(38);
%}

// Long Divide
// No instruction-count attribute is given.  MS is held for 70 cycles from
// stage R; dst is written in stage E with a "+71" adjustment and src2 is read
// in stage R with "+1" (presumably extra cycles -- confirm against ADLC).
pipe_class divL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
    dst  : E(write)+71;
    src1 : R(read);
    src2 : R(read)+1;
    MS   : R(70);
%}

// Long Divide, immediate divisor
// Immediate form of divL_reg_reg: src2 (immL13) has no stage entry.  MS is
// held for 70 cycles from stage R; dst is written in stage E "+71".
pipe_class divL_reg_imm(iRegL dst, iRegL src1, immL13 src2) %{
    dst  : E(write)+71;
    src1 : R(read);
    MS   : R(70);
%}

// Floating Point Add Float
// Sources are read in stage E and the result is written in stage X; the
// floating add unit (FA) is used in stage R.
pipe_class faddF_reg_reg(regF dst, regF src1, regF src2) %{
    single_instruction;
    dst   : X(write);
    src1  : E(read);
    src2  : E(read);
    FA    : R;
%}

// Floating Point Add Double
// Same timing as the float form: sources read in stage E, dst written in
// stage X, FA used in stage R.
pipe_class faddD_reg_reg(regD dst, regD src1, regD src2) %{
    single_instruction;
    dst   : X(write);
    src1  : E(read);
    src2  : E(read);
    FA    : R;
%}

// Floating Point Conditional Move based on integer flags (float)
// Reads the integer condition codes (cr) in stage R and src in stage E;
// writes dst in stage X.  Both FA and BR are held for 2 cycles from stage R.
// The cmp operand has no stage entry.
pipe_class int_conditional_float_move (cmpOp cmp, flagsReg cr, regF dst, regF src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    cr    : R(read);
    FA    : R(2);
    BR    : R(2);
%}

// Floating Point Conditional Move based on integer flags (double)
// Same timing as int_conditional_float_move, with double registers.
pipe_class int_conditional_double_move (cmpOp cmp, flagsReg cr, regD dst, regD src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    cr    : R(read);
    FA    : R(2);
    BR    : R(2);
%}

// Floating Point Multiply Float
// Same operand timing as the FP add classes (sources in E, dst in X), but
// the FM unit is used in stage R instead of FA.
pipe_class fmulF_reg_reg(regF dst, regF src1, regF src2) %{
    single_instruction;
    dst   : X(write);
    src1  : E(read);
    src2  : E(read);
    FM    : R;
%}

// Floating Point Multiply Double
// Same timing as the float multiply: sources in E, dst in X, FM in stage R.
pipe_class fmulD_reg_reg(regD dst, regD src1, regD src2) %{
    single_instruction;
    dst   : X(write);
    src1  : E(read);
    src2  : E(read);
    FM    : R;
%}

// Floating Point Divide Float
// Uses FM in stage R like a multiply, and additionally holds the FDIV unit
// for 14 cycles starting at stage C.
pipe_class fdivF_reg_reg(regF dst, regF src1, regF src2) %{
    single_instruction;
    dst   : X(write);
    src1  : E(read);
    src2  : E(read);
    FM    : R;
    FDIV  : C(14);
%}

// Floating Point Divide Double
// Same as the float divide except that FDIV is held for 17 cycles (instead
// of 14) starting at stage C.
pipe_class fdivD_reg_reg(regD dst, regD src1, regD src2) %{
    single_instruction;
    dst   : X(write);
    src1  : E(read);
    src2  : E(read);
    FM    : R;
    FDIV  : C(17);
%}

// Floating Point Move/Negate/Abs Float
// Single-source FA operation: src read in stage E, dst written in stage W
// (one stage earlier than the X used by the two-source FP classes).  FA is
// held for 1 cycle from stage R.
pipe_class faddF_reg(regF dst, regF src) %{
    single_instruction;
    dst   : W(write);
    src   : E(read);
    FA    : R(1);
%}

// Floating Point Move/Negate/Abs Double
// Double form of faddF_reg: src read in stage E, dst written in stage W, FA
// used in stage R.
pipe_class faddD_reg(regD dst, regD src) %{
    single_instruction;
    dst   : W(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert F->D
// src read in stage E, dst written in stage X, FA used in stage R.
pipe_class fcvtF2D(regD dst, regF src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert I->D
// The int source is declared as a float register (regF).  src read in stage
// E, dst written in stage X, FA used in stage R.
pipe_class fcvtI2D(regD dst, regF src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert LHi->D
// Both operands are double registers.  src read in stage E, dst written in
// stage X, FA used in stage R.
pipe_class fcvtLHi2D(regD dst, regD src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert L->D
// src read in stage E, dst written in stage X, FA used in stage R.
// NOTE(review): the long source is declared as regF; confirm whether ADLC
// makes any use of operand types in pipe_class signatures.
pipe_class fcvtL2D(regD dst, regF src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert L->F
// src read in stage E, dst written in stage X, FA used in stage R.
// NOTE(review): the declared types (dst regD, src regF) look swapped for a
// long-to-float conversion; confirm whether ADLC uses these types at all.
pipe_class fcvtL2F(regD dst, regF src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert D->F
// src read in stage E, dst written in stage X, FA used in stage R.
// NOTE(review): the declared types (dst regD, src regF) look swapped for a
// double-to-float conversion; confirm whether ADLC uses these types at all.
pipe_class fcvtD2F(regD dst, regF src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert I->L
// Performed in the floating add unit: src (regF) read in stage E, dst (regD)
// written in stage X, FA used in stage R.
pipe_class fcvtI2L(regD dst, regF src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert D->I
// Counted as one instruction that may span multiple bundles.  src read in
// stage E; dst written in stage X with a "+6" adjustment (presumably six
// extra cycles of latency -- confirm).  cr is declared but has no stage entry.
pipe_class fcvtD2I(regF dst, regD src, flagsReg cr) %{
    instruction_count(1); multiple_bundles;
    dst   : X(write)+6;
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert D->L
// Same shape as fcvtD2I: may span multiple bundles, src read in stage E, dst
// written in stage X "+6" (presumably extra cycles), FA used in stage R.
// cr is declared but has no stage entry.
pipe_class fcvtD2L(regD dst, regD src, flagsReg cr) %{
    instruction_count(1); multiple_bundles;
    dst   : X(write)+6;
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert F->I
// Same shape as fcvtD2I: may span multiple bundles, src read in stage E, dst
// written in stage X "+6" (presumably extra cycles), FA used in stage R.
// cr is declared but has no stage entry.
pipe_class fcvtF2I(regF dst, regF src, flagsReg cr) %{
    instruction_count(1); multiple_bundles;
    dst   : X(write)+6;
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert F->L
// Same shape as fcvtD2I: may span multiple bundles, src read in stage E, dst
// written in stage X "+6" (presumably extra cycles), FA used in stage R.
// cr is declared but has no stage entry.
pipe_class fcvtF2L(regD dst, regF src, flagsReg cr) %{
    instruction_count(1); multiple_bundles;
    dst   : X(write)+6;
    src   : E(read);
    FA    : R;
%}

// Floating Point Convert I->F
// Both operands are float registers.  src read in stage E, dst written in
// stage X, FA used in stage R.
pipe_class fcvtI2F(regF dst, regF src) %{
    single_instruction;
    dst   : X(write);
    src   : E(read);
    FA    : R;
%}

// Floating Point Compare (float)
// No register result: the float condition codes (cr, flagsRegF) are written
// in stage X.  Sources are read in stage E and FA is used in stage R.  The
// immI0 operand (zero) has no stage entry.
pipe_class faddF_fcc_reg_reg_zero(flagsRegF cr, regF src1, regF src2, immI0 zero) %{
    single_instruction;
    cr    : X(write);
    src1  : E(read);
    src2  : E(read);
    FA    : R;
%}

// Floating Point Compare (double)
// Double form of faddF_fcc_reg_reg_zero: cr written in stage X, sources read
// in stage E, FA used in stage R; zero has no stage entry.
pipe_class faddD_fcc_reg_reg_zero(flagsRegF cr, regD src1, regD src2, immI0 zero) %{
    single_instruction;
    cr    : X(write);
    src1  : E(read);
    src2  : E(read);
    FA    : R;
%}

// Floating Add Nop
// Occupies only the floating add unit (FA) in stage R; used by Nop_FA.
pipe_class fadd_nop() %{
    single_instruction;
    FA  : R;
%}

// Integer Store to Memory
// The address operand (mem) is read in stage R, the value to store (src) is
// not needed until stage C.  Uses the memory unit (MS) in stage R.
pipe_class istore_mem_reg(memory mem, iRegI src) %{
    single_instruction;
    mem   : R(read);
    src   : C(read);
    MS    : R;
%}

// Integer Store to Memory, source of class sp_ptr_RegP
// Same timing as istore_mem_reg (mem read in R, src read in C, MS in R); only
// the declared type of src differs.
pipe_class istore_mem_spORreg(memory mem, sp_ptr_RegP src) %{
    single_instruction;
    mem   : R(read);
    src   : C(read);
    MS    : R;
%}

// Integer Store Zero to Memory
// The stored value is the constant zero (immI0), so only the address operand
// has a stage entry (read in R).  Uses MS in stage R.
pipe_class istore_mem_zero(memory mem, immI0 src) %{
    single_instruction;
    mem   : R(read);
    MS    : R;
%}

// Special Stack Slot Store
// Store of an int register into a stack slot: the slot (address) is read in
// stage R, the value in stage C; MS is used in stage R.
pipe_class istore_stk_reg(stackSlotI stkSlot, iRegI src) %{
    single_instruction;
    stkSlot : R(read);
    src     : C(read);
    MS      : R;
%}

// Special Stack Slot Store, int register into a long stack slot
// Counted as two instructions that may span multiple bundles; MS is held for
// 2 cycles from stage R.  Used by regI_to_stkLHi, which emits two stores.
pipe_class lstoreI_stk_reg(stackSlotL stkSlot, iRegI src) %{
    instruction_count(2); multiple_bundles;
    stkSlot : R(read);
    src     : C(read);
    MS      : R(2);
%}

// Float Store
// Address operand read in stage R, float value read in stage C, MS used in
// stage R.
pipe_class fstoreF_mem_reg(memory mem, RegF src) %{
    single_instruction;
    mem : R(read);
    src : C(read);
    MS  : R;
%}

// Float Store of constant zero
// The value is the immediate immF0, so only the address operand has a stage
// entry (read in R).  MS used in stage R.
pipe_class fstoreF_mem_zero(memory mem, immF0 src) %{
    single_instruction;
    mem : R(read);
    MS  : R;
%}

// Double Store
// Declared with instruction_count(1) rather than single_instruction.  Address
// operand read in stage R, double value read in stage C, MS used in stage R.
pipe_class fstoreD_mem_reg(memory mem, RegD src) %{
    instruction_count(1);
    mem : R(read);
    src : C(read);
    MS  : R;
%}

// Double Store of constant zero
// The value is the immediate immD0, so only the address operand has a stage
// entry (read in R).  MS used in stage R.
pipe_class fstoreD_mem_zero(memory mem, immD0 src) %{
    single_instruction;
    mem : R(read);
    MS  : R;
%}

// Special Stack Slot Float Store
// Float register into an int-sized stack slot: slot read in stage R, value
// read in stage C, MS used in stage R.  Used by regF_to_stkI.
pipe_class fstoreF_stk_reg(stackSlotI stkSlot, RegF src) %{
    single_instruction;
    stkSlot : R(read);
    src     : C(read);
    MS      : R;
%}

// Special Stack Slot Double Store
// Slot read in stage R, value read in stage C, MS used in stage R.
// NOTE(review): stkSlot is declared stackSlotI, yet the regD_to_stkL instruct
// that uses this class passes a stackSlotL; confirm whether ADLC makes any
// use of operand types in pipe_class signatures.
pipe_class fstoreD_stk_reg(stackSlotI stkSlot, RegD src) %{
    single_instruction;
    stkSlot : R(read);
    src     : C(read);
    MS      : R;
%}

// Integer Load (when sign bit propagation not needed)
// Address operand read in stage R, result written in stage C, memory unit
// (MS) used in stage R.  Compare iload_mask_mem, whose result is written one
// stage later (M).
pipe_class iload_mem(iRegI dst, memory mem) %{
    single_instruction;
    mem : R(read);
    dst : C(write);
    MS  : R;
%}

// Integer Load from stack operand
// Same timing as iload_mem, but the source is a double-sized stack slot
// (stackSlotD) instead of a general memory operand.
pipe_class iload_stkD(iRegI dst, stackSlotD mem ) %{
    single_instruction;
    mem : R(read);
    dst : C(write);
    MS  : R;
%}

// Integer Load (when sign bit propagation or masking is needed)
// Like iload_mem, except that the result is written in stage M, i.e. one
// pipeline stage after the C used for plain loads.
pipe_class iload_mask_mem(iRegI dst, memory mem) %{
    single_instruction;
    mem : R(read);
    dst : M(write);
    MS  : R;
%}

// Float Load
// Address operand read in stage R, float result written in stage M, MS used
// in stage R.
pipe_class floadF_mem(regF dst, memory mem) %{
    single_instruction;
    mem : R(read);
    dst : M(write);
    MS  : R;
%}

// Double Load
// Counted as one instruction but allowed to span multiple bundles (see the
// inline note).  Address read in stage R, result written in stage M, MS used
// in stage R.
pipe_class floadD_mem(regD dst, memory mem) %{
    instruction_count(1); multiple_bundles; // Again, unaligned argument is only multiple case
    mem : R(read);
    dst : M(write);
    MS  : R;
%}

// Float Load from stack slot
// Slot read in stage R, float result written in stage M, MS used in stage R.
// Used by stkI_to_regF.
pipe_class floadF_stk(regF dst, stackSlotI stkSlot) %{
    single_instruction;
    stkSlot : R(read);
    dst : M(write);
    MS  : R;
%}

// Double Load from stack slot
// Slot read in stage R, double result written in stage M, MS used in stage R.
// NOTE(review): stkSlot is declared stackSlotI, yet the stkL_to_regD instruct
// that uses this class passes a stackSlotL; confirm whether ADLC makes any
// use of operand types in pipe_class signatures.
pipe_class floadD_stk(regD dst, stackSlotI stkSlot) %{
    single_instruction;
    stkSlot : R(read);
    dst : M(write);
    MS  : R;
%}

// Memory Nop
// Occupies only the memory unit (MS) in stage R; used by Nop_MS.
pipe_class mem_nop() %{
    single_instruction;
    MS  : R;
%}

// SETHI-style constant into a pointer register
// The immediate src has no stage entry; dst is written in stage E using one
// integer ALU in stage R.
pipe_class sethi(iRegP dst, immI src) %{
    single_instruction;
    dst  : E(write);
    IALU : R;
%}

// Load through the polling-page pointer
// poll is read in stage R and the memory unit (MS) is used in stage R; no
// destination operand is declared.
// NOTE(review): presumably the safepoint poll load, whose result is
// discarded -- confirm against the instructs that use this class.
pipe_class loadPollP(iRegP poll) %{
    single_instruction;
    poll : R(read);
    MS   : R;
%}

// Unconditional branch
// One instruction followed by a delay slot; uses the branch unit (BR) in
// stage R.  Neither operand has a stage entry.
pipe_class br(Universe br, label labl) %{
    single_instruction_with_delay_slot;
    BR  : R;
%}

// Conditional branch on integer condition codes
// One instruction followed by a delay slot.  The condition codes (cr) are
// read in stage E; BR is used in stage R.
pipe_class br_cc(Universe br, cmpOp cmp, flagsReg cr, label labl) %{
    single_instruction_with_delay_slot;
    cr    : E(read);
    BR    : R;
%}

// Conditional branch on the contents of a register
// One instruction followed by a delay slot.  The tested register (op1) is
// read in stage E; both BR and MS are used in stage R.
pipe_class br_reg(Universe br, cmpOp cmp, iRegI op1, label labl) %{
    single_instruction_with_delay_slot;
    op1 : E(read);
    BR  : R;
    MS  : R;
%}

// Compare and branch
// Counted as two instructions with a delay slot.  Sources are read in stage
// R and the condition codes (cr) written in stage E; one integer ALU and the
// branch unit are each used in stage R.
pipe_class cmp_br_reg_reg(Universe br, cmpOp cmp, iRegI src1, iRegI src2, label labl, flagsReg cr) %{
    instruction_count(2); has_delay_slot;
    cr    : E(write);
    src1  : R(read);
    src2  : R(read);
    IALU  : R;
    BR    : R;
%}

// Compare and branch, immediate second operand
// Immediate form of cmp_br_reg_reg: src2 (immI13) has no stage entry.
pipe_class cmp_br_reg_imm(Universe br, cmpOp cmp, iRegI src1, immI13 src2, label labl, flagsReg cr) %{
    instruction_count(2); has_delay_slot;
    cr    : E(write);
    src1  : R(read);
    IALU  : R;
    BR    : R;
%}

// Compare and branch using cbcond
// A single instruction with no delay-slot flag and no condition-code operand
// (contrast cmp_br_reg_reg).  Sources are read in stage E; one integer ALU
// and the branch unit are each used in stage R.
pipe_class cbcond_reg_reg(Universe br, cmpOp cmp, iRegI src1, iRegI src2, label labl) %{
    single_instruction;
    src1  : E(read);
    src2  : E(read);
    IALU  : R;
    BR    : R;
%}

// Compare and branch using cbcond, immediate second operand
// Immediate form of cbcond_reg_reg: src2 is a 5-bit immediate (immI5) and
// has no stage entry.
pipe_class cbcond_reg_imm(Universe br, cmpOp cmp, iRegI src1, immI5 src2, label labl) %{
    single_instruction;
    src1  : E(read);
    IALU  : R;
    BR    : R;
%}

// Conditional branch on floating-point comparison
// Same timing as br_cc (cr read in stage E, BR used in stage R, delay slot);
// the comparison operand is a cmpOpF.
pipe_class br_fcc(Universe br, cmpOpF cc, flagsReg cr, label labl) %{
    single_instruction_with_delay_slot;
    cr    : E(read);
    BR    : R;
%}

// Branch Nop
// Occupies only the branch unit (BR) in stage R; used by Nop_BR.
pipe_class br_nop() %{
    single_instruction;
    BR  : R;
%}

// Call counted as two instructions
// May span multiple bundles and forces serialization.  Modelled with a fixed
// latency of 100; BR, MS and A0 are each held for 1 cycle from stage R.
pipe_class simple_call(method meth) %{
    instruction_count(2); multiple_bundles; force_serialization;
    fixed_latency(100);
    BR  : R(1);
    MS  : R(1);
    A0  : R(1);
%}

// Call counted as one instruction
// May span multiple bundles and forces serialization.  Fixed latency of 100;
// only MS is claimed (1 cycle from stage R).
pipe_class compiled_call(method meth) %{
    instruction_count(1); multiple_bundles; force_serialization;
    fixed_latency(100);
    MS  : R(1);
%}

// Generic call
// Instruction count 0, may span multiple bundles, forces serialization.
// Modelled purely by a fixed latency of 100; no functional unit is claimed.
pipe_class call(method meth) %{
    instruction_count(0); multiple_bundles; force_serialization;
    fixed_latency(100);
%}

// Tail call
// One instruction with a delay slot and a fixed latency of 100.  BR and MS
// are each held for 1 cycle from stage R.
pipe_class tail_call(Universe ignore, label labl) %{
    single_instruction; has_delay_slot;
    fixed_latency(100);
    BR  : R(1);
    MS  : R(1);
%}

// Return
// One instruction with a delay slot.  BR and MS are each held for 1 cycle
// from stage R.  Unlike tail_call, no fixed latency is given.
pipe_class ret(Universe ignore) %{
    single_instruction; has_delay_slot;
    BR  : R(1);
    MS  : R(1);
%}

// Return with poll
// Counted as three instructions with a delay slot.  The poll register is
// read in stage E and MS is used in stage R.
pipe_class ret_poll(g3RegP poll) %{
    instruction_count(3); has_delay_slot;
    poll : E(read);
    MS   : R;
%}

// The real do-nothing guy
// Zero instructions, no operands and no functional units.
pipe_class empty( ) %{
    instruction_count(0);
%}

// Long-running memory operation
// Instruction count 0, may span multiple bundles, forces serialization.
// Fixed latency of 25; MS is held for 1 cycle from stage R.
pipe_class long_memory_op() %{
    instruction_count(0); multiple_bundles; force_serialization;
    fixed_latency(25);
    MS  : R(1);
%}

// Check-cast
// No instruction-count attribute is given.  Both pointer inputs (array,
// match) are read in stage R.  IALU and BR are each held for 2 cycles from
// stage R, and MS is used in stage R.
pipe_class partial_subtype_check_pipe(Universe ignore, iRegP array, iRegP match ) %{
    array : R(read);
    match  : R(read);
    IALU   : R(2);
    BR     : R(2);
    MS     : R;
%}

// Convert FPU flags into +1,0,-1
// Float sources are read in stage E and the int result is written in stage
// E.  FA is used in stage R; MS and BR are each held for 2 cycles from R.
pipe_class floating_cmp( iRegI dst, regF src1, regF src2 ) %{
    src1  : E(read);
    src2  : E(read);
    dst   : E(write);
    FA    : R;
    MS    : R(2);
    BR    : R(2);
%}

// Compare for p < q, and conditionally add y
// All three inputs are read in stage E; an integer ALU is held for 3 cycles
// starting at stage R.  No destination operand is declared.
pipe_class cadd_cmpltmask( iRegI p, iRegI q, iRegI y ) %{
    p     : E(read);
    q     : E(read);
    y     : E(read);
    IALU  : R(3);
%}

// Perform a compare, then move conditionally in a branch delay slot.
// Both operands are read in stage E (srcdst has no separate write entry).
// One integer ALU and the branch unit are each used in stage R.
pipe_class min_max( iRegI src2, iRegI srcdst ) %{
    src2   : E(read);
    srcdst : E(read);
    IALU   : R;
    BR     : R;
%}

// Define the class for the Nop node
// MachNop is given the ialu_nop pipe class, i.e. it occupies either integer
// ALU unit (A0 or A1) in stage R.
define %{
   MachNop = ialu_nop;
%}

%}

//----------INSTRUCTIONS-------------------------------------------------------

//------------Special Stack Slot instructions - no match rules-----------------
// Load a float register directly from an int-typed stack slot with one LDF.
instruct stkI_to_regF(regF dst, stackSlotI src) %{
  // No match rule to avoid chain rule match.
  effect(DEF dst, USE src);       // dst is defined, src is only read
  ins_cost(MEMORY_REF_COST);
  size(4);                        // one 4-byte instruction
  format %{ "LDF    $src,$dst\t! stkI to regF" %}
  opcode(Assembler::ldf_op3);
  ins_encode(simple_form3_mem_reg(src, dst));  // encoder takes (memory, register)
  ins_pipe(floadF_stk);
%}

// Load a double register directly from a long-typed stack slot with one LDDF.
instruct stkL_to_regD(regD dst, stackSlotL src) %{
  // No match rule to avoid chain rule match.
  effect(DEF dst, USE src);       // dst is defined, src is only read
  ins_cost(MEMORY_REF_COST);
  size(4);                        // one 4-byte instruction
  format %{ "LDDF   $src,$dst\t! stkL to regD" %}
  opcode(Assembler::lddf_op3);
  ins_encode(simple_form3_mem_reg(src, dst));  // encoder takes (memory, register)
  ins_pipe(floadD_stk);
%}

// Store a float register directly into an int-typed stack slot with one STF.
instruct regF_to_stkI(stackSlotI dst, regF src) %{
  // No match rule to avoid chain rule match.
  effect(DEF dst, USE src);       // the stack slot is defined, src is only read
  ins_cost(MEMORY_REF_COST);
  size(4);                        // one 4-byte instruction
  format %{ "STF    $src,$dst\t! regF to stkI" %}
  opcode(Assembler::stf_op3);
  ins_encode(simple_form3_mem_reg(dst, src));  // encoder takes (memory, register)
  ins_pipe(fstoreF_stk_reg);
%}

// Store a double register directly into a long-typed stack slot with one STDF.
instruct regD_to_stkL(stackSlotL dst, regD src) %{
  // No match rule to avoid chain rule match.
  effect(DEF dst, USE src);       // the stack slot is defined, src is only read
  ins_cost(MEMORY_REF_COST);
  size(4);                        // one 4-byte instruction
  format %{ "STDF   $src,$dst\t! regD to stkL" %}
  opcode(Assembler::stdf_op3);
  ins_encode(simple_form3_mem_reg(dst, src));  // encoder takes (memory, register)
  ins_pipe(fstoreD_stk_reg);
%}

// Store an int register into the high word of a long stack slot and R_G0
// into the low word (per the format string).  Like its neighbours this
// instruct has no match rule.
instruct regI_to_stkLHi(stackSlotL dst, iRegI src) %{
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST*2);    // two memory references
  size(8);                        // two 4-byte instructions
  format %{ "STW    $src,$dst.hi\t! long\n\t"
            "STW    R_G0,$dst.lo" %}
  opcode(Assembler::stw_op3);
  // First encoder stores src at dst; second stores R_G0 at dst plus 4.
  ins_encode(simple_form3_mem_reg(dst, src), form3_mem_plus_4_reg(dst, R_G0));
  ins_pipe(lstoreI_stk_reg);
%}

// Store a long register directly into a double-typed stack slot with one STX.
instruct regL_to_stkD(stackSlotD dst, iRegL src) %{
  // No match rule to avoid chain rule match.
  effect(DEF dst, USE src);       // the stack slot is defined, src is only read
  ins_cost(MEMORY_REF_COST);
  size(4);                        // one 4-byte instruction
  format %{ "STX    $src,$dst\t! regL to stkD" %}
  opcode(Assembler::stx_op3);
  ins_encode(simple_form3_mem_reg( dst, src ) );  // encoder takes (memory, register)
  ins_pipe(istore_stk_reg);
%}

//---------- Chain stack slots between similar types --------

// Load integer from stack slot
// Chain rule (Set dst src): reloads an int stack slot into an int register
// with a single 4-byte LDUW.
instruct stkI_to_regI( iRegI dst, stackSlotI src ) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDUW   $src,$dst\t!stk" %}
  opcode(Assembler::lduw_op3);
  ins_encode(simple_form3_mem_reg( src, dst ) );
  ins_pipe(iload_mem);
%}

// Store integer to stack slot
// Chain rule (Set dst src): spills an int register into an int stack slot
// with a single 4-byte STW.
instruct regI_to_stkI( stackSlotI dst, iRegI src ) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STW    $src,$dst\t!stk" %}
  opcode(Assembler::stw_op3);
  ins_encode(simple_form3_mem_reg( dst, src ) );
  ins_pipe(istore_mem_reg);
%}

// Load long from stack slot
// Chain rule (Set dst src): reloads a long stack slot into a long register
// with a single 4-byte LDX.
instruct stkL_to_regL( iRegL dst, stackSlotL src ) %{
  match(Set dst src);

  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "LDX    $src,$dst\t! long" %}
  opcode(Assembler::ldx_op3);
  ins_encode(simple_form3_mem_reg( src, dst ) );
  ins_pipe(iload_mem);
%}

// Store long to stack slot
// Chain rule (Set dst src): spills a long register into a long stack slot
// with a single 4-byte STX.
instruct regL_to_stkL(stackSlotL dst, iRegL src) %{
  match(Set dst src);

  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "STX    $src,$dst\t! long" %}
  opcode(Assembler::stx_op3);
  ins_encode(simple_form3_mem_reg( dst, src ) );
  ins_pipe(istore_mem_reg);
%}

#ifdef _LP64
// Load pointer from stack slot, 64-bit encoding
// _LP64 variant: pointers are reloaded with a single 4-byte LDX, the same
// opcode used for long stack slots.
instruct stkP_to_regP( iRegP dst, stackSlotP src ) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "LDX    $src,$dst\t!ptr" %}
  opcode(Assembler::ldx_op3);
  ins_encode(simple_form3_mem_reg( src, dst ) );
  ins_pipe(iload_mem);
%}

// Store pointer to stack slot, 64-bit encoding
// _LP64 variant: pointers are spilled with a single 4-byte STX, the same
// opcode used for long stack slots.
instruct regP_to_stkP(stackSlotP dst, iRegP src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "STX    $src,$dst\t!ptr" %}
  opcode(Assembler::stx_op3);
  ins_encode(simple_form3_mem_reg( dst, src ) );
  ins_pipe(istore_mem_reg);
%}
#else // _LP64
// Load pointer from stack slot, 32-bit encoding
// Non-_LP64 variant: pointers are reloaded with LDUW.  Unlike the 64-bit
// variant this instruct declares no size() and passes a second opcode
// argument (Assembler::ldst_op).
instruct stkP_to_regP( iRegP dst, stackSlotP src ) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);
  format %{ "LDUW   $src,$dst\t!ptr" %}
  opcode(Assembler::lduw_op3, Assembler::ldst_op);
  ins_encode(simple_form3_mem_reg( src, dst ) );
  ins_pipe(iload_mem);
%}

// Store pointer to stack slot, 32-bit encoding
// Non-_LP64 variant: pointers are spilled with STW.  Unlike the 64-bit
// variant this instruct declares no size() and passes a second opcode
// argument (Assembler::ldst_op).
instruct regP_to_stkP(stackSlotP dst, iRegP src) %{
  match(Set dst src);
  ins_cost(MEMORY_REF_COST);
  format %{ "STW    $src,$dst\t!ptr" %}
  opcode(Assembler::stw_op3, Assembler::ldst_op);
  ins_encode(simple_form3_mem_reg( dst, src ) );
  ins_pipe(istore_mem_reg);
%}
#endif // _LP64

//------------Special Nop instructions for bundling - no match rules-----------
// Nop using the A0 functional unit
// Zero-cost filler with no operands and no match rule; listed in the
// pipeline's nops() attribute.  Encoded by form2_nop() and scheduled with
// the ialu_nop_A0 pipe class.
instruct Nop_A0() %{
  ins_cost(0);

  format %{ "NOP    ! Alu Pipeline" %}
  opcode(Assembler::or_op3, Assembler::arith_op);
  ins_encode( form2_nop() );
  ins_pipe(ialu_nop_A0);
%}

// Nop using the A1 functional unit
// Identical to Nop_A0 (same opcode, encoder and format string) except that
// it is scheduled with the ialu_nop_A1 pipe class.
instruct Nop_A1( ) %{
  ins_cost(0);

  format %{ "NOP    ! Alu Pipeline" %}
  opcode(Assembler::or_op3, Assembler::arith_op);
  ins_encode( form2_nop() );
  ins_pipe(ialu_nop_A1);
%}

// Nop using the memory functional unit
// Zero-cost filler with no operands and no match rule.  Encoded by
// emit_mem_nop and scheduled with the mem_nop pipe class (MS unit).
instruct Nop_MS( ) %{
  ins_cost(0);

  format %{ "NOP    ! Memory Pipeline" %}
  ins_encode( emit_mem_nop );
  ins_pipe(mem_nop);
%}

// Nop using the floating add functional unit
// Zero-cost filler with no operands and no match rule.  Encoded by
// emit_fadd_nop and scheduled with the fadd_nop pipe class (FA unit).
instruct Nop_FA( ) %{
  ins_cost(0);

  format %{ "NOP    ! Floating Add Pipeline" %}
  ins_encode( emit_fadd_nop );
  ins_pipe(fadd_nop);
%}

// Nop using the branch functional unit
// Zero-cost filler with no operands and no match rule.  Encoded by
// emit_br_nop and scheduled with the br_nop pipe class (BR unit).
instruct Nop_BR( ) %{
  ins_cost(0);

  format %{ "NOP    ! Branch Pipeline" %}
  ins_encode( emit_br_nop );
  ins_pipe(br_nop);
%}

//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed)
// Matches LoadB and emits a single 4-byte LDSB.  Scheduled as
// iload_mask_mem, the load class whose result is written in stage M.
instruct loadB(iRegI dst, memory mem) %{
  match(Set dst (LoadB mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDSB   $mem,$dst\t! byte" %}
  ins_encode %{
    __ ldsb($mem$$Address, $dst$$Register);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Byte (8bit signed) into a Long Register
// Folds the ConvI2L into the load: the same single LDSB as loadB is emitted,
// only the destination is a long register.
instruct loadB2L(iRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadB mem)));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDSB   $mem,$dst\t! byte -> long" %}
  ins_encode %{
    __ ldsb($mem$$Address, $dst$$Register);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Unsigned Byte (8bit UNsigned) into an int reg
// Matches LoadUB and emits a single 4-byte LDUB.  Scheduled as iload_mem,
// the plain load class whose result is written in stage C.
instruct loadUB(iRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDUB   $mem,$dst\t! ubyte" %}
  ins_encode %{
    __ ldub($mem$$Address, $dst$$Register);
  %}
  ins_pipe(iload_mem);
%}

// Load Unsigned Byte (8bit UNsigned) into a Long Register
// Folds the ConvI2L into the load: the same single LDUB as loadUB is emitted,
// only the destination is a long register.
instruct loadUB2L(iRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDUB   $mem,$dst\t! ubyte -> long" %}
  ins_encode %{
    __ ldub($mem$$Address, $dst$$Register);
  %}
  ins_pipe(iload_mem);
%}

// Load Unsigned Byte (8 bit UNsigned) with 8-bit mask into Long Register
// Matches ConvI2L(AndI(LoadUB mem) mask) and emits two 4-byte instructions:
// an LDUB into dst followed by an AND of dst with the immediate mask.
instruct loadUB2L_immI8(iRegL dst, memory mem, immI8 mask) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  ins_cost(MEMORY_REF_COST + DEFAULT_COST);

  size(2*4);
  format %{ "LDUB   $mem,$dst\t# ubyte & 8-bit mask -> long\n\t"
            "AND    $dst,$mask,$dst" %}
  ins_encode %{
    // dst serves as both the load target and the AND source/destination.
    Register Rdst = $dst$$Register;
    __ ldub($mem$$Address, Rdst);
    __ and3(Rdst, $mask$$constant, Rdst);
  %}
  ins_pipe(iload_mem);
%}

// Load Short (16bit signed)
// Matches LoadS and emits a single 4-byte LDSH.  Scheduled as
// iload_mask_mem, the load class whose result is written in stage M.
instruct loadS(iRegI dst, memory mem) %{
  match(Set dst (LoadS mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDSH   $mem,$dst\t! short" %}
  ins_encode %{
    __ ldsh($mem$$Address, $dst$$Register);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Short (16 bit signed) to Byte (8 bit signed)
// Matches (LoadS << 24) >> 24, i.e. sign-extension of the low byte of the
// short, and replaces it with a single LDSB from address mem+1.
// NOTE(review): the indOffset13m7 operand presumably limits the displacement
// so that adding 1 still fits the immediate field -- confirm.
instruct loadS2B(iRegI dst, indOffset13m7 mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
  ins_cost(MEMORY_REF_COST);

  size(4);

  format %{ "LDSB   $mem+1,$dst\t! short -> byte" %}
  ins_encode %{
    __ ldsb($mem$$Address, $dst$$Register, 1);  // trailing 1 is the +1 byte offset
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Short (16bit signed) into a Long Register
// Folds the ConvI2L into the load: the same single LDSH as loadS is emitted,
// only the destination is a long register.
instruct loadS2L(iRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadS mem)));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDSH   $mem,$dst\t! short -> long" %}
  ins_encode %{
    __ ldsh($mem$$Address, $dst$$Register);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Unsigned Short/Char (16bit UNsigned)
// Matches LoadUS and emits a single 4-byte LDUH.  Scheduled as iload_mem,
// the plain load class whose result is written in stage C.
instruct loadUS(iRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDUH   $mem,$dst\t! ushort/char" %}
  ins_encode %{
    __ lduh($mem$$Address, $dst$$Register);
  %}
  ins_pipe(iload_mem);
%}

// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed).
// Matches (LoadUS mem << 24) >> 24 and replaces it with one LDSB of the byte
// at mem+1 (the extra offset 1 is passed to ldsb).
// NOTE(review): indOffset13m7 is defined outside this view; presumably it
// keeps mem+1 within the 13-bit displacement range - confirm.
instruct loadUS2B(iRegI dst, indOffset13m7 mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDSB   $mem+1,$dst\t! ushort -> byte" %}
  ins_encode %{
    __ ldsb($mem$$Address, $dst$$Register, 1);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Unsigned Short/Char (16bit UNsigned) into a Long Register.
// Matches ConvI2L(LoadUS mem); a single LDUH targets the long register.
instruct loadUS2L(iRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDUH   $mem,$dst\t! ushort/char -> long" %}
  ins_encode %{
    __ lduh($mem$$Address, $dst$$Register);
  %}
  ins_pipe(iload_mem);
%}

// Load Unsigned Short/Char (16bit UNsigned) with mask 0xFF into a Long Register.
// Matches ConvI2L(AndI(LoadUS mem, 255)); the mask is folded away by loading
// only the byte at mem+1 with LDUB.
// NOTE(review): indOffset13m7 is defined outside this view; presumably it
// keeps mem+1 within the 13-bit displacement range - confirm.
instruct loadUS2L_immI_255(iRegL dst, indOffset13m7 mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDUB   $mem+1,$dst\t! ushort/char & 0xFF -> long" %}
  ins_encode %{
    __ ldub($mem$$Address, $dst$$Register, 1);  // LSB is index+1 on BE
  %}
  ins_pipe(iload_mem);
%}

// Load Unsigned Short/Char (16bit UNsigned) with a 13-bit mask into a Long Register.
// Matches ConvI2L(AndI(LoadUS mem, mask)) and emits LDUH followed by an AND
// with the immediate mask: two 4-byte instructions, as size(2*4) states.
instruct loadUS2L_immI13(iRegL dst, memory mem, immI13 mask) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  ins_cost(MEMORY_REF_COST + DEFAULT_COST);

  size(2*4);
  format %{ "LDUH   $mem,$dst\t! ushort/char & 13-bit mask -> long\n\t"
            "AND    $dst,$mask,$dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ lduh($mem$$Address, Rdst);
    // Mask in place; the loaded value is the only other input.
    __ and3(Rdst, $mask$$constant, Rdst);
  %}
  ins_pipe(iload_mem);
%}

// Load Unsigned Short/Char (16bit UNsigned) with a 16-bit mask into a Long Register.
// Matches ConvI2L(AndI(LoadUS mem, mask)) for masks taken as immI16: the mask
// is first materialized into tmp with SET and then ANDed register-to-register.
// Both dst and tmp are declared TEMP, so the allocator keeps them distinct
// from the inputs (dst is written by the load before the mask is built).
instruct loadUS2L_immI16(iRegL dst, memory mem, immI16 mask, iRegL tmp) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(TEMP dst, TEMP tmp);
  ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);

  size((3+1)*4);  // set may use two instructions.
  format %{ "LDUH   $mem,$dst\t! ushort/char & 16-bit mask -> long\n\t"
            "SET    $mask,$tmp\n\t"
            "AND    $dst,$tmp,$dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rtmp = $tmp$$Register;
    __ lduh($mem$$Address, Rdst);
    __ set($mask$$constant, Rtmp);
    __ and3(Rdst, Rtmp, Rdst);
  %}
  ins_pipe(iload_mem);
%}

// Load Integer.
// Matches a LoadI node and emits a single LDUW into an integer register.
instruct loadI(iRegI dst, memory mem) %{
  match(Set dst (LoadI mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDUW   $mem,$dst\t! int" %}
  ins_encode %{
    __ lduw($mem$$Address, $dst$$Register);
  %}
  ins_pipe(iload_mem);
%}

// Load Integer to Byte (8 bit signed).
// Matches (LoadI mem << 24) >> 24 and replaces the load plus both shifts with
// one LDSB of the byte at mem+3 (the extra offset 3 is passed to ldsb).
// NOTE(review): indOffset13m7 is defined outside this view; presumably it
// keeps mem+3 within the 13-bit displacement range - confirm.
instruct loadI2B(iRegI dst, indOffset13m7 mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
  ins_cost(MEMORY_REF_COST);

  size(4);

  format %{ "LDSB   $mem+3,$dst\t! int -> byte" %}
  ins_encode %{
    __ ldsb($mem$$Address, $dst$$Register, 3);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Integer to Unsigned Byte (8 bit UNsigned).
// Matches AndI(LoadI mem, 255); the mask is folded away by loading only the
// byte at mem+3 with LDUB.
// NOTE(review): indOffset13m7 is defined outside this view; presumably it
// keeps mem+3 within the 13-bit displacement range - confirm.
instruct loadI2UB(iRegI dst, indOffset13m7 mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));
  ins_cost(MEMORY_REF_COST);

  size(4);

  format %{ "LDUB   $mem+3,$dst\t! int -> ubyte" %}
  ins_encode %{
    __ ldub($mem$$Address, $dst$$Register, 3);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Integer to Short (16 bit signed).
// Matches (LoadI mem << 16) >> 16 and replaces the load plus both shifts with
// one LDSH of the halfword at mem+2 (the extra offset 2 is passed to ldsh).
// NOTE(review): indOffset13m7 is defined outside this view; presumably it
// keeps mem+2 within the 13-bit displacement range - confirm.
instruct loadI2S(iRegI dst, indOffset13m7 mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
  ins_cost(MEMORY_REF_COST);

  size(4);

  format %{ "LDSH   $mem+2,$dst\t! int -> short" %}
  ins_encode %{
    __ ldsh($mem$$Address, $dst$$Register, 2);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Integer to Unsigned Short (16 bit UNsigned).
// Matches AndI(LoadI mem, 65535); the mask is folded away by loading only the
// halfword at mem+2 with LDUH.
// NOTE(review): indOffset13m7 is defined outside this view; presumably it
// keeps mem+2 within the 13-bit displacement range - confirm.
instruct loadI2US(iRegI dst, indOffset13m7 mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));
  ins_cost(MEMORY_REF_COST);

  size(4);

  format %{ "LDUH   $mem+2,$dst\t! int -> ushort/char" %}
  ins_encode %{
    __ lduh($mem$$Address, $dst$$Register, 2);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Integer into a Long Register.
// Matches ConvI2L(LoadI mem); LDSW (the signed word load) writes the long
// register directly, so no separate conversion instruction is emitted.
instruct loadI2L(iRegL dst, memory mem) %{
  match(Set dst (ConvI2L (LoadI mem)));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDSW   $mem,$dst\t! int -> long" %}
  ins_encode %{
    __ ldsw($mem$$Address, $dst$$Register);
  %}
  ins_pipe(iload_mask_mem);
%}

// Load Integer with mask 0xFF into a Long Register.
// Matches ConvI2L(AndI(LoadI mem, 255)); the mask is folded away by loading
// only the byte at mem+3 with LDUB.
// NOTE(review): indOffset13m7 is defined outside this view; presumably it
// keeps mem+3 within the 13-bit displacement range - confirm.
instruct loadI2L_immI_255(iRegL dst, indOffset13m7 mem, immI_255 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDUB   $mem+3,$dst\t! int & 0xFF -> long" %}
  ins_encode %{
    __ ldub($mem$$Address, $dst$$Register, 3);  // LSB is index+3 on BE
  %}
  ins_pipe(iload_mem);
%}

// Load Integer with mask 0xFFFF into a Long Register.
// Matches ConvI2L(AndI(LoadI mem, 65535)); the mask is folded away by loading
// only the halfword at mem+2 with LDUH.
// NOTE(review): indOffset13m7 is defined outside this view; presumably it
// keeps mem+2 within the 13-bit displacement range - confirm.
instruct loadI2L_immI_65535(iRegL dst, indOffset13m7 mem, immI_65535 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDUH   $mem+2,$dst\t! int & 0xFFFF -> long" %}
  ins_encode %{
    __ lduh($mem$$Address, $dst$$Register, 2);  // LSW is index+2 on BE
  %}
  ins_pipe(iload_mem);
%}

// Load Integer with a 13-bit mask into a Long Register.
// Matches ConvI2L(AndI(LoadI mem, mask)) and emits a load followed by an AND
// with the immediate mask: two 4-byte instructions, as size(2*4) states.
//
// The load must be the sign-extending LDSW.  The int result (x & mask) is
// negative whenever both x and the mask have bit 31 set, and ConvI2L then
// requires bits 63..32 of dst to be ones.  A zero-extending LDUW leaves those
// bits clear, so a negative mask applied to a negative int produced a wrong
// (positive) long.  With LDSW the identity
//     sext64(x) & sext64(mask) == sext64(x & mask)
// makes the two-instruction sequence exact for every mask value; the AND
// immediate is sign-extended by the hardware, which supplies sext64(mask).
// For non-negative masks the result is unchanged from the LDUW form.
instruct loadI2L_immI13(iRegL dst, memory mem, immI13 mask) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  ins_cost(MEMORY_REF_COST + DEFAULT_COST);

  size(2*4);
  format %{ "LDSW   $mem,$dst\t! int & 13-bit mask -> long\n\t"
            "AND    $dst,$mask,$dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    __ ldsw($mem$$Address, Rdst);           // sign-extend: see header comment
    __ and3(Rdst, $mask$$constant, Rdst);
  %}
  ins_pipe(iload_mem);
%}

// Load Integer with a 32-bit mask into a Long Register.
// Matches ConvI2L(AndI(LoadI mem, mask)) for an arbitrary int mask: the mask
// is materialized into tmp with SET and then ANDed register-to-register.
// Both dst and tmp are declared TEMP, so the allocator keeps them distinct
// from the inputs (dst is written by the load before the mask is built).
//
// The load must be the sign-extending LDSW.  The int result (x & mask) is
// negative whenever both x and the mask have bit 31 set, and ConvI2L then
// requires bits 63..32 of dst to be ones.  A zero-extending LDUW leaves those
// bits clear, so a negative mask applied to a negative int produced a wrong
// (positive) long.  With LDSW, and with SET receiving the mask as a
// sign-extended constant, the identity
//     sext64(x) & sext64(mask) == sext64(x & mask)
// makes the sequence exact for every mask value.
//
// No size() clause: the previous (3+1)*4 bound assumed SET needs at most two
// instructions.  NOTE(review): that is presumably not guaranteed when SET has
// to build a negative 64-bit value, so the size is left for ADLC to compute
// from the actual encoding - confirm against MacroAssembler::set.
instruct loadI2L_immI(iRegL dst, memory mem, immI mask, iRegL tmp) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(TEMP dst, TEMP tmp);
  ins_cost(MEMORY_REF_COST + 2*DEFAULT_COST);

  format %{ "LDSW   $mem,$dst\t! int & 32-bit mask -> long\n\t"
            "SET    $mask,$tmp\n\t"
            "AND    $dst,$tmp,$dst" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    Register Rtmp = $tmp$$Register;
    __ ldsw($mem$$Address, Rdst);           // sign-extend: see header comment
    __ set($mask$$constant, Rtmp);
    __ and3(Rdst, Rtmp, Rdst);
  %}
  ins_pipe(iload_mem);
%}

// Load Unsigned Integer into a Long Register.
// Matches a LoadUI2L node and emits a single LDUW into the long register.
instruct loadUI2L(iRegL dst, memory mem) %{
  match(Set dst (LoadUI2L mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDUW   $mem,$dst\t! uint -> long" %}
  ins_encode %{
    __ lduw($mem$$Address, $dst$$Register);
  %}
  ins_pipe(iload_mem);
%}

// Load Long - aligned.
// Matches a LoadL node and emits a single LDX into a long register.
instruct loadL(iRegL dst, memory mem ) %{
  match(Set dst (LoadL mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDX    $mem,$dst\t! long" %}
  ins_encode %{
    __ ldx($mem$$Address, $dst$$Register);
  %}
  ins_pipe(iload_mem);
%}

// Load Long - UNaligned.
// Matches LoadL_unaligned.  Per the format text the long is assembled from two
// 32-bit LDUW loads (mem and mem+4) combined with SLLX/OR, four instructions
// in total (size(16)).  R_O7 serves as the scratch register, which is why the
// o7RegI operand is declared KILLed.
// The actual emission is done by form3_mem_reg_long_unaligned_marshal, which
// is defined outside this view.
instruct loadL_unaligned(iRegL dst, memory mem, o7RegI tmp) %{
  match(Set dst (LoadL_unaligned mem));
  effect(KILL tmp);
  ins_cost(MEMORY_REF_COST*2+DEFAULT_COST);
  size(16);
  format %{ "LDUW   $mem+4,R_O7\t! misaligned long\n"
          "\tLDUW   $mem  ,$dst\n"
          "\tSLLX   #32, $dst, $dst\n"
          "\tOR     $dst, R_O7, $dst" %}
  opcode(Assembler::lduw_op3);
  ins_encode(form3_mem_reg_long_unaligned_marshal( mem, dst ));
  ins_pipe(iload_mem);
%}

// Load Aligned Packed Byte into a Double Register.
// Matches Load8B and emits one LDDF (opcode lddf_op3) via the shared
// simple_form3_mem_reg encoding.
instruct loadA8B(regD dst, memory mem) %{
  match(Set dst (Load8B mem));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "LDDF   $mem,$dst\t! packed8B" %}
  opcode(Assembler::lddf_op3);
  ins_encode(simple_form3_mem_reg( mem, dst ) );
  ins_pipe(floadD_mem);
%}

// Load Aligned Packed Char into a Double Register.
// Matches Load4C and emits one LDDF (opcode lddf_op3) via the shared
// simple_form3_mem_reg encoding.
instruct loadA4C(regD dst, memory mem) %{
  match(Set dst (Load4C mem));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "LDDF   $mem,$dst\t! packed4C" %}
  opcode(Assembler::lddf_op3);
  ins_encode(simple_form3_mem_reg( mem, dst ) );
  ins_pipe(floadD_mem);
%}

// Load Aligned Packed Short into a Double Register.
// Matches Load4S and emits one LDDF (opcode lddf_op3) via the shared
// simple_form3_mem_reg encoding.
instruct loadA4S(regD dst, memory mem) %{
  match(Set dst (Load4S mem));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "LDDF   $mem,$dst\t! packed4S" %}
  opcode(Assembler::lddf_op3);
  ins_encode(simple_form3_mem_reg( mem, dst ) );
  ins_pipe(floadD_mem);
%}

// Load Aligned Packed Int into a Double Register.
// Matches Load2I and emits one LDDF (opcode lddf_op3) via the shared
// simple_form3_mem_reg encoding.
instruct loadA2I(regD dst, memory mem) %{
  match(Set dst (Load2I mem));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "LDDF   $mem,$dst\t! packed2I" %}
  opcode(Assembler::lddf_op3);
  ins_encode(simple_form3_mem_reg( mem, dst ) );
  ins_pipe(floadD_mem);
%}

// Load Range.
// Matches a LoadRange node and emits one LDUW (opcode lduw_op3) into an
// integer register via the shared simple_form3_mem_reg encoding.
instruct loadRange(iRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDUW   $mem,$dst\t! range" %}
  opcode(Assembler::lduw_op3);
  ins_encode(simple_form3_mem_reg( mem, dst ) );
  ins_pipe(iload_mem);
%}

// Load Integer into %f register (for fitos/fitod).
// Matches the same LoadI node as loadI but targets a float register class
// (regF) and emits LDF (opcode ldf_op3), so an int destined for a
// floating-point conversion can be loaded straight into the FP register file.
instruct loadI_freg(regF dst, memory mem) %{
  match(Set dst (LoadI mem));
  ins_cost(MEMORY_REF_COST);
  size(4);

  format %{ "LDF    $mem,$dst\t! for fitos/fitod" %}
  opcode(Assembler::ldf_op3);
  ins_encode(simple_form3_mem_reg( mem, dst ) );
  ins_pipe(floadF_mem);
%}

// Load Pointer.
// Matches a LoadP node.  The instruction is chosen at build time: LDUW when
// _LP64 is not defined, LDX when it is.  Either way one 4-byte instruction.
instruct loadP(iRegP dst, memory mem) %{
  match(Set dst (LoadP mem));
  ins_cost(MEMORY_REF_COST);
  size(4);

#ifndef _LP64
  format %{ "LDUW   $mem,$dst\t! ptr" %}
  ins_encode %{
    __ lduw($mem$$Address, $dst$$Register);
  %}
#else
  format %{ "LDX    $mem,$dst\t! ptr" %}
  ins_encode %{
    __ ldx($mem$$Address, $dst$$Register);
  %}
#endif
  ins_pipe(iload_mem);
%}

// Load Compressed Pointer.
// Matches a LoadN node and emits a single LDUW into a narrow-oop register
// (iRegN).  No decoding is done here.
instruct loadN(iRegN dst, memory mem) %{
  match(Set dst (LoadN mem));
  ins_cost(MEMORY_REF_COST);
  size(4);

  format %{ "LDUW   $mem,$dst\t! compressed ptr" %}
  ins_encode %{
    __ lduw($mem$$Address, $dst$$Register);
  %}
  ins_pipe(iload_mem);
%}

// Load Klass Pointer.
// Matches a LoadKlass node.  The instruction is chosen at build time: LDUW
// when _LP64 is not defined, LDX when it is.  Either way one 4-byte
// instruction.
instruct loadKlass(iRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));
  ins_cost(MEMORY_REF_COST);
  size(4);

#ifndef _LP64
  format %{ "LDUW   $mem,$dst\t! klass ptr" %}
  ins_encode %{
    __ lduw($mem$$Address, $dst$$Register);
  %}
#else
  format %{ "LDX    $mem,$dst\t! klass ptr" %}
  ins_encode %{
    __ ldx($mem$$Address, $dst$$Register);
  %}
#endif
  ins_pipe(iload_mem);
%}

// Load narrow Klass Pointer.
// Matches a LoadNKlass node and emits a single LDUW into a narrow register
// (iRegN).  No decoding is done here.
instruct loadNKlass(iRegN dst, memory mem) %{
  match(Set dst (LoadNKlass mem));
  ins_cost(MEMORY_REF_COST);
  size(4);

  format %{ "LDUW   $mem,$dst\t! compressed klass ptr" %}
  ins_encode %{
    __ lduw($mem$$Address, $dst$$Register);
  %}
  ins_pipe(iload_mem);
%}

// Load Double.
// Matches a LoadD node and emits one LDDF (opcode lddf_op3) via the shared
// simple_form3_mem_reg encoding.
instruct loadD(regD dst, memory mem) %{
  match(Set dst (LoadD mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDDF   $mem,$dst" %}
  opcode(Assembler::lddf_op3);
  ins_encode(simple_form3_mem_reg( mem, dst ) );
  ins_pipe(floadD_mem);
%}

// Load Double - UNaligned.
// Matches LoadD_unaligned.  Per the format text the double is loaded as two
// single-precision LDF loads (mem -> high half, mem+4 -> low half), two
// instructions in total (size(8)).  The destination is restricted to the
// regD_low register class.
// The actual emission is done by form3_mem_reg_double_unaligned, which is
// defined outside this view.
instruct loadD_unaligned(regD_low dst, memory mem ) %{
  match(Set dst (LoadD_unaligned mem));
  ins_cost(MEMORY_REF_COST*2+DEFAULT_COST);
  size(8);
  format %{ "LDF    $mem  ,$dst.hi\t! misaligned double\n"
          "\tLDF    $mem+4,$dst.lo\t!" %}
  opcode(Assembler::ldf_op3);
  ins_encode( form3_mem_reg_double_unaligned( mem, dst ));
  ins_pipe(iload_mem);
%}

// Load Float.
// Matches a LoadF node and emits one LDF (opcode ldf_op3) via the shared
// simple_form3_mem_reg encoding.
instruct loadF(regF dst, memory mem) %{
  match(Set dst (LoadF mem));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDF    $mem,$dst" %}
  opcode(Assembler::ldf_op3);
  ins_encode(simple_form3_mem_reg( mem, dst ) );
  ins_pipe(floadF_mem);
%}

// Load Constant.
// Materializes an arbitrary int immediate into an integer register through
// the Set32 encoding (defined outside this view).  No size() is declared, so
// the instruction length is not fixed here.
instruct loadConI( iRegI dst, immI src ) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST * 3/2);
  format %{ "SET    $src,$dst" %}
  ins_encode( Set32(src, dst) );
  ins_pipe(ialu_hi_lo_reg);
%}

// Load a 13-bit int immediate into an integer register with one 4-byte MOV
// (Set13 encoding, defined outside this view).  No ins_cost is given here,
// unlike loadConI which declares DEFAULT_COST * 3/2.
instruct loadConI13( iRegI dst, immI13 src ) %{
  match(Set dst src);

  size(4);
  format %{ "MOV    $src,$dst" %}
  ins_encode( Set13( src, dst ) );
  ins_pipe(ialu_imm);
%}

#ifndef _LP64
// Load a pointer constant into a pointer register (variant compiled when
// _LP64 is not defined).  Oop constants go through set_oop_constant; every
// other pointer constant is materialized with a plain SET.
instruct loadConP(iRegP dst, immP con) %{
  match(Set dst con);
  ins_cost(DEFAULT_COST * 3/2);
  format %{ "SET    $con,$dst\t!ptr" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // [RGV] The oop test below should be generated from ADLC
    if (!_opnds[1]->constant_is_oop()) {
      // non-oop pointers, e.g. card mark base, heap top
      __ set($con$$constant, Rdst);
    } else {
      intptr_t oop_bits = $con$$constant;
      __ set_oop_constant((jobject) oop_bits, Rdst);
    }
  %}
  ins_pipe(loadConP);
%}
#else
// Load a pointer constant of operand class immP_set into a pointer register
// (one of the variants compiled when _LP64 is defined).  Oop constants go
// through set_oop_constant; every other pointer constant is materialized
// with a plain SET.
instruct loadConP_set(iRegP dst, immP_set con) %{
  match(Set dst con);
  ins_cost(DEFAULT_COST * 3/2);
  format %{ "SET    $con,$dst\t! ptr" %}
  ins_encode %{
    Register Rdst = $dst$$Register;
    // [RGV] The oop test below should be generated from ADLC
    if (!_opnds[1]->constant_is_oop()) {
      // non-oop pointers, e.g. card mark base, heap top
      __ set($con$$constant, Rdst);
    } else {
      intptr_t oop_bits = $con$$constant;
      __ set_oop_constant((jobject) oop_bits, Rdst);
    }
  %}
  ins_pipe(loadConP);
%}

// Load a pointer constant of operand class immP_load from the constant table
// (one of the variants compiled when _LP64 is defined).  The constant's table
// offset is passed through ensure_simm13_or_reg with dst as the register
// argument, then ld_ptr loads [constanttablebase + offset] into dst.
// NOTE(review): ensure_simm13_or_reg is defined elsewhere; presumably it
// moves the offset into the given register when it does not fit simm13 -
// confirm.
instruct loadConP_load(iRegP dst, immP_load con) %{
  match(Set dst con);
  ins_cost(MEMORY_REF_COST);
  format %{ "LD     [$constanttablebase + $constantoffset],$dst\t! load from constant table: ptr=$con" %}
  ins_encode %{
    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset($con), $dst$$Register);
    __ ld_ptr($constanttablebase, con_offset, $dst$$Register);
  %}
  ins_pipe(loadConP);
%}

// Load a pointer constant of operand class immP_no_oop_cheap with a plain SET
// (one of the variants compiled when _LP64 is defined).  Unlike loadConP_set
// there is no oop check: the constant is always handed to set() directly.
instruct loadConP_no_oop_cheap(iRegP dst, immP_no_oop_cheap con) %{
  match(Set dst con);
  ins_cost(DEFAULT_COST * 3/2);
  format %{ "SET    $con,$dst\t! non-oop ptr" %}
  ins_encode %{
    __ set($con$$constant, $dst$$Register);
  %}
  ins_pipe(loadConP);
%}
#endif // _LP64

// Load the pointer constant matched by immP0 by clearing the destination
// register: one 4-byte CLR, the constant operand itself is never read.
instruct loadConP0(iRegP dst, immP0 src) %{
  match(Set dst src);

  size(4);
  format %{ "CLR    $dst\t!ptr" %}
  ins_encode %{
    __ clr($dst$$Register);
  %}
  ins_pipe(ialu_imm);
%}

// Load the pointer constant matched by immP_poll.  The encoding ignores the
// operand's value and instead takes the address from os::get_polling_page(),
// emitting a SETHI of that address into dst.
// NOTE(review): only SETHI is emitted, so this presumably relies on the
// polling page address having no low-order bits set - confirm.
instruct loadConP_poll(iRegP dst, immP_poll src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);
  format %{ "SET    $src,$dst\t!ptr" %}
  ins_encode %{
    AddressLiteral polling_page(os::get_polling_page());
    __ sethi(polling_page, reg_to_register_object($dst$$reg));
  %}
  ins_pipe(loadConP_poll);
%}

// Load the compressed NULL pointer (immN0) by clearing the destination
// register: one 4-byte CLR, the constant operand itself is never read.
instruct loadConN0(iRegN dst, immN0 src) %{
  match(Set dst src);

  size(4);
  format %{ "CLR    $dst\t! compressed NULL ptr" %}
  ins_encode %{
    __ clr($dst$$Register);
  %}
  ins_pipe(ialu_imm);
%}

// Load a compressed pointer constant (immN) into a narrow-oop register.
// The constant is cast to jobject and handed to set_narrow_oop, which is
// defined outside this view.
instruct loadConN(iRegN dst, immN src) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST * 3/2);
  format %{ "SET    $src,$dst\t! compressed ptr" %}
  ins_encode %{
    Register dst = $dst$$Register;
    __ set_narrow_oop((jobject)$src$$constant, dst);
  %}
  ins_pipe(ialu_hi_lo_reg);
%}

// Materialize long value (predicated by immL_cheap).
// Builds the 64-bit constant in registers with set64, which is given the
// O7 operand (o7RegL tmp) as a scratch register; tmp is therefore declared
// KILLed.
instruct loadConL_set64(iRegL dst, immL_cheap con, o7RegL tmp) %{
  match(Set dst con);
  effect(KILL tmp);
  ins_cost(DEFAULT_COST * 3);
  format %{ "SET64   $con,$dst KILL $tmp\t! cheap long" %}
  ins_encode %{
    __ set64($con$$constant, $dst$$Register, $tmp$$Register);
  %}
  ins_pipe(loadConL);
%}

// Load long value from constant table (predicated by immL_expensive).
// The constant's table offset is passed through ensure_simm13_or_reg with dst
// as the register argument, then LDX loads [constanttablebase + offset] into
// dst.
// NOTE(review): ensure_simm13_or_reg is defined elsewhere; presumably it
// moves the offset into the given register when it does not fit simm13 -
// confirm.
instruct loadConL_ldx(iRegL dst, immL_expensive con) %{
  match(Set dst con);
  ins_cost(MEMORY_REF_COST);
  format %{ "LDX     [$constanttablebase + $constantoffset],$dst\t! load from constant table: long=$con" %}
  ins_encode %{
      RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset($con), $dst$$Register);
    __ ldx($constanttablebase, con_offset, $dst$$Register);
  %}
  ins_pipe(loadConL);
%}

// Load the long constant matched by immL0 with one 4-byte instruction.
// The format prints CLR, while the encoding reuses Set13 (defined outside
// this view) with the zero immediate.
instruct loadConL0( iRegL dst, immL0 src ) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST);
  size(4);
  format %{ "CLR    $dst\t! long" %}
  ins_encode( Set13( src, dst ) );
  ins_pipe(ialu_imm);
%}

// Load a long constant that fits the immL13 operand class with one 4-byte MOV
// (Set13 encoding, defined outside this view).
instruct loadConL13( iRegL dst, immL13 src ) %{
  match(Set dst src);
  ins_cost(DEFAULT_COST * 2);

  size(4);
  format %{ "MOV    $src,$dst\t! long" %}
  ins_encode( Set13( src, dst ) );
  ins_pipe(ialu_imm);
%}

// Load a float constant from the constant table into a float register.
// The destination is not an integer register, so the O7 operand (o7RegI tmp)
// is passed to ensure_simm13_or_reg as the register argument; tmp is
// therefore declared KILLed.  A single-precision LDF then loads
// [constanttablebase + offset].
// NOTE(review): ensure_simm13_or_reg is defined elsewhere; presumably it
// moves the offset into the given register when it does not fit simm13 -
// confirm.
instruct loadConF(regF dst, immF con, o7RegI tmp) %{
  match(Set dst con);
  effect(KILL tmp);
  format %{ "LDF    [$constanttablebase + $constantoffset],$dst\t! load from constant table: float=$con" %}
  ins_encode %{
      RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset($con), $tmp$$Register);
    __ ldf(FloatRegisterImpl::S, $constanttablebase, con_offset, $dst$$FloatRegister);
  %}
  ins_pipe(loadConFD);
%}

// Load a double constant from the constant table into a double register.
// The O7 operand (o7RegI tmp) is passed to ensure_simm13_or_reg as the
// register argument; tmp is therefore declared KILLed.  A double-precision
// LDF then loads [constanttablebase + offset].
// NOTE(review): ensure_simm13_or_reg is defined elsewhere; presumably it
// moves the offset into the given register when it does not fit simm13 -
// confirm.
instruct loadConD(regD dst, immD con, o7RegI tmp) %{
  match(Set dst con);
  effect(KILL tmp);
  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: double=$con" %}
  ins_encode %{
    // XXX This is a quick fix for 6833573.
    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset($con), $dst$$FloatRegister);
    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset($con), $tmp$$Register);
    // The destination is built with as_DoubleFloatRegister($dst$$reg) rather
    // than the $dst$$FloatRegister form used in the disabled line above.
    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
  %}
  ins_pipe(loadConFD);
%}

// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).

// Prefetch for read.  Matches PrefetchRead and emits one PREFETCH
// (opcode prefetch_op3) through form3_mem_prefetch_read; the format shows
// prefetch function code 0 ("read-many").  Produces no result value.
instruct prefetchr( memory mem ) %{
  match( PrefetchRead mem );
  ins_cost(MEMORY_REF_COST);
  size(4);

  format %{ "PREFETCH $mem,0\t! Prefetch read-many" %}
  opcode(Assembler::prefetch_op3);
  ins_encode( form3_mem_prefetch_read( mem ) );
  ins_pipe(iload_mem);
%}

// Prefetch for write.  Matches PrefetchWrite and emits one PREFETCH
// (opcode prefetch_op3) through form3_mem_prefetch_write; the format shows
// prefetch function code 2 ("write-many (and read)").  Produces no result
// value.
instruct prefetchw( memory mem ) %{
  match( PrefetchWrite mem );
  ins_cost(MEMORY_REF_COST);
  size(4);

  format %{ "PREFETCH $mem,2\t! Prefetch write-many (and read)" %}
  opcode(Assembler::prefetch_op3);
  ins_encode( form3_mem_prefetch_write( mem ) );
  ins_pipe(iload_mem);
%}

// Prefetch instructions for allocation.

// Allocation prefetch using the PREFETCH instruction.  Selected only when
// AllocatePrefetchInstr == 0; it shares the form3_mem_prefetch_write encoding
// (function code 2 in the format) with prefetchw.
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr == 0);
  match( PrefetchAllocation mem );
  ins_cost(MEMORY_REF_COST);
  size(4);

  format %{ "PREFETCH $mem,2\t! Prefetch allocation" %}
  opcode(Assembler::prefetch_op3);
  ins_encode( form3_mem_prefetch_write( mem ) );
  ins_pipe(iload_mem);
%}

// Use BIS instruction to prefetch for allocation.
// Could fault, need space at the end of TLAB.
// Selected only when AllocatePrefetchInstr == 1.  Takes the address in a
// pointer register rather than a memory operand and emits a single STXA of
// G0 to [$dst + G0] with ASI_ST_BLKINIT_PRIMARY.
instruct prefetchAlloc_bis( iRegP dst ) %{
  predicate(AllocatePrefetchInstr == 1);
  match( PrefetchAllocation dst );
  ins_cost(MEMORY_REF_COST);
  size(4);

  format %{ "STXA   [$dst]\t! // Prefetch allocation using BIS" %}
  ins_encode %{
    __ stxa(G0, $dst$$Register, G0, Assembler::ASI_ST_BLKINIT_PRIMARY);
  %}
  ins_pipe(istore_mem_reg);
%}

// Next code is used for finding next cache line address to prefetch.
#ifndef _LP64
// Variant compiled when _LP64 is not defined: the pointer is masked as an int
// (AndI with an immI13 mask).  The CastP2X / AndI / CastX2P chain collapses
// into one AND-immediate from src into dst.
instruct cacheLineAdr( iRegP dst, iRegP src, immI13 mask ) %{
  match(Set dst (CastX2P (AndI (CastP2X src) mask)));
  ins_cost(DEFAULT_COST);
  size(4);

  format %{ "AND    $src,$mask,$dst\t! next cache line address" %}
  ins_encode %{
    __ and3($src$$Register, $mask$$constant, $dst$$Register);
  %}
  ins_pipe(ialu_reg_imm);
%}
#else
// Variant compiled when _LP64 is defined: the pointer is masked as a long
// (AndL with an immL13 mask).  The CastP2X / AndL / CastX2P chain collapses
// into one AND-immediate from src into dst.
instruct cacheLineAdr( iRegP dst, iRegP src, immL13 mask ) %{
  match(Set dst (CastX2P (AndL (CastP2X src) mask)));
  ins_cost(DEFAULT_COST);
  size(4);

  format %{ "AND    $src,$mask,$dst\t! next cache line address" %}
  ins_encode %{
    __ and3($src$$Register, $mask$$constant, $dst$$Register);
  %}
  ins_pipe(ialu_reg_imm);
%}
#endif

//----------Store Instructions-------------------------------------------------
// Store Byte.
// Matches StoreB of an integer register and emits one STB (opcode stb_op3)
// via the shared simple_form3_mem_reg encoding.
instruct storeB(memory mem, iRegI src) %{
  match(Set mem (StoreB mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STB    $src,$mem\t! byte" %}
  opcode(Assembler::stb_op3);
  ins_encode(simple_form3_mem_reg( mem, src ) );
  ins_pipe(istore_mem_reg);
%}

// Store Byte zero.  Matches StoreB of the immI0 constant and stores R_G0
// directly, so the zero never has to be loaded into a register.
instruct storeB0(memory mem, immI0 src) %{
  match(Set mem (StoreB mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STB    $src,$mem\t! byte" %}
  opcode(Assembler::stb_op3);
  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
  ins_pipe(istore_mem_zero);
%}

// Store card-mark byte zero.  Matches StoreCM of the immI0 constant and emits
// the same STB of R_G0 as storeB0; only the matched ideal node differs.
instruct storeCM0(memory mem, immI0 src) %{
  match(Set mem (StoreCM mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STB    $src,$mem\t! CMS card-mark byte 0" %}
  opcode(Assembler::stb_op3);
  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
  ins_pipe(istore_mem_zero);
%}

// Store Char/Short.
// Matches StoreC of an integer register and emits one STH (opcode sth_op3)
// via the shared simple_form3_mem_reg encoding.
instruct storeC(memory mem, iRegI src) %{
  match(Set mem (StoreC mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STH    $src,$mem\t! short" %}
  opcode(Assembler::sth_op3);
  ins_encode(simple_form3_mem_reg( mem, src ) );
  ins_pipe(istore_mem_reg);
%}

// Store Char/Short zero.  Matches StoreC of the immI0 constant and stores
// R_G0 directly, so the zero never has to be loaded into a register.
instruct storeC0(memory mem, immI0 src) %{
  match(Set mem (StoreC mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STH    $src,$mem\t! short" %}
  opcode(Assembler::sth_op3);
  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
  ins_pipe(istore_mem_zero);
%}

// Store Integer.
// Matches StoreI of an integer register and emits one STW (opcode stw_op3)
// via the shared simple_form3_mem_reg encoding.
instruct storeI(memory mem, iRegI src) %{
  match(Set mem (StoreI mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STW    $src,$mem" %}
  opcode(Assembler::stw_op3);
  ins_encode(simple_form3_mem_reg( mem, src ) );
  ins_pipe(istore_mem_reg);
%}

// Store Long.
// Matches StoreL of a long register and emits one STX (opcode stx_op3) via
// the shared simple_form3_mem_reg encoding.
instruct storeL(memory mem, iRegL src) %{
  match(Set mem (StoreL mem src));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "STX    $src,$mem\t! long" %}
  opcode(Assembler::stx_op3);
  ins_encode(simple_form3_mem_reg( mem, src ) );
  ins_pipe(istore_mem_reg);
%}

// Store Integer zero.  Matches StoreI of the immI0 constant and stores R_G0
// directly, so the zero never has to be loaded into a register.
instruct storeI0(memory mem, immI0 src) %{
  match(Set mem (StoreI mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STW    $src,$mem" %}
  opcode(Assembler::stw_op3);
  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
  ins_pipe(istore_mem_zero);
%}

// Store Long zero.  Matches StoreL of the immL0 constant and stores R_G0 with
// STX, so the zero never has to be loaded into a register.
instruct storeL0(memory mem, immL0 src) %{
  match(Set mem (StoreL mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STX    $src,$mem" %}
  opcode(Assembler::stx_op3);
  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
  ins_pipe(istore_mem_zero);
%}

// Store Integer from float register (used after fstoi).
// Matches the same StoreI node as storeI but takes the value from a float
// register (regF) and emits STF (opcode stf_op3), so an int produced by a
// floating-point conversion can be stored without a register-file move.
instruct storeI_Freg(memory mem, regF src) %{
  match(Set mem (StoreI mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STF    $src,$mem\t! after fstoi/fdtoi" %}
  opcode(Assembler::stf_op3);
  ins_encode(simple_form3_mem_reg( mem, src ) );
  ins_pipe(fstoreF_mem_reg);
%}

// Store Pointer.
// Matches StoreP of a register from the sp_ptr_RegP class.  The opcode is
// chosen at build time: STW when _LP64 is not defined, STX when it is.  The
// opcode declaration carries REGP_OP as its third field and the form3_mem_reg
// encoding is used instead of the simple_form3_mem_reg used by the integer
// stores.
instruct storeP(memory dst, sp_ptr_RegP src) %{
  match(Set dst (StoreP dst src));
  ins_cost(MEMORY_REF_COST);
  size(4);

#ifndef _LP64
  format %{ "STW    $src,$dst\t! ptr" %}
  opcode(Assembler::stw_op3, 0, REGP_OP);
#else
  format %{ "STX    $src,$dst\t! ptr" %}
  opcode(Assembler::stx_op3, 0, REGP_OP);
#endif
  ins_encode( form3_mem_reg( dst, src ) );
  ins_pipe(istore_mem_spORreg);
%}

// Store the pointer constant matched by immP0.  Same opcode selection as
// storeP (STW when _LP64 is not defined, STX when it is), but R_G0 is stored
// directly so the constant never has to be loaded into a register.
instruct storeP0(memory dst, immP0 src) %{
  match(Set dst (StoreP dst src));
  ins_cost(MEMORY_REF_COST);
  size(4);

#ifndef _LP64
  format %{ "STW    $src,$dst\t! ptr" %}
  opcode(Assembler::stw_op3, 0, REGP_OP);
#else
  format %{ "STX    $src,$dst\t! ptr" %}
  opcode(Assembler::stx_op3, 0, REGP_OP);
#endif
  ins_encode( form3_mem_reg( dst, R_G0 ) );
  ins_pipe(istore_mem_zero);
%}

// Store Compressed Pointer.
// Matches StoreN of a narrow-oop register and emits exactly one STW.  The
// addressing form depends on the memory operand: base + displacement when the
// operand's index register is G0, base + index otherwise.
instruct storeN(memory dst, iRegN src) %{
   match(Set dst (StoreN dst src));
   ins_cost(MEMORY_REF_COST);
   size(4);

   format %{ "STW    $src,$dst\t! compressed ptr" %}
   ins_encode %{
     Register Rsrc   = $src$$Register;
     Register Rbase  = as_Register($dst$$base);
     Register Rindex = as_Register($dst$$index);
     if (Rindex == G0) {
       // index is G0: use the base + displacement form
       __ stw(Rsrc, Rbase, $dst$$disp);
     } else {
       __ stw(Rsrc, Rbase, Rindex);
     }
   %}
   ins_pipe(istore_mem_spORreg);
%}

// Store the compressed NULL pointer (immN0).  Emits exactly one STW whose
// source argument is the literal 0 instead of an allocated register.  The
// addressing form depends on the memory operand: base + displacement when the
// operand's index register is G0, base + index otherwise.
instruct storeN0(memory dst, immN0 src) %{
   match(Set dst (StoreN dst src));
   ins_cost(MEMORY_REF_COST);
   size(4);

   format %{ "STW    $src,$dst\t! compressed ptr" %}
   ins_encode %{
     Register Rbase  = as_Register($dst$$base);
     Register Rindex = as_Register($dst$$index);
     if (Rindex == G0) {
       // index is G0: use the base + displacement form
       __ stw(0, Rbase, $dst$$disp);
     } else {
       __ stw(0, Rbase, Rindex);
     }
   %}
   ins_pipe(istore_mem_zero);
%}

// Store Double.
// Matches StoreD of a double register and emits one STDF (opcode stdf_op3)
// via the shared simple_form3_mem_reg encoding.
instruct storeD( memory mem, regD src) %{
  match(Set mem (StoreD mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STDF   $src,$mem" %}
  opcode(Assembler::stdf_op3);
  ins_encode(simple_form3_mem_reg( mem, src ) );
  ins_pipe(fstoreD_mem_reg);
%}

// Store the double constant matched by immD0.  Instead of a floating-point
// store this emits an integer STX of R_G0, so no FP register is needed.
instruct storeD0( memory mem, immD0 src) %{
  match(Set mem (StoreD mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STX    $src,$mem" %}
  opcode(Assembler::stx_op3);
  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
  ins_pipe(fstoreD_mem_zero);
%}

// Store Float.
// Matches StoreF of a float register and emits one STF (opcode stf_op3) via
// the shared simple_form3_mem_reg encoding.
instruct storeF( memory mem, regF src) %{
  match(Set mem (StoreF mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STF    $src,$mem" %}
  opcode(Assembler::stf_op3);
  ins_encode(simple_form3_mem_reg( mem, src ) );
  ins_pipe(fstoreF_mem_reg);
%}

// Store the float constant matched by immF0.  Instead of a floating-point
// store this emits an integer STW of R_G0, so no FP register is needed.
instruct storeF0( memory mem, immF0 src) %{
  match(Set mem (StoreF mem src));
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STW    $src,$mem\t! storeF0" %}
  opcode(Assembler::stw_op3);
  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
  ins_pipe(fstoreF_mem_zero);
%}

// Store Aligned Packed Bytes in Double register to memory.
// Matches Store8B of a double register and emits one STDF (opcode stdf_op3)
// via the shared simple_form3_mem_reg encoding.
instruct storeA8B(memory mem, regD src) %{
  match(Set mem (Store8B mem src));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "STDF   $src,$mem\t! packed8B" %}
  opcode(Assembler::stdf_op3);
  ins_encode(simple_form3_mem_reg( mem, src ) );
  ins_pipe(fstoreD_mem_reg);
%}

// Convert oop pointer into compressed form.
// General EncodeP variant: selected when the node's pointer type is anything
// other than TypePtr::NotNull.  Delegates to the encode_heap_oop macro
// (defined outside this view); no size() is declared, so the emitted length
// is not fixed here.
instruct encodeHeapOop(iRegN dst, iRegP src) %{
  predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull);
  match(Set dst (EncodeP src));
  format %{ "encode_heap_oop $src, $dst" %}
  ins_encode %{
    __ encode_heap_oop($src$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// EncodeP variant for pointers typed TypePtr::NotNull; its predicate is the
// exact complement of the one on encodeHeapOop.  Delegates to the
// encode_heap_oop_not_null macro (defined outside this view).
instruct encodeHeapOop_not_null(iRegN dst, iRegP src) %{
  predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull);
  match(Set dst (EncodeP src));
  format %{ "encode_heap_oop_not_null $src, $dst" %}
  ins_encode %{
    __ encode_heap_oop_not_null($src$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Convert a compressed oop back into a full pointer.
// General DecodeN variant: selected when the node's oop pointer type is
// neither TypePtr::NotNull nor TypePtr::Constant.  Delegates to the
// decode_heap_oop macro (defined outside this view).
instruct decodeHeapOop(iRegP dst, iRegN src) %{
  predicate(n->bottom_type()->is_oopptr()->ptr() != TypePtr::NotNull &&
            n->bottom_type()->is_oopptr()->ptr() != TypePtr::Constant);
  match(Set dst (DecodeN src));
  format %{ "decode_heap_oop $src, $dst" %}
  ins_encode %{
    __ decode_heap_oop($src$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// DecodeN variant for oop pointers typed TypePtr::NotNull or
// TypePtr::Constant; its predicate is the exact complement of the one on
// decodeHeapOop.  Delegates to the decode_heap_oop_not_null macro (defined
// outside this view).
instruct decodeHeapOop_not_null(iRegP dst, iRegN src) %{
  predicate(n->bottom_type()->is_oopptr()->ptr() == TypePtr::NotNull ||
            n->bottom_type()->is_oopptr()->ptr() == TypePtr::Constant);
  match(Set dst (DecodeN src));
  format %{ "decode_heap_oop_not_null $src, $dst" %}
  ins_encode %{
    __ decode_heap_oop_not_null($src$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}


// Store Zero into Aligned Packed Bytes.
// Matches Store8B whose value input is the immI0 constant and emits an
// integer STX of R_G0, so no FP register is needed.
// NOTE(review): storeA4C0 wraps its zero in a Replicate4C node whereas this
// rule matches the bare constant - confirm which shape the compiler produces.
instruct storeA8B0(memory mem, immI0 zero) %{
  match(Set mem (Store8B mem zero));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "STX    $zero,$mem\t! packed8B" %}
  opcode(Assembler::stx_op3);
  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
  ins_pipe(fstoreD_mem_zero);
%}

// Store Aligned Packed Chars/Shorts in Double register to memory.
// Matches Store4C of a double register and emits one STDF (opcode stdf_op3)
// via the shared simple_form3_mem_reg encoding.
instruct storeA4C(memory mem, regD src) %{
  match(Set mem (Store4C mem src));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "STDF   $src,$mem\t! packed4C" %}
  opcode(Assembler::stdf_op3);
  ins_encode(simple_form3_mem_reg( mem, src ) );
  ins_pipe(fstoreD_mem_reg);
%}

// Store Zero into Aligned Packed Chars/Shorts.
// Matches Store4C whose value input is Replicate4C of the immI0 constant and
// emits an integer STX of R_G0, so neither the replicate nor an FP register
// is needed.
instruct storeA4C0(memory mem, immI0 zero) %{
  match(Set mem (Store4C mem (Replicate4C zero)));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "STX    $zero,$mem\t! packed4C" %}
  opcode(Assembler::stx_op3);
  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
  ins_pipe(fstoreD_mem_zero);
%}

// Store Aligned Packed Ints in Double register to memory.
// Matches Store2I of a double register and emits one STDF (opcode stdf_op3)
// via the shared simple_form3_mem_reg encoding.
instruct storeA2I(memory mem, regD src) %{
  match(Set mem (Store2I mem src));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "STDF   $src,$mem\t! packed2I" %}
  opcode(Assembler::stdf_op3);
  ins_encode(simple_form3_mem_reg( mem, src ) );
  ins_pipe(fstoreD_mem_reg);
%}

// Store Zero into Aligned Packed Ints.
// Matches Store2I whose value input is the immI0 constant and emits an
// integer STX of R_G0, so no FP register is needed.
// NOTE(review): storeA4C0 wraps its zero in a Replicate4C node whereas this
// rule matches the bare constant - confirm which shape the compiler produces.
instruct storeA2I0(memory mem, immI0 zero) %{
  match(Set mem (Store2I mem zero));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "STX    $zero,$mem\t! packed2I" %}
  opcode(Assembler::stx_op3);
  ins_encode(simple_form3_mem_reg( mem, R_G0 ) );
  ins_pipe(fstoreD_mem_zero);
%}


//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors

// Acquire barrier.  Matches MemBarAcquire with a high cost
// (4*MEMORY_REF_COST) but declares size(0), i.e. no bytes of code.
// NOTE(review): enc_membar_acquire is defined outside this view; size(0)
// implies it emits nothing on this platform - confirm.
instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(4*MEMORY_REF_COST);

  size(0);
  format %{ "MEMBAR-acquire" %}
  ins_encode( enc_membar_acquire );
  ins_pipe(long_memory_op);
%}

// Acquire barrier paired with a lock.  Matches MemBarAcquireLock and emits
// nothing (empty encoding, size 0, cost 0): per the format text the CAS in
// the preceding FastLock already provides the ordering.
instruct membar_acquire_lock() %{
  match(MemBarAcquireLock);
  ins_cost(0);

  size(0);
  format %{ "!MEMBAR-acquire (CAS in prior FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Release barrier.  Matches MemBarRelease with a high cost
// (4*MEMORY_REF_COST) but declares size(0), i.e. no bytes of code.
// NOTE(review): enc_membar_release is defined outside this view; size(0)
// implies it emits nothing on this platform - confirm.
instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(4*MEMORY_REF_COST);

  size(0);
  format %{ "MEMBAR-release" %}
  ins_encode( enc_membar_release );
  ins_pipe(long_memory_op);
%}

// Release barrier paired with an unlock.  Matches MemBarReleaseLock and emits
// nothing (empty encoding, size 0, cost 0): per the format text the CAS in
// the following FastUnlock already provides the ordering.
instruct membar_release_lock() %{
  match(MemBarReleaseLock);
  ins_cost(0);

  size(0);
  format %{ "!MEMBAR-release (CAS in succeeding FastUnlock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Volatile barrier.  Matches MemBarVolatile and, unlike the acquire/release
// flavors, declares one real 4-byte instruction emitted by
// enc_membar_volatile (defined outside this view).  The zero-cost
// unnecessary_membar_volatile rule takes precedence when its predicate holds.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(4*MEMORY_REF_COST);

  size(4);
  format %{ "MEMBAR-volatile" %}
  ins_encode( enc_membar_volatile );
  ins_pipe(long_memory_op);
%}

// Elided volatile barrier.  Matches MemBarVolatile only when
// Matcher::post_store_load_barrier(n) holds; in that case nothing is emitted
// (empty encoding, size 0) and the cost of 0 makes this rule win over
// membar_volatile.
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "!MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Store-store barrier.  Matches MemBarStoreStore and emits nothing (empty
// encoding, size 0, cost 0).
instruct membar_storestore() %{
  match(MemBarStoreStore);
  ins_cost(0);

  size(0);
  format %{ "!MEMBAR-storestore (empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

//----------Register Move Instructions-----------------------------------------
// RoundDouble is a no-op here: the rule matches the node in place
// (dst = RoundDouble dst), costs nothing and has an empty encoding.
instruct roundDouble_nop(regD dst) %{
  match(Set dst (RoundDouble dst));
  ins_cost(0);
  // SPARC results are already "rounded" (i.e., normal-format IEEE)
  ins_encode( );
  ins_pipe(empty);
%}


// RoundFloat is a no-op here: the rule matches the node in place
// (dst = RoundFloat dst), costs nothing and has an empty encoding.
instruct roundFloat_nop(regF dst) %{
  match(Set dst (RoundFloat dst));
  ins_cost(0);
  // SPARC results are already "rounded" (i.e., normal-format IEEE)
  ins_encode( );
  ins_pipe(empty);
%}


// Cast Index to Pointer for unsafe natives.
// Matches CastX2P and emits a register-to-register MOV through the
// form3_g0_rs2_rd_move encoding (defined outside this view).  Note that the
// source operand is listed before the destination in the operand list.
instruct castX2P(iRegX src, iRegP dst) %{
  match(Set dst (CastX2P src));

  format %{ "MOV    $src,$dst\t! IntX->Ptr" %}
  ins_encode( form3_g0_rs2_rd_move( src, dst ) );
  ins_pipe(ialu_reg);
%}

// Cast Pointer to Index for unsafe natives.
// Matches CastP2X and emits a register-to-register MOV through the
// form3_g0_rs2_rd_move encoding (defined outside this view).  Note that the
// source operand is listed before the destination in the operand list.
instruct castP2X(iRegP src, iRegX dst) %{
  match(Set dst (CastP2X src));

  format %{ "MOV    $src,$dst\t! Ptr->IntX" %}
  ins_encode( form3_g0_rs2_rd_move( src, dst ) );
  ins_pipe(ialu_reg);
%}

// Chain rule moving a double register into a double stack slot: one STDF
// (opcode stdf_op3) with the stack slot used as the memory operand.
instruct stfSSD(stackSlotD stkSlot, regD src) %{
  // %%%% TO DO: Tell the coalescer that this kind of node is a copy!
  match(Set stkSlot src);   // chain rule
  ins_cost(MEMORY_REF_COST);
  format %{ "STDF   $src,$stkSlot\t!stk" %}
  opcode(Assembler::stdf_op3);
  ins_encode(simple_form3_mem_reg(stkSlot, src));
  ins_pipe(fstoreD_stk_reg);
%}

// Chain rule moving a double stack slot into a double register: one LDDF
// (opcode lddf_op3) with the stack slot used as the memory operand.
instruct ldfSSD(regD dst, stackSlotD stkSlot) %{
  // %%%% TO DO: Tell the coalescer that this kind of node is a copy!
  match(Set dst stkSlot);   // chain rule
  ins_cost(MEMORY_REF_COST);
  format %{ "LDDF   $stkSlot,$dst\t!stk" %}
  opcode(Assembler::lddf_op3);
  ins_encode(simple_form3_mem_reg(stkSlot, dst));
  ins_pipe(floadD_stk);
%}

// Chain rule moving a float register into a float stack slot: one STF
// (opcode stf_op3) with the stack slot used as the memory operand.
instruct stfSSF(stackSlotF stkSlot, regF src) %{
  // %%%% TO DO: Tell the coalescer that this kind of node is a copy!
  match(Set stkSlot src);   // chain rule
  ins_cost(MEMORY_REF_COST);
  format %{ "STF   $src,$stkSlot\t!stk" %}
  opcode(Assembler::stf_op3);
  ins_encode(simple_form3_mem_reg(stkSlot, src));
  ins_pipe(fstoreF_stk_reg);
%}

//----------Conditional Move---------------------------------------------------
// Conditional move of an int register, selected by pointer-compare flags.
// $dst appears on both sides of the match: it keeps its value unless the
// condition selects $src. Encoded by enc_cmov_reg with Assembler::ptr_cc.
instruct cmovIP_reg(cmpOpP cmp, flagsRegP pcc, iRegI dst, iRegI src) %{
  match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src)));
  ins_cost(150);
  format %{ "MOV$cmp $pcc,$src,$dst" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::ptr_cc)) );
  ins_pipe(ialu_reg);
%}

// Conditional move of an immI11 constant into an int register, selected by
// pointer-compare flags. Cost 140, i.e. cheaper than the register form (150).
instruct cmovIP_imm(cmpOpP cmp, flagsRegP pcc, iRegI dst, immI11 src) %{
  match(Set dst (CMoveI (Binary cmp pcc) (Binary dst src)));
  ins_cost(140);
  format %{ "MOV$cmp $pcc,$src,$dst" %}
  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::ptr_cc)) );
  ins_pipe(ialu_imm);
%}

// Conditional move of an int register, selected by signed int-compare flags
// (cmpOp / flagsReg). Encoded by enc_cmov_reg with Assembler::icc; 4 bytes.
instruct cmovII_reg(cmpOp cmp, flagsReg icc, iRegI dst, iRegI src) %{
  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $icc,$src,$dst" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(ialu_reg);
%}

// Conditional move of an immI11 constant into an int register, selected by
// signed int-compare flags. Encoded by enc_cmov_imm with Assembler::icc.
instruct cmovII_imm(cmpOp cmp, flagsReg icc, iRegI dst, immI11 src) %{
  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
  ins_cost(140);
  size(4);
  format %{ "MOV$cmp  $icc,$src,$dst" %}
  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(ialu_imm);
%}

// Unsigned-compare counterpart of cmovII_reg: same encoding, but the
// condition operands are cmpOpU / flagsRegU.
instruct cmovIIu_reg(cmpOpU cmp, flagsRegU icc, iRegI dst, iRegI src) %{
  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $icc,$src,$dst" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(ialu_reg);
%}

// Unsigned-compare counterpart of cmovII_imm: same encoding, but the
// condition operands are cmpOpU / flagsRegU.
instruct cmovIIu_imm(cmpOpU cmp, flagsRegU icc, iRegI dst, immI11 src) %{
  match(Set dst (CMoveI (Binary cmp icc) (Binary dst src)));
  ins_cost(140);
  size(4);
  format %{ "MOV$cmp  $icc,$src,$dst" %}
  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(ialu_imm);
%}

// Conditional move of an int register, selected by floating-point compare
// flags. The flags register operand itself ($fcc) is passed to enc_cmov_reg_f.
instruct cmovIF_reg(cmpOpF cmp, flagsRegF fcc, iRegI dst, iRegI src) %{
  match(Set dst (CMoveI (Binary cmp fcc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "MOV$cmp $fcc,$src,$dst" %}
  ins_encode( enc_cmov_reg_f(cmp,dst,src, fcc) );
  ins_pipe(ialu_reg);
%}

// Conditional move of an immI11 constant into an int register, selected by
// floating-point compare flags. Encoded by enc_cmov_imm_f.
instruct cmovIF_imm(cmpOpF cmp, flagsRegF fcc, iRegI dst, immI11 src) %{
  match(Set dst (CMoveI (Binary cmp fcc) (Binary dst src)));
  ins_cost(140);
  size(4);
  format %{ "MOV$cmp $fcc,$src,$dst" %}
  ins_encode( enc_cmov_imm_f(cmp,dst,src, fcc) );
  ins_pipe(ialu_imm);
%}

// Conditional move for RegN. Only cmov(reg,reg).
// This variant is selected by pointer-compare flags (Assembler::ptr_cc).
instruct cmovNP_reg(cmpOpP cmp, flagsRegP pcc, iRegN dst, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp pcc) (Binary dst src)));
  ins_cost(150);
  format %{ "MOV$cmp $pcc,$src,$dst" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::ptr_cc)) );
  ins_pipe(ialu_reg);
%}

// Conditional move of an iRegN register, selected by signed int-compare
// flags (Assembler::icc).
// This instruction also works with CmpN so we don't need cmovNN_reg.
instruct cmovNI_reg(cmpOp cmp, flagsReg icc, iRegN dst, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp icc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $icc,$src,$dst" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(ialu_reg);
%}

// Conditional move of an iRegN register, selected by unsigned int-compare
// flags (cmpOpU / flagsRegU, Assembler::icc).
// This instruction also works with CmpN so we don't need cmovNN_reg.
instruct cmovNIu_reg(cmpOpU cmp, flagsRegU icc, iRegN dst, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp icc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "MOV$cmp  $icc,$src,$dst" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(ialu_reg);
%}

// Conditional move of an iRegN register, selected by floating-point compare
// flags. Encoded by enc_cmov_reg_f with the $fcc operand.
instruct cmovNF_reg(cmpOpF cmp, flagsRegF fcc, iRegN dst, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp fcc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "MOV$cmp $fcc,$src,$dst" %}
  ins_encode( enc_cmov_reg_f(cmp,dst,src, fcc) );
  ins_pipe(ialu_reg);
%}

// Conditional move of a pointer register, selected by pointer-compare flags.
// Encoded by enc_cmov_reg with Assembler::ptr_cc.
instruct cmovPP_reg(cmpOpP cmp, flagsRegP pcc, iRegP dst, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src)));
  ins_cost(150);
  format %{ "MOV$cmp $pcc,$src,$dst\t! ptr" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::ptr_cc)) );
  ins_pipe(ialu_reg);
%}

// Conditional move of the immP0 constant into a pointer register, selected
// by pointer-compare flags. Encoded by enc_cmov_imm with Assembler::ptr_cc.
instruct cmovPP_imm(cmpOpP cmp, flagsRegP pcc, iRegP dst, immP0 src) %{
  match(Set dst (CMoveP (Binary cmp pcc) (Binary dst src)));
  ins_cost(140);
  format %{ "MOV$cmp $pcc,$src,$dst\t! ptr" %}
  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::ptr_cc)) );
  ins_pipe(ialu_imm);
%}

// Conditional move of a pointer register, selected by signed int-compare
// flags (Assembler::icc).
// This instruction also works with CmpN so we don't need cmovPN_reg.
instruct cmovPI_reg(cmpOp cmp, flagsReg icc, iRegP dst, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "MOV$cmp  $icc,$src,$dst\t! ptr" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(ialu_reg);
%}

// Unsigned-compare counterpart of cmovPI_reg: same encoding, but the
// condition operands are cmpOpU / flagsRegU.
instruct cmovPIu_reg(cmpOpU cmp, flagsRegU icc, iRegP dst, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "MOV$cmp  $icc,$src,$dst\t! ptr" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(ialu_reg);
%}

// Conditional move of the immP0 constant into a pointer register, selected
// by signed int-compare flags. Encoded by enc_cmov_imm with Assembler::icc.
instruct cmovPI_imm(cmpOp cmp, flagsReg icc, iRegP dst, immP0 src) %{
  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
  ins_cost(140);

  size(4);
  format %{ "MOV$cmp  $icc,$src,$dst\t! ptr" %}
  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(ialu_imm);
%}

// Unsigned-compare counterpart of cmovPI_imm: same encoding, but the
// condition operands are cmpOpU / flagsRegU.
instruct cmovPIu_imm(cmpOpU cmp, flagsRegU icc, iRegP dst, immP0 src) %{
  match(Set dst (CMoveP (Binary cmp icc) (Binary dst src)));
  ins_cost(140);

  size(4);
  format %{ "MOV$cmp  $icc,$src,$dst\t! ptr" %}
  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(ialu_imm);
%}

// Conditional move of a pointer register, selected by floating-point compare
// flags. Encoded by enc_cmov_reg_f with the $fcc operand.
// The source is a register, so this uses the ialu_reg pipeline class like the
// other register-source conditional moves (cmovIF_reg, cmovNF_reg,
// cmovLF_reg); ialu_imm is the class used by the immediate-source forms.
instruct cmovPF_reg(cmpOpF cmp, flagsRegF fcc, iRegP dst, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp fcc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "MOV$cmp $fcc,$src,$dst" %}
  ins_encode( enc_cmov_reg_f(cmp,dst,src, fcc) );
  ins_pipe(ialu_reg);
%}

// Conditional move of the immP0 constant into a pointer register, selected
// by floating-point compare flags. Encoded by enc_cmov_imm_f.
instruct cmovPF_imm(cmpOpF cmp, flagsRegF fcc, iRegP dst, immP0 src) %{
  match(Set dst (CMoveP (Binary cmp fcc) (Binary dst src)));
  ins_cost(140);
  size(4);
  format %{ "MOV$cmp $fcc,$src,$dst" %}
  ins_encode( enc_cmov_imm_f(cmp,dst,src, fcc) );
  ins_pipe(ialu_imm);
%}

// Conditional move of a single-precision float register (regF), selected by
// pointer-compare flags. Encoded by enc_cmovf_reg with Assembler::ptr_cc.
// opcode 0x101 is the value the other regF rules (cmovFI_reg, cmovFIu_reg)
// use, so the disassembly text prints FMOVS like they do; FMOVD is what the
// regD rules with opcode 0x102 print.
instruct cmovFP_reg(cmpOpP cmp, flagsRegP pcc, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp pcc) (Binary dst src)));
  ins_cost(150);
  opcode(0x101);
  format %{ "FMOVS$cmp $pcc,$src,$dst" %}
  ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::ptr_cc)) );
  ins_pipe(int_conditional_float_move);
%}

// Conditional move of a single-precision float register (regF), selected by
// signed int-compare flags. opcode 0x101, encoded by enc_cmovf_reg with
// Assembler::icc.
instruct cmovFI_reg(cmpOp cmp, flagsReg icc, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "FMOVS$cmp $icc,$src,$dst" %}
  opcode(0x101);
  ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(int_conditional_float_move);
%}

// Unsigned-compare counterpart of cmovFI_reg: same opcode and encoding, but
// the condition operands are cmpOpU / flagsRegU.
instruct cmovFIu_reg(cmpOpU cmp, flagsRegU icc, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "FMOVS$cmp $icc,$src,$dst" %}
  opcode(0x101);
  ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(int_conditional_float_move);
%}

// Conditional move of a single-precision float register (regF), selected by
// floating-point compare flags. opcode 0x1, encoded by enc_cmovff_reg.
// NOTE(review): the double-precision sibling cmovDF_reg prints "FMOVD", so
// "FMOVF" here is presumably meant to be "FMOVS" -- confirm before changing.
// NOTE(review): this regF rule uses int_conditional_double_move, whereas the
// other regF conditional moves use int_conditional_float_move -- confirm.
instruct cmovFF_reg(cmpOpF cmp, flagsRegF fcc, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp fcc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "FMOVF$cmp $fcc,$src,$dst" %}
  opcode(0x1);
  ins_encode( enc_cmovff_reg(cmp,fcc,dst,src) );
  ins_pipe(int_conditional_double_move);
%}

// Conditional move of a double-precision register (regD), selected by
// pointer-compare flags. opcode 0x102, encoded by enc_cmovf_reg with
// Assembler::ptr_cc.
instruct cmovDP_reg(cmpOpP cmp, flagsRegP pcc, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp pcc) (Binary dst src)));
  ins_cost(150);
  size(4);
  opcode(0x102);
  format %{ "FMOVD$cmp $pcc,$src,$dst" %}
  ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::ptr_cc)) );
  ins_pipe(int_conditional_double_move);
%}

// Conditional move of a double-precision register (regD), selected by signed
// int-compare flags. opcode 0x102, encoded by enc_cmovf_reg with
// Assembler::icc.
instruct cmovDI_reg(cmpOp cmp, flagsReg icc, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "FMOVD$cmp $icc,$src,$dst" %}
  opcode(0x102);
  ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(int_conditional_double_move);
%}

// Unsigned-compare counterpart of cmovDI_reg: same opcode and encoding, but
// the condition operands are cmpOpU / flagsRegU.
instruct cmovDIu_reg(cmpOpU cmp, flagsRegU icc, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "FMOVD$cmp $icc,$src,$dst" %}
  opcode(0x102);
  ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(int_conditional_double_move);
%}

// Conditional move of a double-precision register (regD), selected by
// floating-point compare flags. opcode 0x2, encoded by enc_cmovff_reg.
instruct cmovDF_reg(cmpOpF cmp, flagsRegF fcc, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp fcc) (Binary dst src)));
  ins_cost(150);
  size(4);
  format %{ "FMOVD$cmp $fcc,$src,$dst" %}
  opcode(0x2);
  ins_encode( enc_cmovff_reg(cmp,fcc,dst,src) );
  ins_pipe(int_conditional_double_move);
%}

// Conditional move of a long register, selected by pointer-compare flags.
// Encoded by enc_cmov_reg with Assembler::ptr_cc.
instruct cmovLP_reg(cmpOpP cmp, flagsRegP pcc, iRegL dst, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src)));
  ins_cost(150);
  format %{ "MOV$cmp $pcc,$src,$dst\t! long" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::ptr_cc)) );
  ins_pipe(ialu_reg);
%}

// Conditional move of an immI11 constant into a long register, selected by
// pointer-compare flags. Encoded by enc_cmov_imm with Assembler::ptr_cc.
instruct cmovLP_imm(cmpOpP cmp, flagsRegP pcc, iRegL dst, immI11 src) %{
  match(Set dst (CMoveL (Binary cmp pcc) (Binary dst src)));
  ins_cost(140);
  format %{ "MOV$cmp $pcc,$src,$dst\t! long" %}
  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::ptr_cc)) );
  ins_pipe(ialu_imm);
%}

// Conditional move of a long register, selected by signed int-compare flags.
// Encoded by enc_cmov_reg with Assembler::icc.
instruct cmovLI_reg(cmpOp cmp, flagsReg icc, iRegL dst, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "MOV$cmp  $icc,$src,$dst\t! long" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(ialu_reg);
%}


// Unsigned-compare counterpart of cmovLI_reg: same encoding, but the
// condition operands are cmpOpU / flagsRegU.
instruct cmovLIu_reg(cmpOpU cmp, flagsRegU icc, iRegL dst, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp icc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "MOV$cmp  $icc,$src,$dst\t! long" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::icc)) );
  ins_pipe(ialu_reg);
%}


// Conditional move of a long register, selected by floating-point compare
// flags. Encoded by enc_cmov_reg_f with the $fcc operand.
instruct cmovLF_reg(cmpOpF cmp, flagsRegF fcc, iRegL dst, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp fcc) (Binary dst src)));
  ins_cost(150);

  size(4);
  format %{ "MOV$cmp  $fcc,$src,$dst\t! long" %}
  ins_encode( enc_cmov_reg_f(cmp,dst,src, fcc) );
  ins_pipe(ialu_reg);
%}



//----------OS and Locking Instructions----------------------------------------

// Thread-local pointer "load". The result operand is constrained to the
// g2RegP class and the rule has zero size, zero cost and an empty encoding,
// so matching ThreadLocal emits no code (see the format text).
// This name is KNOWN by the ADLC and cannot be changed.
// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
// for this guy.
instruct tlsLoadP(g2RegP dst) %{
  match(Set dst (ThreadLocal));

  size(0);
  ins_cost(0);
  format %{ "# TLS is in G2" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(ialu_none);
%}

// CheckCastPP matched in place on a pointer register. Zero size and an empty
// encoding: the node produces no machine code.
instruct checkCastPP( iRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "# checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(empty);
%}


// CastPP matched in place on a pointer register; the encoding is empty, so
// no machine code is emitted.
instruct castPP( iRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "# castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe(empty);
%}

// CastII matched in place on an int register; zero cost and an empty
// encoding, so no machine code is emitted.
instruct castII( iRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "# castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

//----------Arithmetic Instructions--------------------------------------------
// Addition Instructions
// Register Addition
// int + int, both operands in registers. Unlike the neighbouring add rules,
// the encoding is an inline block that calls the assembler's add() directly.
instruct addI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (AddI src1 src2));

  size(4);
  format %{ "ADD    $src1,$src2,$dst" %}
  ins_encode %{
    __ add($src1$$Register, $src2$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Immediate Addition
// int register + immI13 constant, encoded with add_op3 through
// form3_rs1_simm13_rd.
instruct addI_reg_imm13(iRegI dst, iRegI src1, immI13 src2) %{
  match(Set dst (AddI src1 src2));

  size(4);
  format %{ "ADD    $src1,$src2,$dst" %}
  opcode(Assembler::add_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Pointer Register Addition
// Pointer base (iRegP) + register offset (iRegX), matched from AddP and
// encoded with add_op3 through form3_rs1_rs2_rd.
instruct addP_reg_reg(iRegP dst, iRegP src1, iRegX src2) %{
  match(Set dst (AddP src1 src2));

  size(4);
  format %{ "ADD    $src1,$src2,$dst" %}
  opcode(Assembler::add_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Pointer Immediate Addition
// Pointer base (iRegP) + immX13 constant offset, matched from AddP and
// encoded with add_op3 through form3_rs1_simm13_rd.
instruct addP_reg_imm13(iRegP dst, iRegP src1, immX13 src2) %{
  match(Set dst (AddP src1 src2));

  size(4);
  format %{ "ADD    $src1,$src2,$dst" %}
  opcode(Assembler::add_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Long Addition
// long + long, both operands in registers; a single ADD (add_op3).
instruct addL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
  match(Set dst (AddL src1 src2));

  size(4);
  format %{ "ADD    $src1,$src2,$dst\t! long" %}
  opcode(Assembler::add_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Long Immediate Addition
// long register + immL13 constant; a single ADD (add_op3) through
// form3_rs1_simm13_rd.
instruct addL_reg_imm13(iRegL dst, iRegL src1, immL13 con) %{
  match(Set dst (AddL src1 con));

  size(4);
  format %{ "ADD    $src1,$con,$dst" %}
  opcode(Assembler::add_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, con, dst ) );
  ins_pipe(ialu_reg_imm);
%}

//----------Conditional_store--------------------------------------------------
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success.  Implemented with a CASA on Sparc.

// LoadP-locked.  Same as a regular pointer load when used with a compare-swap
// The opcode depends on the build: LDUW (lduw_op3) when _LP64 is not defined,
// LDX (ldx_op3) when it is. Only the non-_LP64 branch declares size(4).
instruct loadPLocked(iRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));
  ins_cost(MEMORY_REF_COST);

#ifndef _LP64
  size(4);
  format %{ "LDUW   $mem,$dst\t! ptr" %}
  opcode(Assembler::lduw_op3, 0, REGP_OP);
#else
  format %{ "LDX    $mem,$dst\t! ptr" %}
  opcode(Assembler::ldx_op3, 0, REGP_OP);
#endif
  ins_encode( form3_mem_reg( mem, dst ) );
  ins_pipe(iload_mem);
%}

// LoadL-locked.  Same as a regular long load when used with a compare-swap
// Encoded as an LDX (ldx_op3) through simple_form3_mem_reg.
instruct loadLLocked(iRegL dst, memory mem) %{
  match(Set dst (LoadLLocked mem));
  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "LDX    $mem,$dst\t! long" %}
  opcode(Assembler::ldx_op3);
  ins_encode(simple_form3_mem_reg( mem, dst ) );
  ins_pipe(iload_mem);
%}

// Conditional store of a pointer (the heap-top update); the result of the
// rule is the pointer condition-code register $pcc.
// $newval is constrained to the g3RegP class and is KILLed: per the format
// text the CAS leaves the old memory contents in it, and the following CMP
// against $oldval produces the flags.
instruct storePConditional( iRegP heap_top_ptr, iRegP oldval, g3RegP newval, flagsRegP pcc ) %{
  match(Set pcc (StorePConditional heap_top_ptr (Binary oldval newval)));
  effect( KILL newval );
  format %{ "CASA   [$heap_top_ptr],$oldval,R_G3\t! If $oldval==[$heap_top_ptr] Then store R_G3 into [$heap_top_ptr], set R_G3=[$heap_top_ptr] in any case\n\t"
            "CMP    R_G3,$oldval\t\t! See if we made progress"  %}
  ins_encode( enc_cas(heap_top_ptr,oldval,newval) );
  ins_pipe( long_memory_op );
%}

// Conditional-store of an int value.
// The result of the rule is the int condition-code register $icc. $newval is
// constrained to the g3RegI class and is KILLed: per the format text the CAS
// overwrites it with the memory contents before the CMP sets the flags.
instruct storeIConditional( iRegP mem_ptr, iRegI oldval, g3RegI newval, flagsReg icc ) %{
  match(Set icc (StoreIConditional mem_ptr (Binary oldval newval)));
  effect( KILL newval );
  format %{ "CASA   [$mem_ptr],$oldval,$newval\t! If $oldval==[$mem_ptr] Then store $newval into [$mem_ptr], set $newval=[$mem_ptr] in any case\n\t"
            "CMP    $oldval,$newval\t\t! See if we made progress"  %}
  ins_encode( enc_cas(mem_ptr,oldval,newval) );
  ins_pipe( long_memory_op );
%}

// Conditional-store of a long value.
// The result of the rule is the long condition-code register $xcc. $newval is
// constrained to the g3RegL class and is KILLed: per the format text the CAS
// overwrites it with the memory contents before the CMP sets the flags.
// NOTE(review): the format prints CASXA, but the encoder is the same enc_cas
// the int and pointer rules use (both print CASA); enc_cas is defined
// elsewhere -- confirm it emits the 64-bit form for this rule.
instruct storeLConditional( iRegP mem_ptr, iRegL oldval, g3RegL newval, flagsRegL xcc ) %{
  match(Set xcc (StoreLConditional mem_ptr (Binary oldval newval)));
  effect( KILL newval );
  format %{ "CASXA  [$mem_ptr],$oldval,$newval\t! If $oldval==[$mem_ptr] Then store $newval into [$mem_ptr], set $newval=[$mem_ptr] in any case\n\t"
            "CMP    $oldval,$newval\t\t! See if we made progress"  %}
  ins_encode( enc_cas(mem_ptr,oldval,newval) );
  ins_pipe( long_memory_op );
%}

// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them

// Compare-and-swap of a long, producing a boolean in $res.
// O7 is claimed as a scratch register (tmp1 is an o7RegI and is KILLed) and
// the condition codes are KILLed. Per the format text: $newval is copied to
// O7, CASXA swaps through O7, CMP compares $oldval with O7, then $res is set
// to 1 and cleared again when the compare was not-equal.
instruct compareAndSwapL_bool(iRegP mem_ptr, iRegL oldval, iRegL newval, iRegI res, o7RegI tmp1, flagsReg ccr ) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect( USE mem_ptr, KILL ccr, KILL tmp1);
  format %{
            "MOV    $newval,O7\n\t"
            "CASXA  [$mem_ptr],$oldval,O7\t! If $oldval==[$mem_ptr] Then store O7 into [$mem_ptr], set O7=[$mem_ptr] in any case\n\t"
            "CMP    $oldval,O7\t\t! See if we made progress\n\t"
            "MOV    1,$res\n\t"
            "MOVne  xcc,R_G0,$res"
  %}
  ins_encode( enc_casx(mem_ptr, oldval, newval),
              enc_lflags_ne_to_boolean(res) );
  ins_pipe( long_memory_op );
%}


// Compare-and-swap of an int, producing a boolean in $res.
// O7 is claimed as a scratch register (tmp1 is an o7RegI and is KILLed) and
// the condition codes are KILLed. Per the format text: $newval is copied to
// O7, CASA swaps through O7, CMP compares $oldval with O7, then $res is set
// to 1 and cleared again when the compare was not-equal.
instruct compareAndSwapI_bool(iRegP mem_ptr, iRegI oldval, iRegI newval, iRegI res, o7RegI tmp1, flagsReg ccr ) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect( USE mem_ptr, KILL ccr, KILL tmp1);
  format %{
            "MOV    $newval,O7\n\t"
            "CASA   [$mem_ptr],$oldval,O7\t! If $oldval==[$mem_ptr] Then store O7 into [$mem_ptr], set O7=[$mem_ptr] in any case\n\t"
            "CMP    $oldval,O7\t\t! See if we made progress\n\t"
            "MOV    1,$res\n\t"
            "MOVne  icc,R_G0,$res"
  %}
  ins_encode( enc_casi(mem_ptr, oldval, newval),
              enc_iflags_ne_to_boolean(res) );
  ins_pipe( long_memory_op );
%}

// Compare-and-swap of a pointer, producing a boolean in $res.
// O7 is claimed as a scratch register (tmp1 is an o7RegI and is KILLed) and
// the condition codes are KILLed. The encoding follows the pointer width:
// with _LP64 it reuses the long encoders (enc_casx / enc_lflags_ne_to_boolean),
// otherwise the int encoders (enc_casi / enc_iflags_ne_to_boolean).
// NOTE(review): the format text always prints "xcc"; without _LP64 the int
// flag encoder is used, so the printed condition-code name is presumably
// inaccurate there -- confirm.
instruct compareAndSwapP_bool(iRegP mem_ptr, iRegP oldval, iRegP newval, iRegI res, o7RegI tmp1, flagsReg ccr ) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect( USE mem_ptr, KILL ccr, KILL tmp1);
  format %{
            "MOV    $newval,O7\n\t"
            "CASA_PTR  [$mem_ptr],$oldval,O7\t! If $oldval==[$mem_ptr] Then store O7 into [$mem_ptr], set O7=[$mem_ptr] in any case\n\t"
            "CMP    $oldval,O7\t\t! See if we made progress\n\t"
            "MOV    1,$res\n\t"
            "MOVne  xcc,R_G0,$res"
  %}
#ifdef _LP64
  ins_encode( enc_casx(mem_ptr, oldval, newval),
              enc_lflags_ne_to_boolean(res) );
#else
  ins_encode( enc_casi(mem_ptr, oldval, newval),
              enc_iflags_ne_to_boolean(res) );
#endif
  ins_pipe( long_memory_op );
%}

// Compare-and-swap of an iRegN value, producing a boolean in $res.
// Uses the same int encoders as compareAndSwapI_bool (enc_casi /
// enc_iflags_ne_to_boolean); O7 (tmp1) and the condition codes are KILLed.
instruct compareAndSwapN_bool(iRegP mem_ptr, iRegN oldval, iRegN newval, iRegI res, o7RegI tmp1, flagsReg ccr ) %{
  match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval)));
  effect( USE mem_ptr, KILL ccr, KILL tmp1);
  format %{
            "MOV    $newval,O7\n\t"
            "CASA   [$mem_ptr],$oldval,O7\t! If $oldval==[$mem_ptr] Then store O7 into [$mem_ptr], set O7=[$mem_ptr] in any case\n\t"
            "CMP    $oldval,O7\t\t! See if we made progress\n\t"
            "MOV    1,$res\n\t"
            "MOVne  icc,R_G0,$res"
  %}
  ins_encode( enc_casi(mem_ptr, oldval, newval),
              enc_iflags_ne_to_boolean(res) );
  ins_pipe( long_memory_op );
%}

//---------------------
// Subtraction Instructions
// Register Subtraction
// int - int, both operands in registers; a single SUB (sub_op3). Also used
// as the final step of the divI_10 expansion.
instruct subI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (SubI src1 src2));

  size(4);
  format %{ "SUB    $src1,$src2,$dst" %}
  opcode(Assembler::sub_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Immediate Subtraction
// int register - immI13 constant; a single SUB (sub_op3) through
// form3_rs1_simm13_rd.
instruct subI_reg_imm13(iRegI dst, iRegI src1, immI13 src2) %{
  match(Set dst (SubI src1 src2));

  size(4);
  format %{ "SUB    $src1,$src2,$dst" %}
  opcode(Assembler::sub_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Integer negation: matches (SubI 0 src2) and encodes it as a SUB whose
// first source is R_G0, printed as NEG.
instruct subI_zero_reg(iRegI dst, immI0 zero, iRegI src2) %{
  match(Set dst (SubI zero src2));

  size(4);
  format %{ "NEG    $src2,$dst" %}
  opcode(Assembler::sub_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( R_G0, src2, dst ) );
  ins_pipe(ialu_zero_reg);
%}

// Long subtraction
// long - long, both operands in registers; a single SUB (sub_op3).
instruct subL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
  match(Set dst (SubL src1 src2));

  size(4);
  format %{ "SUB    $src1,$src2,$dst\t! long" %}
  opcode(Assembler::sub_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Long Immediate Subtraction
// long register - immL13 constant; a single SUB (sub_op3) through
// form3_rs1_simm13_rd.
instruct subL_reg_imm13(iRegL dst, iRegL src1, immL13 con) %{
  match(Set dst (SubL src1 con));

  size(4);
  format %{ "SUB    $src1,$con,$dst\t! long" %}
  opcode(Assembler::sub_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, con, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Long negation
// Matches (SubL 0 src2) and encodes it as a SUB whose first source is R_G0,
// printed as NEG.
instruct negL_reg_reg(iRegL dst, immL0 zero, iRegL src2) %{
  match(Set dst (SubL zero src2));

  size(4);
  format %{ "NEG    $src2,$dst\t! long" %}
  opcode(Assembler::sub_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( R_G0, src2, dst ) );
  ins_pipe(ialu_zero_reg);
%}

// Multiplication Instructions
// Integer Multiplication
// Register Multiplication
// int * int, both operands in registers. The int multiply is encoded with
// the same MULX opcode (mulx_op3) the long multiply rules use.
instruct mulI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (MulI src1 src2));

  size(4);
  format %{ "MULX   $src1,$src2,$dst" %}
  opcode(Assembler::mulx_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(imul_reg_reg);
%}

// Immediate Multiplication
// int register * immI13 constant, encoded with MULX (mulx_op3) through
// form3_rs1_simm13_rd.
instruct mulI_reg_imm13(iRegI dst, iRegI src1, immI13 src2) %{
  match(Set dst (MulI src1 src2));

  size(4);
  format %{ "MULX   $src1,$src2,$dst" %}
  opcode(Assembler::mulx_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) );
  ins_pipe(imul_reg_imm);
%}

// Long Register Multiplication
// long * long, both operands in registers; a single MULX costed at five
// times DEFAULT_COST.
instruct mulL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
  match(Set dst (MulL src1 src2));
  ins_cost(DEFAULT_COST * 5);
  size(4);
  format %{ "MULX   $src1,$src2,$dst\t! long" %}
  opcode(Assembler::mulx_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(mulL_reg_reg);
%}

// Long Immediate Multiplication
// long register * immL13 constant; a single MULX costed at five times
// DEFAULT_COST.
instruct mulL_reg_imm13(iRegL dst, iRegL src1, immL13 src2) %{
  match(Set dst (MulL src1 src2));
  ins_cost(DEFAULT_COST * 5);
  size(4);
  format %{ "MULX   $src1,$src2,$dst" %}
  opcode(Assembler::mulx_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) );
  ins_pipe(mulL_reg_imm);
%}

// Integer Division
// Register Division
// int / int. Per the format text, both sources are first rewritten in place
// with "SRA reg,0,reg" and then divided with SDIVX; hence the iRegIsafe
// operand class on the sources and the (2+71)*DEFAULT_COST cost. No size()
// is declared because the sequence is more than one instruction.
instruct divI_reg_reg(iRegI dst, iRegIsafe src1, iRegIsafe src2) %{
  match(Set dst (DivI src1 src2));
  ins_cost((2+71)*DEFAULT_COST);

  format %{ "SRA     $src2,0,$src2\n\t"
            "SRA     $src1,0,$src1\n\t"
            "SDIVX   $src1,$src2,$dst" %}
  ins_encode( idiv_reg( src1, src2, dst ) );
  ins_pipe(sdiv_reg_reg);
%}

// Immediate Division
// int register / immI13 constant. Per the format text, only the register
// source is rewritten in place with "SRA reg,0,reg" before the SDIVX.
instruct divI_reg_imm13(iRegI dst, iRegIsafe src1, immI13 src2) %{
  match(Set dst (DivI src1 src2));
  ins_cost((2+71)*DEFAULT_COST);

  format %{ "SRA     $src1,0,$src1\n\t"
            "SDIVX   $src1,$src2,$dst" %}
  ins_encode( idiv_imm( src1, src2, dst ) );
  ins_pipe(sdiv_reg_imm);
%}

//----------Div-By-10-Expansion------------------------------------------------
// Extract hi bits of a 32x32->64 bit multiply.
// Expand rule only, not matched
// It has an effect() list instead of a match rule and is instantiated by the
// divI_10 expansion. Per the format text: MULX followed by SRLX by 32.
instruct mul_hi(iRegIsafe dst, iRegIsafe src1, iRegIsafe src2 ) %{
  effect( DEF dst, USE src1, USE src2 );
  format %{ "MULX   $src1,$src2,$dst\t! Used in div-by-10\n\t"
            "SRLX   $dst,#32,$dst\t\t! Extract only hi word of result" %}
  ins_encode( enc_mul_hi(dst,src1,src2));
  ins_pipe(sdiv_reg_reg);
%}

// Magic constant, reciprocal of 10
// Expand-only helper (effect list, no match rule) used by divI_10: loads the
// constant 0x66666667 into $dst with Set32; declared size is 8 bytes.
instruct loadConI_x66666667(iRegIsafe dst) %{
  effect( DEF dst );

  size(8);
  format %{ "SET    0x66666667,$dst\t! Used in div-by-10" %}
  ins_encode( Set32(0x66666667, dst) );
  ins_pipe(ialu_hi_lo_reg);
%}

// Arithmetic shift right of an int register by 31 (see the format text).
// Expand-only helper (effect list, no match rule) used by divI_10; encoded
// by form3_rs1_rd_copysign_hi.
instruct sra_31( iRegI dst, iRegI src ) %{
  effect( DEF dst, USE src );
  format %{ "SRA    $src,31,$dst\t! Used in div-by-10" %}
  ins_encode( form3_rs1_rd_copysign_hi(src,dst) );
  ins_pipe(ialu_reg_reg);
%}

// Arithmetic shift right of an int register by the fixed amount 2.
// Expand-only helper (effect list, no match rule) used by divI_10; the shift
// count 0x2 is hard-coded in the encoding call.
instruct sra_reg_2( iRegI dst, iRegI src ) %{
  effect( DEF dst, USE src );
  format %{ "SRA    $src,2,$dst\t! Used in div-by-10" %}
  opcode(Assembler::sra_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src, 0x2, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Integer DIV with 10
// Matches DivI whose divisor is the immI10 operand and expands it into a
// multiply/shift sequence instead of a divide:
//   dst = (hibits(src * 0x66666667) >> 2) - (src >> 31)
// Cost is (6+6)*DEFAULT_COST versus (2+71)*DEFAULT_COST for the divide rules.
instruct divI_10( iRegI dst, iRegIsafe src, immI10 div ) %{
  match(Set dst (DivI src div));
  ins_cost((6+6)*DEFAULT_COST);
  expand %{
    iRegIsafe tmp1;               // Killed temps;
    iRegIsafe tmp2;               // Killed temps;
    iRegI tmp3;                   // Killed temps;
    iRegI tmp4;                   // Killed temps;
    loadConI_x66666667( tmp1 );   // SET  0x66666667 -> tmp1
    mul_hi( tmp2, src, tmp1 );    // MUL  hibits(src * tmp1) -> tmp2
    sra_31( tmp3, src );          // SRA  src,31 -> tmp3
    sra_reg_2( tmp4, tmp2 );      // SRA  tmp2,2 -> tmp4
    subI_reg_reg( dst,tmp4,tmp3); // SUB  tmp4 - tmp3 -> dst
  %}
%}

// Register Long Division
// long / long, both operands in registers; a single SDIVX costed at 71 times
// DEFAULT_COST.
instruct divL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
  match(Set dst (DivL src1 src2));
  ins_cost(DEFAULT_COST*71);
  size(4);
  format %{ "SDIVX  $src1,$src2,$dst\t! long" %}
  opcode(Assembler::sdivx_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(divL_reg_reg);
%}

// Immediate Long Division
// long register / immL13 constant; a single SDIVX costed at 71 times
// DEFAULT_COST.
instruct divL_reg_imm13(iRegL dst, iRegL src1, immL13 src2) %{
  match(Set dst (DivL src1 src2));
  ins_cost(DEFAULT_COST*71);
  size(4);
  format %{ "SDIVX  $src1,$src2,$dst\t! long" %}
  opcode(Assembler::sdivx_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) );
  ins_pipe(divL_reg_imm);
%}

// Integer Remainder
// Register Remainder
// int % int, both operands in registers. The irem_reg encoder receives an
// extra scratch operand ($temp, an o7RegP); both it and the condition codes
// are KILLed. "SREM" in the format text names the whole sequence.
instruct modI_reg_reg(iRegI dst, iRegIsafe src1, iRegIsafe src2, o7RegP temp, flagsReg ccr ) %{
  match(Set dst (ModI src1 src2));
  effect( KILL ccr, KILL temp);

  format %{ "SREM   $src1,$src2,$dst" %}
  ins_encode( irem_reg(src1, src2, dst, temp) );
  ins_pipe(sdiv_reg_reg);
%}

// Immediate Remainder
// int register % immI13 constant. The irem_imm encoder receives an extra
// scratch operand ($temp, an o7RegP); both it and the condition codes are
// KILLed.
instruct modI_reg_imm13(iRegI dst, iRegIsafe src1, immI13 src2, o7RegP temp, flagsReg ccr ) %{
  match(Set dst (ModI src1 src2));
  effect( KILL ccr, KILL temp);

  format %{ "SREM   $src1,$src2,$dst" %}
  ins_encode( irem_imm(src1, src2, dst, temp) );
  ins_pipe(sdiv_reg_imm);
%}

// Register long division, expand-only helper for the long remainder.
// Same encoding as divL_reg_reg but with an effect list instead of a match
// rule; instantiated by the modL_reg_reg expansion.
instruct divL_reg_reg_1(iRegL dst, iRegL src1, iRegL src2) %{
  effect(DEF dst, USE src1, USE src2);
  size(4);
  format %{ "SDIVX  $src1,$src2,$dst\t! long" %}
  opcode(Assembler::sdivx_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(divL_reg_reg);
%}

// Immediate long division, expand-only helper for the long remainder.
// Same encoding as divL_reg_imm13 but with an effect list instead of a match
// rule; instantiated by the modL_reg_imm13 expansion.
instruct divL_reg_imm13_1(iRegL dst, iRegL src1, immL13 src2) %{
  effect(DEF dst, USE src1, USE src2);
  size(4);
  format %{ "SDIVX  $src1,$src2,$dst\t! long" %}
  opcode(Assembler::sdivx_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) );
  ins_pipe(divL_reg_imm);
%}

// Register long multiplication, expand-only helper for the long remainder.
// Same encoding as mulL_reg_reg but with an effect list instead of a match
// rule; instantiated by the modL_reg_reg expansion.
instruct mulL_reg_reg_1(iRegL dst, iRegL src1, iRegL src2) %{
  effect(DEF dst, USE src1, USE src2);
  size(4);
  format %{ "MULX   $src1,$src2,$dst\t! long" %}
  opcode(Assembler::mulx_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(mulL_reg_reg);
%}

// Immediate long multiplication, expand-only helper for the long remainder.
// Same encoding as mulL_reg_imm13 but with an effect list instead of a match
// rule; instantiated by the modL_reg_imm13 expansion.
instruct mulL_reg_imm13_1(iRegL dst, iRegL src1, immL13 src2) %{
  effect(DEF dst, USE src1, USE src2);
  size(4);
  format %{ "MULX   $src1,$src2,$dst" %}
  opcode(Assembler::mulx_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) );
  ins_pipe(mulL_reg_imm);
%}

// Register long subtraction, expand-only helper for the long remainder.
// Same encoding as subL_reg_reg but with an effect list instead of a match
// rule; instantiated as the last step of the modL_reg_reg expansion.
instruct subL_reg_reg_1(iRegL dst, iRegL src1, iRegL src2) %{
  effect(DEF dst, USE src1, USE src2);
  size(4);
  format %{ "SUB    $src1,$src2,$dst\t! long" %}
  opcode(Assembler::sub_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Register long subtraction, expand-only helper for the long remainder.
// Identical in content to subL_reg_reg_1; this copy is the one instantiated
// as the last step of the modL_reg_imm13 expansion.
instruct subL_reg_reg_2(iRegL dst, iRegL src1, iRegL src2) %{
  effect(DEF dst, USE src1, USE src2);
  size(4);
  format %{ "SUB    $src1,$src2,$dst\t! long" %}
  opcode(Assembler::sub_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Register Long Remainder
// Expands ModL into divide, multiply, subtract:
//   dst = src1 - (src1 / src2) * src2
// using the expand-only *_1 helper rules and two long temporaries.
instruct modL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
  match(Set dst (ModL src1 src2));
  ins_cost(DEFAULT_COST*(71 + 6 + 1));
  expand %{
    iRegL tmp1;
    iRegL tmp2;
    divL_reg_reg_1(tmp1, src1, src2);
    mulL_reg_reg_1(tmp2, tmp1, src2);
    subL_reg_reg_1(dst,  src1, tmp2);
  %}
%}

// Immediate Long Remainder
// Expands ModL with an immL13 divisor into divide, multiply, subtract:
//   dst = src1 - (src1 / src2) * src2
// using the expand-only immediate helpers and two long temporaries.
instruct modL_reg_imm13(iRegL dst, iRegL src1, immL13 src2) %{
  match(Set dst (ModL src1 src2));
  ins_cost(DEFAULT_COST*(71 + 6 + 1));
  expand %{
    iRegL tmp1;
    iRegL tmp2;
    divL_reg_imm13_1(tmp1, src1, src2);
    mulL_reg_imm13_1(tmp2, tmp1, src2);
    subL_reg_reg_2  (dst,  src1, tmp2);
  %}
%}

// Integer Shift Instructions
// Register Shift Left
// int << int, shift count in a register; a single SLL (sll_op3).
instruct shlI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (LShiftI src1 src2));

  size(4);
  format %{ "SLL    $src1,$src2,$dst" %}
  opcode(Assembler::sll_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Register Shift Left Immediate
// int << immU5 constant; a single SLL (sll_op3) through form3_rs1_imm5_rd.
instruct shlI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
  match(Set dst (LShiftI src1 src2));

  size(4);
  format %{ "SLL    $src1,$src2,$dst" %}
  opcode(Assembler::sll_op3, Assembler::arith_op);
  ins_encode( form3_rs1_imm5_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Register Shift Left Long
// long << int, shift count in an int register; a single SLLX (sllx_op3)
// through form3_sd_rs1_rs2_rd.
instruct shlL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
  match(Set dst (LShiftL src1 src2));

  size(4);
  format %{ "SLLX   $src1,$src2,$dst" %}
  opcode(Assembler::sllx_op3, Assembler::arith_op);
  ins_encode( form3_sd_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Register Shift Left Long Immediate
// long << immU6 constant; a single SLLX (sllx_op3) through
// form3_sd_rs1_imm6_rd.
instruct shlL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{
  match(Set dst (LShiftL src1 src2));

  size(4);
  format %{ "SLLX   $src1,$src2,$dst" %}
  opcode(Assembler::sllx_op3, Assembler::arith_op);
  ins_encode( form3_sd_rs1_imm6_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Register Arithmetic Shift Right
// int >> int, shift count in a register; a single SRA (sra_op3).
instruct sarI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (RShiftI src1 src2));
  size(4);
  format %{ "SRA    $src1,$src2,$dst" %}
  opcode(Assembler::sra_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Register Arithmetic Shift Right Immediate
// int >> immU5 constant; a single SRA (sra_op3) through form3_rs1_imm5_rd.
instruct sarI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
  match(Set dst (RShiftI src1 src2));

  size(4);
  format %{ "SRA    $src1,$src2,$dst" %}
  opcode(Assembler::sra_op3, Assembler::arith_op);
  ins_encode( form3_rs1_imm5_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Register Shift Right Arithmetic Long
// long >> int, shift count in an int register; a single SRAX (srax_op3)
// through form3_sd_rs1_rs2_rd.
instruct sarL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
  match(Set dst (RShiftL src1 src2));

  size(4);
  format %{ "SRAX   $src1,$src2,$dst" %}
  opcode(Assembler::srax_op3, Assembler::arith_op);
  ins_encode( form3_sd_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Register Shift Right Arithmetic Long Immediate
// long >> immU6 constant; a single SRAX (srax_op3) through
// form3_sd_rs1_imm6_rd.
instruct sarL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{
  match(Set dst (RShiftL src1 src2));

  size(4);
  format %{ "SRAX   $src1,$src2,$dst" %}
  opcode(Assembler::srax_op3, Assembler::arith_op);
  ins_encode( form3_sd_rs1_imm6_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Register Shift Right
// Unsigned (logical) int >>> int, shift count in a register; a single SRL
// (srl_op3) matched from URShiftI.
instruct shrI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (URShiftI src1 src2));

  size(4);
  format %{ "SRL    $src1,$src2,$dst" %}
  opcode(Assembler::srl_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Register Shift Right Immediate
// Unsigned (logical) int >>> immU5 constant; a single SRL (srl_op3) matched
// from URShiftI.
instruct shrI_reg_imm5(iRegI dst, iRegI src1, immU5 src2) %{
  match(Set dst (URShiftI src1 src2));

  size(4);
  format %{ "SRL    $src1,$src2,$dst" %}
  opcode(Assembler::srl_op3, Assembler::arith_op);
  ins_encode( form3_rs1_imm5_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Register Shift Right Long
// Unsigned (logical) long >>> int, shift count in an int register; a single
// SRLX (srlx_op3) matched from URShiftL.
instruct shrL_reg_reg(iRegL dst, iRegL src1, iRegI src2) %{
  match(Set dst (URShiftL src1 src2));

  size(4);
  format %{ "SRLX   $src1,$src2,$dst" %}
  opcode(Assembler::srlx_op3, Assembler::arith_op);
  ins_encode( form3_sd_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Register Logical (Unsigned) Shift Right Long Immediate
// Matches (URShiftL src1 src2) where the shift count src2 is an immU6
// immediate operand; declared as one 4-byte SRLX instruction.
instruct shrL_reg_imm6(iRegL dst, iRegL src1, immU6 src2) %{
  match(Set dst (URShiftL src1 src2));

  size(4);
  format %{ "SRLX   $src1,$src2,$dst" %}
  opcode(Assembler::srlx_op3, Assembler::arith_op);
  ins_encode( form3_sd_rs1_imm6_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Register Shift Right Immediate with a CastP2X
#ifdef _LP64
// _LP64 variant: folds the CastP2X of a pointer into the unsigned shift, so
// (URShiftL (CastP2X src1) src2) becomes one 4-byte SRLX that reads the
// pointer register directly. The shift count is an immU6 immediate operand.
instruct shrP_reg_imm6(iRegL dst, iRegP src1, immU6 src2) %{
  match(Set dst (URShiftL (CastP2X src1) src2));
  size(4);
  format %{ "SRLX   $src1,$src2,$dst\t! Cast ptr $src1 to long and shift" %}
  opcode(Assembler::srlx_op3, Assembler::arith_op);
  ins_encode( form3_sd_rs1_imm6_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_imm);
%}
#else
// Non-_LP64 variant: folds the CastP2X of a pointer into the unsigned shift,
// so (URShiftI (CastP2X src1) src2) becomes one 4-byte SRL that reads the
// pointer register directly. The shift count is an immU5 immediate operand.
instruct shrP_reg_imm5(iRegI dst, iRegP src1, immU5 src2) %{
  match(Set dst (URShiftI (CastP2X src1) src2));
  size(4);
  format %{ "SRL    $src1,$src2,$dst\t! Cast ptr $src1 to int and shift" %}
  opcode(Assembler::srl_op3, Assembler::arith_op);
  ins_encode( form3_rs1_imm5_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_imm);
%}
#endif


//----------Floating Point Arithmetic Instructions-----------------------------

//  Add float single precision
//  Matches (AddF src1 src2) on regF operands; one 4-byte FADDS, selected by
//  the fadds_opf opcode field.
instruct addF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (AddF src1 src2));

  size(4);
  format %{ "FADDS  $src1,$src2,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fadds_opf);
  ins_encode(form3_opf_rs1F_rs2F_rdF(src1, src2, dst));
  ins_pipe(faddF_reg_reg);
%}

//  Add float double precision
//  Matches (AddD src1 src2) on regD operands; one 4-byte FADDD, selected by
//  the faddd_opf opcode field.
instruct addD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (AddD src1 src2));

  size(4);
  format %{ "FADDD  $src1,$src2,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::faddd_opf);
  ins_encode(form3_opf_rs1D_rs2D_rdD(src1, src2, dst));
  ins_pipe(faddD_reg_reg);
%}

//  Sub float single precision
//  Matches (SubF src1 src2) on regF operands; one 4-byte FSUBS. Shares the
//  faddF_reg_reg pipeline class with the single-precision add.
instruct subF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (SubF src1 src2));

  size(4);
  format %{ "FSUBS  $src1,$src2,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fsubs_opf);
  ins_encode(form3_opf_rs1F_rs2F_rdF(src1, src2, dst));
  ins_pipe(faddF_reg_reg);
%}

//  Sub float double precision
//  Matches (SubD src1 src2) on regD operands; one 4-byte FSUBD. Shares the
//  faddD_reg_reg pipeline class with the double-precision add.
instruct subD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (SubD src1 src2));

  size(4);
  format %{ "FSUBD  $src1,$src2,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fsubd_opf);
  ins_encode(form3_opf_rs1D_rs2D_rdD(src1, src2, dst));
  ins_pipe(faddD_reg_reg);
%}

//  Mul float single precision
//  Matches (MulF src1 src2) on regF operands; one 4-byte FMULS, selected by
//  the fmuls_opf opcode field.
instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (MulF src1 src2));

  size(4);
  format %{ "FMULS  $src1,$src2,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fmuls_opf);
  ins_encode(form3_opf_rs1F_rs2F_rdF(src1, src2, dst));
  ins_pipe(fmulF_reg_reg);
%}

//  Mul float double precision
//  Matches (MulD src1 src2) on regD operands; one 4-byte FMULD, selected by
//  the fmuld_opf opcode field.
instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (MulD src1 src2));

  size(4);
  format %{ "FMULD  $src1,$src2,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fmuld_opf);
  ins_encode(form3_opf_rs1D_rs2D_rdD(src1, src2, dst));
  ins_pipe(fmulD_reg_reg);
%}

//  Div float single precision
//  Matches (DivF src1 src2) on regF operands; one 4-byte FDIVS, selected by
//  the fdivs_opf opcode field.
instruct divF_reg_reg(regF dst, regF src1, regF src2) %{
  match(Set dst (DivF src1 src2));

  size(4);
  format %{ "FDIVS  $src1,$src2,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fdivs_opf);
  ins_encode(form3_opf_rs1F_rs2F_rdF(src1, src2, dst));
  ins_pipe(fdivF_reg_reg);
%}

//  Div float double precision
//  Matches (DivD src1 src2) on regD operands; one 4-byte FDIVD, selected by
//  the fdivd_opf opcode field.
instruct divD_reg_reg(regD dst, regD src1, regD src2) %{
  match(Set dst (DivD src1 src2));

  size(4);
  format %{ "FDIVD  $src1,$src2,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fdivd_opf);
  ins_encode(form3_opf_rs1D_rs2D_rdD(src1, src2, dst));
  ins_pipe(fdivD_reg_reg);
%}

//  Absolute float double precision
//  Matches (AbsD src). Emitted through the fabsd encoding class (defined
//  elsewhere in this file); no fixed size() is declared for this form.
instruct absD_reg(regD dst, regD src) %{
  match(Set dst (AbsD src));

  format %{ "FABSd  $src,$dst" %}
  ins_encode(fabsd(dst, src));
  ins_pipe(faddD_reg);
%}

//  Absolute float single precision
//  Matches (AbsF src). Emitted through the fabss encoding class (defined
//  elsewhere in this file); no fixed size() is declared for this form.
instruct absF_reg(regF dst, regF src) %{
  match(Set dst (AbsF src));

  format %{ "FABSs  $src,$dst" %}
  ins_encode(fabss(dst, src));
  ins_pipe(faddF_reg);
%}

//  Negate float single precision
//  Matches (NegF src); one 4-byte FNEGs, selected by the fnegs_opf opcode
//  field.
instruct negF_reg(regF dst, regF src) %{
  match(Set dst (NegF src));

  size(4);
  format %{ "FNEGs  $src,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fnegs_opf);
  ins_encode(form3_opf_rs2F_rdF(src, dst));
  ins_pipe(faddF_reg);
%}

//  Negate float double precision
//  Matches (NegD src). Unlike negF_reg this goes through the fnegd encoding
//  class (defined elsewhere in this file) and declares no fixed size().
instruct negD_reg(regD dst, regD src) %{
  match(Set dst (NegD src));

  format %{ "FNEGd  $src,$dst" %}
  ins_encode(fnegd(dst, src));
  ins_pipe(faddD_reg);
%}

//  Sqrt float single precision
//  Matches the ideal-graph shape (ConvD2F (SqrtD (ConvF2D src))), i.e. a
//  float widened to double, square-rooted and narrowed back, and replaces
//  the whole subtree with one 4-byte FSQRTS.
instruct sqrtF_reg_reg(regF dst, regF src) %{
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));

  size(4);
  format %{ "FSQRTS $src,$dst" %}
  ins_encode(fsqrts(dst, src));
  ins_pipe(fdivF_reg_reg);
%}

//  Sqrt float double precision
//  Matches (SqrtD src); one 4-byte FSQRTD emitted through the fsqrtd
//  encoding class. Uses the double-precision divide pipeline class.
instruct sqrtD_reg_reg(regD dst, regD src) %{
  match(Set dst (SqrtD src));

  size(4);
  format %{ "FSQRTD $src,$dst" %}
  ins_encode(fsqrtd(dst, src));
  ins_pipe(fdivD_reg_reg);
%}

//----------Logical Instructions-----------------------------------------------
// And Instructions
// Register And
// Matches (AndI src1 src2) with both inputs in int registers; one 4-byte AND.
instruct andI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (AndI src1 src2));

  size(4);
  format %{ "AND    $src1,$src2,$dst" %}
  opcode(Assembler::and_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Immediate And
// Matches (AndI src1 src2) where src2 is an immI13 immediate operand,
// encoded in the simm13 field of one 4-byte AND.
instruct andI_reg_imm13(iRegI dst, iRegI src1, immI13 src2) %{
  match(Set dst (AndI src1 src2));

  size(4);
  format %{ "AND    $src1,$src2,$dst" %}
  opcode(Assembler::and_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Register And Long
// Matches (AndL src1 src2) with both inputs in long registers; one 4-byte
// AND using the same opcode and encoding class as the int form, at
// DEFAULT_COST.
instruct andL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
  match(Set dst (AndL src1 src2));

  ins_cost(DEFAULT_COST);
  size(4);
  format %{ "AND    $src1,$src2,$dst\t! long" %}
  opcode(Assembler::and_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Immediate And Long
// Matches (AndL src1 con) where con is an immL13 immediate operand, encoded
// in the simm13 field of one 4-byte AND, at DEFAULT_COST.
instruct andL_reg_imm13(iRegL dst, iRegL src1, immL13 con) %{
  match(Set dst (AndL src1 con));

  ins_cost(DEFAULT_COST);
  size(4);
  format %{ "AND    $src1,$con,$dst\t! long" %}
  opcode(Assembler::and_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, con, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Or Instructions
// Register Or
// Matches (OrI src1 src2) with both inputs in int registers; one 4-byte OR.
instruct orI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (OrI src1 src2));

  size(4);
  format %{ "OR     $src1,$src2,$dst" %}
  opcode(Assembler::or_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Immediate Or
// Matches (OrI src1 src2) where src2 is an immI13 immediate operand,
// encoded in the simm13 field of one 4-byte OR.
instruct orI_reg_imm13(iRegI dst, iRegI src1, immI13 src2) %{
  match(Set dst (OrI src1 src2));

  size(4);
  format %{ "OR     $src1,$src2,$dst" %}
  opcode(Assembler::or_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Register Or Long
// Matches (OrL src1 src2) with both inputs in long registers; one 4-byte OR
// using the same opcode and encoding class as the int form, at DEFAULT_COST.
instruct orL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
  match(Set dst (OrL src1 src2));

  ins_cost(DEFAULT_COST);
  size(4);
  format %{ "OR     $src1,$src2,$dst\t! long" %}
  opcode(Assembler::or_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Immediate Or Long
// Matches (OrL src1 con) where con is an immL13 immediate operand, encoded
// in the simm13 field of one 4-byte OR.
// The cost is stated exactly once as DEFAULT_COST, the same value the
// sibling andL_reg_imm13 and xorL_reg_imm13 forms declare, so the matcher
// cost does not depend on how ADLC resolves repeated ins_cost attributes.
instruct orL_reg_imm13(iRegL dst, iRegL src1, immL13 con) %{
  match(Set dst (OrL src1 con));

  ins_cost(DEFAULT_COST);
  size(4);
  format %{ "OR     $src1,$con,$dst\t! long" %}
  opcode(Assembler::or_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, con, dst ) );
  ins_pipe(ialu_reg_imm);
%}

#ifndef _LP64

// Non-_LP64 only: OR an int with a pointer reinterpreted as an int.
// Matches (OrI src1 (CastP2X src2)) and folds the cast into one 4-byte OR.
// Use sp_ptr_RegP to match G2 (TLS register) without spilling.
instruct orI_reg_castP2X(iRegI dst, iRegI src1, sp_ptr_RegP src2) %{
  match(Set dst (OrI src1 (CastP2X src2)));

  size(4);
  format %{ "OR     $src1,$src2,$dst" %}
  opcode(Assembler::or_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

#else

// _LP64 only: OR a long with a pointer reinterpreted as a long.
// Matches (OrL src1 (CastP2X src2)) and folds the cast into one 4-byte OR.
// src2 uses the sp_ptr_RegP operand class, as in the 32-bit
// orI_reg_castP2X form.
instruct orL_reg_castP2X(iRegL dst, iRegL src1, sp_ptr_RegP src2) %{
  match(Set dst (OrL src1 (CastP2X src2)));

  ins_cost(DEFAULT_COST);
  size(4);
  format %{ "OR     $src1,$src2,$dst\t! long" %}
  opcode(Assembler::or_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

#endif

// Xor Instructions
// Register Xor
// Matches (XorI src1 src2) with both inputs in int registers; one 4-byte XOR.
instruct xorI_reg_reg(iRegI dst, iRegI src1, iRegI src2) %{
  match(Set dst (XorI src1 src2));

  size(4);
  format %{ "XOR    $src1,$src2,$dst" %}
  opcode(Assembler::xor_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Immediate Xor
// Matches (XorI src1 src2) where src2 is an immI13 immediate operand,
// encoded in the simm13 field of one 4-byte XOR.
instruct xorI_reg_imm13(iRegI dst, iRegI src1, immI13 src2) %{
  match(Set dst (XorI src1 src2));

  size(4);
  format %{ "XOR    $src1,$src2,$dst" %}
  opcode(Assembler::xor_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Register Xor Long
// Matches (XorL src1 src2) with both inputs in long registers; one 4-byte
// XOR using the same opcode and encoding class as the int form, at
// DEFAULT_COST.
instruct xorL_reg_reg(iRegL dst, iRegL src1, iRegL src2) %{
  match(Set dst (XorL src1 src2));

  ins_cost(DEFAULT_COST);
  size(4);
  format %{ "XOR    $src1,$src2,$dst\t! long" %}
  opcode(Assembler::xor_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src1, src2, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Immediate Xor Long
// Matches (XorL src1 con) where con is an immL13 immediate operand, encoded
// in the simm13 field of one 4-byte XOR, at DEFAULT_COST.
instruct xorL_reg_imm13(iRegL dst, iRegL src1, immL13 con) %{
  match(Set dst (XorL src1 con));

  ins_cost(DEFAULT_COST);
  size(4);
  format %{ "XOR    $src1,$con,$dst\t! long" %}
  opcode(Assembler::xor_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( src1, con, dst ) );
  ins_pipe(ialu_reg_imm);
%}

//----------Convert to Boolean-------------------------------------------------
// Convert an int to a boolean: matches (Conv2B src) on an int register.
// The printed sequence is CMP R_G0,$src followed by ADDX R_G0,0,$dst, so the
// condition codes are overwritten and ccr is declared KILLed.
// Nice hack for 32-bit tests but doesn't work for
// 64-bit pointers.
instruct convI2B( iRegI dst, iRegI src, flagsReg ccr ) %{
  match(Set dst (Conv2B src));
  effect( KILL ccr );
  ins_cost(DEFAULT_COST*2);
  format %{ "CMP    R_G0,$src\n\t"
            "ADDX   R_G0,0,$dst" %}
  ins_encode( enc_to_bool( src, dst ) );
  ins_pipe(ialu_reg_ialu);
%}

#ifndef _LP64
// Non-_LP64 variant: convert a pointer to a boolean with the same
// CMP/ADDX sequence and enc_to_bool encoding as convI2B. ccr is declared
// KILLed.
instruct convP2B( iRegI dst, iRegP src, flagsReg ccr ) %{
  match(Set dst (Conv2B src));
  effect( KILL ccr );
  ins_cost(DEFAULT_COST*2);
  format %{ "CMP    R_G0,$src\n\t"
            "ADDX   R_G0,0,$dst" %}
  ins_encode( enc_to_bool( src, dst ) );
  ins_pipe(ialu_reg_ialu);
%}
#else
// _LP64 variant: convert a pointer to a boolean without using the condition
// codes (no flags operand or KILL effect is declared). Two encodings are
// emitted in order: a move of $src into $dst, then a MOVRNZ that writes 1
// into $dst, as shown in the format string.
instruct convP2B( iRegI dst, iRegP src ) %{
  match(Set dst (Conv2B src));
  ins_cost(DEFAULT_COST*2);
  format %{ "MOV    $src,$dst\n\t"
            "MOVRNZ $src,1,$dst" %}
  ins_encode( form3_g0_rs2_rd_move( src, dst ), enc_convP2B( dst, src ) );
  ins_pipe(ialu_clr_and_mover);
%}
#endif

// Less-than mask against zero: matches (CmpLTMask src zero) with an immI0
// operand and emits one 4-byte SRA by 31, which fills $dst with copies of
// $src's sign bit. ccr is declared KILLed.
instruct cmpLTMask0( iRegI dst, iRegI src, immI0 zero, flagsReg ccr ) %{
  match(Set dst (CmpLTMask src zero));
  effect(KILL ccr);
  size(4);
  format %{ "SRA    $src,#31,$dst\t# cmpLTMask0" %}
  ins_encode %{
    __ sra($src$$Register, 31, $dst$$Register);
  %}
  ins_pipe(ialu_reg_imm);
%}

// General less-than mask: matches (CmpLTMask p q) on two int registers.
// Per the format string the sequence compares p with q, clears $dst, and
// uses an annulled BLT whose delay slot sets $dst to -1. ccr is declared
// KILLed; the cost is 4 * DEFAULT_COST.
instruct cmpLTMask_reg_reg( iRegI dst, iRegI p, iRegI q, flagsReg ccr ) %{
  match(Set dst (CmpLTMask p q));
  effect( KILL ccr );
  ins_cost(DEFAULT_COST*4);
  format %{ "CMP    $p,$q\n\t"
            "MOV    #0,$dst\n\t"
            "BLT,a  .+8\n\t"
            "MOV    #-1,$dst" %}
  ins_encode( enc_ltmask(p,q,dst) );
  ins_pipe(ialu_reg_reg_ialu);
%}

// Conditional add fused with a less-than mask.
// Matches p = ((CmpLTMask p q) & y) + (p - q) and updates p in place.
// Per the format string: SUBcc computes p-q into p, ADD forms p-q+y in the
// TEMP register tmp, and MOVlt selects tmp when the subtraction result is
// negative. ccr is declared KILLed.
instruct cadd_cmpLTMask( iRegI p, iRegI q, iRegI y, iRegI tmp, flagsReg ccr ) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect(KILL ccr, TEMP tmp);
  ins_cost(DEFAULT_COST*3);

  format %{ "SUBcc  $p,$q,$p\t! p' = p-q\n\t"
            "ADD    $p,$y,$tmp\t! g3=p-q+y\n\t"
            "MOVlt  $tmp,$p\t! p' < 0 ? p'+y : p'" %}
  ins_encode( enc_cadd_cmpLTMask(p, q, y, tmp) );
  ins_pipe( cadd_cmpltmask );
%}


//-----------------------------------------------------------------
// Direct raw moves between float and general registers using VIS3.

//  ins_pipe(faddF_reg);
// Raw bit move from a float register to an int register.
// Only selected when UseVIS >= 3; matches (MoveF2I src) and emits MOVSTOUW.
instruct MoveF2I_reg_reg(iRegI dst, regF src) %{
  predicate(UseVIS >= 3);
  match(Set dst (MoveF2I src));

  format %{ "MOVSTOUW $src,$dst\t! MoveF2I" %}
  ins_encode %{
    __ movstouw($src$$FloatRegister, $dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Raw bit move from an int register to a float register.
// Only selected when UseVIS >= 3; matches (MoveI2F src) and emits MOVWTOS.
instruct MoveI2F_reg_reg(regF dst, iRegI src) %{
  predicate(UseVIS >= 3);
  match(Set dst (MoveI2F src));

  format %{ "MOVWTOS $src,$dst\t! MoveI2F" %}
  ins_encode %{
    __ movwtos($src$$Register, $dst$$FloatRegister);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Raw bit move from a double register to a long register.
// Only selected when UseVIS >= 3; matches (MoveD2L src) and emits MOVDTOX.
// The source is converted with as_DoubleFloatRegister($src$$reg).
instruct MoveD2L_reg_reg(iRegL dst, regD src) %{
  predicate(UseVIS >= 3);
  match(Set dst (MoveD2L src));

  format %{ "MOVDTOX $src,$dst\t! MoveD2L" %}
  ins_encode %{
    __ movdtox(as_DoubleFloatRegister($src$$reg), $dst$$Register);
  %}
  ins_pipe(ialu_reg_reg);
%}

// Raw bit move from a long register to a double register.
// Only selected when UseVIS >= 3; matches (MoveL2D src) and emits MOVXTOD.
// The destination is converted with as_DoubleFloatRegister($dst$$reg).
instruct MoveL2D_reg_reg(regD dst, iRegL src) %{
  predicate(UseVIS >= 3);
  match(Set dst (MoveL2D src));

  format %{ "MOVXTOD $src,$dst\t! MoveL2D" %}
  ins_encode %{
    __ movxtod($src$$Register, as_DoubleFloatRegister($dst$$reg));
  %}
  ins_pipe(ialu_reg_reg);
%}


// Raw moves between float and general registers using stack.

// MoveF2I where the float lives in a stack slot: one 4-byte LDUW from the
// slot into an int register, costed as a memory reference.
instruct MoveF2I_stack_reg(iRegI dst, stackSlotF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDUW   $src,$dst\t! MoveF2I" %}
  opcode(Assembler::lduw_op3);
  ins_encode(simple_form3_mem_reg( src, dst ) );
  ins_pipe(iload_mem);
%}

// MoveI2F where the int lives in a stack slot: one 4-byte LDF from the slot
// into a float register, costed as a memory reference.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDF    $src,$dst\t! MoveI2F" %}
  opcode(Assembler::ldf_op3);
  ins_encode(simple_form3_mem_reg(src, dst));
  ins_pipe(floadF_stk);
%}

// MoveD2L where the double lives in a stack slot: one 4-byte LDX from the
// slot into a long register, costed as a memory reference.
instruct MoveD2L_stack_reg(iRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDX    $src,$dst\t! MoveD2L" %}
  opcode(Assembler::ldx_op3);
  ins_encode(simple_form3_mem_reg( src, dst ) );
  ins_pipe(iload_mem);
%}

// MoveL2D where the long lives in a stack slot: one 4-byte LDDF from the
// slot into a double register, costed as a memory reference.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "LDDF   $src,$dst\t! MoveL2D" %}
  opcode(Assembler::lddf_op3);
  ins_encode(simple_form3_mem_reg(src, dst));
  ins_pipe(floadD_stk);
%}

// MoveF2I where the result is wanted in a stack slot: one 4-byte STF of the
// float register into the int slot. The encoding class takes the memory
// operand first, hence (dst, src).
instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
  match(Set dst (MoveF2I src));
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STF   $src,$dst\t! MoveF2I" %}
  opcode(Assembler::stf_op3);
  ins_encode(simple_form3_mem_reg(dst, src));
  ins_pipe(fstoreF_stk_reg);
%}

// MoveI2F where the result is wanted in a stack slot: one 4-byte STW of the
// int register into the float slot. The encoding class takes the memory
// operand first, hence (dst, src).
instruct MoveI2F_reg_stack(stackSlotF dst, iRegI src) %{
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STW    $src,$dst\t! MoveI2F" %}
  opcode(Assembler::stw_op3);
  ins_encode(simple_form3_mem_reg( dst, src ) );
  ins_pipe(istore_mem_reg);
%}

// MoveD2L where the result is wanted in a stack slot: one 4-byte STDF of
// the double register into the long slot. The encoding class takes the
// memory operand first, hence (dst, src).
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STDF   $src,$dst\t! MoveD2L" %}
  opcode(Assembler::stdf_op3);
  ins_encode(simple_form3_mem_reg(dst, src));
  ins_pipe(fstoreD_stk_reg);
%}

// MoveL2D where the result is wanted in a stack slot: one 4-byte STX of the
// long register into the double slot. The encoding class takes the memory
// operand first, hence (dst, src).
instruct MoveL2D_reg_stack(stackSlotD dst, iRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(MEMORY_REF_COST);

  size(4);
  format %{ "STX    $src,$dst\t! MoveL2D" %}
  opcode(Assembler::stx_op3);
  ins_encode(simple_form3_mem_reg( dst, src ) );
  ins_pipe(istore_mem_reg);
%}


//----------Arithmetic Conversion Instructions---------------------------------
// The conversion operations are sorted alphabetically.  Please keep it that way!

// Convert double to float: matches (ConvD2F src); one 4-byte FDTOS.
instruct convD2F_reg(regF dst, regD src) %{
  match(Set dst (ConvD2F src));
  size(4);
  format %{ "FDTOS  $src,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fdtos_opf);
  ins_encode(form3_opf_rs2D_rdF(src, dst));
  ins_pipe(fcvtD2F);
%}


// Convert a double to an int in a float register.
// If the double is a NAN, stuff a zero in instead.
// Has no match rule: it is only instantiated from the expand blocks of
// convD2I_stk and convD2I_reg. fcc0 is overwritten by the NaN self-compare
// and is declared KILLed.
instruct convD2I_helper(regF dst, regD src, flagsRegF0 fcc0) %{
  effect(DEF dst, USE src, KILL fcc0);
  format %{ "FCMPd  fcc0,$src,$src\t! check for NAN\n\t"
            "FBO,pt fcc0,skip\t! branch on ordered, predict taken\n\t"
            "FDTOI  $src,$dst\t! convert in delay slot\n\t"
            "FITOS  $dst,$dst\t! change NaN/max-int to valid float\n\t"
            "FSUBs  $dst,$dst,$dst\t! cleared only if nan\n"
      "skip:" %}
  ins_encode(form_d2i_helper(src,dst));
  ins_pipe(fcvtD2I);
%}

// ConvD2I with the result delivered to an int stack slot.
// Expands to convD2I_helper (conversion into a temporary float register)
// followed by regF_to_stkI, which moves that register to the slot.
instruct convD2I_stk(stackSlotI dst, regD src) %{
  match(Set dst (ConvD2I src));
  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST);
  expand %{
    regF tmp;
    convD2I_helper(tmp, src);
    regF_to_stkI(dst, tmp);
  %}
%}

// ConvD2I with the result delivered to an int register.
// Only selected when UseVIS >= 3. Expands to convD2I_helper followed by the
// direct register move MoveF2I_reg_reg, so its cost has no memory-reference
// term, unlike convD2I_stk.
instruct convD2I_reg(iRegI dst, regD src) %{
  predicate(UseVIS >= 3);
  match(Set dst (ConvD2I src));
  ins_cost(DEFAULT_COST*2 + BRANCH_COST);
  expand %{
    regF tmp;
    convD2I_helper(tmp, src);
    MoveF2I_reg_reg(dst, tmp);
  %}
%}


// Convert a double to a long in a double register.
// If the double is a NAN, stuff a zero in instead.
// Has no match rule: it is only instantiated from the expand blocks of
// convD2L_stk and convD2L_reg. fcc0 is overwritten by the NaN self-compare
// and is declared KILLed.
instruct convD2L_helper(regD dst, regD src, flagsRegF0 fcc0) %{
  effect(DEF dst, USE src, KILL fcc0);
  format %{ "FCMPd  fcc0,$src,$src\t! check for NAN\n\t"
            "FBO,pt fcc0,skip\t! branch on ordered, predict taken\n\t"
            "FDTOX  $src,$dst\t! convert in delay slot\n\t"
            "FXTOD  $dst,$dst\t! change NaN/max-long to valid double\n\t"
            "FSUBd  $dst,$dst,$dst\t! cleared only if nan\n"
      "skip:" %}
  ins_encode(form_d2l_helper(src,dst));
  ins_pipe(fcvtD2L);
%}

// ConvD2L with the result delivered to a long stack slot.
// Expands to convD2L_helper (conversion into a temporary double register)
// followed by regD_to_stkL, which moves that register to the slot.
instruct convD2L_stk(stackSlotL dst, regD src) %{
  match(Set dst (ConvD2L src));
  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST);
  expand %{
    regD tmp;
    convD2L_helper(tmp, src);
    regD_to_stkL(dst, tmp);
  %}
%}

// ConvD2L with the result delivered to a long register.
// Only selected when UseVIS >= 3. Expands to convD2L_helper followed by the
// direct register move MoveD2L_reg_reg, so its cost has no memory-reference
// term, unlike convD2L_stk.
instruct convD2L_reg(iRegL dst, regD src) %{
  predicate(UseVIS >= 3);
  match(Set dst (ConvD2L src));
  ins_cost(DEFAULT_COST*2 + BRANCH_COST);
  expand %{
    regD tmp;
    convD2L_helper(tmp, src);
    MoveD2L_reg_reg(dst, tmp);
  %}
%}


// Convert float to double: matches (ConvF2D src) and emits FSTOD.
// Unlike convD2F_reg, no size() is declared here.
instruct convF2D_reg(regD dst, regF src) %{
  match(Set dst (ConvF2D src));
  format %{ "FSTOD  $src,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fstod_opf);
  ins_encode(form3_opf_rs2F_rdD(src, dst));
  ins_pipe(fcvtF2D);
%}


// Convert a float to an int in a float register.
// If the float is a NAN, stuff a zero in instead.
// Has no match rule: it is only instantiated from the expand blocks of
// convF2I_stk and convF2I_reg. fcc0 is overwritten by the NaN self-compare
// and is declared KILLed.
instruct convF2I_helper(regF dst, regF src, flagsRegF0 fcc0) %{
  effect(DEF dst, USE src, KILL fcc0);
  format %{ "FCMPs  fcc0,$src,$src\t! check for NAN\n\t"
            "FBO,pt fcc0,skip\t! branch on ordered, predict taken\n\t"
            "FSTOI  $src,$dst\t! convert in delay slot\n\t"
            "FITOS  $dst,$dst\t! change NaN/max-int to valid float\n\t"
            "FSUBs  $dst,$dst,$dst\t! cleared only if nan\n"
      "skip:" %}
  ins_encode(form_f2i_helper(src,dst));
  ins_pipe(fcvtF2I);
%}

// ConvF2I with the result delivered to an int stack slot.
// Expands to convF2I_helper (conversion into a temporary float register)
// followed by regF_to_stkI, which moves that register to the slot.
instruct convF2I_stk(stackSlotI dst, regF src) %{
  match(Set dst (ConvF2I src));
  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST);
  expand %{
    regF tmp;
    convF2I_helper(tmp, src);
    regF_to_stkI(dst, tmp);
  %}
%}

// ConvF2I with the result delivered to an int register.
// Only selected when UseVIS >= 3. Expands to convF2I_helper followed by the
// direct register move MoveF2I_reg_reg, so its cost has no memory-reference
// term, unlike convF2I_stk.
instruct convF2I_reg(iRegI dst, regF src) %{
  predicate(UseVIS >= 3);
  match(Set dst (ConvF2I src));
  ins_cost(DEFAULT_COST*2 + BRANCH_COST);
  expand %{
    regF tmp;
    convF2I_helper(tmp, src);
    MoveF2I_reg_reg(dst, tmp);
  %}
%}


// Convert a float to a long in a double float register (dst is regD).
// If the float is a NAN, stuff a zero in instead.
// Has no match rule: it is only instantiated from the expand blocks of
// convF2L_stk and convF2L_reg. fcc0 is overwritten by the NaN self-compare
// and is declared KILLed.
instruct convF2L_helper(regD dst, regF src, flagsRegF0 fcc0) %{
  effect(DEF dst, USE src, KILL fcc0);
  format %{ "FCMPs  fcc0,$src,$src\t! check for NAN\n\t"
            "FBO,pt fcc0,skip\t! branch on ordered, predict taken\n\t"
            "FSTOX  $src,$dst\t! convert in delay slot\n\t"
            "FXTOD  $dst,$dst\t! change NaN/max-long to valid double\n\t"
            "FSUBd  $dst,$dst,$dst\t! cleared only if nan\n"
      "skip:" %}
  ins_encode(form_f2l_helper(src,dst));
  ins_pipe(fcvtF2L);
%}

// ConvF2L with the result delivered to a long stack slot.
// Expands to convF2L_helper (conversion into a temporary double register)
// followed by regD_to_stkL, which moves that register to the slot.
instruct convF2L_stk(stackSlotL dst, regF src) %{
  match(Set dst (ConvF2L src));
  ins_cost(DEFAULT_COST*2 + MEMORY_REF_COST*2 + BRANCH_COST);
  expand %{
    regD tmp;
    convF2L_helper(tmp, src);
    regD_to_stkL(dst, tmp);
  %}
%}

// ConvF2L with the result delivered to a long register.
// Only selected when UseVIS >= 3. Expands to convF2L_helper followed by the
// direct register move MoveD2L_reg_reg, so its cost has no memory-reference
// term, unlike convF2L_stk.
instruct convF2L_reg(iRegL dst, regF src) %{
  predicate(UseVIS >= 3);
  match(Set dst (ConvF2L src));
  ins_cost(DEFAULT_COST*2 + BRANCH_COST);
  expand %{
    regD tmp;
    convF2L_helper(tmp, src);
    MoveD2L_reg_reg(dst, tmp);
  %}
%}


// Int-to-double conversion of an int value already held in a float
// register: emits FITOD from tmp into dst.
// Has no match rule: it is only instantiated from the expand blocks of
// convI2D_stk and convI2D_reg.
instruct convI2D_helper(regD dst, regF tmp) %{
  effect(USE tmp, DEF dst);
  format %{ "FITOD  $tmp,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fitod_opf);
  ins_encode(form3_opf_rs2F_rdD(tmp, dst));
  ins_pipe(fcvtI2D);
%}

// ConvI2D with the int source in a stack slot.
// Expands to stkI_to_regF (slot into a temporary float register) followed
// by convI2D_helper. Note the operand list names the source before the
// destination, unlike the sibling forms.
instruct convI2D_stk(stackSlotI src, regD dst) %{
  match(Set dst (ConvI2D src));
  ins_cost(DEFAULT_COST + MEMORY_REF_COST);
  expand %{
    regF tmp;
    stkI_to_regF(tmp, src);
    convI2D_helper(dst, tmp);
  %}
%}

// ConvI2D with the int source in a register.
// Only selected when UseVIS >= 3. Expands to the direct register move
// MoveI2F_reg_reg followed by convI2D_helper. The destination uses the
// regD_low operand class and no explicit ins_cost is given.
instruct convI2D_reg(regD_low dst, iRegI src) %{
  predicate(UseVIS >= 3);
  match(Set dst (ConvI2D src));
  expand %{
    regF tmp;
    MoveI2F_reg_reg(tmp, src);
    convI2D_helper(dst, tmp);
  %}
%}

// ConvI2D fused with the int load: matches (ConvI2D (LoadI mem)).
// Two instructions (8 bytes): LDF loads the int bits from memory into $dst,
// then FITOD converts $dst in place. The destination uses the regD_low
// operand class.
instruct convI2D_mem(regD_low dst, memory mem) %{
  match(Set dst (ConvI2D (LoadI mem)));
  ins_cost(DEFAULT_COST + MEMORY_REF_COST);
  size(8);
  format %{ "LDF    $mem,$dst\n\t"
            "FITOD  $dst,$dst" %}
  opcode(Assembler::ldf_op3, Assembler::fitod_opf);
  ins_encode(simple_form3_mem_reg( mem, dst ), form3_convI2F(dst, dst));
  ins_pipe(floadF_mem);
%}


// Int-to-float conversion of an int value already held in a float
// register: emits FITOS from tmp into dst.
// Has no match rule: it is only instantiated from the expand blocks of
// convI2F_stk and convI2F_reg.
instruct convI2F_helper(regF dst, regF tmp) %{
  effect(DEF dst, USE tmp);
  format %{ "FITOS  $tmp,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fitos_opf);
  ins_encode(form3_opf_rs2F_rdF(tmp, dst));
  ins_pipe(fcvtI2F);
%}

// ConvI2F with the int source in a stack slot.
// Expands to stkI_to_regF (slot into a temporary float register) followed
// by convI2F_helper.
instruct convI2F_stk(regF dst, stackSlotI src) %{
  match(Set dst (ConvI2F src));
  ins_cost(DEFAULT_COST + MEMORY_REF_COST);
  expand %{
    regF tmp;
    stkI_to_regF(tmp,src);
    convI2F_helper(dst, tmp);
  %}
%}

// ConvI2F with the int source in a register.
// Only selected when UseVIS >= 3. Expands to the direct register move
// MoveI2F_reg_reg followed by convI2F_helper, at DEFAULT_COST.
instruct convI2F_reg(regF dst, iRegI src) %{
  predicate(UseVIS >= 3);
  match(Set dst (ConvI2F src));
  ins_cost(DEFAULT_COST);
  expand %{
    regF tmp;
    MoveI2F_reg_reg(tmp, src);
    convI2F_helper(dst, tmp);
  %}
%}

// ConvI2F fused with the int load: matches (ConvI2F (LoadI mem)).
// Two instructions (8 bytes): LDF loads the int bits from memory into $dst,
// then FITOS converts $dst in place.
instruct convI2F_mem( regF dst, memory mem ) %{
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(DEFAULT_COST + MEMORY_REF_COST);
  size(8);
  format %{ "LDF    $mem,$dst\n\t"
            "FITOS  $dst,$dst" %}
  opcode(Assembler::ldf_op3, Assembler::fitos_opf);
  ins_encode(simple_form3_mem_reg( mem, dst ), form3_convI2F(dst, dst));
  ins_pipe(floadF_mem);
%}


// Sign-extend convert int to long: matches (ConvI2L src).
// Emits one 4-byte SRA whose shift-count register is R_G0 (printed as a
// shift by 0), so the instruction is used only for its extension to long.
instruct convI2L_reg(iRegL dst, iRegI src) %{
  match(Set dst (ConvI2L src));
  size(4);
  format %{ "SRA    $src,0,$dst\t! int->long" %}
  opcode(Assembler::sra_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src, R_G0, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Zero-extend convert int to long
// Matches (AndL (ConvI2L src) mask) where mask is an immL_32bits operand
// and replaces the convert-and-mask pair with one 4-byte SRL whose
// shift-count register is R_G0 (printed as a shift by 0).
instruct convI2L_reg_zex(iRegL dst, iRegI src, immL_32bits mask ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  size(4);
  format %{ "SRL    $src,0,$dst\t! zero-extend int to long" %}
  opcode(Assembler::srl_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src, R_G0, dst ) );
  ins_pipe(ialu_reg_reg);
%}

// Zero-extend long
// Matches (AndL src mask) where mask is an immL_32bits operand and replaces
// the mask with one 4-byte SRL whose shift-count register is R_G0 (printed
// as a shift by 0).
instruct zerox_long(iRegL dst, iRegL src, immL_32bits mask ) %{
  match(Set dst (AndL src mask) );
  size(4);
  format %{ "SRL    $src,0,$dst\t! zero-extend long" %}
  opcode(Assembler::srl_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( src, R_G0, dst ) );
  ins_pipe(ialu_reg_reg);
%}


//-----------
// Long to Double conversion using V8 opcodes.
// Still useful because cheetah traps and becomes
// amazingly slow for some common numbers.

// Magic constant, 0x43300000
// Loads the constant into an int register with one 4-byte SETHI; the format
// string annotates it as 2^52. Has no match rule: it is only instantiated
// from the expand block of convL2D_reg_slow_fxtof.
instruct loadConI_x43300000(iRegI dst) %{
  effect(DEF dst);
  size(4);
  format %{ "SETHI  HI(0x43300000),$dst\t! 2^52" %}
  ins_encode(SetHi22(0x43300000, dst));
  ins_pipe(ialu_none);
%}

// Magic constant, 0x41f00000
// Loads the constant into an int register with one 4-byte SETHI; the format
// string annotates it as 2^32. Has no match rule: it is only instantiated
// from the expand block of convL2D_reg_slow_fxtof.
instruct loadConI_x41f00000(iRegI dst) %{
  effect(DEF dst);
  size(4);
  format %{ "SETHI  HI(0x41f00000),$dst\t! 2^32" %}
  ins_encode(SetHi22(0x41f00000, dst));
  ins_pipe(ialu_none);
%}

// Construct a double from two float halves
// Two FMOVS instructions (8 bytes): the high half of dst comes from the
// high half of src1 and the low half of dst from the low half of src2.
// All operands use the regD_low class. Has no match rule; used from the
// expand block of convL2D_reg_slow_fxtof.
instruct regDHi_regDLo_to_regD(regD_low dst, regD_low src1, regD_low src2) %{
  effect(DEF dst, USE src1, USE src2);
  size(8);
  format %{ "FMOVS  $src1.hi,$dst.hi\n\t"
            "FMOVS  $src2.lo,$dst.lo" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fmovs_opf);
  ins_encode(form3_opf_rs2D_hi_rdD_hi(src1, dst), form3_opf_rs2D_lo_rdD_lo(src2, dst));
  ins_pipe(faddD_reg_reg);
%}

// Convert integer in high half of a double register (in the lower half of
// the double register file) to double
// One 4-byte FITOD. Has no match rule; used from the expand block of
// convL2D_reg_slow_fxtof.
instruct convI2D_regDHi_regD(regD dst, regD_low src) %{
  effect(DEF dst, USE src);
  size(4);
  format %{ "FITOD  $src,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fitod_opf);
  ins_encode(form3_opf_rs2D_rdD(src, dst));
  ins_pipe(fcvtLHi2D);
%}

// Add float double precision
// Effect-only twin of addD_reg_reg: same FADDD encoding but no match rule,
// so it can be instantiated from the expand block of
// convL2D_reg_slow_fxtof.
instruct addD_regD_regD(regD dst, regD src1, regD src2) %{
  effect(DEF dst, USE src1, USE src2);
  size(4);
  format %{ "FADDD  $src1,$src2,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::faddd_opf);
  ins_encode(form3_opf_rs1D_rs2D_rdD(src1, src2, dst));
  ins_pipe(faddD_reg_reg);
%}

// Sub float double precision
// Effect-only twin of subD_reg_reg: same FSUBD encoding but no match rule,
// so it can be instantiated from the expand block of
// convL2D_reg_slow_fxtof.
instruct subD_regD_regD(regD dst, regD src1, regD src2) %{
  effect(DEF dst, USE src1, USE src2);
  size(4);
  format %{ "FSUBD  $src1,$src2,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fsubd_opf);
  ins_encode(form3_opf_rs1D_rs2D_rdD(src1, src2, dst));
  ins_pipe(faddD_reg_reg);
%}

// Mul float double precision
// Effect-only twin of mulD_reg_reg: same FMULD encoding but no match rule,
// so it can be instantiated from the expand block of
// convL2D_reg_slow_fxtof.
instruct mulD_regD_regD(regD dst, regD src1, regD src2) %{
  effect(DEF dst, USE src1, USE src2);
  size(4);
  format %{ "FMULD  $src1,$src2,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fmuld_opf);
  ins_encode(form3_opf_rs1D_rs2D_rdD(src1, src2, dst));
  ins_pipe(fmulD_reg_reg);
%}

// Long to Double conversion without FXTOD, for a long held in a stack slot.
// It carries no predicate and a high cost (8*DEFAULT_COST +
// 6*MEMORY_REF_COST), so the predicated, cheaper forms below are preferred
// when their predicates hold.
//
// The expansion, in order:
//   1. Load the long's bits into a double register (tmpsrc).
//   2. Materialize 0x43300000 (2^52) and 0x41f00000 (2^32) in int registers,
//      store each via regI_to_stkLHi, and reload each as a double
//      (dx43300000, dx41f00000).
//      NOTE(review): presumably regI_to_stkLHi writes the high word of the
//      slot and leaves the low word zero, giving exactly 2^52 and 2^32 --
//      confirm against its definition elsewhere in this file.
//   3. tmp1 = (double) high word of the source       (convI2D_regDHi_regD)
//   4. tmp2 = { high word of 2^52, low word of src } (regDHi_regDLo_to_regD)
//   5. tmp3 = tmp2 - 2^52
//   6. tmp4 = tmp1 * 2^32
//   7. dst  = tmp3 + tmp4
instruct convL2D_reg_slow_fxtof(regD dst, stackSlotL src) %{
  match(Set dst (ConvL2D src));
  ins_cost(DEFAULT_COST*8 + MEMORY_REF_COST*6);

  expand %{
    regD_low   tmpsrc;
    iRegI      ix43300000;
    iRegI      ix41f00000;
    stackSlotL lx43300000;
    stackSlotL lx41f00000;
    regD_low   dx43300000;
    regD       dx41f00000;
    regD       tmp1;
    regD_low   tmp2;
    regD       tmp3;
    regD       tmp4;

    stkL_to_regD(tmpsrc, src);

    loadConI_x43300000(ix43300000);
    loadConI_x41f00000(ix41f00000);
    regI_to_stkLHi(lx43300000, ix43300000);
    regI_to_stkLHi(lx41f00000, ix41f00000);
    stkL_to_regD(dx43300000, lx43300000);
    stkL_to_regD(dx41f00000, lx41f00000);

    convI2D_regDHi_regD(tmp1, tmpsrc);
    regDHi_regDLo_to_regD(tmp2, dx43300000, tmpsrc);
    subD_regD_regD(tmp3, tmp2, dx43300000);
    mulD_regD_regD(tmp4, tmp1, dx41f00000);
    addD_regD_regD(dst, tmp3, tmp4);
  %}
%}

// Long to Double conversion using fast fxtof
// Converts long bits already held in a double register with one 4-byte
// FXTOD. Has no match rule: it is only instantiated from the expand blocks
// of convL2D_stk_fast_fxtof and convL2D_reg.
instruct convL2D_helper(regD dst, regD tmp) %{
  effect(DEF dst, USE tmp);
  size(4);
  format %{ "FXTOD  $tmp,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fxtod_opf);
  ins_encode(form3_opf_rs2D_rdD(tmp, dst));
  ins_pipe(fcvtL2D);
%}

// ConvL2D for a long in a stack slot on CPUs where
// VM_Version::has_fast_fxtof() holds. Expands to stkL_to_regD (slot into a
// temporary double register) followed by convL2D_helper. Its cost is lower
// than that of convL2D_reg_slow_fxtof.
instruct convL2D_stk_fast_fxtof(regD dst, stackSlotL src) %{
  predicate(VM_Version::has_fast_fxtof());
  match(Set dst (ConvL2D src));
  ins_cost(DEFAULT_COST + 3 * MEMORY_REF_COST);
  expand %{
    regD tmp;
    stkL_to_regD(tmp, src);
    convL2D_helper(dst, tmp);
  %}
%}

// ConvL2D with the long source in a register.
// Only selected when UseVIS >= 3. Expands to the direct register move
// MoveL2D_reg_reg followed by convL2D_helper. No explicit ins_cost is given.
instruct convL2D_reg(regD dst, iRegL src) %{
  predicate(UseVIS >= 3);
  match(Set dst (ConvL2D src));
  expand %{
    regD tmp;
    MoveL2D_reg_reg(tmp, src);
    convL2D_helper(dst, tmp);
  %}
%}

// Long to Float conversion using fast fxtof
// Converts long bits already held in a double register with one 4-byte
// FXTOS. Has no match rule: it is only instantiated from the expand blocks
// of convL2F_stk_fast_fxtof and convL2F_reg.
instruct convL2F_helper(regF dst, regD tmp) %{
  effect(DEF dst, USE tmp);
  size(4);
  format %{ "FXTOS  $tmp,$dst" %}
  opcode(Assembler::fpop1_op3, Assembler::arith_op, Assembler::fxtos_opf);
  ins_encode(form3_opf_rs2D_rdF(tmp, dst));
  ins_pipe(fcvtL2F);
%}

// ConvL2F for a long in a stack slot. Expands to stkL_to_regD (slot into a
// temporary double register) followed by convL2F_helper.
// Despite the "_fast_fxtof" suffix this form carries no
// VM_Version::has_fast_fxtof() predicate, unlike convL2D_stk_fast_fxtof.
// NOTE(review): no slow-path ConvL2F form is visible in this part of the
// file, so this may be the unconditional stack-slot rule -- confirm.
instruct convL2F_stk_fast_fxtof(regF dst, stackSlotL src) %{
  match(Set dst (ConvL2F src));
  ins_cost(DEFAULT_COST + MEMORY_REF_COST);
  expand %{
    regD tmp;
    stkL_to_regD(tmp, src);
    convL2F_helper(dst, tmp);
  %}
%}

// ConvL2F with the long source in a register.
// Only selected when UseVIS >= 3. Expands to the direct register move
// MoveL2D_reg_reg followed by convL2F_helper, at DEFAULT_COST.
instruct convL2F_reg(regF dst, iRegL src) %{
  predicate(UseVIS >= 3);
  match(Set dst (ConvL2F src));
  ins_cost(DEFAULT_COST);
  expand %{
    regD tmp;
    MoveL2D_reg_reg(tmp, src);
    convL2F_helper(dst, tmp);
  %}
%}

//-----------

// Convert long to int: matches (ConvL2I src).
// Without _LP64 the low half of the long is moved into $dst (no size() is
// declared). With _LP64 a single 4-byte SRA with R_G0 as the shift count is
// emitted through form3_rs1_rd_signextend_lo1.
instruct convL2I_reg(iRegI dst, iRegL src) %{
  match(Set dst (ConvL2I src));
#ifndef _LP64
  format %{ "MOV    $src.lo,$dst\t! long->int" %}
  ins_encode( form3_g0_rs2_rd_move_lo2( src, dst ) );
  ins_pipe(ialu_move_reg_I_to_L);
#else
  size(4);
  format %{ "SRA    $src,R_G0,$dst\t! long->int" %}
  ins_encode( form3_rs1_rd_signextend_lo1( src, dst ) );
  ins_pipe(ialu_reg);
#endif
%}

// Arithmetic Shift Right Long Immediate, fused with the narrowing to int
// Matches (ConvL2I (RShiftL src cnt)) where cnt is an immI_32_63 operand
// and emits only one 4-byte SRAX; no separate long->int instruction follows.
// NOTE(review): presumably immI_32_63 restricts cnt to 32..63, which is what
// makes dropping the ConvL2I safe -- confirm against the operand definition.
instruct shrL_reg_imm6_L2I(iRegI dst, iRegL src, immI_32_63 cnt) %{
  match(Set dst (ConvL2I (RShiftL src cnt)));

  size(4);
  format %{ "SRAX   $src,$cnt,$dst" %}
  opcode(Assembler::srax_op3, Assembler::arith_op);
  ins_encode( form3_sd_rs1_imm6_rd( src, cnt, dst ) );
  ins_pipe(ialu_reg_imm);
%}

// Replicate scalar to packed byte values in a long register
// Per the format string: the byte is shifted to the top of $dst, then
// shift-and-OR steps of 8, 16 and 32 bits copy it into all eight byte
// lanes. Has no match rule; instantiated from the expand block of
// Repl8B_reg.
// NOTE(review): the format string shows O7 as scratch, but no KILL or TEMP
// effect names it here (Repl8B_immI does declare an o7RegI tmp). Confirm
// that O7 is never allocated across this instruction.
instruct Repl8B_reg_helper(iRegL dst, iRegI src) %{
  effect(DEF dst, USE src);
  format %{ "SLLX  $src,56,$dst\n\t"
            "SRLX  $dst, 8,O7\n\t"
            "OR    $dst,O7,$dst\n\t"
            "SRLX  $dst,16,O7\n\t"
            "OR    $dst,O7,$dst\n\t"
            "SRLX  $dst,32,O7\n\t"
            "OR    $dst,O7,$dst\t! replicate8B" %}
  ins_encode( enc_repl8b(src, dst));
  ins_pipe(ialu_reg);
%}

// Replicate scalar to packed byte values into a double stack slot
// Matches (Replicate8B src). Expands to Repl8B_reg_helper, which builds the
// packed value in a temporary long register, followed by regL_to_stkD,
// which moves that register to the destination slot.
instruct Repl8B_reg(stackSlotD dst, iRegI src) %{
  match(Set dst (Replicate8B src));
  expand %{
    iRegL tmp;
    Repl8B_reg_helper(tmp, src);
    regL_to_stkD(dst, tmp);
  %}
%}

// Replicate scalar constant to packed byte values in Double register
// Matches (Replicate8B con) for an immI13 constant. The replicated value
// (replicate_immI(con, 8, 1)) is placed in the constant table and loaded
// with LDDF. ensure_simm13_or_reg is given the tmp register (operand class
// o7RegI) for the table offset, so tmp is declared KILLed.
instruct Repl8B_immI(regD dst, immI13 con, o7RegI tmp) %{
  match(Set dst (Replicate8B con));
  effect(KILL tmp);
  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl8B($con)" %}
  ins_encode %{
    // XXX This is a quick fix for 6833573.
    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 8, 1)), $dst$$FloatRegister);
    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 8, 1)), $tmp$$Register);
    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
  %}
  ins_pipe(loadConFD);
%}

// Replicate scalar to packed char values in a long register
// Per the format string: the 16-bit value is shifted to the top of $dst,
// then shift-and-OR steps of 16 and 32 bits copy it into all four lanes.
// Uses the same enc_repl4s encoding as Repl4S_reg_helper. Has no match
// rule; instantiated from the expand block of Repl4C_reg.
// NOTE(review): the format string shows O7 as scratch, but no KILL or TEMP
// effect names it here. Confirm that O7 is never allocated across this
// instruction.
instruct Repl4C_reg_helper(iRegL dst, iRegI src) %{
  effect(DEF dst, USE src);
  format %{ "SLLX  $src,48,$dst\n\t"
            "SRLX  $dst,16,O7\n\t"
            "OR    $dst,O7,$dst\n\t"
            "SRLX  $dst,32,O7\n\t"
            "OR    $dst,O7,$dst\t! replicate4C" %}
  ins_encode( enc_repl4s(src, dst) );
  ins_pipe(ialu_reg);
%}

// Replicate scalar to packed char values into a double stack slot
// Matches (Replicate4C src). Expands to Repl4C_reg_helper, which builds the
// packed value in a temporary long register, followed by regL_to_stkD,
// which moves that register to the destination slot.
instruct Repl4C_reg(stackSlotD dst, iRegI src) %{
  match(Set dst (Replicate4C src));
  expand %{
    iRegL tmp;
    Repl4C_reg_helper(tmp, src);
    regL_to_stkD(dst, tmp);
  %}
%}

// Replicate scalar constant to packed char values in Double register
// Matches (Replicate4C con) for an immI constant. The replicated value
// (replicate_immI(con, 4, 2)) is placed in the constant table and loaded
// with LDDF. ensure_simm13_or_reg is given the tmp register (operand class
// o7RegI) for the table offset, so tmp is declared KILLed.
instruct Repl4C_immI(regD dst, immI con, o7RegI tmp) %{
  match(Set dst (Replicate4C con));
  effect(KILL tmp);
  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4C($con)" %}
  ins_encode %{
    // XXX This is a quick fix for 6833573.
    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister);
    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register);
    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
  %}
  ins_pipe(loadConFD);
%}

// Replicate scalar to packed short values in a long register.
// Helper without a match rule; it is instantiated by the expand rule of
// Repl4S_reg. Per the format string, the low 16 bits of $src end up in all
// four 16-bit lanes of $dst. Uses the same encoding class as
// Repl4C_reg_helper (enc_repl4s).
// NOTE(review): the format shows O7 as a scratch register, but effect() does
// not list it; confirm against enc_repl4s that this is intentional.
instruct Repl4S_reg_helper(iRegL dst, iRegI src) %{
  effect(DEF dst, USE src);
  format %{ "SLLX  $src,48,$dst\n\t"
            "SRLX  $dst,16,O7\n\t"
            "OR    $dst,O7,$dst\n\t"
            "SRLX  $dst,32,O7\n\t"
            "OR    $dst,O7,$dst\t! replicate4S" %}
  ins_encode( enc_repl4s(src, dst) );
  ins_pipe(ialu_reg);
%}

// Replicate scalar to packed short values into stack slot.
// Matches Replicate4S of an int register and expands into two instructions:
// the replication into a temporary long register, then a move of that
// register into the double stack slot $dst.
instruct Repl4S_reg(stackSlotD dst, iRegI src) %{
  match(Set dst (Replicate4S src));
  expand %{
    iRegL tmp;
    Repl4S_reg_helper(tmp, src);
    regL_to_stkD(dst, tmp);
  %}
%}

// Replicate scalar constant to packed short values in Double register.
// The replicated value (replicate_immI with count 4, width 2) is placed in
// the constant table and loaded with a single double-word FP load.
// $tmp is fixed to O7 and declared KILLed because it is handed to
// ensure_simm13_or_reg as the register that may hold the offset.
instruct Repl4S_immI(regD dst, immI con, o7RegI tmp) %{
  match(Set dst (Replicate4S con));
  effect(KILL tmp);
  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl4S($con)" %}
  ins_encode %{
    // XXX This is a quick fix for 6833573.
    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 4, 2)), $dst$$FloatRegister);
    // Presumably the offset is materialized in $tmp when it does not fit a
    // 13-bit signed immediate -- confirm in ensure_simm13_or_reg.
    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 4, 2)), $tmp$$Register);
    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
  %}
  ins_pipe(loadConFD);
%}

// Replicate scalar to packed int values in a long register.
// Helper without a match rule; it is instantiated by the expand rule of
// Repl2I_reg. Per the format string, the low 32 bits of $src end up in both
// 32-bit halves of $dst.
// NOTE(review): the format shows O7 as a scratch register, but effect() does
// not list it; confirm against enc_repl2i that this is intentional.
instruct Repl2I_reg_helper(iRegL dst, iRegI src) %{
  effect(DEF dst, USE src);
  format %{ "SLLX  $src,32,$dst\n\t"
            "SRLX  $dst,32,O7\n\t"
            "OR    $dst,O7,$dst\t! replicate2I" %}
  ins_encode( enc_repl2i(src, dst));
  ins_pipe(ialu_reg);
%}

// Replicate scalar to packed int values into stack slot.
// Matches Replicate2I of an int register and expands into two instructions:
// the replication into a temporary long register, then a move of that
// register into the double stack slot $dst.
instruct Repl2I_reg(stackSlotD dst, iRegI src) %{
  match(Set dst (Replicate2I src));
  expand %{
    iRegL tmp;
    Repl2I_reg_helper(tmp, src);
    regL_to_stkD(dst, tmp);
  %}
%}

// Replicate scalar constant to packed int values in Double register.
// The operand is a general immI (not restricted to zero). The replicated
// value (replicate_immI with count 2, width 4) is placed in the constant
// table and loaded with a single double-word FP load.
// $tmp is fixed to O7 and declared KILLed because it is handed to
// ensure_simm13_or_reg as the register that may hold the offset.
instruct Repl2I_immI(regD dst, immI con, o7RegI tmp) %{
  match(Set dst (Replicate2I con));
  effect(KILL tmp);
  format %{ "LDDF   [$constanttablebase + $constantoffset],$dst\t! load from constant table: Repl2I($con)" %}
  ins_encode %{
    // XXX This is a quick fix for 6833573.
    //__ ldf(FloatRegisterImpl::D, $constanttablebase, $constantoffset(replicate_immI($con$$constant, 2, 4)), $dst$$FloatRegister);
    // Presumably the offset is materialized in $tmp when it does not fit a
    // 13-bit signed immediate -- confirm in ensure_simm13_or_reg.
    RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset(replicate_immI($con$$constant, 2, 4)), $tmp$$Register);
    __ ldf(FloatRegisterImpl::D, $constanttablebase, con_offset, as_DoubleFloatRegister($dst$$reg));
  %}
  ins_pipe(loadConFD);
%}

//----------Control Flow Instructions------------------------------------------
// Compare Instructions
// Compare Integers
// Signed int compare, register with register. Encoded as a single SUBcc whose
// destination is G0, so only the condition codes are produced.
// The explicit effect() lets expand rules (minI_eReg, maxI_eReg) instantiate
// this instruction directly.
instruct compI_iReg(flagsReg icc, iRegI op1, iRegI op2) %{
  match(Set icc (CmpI op1 op2));
  effect( DEF icc, USE op1, USE op2 );

  size(4);
  format %{ "CMP    $op1,$op2" %}
  opcode(Assembler::subcc_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
  ins_pipe(ialu_cconly_reg_reg);
%}

// Unsigned int compare, register with register. Same SUBcc-into-G0 encoding
// as the signed compare; the result is typed flagsRegU so only unsigned
// condition users consume it.
instruct compU_iReg(flagsRegU icc, iRegI op1, iRegI op2) %{
  match(Set icc (CmpU op1 op2));

  size(4);
  format %{ "CMP    $op1,$op2\t! unsigned" %}
  opcode(Assembler::subcc_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
  ins_pipe(ialu_cconly_reg_reg);
%}

// Signed int compare, register with a 13-bit immediate (immI13).
// Encoded as a single SUBcc with a simm13 operand and destination G0.
instruct compI_iReg_imm13(flagsReg icc, iRegI op1, immI13 op2) %{
  match(Set icc (CmpI op1 op2));
  effect( DEF icc, USE op1 );

  size(4);
  format %{ "CMP    $op1,$op2" %}
  opcode(Assembler::subcc_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( op1, op2, R_G0 ) );
  ins_pipe(ialu_cconly_reg_imm);
%}

// Int bit test: folds (op1 & op2) compared against zero into a single ANDcc
// whose destination is G0, so no separate AND result register is needed.
instruct testI_reg_reg( flagsReg icc, iRegI op1, iRegI op2, immI0 zero ) %{
  match(Set icc (CmpI (AndI op1 op2) zero));

  size(4);
  format %{ "BTST   $op2,$op1" %}
  opcode(Assembler::andcc_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
  ins_pipe(ialu_cconly_reg_reg_zero);
%}

// Int bit test against a 13-bit immediate mask: folds (op1 & op2) compared
// against zero into a single ANDcc with a simm13 operand and destination G0.
instruct testI_reg_imm( flagsReg icc, iRegI op1, immI13 op2, immI0 zero ) %{
  match(Set icc (CmpI (AndI op1 op2) zero));

  size(4);
  format %{ "BTST   $op2,$op1" %}
  opcode(Assembler::andcc_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( op1, op2, R_G0 ) );
  ins_pipe(ialu_cconly_reg_imm_zero);
%}

// Long compare, register with register. Same SUBcc-into-G0 encoding as the
// int compare; the result is typed flagsRegL (xcc) for long condition users.
instruct compL_reg_reg(flagsRegL xcc, iRegL op1, iRegL op2 ) %{
  match(Set xcc (CmpL op1 op2));
  effect( DEF xcc, USE op1, USE op2 );

  size(4);
  format %{ "CMP    $op1,$op2\t\t! long" %}
  opcode(Assembler::subcc_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
  ins_pipe(ialu_cconly_reg_reg);
%}

// Long compare, register with a 13-bit immediate (immL13).
// Encoded as a single SUBcc with a simm13 operand and destination G0.
// NOTE(review): this uses the reg_reg pipe class although the second operand
// is an immediate (compI_iReg_imm13 uses ialu_cconly_reg_imm); confirm
// whether that is intentional.
instruct compL_reg_con(flagsRegL xcc, iRegL op1, immL13 con) %{
  match(Set xcc (CmpL op1 con));
  effect( DEF xcc, USE op1, USE con );

  size(4);
  format %{ "CMP    $op1,$con\t\t! long" %}
  opcode(Assembler::subcc_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( op1, con, R_G0 ) );
  ins_pipe(ialu_cconly_reg_reg);
%}

// Long bit test: folds (op1 & op2) compared against zero into a single ANDcc
// whose destination is G0.
instruct testL_reg_reg(flagsRegL xcc, iRegL op1, iRegL op2, immL0 zero) %{
  match(Set xcc (CmpL (AndL op1 op2) zero));
  effect( DEF xcc, USE op1, USE op2 );

  size(4);
  format %{ "BTST   $op1,$op2\t\t! long" %}
  opcode(Assembler::andcc_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
  ins_pipe(ialu_cconly_reg_reg);
%}

// Long bit test against a 13-bit immediate mask; useful for checking the
// alignment of a pointer. Folds (op1 & con) compared against zero into a
// single ANDcc with a simm13 operand and destination G0.
// NOTE(review): this uses the reg_reg pipe class although the second operand
// is an immediate; confirm whether that is intentional.
instruct testL_reg_con(flagsRegL xcc, iRegL op1, immL13 con, immL0 zero) %{
  match(Set xcc (CmpL (AndL op1 con) zero));
  effect( DEF xcc, USE op1, USE con );

  size(4);
  format %{ "BTST   $op1,$con\t\t! long" %}
  opcode(Assembler::andcc_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( op1, con, R_G0 ) );
  ins_pipe(ialu_cconly_reg_reg);
%}

// Unsigned int compare, register with an immU13 immediate.
// Encoded as a single SUBcc with a simm13 operand and destination G0.
instruct compU_iReg_imm13(flagsRegU icc, iRegI op1, immU13 op2 ) %{
  match(Set icc (CmpU op1 op2));

  size(4);
  format %{ "CMP    $op1,$op2\t! unsigned" %}
  opcode(Assembler::subcc_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( op1, op2, R_G0 ) );
  ins_pipe(ialu_cconly_reg_imm);
%}

// Compare Pointers
// Pointer compare, register with register. Single SUBcc into G0; the result
// is typed flagsRegP (pcc) for pointer condition users.
instruct compP_iRegP(flagsRegP pcc, iRegP op1, iRegP op2 ) %{
  match(Set pcc (CmpP op1 op2));

  size(4);
  format %{ "CMP    $op1,$op2\t! ptr" %}
  opcode(Assembler::subcc_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
  ins_pipe(ialu_cconly_reg_reg);
%}

// Pointer compare, register with an immP13 immediate.
// Encoded as a single SUBcc with a simm13 operand and destination G0.
instruct compP_iRegP_imm13(flagsRegP pcc, iRegP op1, immP13 op2 ) %{
  match(Set pcc (CmpP op1 op2));

  size(4);
  format %{ "CMP    $op1,$op2\t! ptr" %}
  opcode(Assembler::subcc_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( op1, op2, R_G0 ) );
  ins_pipe(ialu_cconly_reg_imm);
%}

// Compare Narrow oops
// Compressed (narrow) oop compare, register with register.
// Single SUBcc into G0; the result is delivered in the int flags (flagsReg).
instruct compN_iRegN(flagsReg icc, iRegN op1, iRegN op2 ) %{
  match(Set icc (CmpN op1 op2));

  size(4);
  format %{ "CMP    $op1,$op2\t! compressed ptr" %}
  opcode(Assembler::subcc_op3, Assembler::arith_op);
  ins_encode( form3_rs1_rs2_rd( op1, op2, R_G0 ) );
  ins_pipe(ialu_cconly_reg_reg);
%}

// Compressed (narrow) oop compare against the narrow null constant (immN0).
// Encoded as a single SUBcc with a simm13 operand and destination G0.
instruct compN_iRegN_immN0(flagsReg icc, iRegN op1, immN0 op2 ) %{
  match(Set icc (CmpN op1 op2));

  size(4);
  format %{ "CMP    $op1,$op2\t! compressed ptr" %}
  opcode(Assembler::subcc_op3, Assembler::arith_op);
  ins_encode( form3_rs1_simm13_rd( op1, op2, R_G0 ) );
  ins_pipe(ialu_cconly_reg_imm);
%}

//----------Max and Min--------------------------------------------------------
// Min Instructions
// Conditional move for min: moves $op1 into $op2 when icc indicates "less".
// Has no match rule; it is instantiated by the expand rule of minI_eReg.
// $op2 is both read and written (USE_DEF).
instruct cmovI_reg_lt( iRegI op2, iRegI op1, flagsReg icc ) %{
  effect( USE_DEF op2, USE op1, USE icc );

  size(4);
  format %{ "MOVlt  icc,$op1,$op2\t! min" %}
  opcode(Assembler::less);
  ins_encode( enc_cmov_reg_minmax(op2,op1) );
  ins_pipe(ialu_reg_flags);
%}

// Min Register with Register.
// Expands MinI into a compare of op1 with op2 followed by a conditional move
// of op1 into op2 on "less"; the result is left in op2.
instruct minI_eReg(iRegI op1, iRegI op2) %{
  match(Set op2 (MinI op1 op2));
  ins_cost(DEFAULT_COST*2);
  expand %{
    flagsReg icc;
    compI_iReg(icc,op1,op2);
    cmovI_reg_lt(op2,op1,icc);
  %}
%}

// Max Instructions
// Conditional move for max: moves $op1 into $op2 when icc indicates
// "greater". Has no match rule; it is instantiated by the expand rule of
// maxI_eReg. $op2 is both read and written (USE_DEF).
instruct cmovI_reg_gt( iRegI op2, iRegI op1, flagsReg icc ) %{
  effect( USE_DEF op2, USE op1, USE icc );

  // Same single-instruction encoding class as cmovI_reg_lt, which declares
  // size(4); state the fixed size here as well for consistency.
  size(4);
  format %{ "MOVgt  icc,$op1,$op2\t! max" %}
  opcode(Assembler::greater);
  ins_encode( enc_cmov_reg_minmax(op2,op1) );
  ins_pipe(ialu_reg_flags);
%}

// Max Register with Register.
// Expands MaxI into a compare of op1 with op2 followed by a conditional move
// of op1 into op2 on "greater"; the result is left in op2.
instruct maxI_eReg(iRegI op1, iRegI op2) %{
  match(Set op2 (MaxI op1 op2));
  ins_cost(DEFAULT_COST*2);
  expand %{
    flagsReg icc;
    compI_iReg(icc,op1,op2);
    cmovI_reg_gt(op2,op1,icc);
  %}
%}


//----------Float Compares----------------------------------------------------
// Compare floating, generate condition code.
// Single-precision compare of two float registers into a floating-point
// condition-code register (flagsRegF), emitted as one FCMPs instruction.
instruct cmpF_cc(flagsRegF fcc, regF src1, regF src2) %{
  match(Set fcc (CmpF src1 src2));

  size(4);
  format %{ "FCMPs  $fcc,$src1,$src2" %}
  opcode(Assembler::fpop2_op3, Assembler::arith_op, Assembler::fcmps_opf);
  ins_encode( form3_opf_rs1F_rs2F_fcc( src1, src2, fcc ) );
  ins_pipe(faddF_fcc_reg_reg_zero);
%}

// Double-precision compare of two double registers into a floating-point
// condition-code register (flagsRegF), emitted as one FCMPd instruction.
instruct cmpD_cc(flagsRegF fcc, regD src1, regD src2) %{
  match(Set fcc (CmpD src1 src2));

  size(4);
  format %{ "FCMPd  $fcc,$src1,$src2" %}
  opcode(Assembler::fpop2_op3, Assembler::arith_op, Assembler::fcmpd_opf);
  ins_encode( form3_opf_rs1D_rs2D_fcc( src1, src2, fcc ) );
  ins_pipe(faddD_fcc_reg_reg_zero);
%}


// Compare floating, generate -1,0,1.
// Three-way float compare (CmpF3) producing its result in an int register.
// fcc0 is clobbered by the sequence, hence KILL. The cost reflects a
// multi-instruction, branching sequence (see the floating_cmp enc class).
instruct cmpF_reg(iRegI dst, regF src1, regF src2, flagsRegF0 fcc0) %{
  match(Set dst (CmpF3 src1 src2));
  effect(KILL fcc0);
  ins_cost(DEFAULT_COST*3+BRANCH_COST*3);
  format %{ "fcmpl  $dst,$src1,$src2" %}
  // Primary = float
  opcode( true );
  ins_encode( floating_cmp( dst, src1, src2 ) );
  ins_pipe( floating_cmp );
%}

// Three-way double compare (CmpD3) producing its result in an int register.
// Shares the floating_cmp enc class with cmpF_reg; the primary opcode value
// selects the double variant. fcc0 is clobbered, hence KILL.
instruct cmpD_reg(iRegI dst, regD src1, regD src2, flagsRegF0 fcc0) %{
  match(Set dst (CmpD3 src1 src2));
  effect(KILL fcc0);
  ins_cost(DEFAULT_COST*3+BRANCH_COST*3);
  format %{ "dcmpl  $dst,$src1,$src2" %}
  // Primary = double (not float)
  opcode( false );
  ins_encode( floating_cmp( dst, src1, src2 ) );
  ins_pipe( floating_cmp );
%}

//----------Branches---------------------------------------------------------
// Jump
// (compare 'operand indIndex' and 'instruct addP_reg_reg' above)
// Table jump for the ideal Jump node: loads a target address from the jump
// table stored in the constant table, indexed by $switch_val, and jumps to
// it. O7 (bound to $table as a TEMP) holds the table address and then the
// loaded target. No size() is declared because the emitted length varies
// with the constant offset.
instruct jumpXtnd(iRegX switch_val, o7RegI table) %{
  match(Jump switch_val);
  effect(TEMP table);

  ins_cost(350);

  // The format always prints the ADD, although the encoding below omits it
  // when the table sits at offset 0 of the constant table.
  format %{  "ADD    $constanttablebase, $constantoffset, O7\n\t"
             "LD     [O7 + $switch_val], O7\n\t"
             "JUMP   O7" %}
  ins_encode %{
    // Calculate table address into a register.
    Register table_reg;
    Register label_reg = O7;
    // If we are calculating the size of this instruction don't trust
    // zero offsets because they might change when
    // MachConstantBaseNode decides to optimize the constant table
    // base.
    if ((constant_offset() == 0) && !Compile::current()->in_scratch_emit_size()) {
      table_reg = $constanttablebase;
    } else {
      table_reg = O7;
      RegisterOrConstant con_offset = __ ensure_simm13_or_reg($constantoffset, O7);
      __ add($constanttablebase, con_offset, table_reg);
    }

    // Jump to base address + switch value
    __ ld_ptr(table_reg, $switch_val$$Register, label_reg);
    __ jmp(label_reg, G0);
    // Fill the delay slot of the jump.
    __ delayed()->nop();
  %}
  ins_pipe(ialu_reg_reg);
%}

// Direct Branch.  Use V8 version with longer range.
// Unconditional branch for Goto: BA plus a NOP in the delay slot, two
// instructions consistent with size(8).
instruct branch(label labl) %{
  match(Goto);
  effect(USE labl);

  size(8);
  ins_cost(BRANCH_COST);
  format %{ "BA     $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    __ ba(*L);
    __ delayed()->nop();
  %}
  ins_pipe(br);
%}

// Direct Branch, short with no delay slot.
// Short variant of 'branch' (ins_short_branch): a single instruction of
// size(4), selected only when UseCBCond is set. ins_avoid_back_to_back asks
// that these instructions not be placed directly after one another.
instruct branch_short(label labl) %{
  match(Goto);
  predicate(UseCBCond);
  effect(USE labl);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "BA     $labl\t! short branch" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Check that the assembler permits a cbcond-style branch to this label.
    assert(__ use_cbcond(*L), "back to back cbcond");
    __ ba_short(*L);
  %}
  ins_short_branch(1);
  ins_avoid_back_to_back(1);
  ins_pipe(cbcond_reg_imm);
%}

// Conditional Direct Branch.
// Branches on an already computed signed int condition (flagsReg).
// The enc_bp encoding is declared as 8 bytes.
instruct branchCon(cmpOp cmp, flagsReg icc, label labl) %{
  match(If cmp icc);
  effect(USE labl);

  size(8);
  ins_cost(BRANCH_COST);
  format %{ "BP$cmp   $icc,$labl" %}
  // Prim = bits 24-22, Secnd = bits 31-30
  ins_encode( enc_bp( labl, cmp, icc ) );
  ins_pipe(br_cc);
%}

// Conditional direct branch on an already computed unsigned int condition
// (flagsRegU with an unsigned compare operator).
instruct branchConU(cmpOpU cmp, flagsRegU icc, label labl) %{
  match(If cmp icc);
  effect(USE labl);

  // Same enc_bp encoding as branchCon and branchLoopEndU, which both declare
  // size(8); state the fixed size here as well for consistency.
  size(8);
  ins_cost(BRANCH_COST);
  format %{ "BP$cmp  $icc,$labl" %}
  // Prim = bits 24-22, Secnd = bits 31-30
  ins_encode( enc_bp( labl, cmp, icc ) );
  ins_pipe(br_cc);
%}

// Conditional direct branch on an already computed pointer condition
// (flagsRegP). Emits BP on Assembler::ptr_cc plus a delay-slot NOP,
// consistent with size(8).
instruct branchConP(cmpOpP cmp, flagsRegP pcc, label labl) %{
  match(If cmp pcc);
  effect(USE labl);

  size(8);
  ins_cost(BRANCH_COST);
  format %{ "BP$cmp  $pcc,$labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;

    __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(br_cc);
%}

// Conditional direct branch on a floating-point condition code register.
// Emits FBP on the fcc selected by $fcc plus a delay-slot NOP, consistent
// with size(8).
instruct branchConF(cmpOpF cmp, flagsRegF fcc, label labl) %{
  match(If cmp fcc);
  effect(USE labl);

  size(8);
  ins_cost(BRANCH_COST);
  format %{ "FBP$cmp $fcc,$labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;

    __ fbp( (Assembler::Condition)($cmp$$cmpcode), false, (Assembler::CC)($fcc$$reg), predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(br_fcc);
%}

// Counted-loop back branch on an already computed signed int condition.
// Same enc_bp encoding as branchCon, matched for CountedLoopEnd.
instruct branchLoopEnd(cmpOp cmp, flagsReg icc, label labl) %{
  match(CountedLoopEnd cmp icc);
  effect(USE labl);

  size(8);
  ins_cost(BRANCH_COST);
  format %{ "BP$cmp   $icc,$labl\t! Loop end" %}
  // Prim = bits 24-22, Secnd = bits 31-30
  ins_encode( enc_bp( labl, cmp, icc ) );
  ins_pipe(br_cc);
%}

// Counted-loop back branch on an already computed unsigned int condition.
// Same enc_bp encoding as branchLoopEnd, with unsigned operator and flags.
instruct branchLoopEndU(cmpOpU cmp, flagsRegU icc, label labl) %{
  match(CountedLoopEnd cmp icc);
  effect(USE labl);

  size(8);
  ins_cost(BRANCH_COST);
  format %{ "BP$cmp  $icc,$labl\t! Loop end" %}
  // Prim = bits 24-22, Secnd = bits 31-30
  ins_encode( enc_bp( labl, cmp, icc ) );
  ins_pipe(br_cc);
%}

// Compare and branch instructions
// Fused signed int compare (register, register) and conditional branch, long
// form: CMP, BP on icc, and a delay-slot NOP, consistent with size(12).
// The compare overwrites icc, hence KILL.
instruct cmpI_reg_branch(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl, KILL icc);

  size(12);
  ins_cost(BRANCH_COST);
  format %{ "CMP    $op1,$op2\t! int\n\t"
            "BP$cmp   $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
    __ cmp($op1$$Register, $op2$$Register);
    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(cmp_br_reg_reg);
%}

// Fused signed int compare (register, immI5 constant) and conditional branch,
// long form: CMP, BP on icc, and a delay-slot NOP, consistent with size(12).
// The compare overwrites icc, hence KILL.
instruct cmpI_imm_branch(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{
  match(If cmp (CmpI op1 op2));
  effect(USE labl, KILL icc);

  size(12);
  ins_cost(BRANCH_COST);
  format %{ "CMP    $op1,$op2\t! int\n\t"
            "BP$cmp   $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
    __ cmp($op1$$Register, $op2$$constant);
    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(cmp_br_reg_imm);
%}

// Fused unsigned int compare (register, register) and conditional branch,
// long form: CMP, BP on icc, and a delay-slot NOP, consistent with size(12).
// The compare overwrites icc, hence KILL.
instruct cmpU_reg_branch(cmpOpU cmp, iRegI op1, iRegI op2, label labl, flagsRegU icc) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl, KILL icc);

  size(12);
  ins_cost(BRANCH_COST);
  format %{ "CMP    $op1,$op2\t! unsigned\n\t"
            "BP$cmp  $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
    __ cmp($op1$$Register, $op2$$Register);
    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(cmp_br_reg_reg);
%}

// Fused unsigned int compare (register, immI5 constant) and conditional
// branch, long form: CMP, BP on icc, and a delay-slot NOP, consistent with
// size(12). The compare overwrites icc, hence KILL.
instruct cmpU_imm_branch(cmpOpU cmp, iRegI op1, immI5 op2, label labl, flagsRegU icc) %{
  match(If cmp (CmpU op1 op2));
  effect(USE labl, KILL icc);

  size(12);
  ins_cost(BRANCH_COST);
  format %{ "CMP    $op1,$op2\t! unsigned\n\t"
            "BP$cmp  $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
    __ cmp($op1$$Register, $op2$$constant);
    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(cmp_br_reg_imm);
%}

// Fused long compare (register, register) and conditional branch, long form:
// CMP, BP on xcc, and a delay-slot NOP, consistent with size(12).
// The compare overwrites xcc, hence KILL.
instruct cmpL_reg_branch(cmpOp cmp, iRegL op1, iRegL op2, label labl, flagsRegL xcc) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl, KILL xcc);

  size(12);
  ins_cost(BRANCH_COST);
  format %{ "CMP    $op1,$op2\t! long\n\t"
            "BP$cmp   $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
    __ cmp($op1$$Register, $op2$$Register);
    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(cmp_br_reg_reg);
%}

// Fused long compare (register, immL5 constant) and conditional branch, long
// form: CMP, BP on xcc, and a delay-slot NOP, consistent with size(12).
// The compare overwrites xcc, hence KILL.
instruct cmpL_imm_branch(cmpOp cmp, iRegL op1, immL5 op2, label labl, flagsRegL xcc) %{
  match(If cmp (CmpL op1 op2));
  effect(USE labl, KILL xcc);

  size(12);
  ins_cost(BRANCH_COST);
  format %{ "CMP    $op1,$op2\t! long\n\t"
            "BP$cmp   $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
    __ cmp($op1$$Register, $op2$$constant);
    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(cmp_br_reg_imm);
%}

// Compare Pointers and branch
// Fused pointer compare (register, register) and conditional branch, long
// form: CMP, BP on ptr_cc, and a delay-slot NOP, consistent with size(12).
// The compare overwrites pcc, hence KILL.
instruct cmpP_reg_branch(cmpOpP cmp, iRegP op1, iRegP op2, label labl, flagsRegP pcc) %{
  match(If cmp (CmpP op1 op2));
  effect(USE labl, KILL pcc);

  size(12);
  ins_cost(BRANCH_COST);
  format %{ "CMP    $op1,$op2\t! ptr\n\t"
            "B$cmp   $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
    __ cmp($op1$$Register, $op2$$Register);
    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(cmp_br_reg_reg);
%}

// Fused pointer null check and conditional branch, long form: CMP against G0,
// BP on ptr_cc, and a delay-slot NOP, consistent with size(12).
// The compare overwrites pcc, hence KILL.
instruct cmpP_null_branch(cmpOpP cmp, iRegP op1, immP0 null, label labl, flagsRegP pcc) %{
  match(If cmp (CmpP op1 null));
  effect(USE labl, KILL pcc);

  size(12);
  ins_cost(BRANCH_COST);
  format %{ "CMP    $op1,0\t! ptr\n\t"
            "B$cmp   $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
    __ cmp($op1$$Register, G0);
    // bpr() is not used here since it has shorter distance.
    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(cmp_br_reg_reg);
%}

// Fused compressed-oop compare (register, register) and conditional branch,
// long form: CMP, BP on icc, and a delay-slot NOP, consistent with size(12).
// The compare overwrites icc, hence KILL.
instruct cmpN_reg_branch(cmpOp cmp, iRegN op1, iRegN op2, label labl, flagsReg icc) %{
  match(If cmp (CmpN op1 op2));
  effect(USE labl, KILL icc);

  size(12);
  ins_cost(BRANCH_COST);
  format %{ "CMP    $op1,$op2\t! compressed ptr\n\t"
            "BP$cmp   $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
    __ cmp($op1$$Register, $op2$$Register);
    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(cmp_br_reg_reg);
%}

// Fused compressed-oop null check and conditional branch, long form: CMP
// against G0, BP on icc, and a delay-slot NOP, consistent with size(12).
// The compare overwrites icc, hence KILL.
instruct cmpN_null_branch(cmpOp cmp, iRegN op1, immN0 null, label labl, flagsReg icc) %{
  match(If cmp (CmpN op1 null));
  effect(USE labl, KILL icc);

  size(12);
  ins_cost(BRANCH_COST);
  format %{ "CMP    $op1,0\t! compressed ptr\n\t"
            "BP$cmp   $labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
    __ cmp($op1$$Register, G0);
    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(cmp_br_reg_reg);
%}

// Loop back branch
// Fused signed int compare (register, register) and counted-loop back
// branch, long form: CMP, BP on icc, and a delay-slot NOP, consistent with
// size(12). The compare overwrites icc, hence KILL.
instruct cmpI_reg_branchLoopEnd(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{
  match(CountedLoopEnd cmp (CmpI op1 op2));
  effect(USE labl, KILL icc);

  size(12);
  ins_cost(BRANCH_COST);
  format %{ "CMP    $op1,$op2\t! int\n\t"
            "BP$cmp   $labl\t! Loop end" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
    __ cmp($op1$$Register, $op2$$Register);
    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(cmp_br_reg_reg);
%}

// Fused signed int compare (register, immI5 constant) and counted-loop back
// branch, long form: CMP, BP on icc, and a delay-slot NOP, consistent with
// size(12). The compare overwrites icc, hence KILL.
instruct cmpI_imm_branchLoopEnd(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{
  match(CountedLoopEnd cmp (CmpI op1 op2));
  effect(USE labl, KILL icc);

  size(12);
  ins_cost(BRANCH_COST);
  format %{ "CMP    $op1,$op2\t! int\n\t"
            "BP$cmp   $labl\t! Loop end" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;
    __ cmp($op1$$Register, $op2$$constant);
    __ bp((Assembler::Condition)($cmp$$cmpcode), false, Assembler::icc, predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(cmp_br_reg_imm);
%}

// Short compare and branch instructions
// Short variant of cmpI_reg_branch (ins_short_branch): one compare-and-branch
// instruction (cbcond on icc) of size(4), selected only when UseCBCond is
// set. ins_avoid_back_to_back asks that such instructions not be adjacent.
instruct cmpI_reg_branch_short(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{
  match(If cmp (CmpI op1 op2));
  predicate(UseCBCond);
  effect(USE labl, KILL icc);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp  $op1,$op2,$labl\t! int" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Check that the assembler permits cbcond for this label.
    assert(__ use_cbcond(*L), "back to back cbcond");
    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
  %}
  ins_short_branch(1);
  ins_avoid_back_to_back(1);
  ins_pipe(cbcond_reg_reg);
%}

// Short variant of cmpI_imm_branch (ins_short_branch): one compare-and-branch
// instruction (cbcond on icc with an immI5 constant) of size(4), selected
// only when UseCBCond is set.
instruct cmpI_imm_branch_short(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{
  match(If cmp (CmpI op1 op2));
  predicate(UseCBCond);
  effect(USE labl, KILL icc);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp  $op1,$op2,$labl\t! int" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Check that the assembler permits cbcond for this label.
    assert(__ use_cbcond(*L), "back to back cbcond");
    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L);
  %}
  ins_short_branch(1);
  ins_avoid_back_to_back(1);
  ins_pipe(cbcond_reg_imm);
%}

// Short variant of cmpU_reg_branch (ins_short_branch): one compare-and-branch
// instruction (cbcond on icc, unsigned operator) of size(4), selected only
// when UseCBCond is set.
instruct cmpU_reg_branch_short(cmpOpU cmp, iRegI op1, iRegI op2, label labl, flagsRegU icc) %{
  match(If cmp (CmpU op1 op2));
  predicate(UseCBCond);
  effect(USE labl, KILL icc);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp $op1,$op2,$labl\t! unsigned" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Check that the assembler permits cbcond for this label.
    assert(__ use_cbcond(*L), "back to back cbcond");
    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
  %}
  ins_short_branch(1);
  ins_avoid_back_to_back(1);
  ins_pipe(cbcond_reg_reg);
%}

// Short variant of cmpU_imm_branch (ins_short_branch): one compare-and-branch
// instruction (cbcond on icc, unsigned operator, immI5 constant) of size(4),
// selected only when UseCBCond is set.
instruct cmpU_imm_branch_short(cmpOpU cmp, iRegI op1, immI5 op2, label labl, flagsRegU icc) %{
  match(If cmp (CmpU op1 op2));
  predicate(UseCBCond);
  effect(USE labl, KILL icc);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp $op1,$op2,$labl\t! unsigned" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Check that the assembler permits cbcond for this label.
    assert(__ use_cbcond(*L), "back to back cbcond");
    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L);
  %}
  ins_short_branch(1);
  ins_avoid_back_to_back(1);
  ins_pipe(cbcond_reg_imm);
%}

// Short variant of cmpL_reg_branch (ins_short_branch): one compare-and-branch
// instruction (cbcond on xcc) of size(4), selected only when UseCBCond is
// set.
instruct cmpL_reg_branch_short(cmpOp cmp, iRegL op1, iRegL op2, label labl, flagsRegL xcc) %{
  match(If cmp (CmpL op1 op2));
  predicate(UseCBCond);
  effect(USE labl, KILL xcc);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "CXB$cmp  $op1,$op2,$labl\t! long" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Check that the assembler permits cbcond for this label.
    assert(__ use_cbcond(*L), "back to back cbcond");
    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$Register, *L);
  %}
  ins_short_branch(1);
  ins_avoid_back_to_back(1);
  ins_pipe(cbcond_reg_reg);
%}

// Short variant of cmpL_imm_branch (ins_short_branch): one compare-and-branch
// instruction (cbcond on xcc with an immL5 constant) of size(4), selected
// only when UseCBCond is set.
instruct cmpL_imm_branch_short(cmpOp cmp, iRegL op1, immL5 op2, label labl, flagsRegL xcc) %{
  match(If cmp (CmpL op1 op2));
  predicate(UseCBCond);
  effect(USE labl, KILL xcc);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "CXB$cmp  $op1,$op2,$labl\t! long" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Check that the assembler permits cbcond for this label.
    assert(__ use_cbcond(*L), "back to back cbcond");
    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$constant, *L);
  %}
  ins_short_branch(1);
  ins_avoid_back_to_back(1);
  ins_pipe(cbcond_reg_imm);
%}

// Compare Pointers and branch
// Short variant of cmpP_reg_branch (ins_short_branch): one compare-and-branch
// instruction (cbcond on ptr_cc) of size(4), selected only when UseCBCond is
// set. The _LP64 conditional only chooses the mnemonic shown in the format;
// the encoding itself is the same in both configurations.
instruct cmpP_reg_branch_short(cmpOpP cmp, iRegP op1, iRegP op2, label labl, flagsRegP pcc) %{
  match(If cmp (CmpP op1 op2));
  predicate(UseCBCond);
  effect(USE labl, KILL pcc);

  size(4);
  ins_cost(BRANCH_COST);
#ifdef _LP64
  format %{ "CXB$cmp $op1,$op2,$labl\t! ptr" %}
#else
  format %{ "CWB$cmp $op1,$op2,$labl\t! ptr" %}
#endif
  ins_encode %{
    Label* L = $labl$$label;
    // Check that the assembler permits cbcond for this label.
    assert(__ use_cbcond(*L), "back to back cbcond");
    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::ptr_cc, $op1$$Register, $op2$$Register, *L);
  %}
  ins_short_branch(1);
  ins_avoid_back_to_back(1);
  ins_pipe(cbcond_reg_reg);
%}

// Short variant of cmpP_null_branch (ins_short_branch): one
// compare-and-branch instruction (cbcond on ptr_cc against G0) of size(4),
// selected only when UseCBCond is set. The _LP64 conditional only chooses
// the mnemonic shown in the format.
instruct cmpP_null_branch_short(cmpOpP cmp, iRegP op1, immP0 null, label labl, flagsRegP pcc) %{
  match(If cmp (CmpP op1 null));
  predicate(UseCBCond);
  effect(USE labl, KILL pcc);

  size(4);
  ins_cost(BRANCH_COST);
#ifdef _LP64
  format %{ "CXB$cmp $op1,0,$labl\t! ptr" %}
#else
  format %{ "CWB$cmp $op1,0,$labl\t! ptr" %}
#endif
  ins_encode %{
    Label* L = $labl$$label;
    // Check that the assembler permits cbcond for this label.
    assert(__ use_cbcond(*L), "back to back cbcond");
    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::ptr_cc, $op1$$Register, G0, *L);
  %}
  ins_short_branch(1);
  ins_avoid_back_to_back(1);
  ins_pipe(cbcond_reg_reg);
%}

// Short variant of cmpN_reg_branch (ins_short_branch): one compare-and-branch
// instruction (cbcond on icc) of size(4) for compressed oops, selected only
// when UseCBCond is set. ins_avoid_back_to_back asks that such instructions
// not be adjacent.
instruct cmpN_reg_branch_short(cmpOp cmp, iRegN op1, iRegN op2, label labl, flagsReg icc) %{
  match(If cmp (CmpN op1 op2));
  predicate(UseCBCond);
  effect(USE labl, KILL icc);

  size(4);
  ins_cost(BRANCH_COST);
  // The second operand needs the '$' prefix to be substituted in the
  // disassembly text; without it the literal text "op2" is printed.
  format %{ "CWB$cmp  $op1,$op2,$labl\t! compressed ptr" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Check that the assembler permits cbcond for this label.
    assert(__ use_cbcond(*L), "back to back cbcond");
    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
  %}
  ins_short_branch(1);
  ins_avoid_back_to_back(1);
  ins_pipe(cbcond_reg_reg);
%}

// Short variant of cmpN_null_branch (ins_short_branch): one
// compare-and-branch instruction (cbcond on icc against G0) of size(4) for
// compressed oops, selected only when UseCBCond is set.
instruct cmpN_null_branch_short(cmpOp cmp, iRegN op1, immN0 null, label labl, flagsReg icc) %{
  match(If cmp (CmpN op1 null));
  predicate(UseCBCond);
  effect(USE labl, KILL icc);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp  $op1,0,$labl\t! compressed ptr" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Check that the assembler permits cbcond for this label.
    assert(__ use_cbcond(*L), "back to back cbcond");
    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, G0, *L);
  %}
  ins_short_branch(1);
  ins_avoid_back_to_back(1);
  ins_pipe(cbcond_reg_reg);
%}

// Loop back branch
// Short variant of cmpI_reg_branchLoopEnd (ins_short_branch): one
// compare-and-branch instruction (cbcond on icc) of size(4) for the
// counted-loop back branch, selected only when UseCBCond is set.
instruct cmpI_reg_branchLoopEnd_short(cmpOp cmp, iRegI op1, iRegI op2, label labl, flagsReg icc) %{
  match(CountedLoopEnd cmp (CmpI op1 op2));
  predicate(UseCBCond);
  effect(USE labl, KILL icc);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp  $op1,$op2,$labl\t! Loop end" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Check that the assembler permits cbcond for this label.
    assert(__ use_cbcond(*L), "back to back cbcond");
    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
  %}
  ins_short_branch(1);
  ins_avoid_back_to_back(1);
  ins_pipe(cbcond_reg_reg);
%}

// Short variant of cmpI_imm_branchLoopEnd (ins_short_branch): one
// compare-and-branch instruction (cbcond on icc with an immI5 constant) of
// size(4) for the counted-loop back branch, selected only when UseCBCond is
// set.
instruct cmpI_imm_branchLoopEnd_short(cmpOp cmp, iRegI op1, immI5 op2, label labl, flagsReg icc) %{
  match(CountedLoopEnd cmp (CmpI op1 op2));
  predicate(UseCBCond);
  effect(USE labl, KILL icc);

  size(4);
  ins_cost(BRANCH_COST);
  format %{ "CWB$cmp  $op1,$op2,$labl\t! Loop end" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Check that the assembler permits cbcond for this label.
    assert(__ use_cbcond(*L), "back to back cbcond");
    __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L);
  %}
  ins_short_branch(1);
  ins_avoid_back_to_back(1);
  ins_pipe(cbcond_reg_imm);
%}

// Branch-on-register tests all 64 bits.  We assume that values
// in 64-bit registers always remain zero or sign extended
// unless our code munges the high bits.  Interrupts can chop
// the high order bits to zero or sign at any time.
//
// Int compare against zero folded into a branch on the register contents,
// so no condition codes are involved. Only used when can_branch_register()
// accepts the compare operator and compare nodes.
instruct branchCon_regI(cmpOp_reg cmp, iRegI op1, immI0 zero, label labl) %{
  match(If cmp (CmpI op1 zero));
  predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf));
  effect(USE labl);

  size(8);
  ins_cost(BRANCH_COST);
  format %{ "BR$cmp   $op1,$labl" %}
  ins_encode( enc_bpr( labl, cmp, op1 ) );
  ins_pipe(br_reg);
%}

// Pointer null check folded into a branch on the register contents, so no
// condition codes are involved. Only used when can_branch_register()
// accepts the compare operator and compare nodes.
instruct branchCon_regP(cmpOp_reg cmp, iRegP op1, immP0 null, label labl) %{
  match(If cmp (CmpP op1 null));
  predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf));
  effect(USE labl);

  size(8);
  ins_cost(BRANCH_COST);
  format %{ "BR$cmp   $op1,$labl" %}
  ins_encode( enc_bpr( labl, cmp, op1 ) );
  ins_pipe(br_reg);
%}

// Long compare against zero folded into a branch on the register contents,
// so no condition codes are involved. Only used when can_branch_register()
// accepts the compare operator and compare nodes.
instruct branchCon_regL(cmpOp_reg cmp, iRegL op1, immL0 zero, label labl) %{
  match(If cmp (CmpL op1 zero));
  predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf));
  effect(USE labl);

  size(8);
  ins_cost(BRANCH_COST);
  format %{ "BR$cmp   $op1,$labl" %}
  ins_encode( enc_bpr( labl, cmp, op1 ) );
  ins_pipe(br_reg);
%}


// ============================================================================
// Long Compare
//
// Currently we hold longs in 2 registers.  Comparing such values efficiently
// is tricky.  The flavor of compare used depends on whether we are testing
// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
// The GE test is the negated LT test.  The LE test can be had by commuting
// the operands (yielding a GE test) and then negating; negate again for the
// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
// NE test is negated from that.

// Due to a shortcoming in the ADLC, it mixes up expressions like:
// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
// are collapsed internally in the ADLC's dfa-gen code.  The match for
// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
// foo match ends up with the wrong leaf.  One fix is to not match both
// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
// both forms beat the trinary form of long-compare and both are very useful
// on Intel which has so few registers.

// Conditional direct branch on an already computed long condition
// (flagsRegL). Emits BP on xcc plus a delay-slot NOP, consistent with
// size(8).
instruct branchCon_long(cmpOp cmp, flagsRegL xcc, label labl) %{
  match(If cmp xcc);
  effect(USE labl);

  size(8);
  ins_cost(BRANCH_COST);
  format %{ "BP$cmp   $xcc,$labl" %}
  ins_encode %{
    Label* L = $labl$$label;
    // Static prediction: backward branches taken, forward branches not taken.
    Assembler::Predict predict_taken =
      cbuf.is_backward_branch(*L) ? Assembler::pt : Assembler::pn;

    __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L);
    __ delayed()->nop();
  %}
  ins_pipe(br_cc);
%}

// Manifest a CmpL3 result in an integer register.  Very painful.
// This is the test to avoid.
// Per the format, $dst receives -1, 1 or 0 via two annulled branches with
// moves in their delay slots: six instructions, consistent with size(24).
// The condition codes are clobbered, hence KILL ccr.
instruct cmpL3_reg_reg(iRegI dst, iRegL src1, iRegL src2, flagsReg ccr ) %{
  match(Set dst (CmpL3 src1 src2) );
  effect( KILL ccr );
  ins_cost(6*DEFAULT_COST);
  size(24);
  format %{ "CMP    $src1,$src2\t\t! long\n"
          "\tBLT,a,pn done\n"
          "\tMOV    -1,$dst\t! delay slot\n"
          "\tBGT,a,pn done\n"
          "\tMOV    1,$dst\t! delay slot\n"
          "\tCLR    $dst\n"
    "done:"     %}
  ins_encode( cmpl_flag(src1,src2,dst) );
  ins_pipe(cmpL_reg);
%}

// Conditional move
// Conditional move of a long register, selected by the long-compare
// condition codes (flagsRegL, encoded with Assembler::xcc).  $dst is both
// an input and the result of the match rule.
instruct cmovLL_reg(cmpOp cmp, flagsRegL xcc, iRegL dst, iRegL src) %{
  match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src)));
  ins_cost(150);
  format %{ "MOV$cmp  $xcc,$src,$dst\t! long" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::xcc)) );
  ins_pipe(ialu_reg);
%}

// Conditional move of the long constant zero (immL0) into a long register,
// selected by the long-compare condition codes.  Costed slightly below the
// register form (140 vs. 150).
instruct cmovLL_imm(cmpOp cmp, flagsRegL xcc, iRegL dst, immL0 src) %{
  match(Set dst (CMoveL (Binary cmp xcc) (Binary dst src)));
  ins_cost(140);
  format %{ "MOV$cmp  $xcc,$src,$dst\t! long" %}
  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::xcc)) );
  ins_pipe(ialu_imm);
%}

// Conditional move of an int register whose condition comes from a long
// compare (flagsRegL, encoded with Assembler::xcc).
instruct cmovIL_reg(cmpOp cmp, flagsRegL xcc, iRegI dst, iRegI src) %{
  match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src)));
  ins_cost(150);
  format %{ "MOV$cmp  $xcc,$src,$dst" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::xcc)) );
  ins_pipe(ialu_reg);
%}

// Conditional move of an int immediate (operand class immI11) whose
// condition comes from a long compare.  Costed slightly below the register
// form (140 vs. 150).
instruct cmovIL_imm(cmpOp cmp, flagsRegL xcc, iRegI dst, immI11 src) %{
  match(Set dst (CMoveI (Binary cmp xcc) (Binary dst src)));
  ins_cost(140);
  format %{ "MOV$cmp  $xcc,$src,$dst" %}
  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::xcc)) );
  ins_pipe(ialu_imm);
%}

// Conditional move of a narrow-oop register (iRegN) whose condition comes
// from a long compare (flagsRegL, encoded with Assembler::xcc).
instruct cmovNL_reg(cmpOp cmp, flagsRegL xcc, iRegN dst, iRegN src) %{
  match(Set dst (CMoveN (Binary cmp xcc) (Binary dst src)));
  ins_cost(150);
  format %{ "MOV$cmp  $xcc,$src,$dst" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::xcc)) );
  ins_pipe(ialu_reg);
%}

// Conditional move of a pointer register whose condition comes from a long
// compare (flagsRegL, encoded with Assembler::xcc).
instruct cmovPL_reg(cmpOp cmp, flagsRegL xcc, iRegP dst, iRegP src) %{
  match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
  ins_cost(150);
  format %{ "MOV$cmp  $xcc,$src,$dst" %}
  ins_encode( enc_cmov_reg(cmp,dst,src, (Assembler::xcc)) );
  ins_pipe(ialu_reg);
%}

// Conditional move of the null pointer constant (immP0) into a pointer
// register, with the condition coming from a long compare.  Costed slightly
// below the register form (140 vs. 150).
instruct cmovPL_imm(cmpOp cmp, flagsRegL xcc, iRegP dst, immP0 src) %{
  match(Set dst (CMoveP (Binary cmp xcc) (Binary dst src)));
  ins_cost(140);
  format %{ "MOV$cmp  $xcc,$src,$dst" %}
  ins_encode( enc_cmov_imm(cmp,dst,src, (Assembler::xcc)) );
  ins_pipe(ialu_imm);
%}

// Conditional move of a single-precision float register whose condition
// comes from a long compare (flagsRegL, encoded with Assembler::xcc).
instruct cmovFL_reg(cmpOp cmp, flagsRegL xcc, regF dst, regF src) %{
  match(Set dst (CMoveF (Binary cmp xcc) (Binary dst src)));
  ins_cost(150);
  // NOTE(review): 0x101 presumably selects the single-precision FMOVS form
  // inside enc_cmovf_reg (the double rule uses 0x102) -- confirm there.
  opcode(0x101);
  format %{ "FMOVS$cmp $xcc,$src,$dst" %}
  ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::xcc)) );
  ins_pipe(int_conditional_float_move);
%}

// Conditional move of a double-precision float register whose condition
// comes from a long compare (flagsRegL, encoded with Assembler::xcc).
instruct cmovDL_reg(cmpOp cmp, flagsRegL xcc, regD dst, regD src) %{
  match(Set dst (CMoveD (Binary cmp xcc) (Binary dst src)));
  ins_cost(150);
  // NOTE(review): 0x102 presumably selects the double-precision FMOVD form
  // inside enc_cmovf_reg (the float rule uses 0x101) -- confirm there.
  opcode(0x102);
  format %{ "FMOVD$cmp $xcc,$src,$dst" %}
  ins_encode( enc_cmovf_reg(cmp,dst,src, (Assembler::xcc)) );
  ins_pipe(int_conditional_float_move);
%}

// ============================================================================
// Safepoint Instruction
// Safepoint poll: a single pointer-sized load from the address held in
// $poll, with the loaded value discarded into G0.  The load is tagged with
// a poll_type relocation.
instruct safePoint_poll(iRegP poll) %{
  match(SafePoint poll);
  effect(USE poll);

  size(4);
#ifdef _LP64
  format %{ "LDX    [$poll],R_G0\t! Safepoint: poll for GC" %}
#else
  format %{ "LDUW   [$poll],R_G0\t! Safepoint: poll for GC" %}
#endif
  ins_encode %{
    Register poll_addr = $poll$$Register;

    // The relocation must be recorded immediately before the load it tags.
    __ relocate(relocInfo::poll_type);
    __ ld_ptr(poll_addr, 0, G0);
  %}
  ins_pipe(loadPollP);
%}

// ============================================================================
// Call Instructions
// Call Java Static Instruction
// Matches every CallStaticJava node that is NOT a method-handle invoke;
// those are handled by CallStaticJavaHandle.
instruct CallStaticJavaDirect( method meth ) %{
  match(CallStaticJava);
  predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth);

  // CALL plus the NOP shown in the format: 2 * 4 bytes.
  size(8);
  ins_cost(CALL_COST);
  format %{ "CALL,static  ; NOP ==> " %}
  ins_encode( Java_Static_Call( meth ), call_epilog );
  ins_pipe(simple_call);
%}

// Call Java Static Instruction (method handle version)
// Same call encoding as CallStaticJavaDirect, bracketed by preserve_SP and
// restore_SP.  The L7 register operand (l7_mh_SP_save) is killed.
instruct CallStaticJavaHandle(method meth, l7RegP l7_mh_SP_save) %{
  match(CallStaticJava);
  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
  effect(USE meth, KILL l7_mh_SP_save);

  // Twice the size of the plain static call (16 vs. 8 bytes).
  size(16);
  ins_cost(CALL_COST);
  format %{ "CALL,static/MethodHandle" %}
  ins_encode(preserve_SP, Java_Static_Call(meth), restore_SP, call_epilog);
  ins_pipe(simple_call);
%}

// Call Java Dynamic Instruction
// Per the format, a SET into R_G5 precedes the call.  No fixed size() is
// declared for this rule.
instruct CallDynamicJavaDirect( method meth ) %{
  match(CallDynamicJava);
  effect(USE meth);

  ins_cost(CALL_COST);
  format %{ "SET    (empty),R_G5\n\t"
            "CALL,dynamic  ; NOP ==> " %}
  ins_encode( Java_Dynamic_Call( meth ), call_epilog );
  ins_pipe(call);
%}

// Call Runtime Instruction
// Calls into the VM runtime through the Java_To_Runtime encoding, followed
// by call_epilog and adjust_long_from_native_call.  The L7 register operand
// is killed.
instruct CallRuntimeDirect(method meth, l7RegP l7) %{
  match(CallRuntime);
  effect(USE meth, KILL l7);
  ins_cost(CALL_COST);
  format %{ "CALL,runtime" %}
  ins_encode( Java_To_Runtime( meth ),
              call_epilog, adjust_long_from_native_call );
  ins_pipe(simple_call);
%}

// Call runtime without safepoint - same as CallRuntime
// Uses the same encoding sequence and register effects as
// CallRuntimeDirect; only the matched node (CallLeaf) and the format differ.
instruct CallLeafDirect(method meth, l7RegP l7) %{
  match(CallLeaf);
  effect(USE meth, KILL l7);
  ins_cost(CALL_COST);
  format %{ "CALL,runtime leaf" %}
  ins_encode( Java_To_Runtime( meth ),
              call_epilog,
              adjust_long_from_native_call );
  ins_pipe(simple_call);
%}

// Call runtime without safepoint - same as CallLeaf
// Uses the same encoding sequence and register effects as CallLeafDirect;
// only the matched node (CallLeafNoFP) and the format differ.
instruct CallLeafNoFPDirect(method meth, l7RegP l7) %{
  match(CallLeafNoFP);
  effect(USE meth, KILL l7);
  ins_cost(CALL_COST);
  format %{ "CALL,runtime leaf nofp" %}
  ins_encode( Java_To_Runtime( meth ),
              call_epilog,
              adjust_long_from_native_call );
  ins_pipe(simple_call);
%}

// Tail Call; Jump from runtime stub to Java code.
// Also known as an 'interprocedural jump'.
// Target of jump will eventually return to caller.
// TailJump below removes the return address.
// The jump target is pinned to G3 and the method oop to the inline-cache
// register by the operand classes; only the jump target is passed to the
// encoding.
instruct TailCalljmpInd(g3RegP jump_target, inline_cache_regP method_oop) %{
  match(TailCall jump_target method_oop );

  ins_cost(CALL_COST);
  format %{ "Jmp     $jump_target  ; NOP \t! $method_oop holds method oop" %}
  ins_encode(form_jmpl(jump_target));
  ins_pipe(tail_call);
%}


// Return Instruction
// Placeholder rule for the Return node: it has an empty encoding and
// size 0, so no machine code is produced here.
instruct Ret() %{
  match(Return);

  // The epilogue node did the ret already.
  size(0);
  format %{ "! return" %}
  ins_encode();
  ins_pipe(empty);
%}


// Tail Jump; remove the return address; jump to target.
// TailCall above leaves the return address around.
// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2).
// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a
// "restore" before this instruction (in Epilogue), we need to materialize it
// in %i0.
// The jump target is pinned to G1 by its operand class; only it is passed
// to the encoding.
instruct tailjmpInd(g1RegP jump_target, i0RegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(CALL_COST);
  format %{ "! discard R_O7\n\t"
            "Jmp     $jump_target  ; ADD O7,8,O1 \t! $ex_oop holds exc. oop" %}
  ins_encode(form_jmpl_set_exception_pc(jump_target));
  // Earlier encoding, kept for reference only (not active):
  // opcode(Assembler::jmpl_op3, Assembler::arith_op);
  // The hack duplicates the exception oop into G3, so that CreateEx can use it there.
  // ins_encode( form3_rs1_simm13_rd( jump_target, 0x00, R_G0 ), move_return_pc_to_o1() );
  ins_pipe(tail_call);
%}

// Create exception oop: created by stack-crawling runtime code.
// Created exception is now available to this handler, and is setup
// just prior to jumping to this handler.  No code emitted.
// The rule only tells the register allocator that the CreateEx result
// lives in O0 (operand class o0RegP); it has zero cost and zero size.
instruct CreateException( o0RegP ex_oop )
%{
  match(Set ex_oop (CreateEx));
  ins_cost(0);

  size(0);
  // use the following format syntax
  format %{ "! exception oop is in R_O0; no code emitted" %}
  ins_encode();
  ins_pipe(empty);
%}


// Rethrow exception:
// The exception oop will come in the first argument position.
// Then JUMP (not call) to the rethrow stub code.
// The rule takes no operands and declares no fixed size(); the jump
// sequence comes from the enc_rethrow encoding class.
instruct RethrowException()
%{
  match(Rethrow);
  ins_cost(CALL_COST);

  // use the following format syntax
  format %{ "Jmp    rethrow_stub" %}
  ins_encode(enc_rethrow);
  ins_pipe(tail_call);
%}


// Die now
// Matches the Halt node and emits a single 4-byte illegal-trap instruction
// (see the format), so execution cannot continue past this point.
instruct ShouldNotReachHere( )
%{
  match(Halt);
  ins_cost(CALL_COST);

  size(4);
  // Use the following format syntax
  format %{ "ILLTRAP   ; ShouldNotReachHere" %}
  ins_encode( form2_illtrap() );
  ins_pipe(tail_call);
%}

// ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
// array for an instance of the superklass.  Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()).  Return
// not zero for a miss or zero for a hit.  The encoding ALSO sets flags.
// Register assignment is fixed by the operand classes: result in O0, sub in
// O1, super in O2.  The pointer condition codes and O7 are killed.
instruct partialSubtypeCheck( o0RegP index, o1RegP sub, o2RegP super, flagsRegP pcc, o7RegP o7 ) %{
  match(Set index (PartialSubtypeCheck sub super));
  effect( KILL pcc, KILL o7 );
  ins_cost(DEFAULT_COST*10);
  format %{ "CALL   PartialSubtypeCheck\n\tNOP" %}
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe(partial_subtype_check_pipe);
%}

// Variant of partialSubtypeCheck for when the result is only compared with
// null: the rule's result is the condition-code register (Set pcc), and the
// O0 index register is merely killed.  The encoding is the same as for
// partialSubtypeCheck.
instruct partialSubtypeCheck_vs_zero( flagsRegP pcc, o1RegP sub, o2RegP super, immP0 zero, o0RegP idx, o7RegP o7 ) %{
  match(Set pcc (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL idx, KILL o7 );
  ins_cost(DEFAULT_COST*10);
  format %{ "CALL   PartialSubtypeCheck\n\tNOP\t# (sets condition codes)" %}
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe(partial_subtype_check_pipe);
%}


// ============================================================================
// inlined locking and unlocking

// Inlined fast-path monitor enter.  The outcome is delivered in the pointer
// condition codes (Set pcc).  $box is pinned to O1 and is consumed and
// clobbered (USE_KILL); O7 ($scratch) is killed and $scratch2 is an
// allocator-chosen temp.
instruct cmpFastLock(flagsRegP pcc, iRegP object, o1RegP box, iRegP scratch2, o7RegP scratch ) %{
  match(Set pcc (FastLock object box));

  effect(TEMP scratch2, USE_KILL box, KILL scratch);
  ins_cost(100);

  format %{ "FASTLOCK  $object,$box\t! kills $box,$scratch,$scratch2" %}
  ins_encode( Fast_Lock(object, box, scratch, scratch2) );
  ins_pipe(long_memory_op);
%}


// Inlined fast-path monitor exit; mirrors cmpFastLock.  The outcome is
// delivered in the pointer condition codes (Set pcc), with the same
// register effects: $box (O1) is USE_KILLed, O7 ($scratch) is killed and
// $scratch2 is a temp.
instruct cmpFastUnlock(flagsRegP pcc, iRegP object, o1RegP box, iRegP scratch2, o7RegP scratch ) %{
  match(Set pcc (FastUnlock object box));
  effect(TEMP scratch2, USE_KILL box, KILL scratch);
  ins_cost(100);

  format %{ "FASTUNLOCK  $object,$box\t! kills $box,$scratch,$scratch2" %}
  ins_encode( Fast_Unlock(object, box, scratch, scratch2) );
  ins_pipe(long_memory_op);
%}

// The encodings are generic.
// Simple ClearArray loop, used when block zeroing is not selected for this
// node (see the predicate).  Copies $cnt into $temp and then stores a zero
// doubleword at [$base + $temp] for each 8 subtracted from $temp, walking
// from the end of the area towards $base.
instruct clear_array(iRegX cnt, iRegP base, iRegX temp, Universe dummy, flagsReg ccr) %{
  predicate(!use_block_zeroing(n->in(2)) );
  match(Set dummy (ClearArray cnt base));
  effect(TEMP temp, KILL ccr);
  ins_cost(300);
  format %{ "MOV    $cnt,$temp\n"
    "loop:   SUBcc  $temp,8,$temp\t! Count down a dword of bytes\n"
    "        BRge   loop\t\t! Clearing loop\n"
    "        STX    G0,[$base+$temp]\t! delay slot" %}

  ins_encode %{
    // NOTE(review): the loop uses the count as a byte offset (it is
    // decremented by 8 per stored doubleword); base is expected to be
    // doubleword aligned -- confirm against the ClearArray producer.
    Register count_in  = $cnt$$Register;
    Register remaining = $temp$$Register;
    Register base_addr = $base$$Register;

    // Work on a copy so that $cnt itself is left untouched.
    __ mov(count_in, remaining);

    // Count down by one doubleword per iteration; the store sits in the
    // delay slot of the loop branch.
    Label zero_loop;
    __ bind(zero_loop);
    __ deccc(remaining, 8);
    __ br(Assembler::greaterEqual, true, Assembler::pt, zero_loop);
    __ delayed()-> stx(G0, base_addr, remaining);
    // %%%% this mini-loop must not cross a cache boundary!
  %}
  ins_pipe(long_memory_op);
%}

// ClearArray using block-initializing stores (BIS), temp-free variant.
// No scratch register is handed to bis_zeroing (G0 is passed in its place),
// so this rule is restricted to the case where BlockZeroingLowLimit fits a
// simm13 immediate.  The complementary case is covered by clear_array_bis_2,
// which supplies a real temp.  Without this restriction both rules would
// match the same node at the same cost.
// $cnt (G1) and $base (O0) are consumed and clobbered; the integer
// condition codes are killed.
instruct clear_array_bis(g1RegX cnt, o0RegP base, Universe dummy, flagsReg ccr) %{
  predicate(use_block_zeroing(n->in(2)) && Assembler::is_simm13((int)BlockZeroingLowLimit));
  match(Set dummy (ClearArray cnt base));
  effect(USE_KILL cnt, USE_KILL base, KILL ccr);
  ins_cost(300);
  format %{ "CLEAR  [$base, $cnt]\t! ClearArray" %}

  ins_encode %{

    assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");
    Register to    = $base$$Register;
    Register count = $cnt$$Register;

    Label Ldone;
    __ nop(); // Separate short branches
    // Use BIS for zeroing (temp is not used).
    __ bis_zeroing(to, count, G0, Ldone);
    __ bind(Ldone);

  %}
  ins_pipe(long_memory_op);
%}

// ClearArray using block-initializing stores (BIS), variant with a scratch
// register.  Selected when block zeroing applies and BlockZeroingLowLimit
// does not fit a simm13 immediate; $tmp is passed to bis_zeroing as its
// temp.  $cnt (G1) and $base (O0) are consumed and clobbered; the integer
// condition codes are killed.
instruct clear_array_bis_2(g1RegX cnt, o0RegP base, iRegX tmp, Universe dummy, flagsReg ccr) %{
  predicate(use_block_zeroing(n->in(2)) && !Assembler::is_simm13((int)BlockZeroingLowLimit));
  match(Set dummy (ClearArray cnt base));
  effect(TEMP tmp, USE_KILL cnt, USE_KILL base, KILL ccr);
  ins_cost(300);
  format %{ "CLEAR  [$base, $cnt]\t! ClearArray" %}

  ins_encode %{
    assert(MinObjAlignmentInBytes >= BytesPerLong, "need alternate implementation");

    Register dest_addr = $base$$Register;
    Register remaining = $cnt$$Register;
    Register scratch   = $tmp$$Register;

    __ nop(); // Separate short branches

    // Zero with BIS; the label handed to bis_zeroing is bound right after.
    Label zeroing_done;
    __ bis_zeroing(dest_addr, remaining, scratch, zeroing_done);
    __ bind(zeroing_done);
  %}
  ins_pipe(long_memory_op);
%}

// Intrinsic string comparison (StrComp).  All four inputs are pinned to
// fixed registers (O0, O1, G3, G4) and are consumed and clobbered; O7 and
// the integer condition codes are killed as well.  The result lands in an
// allocator-chosen register of class notemp_iRegI.
instruct string_compare(o0RegP str1, o1RegP str2, g3RegI cnt1, g4RegI cnt2, notemp_iRegI result,
                        o7RegI tmp, flagsReg ccr) %{
  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL ccr, KILL tmp);
  ins_cost(300);
  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp" %}
  ins_encode( enc_String_Compare(str1, str2, cnt1, cnt2, result) );
  ins_pipe(long_memory_op);
%}

// Intrinsic string equality (StrEquals) over a single shared count.  The
// inputs are pinned to O0, O1 and G3 and are consumed and clobbered; O7 and
// the integer condition codes are killed as well.
instruct string_equals(o0RegP str1, o1RegP str2, g3RegI cnt, notemp_iRegI result,
                       o7RegI tmp, flagsReg ccr) %{
  match(Set result (StrEquals (Binary str1 str2) cnt));
  effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp, KILL ccr);
  ins_cost(300);
  format %{ "String Equals $str1,$str2,$cnt -> $result   // KILL $tmp" %}
  ins_encode( enc_String_Equals(str1, str2, cnt, result) );
  ins_pipe(long_memory_op);
%}

// Intrinsic array equality (AryEq).  Both array pointers are pinned to O0
// and O1 and are consumed and clobbered; G3 ($tmp1), O7 ($tmp2) and the
// integer condition codes are killed.  Only $tmp1 is handed to the encoding.
instruct array_equals(o0RegP ary1, o1RegP ary2, g3RegI tmp1, notemp_iRegI result,
                      o7RegI tmp2, flagsReg ccr) %{
  match(Set result (AryEq ary1 ary2));
  effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL ccr);
  ins_cost(300);
  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1,$tmp2" %}
  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, result));
  ins_pipe(long_memory_op);
%}


//---------- Zeros Count Instructions ------------------------------------------

// Integer.numberOfLeadingZeros built on POPC: the highest set bit is
// smeared into every lower position, the set bits are counted, and the
// count is subtracted from 32.
instruct countLeadingZerosI(iRegI dst, iRegI src, iRegI tmp, flagsReg cr) %{
  predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
  match(Set dst (CountLeadingZerosI src));
  effect(TEMP dst, TEMP tmp, KILL cr);

  // x |= (x >> 1);
  // x |= (x >> 2);
  // x |= (x >> 4);
  // x |= (x >> 8);
  // x |= (x >> 16);
  // return (WORDBITS - popc(x));
  format %{ "SRL     $src,1,$tmp\t! count leading zeros (int)\n\t"
            "SRL     $src,0,$dst\t! 32-bit zero extend\n\t"
            "OR      $dst,$tmp,$dst\n\t"
            "SRL     $dst,2,$tmp\n\t"
            "OR      $dst,$tmp,$dst\n\t"
            "SRL     $dst,4,$tmp\n\t"
            "OR      $dst,$tmp,$dst\n\t"
            "SRL     $dst,8,$tmp\n\t"
            "OR      $dst,$tmp,$dst\n\t"
            "SRL     $dst,16,$tmp\n\t"
            "OR      $dst,$tmp,$dst\n\t"
            "POPC    $dst,$dst\n\t"
            "MOV     32,$tmp\n\t"
            "SUB     $tmp,$dst,$dst" %}
  ins_encode %{
    Register result  = $dst$$Register;
    Register value   = $src$$Register;
    Register scratch = $tmp$$Register;

    // First smear step reads the source directly; the SRL by 0 copies the
    // zero-extended 32-bit value into the result register.
    __ srl(value, 1, scratch);
    __ srl(value, 0, result);
    __ or3(result, scratch, result);

    // Remaining smear steps, shift distances 2, 4, 8 and 16.
    for (int shift = 2; shift <= 16; shift <<= 1) {
      __ srl(result, shift, scratch);
      __ or3(result, scratch, result);
    }

    // leading zeros = 32 - number of set bits after smearing
    __ popc(result, result);
    __ mov(BitsPerInt, scratch);
    __ sub(scratch, result, result);
  %}
  ins_pipe(ialu_reg);
%}

// Long.numberOfLeadingZeros built on POPC: the highest set bit is smeared
// into every lower position, the set bits are counted, and the count is
// subtracted from 64.
instruct countLeadingZerosL(iRegIsafe dst, iRegL src, iRegL tmp, flagsReg cr) %{
  predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
  match(Set dst (CountLeadingZerosL src));
  effect(TEMP dst, TEMP tmp, KILL cr);

  // x |= (x >> 1);
  // x |= (x >> 2);
  // x |= (x >> 4);
  // x |= (x >> 8);
  // x |= (x >> 16);
  // x |= (x >> 32);
  // return (WORDBITS - popc(x));
  format %{ "SRLX    $src,1,$tmp\t! count leading zeros (long)\n\t"
            "OR      $src,$tmp,$dst\n\t"
            "SRLX    $dst,2,$tmp\n\t"
            "OR      $dst,$tmp,$dst\n\t"
            "SRLX    $dst,4,$tmp\n\t"
            "OR      $dst,$tmp,$dst\n\t"
            "SRLX    $dst,8,$tmp\n\t"
            "OR      $dst,$tmp,$dst\n\t"
            "SRLX    $dst,16,$tmp\n\t"
            "OR      $dst,$tmp,$dst\n\t"
            "SRLX    $dst,32,$tmp\n\t"
            "OR      $dst,$tmp,$dst\n\t"
            "POPC    $dst,$dst\n\t"
            "MOV     64,$tmp\n\t"
            "SUB     $tmp,$dst,$dst" %}
  ins_encode %{
    Register result  = $dst$$Register;
    Register value   = $src$$Register;
    Register scratch = $tmp$$Register;

    // First smear step reads the source and writes the result register.
    __ srlx(value, 1, scratch);
    __ or3( value, scratch, result);

    // Remaining smear steps, shift distances 2, 4, 8, 16 and 32.
    for (int shift = 2; shift <= 32; shift <<= 1) {
      __ srlx(result, shift, scratch);
      __ or3( result, scratch, result);
    }

    // leading zeros = 64 - number of set bits after smearing
    __ popc(result, result);
    __ mov(BitsPerLong, scratch);
    __ sub(scratch, result, result);
  %}
  ins_pipe(ialu_reg);
%}

// Integer.numberOfTrailingZeros built on POPC: ~x & (x - 1) has a one in
// exactly the positions of x's trailing zeros, so counting its set bits
// gives the answer.
instruct countTrailingZerosI(iRegI dst, iRegI src, flagsReg cr) %{
  predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
  match(Set dst (CountTrailingZerosI src));
  effect(TEMP dst, KILL cr);

  // return popc(~x & (x - 1));
  format %{ "SUB     $src,1,$dst\t! count trailing zeros (int)\n\t"
            "ANDN    $dst,$src,$dst\n\t"
            "SRL     $dst,R_G0,$dst\n\t"
            "POPC    $dst,$dst" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;

    // result = (value - 1) & ~value
    __ sub(value, 1, result);
    __ andn(result, value, result);
    // Shift by G0 before counting; NOTE(review): this presumably clears
    // the upper 32 bits so POPC only sees the int part -- confirm.
    __ srl(result, G0, result);
    __ popc(result, result);
  %}
  ins_pipe(ialu_reg);
%}

// Long.numberOfTrailingZeros built on POPC: ~x & (x - 1) has a one in
// exactly the positions of x's trailing zeros, so counting its set bits
// gives the answer.
instruct countTrailingZerosL(iRegIsafe dst, iRegL src, flagsReg cr) %{
  predicate(UsePopCountInstruction);  // See Matcher::match_rule_supported
  match(Set dst (CountTrailingZerosL src));
  effect(TEMP dst, KILL cr);

  // return popc(~x & (x - 1));
  format %{ "SUB     $src,1,$dst\t! count trailing zeros (long)\n\t"
            "ANDN    $dst,$src,$dst\n\t"
            "POPC    $dst,$dst" %}
  ins_encode %{
    Register result = $dst$$Register;
    Register value  = $src$$Register;

    // result = (value - 1) & ~value, then count its set bits.
    __ sub(value, 1, result);
    __ andn(result, value, result);
    __ popc(result, result);
  %}
  ins_pipe(ialu_reg);
%}


//---------- Population Count Instructions -------------------------------------

// Integer.bitCount using POPC.
// POPC counts the set bits of the whole 64-bit source register, so the
// upper word of the int operand is cleared first.  SRL with a shift count
// of G0 zero-extends the low 32 bits; countTrailingZerosI in this file uses
// the same idiom.  Without it a sign-extended negative int would be
// over-counted by the 32 copies of its sign bit.
instruct popCountI(iRegI dst, iRegI src) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));

  format %{ "SRL    $src, G0, $dst\t! clear upper word for 64 bit POPC\n\t"
            "POPC   $dst, $dst" %}
  ins_encode %{
    // Safe even if $dst and $src share a register: the SRL reads $src
    // before $dst is written, and the POPC only reads $dst.
    __ srl($src$$Register, G0, $dst$$Register);
    __ popc($dst$$Register, $dst$$Register);
  %}
  ins_pipe(ialu_reg);
%}

// Note: Long.bitCount(long) returns an int.
// Hence the int-class destination for a long source; a single POPC does
// the work.
instruct popCountL(iRegI dst, iRegL src) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));

  format %{ "POPC   $src, $dst" %}
  ins_encode %{
    Register value  = $src$$Register;
    Register result = $dst$$Register;
    __ popc(value, result);
  %}
  ins_pipe(ialu_reg);
%}


// ============================================================================
//------------Bytes reverse--------------------------------------------------

// Byte-reverse an int that lives in a stack slot by reloading it through
// the little-endian ASI.  O7 is used as the offset register for the load.
instruct bytes_reverse_int(iRegI dst, stackSlotI src) %{
  match(Set dst (ReverseBytesI src));

  // Op cost is artificially doubled to make sure that load or store
  // instructions are preferred over this one which requires a spill
  // onto a stack slot.
  ins_cost(2*DEFAULT_COST + MEMORY_REF_COST);
  format %{ "LDUWA  $src, $dst\t!asi=primary_little" %}

  ins_encode %{
    Register slot_base = $src$$base$$Register;
    Register result    = $dst$$Register;

    // Slot offset including the stack bias, materialized in O7.
    const intptr_t slot_offset = $src$$disp + STACK_BIAS;
    __ set(slot_offset, O7);
    __ lduwa(slot_base, O7, Assembler::ASI_PRIMARY_LITTLE, result);
  %}
  ins_pipe( iload_mem );
%}

// Byte-reverse a long that lives in a stack slot by reloading it through
// the little-endian ASI.  O7 is used as the offset register for the load.
instruct bytes_reverse_long(iRegL dst, stackSlotL src) %{
  match(Set dst (ReverseBytesL src));

  // Op cost is artificially doubled to make sure that load or store
  // instructions are preferred over this one which requires a spill
  // onto a stack slot.
  ins_cost(2*DEFAULT_COST + MEMORY_REF_COST);
  format %{ "LDXA   $src, $dst\t!asi=primary_little" %}

  ins_encode %{
    Register slot_base = $src$$base$$Register;
    Register result    = $dst$$Register;

    // Slot offset including the stack bias, materialized in O7.
    const intptr_t slot_offset = $src$$disp + STACK_BIAS;
    __ set(slot_offset, O7);
    __ ldxa(slot_base, O7, Assembler::ASI_PRIMARY_LITTLE, result);
  %}
  ins_pipe( iload_mem );
%}

// Byte-reverse an unsigned short (char) that lives in an int stack slot by
// reloading the halfword through the little-endian ASI.  O7 is used as the
// offset register for the load.
instruct bytes_reverse_unsigned_short(iRegI dst, stackSlotI src) %{
  match(Set dst (ReverseBytesUS src));

  // Op cost is artificially doubled to make sure that load or store
  // instructions are preferred over this one which requires a spill
  // onto a stack slot.
  ins_cost(2*DEFAULT_COST + MEMORY_REF_COST);
  format %{ "LDUHA  $src, $dst\t!asi=primary_little\n\t" %}

  ins_encode %{
    Register slot_base = $src$$base$$Register;
    Register result    = $dst$$Register;

    // The value was spilled as an int, so the halfword of interest sits
    // 2 bytes into the slot.
    const intptr_t halfword_offset = $src$$disp + STACK_BIAS + 2;
    __ set(halfword_offset, O7);
    __ lduha(slot_base, O7, Assembler::ASI_PRIMARY_LITTLE, result);
  %}
  ins_pipe( iload_mem );
%}

// Byte-reverse a signed short that lives in an int stack slot by reloading
// the halfword through the little-endian ASI (LDSHA, the signed halfword
// load).  O7 is used as the offset register for the load.
instruct bytes_reverse_short(iRegI dst, stackSlotI src) %{
  match(Set dst (ReverseBytesS src));

  // Op cost is artificially doubled to make sure that load or store
  // instructions are preferred over this one which requires a spill
  // onto a stack slot.
  ins_cost(2*DEFAULT_COST + MEMORY_REF_COST);
  format %{ "LDSHA  $src, $dst\t!asi=primary_little\n\t" %}

  ins_encode %{
    Register slot_base = $src$$base$$Register;
    Register result    = $dst$$Register;

    // The value was spilled as an int, so the halfword of interest sits
    // 2 bytes into the slot.
    const intptr_t halfword_offset = $src$$disp + STACK_BIAS + 2;
    __ set(halfword_offset, O7);
    __ ldsha(slot_base, O7, Assembler::ASI_PRIMARY_LITTLE, result);
  %}
  ins_pipe( iload_mem );
%}

// Load Integer reversed byte order
// Folds ReverseBytesI(LoadI) into one little-endian-ASI load; only the
// base+index addressing form (indIndexMemory) is matched.
instruct loadI_reversed(iRegI dst, indIndexMemory src) %{
  match(Set dst (ReverseBytesI (LoadI src)));

  ins_cost(DEFAULT_COST + MEMORY_REF_COST);
  size(4);
  format %{ "LDUWA  $src, $dst\t!asi=primary_little" %}

  ins_encode %{
    Register addr_base  = $src$$base$$Register;
    Register addr_index = $src$$index$$Register;
    Register result     = $dst$$Register;
    __ lduwa(addr_base, addr_index, Assembler::ASI_PRIMARY_LITTLE, result);
  %}
  ins_pipe(iload_mem);
%}

// Load Long - aligned and reversed
// Folds ReverseBytesL(LoadL) into one little-endian-ASI load; only the
// base+index addressing form (indIndexMemory) is matched.
instruct loadL_reversed(iRegL dst, indIndexMemory src) %{
  match(Set dst (ReverseBytesL (LoadL src)));

  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "LDXA   $src, $dst\t!asi=primary_little" %}

  ins_encode %{
    Register addr_base  = $src$$base$$Register;
    Register addr_index = $src$$index$$Register;
    Register result     = $dst$$Register;
    __ ldxa(addr_base, addr_index, Assembler::ASI_PRIMARY_LITTLE, result);
  %}
  ins_pipe(iload_mem);
%}

// Load unsigned short / char reversed byte order
// Folds ReverseBytesUS(LoadUS) into one little-endian-ASI halfword load;
// only the base+index addressing form (indIndexMemory) is matched.
instruct loadUS_reversed(iRegI dst, indIndexMemory src) %{
  match(Set dst (ReverseBytesUS (LoadUS src)));

  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "LDUHA  $src, $dst\t!asi=primary_little" %}

  ins_encode %{
    Register addr_base  = $src$$base$$Register;
    Register addr_index = $src$$index$$Register;
    Register result     = $dst$$Register;
    __ lduha(addr_base, addr_index, Assembler::ASI_PRIMARY_LITTLE, result);
  %}
  ins_pipe(iload_mem);
%}

// Load short reversed byte order
// Folds ReverseBytesS(LoadS) into one little-endian-ASI signed halfword
// load; only the base+index addressing form (indIndexMemory) is matched.
instruct loadS_reversed(iRegI dst, indIndexMemory src) %{
  match(Set dst (ReverseBytesS (LoadS src)));

  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "LDSHA  $src, $dst\t!asi=primary_little" %}

  ins_encode %{
    Register addr_base  = $src$$base$$Register;
    Register addr_index = $src$$index$$Register;
    Register result     = $dst$$Register;
    __ ldsha(addr_base, addr_index, Assembler::ASI_PRIMARY_LITTLE, result);
  %}
  ins_pipe(iload_mem);
%}

// Store Integer reversed byte order
// Folds StoreI(ReverseBytesI) into one little-endian-ASI store; only the
// base+index addressing form (indIndexMemory) is matched.
instruct storeI_reversed(indIndexMemory dst, iRegI src) %{
  match(Set dst (StoreI dst (ReverseBytesI src)));

  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "STWA   $src, $dst\t!asi=primary_little" %}

  ins_encode %{
    Register value      = $src$$Register;
    Register addr_base  = $dst$$base$$Register;
    Register addr_index = $dst$$index$$Register;
    __ stwa(value, addr_base, addr_index, Assembler::ASI_PRIMARY_LITTLE);
  %}
  ins_pipe(istore_mem_reg);
%}

// Store Long reversed byte order
// Folds StoreL(ReverseBytesL) into one little-endian-ASI store; only the
// base+index addressing form (indIndexMemory) is matched.
instruct storeL_reversed(indIndexMemory dst, iRegL src) %{
  match(Set dst (StoreL dst (ReverseBytesL src)));

  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "STXA   $src, $dst\t!asi=primary_little" %}

  ins_encode %{
    Register value      = $src$$Register;
    Register addr_base  = $dst$$base$$Register;
    Register addr_index = $dst$$index$$Register;
    __ stxa(value, addr_base, addr_index, Assembler::ASI_PRIMARY_LITTLE);
  %}
  ins_pipe(istore_mem_reg);
%}

// Store unsigned short/char reversed byte order
// Folds StoreC(ReverseBytesUS) into one little-endian-ASI halfword store;
// only the base+index addressing form (indIndexMemory) is matched.
instruct storeUS_reversed(indIndexMemory dst, iRegI src) %{
  match(Set dst (StoreC dst (ReverseBytesUS src)));

  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "STHA   $src, $dst\t!asi=primary_little" %}

  ins_encode %{
    Register value      = $src$$Register;
    Register addr_base  = $dst$$base$$Register;
    Register addr_index = $dst$$index$$Register;
    __ stha(value, addr_base, addr_index, Assembler::ASI_PRIMARY_LITTLE);
  %}
  ins_pipe(istore_mem_reg);
%}

// Store short reversed byte order
// Folds StoreC(ReverseBytesS) into one little-endian-ASI halfword store;
// the emitted instruction is the same STHA as in storeUS_reversed.  Only
// the base+index addressing form (indIndexMemory) is matched.
instruct storeS_reversed(indIndexMemory dst, iRegI src) %{
  match(Set dst (StoreC dst (ReverseBytesS src)));

  ins_cost(MEMORY_REF_COST);
  size(4);
  format %{ "STHA   $src, $dst\t!asi=primary_little" %}

  ins_encode %{
    Register value      = $src$$Register;
    Register addr_base  = $dst$$base$$Register;
    Register addr_index = $dst$$index$$Register;
    __ stha(value, addr_base, addr_index, Assembler::ASI_PRIMARY_LITTLE);
  %}
  ins_pipe(istore_mem_reg);
%}

//----------PEEPHOLE RULES-----------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// peepmatch ( root_instr_name [preceding_instruction]* );
//
// peepconstraint %{
// (instruction_number.operand_name relational_op instruction_number.operand_name
//  [, ...] );
// // instruction numbers are zero-based using left to right order in peepmatch
//
// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
// // provide an instruction_number.operand_name for each operand that appears
// // in the replacement instruction's match rule
//
// ---------VM FLAGS---------------------------------------------------------
//
// All peephole optimizations can be turned off using -XX:-OptoPeephole
//
// Each peephole rule is given an identifying number starting with zero and
// increasing by one in the order seen by the parser.  An individual peephole
// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
// on the command-line.
//
// ---------CURRENT LIMITATIONS----------------------------------------------
//
// Only match adjacent instructions in same basic block
// Only equality constraints
// Only constraints between operands, not (0.dest_reg == EAX_enc)
// Only one replacement instruction
//
// ---------EXAMPLE----------------------------------------------------------
//
// // pertinent parts of existing instructions in architecture description
// instruct movI(eRegI dst, eRegI src) %{
//   match(Set dst (CopyI src));
// %}
//
// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
//   match(Set dst (AddI dst src));
//   effect(KILL cr);
// %}
//
// // Change (inc mov) to lea
// peephole %{
//   // increment preceded by register-register move
//   peepmatch ( incI_eReg movI );
//   // require that the destination register of the increment
//   // match the destination register of the move
//   peepconstraint ( 0.dst == 1.dst );
//   // construct a replacement instruction that sets
//   // the destination to ( move's source register + one )
//   peepreplace ( incI_eReg_immI1( 0.dst 1.src 0.src ) );
// %}
//

// // Change load of spilled value to only a spill
// instruct storeI(memory mem, eRegI src) %{
//   match(Set mem (StoreI mem src));
// %}
//
// instruct loadI(eRegI dst, memory mem) %{
//   match(Set dst (LoadI mem));
// %}
//
// peephole %{
//   peepmatch ( loadI storeI );
//   peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
//   peepreplace ( storeI( 1.mem 1.mem 1.src ) );
// %}

//----------SMARTSPILL RULES---------------------------------------------------
// These must follow all instruction definitions as they use the names
// defined in the instructions definitions.
//
// SPARC will probably not have any of these rules due to RISC instruction set.

//----------PIPELINE-----------------------------------------------------------
// Rules which define the behavior of the target architecture's pipeline.