changeset 6316:c86f5b83df67

8038297: Avoid placing CTI immediately following cbcond instruction on T4
Summary: Insert a nop between cbcond and CTI
Reviewed-by: kvn, twisti
author iveresov
date Thu, 10 Apr 2014 23:15:13 -0700
parents aabca16ccbca
children 34829a112d52
files src/cpu/sparc/vm/assembler_sparc.hpp src/cpu/sparc/vm/assembler_sparc.inline.hpp src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp src/cpu/sparc/vm/sparc.ad src/share/vm/adlc/output_h.cpp src/share/vm/opto/machnode.hpp src/share/vm/opto/node.hpp src/share/vm/opto/output.cpp
diffstat 8 files changed, 125 insertions(+), 65 deletions(-) [+]
line wrap: on
line diff
--- a/src/cpu/sparc/vm/assembler_sparc.hpp	Fri Apr 11 00:35:28 2014 +0400
+++ b/src/cpu/sparc/vm/assembler_sparc.hpp	Thu Apr 10 23:15:13 2014 -0700
@@ -630,11 +630,20 @@
   }
 
  protected:
+  // Insert a nop if the previously emitted instruction is a cbcond
+  void insert_nop_after_cbcond() {
+    if (UseCBCond && cbcond_before()) {
+      nop();  // padding so a following CTI does not sit directly after the cbcond
+    }
+  }
   // Delay slot helpers
   // cti is called when emitting control-transfer instruction,
   // BEFORE doing the emitting.
   // Only effective when assertion-checking is enabled.
   void cti() {
+    // A cbcond instruction immediately followed by a CTI instruction
+    // introduces pipeline stalls, so that sequence must be avoided.
+    no_cbcond_before();
 #ifdef CHECK_DELAY
     assert_not_delayed("cti should not be in delay slot");
 #endif
@@ -658,7 +667,6 @@
   void no_cbcond_before() {
     assert(offset() == 0 || !cbcond_before(), "cbcond should not follow an other cbcond");
   }
-
 public:
 
   bool use_cbcond(Label& L) {
--- a/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Fri Apr 11 00:35:28 2014 +0400
+++ b/src/cpu/sparc/vm/assembler_sparc.inline.hpp	Thu Apr 10 23:15:13 2014 -0700
@@ -54,33 +54,33 @@
 inline void Assembler::add(Register s1, Register s2, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::add(Register s1, int simm13a, Register d ) { emit_int32( op(arith_op) | rd(d) | op3(add_op3) | rs1(s1) | immed(true) | simm(simm13a, 13) ); }
 
-inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt ) { v9_only();  cti();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(bpr_op2) | wdisp16(intptr_t(d), intptr_t(pc())) | predict(p) | rs1(s1), rt);  has_delay_slot(); }
-inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, Label& L) { bpr( c, a, p, s1, target(L)); }
+inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, address d, relocInfo::relocType rt ) { v9_only(); insert_nop_after_cbcond(); cti();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(bpr_op2) | wdisp16(intptr_t(d), intptr_t(pc())) | predict(p) | rs1(s1), rt);  has_delay_slot(); }
+inline void Assembler::bpr( RCondition c, bool a, Predict p, Register s1, Label& L) { insert_nop_after_cbcond(); bpr( c, a, p, s1, target(L)); }
 
-inline void Assembler::fb( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep();  cti();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(fb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt);  has_delay_slot(); }
-inline void Assembler::fb( Condition c, bool a, Label& L ) { fb(c, a, target(L)); }
+inline void Assembler::fb( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep();  insert_nop_after_cbcond(); cti();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(fb_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt);  has_delay_slot(); }
+inline void Assembler::fb( Condition c, bool a, Label& L ) { insert_nop_after_cbcond(); fb(c, a, target(L)); }
 
-inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only();  cti();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(fbp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt);  has_delay_slot(); }
-inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, Label& L ) { fbp(c, a, cc, p, target(L)); }
+inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only(); insert_nop_after_cbcond(); cti();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(fbp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt);  has_delay_slot(); }
+inline void Assembler::fbp( Condition c, bool a, CC cc, Predict p, Label& L ) { insert_nop_after_cbcond(); fbp(c, a, cc, p, target(L)); }
 
-inline void Assembler::br( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep();  cti();   emit_data( op(branch_op) | annul(a) | cond(c) | op2(br_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt);  has_delay_slot(); }
-inline void Assembler::br( Condition c, bool a, Label& L ) { br(c, a, target(L)); }
+inline void Assembler::br( Condition c, bool a, address d, relocInfo::relocType rt ) { v9_dep(); insert_nop_after_cbcond(); cti();   emit_data( op(branch_op) | annul(a) | cond(c) | op2(br_op2) | wdisp(intptr_t(d), intptr_t(pc()), 22), rt);  has_delay_slot(); }
+inline void Assembler::br( Condition c, bool a, Label& L ) { insert_nop_after_cbcond(); br(c, a, target(L)); }
 
-inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only();  cti();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(bp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt);  has_delay_slot(); }
-inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, Label& L ) { bp(c, a, cc, p, target(L)); }
+inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, address d, relocInfo::relocType rt ) { v9_only();  insert_nop_after_cbcond(); cti();  emit_data( op(branch_op) | annul(a) | cond(c) | op2(bp_op2) | branchcc(cc) | predict(p) | wdisp(intptr_t(d), intptr_t(pc()), 19), rt);  has_delay_slot(); }
+inline void Assembler::bp( Condition c, bool a, CC cc, Predict p, Label& L ) { insert_nop_after_cbcond(); bp(c, a, cc, p, target(L)); }
 
 // compare and branch
 inline void Assembler::cbcond(Condition c, CC cc, Register s1, Register s2, Label& L) { cti();  no_cbcond_before();  emit_data(op(branch_op) | cond_cbcond(c) | op2(bpr_op2) | branchcc(cc) | wdisp10(intptr_t(target(L)), intptr_t(pc())) | rs1(s1) | rs2(s2)); }
 inline void Assembler::cbcond(Condition c, CC cc, Register s1, int simm5, Label& L)   { cti();  no_cbcond_before();  emit_data(op(branch_op) | cond_cbcond(c) | op2(bpr_op2) | branchcc(cc) | wdisp10(intptr_t(target(L)), intptr_t(pc())) | rs1(s1) | immed(true) | simm(simm5, 5)); }
 
-inline void Assembler::call( address d,  relocInfo::relocType rt ) { cti();  emit_data( op(call_op) | wdisp(intptr_t(d), intptr_t(pc()), 30), rt);  has_delay_slot(); assert(rt != relocInfo::virtual_call_type, "must use virtual_call_Relocation::spec"); }
-inline void Assembler::call( Label& L,   relocInfo::relocType rt ) { call( target(L), rt); }
+inline void Assembler::call( address d,  relocInfo::relocType rt ) { insert_nop_after_cbcond(); cti();  emit_data( op(call_op) | wdisp(intptr_t(d), intptr_t(pc()), 30), rt);  has_delay_slot(); assert(rt != relocInfo::virtual_call_type, "must use virtual_call_Relocation::spec"); }
+inline void Assembler::call( Label& L,   relocInfo::relocType rt ) { insert_nop_after_cbcond(); call( target(L), rt); }
 
 inline void Assembler::flush( Register s1, Register s2) { emit_int32( op(arith_op) | op3(flush_op3) | rs1(s1) | rs2(s2)); }
 inline void Assembler::flush( Register s1, int simm13a) { emit_data( op(arith_op) | op3(flush_op3) | rs1(s1) | immed(true) | simm(simm13a, 13)); }
 
-inline void Assembler::jmpl( Register s1, Register s2, Register d ) { cti();  emit_int32( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2));  has_delay_slot(); }
-inline void Assembler::jmpl( Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { cti();  emit_data( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec);  has_delay_slot(); }
+inline void Assembler::jmpl( Register s1, Register s2, Register d ) { insert_nop_after_cbcond(); cti();  emit_int32( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | rs2(s2));  has_delay_slot(); }
+inline void Assembler::jmpl( Register s1, int simm13a, Register d, RelocationHolder const& rspec ) { insert_nop_after_cbcond(); cti();  emit_data( op(arith_op) | rd(d) | op3(jmpl_op3) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec);  has_delay_slot(); }
 
 inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, Register s2, FloatRegister d) { emit_int32( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | rs2(s2) ); }
 inline void Assembler::ldf(FloatRegisterImpl::Width w, Register s1, int simm13a, FloatRegister d, RelocationHolder const& rspec) { emit_data( op(ldst_op) | fd(d, w) | alt_op3(ldf_op3, w) | rs1(s1) | immed(true) | simm(simm13a, 13), rspec); }
--- a/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp	Fri Apr 11 00:35:28 2014 +0400
+++ b/src/cpu/sparc/vm/macroAssembler_sparc.inline.hpp	Thu Apr 10 23:15:13 2014 -0700
@@ -233,6 +233,7 @@
 }
 
 inline void MacroAssembler::br( Condition c, bool a, Predict p, Label& L ) {
+  insert_nop_after_cbcond();
   br(c, a, p, target(L));
 }
 
@@ -248,6 +249,7 @@
 }
 
 inline void MacroAssembler::brx( Condition c, bool a, Predict p, Label& L ) {
+  insert_nop_after_cbcond();
   brx(c, a, p, target(L));
 }
 
@@ -269,6 +271,7 @@
 }
 
 inline void MacroAssembler::fb( Condition c, bool a, Predict p, Label& L ) {
+  insert_nop_after_cbcond();
   fb(c, a, p, target(L));
 }
 
@@ -318,6 +321,7 @@
 }
 
 inline void MacroAssembler::call( Label& L,   relocInfo::relocType rt ) {
+  insert_nop_after_cbcond();
   MacroAssembler::call( target(L), rt);
 }
 
--- a/src/cpu/sparc/vm/sparc.ad	Fri Apr 11 00:35:28 2014 +0400
+++ b/src/cpu/sparc/vm/sparc.ad	Thu Apr 10 23:15:13 2014 -0700
@@ -1268,7 +1268,7 @@
 void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
   Compile* C = ra_->C;
 
-  if( do_polling() && ra_->C->is_method_compilation() ) {
+  if(do_polling() && ra_->C->is_method_compilation()) {
     st->print("SETHI  #PollAddr,L0\t! Load Polling address\n\t");
 #ifdef _LP64
     st->print("LDX    [L0],G0\t!Poll for Safepointing\n\t");
@@ -1277,8 +1277,12 @@
 #endif
   }
 
-  if( do_polling() )
+  if(do_polling()) {
+    if (UseCBCond && !ra_->C->is_method_compilation()) {
+      st->print("NOP\n\t");
+    }
     st->print("RET\n\t");
+  }
 
   st->print("RESTORE");
 }
@@ -1291,15 +1295,20 @@
   __ verify_thread();
 
   // If this does safepoint polling, then do it here
-  if( do_polling() && ra_->C->is_method_compilation() ) {
+  if(do_polling() && ra_->C->is_method_compilation()) {
     AddressLiteral polling_page(os::get_polling_page());
     __ sethi(polling_page, L0);
     __ relocate(relocInfo::poll_return_type);
-    __ ld_ptr( L0, 0, G0 );
+    __ ld_ptr(L0, 0, G0);
   }
 
   // If this is a return, then stuff the restore in the delay slot
-  if( do_polling() ) {
+  if(do_polling()) {
+    if (UseCBCond && !ra_->C->is_method_compilation()) {
+      // Insert extra padding for the case when the epilogue is preceded by
+      // a cbcond jump, which can't be followed by a CTI instruction
+      __ nop();
+    }
     __ ret();
     __ delayed()->restore();
   } else {
@@ -3330,7 +3339,18 @@
 //----------Instruction Attributes---------------------------------------------
 ins_attrib ins_cost(DEFAULT_COST); // Required cost attribute
 ins_attrib ins_size(32);           // Required size attribute (in bits)
-ins_attrib ins_avoid_back_to_back(0); // instruction should not be generated back to back
+
+// The avoid_back_to_back attribute is an expression that must evaluate to
+// one of the following values defined in MachNode:
+// AVOID_NONE   - instruction can be placed anywhere
+// AVOID_BEFORE - instruction cannot be placed immediately after an
+//                instruction marked MachNode::AVOID_AFTER
+// AVOID_AFTER  - the next instruction cannot be one
+//                marked MachNode::AVOID_BEFORE
+// AVOID_BEFORE_AND_AFTER - both the BEFORE and AFTER restrictions
+//                          apply at the same time
+ins_attrib ins_avoid_back_to_back(MachNode::AVOID_NONE);
+
 ins_attrib ins_short_branch(0);    // Required flag: is this instruction a
                                    // non-matching short branch variant of some
                                                             // long branch?
@@ -6630,6 +6650,7 @@
   ins_encode %{
     __ encode_heap_oop($src$$Register, $dst$$Register);
   %}
+  ins_avoid_back_to_back(Universe::narrow_oop_base() == NULL ? AVOID_NONE : AVOID_BEFORE);
   ins_pipe(ialu_reg);
 %}
 
@@ -9199,6 +9220,7 @@
     __ ba(*L);
     __ delayed()->nop();
   %}
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(br);
 %}
 
@@ -9217,7 +9239,7 @@
     __ ba_short(*L);
   %}
   ins_short_branch(1);
-  ins_avoid_back_to_back(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
   ins_pipe(cbcond_reg_imm);
 %}
 
@@ -9231,6 +9253,7 @@
   format %{ "BP$cmp   $icc,$labl" %}
   // Prim = bits 24-22, Secnd = bits 31-30
   ins_encode( enc_bp( labl, cmp, icc ) );
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(br_cc);
 %}
 
@@ -9242,6 +9265,7 @@
   format %{ "BP$cmp  $icc,$labl" %}
   // Prim = bits 24-22, Secnd = bits 31-30
   ins_encode( enc_bp( labl, cmp, icc ) );
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(br_cc);
 %}
 
@@ -9260,6 +9284,7 @@
     __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::ptr_cc, predict_taken, *L);
     __ delayed()->nop();
   %}
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(br_cc);
 %}
 
@@ -9278,6 +9303,7 @@
     __ fbp( (Assembler::Condition)($cmp$$cmpcode), false, (Assembler::CC)($fcc$$reg), predict_taken, *L);
     __ delayed()->nop();
   %}
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(br_fcc);
 %}
 
@@ -9290,6 +9316,7 @@
   format %{ "BP$cmp   $icc,$labl\t! Loop end" %}
   // Prim = bits 24-22, Secnd = bits 31-30
   ins_encode( enc_bp( labl, cmp, icc ) );
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(br_cc);
 %}
 
@@ -9302,6 +9329,7 @@
   format %{ "BP$cmp  $icc,$labl\t! Loop end" %}
   // Prim = bits 24-22, Secnd = bits 31-30
   ins_encode( enc_bp( labl, cmp, icc ) );
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(br_cc);
 %}
 
@@ -9552,7 +9580,7 @@
     __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
   %}
   ins_short_branch(1);
-  ins_avoid_back_to_back(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
   ins_pipe(cbcond_reg_reg);
 %}
 
@@ -9570,7 +9598,7 @@
     __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L);
   %}
   ins_short_branch(1);
-  ins_avoid_back_to_back(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
   ins_pipe(cbcond_reg_imm);
 %}
 
@@ -9588,7 +9616,7 @@
     __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
   %}
   ins_short_branch(1);
-  ins_avoid_back_to_back(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
   ins_pipe(cbcond_reg_reg);
 %}
 
@@ -9606,7 +9634,7 @@
     __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L);
   %}
   ins_short_branch(1);
-  ins_avoid_back_to_back(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
   ins_pipe(cbcond_reg_imm);
 %}
 
@@ -9624,7 +9652,7 @@
     __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$Register, *L);
   %}
   ins_short_branch(1);
-  ins_avoid_back_to_back(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
   ins_pipe(cbcond_reg_reg);
 %}
 
@@ -9642,7 +9670,7 @@
     __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::xcc, $op1$$Register, $op2$$constant, *L);
   %}
   ins_short_branch(1);
-  ins_avoid_back_to_back(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
   ins_pipe(cbcond_reg_imm);
 %}
 
@@ -9665,7 +9693,7 @@
     __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::ptr_cc, $op1$$Register, $op2$$Register, *L);
   %}
   ins_short_branch(1);
-  ins_avoid_back_to_back(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
   ins_pipe(cbcond_reg_reg);
 %}
 
@@ -9687,7 +9715,7 @@
     __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::ptr_cc, $op1$$Register, G0, *L);
   %}
   ins_short_branch(1);
-  ins_avoid_back_to_back(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
   ins_pipe(cbcond_reg_reg);
 %}
 
@@ -9705,7 +9733,7 @@
     __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
   %}
   ins_short_branch(1);
-  ins_avoid_back_to_back(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
   ins_pipe(cbcond_reg_reg);
 %}
 
@@ -9723,7 +9751,7 @@
     __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, G0, *L);
   %}
   ins_short_branch(1);
-  ins_avoid_back_to_back(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
   ins_pipe(cbcond_reg_reg);
 %}
 
@@ -9742,7 +9770,7 @@
     __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$Register, *L);
   %}
   ins_short_branch(1);
-  ins_avoid_back_to_back(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
   ins_pipe(cbcond_reg_reg);
 %}
 
@@ -9760,7 +9788,7 @@
     __ cbcond((Assembler::Condition)($cmp$$cmpcode), Assembler::icc, $op1$$Register, $op2$$constant, *L);
   %}
   ins_short_branch(1);
-  ins_avoid_back_to_back(1);
+  ins_avoid_back_to_back(AVOID_BEFORE_AND_AFTER);
   ins_pipe(cbcond_reg_imm);
 %}
 
@@ -9777,6 +9805,7 @@
   ins_cost(BRANCH_COST);
   format %{ "BR$cmp   $op1,$labl" %}
   ins_encode( enc_bpr( labl, cmp, op1 ) );
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(br_reg);
 %}
 
@@ -9789,6 +9818,7 @@
   ins_cost(BRANCH_COST);
   format %{ "BR$cmp   $op1,$labl" %}
   ins_encode( enc_bpr( labl, cmp, op1 ) );
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(br_reg);
 %}
 
@@ -9801,6 +9831,7 @@
   ins_cost(BRANCH_COST);
   format %{ "BR$cmp   $op1,$labl" %}
   ins_encode( enc_bpr( labl, cmp, op1 ) );
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(br_reg);
 %}
 
@@ -9841,6 +9872,7 @@
     __ bp( (Assembler::Condition)($cmp$$cmpcode), false, Assembler::xcc, predict_taken, *L);
     __ delayed()->nop();
   %}
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(br_cc);
 %}
 
@@ -9968,6 +10000,7 @@
   ins_cost(CALL_COST);
   format %{ "CALL,static  ; NOP ==> " %}
   ins_encode( Java_Static_Call( meth ), call_epilog );
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(simple_call);
 %}
 
@@ -10004,6 +10037,7 @@
   format %{ "CALL,runtime" %}
   ins_encode( Java_To_Runtime( meth ),
               call_epilog, adjust_long_from_native_call );
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(simple_call);
 %}
 
@@ -10016,6 +10050,7 @@
   ins_encode( Java_To_Runtime( meth ),
               call_epilog,
               adjust_long_from_native_call );
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(simple_call);
 %}
 
@@ -10028,6 +10063,7 @@
   ins_encode( Java_To_Runtime( meth ),
               call_epilog,
               adjust_long_from_native_call );
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(simple_call);
 %}
 
@@ -10041,6 +10077,7 @@
   ins_cost(CALL_COST);
   format %{ "Jmp     $jump_target  ; NOP \t! $method_oop holds method oop" %}
   ins_encode(form_jmpl(jump_target));
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(tail_call);
 %}
 
@@ -10072,6 +10109,7 @@
   // opcode(Assembler::jmpl_op3, Assembler::arith_op);
   // The hack duplicates the exception oop into G3, so that CreateEx can use it there.
   // ins_encode( form3_rs1_simm13_rd( jump_target, 0x00, R_G0 ), move_return_pc_to_o1() );
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(tail_call);
 %}
 
@@ -10102,6 +10140,7 @@
   // use the following format syntax
   format %{ "Jmp    rethrow_stub" %}
   ins_encode(enc_rethrow);
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(tail_call);
 %}
 
@@ -10130,6 +10169,7 @@
   ins_cost(DEFAULT_COST*10);
   format %{ "CALL   PartialSubtypeCheck\n\tNOP" %}
   ins_encode( enc_PartialSubtypeCheck() );
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(partial_subtype_check_pipe);
 %}
 
@@ -10139,6 +10179,7 @@
   ins_cost(DEFAULT_COST*10);
   format %{ "CALL   PartialSubtypeCheck\n\tNOP\t# (sets condition codes)" %}
   ins_encode( enc_PartialSubtypeCheck() );
+  ins_avoid_back_to_back(AVOID_BEFORE);
   ins_pipe(partial_subtype_check_pipe);
 %}
 
--- a/src/share/vm/adlc/output_h.cpp	Fri Apr 11 00:35:28 2014 +0400
+++ b/src/share/vm/adlc/output_h.cpp	Thu Apr 10 23:15:13 2014 -0700
@@ -1613,21 +1613,20 @@
     // Each instruction attribute results in a virtual call of same name.
     // The ins_cost is not handled here.
     Attribute *attr = instr->_attribs;
-    bool avoid_back_to_back = false;
+    Attribute *avoid_back_to_back_attr = NULL;
     while (attr != NULL) {
-      if (strcmp (attr->_ident, "ins_cost") != 0 &&
+      if (strcmp (attr->_ident, "ins_is_TrapBasedCheckNode") == 0) {
+        fprintf(fp, "  virtual bool           is_TrapBasedCheckNode() const { return %s; }\n", attr->_val);
+      } else if (strcmp (attr->_ident, "ins_cost") != 0 &&
           strncmp(attr->_ident, "ins_field_", 10) != 0 &&
           // Must match function in node.hpp: return type bool, no prefix "ins_".
           strcmp (attr->_ident, "ins_is_TrapBasedCheckNode") != 0 &&
           strcmp (attr->_ident, "ins_short_branch") != 0) {
         fprintf(fp, "  virtual int            %s() const { return %s; }\n", attr->_ident, attr->_val);
       }
-      // Check value for ins_avoid_back_to_back, and if it is true (1), set the flag
-      if (!strcmp(attr->_ident, "ins_avoid_back_to_back") != 0 && attr->int_val(*this) != 0)
-        avoid_back_to_back = true;
-      if (strcmp (attr->_ident, "ins_is_TrapBasedCheckNode") == 0)
-        fprintf(fp, "  virtual bool           is_TrapBasedCheckNode() const { return %s; }\n", attr->_val);
-
+      if (strcmp(attr->_ident, "ins_avoid_back_to_back") == 0) {
+        avoid_back_to_back_attr = attr;
+      }
       attr = (Attribute *)attr->_next;
     }
 
@@ -1799,11 +1798,11 @@
     }
 
     // flag: if this instruction should not be generated back to back.
-    if ( avoid_back_to_back ) {
-      if ( node_flags_set ) {
-        fprintf(fp," | Flag_avoid_back_to_back");
+    if (avoid_back_to_back_attr != NULL) {
+      if (node_flags_set) {
+        fprintf(fp," | (%s)", avoid_back_to_back_attr->_val);
       } else {
-        fprintf(fp,"init_flags(Flag_avoid_back_to_back");
+        fprintf(fp,"init_flags((%s)", avoid_back_to_back_attr->_val);
         node_flags_set = true;
       }
     }
--- a/src/share/vm/opto/machnode.hpp	Fri Apr 11 00:35:28 2014 +0400
+++ b/src/share/vm/opto/machnode.hpp	Thu Apr 10 23:15:13 2014 -0700
@@ -210,7 +210,14 @@
   bool may_be_short_branch() const { return (flags() & Flag_may_be_short_branch) != 0; }
 
   // Avoid back to back some instructions on some CPUs.
-  bool avoid_back_to_back() const { return (flags() & Flag_avoid_back_to_back) != 0; }
+  enum AvoidBackToBackFlag { AVOID_NONE = 0,  // no back-to-back restriction bits
+                             AVOID_BEFORE = Flag_avoid_back_to_back_before,
+                             AVOID_AFTER = Flag_avoid_back_to_back_after,
+                             AVOID_BEFORE_AND_AFTER = AVOID_BEFORE | AVOID_AFTER };  // both bits
+
+  bool avoid_back_to_back(AvoidBackToBackFlag flag_value) const {  // flag_value: bit(s) to test
+    return (flags() & flag_value) == flag_value;  // true only if every requested bit is set
+  }
 
   // instruction implemented with a call
   bool has_call() const { return (flags() & Flag_has_call) != 0; }
--- a/src/share/vm/opto/node.hpp	Fri Apr 11 00:35:28 2014 +0400
+++ b/src/share/vm/opto/node.hpp	Thu Apr 10 23:15:13 2014 -0700
@@ -645,17 +645,18 @@
 
   // Flags are sorted by usage frequency.
   enum NodeFlags {
-    Flag_is_Copy             = 0x01, // should be first bit to avoid shift
-    Flag_rematerialize       = Flag_is_Copy << 1,
+    Flag_is_Copy                     = 0x01, // should be first bit to avoid shift
+    Flag_rematerialize               = Flag_is_Copy << 1,
     Flag_needs_anti_dependence_check = Flag_rematerialize << 1,
-    Flag_is_macro            = Flag_needs_anti_dependence_check << 1,
-    Flag_is_Con              = Flag_is_macro << 1,
-    Flag_is_cisc_alternate   = Flag_is_Con << 1,
-    Flag_is_dead_loop_safe   = Flag_is_cisc_alternate << 1,
-    Flag_may_be_short_branch = Flag_is_dead_loop_safe << 1,
-    Flag_avoid_back_to_back  = Flag_may_be_short_branch << 1,
-    Flag_has_call            = Flag_avoid_back_to_back << 1,
-    Flag_is_expensive        = Flag_has_call << 1,
+    Flag_is_macro                    = Flag_needs_anti_dependence_check << 1,
+    Flag_is_Con                      = Flag_is_macro << 1,
+    Flag_is_cisc_alternate           = Flag_is_Con << 1,
+    Flag_is_dead_loop_safe           = Flag_is_cisc_alternate << 1,
+    Flag_may_be_short_branch         = Flag_is_dead_loop_safe << 1,
+    Flag_avoid_back_to_back_before   = Flag_may_be_short_branch << 1,
+    Flag_avoid_back_to_back_after    = Flag_avoid_back_to_back_before << 1,
+    Flag_has_call                    = Flag_avoid_back_to_back_after << 1,
+    Flag_is_expensive                = Flag_has_call << 1,
     _max_flags = (Flag_is_expensive << 1) - 1 // allow flags combination
   };
 
--- a/src/share/vm/opto/output.cpp	Fri Apr 11 00:35:28 2014 +0400
+++ b/src/share/vm/opto/output.cpp	Thu Apr 10 23:15:13 2014 -0700
@@ -411,7 +411,7 @@
             blk_size += nop_size;
           }
         }
-        if (mach->avoid_back_to_back()) {
+        if (mach->avoid_back_to_back(MachNode::AVOID_BEFORE)) {
           // Nop is inserted between "avoid back to back" instructions.
           // ScheduleAndBundle() can rearrange nodes in a block,
           // check for all offsets inside this block.
@@ -439,7 +439,7 @@
         last_call_adr = blk_starts[i]+blk_size;
       }
       // Remember end of avoid_back_to_back offset
-      if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back()) {
+      if (nj->is_Mach() && nj->as_Mach()->avoid_back_to_back(MachNode::AVOID_AFTER)) {
         last_avoid_back_to_back_adr = blk_starts[i]+blk_size;
       }
     }
@@ -525,11 +525,11 @@
           int new_size = replacement->size(_regalloc);
           int diff     = br_size - new_size;
           assert(diff >= (int)nop_size, "short_branch size should be smaller");
-          // Conservatively take into accound padding between
+          // Conservatively take into account padding between
           // avoid_back_to_back branches. Previous branch could be
           // converted into avoid_back_to_back branch during next
           // rounds.
-          if (needs_padding && replacement->avoid_back_to_back()) {
+          if (needs_padding && replacement->avoid_back_to_back(MachNode::AVOID_BEFORE)) {
             jmp_offset[i] += nop_size;
             diff -= nop_size;
           }
@@ -548,7 +548,7 @@
         }
       } // (mach->may_be_short_branch())
       if (mach != NULL && (mach->may_be_short_branch() ||
-                           mach->avoid_back_to_back())) {
+                           mach->avoid_back_to_back(MachNode::AVOID_AFTER))) {
         last_may_be_short_branch_adr = blk_starts[i] + jmp_offset[i] + jmp_size[i];
       }
       blk_starts[i+1] -= adjust_block_start;
@@ -1313,7 +1313,7 @@
         if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset) {
           padding = nop_size;
         }
-        if (padding == 0 && mach->avoid_back_to_back() &&
+        if (padding == 0 && mach->avoid_back_to_back(MachNode::AVOID_BEFORE) &&
             current_offset == last_avoid_back_to_back_offset) {
           // Avoid back to back some instructions.
           padding = nop_size;
@@ -1407,7 +1407,7 @@
               int new_size = replacement->size(_regalloc);
               assert((br_size - new_size) >= (int)nop_size, "short_branch size should be smaller");
               // Insert padding between avoid_back_to_back branches.
-              if (needs_padding && replacement->avoid_back_to_back()) {
+              if (needs_padding && replacement->avoid_back_to_back(MachNode::AVOID_BEFORE)) {
                 MachNode *nop = new (this) MachNopNode();
                 block->insert_node(nop, j++);
                 _cfg->map_node_to_block(nop, block);
@@ -1515,7 +1515,7 @@
         last_call_offset = current_offset;
       }
 
-      if (n->is_Mach() && n->as_Mach()->avoid_back_to_back()) {
+      if (n->is_Mach() && n->as_Mach()->avoid_back_to_back(MachNode::AVOID_AFTER)) {
         // Avoid back to back some instructions.
         last_avoid_back_to_back_offset = current_offset;
       }