changeset 405:b4e0a161f551

Merge
author never
date Mon, 06 Oct 2008 13:11:32 -0700
parents f1ecf9191140 ee8f06bfb27c
children b744678d4d71
files src/share/vm/runtime/globals.hpp
diffstat 12 files changed, 185 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/opto/addnode.cpp	Thu Oct 02 14:11:33 2008 -0700
+++ b/src/share/vm/opto/addnode.cpp	Mon Oct 06 13:11:32 2008 -0700
@@ -156,7 +156,8 @@
   if( add1_op == this_op && !con_right ) {
     Node *a12 = add1->in(2);
     const Type *t12 = phase->type( a12 );
-    if( t12->singleton() && t12 != Type::TOP && (add1 != add1->in(1)) ) {
+    if( t12->singleton() && t12 != Type::TOP && (add1 != add1->in(1)) &&
+       !(add1->in(1)->is_Phi() && add1->in(1)->as_Phi()->is_tripcount()) ) {
       assert(add1->in(1) != this, "dead loop in AddNode::Ideal");
       add2 = add1->clone();
       add2->set_req(2, in(2));
@@ -173,7 +174,8 @@
   if( add2_op == this_op && !con_left ) {
     Node *a22 = add2->in(2);
     const Type *t22 = phase->type( a22 );
-    if( t22->singleton() && t22 != Type::TOP && (add2 != add2->in(1)) ) {
+    if( t22->singleton() && t22 != Type::TOP && (add2 != add2->in(1)) &&
+       !(add2->in(1)->is_Phi() && add2->in(1)->as_Phi()->is_tripcount()) ) {
       assert(add2->in(1) != this, "dead loop in AddNode::Ideal");
       Node *addx = add2->clone();
       addx->set_req(1, in(1));
@@ -225,34 +227,63 @@
 //=============================================================================
 //------------------------------Idealize---------------------------------------
 Node *AddINode::Ideal(PhaseGVN *phase, bool can_reshape) {
-  int op1 = in(1)->Opcode();
-  int op2 = in(2)->Opcode();
+  Node* in1 = in(1);
+  Node* in2 = in(2);
+  int op1 = in1->Opcode();
+  int op2 = in2->Opcode();
   // Fold (con1-x)+con2 into (con1+con2)-x
+  if ( op1 == Op_AddI && op2 == Op_SubI ) {
+    // Swap edges to try optimizations below
+    in1 = in2;
+    in2 = in(1);
+    op1 = op2;
+    op2 = in2->Opcode();
+  }
   if( op1 == Op_SubI ) {
-    const Type *t_sub1 = phase->type( in(1)->in(1) );
-    const Type *t_2    = phase->type( in(2)        );
+    const Type *t_sub1 = phase->type( in1->in(1) );
+    const Type *t_2    = phase->type( in2        );
     if( t_sub1->singleton() && t_2->singleton() && t_sub1 != Type::TOP && t_2 != Type::TOP )
       return new (phase->C, 3) SubINode(phase->makecon( add_ring( t_sub1, t_2 ) ),
-                              in(1)->in(2) );
+                              in1->in(2) );
     // Convert "(a-b)+(c-d)" into "(a+c)-(b+d)"
     if( op2 == Op_SubI ) {
       // Check for dead cycle: d = (a-b)+(c-d)
-      assert( in(1)->in(2) != this && in(2)->in(2) != this,
+      assert( in1->in(2) != this && in2->in(2) != this,
               "dead loop in AddINode::Ideal" );
       Node *sub  = new (phase->C, 3) SubINode(NULL, NULL);
-      sub->init_req(1, phase->transform(new (phase->C, 3) AddINode(in(1)->in(1), in(2)->in(1) ) ));
-      sub->init_req(2, phase->transform(new (phase->C, 3) AddINode(in(1)->in(2), in(2)->in(2) ) ));
+      sub->init_req(1, phase->transform(new (phase->C, 3) AddINode(in1->in(1), in2->in(1) ) ));
+      sub->init_req(2, phase->transform(new (phase->C, 3) AddINode(in1->in(2), in2->in(2) ) ));
       return sub;
     }
+    // Convert "(a-b)+(b+c)" into "(a+c)"
+    if( op2 == Op_AddI && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) AddINode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c+b)" into "(a+c)"
+    if( op2 == Op_AddI && in1->in(2) == in2->in(2) ) {
+      assert(in1->in(1) != this && in2->in(1) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) AddINode(in1->in(1), in2->in(1));
+    }
+    // Convert "(a-b)+(b-c)" into "(a-c)"
+    if( op2 == Op_SubI && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) SubINode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c-a)" into "(c-b)"
+    if( op2 == Op_SubI && in1->in(1) == in2->in(2) ) {
+      assert(in1->in(2) != this && in2->in(1) != this,"dead loop in AddINode::Ideal");
+      return new (phase->C, 3) SubINode(in2->in(1), in1->in(2));
+    }
   }
 
   // Convert "x+(0-y)" into "(x-y)"
-  if( op2 == Op_SubI && phase->type(in(2)->in(1)) == TypeInt::ZERO )
-    return new (phase->C, 3) SubINode(in(1), in(2)->in(2) );
+  if( op2 == Op_SubI && phase->type(in2->in(1)) == TypeInt::ZERO )
+    return new (phase->C, 3) SubINode(in1, in2->in(2) );
 
   // Convert "(0-y)+x" into "(x-y)"
-  if( op1 == Op_SubI && phase->type(in(1)->in(1)) == TypeInt::ZERO )
-    return new (phase->C, 3) SubINode( in(2), in(1)->in(2) );
+  if( op1 == Op_SubI && phase->type(in1->in(1)) == TypeInt::ZERO )
+    return new (phase->C, 3) SubINode( in2, in1->in(2) );
 
   // Convert (x>>>z)+y into (x+(y<<z))>>>z for small constant z and y.
   // Helps with array allocation math constant folding
@@ -266,15 +297,15 @@
   // Have not observed cases where type information exists to support
   // positive y and (x <= -(y << z))
   if( op1 == Op_URShiftI && op2 == Op_ConI &&
-      in(1)->in(2)->Opcode() == Op_ConI ) {
-    jint z = phase->type( in(1)->in(2) )->is_int()->get_con() & 0x1f; // only least significant 5 bits matter
-    jint y = phase->type( in(2) )->is_int()->get_con();
+      in1->in(2)->Opcode() == Op_ConI ) {
+    jint z = phase->type( in1->in(2) )->is_int()->get_con() & 0x1f; // only least significant 5 bits matter
+    jint y = phase->type( in2 )->is_int()->get_con();
 
     if( z < 5 && -5 < y && y < 0 ) {
-      const Type *t_in11 = phase->type(in(1)->in(1));
+      const Type *t_in11 = phase->type(in1->in(1));
       if( t_in11 != Type::TOP && (t_in11->is_int()->_lo >= -(y << z)) ) {
-        Node *a = phase->transform( new (phase->C, 3) AddINode( in(1)->in(1), phase->intcon(y<<z) ) );
-        return new (phase->C, 3) URShiftINode( a, in(1)->in(2) );
+        Node *a = phase->transform( new (phase->C, 3) AddINode( in1->in(1), phase->intcon(y<<z) ) );
+        return new (phase->C, 3) URShiftINode( a, in1->in(2) );
       }
     }
   }
@@ -328,39 +359,73 @@
 //=============================================================================
 //------------------------------Idealize---------------------------------------
 Node *AddLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
-  int op1 = in(1)->Opcode();
-  int op2 = in(2)->Opcode();
+  Node* in1 = in(1);
+  Node* in2 = in(2);
+  int op1 = in1->Opcode();
+  int op2 = in2->Opcode();
+  // Canonicalize "(x+y)+(a-b)" into "(a-b)+(x+y)" so the SubL is the left input
+  if ( op1 == Op_AddL && op2 == Op_SubL ) {
+    // Swap edges to try optimizations below
+    in1 = in2;
+    in2 = in(1);
+    op1 = op2;
+    op2 = in2->Opcode();
+  }
   // Fold (con1-x)+con2 into (con1+con2)-x
   if( op1 == Op_SubL ) {
-    const Type *t_sub1 = phase->type( in(1)->in(1) );
-    const Type *t_2    = phase->type( in(2)        );
+    const Type *t_sub1 = phase->type( in1->in(1) );
+    const Type *t_2    = phase->type( in2        );
     if( t_sub1->singleton() && t_2->singleton() && t_sub1 != Type::TOP && t_2 != Type::TOP )
       return new (phase->C, 3) SubLNode(phase->makecon( add_ring( t_sub1, t_2 ) ),
-                              in(1)->in(2) );
+                              in1->in(2) );
     // Convert "(a-b)+(c-d)" into "(a+c)-(b+d)"
     if( op2 == Op_SubL ) {
       // Check for dead cycle: d = (a-b)+(c-d)
-      assert( in(1)->in(2) != this && in(2)->in(2) != this,
+      assert( in1->in(2) != this && in2->in(2) != this,
               "dead loop in AddLNode::Ideal" );
       Node *sub  = new (phase->C, 3) SubLNode(NULL, NULL);
-      sub->init_req(1, phase->transform(new (phase->C, 3) AddLNode(in(1)->in(1), in(2)->in(1) ) ));
-      sub->init_req(2, phase->transform(new (phase->C, 3) AddLNode(in(1)->in(2), in(2)->in(2) ) ));
+      sub->init_req(1, phase->transform(new (phase->C, 3) AddLNode(in1->in(1), in2->in(1) ) ));
+      sub->init_req(2, phase->transform(new (phase->C, 3) AddLNode(in1->in(2), in2->in(2) ) ));
       return sub;
     }
+    // Convert "(a-b)+(b+c)" into "(a+c)"
+    if( op2 == Op_AddL && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) AddLNode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c+b)" into "(a+c)"
+    if( op2 == Op_AddL && in1->in(2) == in2->in(2) ) {
+      assert(in1->in(1) != this && in2->in(1) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) AddLNode(in1->in(1), in2->in(1));
+    }
+    // Convert "(a-b)+(b-c)" into "(a-c)"
+    if( op2 == Op_SubL && in1->in(2) == in2->in(1) ) {
+      assert(in1->in(1) != this && in2->in(2) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) SubLNode(in1->in(1), in2->in(2));
+    }
+    // Convert "(a-b)+(c-a)" into "(c-b)"
+    if( op2 == Op_SubL && in1->in(1) == in2->in(2) ) {
+      assert(in1->in(2) != this && in2->in(1) != this,"dead loop in AddLNode::Ideal");
+      return new (phase->C, 3) SubLNode(in2->in(1), in1->in(2));
+    }
   }
 
   // Convert "x+(0-y)" into "(x-y)"
-  if( op2 == Op_SubL && phase->type(in(2)->in(1)) == TypeLong::ZERO )
-    return new (phase->C, 3) SubLNode(in(1), in(2)->in(2) );
+  if( op2 == Op_SubL && phase->type(in2->in(1)) == TypeLong::ZERO )
+    return new (phase->C, 3) SubLNode( in1, in2->in(2) );
+
+  // Convert "(0-y)+x" into "(x-y)"
+  if( op1 == Op_SubL && phase->type(in1->in(1)) == TypeLong::ZERO )
+    return new (phase->C, 3) SubLNode( in2, in1->in(2) );
 
   // Convert "X+X+X+X+X...+X+Y" into "k*X+Y" or really convert "X+(X+Y)"
   // into "(X<<1)+Y" and let shift-folding happen.
   if( op2 == Op_AddL &&
-      in(2)->in(1) == in(1) &&
+      in2->in(1) == in1 &&
       op1 != Op_ConL &&
       0 ) {
-    Node *shift = phase->transform(new (phase->C, 3) LShiftLNode(in(1),phase->intcon(1)));
-    return new (phase->C, 3) AddLNode(shift,in(2)->in(2));
+    Node *shift = phase->transform(new (phase->C, 3) LShiftLNode(in1,phase->intcon(1)));
+    return new (phase->C, 3) AddLNode(shift,in2->in(2));
   }
 
   return AddNode::Ideal(phase, can_reshape);
--- a/src/share/vm/opto/cfgnode.cpp	Thu Oct 02 14:11:33 2008 -0700
+++ b/src/share/vm/opto/cfgnode.cpp	Mon Oct 06 13:11:32 2008 -0700
@@ -1817,6 +1817,12 @@
   return progress;              // Return any progress
 }
 
+//------------------------------is_tripcount-----------------------------------
+bool PhiNode::is_tripcount() const {
+  if (in(0) == NULL || !in(0)->is_CountedLoop()) return false; // input 0 must be a CountedLoop
+  return in(0)->as_CountedLoop()->phi() == this;               // and this must be that loop's phi()
+}
+
 //------------------------------out_RegMask------------------------------------
 const RegMask &PhiNode::in_RegMask(uint i) const {
   return i ? out_RegMask() : RegMask::Empty;
@@ -1832,9 +1838,7 @@
 #ifndef PRODUCT
 void PhiNode::dump_spec(outputStream *st) const {
   TypeNode::dump_spec(st);
-  if (in(0) != NULL &&
-      in(0)->is_CountedLoop() &&
-      in(0)->as_CountedLoop()->phi() == this) {
+  if (is_tripcount()) {
     st->print(" #tripcount");
   }
 }
--- a/src/share/vm/opto/cfgnode.hpp	Thu Oct 02 14:11:33 2008 -0700
+++ b/src/share/vm/opto/cfgnode.hpp	Mon Oct 06 13:11:32 2008 -0700
@@ -162,6 +162,8 @@
     return NULL;  // not a copy!
   }
 
+  bool is_tripcount() const;
+
   // Determine a unique non-trivial input, if any.
   // Ignore casts if it helps.  Return NULL on failure.
   Node* unique_input(PhaseTransform *phase);
--- a/src/share/vm/opto/divnode.cpp	Thu Oct 02 14:11:33 2008 -0700
+++ b/src/share/vm/opto/divnode.cpp	Mon Oct 06 13:11:32 2008 -0700
@@ -110,10 +110,13 @@
     } else if( dividend->Opcode() == Op_AndI ) {
       // An AND mask of sufficient size clears the low bits and
       // I can avoid rounding.
-      const TypeInt *andconi = phase->type( dividend->in(2) )->isa_int();
-      if( andconi && andconi->is_con(-d) ) {
-        dividend = dividend->in(1);
-        needs_rounding = false;
+      const TypeInt *andconi_t = phase->type( dividend->in(2) )->isa_int();
+      if( andconi_t && andconi_t->is_con() ) {
+        jint andconi = andconi_t->get_con();
+        if( andconi < 0 && is_power_of_2(-andconi) && (-andconi) >= d ) {
+          if( (-andconi) == d ) dividend = dividend->in(1); // drop the AND only if it clears exactly the bits shifted out
+          needs_rounding = false;
+        }
       }
     }
 
@@ -316,10 +319,13 @@
     } else if( dividend->Opcode() == Op_AndL ) {
       // An AND mask of sufficient size clears the low bits and
       // I can avoid rounding.
-      const TypeLong *andconl = phase->type( dividend->in(2) )->isa_long();
-      if( andconl && andconl->is_con(-d)) {
-        dividend = dividend->in(1);
-        needs_rounding = false;
+      const TypeLong *andconl_t = phase->type( dividend->in(2) )->isa_long();
+      if( andconl_t && andconl_t->is_con() ) {
+        jlong andconl = andconl_t->get_con();
+        if( andconl < 0 && is_power_of_2_long(-andconl) && (-andconl) >= d ) {
+          if( (-andconl) == d ) dividend = dividend->in(1); // drop the AND only if it clears exactly the bits shifted out
+          needs_rounding = false;
+        }
       }
     }
 
--- a/src/share/vm/opto/loopTransform.cpp	Thu Oct 02 14:11:33 2008 -0700
+++ b/src/share/vm/opto/loopTransform.cpp	Mon Oct 06 13:11:32 2008 -0700
@@ -679,6 +679,10 @@
   CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
   post_head->set_post_loop(main_head);
 
+  // Reduce the post-loop trip count.
+  CountedLoopEndNode* post_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
+  post_end->_prob = PROB_FAIR;
+
   // Build the main-loop normal exit.
   IfFalseNode *new_main_exit = new (C, 1) IfFalseNode(main_end);
   _igvn.register_new_node_with_optimizer( new_main_exit );
@@ -748,6 +752,9 @@
   pre_head->set_pre_loop(main_head);
   Node *pre_incr = old_new[incr->_idx];
 
+  // Reduce the pre-loop trip count.
+  pre_end->_prob = PROB_FAIR;
+
   // Find the pre-loop normal exit.
   Node* pre_exit = pre_end->proj_out(false);
   assert( pre_exit->Opcode() == Op_IfFalse, "" );
@@ -767,8 +774,8 @@
   register_new_node( min_cmp , new_pre_exit );
   register_new_node( min_bol , new_pre_exit );
 
-  // Build the IfNode
-  IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_FAIR, COUNT_UNKNOWN );
+  // Build the IfNode (assume the main-loop is executed always).
+  IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
   _igvn.register_new_node_with_optimizer( min_iff );
   set_idom(min_iff, new_pre_exit, dd_main_head);
   set_loop(min_iff, loop->_parent);
@@ -1583,10 +1590,10 @@
 
 //=============================================================================
 //------------------------------iteration_split_impl---------------------------
-void IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
+bool IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
   // Check and remove empty loops (spam micro-benchmarks)
   if( policy_do_remove_empty_loop(phase) )
-    return;                     // Here we removed an empty loop
+    return true;                     // Here we removed an empty loop
 
   bool should_peel = policy_peeling(phase); // Should we peel?
 
@@ -1596,7 +1603,8 @@
   // This removes loop-invariant tests (usually null checks).
   if( !_head->is_CountedLoop() ) { // Non-counted loop
     if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
-      return;
+      // Partial peel succeeded so terminate this round of loop opts
+      return false;
     }
     if( should_peel ) {            // Should we peel?
 #ifndef PRODUCT
@@ -1606,14 +1614,14 @@
     } else if( should_unswitch ) {
       phase->do_unswitching(this, old_new);
     }
-    return;
+    return true;
   }
   CountedLoopNode *cl = _head->as_CountedLoop();
 
-  if( !cl->loopexit() ) return; // Ignore various kinds of broken loops
+  if( !cl->loopexit() ) return true; // Ignore various kinds of broken loops
 
   // Do nothing special to pre- and post- loops
-  if( cl->is_pre_loop() || cl->is_post_loop() ) return;
+  if( cl->is_pre_loop() || cl->is_post_loop() ) return true;
 
   // Compute loop trip count from profile data
   compute_profile_trip_cnt(phase);
@@ -1626,11 +1634,11 @@
       // Here we did some unrolling and peeling.  Eventually we will
       // completely unroll this loop and it will no longer be a loop.
       phase->do_maximally_unroll(this,old_new);
-      return;
+      return true;
     }
     if (should_unswitch) {
       phase->do_unswitching(this, old_new);
-      return;
+      return true;
     }
   }
 
@@ -1691,14 +1699,16 @@
     if( should_peel )           // Might want to peel but do nothing else
       phase->do_peeling(this,old_new);
   }
+  return true;
 }
 
 
 //=============================================================================
 //------------------------------iteration_split--------------------------------
-void IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
+bool IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
   // Recursively iteration split nested loops
-  if( _child ) _child->iteration_split( phase, old_new );
+  if( _child && !_child->iteration_split( phase, old_new ))
+    return false;
 
   // Clean out prior deadwood
   DCE_loop_body();
@@ -1720,7 +1730,9 @@
       _allow_optimizations &&
       !tail()->is_top() ) {     // Also ignore the occasional dead backedge
     if (!_has_call) {
-      iteration_split_impl( phase, old_new );
+      if (!iteration_split_impl( phase, old_new )) {
+        return false;
+      }
     } else if (policy_unswitching(phase)) {
       phase->do_unswitching(this, old_new);
     }
@@ -1729,5 +1741,7 @@
   // Minor offset re-organization to remove loop-fallout uses of
   // trip counter.
   if( _head->is_CountedLoop() ) phase->reorg_offsets( this );
-  if( _next ) _next->iteration_split( phase, old_new );
+  if( _next && !_next->iteration_split( phase, old_new ))
+    return false;
+  return true;
 }
--- a/src/share/vm/opto/loopnode.hpp	Thu Oct 02 14:11:33 2008 -0700
+++ b/src/share/vm/opto/loopnode.hpp	Mon Oct 06 13:11:32 2008 -0700
@@ -325,12 +325,14 @@
   // Returns TRUE if loop tree is structurally changed.
   bool beautify_loops( PhaseIdealLoop *phase );
 
-  // Perform iteration-splitting on inner loops.  Split iterations to avoid
-  // range checks or one-shot null checks.
-  void iteration_split( PhaseIdealLoop *phase, Node_List &old_new );
+  // Perform iteration-splitting on inner loops.  Split iterations to
+  // avoid range checks or one-shot null checks.  Returns false if the
+  // current round of loop opts should stop.
+  bool iteration_split( PhaseIdealLoop *phase, Node_List &old_new );
 
-  // Driver for various flavors of iteration splitting
-  void iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new );
+  // Driver for various flavors of iteration splitting.  Returns false
+  // if the current round of loop opts should stop.
+  bool iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new );
 
   // Given dominators, try to find loops with calls that must always be
   // executed (call dominates loop tail).  These loops do not need non-call
--- a/src/share/vm/opto/loopopts.cpp	Thu Oct 02 14:11:33 2008 -0700
+++ b/src/share/vm/opto/loopopts.cpp	Mon Oct 06 13:11:32 2008 -0700
@@ -1903,9 +1903,6 @@
       // Use in a phi is considered a use in the associated predecessor block
       use_c = use->in(0)->in(j);
     }
-    if (use_c->is_CountedLoop()) {
-      use_c = use_c->in(LoopNode::EntryControl);
-    }
     set_ctrl(n_clone, use_c);
     assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
     get_loop(use_c)->_body.push(n_clone);
--- a/src/share/vm/opto/node.hpp	Thu Oct 02 14:11:33 2008 -0700
+++ b/src/share/vm/opto/node.hpp	Mon Oct 06 13:11:32 2008 -0700
@@ -1320,7 +1320,8 @@
   Node *pop() {
     if( _clock_index >= size() ) _clock_index = 0;
     Node *b = at(_clock_index);
-    map( _clock_index++, Node_List::pop());
+    map( _clock_index, Node_List::pop());
+    if (size() != 0) _clock_index++; // Always start from 0
     _in_worklist >>= b->_idx;
     return b;
   }
--- a/src/share/vm/opto/postaloc.cpp	Thu Oct 02 14:11:33 2008 -0700
+++ b/src/share/vm/opto/postaloc.cpp	Mon Oct 06 13:11:32 2008 -0700
@@ -34,7 +34,7 @@
 #endif
 }
 
-//------------------------------may_be_copy_of_callee-----------------------------
+//---------------------------may_be_copy_of_callee-----------------------------
 // Check to see if we can possibly be a copy of a callee-save value.
 bool PhaseChaitin::may_be_copy_of_callee( Node *def ) const {
   // Short circuit if there are no callee save registers
@@ -225,6 +225,20 @@
 
   // Scan all registers to see if this value is around already
   for( uint reg = 0; reg < (uint)_max_reg; reg++ ) {
+    if (reg == (uint)nk_reg) {
+      // Found ourselves so check if there is only one user of this
+      // copy and keep on searching for a better copy if so.
+      bool ignore_self = true;
+      x = n->in(k);
+      DUIterator_Fast imax, i = x->fast_outs(imax);
+      Node* first = x->fast_out(i); i++;
+      while (i < imax && ignore_self) {
+        Node* use = x->fast_out(i); i++;
+        if (use != first) ignore_self = false;
+      }
+      if (ignore_self) continue;
+    }
+
     Node *vv = value[reg];
     if( !single ) {             // Doubles check for aligned-adjacent pair
       if( (reg&1)==0 ) continue;  // Wrong half of a pair
--- a/src/share/vm/opto/subnode.cpp	Thu Oct 02 14:11:33 2008 -0700
+++ b/src/share/vm/opto/subnode.cpp	Mon Oct 06 13:11:32 2008 -0700
@@ -206,6 +206,14 @@
   if( op1 == Op_AddI && op2 == Op_AddI && in1->in(2) == in2->in(2) )
     return new (phase->C, 3) SubINode( in1->in(1), in2->in(1) );
 
+  // Convert "(A+X) - (X+B)" into "A - B"
+  if( op1 == Op_AddI && op2 == Op_AddI && in1->in(2) == in2->in(1) )
+    return new (phase->C, 3) SubINode( in1->in(1), in2->in(2) );
+
+  // Convert "(X+A) - (B+X)" into "A - B"
+  if( op1 == Op_AddI && op2 == Op_AddI && in1->in(1) == in2->in(2) )
+    return new (phase->C, 3) SubINode( in1->in(2), in2->in(1) );
+
   // Convert "A-(B-C)" into (A+C)-B", since add is commutative and generally
   // nicer to optimize than subtract.
   if( op2 == Op_SubI && in2->outcnt() == 1) {
--- a/src/share/vm/runtime/globals.hpp	Thu Oct 02 14:11:33 2008 -0700
+++ b/src/share/vm/runtime/globals.hpp	Mon Oct 06 13:11:32 2008 -0700
@@ -2546,7 +2546,7 @@
   develop(intx, MaxRecursiveInlineLevel, 1,                                 \
           "maximum number of nested recursive calls that are inlined")      \
                                                                             \
-  develop(intx, InlineSmallCode, 1000,                                      \
+  product(intx, InlineSmallCode, 1000,                                      \
           "Only inline already compiled methods if their code size is "     \
           "less than this")                                                 \
                                                                             \
--- a/test/compiler/6700047/Test6700047.java	Thu Oct 02 14:11:33 2008 -0700
+++ b/test/compiler/6700047/Test6700047.java	Mon Oct 06 13:11:32 2008 -0700
@@ -29,6 +29,8 @@
  */
 
 public class Test6700047 {
+    static byte[] dummy = new byte[256];
+
     public static void main(String[] args) {
         for (int i = 0; i < 100000; i++) {
             intToLeftPaddedAsciiBytes();
@@ -53,6 +55,7 @@
         if (offset > 0) {
             for(int j = 0; j < offset; j++) {
                 result++;
+                dummy[i] = 0;
             }
         }
         return result;