changeset 4156:aaad39923cdb

Merge
author kmo
date Tue, 12 Feb 2013 14:33:19 -0800
parents c703f9c4b025 8b3da8d14c93
children 12e01444ca2d
files
diffstat 10 files changed, 456 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/src/share/vm/opto/c2_globals.hpp	Tue Feb 12 07:39:42 2013 -0800
+++ b/src/share/vm/opto/c2_globals.hpp	Tue Feb 12 14:33:19 2013 -0800
@@ -618,6 +618,9 @@
                                                                             \
   product(intx, LiveNodeCountInliningCutoff, 20000,                         \
           "max number of live nodes in a method")                           \
+                                                                            \
+  diagnostic(bool, OptimizeExpensiveOps, true,                              \
+          "Find best control for expensive operations")                     \
 
 
 C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_EXPERIMENTAL_FLAG, DECLARE_NOTPRODUCT_FLAG)
--- a/src/share/vm/opto/compile.cpp	Tue Feb 12 07:39:42 2013 -0800
+++ b/src/share/vm/opto/compile.cpp	Tue Feb 12 14:33:19 2013 -0800
@@ -409,6 +409,13 @@
       remove_macro_node(n);
     }
   }
+  // Remove useless expensive node
+  for (int i = C->expensive_count()-1; i >= 0; i--) {
+    Node* n = C->expensive_node(i);
+    if (!useful.member(n)) {
+      remove_expensive_node(n);
+    }
+  }
   // clean up the late inline lists
   remove_useless_late_inlines(&_string_late_inlines, useful);
   remove_useless_late_inlines(&_late_inlines, useful);
@@ -1061,6 +1068,7 @@
   _intrinsics = NULL;
   _macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
   _predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
+  _expensive_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
   register_library_intrinsics();
 }
 
@@ -1927,6 +1935,10 @@
 
   if (failing())  return;
 
+  // No more new expensive nodes will be added to the list from here
+  // so keep only the actual candidates for optimizations.
+  cleanup_expensive_nodes(igvn);
+
   // Perform escape analysis
   if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
     if (has_loops()) {
@@ -3010,6 +3022,15 @@
     return true;
   }
 
+  // Expensive nodes have their control input set to prevent the GVN
+  // from freely commoning them. There's no GVN beyond this point so
+  // no need to keep the control input. We want the expensive nodes to
+  // be freely moved to the least frequent code path by gcm.
+  assert(OptimizeExpensiveOps || expensive_count() == 0, "optimization off but list non empty?");
+  for (int i = 0; i < expensive_count(); i++) {
+    _expensive_nodes->at(i)->set_req(0, NULL);
+  }
+
   Final_Reshape_Counts frc;
 
   // Visit everybody reachable!
@@ -3525,3 +3546,126 @@
     }
   }
 }
+
+int Compile::cmp_expensive_nodes(Node* n1, Node* n2) {
+  if (n1->Opcode() < n2->Opcode())      return -1;
+  else if (n1->Opcode() > n2->Opcode()) return 1;
+
+  assert(n1->req() == n2->req(), err_msg_res("can't compare %s nodes: n1->req() = %d, n2->req() = %d", NodeClassNames[n1->Opcode()], n1->req(), n2->req()));
+  for (uint i = 1; i < n1->req(); i++) {
+    if (n1->in(i) < n2->in(i))      return -1;
+    else if (n1->in(i) > n2->in(i)) return 1;
+  }
+
+  return 0;
+}
+
+int Compile::cmp_expensive_nodes(Node** n1p, Node** n2p) {
+  Node* n1 = *n1p;
+  Node* n2 = *n2p;
+
+  return cmp_expensive_nodes(n1, n2);
+}
+
+void Compile::sort_expensive_nodes() {
+  if (!expensive_nodes_sorted()) {
+    _expensive_nodes->sort(cmp_expensive_nodes);
+  }
+}
+
+bool Compile::expensive_nodes_sorted() const {
+  for (int i = 1; i < _expensive_nodes->length(); i++) {
+    if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i-1)) < 0) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool Compile::should_optimize_expensive_nodes(PhaseIterGVN &igvn) {
+  if (_expensive_nodes->length() == 0) {
+    return false;
+  }
+
+  assert(OptimizeExpensiveOps, "optimization off?");
+
+  // Take this opportunity to remove dead nodes from the list
+  int j = 0;
+  for (int i = 0; i < _expensive_nodes->length(); i++) {
+    Node* n = _expensive_nodes->at(i);
+    if (!n->is_unreachable(igvn)) {
+      assert(n->is_expensive(), "should be expensive");
+      _expensive_nodes->at_put(j, n);
+      j++;
+    }
+  }
+  _expensive_nodes->trunc_to(j);
+
+  // Then sort the list so that similar nodes are next to each other
+  // and check for at least two nodes of identical kind with same data
+  // inputs.
+  sort_expensive_nodes();
+
+  for (int i = 0; i < _expensive_nodes->length()-1; i++) {
+    if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i+1)) == 0) {
+      return true;
+    }
+  }
+
+  return false;
+}
+
+void Compile::cleanup_expensive_nodes(PhaseIterGVN &igvn) {
+  if (_expensive_nodes->length() == 0) {
+    return;
+  }
+
+  assert(OptimizeExpensiveOps, "optimization off?");
+
+  // Sort to bring similar nodes next to each other and clear the
+  // control input of nodes for which there's only a single copy.
+  sort_expensive_nodes();
+
+  int j = 0;
+  int identical = 0;
+  int i = 0;
+  for (; i < _expensive_nodes->length()-1; i++) {
+    assert(j <= i, "can't write beyond current index");
+    if (_expensive_nodes->at(i)->Opcode() == _expensive_nodes->at(i+1)->Opcode()) {
+      identical++;
+      _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
+      continue;
+    }
+    if (identical > 0) {
+      _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
+      identical = 0;
+    } else {
+      Node* n = _expensive_nodes->at(i);
+      igvn.hash_delete(n);
+      n->set_req(0, NULL);
+      igvn.hash_insert(n);
+    }
+  }
+  if (identical > 0) {
+    _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
+  } else if (_expensive_nodes->length() >= 1) {
+    Node* n = _expensive_nodes->at(i);
+    igvn.hash_delete(n);
+    n->set_req(0, NULL);
+    igvn.hash_insert(n);
+  }
+  _expensive_nodes->trunc_to(j);
+}
+
+void Compile::add_expensive_node(Node * n) {
+  assert(!_expensive_nodes->contains(n), "duplicate entry in expensive list");
+  assert(n->is_expensive(), "expensive nodes with non-null control here only");
+  assert(!n->is_CFG() && !n->is_Mem(), "no cfg or memory nodes here");
+  if (OptimizeExpensiveOps) {
+    _expensive_nodes->append(n);
+  } else {
+    // Clear control input and let IGVN optimize expensive nodes if
+    // OptimizeExpensiveOps is off.
+    n->set_req(0, NULL);
+  }
+}
--- a/src/share/vm/opto/compile.hpp	Tue Feb 12 07:39:42 2013 -0800
+++ b/src/share/vm/opto/compile.hpp	Tue Feb 12 14:33:19 2013 -0800
@@ -314,6 +314,7 @@
   GrowableArray<CallGenerator*>* _intrinsics;   // List of intrinsics.
   GrowableArray<Node*>* _macro_nodes;           // List of nodes which need to be expanded before matching.
   GrowableArray<Node*>* _predicate_opaqs;       // List of Opaque1 nodes for the loop predicates.
+  GrowableArray<Node*>* _expensive_nodes;       // List of nodes that are expensive to compute and that we'd better not let the GVN freely common
   ConnectionGraph*      _congraph;
 #ifndef PRODUCT
   IdealGraphPrinter*    _printer;
@@ -398,6 +399,13 @@
   GrowableArray<PrintInliningBuffer>* _print_inlining_list;
   int _print_inlining;
 
+  // Only keep nodes in the expensive node list that need to be optimized
+  void cleanup_expensive_nodes(PhaseIterGVN &igvn);
+  // Use for sorting expensive nodes to bring similar nodes together
+  static int cmp_expensive_nodes(Node** n1, Node** n2);
+  // Expensive nodes list already sorted?
+  bool expensive_nodes_sorted() const;
+
  public:
 
   outputStream* print_inlining_stream() const {
@@ -573,8 +581,10 @@
 
   int           macro_count()                   { return _macro_nodes->length(); }
   int           predicate_count()               { return _predicate_opaqs->length();}
+  int           expensive_count()               { return _expensive_nodes->length(); }
   Node*         macro_node(int idx)             { return _macro_nodes->at(idx); }
   Node*         predicate_opaque1_node(int idx) { return _predicate_opaqs->at(idx);}
+  Node*         expensive_node(int idx)         { return _expensive_nodes->at(idx); }
   ConnectionGraph* congraph()                   { return _congraph;}
   void set_congraph(ConnectionGraph* congraph)  { _congraph = congraph;}
   void add_macro_node(Node * n) {
@@ -592,6 +602,12 @@
       _predicate_opaqs->remove(n);
     }
   }
+  void add_expensive_node(Node * n);
+  void remove_expensive_node(Node * n) {
+    if (_expensive_nodes->contains(n)) {
+      _expensive_nodes->remove(n);
+    }
+  }
   void add_predicate_opaq(Node * n) {
     assert(!_predicate_opaqs->contains(n), " duplicate entry in predicate opaque1");
     assert(_macro_nodes->contains(n), "should have already been in macro list");
@@ -604,6 +620,13 @@
     return _predicate_opaqs->contains(n);
   }
 
+  // Are there candidate expensive nodes for optimization?
+  bool should_optimize_expensive_nodes(PhaseIterGVN &igvn);
+  // Check whether n1 and n2 are similar
+  static int cmp_expensive_nodes(Node* n1, Node* n2);
+  // Sort expensive nodes to locate similar expensive nodes
+  void sort_expensive_nodes();
+
   // Compilation environment.
   Arena*            comp_arena()                { return &_comp_arena; }
   ciEnv*            env() const                 { return _env; }
--- a/src/share/vm/opto/library_call.cpp	Tue Feb 12 07:39:42 2013 -0800
+++ b/src/share/vm/opto/library_call.cpp	Tue Feb 12 14:33:19 2013 -0800
@@ -1653,7 +1653,7 @@
 // really odd corner cases (+/- Infinity).  Just uncommon-trap them.
 bool LibraryCallKit::inline_exp() {
   Node* arg = round_double_node(argument(0));
-  Node* n   = _gvn.transform(new (C) ExpDNode(0, arg));
+  Node* n   = _gvn.transform(new (C) ExpDNode(C, control(), arg));
 
   finish_pow_exp(n, arg, NULL, OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
 
@@ -1688,7 +1688,7 @@
 
   if (!too_many_traps(Deoptimization::Reason_intrinsic)) {
     // Short form: skip the fancy tests and just check for NaN result.
-    result = _gvn.transform(new (C) PowDNode(0, x, y));
+    result = _gvn.transform(new (C) PowDNode(C, control(), x, y));
   } else {
     // If this inlining ever returned NaN in the past, include all
     // checks + call to the runtime.
@@ -1715,7 +1715,7 @@
     Node *complex_path = _gvn.transform( new (C) IfTrueNode(if1) );
 
     // Set fast path result
-    Node *fast_result = _gvn.transform( new (C) PowDNode(0, x, y) );
+    Node *fast_result = _gvn.transform( new (C) PowDNode(C, control(), x, y) );
     phi->init_req(3, fast_result);
 
     // Complex path
@@ -1775,7 +1775,7 @@
     // abs(x)
     Node *absx=_gvn.transform( new (C) AbsDNode(x));
     // abs(x)^y
-    Node *absxpowy = _gvn.transform( new (C) PowDNode(0, absx, y) );
+    Node *absxpowy = _gvn.transform( new (C) PowDNode(C, control(), absx, y) );
     // -abs(x)^y
     Node *negabsxpowy = _gvn.transform(new (C) NegDNode (absxpowy));
     // (1&(long)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
--- a/src/share/vm/opto/loopnode.cpp	Tue Feb 12 07:39:42 2013 -0800
+++ b/src/share/vm/opto/loopnode.cpp	Tue Feb 12 14:33:19 2013 -0800
@@ -88,9 +88,9 @@
   assert( !n->is_Phi() && !n->is_CFG(), "this code only handles data nodes" );
   uint i;
   Node *early;
-  if( n->in(0) ) {
+  if (n->in(0) && !n->is_expensive()) {
     early = n->in(0);
-    if( !early->is_CFG() ) // Might be a non-CFG multi-def
+    if (!early->is_CFG()) // Might be a non-CFG multi-def
       early = get_ctrl(early);        // So treat input as a straight data input
     i = 1;
   } else {
@@ -99,28 +99,28 @@
   }
   uint e_d = dom_depth(early);
   assert( early, "" );
-  for( ; i < n->req(); i++ ) {
+  for (; i < n->req(); i++) {
     Node *cin = get_ctrl(n->in(i));
     assert( cin, "" );
     // Keep deepest dominator depth
     uint c_d = dom_depth(cin);
-    if( c_d > e_d ) {           // Deeper guy?
+    if (c_d > e_d) {           // Deeper guy?
       early = cin;              // Keep deepest found so far
       e_d = c_d;
-    } else if( c_d == e_d &&    // Same depth?
-               early != cin ) { // If not equal, must use slower algorithm
+    } else if (c_d == e_d &&    // Same depth?
+               early != cin) { // If not equal, must use slower algorithm
       // If same depth but not equal, one _must_ dominate the other
       // and we want the deeper (i.e., dominated) guy.
       Node *n1 = early;
       Node *n2 = cin;
-      while( 1 ) {
+      while (1) {
         n1 = idom(n1);          // Walk up until break cycle
         n2 = idom(n2);
-        if( n1 == cin ||        // Walked early up to cin
-            dom_depth(n2) < c_d )
+        if (n1 == cin ||        // Walked early up to cin
+            dom_depth(n2) < c_d)
           break;                // early is deeper; keep him
-        if( n2 == early ||      // Walked cin up to early
-            dom_depth(n1) < c_d ) {
+        if (n2 == early ||      // Walked cin up to early
+            dom_depth(n1) < c_d) {
           early = cin;          // cin is deeper; keep him
           break;
         }
@@ -132,9 +132,108 @@
   // Return earliest legal location
   assert(early == find_non_split_ctrl(early), "unexpected early control");
 
+  if (n->is_expensive()) {
+    assert(n->in(0), "should have control input");
+    early = get_early_ctrl_for_expensive(n, early);
+  }
+
   return early;
 }
 
+//------------------------------get_early_ctrl_for_expensive---------------------------------
+// Move node up the dominator tree as high as legal while still beneficial
+Node *PhaseIdealLoop::get_early_ctrl_for_expensive(Node *n, Node* earliest) {
+  assert(n->in(0) && n->is_expensive(), "expensive node with control input here");
+  assert(OptimizeExpensiveOps, "optimization off?");
+
+  Node* ctl = n->in(0);
+  assert(ctl->is_CFG(), "expensive input 0 must be cfg");
+  uint min_dom_depth = dom_depth(earliest);
+#ifdef ASSERT
+  if (!is_dominator(ctl, earliest) && !is_dominator(earliest, ctl)) {
+    dump_bad_graph("Bad graph detected in get_early_ctrl_for_expensive", n, earliest, ctl);
+    assert(false, "Bad graph detected in get_early_ctrl_for_expensive");
+  }
+#endif
+  if (dom_depth(ctl) < min_dom_depth) {
+    return earliest;
+  }
+
+  while (1) {
+    Node *next = ctl;
+    // Moving the node out of a loop on the projection of a If
+    // confuses loop predication. So once we hit a Loop in a If branch
+    // that doesn't branch to an UNC, we stop. The code that process
+    // expensive nodes will notice the loop and skip over it to try to
+    // move the node further up.
+    if (ctl->is_CountedLoop() && ctl->in(1) != NULL && ctl->in(1)->in(0) != NULL && ctl->in(1)->in(0)->is_If()) {
+      if (!is_uncommon_trap_if_pattern(ctl->in(1)->as_Proj(), Deoptimization::Reason_none)) {
+        break;
+      }
+      next = idom(ctl->in(1)->in(0));
+    } else if (ctl->is_Proj()) {
+      // We only move it up along a projection if the projection is
+      // the single control projection for its parent: same code path,
+      // if it's a If with UNC or fallthrough of a call.
+      Node* parent_ctl = ctl->in(0);
+      if (parent_ctl == NULL) {
+        break;
+      } else if (parent_ctl->is_CountedLoopEnd() && parent_ctl->as_CountedLoopEnd()->loopnode() != NULL) {
+        next = parent_ctl->as_CountedLoopEnd()->loopnode()->init_control();
+      } else if (parent_ctl->is_If()) {
+        if (!is_uncommon_trap_if_pattern(ctl->as_Proj(), Deoptimization::Reason_none)) {
+          break;
+        }
+        assert(idom(ctl) == parent_ctl, "strange");
+        next = idom(parent_ctl);
+      } else if (ctl->is_CatchProj()) {
+        if (ctl->as_Proj()->_con != CatchProjNode::fall_through_index) {
+          break;
+        }
+        assert(parent_ctl->in(0)->in(0)->is_Call(), "strange graph");
+        next = parent_ctl->in(0)->in(0)->in(0);
+      } else {
+        // Check if parent control has a single projection (this
+        // control is the only possible successor of the parent
+        // control). If so, we can try to move the node above the
+        // parent control.
+        int nb_ctl_proj = 0;
+        for (DUIterator_Fast imax, i = parent_ctl->fast_outs(imax); i < imax; i++) {
+          Node *p = parent_ctl->fast_out(i);
+          if (p->is_Proj() && p->is_CFG()) {
+            nb_ctl_proj++;
+            if (nb_ctl_proj > 1) {
+              break;
+            }
+          }
+        }
+
+        if (nb_ctl_proj > 1) {
+          break;
+        }
+        assert(parent_ctl->is_Start() || parent_ctl->is_MemBar() || parent_ctl->is_Call(), "unexpected node");
+        assert(idom(ctl) == parent_ctl, "strange");
+        next = idom(parent_ctl);
+      }
+    } else {
+      next = idom(ctl);
+    }
+    if (next->is_Root() || next->is_Start() || dom_depth(next) < min_dom_depth) {
+      break;
+    }
+    ctl = next;
+  }
+
+  if (ctl != n->in(0)) {
+    _igvn.hash_delete(n);
+    n->set_req(0, ctl);
+    _igvn.hash_insert(n);
+  }
+
+  return ctl;
+}
+
+
 //------------------------------set_early_ctrl---------------------------------
 // Set earliest legal control
 void PhaseIdealLoop::set_early_ctrl( Node *n ) {
@@ -1892,6 +1991,98 @@
   }
 }
 
+//------------------------process_expensive_nodes-----------------------------
+// Expensive nodes have their control input set to prevent the GVN
+// from commoning them and as a result forcing the resulting node to
+// be in a more frequent path. Use CFG information here, to change the
+// control inputs so that some expensive nodes can be commoned while
+// not executed more frequently.
+bool PhaseIdealLoop::process_expensive_nodes() {
+  assert(OptimizeExpensiveOps, "optimization off?");
+
+  // Sort nodes to bring similar nodes together
+  C->sort_expensive_nodes();
+
+  bool progress = false;
+
+  for (int i = 0; i < C->expensive_count(); ) {
+    Node* n = C->expensive_node(i);
+    int start = i;
+    // Find nodes similar to n
+    i++;
+    for (; i < C->expensive_count() && Compile::cmp_expensive_nodes(n, C->expensive_node(i)) == 0; i++);
+    int end = i;
+    // And compare them two by two
+    for (int j = start; j < end; j++) {
+      Node* n1 = C->expensive_node(j);
+      if (is_node_unreachable(n1)) {
+        continue;
+      }
+      for (int k = j+1; k < end; k++) {
+        Node* n2 = C->expensive_node(k);
+        if (is_node_unreachable(n2)) {
+          continue;
+        }
+
+        assert(n1 != n2, "should be pair of nodes");
+
+        Node* c1 = n1->in(0);
+        Node* c2 = n2->in(0);
+
+        Node* parent_c1 = c1;
+        Node* parent_c2 = c2;
+
+        // The call to get_early_ctrl_for_expensive() moves the
+        // expensive nodes up but stops at loops that are in a if
+        // branch. See whether we can exit the loop and move above the
+        // If.
+        if (c1->is_Loop()) {
+          parent_c1 = c1->in(1);
+        }
+        if (c2->is_Loop()) {
+          parent_c2 = c2->in(1);
+        }
+
+        if (parent_c1 == parent_c2) {
+          _igvn._worklist.push(n1);
+          _igvn._worklist.push(n2);
+          continue;
+        }
+
+        // Look for identical expensive node up the dominator chain.
+        if (is_dominator(c1, c2)) {
+          c2 = c1;
+        } else if (is_dominator(c2, c1)) {
+          c1 = c2;
+        } else if (parent_c1->is_Proj() && parent_c1->in(0)->is_If() &&
+                   parent_c2->is_Proj() && parent_c1->in(0) == parent_c2->in(0)) {
+          // Both branches have the same expensive node so move it up
+          // before the if.
+          c1 = c2 = idom(parent_c1->in(0));
+        }
+        // Do the actual moves
+        if (n1->in(0) != c1) {
+          _igvn.hash_delete(n1);
+          n1->set_req(0, c1);
+          _igvn.hash_insert(n1);
+          _igvn._worklist.push(n1);
+          progress = true;
+        }
+        if (n2->in(0) != c2) {
+          _igvn.hash_delete(n2);
+          n2->set_req(0, c2);
+          _igvn.hash_insert(n2);
+          _igvn._worklist.push(n2);
+          progress = true;
+        }
+      }
+    }
+  }
+
+  return progress;
+}
+
+
 //=============================================================================
 //----------------------------build_and_optimize-------------------------------
 // Create a PhaseLoop.  Build the ideal Loop tree.  Map each Ideal Node to
@@ -1960,7 +2151,9 @@
   }
 
   // Nothing to do, so get out
-  if( !C->has_loops() && !skip_loop_opts && !do_split_ifs && !_verify_me && !_verify_only ) {
+  bool stop_early = !C->has_loops() && !skip_loop_opts && !do_split_ifs && !_verify_me && !_verify_only;
+  bool do_expensive_nodes = C->should_optimize_expensive_nodes(_igvn);
+  if (stop_early && !do_expensive_nodes) {
     _igvn.optimize();           // Cleanup NeverBranches
     return;
   }
@@ -2058,6 +2251,21 @@
     return;
   }
 
+  if (stop_early) {
+    assert(do_expensive_nodes, "why are we here?");
+    if (process_expensive_nodes()) {
+      // If we made some progress when processing expensive nodes then
+      // the IGVN may modify the graph in a way that will allow us to
+      // make some more progress: we need to try processing expensive
+      // nodes again.
+      C->set_major_progress();
+    }
+
+    _igvn.optimize();
+
+    return;
+  }
+
   // Some parser-inserted loop predicates could never be used by loop
   // predication or they were moved away from loop during some optimizations.
   // For example, peeling. Eliminate them before next loop optimizations.
@@ -2120,6 +2328,10 @@
     NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
   }
 
+  if (!C->major_progress() && do_expensive_nodes && process_expensive_nodes()) {
+    C->set_major_progress();
+  }
+
   // Perform loop predication before iteration splitting
   if (C->has_loops() && !C->major_progress() && (C->predicate_count() > 0)) {
     _ltree_root->_child->loop_predication(this);
@@ -3299,7 +3511,7 @@
 #ifdef ASSERT
     if (legal->is_Start() && !early->is_Root()) {
       // Bad graph. Print idom path and fail.
-      dump_bad_graph(n, early, LCA);
+      dump_bad_graph("Bad graph detected in build_loop_late", n, early, LCA);
       assert(false, "Bad graph detected in build_loop_late");
     }
 #endif
@@ -3350,8 +3562,8 @@
 }
 
 #ifdef ASSERT
-void PhaseIdealLoop::dump_bad_graph(Node* n, Node* early, Node* LCA) {
-  tty->print_cr( "Bad graph detected in build_loop_late");
+void PhaseIdealLoop::dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA) {
+  tty->print_cr(msg);
   tty->print("n: "); n->dump();
   tty->print("early(n): "); early->dump();
   if (n->in(0) != NULL  && !n->in(0)->is_top() &&
--- a/src/share/vm/opto/loopnode.hpp	Tue Feb 12 07:39:42 2013 -0800
+++ b/src/share/vm/opto/loopnode.hpp	Tue Feb 12 14:33:19 2013 -0800
@@ -263,9 +263,18 @@
   bool stride_is_con() const        { Node *tmp = stride  (); return (tmp != NULL && tmp->is_Con()); }
   BoolTest::mask test_trip() const  { return in(TestValue)->as_Bool()->_test._test; }
   CountedLoopNode *loopnode() const {
+    // The CountedLoopNode that goes with this CountedLoopEndNode may
+    // have been optimized out by the IGVN so be cautious with the
+    // pattern matching on the graph
+    if (phi() == NULL) {
+      return NULL;
+    }
     Node *ln = phi()->in(0);
-    assert( ln->Opcode() == Op_CountedLoop, "malformed loop" );
-    return (CountedLoopNode*)ln; }
+    if (ln->is_CountedLoop() && ln->as_CountedLoop()->loopexit() == this) {
+      return (CountedLoopNode*)ln;
+    }
+    return NULL;
+  }
 
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const;
@@ -598,6 +607,7 @@
   // check if transform created new nodes that need _ctrl recorded
   Node *get_late_ctrl( Node *n, Node *early );
   Node *get_early_ctrl( Node *n );
+  Node *get_early_ctrl_for_expensive(Node *n, Node* earliest);
   void set_early_ctrl( Node *n );
   void set_subtree_ctrl( Node *root );
   void set_ctrl( Node *n, Node *ctrl ) {
@@ -905,6 +915,16 @@
   void collect_potentially_useful_predicates(IdealLoopTree *loop, Unique_Node_List &predicate_opaque1);
   void eliminate_useless_predicates();
 
+  // Change the control input of expensive nodes to allow commoning by
+  // IGVN when it is guaranteed to not result in a more frequent
+  // execution of the expensive node. Return true if progress.
+  bool process_expensive_nodes();
+
+  // Check whether node has become unreachable
+  bool is_node_unreachable(Node *n) const {
+    return !has_node(n) || n->is_unreachable(_igvn);
+  }
+
   // Eliminate range-checks and other trip-counter vs loop-invariant tests.
   void do_range_check( IdealLoopTree *loop, Node_List &old_new );
 
@@ -1043,7 +1063,7 @@
   void register_new_node( Node *n, Node *blk );
 
 #ifdef ASSERT
-void dump_bad_graph(Node* n, Node* early, Node* LCA);
+  void dump_bad_graph(const char* msg, Node* n, Node* early, Node* LCA);
 #endif
 
 #ifndef PRODUCT
--- a/src/share/vm/opto/node.cpp	Tue Feb 12 07:39:42 2013 -0800
+++ b/src/share/vm/opto/node.cpp	Tue Feb 12 14:33:19 2013 -0800
@@ -493,6 +493,8 @@
   }
   if (is_macro())
     compile->add_macro_node(n);
+  if (is_expensive())
+    compile->add_expensive_node(n);
 
   n->set_idx(compile->next_unique()); // Get new unique index as well
   debug_only( n->verify_construction() );
@@ -616,6 +618,9 @@
   if (is_macro()) {
     compile->remove_macro_node(this);
   }
+  if (is_expensive()) {
+    compile->remove_expensive_node(this);
+  }
 #ifdef ASSERT
   // We will not actually delete the storage, but we'll make the node unusable.
   *(address*)this = badAddress;  // smash the C++ vtbl, probably
@@ -689,6 +694,13 @@
 }
 #endif
 
+
+//------------------------------is_unreachable---------------------------------
+bool Node::is_unreachable(PhaseIterGVN &igvn) const {
+  assert(!is_Mach(), "doesn't work with MachNodes");
+  return outcnt() == 0 || igvn.type(this) == Type::TOP || in(0)->is_top();
+}
+
 //------------------------------add_req----------------------------------------
 // Add a new required input at the end
 void Node::add_req( Node *n ) {
@@ -1246,6 +1258,9 @@
       if (dead->is_macro()) {
         igvn->C->remove_macro_node(dead);
       }
+      if (dead->is_expensive()) {
+        igvn->C->remove_expensive_node(dead);
+      }
       // Kill all inputs to the dead guy
       for (uint i=0; i < dead->req(); i++) {
         Node *n = dead->in(i);      // Get input to dead guy
--- a/src/share/vm/opto/node.hpp	Tue Feb 12 07:39:42 2013 -0800
+++ b/src/share/vm/opto/node.hpp	Tue Feb 12 14:33:19 2013 -0800
@@ -378,6 +378,8 @@
   bool is_dead() const;
 #define is_not_dead(n) ((n) == NULL || !VerifyIterativeGVN || !((n)->is_dead()))
 #endif
+  // Check whether node has become unreachable
+  bool is_unreachable(PhaseIterGVN &igvn) const;
 
   // Set a required input edge, also updates corresponding output edge
   void add_req( Node *n ); // Append a NEW required input
@@ -646,7 +648,8 @@
     Flag_may_be_short_branch = Flag_is_dead_loop_safe << 1,
     Flag_avoid_back_to_back  = Flag_may_be_short_branch << 1,
     Flag_has_call            = Flag_avoid_back_to_back << 1,
-    _max_flags = (Flag_has_call << 1) - 1 // allow flags combination
+    Flag_is_expensive        = Flag_has_call << 1,
+    _max_flags = (Flag_is_expensive << 1) - 1 // allow flags combination
   };
 
 private:
@@ -819,6 +822,8 @@
 
   // The node is a "macro" node which needs to be expanded before matching
   bool is_macro() const { return (_flags & Flag_is_macro) != 0; }
+  // The node is expensive: the best control is set during loop opts
+  bool is_expensive() const { return (_flags & Flag_is_expensive) != 0 && in(0) != NULL; }
 
 //----------------- Optimization
 
--- a/src/share/vm/opto/phaseX.cpp	Tue Feb 12 07:39:42 2013 -0800
+++ b/src/share/vm/opto/phaseX.cpp	Tue Feb 12 14:33:19 2013 -0800
@@ -1203,6 +1203,9 @@
         if (dead->is_macro()) {
           C->remove_macro_node(dead);
         }
+        if (dead->is_expensive()) {
+          C->remove_expensive_node(dead);
+        }
 
         if (recurse) {
           continue;
--- a/src/share/vm/opto/subnode.hpp	Tue Feb 12 07:39:42 2013 -0800
+++ b/src/share/vm/opto/subnode.hpp	Tue Feb 12 14:33:19 2013 -0800
@@ -456,7 +456,10 @@
 //  Exponentiate a double
 class ExpDNode : public Node {
 public:
-  ExpDNode( Node *c, Node *in1 ) : Node(c, in1) {}
+  ExpDNode(Compile* C, Node *c, Node *in1) : Node(c, in1) {
+    init_flags(Flag_is_expensive);
+    C->add_expensive_node(this);
+  }
   virtual int Opcode() const;
   const Type *bottom_type() const { return Type::DOUBLE; }
   virtual uint ideal_reg() const { return Op_RegD; }
@@ -489,7 +492,10 @@
 // Raise a double to a double power
 class PowDNode : public Node {
 public:
-  PowDNode(Node *c, Node *in1, Node *in2  ) : Node(c, in1, in2) {}
+  PowDNode(Compile* C, Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {
+    init_flags(Flag_is_expensive);
+    C->add_expensive_node(this);
+  }
   virtual int Opcode() const;
   const Type *bottom_type() const { return Type::DOUBLE; }
   virtual uint ideal_reg() const { return Op_RegD; }