changeset 54705:fc7627bf4b01

8216137: assert failed: Live node limit exceeded. 8219520: assert failed: Live node limit exceeded. Summary: Adding simplistic node budget support for loop-transformations. This /should/attempts to/ resolve most cases of: "assert(Compile::current()->live_nodes() < Compile::current()->max_node_limit()) failed: Live Node limit exceeded limit". Reviewed-by: vlivanov, neliasso
author phedlin
date Wed, 17 Apr 2019 14:57:53 +0200
parents 3a79044dd980
children 81d57ba42425
files src/hotspot/share/opto/loopTransform.cpp src/hotspot/share/opto/loopUnswitch.cpp src/hotspot/share/opto/loopnode.cpp src/hotspot/share/opto/loopnode.hpp src/hotspot/share/opto/loopopts.cpp
diffstat 5 files changed, 247 insertions(+), 89 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/share/opto/loopTransform.cpp	Wed Apr 17 14:56:45 2019 +0200
+++ b/src/hotspot/share/opto/loopTransform.cpp	Wed Apr 17 14:57:53 2019 +0200
@@ -346,12 +346,18 @@
 // make some loop-invariant test (usually a null-check) happen before the loop.
 bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) const {
   IdealLoopTree *loop = (IdealLoopTree*)this;
-  Node *test = loop->tail();
-  int body_size = loop->_body.size();
+
+  // If nodes are depleted, some transform has miscalculated its needs.
+  assert(!phase->exceeding_node_budget(), "sanity");
+
+  uint body_size = loop->_body.size();
   // Peeling does loop cloning which can result in O(N^2) node construction
-  if (body_size > 255 /* Prevent overflow for large body_size */
-      || (body_size * body_size + phase->C->live_nodes()) > phase->C->max_node_limit()) {
-    return false;           // too large to safely clone
+  if (body_size > 255) {
+    return false;   // Prevent overflow for large body size
+  }
+  uint estimate = body_size * body_size;
+  if (phase->exceeding_node_budget(estimate)) {
+    return false;   // Too large to safely clone
   }
 
   // check for vectorized loops, any peeling done was already applied
@@ -362,6 +368,8 @@
     }
   }
 
+  Node* test = loop->tail();
+
   while (test != _head) {       // Scan till run off top of loop
     if (test->is_If()) {        // Test?
       Node *ctrl = phase->get_ctrl(test->in(1));
@@ -375,7 +383,8 @@
              "Check this code when new subtype is added");
       // Condition is not a member of this loop?
       if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
-        return true;            // Found reason to peel!
+        // Found reason to peel!
+        return phase->may_require_nodes(estimate);
       }
     }
     // Walk up dominators to loop _head looking for test which is
@@ -669,6 +678,9 @@
   assert(trip_count > 1, "one iteration loop should be optimized out already");
   assert(trip_count < max_juint, "exact trip_count should be less than max_uint.");
 
+  // If nodes are depleted, some transform has miscalculated its needs.
+  assert(!phase->exceeding_node_budget(), "sanity");
+
   // Real policy: if we maximally unroll, does it get too big?
   // Allow the unrolled mess to get larger than standard loop
   // size.  After all, it will no longer be a loop.
@@ -679,21 +691,23 @@
     return false;
   }
 
-  // Fully unroll a loop with few iterations regardless next
-  // conditions since following loop optimizations will split
-  // such loop anyway (pre-main-post).
-  if (trip_count <= 3)
-    return true;
-
   // Take into account that after unroll conjoined heads and tails will fold,
   // otherwise policy_unroll() may allow more unrolling than max unrolling.
-  uint new_body_size = EMPTY_LOOP_SIZE + (body_size - EMPTY_LOOP_SIZE) * trip_count;
-  uint tst_body_size = (new_body_size - EMPTY_LOOP_SIZE) / trip_count + EMPTY_LOOP_SIZE;
-  if (body_size != tst_body_size) // Check for int overflow
+  uint new_body_size = est_loop_clone_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
+
+  if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
     return false;
+  }
+
+  // Fully unroll a loop with few iterations regardless next conditions since
+  // following loop optimizations will split such loop anyway (pre-main-post).
+  if (trip_count <= 3) {
+    return phase->may_require_nodes(new_body_size);
+  }
+
   if (new_body_size > unroll_limit ||
       // Unrolling can result in a large amount of node construction
-      new_body_size >= phase->C->max_node_limit() - phase->C->live_nodes()) {
+      phase->exceeding_node_budget(new_body_size)) {
     return false;
   }
 
@@ -723,26 +737,31 @@
     } // switch
   }
 
-  return true; // Do maximally unroll
+  return phase->may_require_nodes(new_body_size);
 }
 
 
 //------------------------------policy_unroll----------------------------------
-// Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
-// the loop is a CountedLoop and the body is small enough.
+// Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if the
+// loop is a CountedLoop and the body is small enough.
 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
 
   CountedLoopNode *cl = _head->as_CountedLoop();
   assert(cl->is_normal_loop() || cl->is_main_loop(), "");
 
-  if (!cl->is_valid_counted_loop())
+  if (!cl->is_valid_counted_loop()) {
     return false; // Malformed counted loop
+  }
+
+  // If nodes are depleted, some transform has miscalculated its needs.
+  assert(!phase->exceeding_node_budget(), "sanity");
 
   // Protect against over-unrolling.
   // After split at least one iteration will be executed in pre-loop.
-  if (cl->trip_count() <= (uint)(cl->is_normal_loop() ? 2 : 1)) return false;
-
-  _local_loop_unroll_limit = LoopUnrollLimit;
+  if (cl->trip_count() <= (cl->is_normal_loop() ? 2u : 1u)) {
+    return false;
+  }
+  _local_loop_unroll_limit  = LoopUnrollLimit;
   _local_loop_unroll_factor = 4;
   int future_unroll_cnt = cl->unrolled_count() * 2;
   if (!cl->is_vectorized_loop()) {
@@ -867,32 +886,40 @@
   if ((LoopMaxUnroll < slp_max_unroll_factor) && FLAG_IS_DEFAULT(LoopMaxUnroll) && UseSubwordForMaxVector) {
     LoopMaxUnroll = slp_max_unroll_factor;
   }
+
+  uint estimate = est_loop_clone_sz(2, body_size);
+
   if (cl->has_passed_slp()) {
-    if (slp_max_unroll_factor >= future_unroll_cnt) return true;
-    // Normal case: loop too big
-    return false;
+    if (slp_max_unroll_factor >= future_unroll_cnt) {
+      return phase->may_require_nodes(estimate);
+    }
+    return false; // Loop too big.
   }
 
   // Check for being too big
   if (body_size > (uint)_local_loop_unroll_limit) {
-    if ((cl->is_subword_loop() || xors_in_loop >= 4) && body_size < (uint)LoopUnrollLimit * 4) {
-      return true;
+    if ((cl->is_subword_loop() || xors_in_loop >= 4) && body_size < 4u * LoopUnrollLimit) {
+      return phase->may_require_nodes(estimate);
     }
-    // Normal case: loop too big
-    return false;
+    return false; // Loop too big.
   }
 
   if (cl->is_unroll_only()) {
     if (TraceSuperWordLoopUnrollAnalysis) {
-      tty->print_cr("policy_unroll passed vector loop(vlen=%d,factor = %d)\n", slp_max_unroll_factor, future_unroll_cnt);
+      tty->print_cr("policy_unroll passed vector loop(vlen=%d, factor=%d)\n",
+                    slp_max_unroll_factor, future_unroll_cnt);
     }
   }
 
   // Unroll once!  (Each trip will soon do double iterations)
-  return true;
+  return phase->may_require_nodes(estimate);
 }
 
 void IdealLoopTree::policy_unroll_slp_analysis(CountedLoopNode *cl, PhaseIdealLoop *phase, int future_unroll_cnt) {
+
+  // If nodes are depleted, some transform has miscalculated its needs.
+  assert(!phase->exceeding_node_budget(), "sanity");
+
   // Enable this functionality target by target as needed
   if (SuperWordLoopUnrollAnalysis) {
     if (!cl->was_slp_analyzed()) {
@@ -936,6 +963,9 @@
 bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
   if (!RangeCheckElimination) return false;
 
+  // If nodes are depleted, some transform has miscalculated its needs.
+  assert(!phase->exceeding_node_budget(), "sanity");
+
   CountedLoopNode *cl = _head->as_CountedLoop();
   // If we unrolled with no intention of doing RCE and we later
   // changed our minds, we got no pre-loop.  Either we need to
@@ -986,11 +1016,13 @@
       if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
         continue;
       }
-      // Yeah!  Found a test like 'trip+off vs limit'
-      // Test is an IfNode, has 2 projections.  If BOTH are in the loop
-      // we need loop unswitching instead of iteration splitting.
+      // Found a test like 'trip+off vs  limit'.  Test is an IfNode, has two
+      // (2) projections.  If BOTH are in  the loop we need loop unswitching
+      // instead of iteration splitting.
       if (is_loop_exit(iff)) {
-        return true;            // Found reason to split iterations
+        // Found valid reason to split iterations (if there is room).
+        // NOTE: Usually a gross overestimate.
+        return phase->may_require_nodes(est_loop_clone_sz(2, _body.size()));
       }
     } // End of is IF
   }
@@ -1002,6 +1034,10 @@
 // Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned.  Useful
 // for unrolling loops with NO array accesses.
 bool IdealLoopTree::policy_peel_only(PhaseIdealLoop *phase) const {
+
+  // If nodes are depleted, some transform has miscalculated its needs.
+  assert(!phase->exceeding_node_budget(), "sanity");
+
   // check for vectorized loops, any peeling done was already applied
   if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) {
     return false;
@@ -1485,6 +1521,9 @@
   // only process vectorized main loops
   if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
 
+  if (!may_require_nodes(est_loop_clone_sz(2, loop->_body.size()))) {
+    return;
+  }
   int slp_max_unroll_factor = cl->slp_max_unroll();
   int cur_unroll = cl->unrolled_count();
 
@@ -1829,7 +1868,8 @@
     // Verify that policy_unroll result is still valid.
     const TypeInt* limit_type = _igvn.type(limit)->is_int();
     assert(stride_con > 0 && ((limit_type->_hi - stride_con) < limit_type->_hi) ||
-        stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo), "sanity");
+           stride_con < 0 && ((limit_type->_lo - stride_con) > limit_type->_lo),
+           "sanity");
 
     if (limit->is_Con()) {
       // The check in policy_unroll and the assert above guarantee
@@ -1898,6 +1938,7 @@
       }
       register_new_node(new_limit, ctrl);
     }
+
     assert(new_limit != NULL, "");
     // Replace in loop test.
     assert(loop_end->in(1)->in(1) == cmp, "sanity");
@@ -2009,7 +2050,6 @@
     }
   }
 #endif
-
 }
 
 //------------------------------do_maximally_unroll----------------------------
@@ -3135,8 +3175,10 @@
   if (do_remove_empty_loop(phase)) {
     return true;  // Here we removed an empty loop
   }
-  bool should_peel = policy_peeling(phase); // Should we peel?
-
+
+  AutoNodeBudget node_budget(phase);
+
+  bool should_peel     = policy_peeling(phase);
   bool should_unswitch = policy_unswitching(phase);
 
   // Non-counted loops may be peeled; exactly 1 iteration is peeled.
@@ -3171,22 +3213,15 @@
       phase->do_unswitching(this, old_new);
       return true;
     }
-    bool should_maximally_unroll =  policy_maximally_unroll(phase);
+    bool should_maximally_unroll = policy_maximally_unroll(phase);
     if (should_maximally_unroll) {
       // Here we did some unrolling and peeling.  Eventually we will
       // completely unroll this loop and it will no longer be a loop.
-      phase->do_maximally_unroll(this,old_new);
+      phase->do_maximally_unroll(this, old_new);
       return true;
     }
   }
 
-  // Skip next optimizations if running low on nodes. Note that
-  // policy_unswitching and policy_maximally_unroll have this check.
-  int nodes_left = phase->C->max_node_limit() - phase->C->live_nodes();
-  if ((int)(2 * _body.size()) > nodes_left) {
-    return true;
-  }
-
   // Counted loops may be peeled, may need some iterations run up
   // front for RCE, and may want to align loop refs to a cache
   // line.  Thus we clone a full loop up front whose trip count is
@@ -3200,26 +3235,28 @@
   // unrolling), plus any needed for RCE purposes.
 
   bool should_unroll = policy_unroll(phase);
-
-  bool should_rce = policy_range_check(phase);
-
-  bool should_align = policy_align(phase);
-
-  // If not RCE'ing (iteration splitting) or Aligning, then we do not
-  // need a pre-loop.  We may still need to peel an initial iteration but
-  // we will not be needing an unknown number of pre-iterations.
+  bool should_rce    = policy_range_check(phase);
+  // TODO: Remove align -- not used.
+  bool should_align  = policy_align(phase);
+
+  // If not RCE'ing  (iteration splitting) or Aligning, then we  do not need a
+  // pre-loop.  We may still need to peel an initial iteration but we will not
+  // be needing an unknown number of pre-iterations.
   //
-  // Basically, if may_rce_align reports FALSE first time through,
-  // we will not be able to later do RCE or Aligning on this loop.
+  // Basically, if may_rce_align reports FALSE first time through, we will not
+  // be able to later do RCE or Aligning on this loop.
   bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align;
 
   // If we have any of these conditions (RCE, alignment, unrolling) met, then
   // we switch to the pre-/main-/post-loop model.  This model also covers
   // peeling.
   if (should_rce || should_align || should_unroll) {
-    if (cl->is_normal_loop())  // Convert to 'pre/main/post' loops
+    if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
+      if (!phase->may_require_nodes(est_loop_clone_sz(3, _body.size()))) {
+        return false;
+      }
       phase->insert_pre_post_loops(this,old_new, !may_rce_align);
-
+    }
     // Adjust the pre- and main-loop limits to let the pre and post loops run
     // with full checks, but the main-loop with no checks.  Remove said
     // checks from the main body.
@@ -3286,8 +3323,11 @@
       if (!iteration_split_impl(phase, old_new)) {
         return false;
       }
-    } else if (policy_unswitching(phase)) {
-      phase->do_unswitching(this, old_new);
+    } else {
+      AutoNodeBudget node_budget(phase);
+      if (policy_unswitching(phase)) {
+        phase->do_unswitching(this, old_new);
+      }
     }
   }
 
--- a/src/hotspot/share/opto/loopUnswitch.cpp	Wed Apr 17 14:56:45 2019 +0200
+++ b/src/hotspot/share/opto/loopUnswitch.cpp	Wed Apr 17 14:57:53 2019 +0200
@@ -55,27 +55,31 @@
 // Return TRUE or FALSE if the loop should be unswitched
 // (ie. clone loop with an invariant test that does not exit the loop)
 bool IdealLoopTree::policy_unswitching( PhaseIdealLoop *phase ) const {
-  if( !LoopUnswitching ) {
+  if (!LoopUnswitching) {
     return false;
   }
   if (!_head->is_Loop()) {
     return false;
   }
 
+  // If nodes are depleted, some transform has miscalculated its needs.
+  assert(!phase->exceeding_node_budget(), "sanity");
+
   // check for vectorized loops, any unswitching was already applied
   if (_head->is_CountedLoop() && _head->as_CountedLoop()->is_unroll_only()) {
     return false;
   }
 
-  int nodes_left = phase->C->max_node_limit() - phase->C->live_nodes();
-  if ((int)(2 * _body.size()) > nodes_left) {
-    return false; // Too speculative if running low on nodes.
-  }
   LoopNode* head = _head->as_Loop();
   if (head->unswitch_count() + 1 > head->unswitch_max()) {
     return false;
   }
-  return phase->find_unswitching_candidate(this) != NULL;
+  if (phase->find_unswitching_candidate(this) == NULL) {
+    return false;
+  }
+
+  // Too speculative if running low on nodes.
+  return phase->may_require_nodes(est_loop_clone_sz(3, _body.size()));
 }
 
 //------------------------------find_unswitching_candidate-----------------------------
--- a/src/hotspot/share/opto/loopnode.cpp	Wed Apr 17 14:56:45 2019 +0200
+++ b/src/hotspot/share/opto/loopnode.cpp	Wed Apr 17 14:57:53 2019 +0200
@@ -2938,6 +2938,7 @@
   }
 
   if (ReassociateInvariants) {
+    AutoNodeBudget node_budget(this, AutoNodeBudget::NO_BUDGET_CHECK);
     // Reassociate invariants and prep for split_thru_phi
     for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
       IdealLoopTree* lpt = iter.current();
--- a/src/hotspot/share/opto/loopnode.hpp	Wed Apr 17 14:56:45 2019 +0200
+++ b/src/hotspot/share/opto/loopnode.hpp	Wed Apr 17 14:57:53 2019 +0200
@@ -479,9 +479,9 @@
   IdealLoopTree *_child;        // First child in loop tree
 
   // The head-tail backedge defines the loop.
-  // If tail is NULL then this loop has multiple backedges as part of the
-  // same loop.  During cleanup I'll peel off the multiple backedges; merge
-  // them at the loop bottom and flow 1 real backedge into the loop.
+  // If a loop has multiple backedges, this is addressed during cleanup where
+  // we peel off the multiple backedges,  merging all edges at the bottom and
+  // ensuring that one proper backedge flow into the loop.
   Node *_head;                  // Head of loop
   Node *_tail;                  // Tail of loop
   inline Node *tail();          // Handle lazy update of _tail field
@@ -510,7 +510,10 @@
       _safepts(NULL),
       _required_safept(NULL),
       _allow_optimizations(true)
-  { }
+  {
+    precond(_head != NULL);
+    precond(_tail != NULL);
+  }
 
   // Is 'l' a member of 'this'?
   bool is_member(const IdealLoopTree *l) const; // Test for nested membership
@@ -662,6 +665,7 @@
   friend class SuperWord;
   friend class CountedLoopReserveKit;
   friend class ShenandoahBarrierC2Support;
+  friend class AutoNodeBudget;
 
   // Pre-computed def-use info
   PhaseIterGVN &_igvn;
@@ -907,7 +911,8 @@
     _igvn(igvn),
     _verify_me(NULL),
     _verify_only(true),
-    _dom_lca_tags(arena()) { // Thread::resource_area
+    _dom_lca_tags(arena()),  // Thread::resource_area
+    _nodes_required(UINT_MAX) {
     build_and_optimize(LoopOptsVerify);
   }
 
@@ -923,7 +928,8 @@
     _igvn(igvn),
     _verify_me(NULL),
     _verify_only(false),
-    _dom_lca_tags(arena()) { // Thread::resource_area
+    _dom_lca_tags(arena()),  // Thread::resource_area
+    _nodes_required(UINT_MAX) {
     build_and_optimize(mode);
   }
 
@@ -933,7 +939,8 @@
     _igvn(igvn),
     _verify_me(verify_me),
     _verify_only(false),
-    _dom_lca_tags(arena()) { // Thread::resource_area
+    _dom_lca_tags(arena()),  // Thread::resource_area
+    _nodes_required(UINT_MAX) {
     build_and_optimize(LoopOptsVerify);
   }
 
@@ -1344,8 +1351,54 @@
     return C->live_nodes() > threshold;
   }
 
+  // A simplistic node request tracking mechanism, where
+  //   = UINT_MAX   Request not valid or made final.
+  //   < UINT_MAX   Nodes currently requested (estimate).
+  uint _nodes_required;
+
+  bool exceeding_node_budget(uint required = 0) {
+    assert(C->live_nodes() < C->max_node_limit(), "sanity");
+    uint available = C->max_node_limit() - C->live_nodes();
+    return available < required + _nodes_required;
+  }
+
+  uint require_nodes(uint require) {
+    precond(require > 0);
+    _nodes_required += MAX2(100u, require); // Keep requests at minimum 100.
+    return _nodes_required;
+  }
+
+  bool may_require_nodes(uint require) {
+    return !exceeding_node_budget(require) && require_nodes(require) > 0;
+  }
+
+  void require_nodes_begin() {
+    assert(_nodes_required == UINT_MAX, "Bad state (begin).");
+    _nodes_required = 0;
+  }
+
+  // Final check  that the requested nodes  did not exceed the  limit and that
+  // the request  was reasonably  correct with  respect to  the number  of new
+  // nodes introduced by any transform since the last 'begin'.
+  void require_nodes_final_check(uint live_at_begin) {
+    uint required = _nodes_required;
+    require_nodes_final();
+    uint delta = C->live_nodes() - live_at_begin;
+    assert(delta <= 2 * required, "Bad node estimate (actual: %d, request: %d)",
+           delta, required);
+  }
+
+  void require_nodes_final() {
+    assert(_nodes_required < UINT_MAX, "Bad state (final).");
+    assert(!exceeding_node_budget(), "Too many NODES required!");
+    _nodes_required = UINT_MAX;
+  }
+
   bool _created_loop_node;
+
 public:
+  uint nodes_required() const { return _nodes_required; }
+
   void set_created_loop_node() { _created_loop_node = true; }
   bool created_loop_node()     { return _created_loop_node; }
   void register_new_node( Node *n, Node *blk );
@@ -1371,6 +1424,62 @@
   void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const;
 };
 
+
+class AutoNodeBudget : public StackObj
+{
+public:
+  enum budget_check_t { BUDGET_CHECK, NO_BUDGET_CHECK };
+
+  AutoNodeBudget(PhaseIdealLoop* phase, budget_check_t chk = BUDGET_CHECK)
+    : _phase(phase),
+      _check_at_final(chk == BUDGET_CHECK),
+      _nodes_at_begin(0)
+  {
+    precond(_phase != NULL);
+
+    _nodes_at_begin = _phase->C->live_nodes();
+    _phase->require_nodes_begin();
+  }
+
+  ~AutoNodeBudget() {
+    if (_check_at_final) {
+#ifndef PRODUCT
+      if (TraceLoopOpts) {
+        uint request = _phase->nodes_required();
+
+        if (request > 0) {
+          uint delta = _phase->C->live_nodes() - _nodes_at_begin;
+
+          if (request < delta) {
+            tty->print_cr("Exceeding node budget: %d < %d", request, delta);
+          }
+        }
+      }
+#endif
+      _phase->require_nodes_final_check(_nodes_at_begin);
+    } else {
+      _phase->require_nodes_final();
+    }
+  }
+
+private:
+  PhaseIdealLoop* _phase;
+  bool _check_at_final;
+  uint _nodes_at_begin;
+};
+
+// The Estimated Loop Clone Size: CloneFactor * (BodySize + BC) + CC, where BC
+// and CC are totally ad-hoc/magic "body" and "clone" constants, respectively,
+// used to ensure that node usage estimates made are on the safe side, for the
+// most part.
+static inline uint est_loop_clone_sz(uint fact, uint size) {
+  uint const bc = 31;
+  uint const cc = 41;
+  uint estimate = fact * (size + bc) + cc;
+  return (estimate - cc) / fact == size + bc ? estimate : UINT_MAX;
+}
+
+
 // This kit may be used for making of a reserved copy of a loop before this loop
 //  goes under non-reversible changes.
 //
--- a/src/hotspot/share/opto/loopopts.cpp	Wed Apr 17 14:56:45 2019 +0200
+++ b/src/hotspot/share/opto/loopopts.cpp	Wed Apr 17 14:57:53 2019 +0200
@@ -2662,7 +2662,7 @@
     assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
     get_loop(use_c)->_body.push(n_clone);
     _igvn.register_new_node_with_optimizer(n_clone);
-#if !defined(PRODUCT)
+#ifndef PRODUCT
     if (TracePartialPeeling) {
       tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
     }
@@ -2700,7 +2700,7 @@
     set_ctrl(n_clone, get_ctrl(n));
     sink_list.push(n_clone);
     not_peel <<= n_clone->_idx;  // add n_clone to not_peel set.
-#if !defined(PRODUCT)
+#ifndef PRODUCT
     if (TracePartialPeeling) {
       tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
     }
@@ -3046,7 +3046,7 @@
         opc == Op_CatchProj ||
         opc == Op_Jump      ||
         opc == Op_JumpProj) {
-#if !defined(PRODUCT)
+#ifndef PRODUCT
       if (TracePartialPeeling) {
         tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
       }
@@ -3102,7 +3102,7 @@
     return false;
   }
 
-#if !defined(PRODUCT)
+#ifndef PRODUCT
   if (TraceLoopOpts) {
     tty->print("PartialPeel  ");
     loop->dump_head();
@@ -3131,6 +3131,10 @@
   Node_List worklist(area);
   Node_List sink_list(area);
 
+  if (!may_require_nodes(est_loop_clone_sz(2, loop->_body.size()))) {
+    return false;
+  }
+
   // Set of cfg nodes to peel are those that are executable from
   // the head through last_peel.
   assert(worklist.size() == 0, "should be empty");
@@ -3179,7 +3183,7 @@
     if (use->is_Phi()) old_phi_cnt++;
   }
 
-#if !defined(PRODUCT)
+#ifndef PRODUCT
   if (TracePartialPeeling) {
     tty->print_cr("\npeeled list");
   }
@@ -3190,7 +3194,7 @@
   uint cloned_for_outside_use = 0;
   for (i = 0; i < peel_list.size();) {
     Node* n = peel_list.at(i);
-#if !defined(PRODUCT)
+#ifndef PRODUCT
   if (TracePartialPeeling) n->dump();
 #endif
     bool incr = true;
@@ -3212,7 +3216,7 @@
             not_peel <<= n->_idx; // add n to not_peel set.
             peel_list.remove(i);
             incr = false;
-#if !defined(PRODUCT)
+#ifndef PRODUCT
             if (TracePartialPeeling) {
               tty->print_cr("sink to not_peeled region: %d newbb: %d",
                             n->_idx, get_ctrl(n)->_idx);
@@ -3231,7 +3235,7 @@
   }
 
   if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
-#if !defined(PRODUCT)
+#ifndef PRODUCT
     if (TracePartialPeeling) {
       tty->print_cr("\nToo many new phis: %d  old %d new cmpi: %c",
                     new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
@@ -3389,7 +3393,7 @@
   C->set_major_progress();
   loop->record_for_igvn();
 
-#if !defined(PRODUCT)
+#ifndef PRODUCT
   if (TracePartialPeeling) {
     tty->print_cr("\nafter partial peel one iteration");
     Node_List wl(area);
@@ -3429,10 +3433,10 @@
   Node *exit = cle->proj_out(false);
   Node *phi = cl->phi();
 
-  // Check for the special case of folks using the pre-incremented
-  // trip-counter on the fall-out path (forces the pre-incremented
-  // and post-incremented trip counter to be live at the same time).
-  // Fix this by adjusting to use the post-increment trip counter.
+  // Check for the special case when using the pre-incremented trip-counter on
+  // the fall-out  path (forces the pre-incremented  and post-incremented trip
+  // counter to be live  at the same time).  Fix this by  adjusting to use the
+  // post-increment trip counter.
 
   bool progress = true;
   while (progress) {