changeset 56532:ba171f871932

8223363: Bad node estimate assertion failure 8223502: Node estimate for loop unswitching is not correct: assert(delta <= 2 * required) failed: Bad node estimate 8224648: assert(!exceeding_node_budget()) failed: Too many NODES required! failure with ctw Summary: Tighten the node estimates. New est_loop_clone_sz() implementation that will compute a "fan-out" complexity estimate as part of the size estimate (to better estimate complex loop body size after cloning). New est_loop_unroll_sz() function, used to estimate the size of a loop body att full/maximal unrolling. Correction to node budget final tests and asserts. Reviewed-by: neliasso, kvn
author phedlin
date Tue, 28 May 2019 14:56:58 +0200
parents 00f7fce88e25
children 9691a169f1dd
files src/hotspot/share/opto/loopTransform.cpp src/hotspot/share/opto/loopUnswitch.cpp src/hotspot/share/opto/loopnode.cpp src/hotspot/share/opto/loopnode.hpp src/hotspot/share/opto/loopopts.cpp test/hotspot/jtreg/compiler/loopopts/LoopUnswitchingBadNodeBudget.java
diffstat 6 files changed, 414 insertions(+), 143 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/share/opto/loopTransform.cpp	Mon Jun 03 10:51:28 2019 +0200
+++ b/src/hotspot/share/opto/loopTransform.cpp	Tue May 28 14:56:58 2019 +0200
@@ -286,6 +286,9 @@
   Node* n2 = n1->in(3 - inv1_idx);
   int inv2_idx = is_invariant_addition(n2, phase);
   if (!inv2_idx) return NULL;
+
+  if (!phase->may_require_nodes(10, 10)) return NULL;
+
   Node* x    = n2->in(3 - inv2_idx);
   Node* inv2 = n2->in(inv2_idx);
 
@@ -337,61 +340,72 @@
       Node* nn = reassociate_add_sub(n, phase);
       if (nn == NULL) break;
       n = nn; // again
-    };
+    }
   }
 }
 
 //------------------------------policy_peeling---------------------------------
-// Return TRUE or FALSE if the loop should be peeled or not.  Peel if we can
-// make some loop-invariant test (usually a null-check) happen before the loop.
-bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) const {
-  IdealLoopTree *loop = (IdealLoopTree*)this;
+// Return TRUE if the loop should be peeled, otherwise return FALSE. Peeling
+// is applicable if we can make a loop-invariant test (usually a null-check)
+// execute before we enter the loop. When TRUE, the estimated node budget is
+// also requested.
+bool IdealLoopTree::policy_peeling(PhaseIdealLoop *phase) {
+  uint estimate = estimate_peeling(phase);
+
+  return estimate == 0 ? false : phase->may_require_nodes(estimate);
+}
+
+// Perform actual policy and size estimate for the loop peeling transform, and
+// return the estimated loop size if peeling is applicable, otherwise return
+// zero. No node budget is allocated.
+uint IdealLoopTree::estimate_peeling(PhaseIdealLoop *phase) {
 
   // If nodes are depleted, some transform has miscalculated its needs.
   assert(!phase->exceeding_node_budget(), "sanity");
 
-  uint body_size = loop->_body.size();
-  // Peeling does loop cloning which can result in O(N^2) node construction
-  if (body_size > 255) {
-    return false;   // Prevent overflow for large body size
+  // Peeling does loop cloning which can result in O(N^2) node construction.
+  if (_body.size() > 255) {
+    return 0;   // Suppress too large body size.
   }
-  uint estimate = body_size * body_size;
+  // Optimistic estimate that approximates loop body complexity via data and
+  // control flow fan-out (instead of using the more pessimistic: BodySize^2).
+  uint estimate = est_loop_clone_sz(2);
+
   if (phase->exceeding_node_budget(estimate)) {
-    return false;   // Too large to safely clone
+    return 0;   // Too large to safely clone.
   }
 
-  // check for vectorized loops, any peeling done was already applied
+  // Check for vectorized loops, any peeling done was already applied.
   if (_head->is_CountedLoop()) {
     CountedLoopNode* cl = _head->as_CountedLoop();
     if (cl->is_unroll_only() || cl->trip_count() == 1) {
-      return false;
+      return 0;
     }
   }
 
-  Node* test = loop->tail();
-
-  while (test != _head) {       // Scan till run off top of loop
-    if (test->is_If()) {        // Test?
+  Node* test = tail();
+
+  while (test != _head) {   // Scan till run off top of loop
+    if (test->is_If()) {    // Test?
       Node *ctrl = phase->get_ctrl(test->in(1));
       if (ctrl->is_top()) {
-        return false;           // Found dead test on live IF?  No peeling!
+        return 0;           // Found dead test on live IF?  No peeling!
       }
-      // Standard IF only has one input value to check for loop invariance
+      // Standard IF only has one input value to check for loop invariance.
       assert(test->Opcode() == Op_If ||
              test->Opcode() == Op_CountedLoopEnd ||
              test->Opcode() == Op_RangeCheck,
              "Check this code when new subtype is added");
       // Condition is not a member of this loop?
       if (!is_member(phase->get_loop(ctrl)) && is_loop_exit(test)) {
-        // Found reason to peel!
-        return phase->may_require_nodes(estimate);
+        return estimate;    // Found reason to peel!
       }
     }
-    // Walk up dominators to loop _head looking for test which is
-    // executed on every path thru loop.
+    // Walk up dominators to loop _head looking for test which is executed on
+    // every path through the loop.
     test = phase->idom(test);
   }
-  return false;
+  return 0;
 }
 
 //------------------------------peeled_dom_test_elim---------------------------
@@ -638,8 +652,8 @@
     }
   }
 
-
   // Step 4: Correct dom-depth info.  Set to loop-head depth.
+
   int dd = dom_depth(head);
   set_idom(head, head->in(1), dd);
   for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
@@ -657,11 +671,30 @@
   loop->record_for_igvn();
 }
 
-#define EMPTY_LOOP_SIZE 7 // number of nodes in an empty loop
+// The Estimated Loop Unroll Size: UnrollFactor * (106% * BodySize + BC) + CC,
+// where BC  and CC are  (totally) ad-hoc/magic "body" and  "clone" constants,
+// respectively, used to ensure that node usage estimates made are on the safe
+// side, for the  most part.  This is  a simplified version of  the loop clone
+// size calculation in est_loop_clone_sz(),  defined for unroll factors larger
+// than one  (>1), performing  an overflow check  and returning  'UINT_MAX' in
+// case of an overflow.
+static uint est_loop_unroll_sz(uint factor, uint size) {
+  precond(0 < factor);
+
+  uint const bc = 5;
+  uint const cc = 7;
+  uint const sz = size + (size + 15) / 16;
+  uint estimate = factor * (sz + bc) + cc;
+
+  return (estimate - cc) / factor == sz + bc ? estimate : UINT_MAX;
+}
+
+#define EMPTY_LOOP_SIZE 7   // Number of nodes in an empty loop.
 
 //------------------------------policy_maximally_unroll------------------------
-// Calculate exact loop trip count and return true if loop can be maximally
-// unrolled.
+// Calculate the exact  loop trip-count and return TRUE if loop can be fully,
+// i.e. maximally, unrolled, otherwise return FALSE. When TRUE, the estimated
+// node budget is also requested.
 bool IdealLoopTree::policy_maximally_unroll(PhaseIdealLoop *phase) const {
   CountedLoopNode *cl = _head->as_CountedLoop();
   assert(cl->is_normal_loop(), "");
@@ -693,7 +726,7 @@
 
   // Take into account that after unroll conjoined heads and tails will fold,
   // otherwise policy_unroll() may allow more unrolling than max unrolling.
-  uint new_body_size = est_loop_clone_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
+  uint new_body_size = est_loop_unroll_sz(trip_count, body_size - EMPTY_LOOP_SIZE);
 
   if (new_body_size == UINT_MAX) { // Check for bad estimate (overflow).
     return false;
@@ -742,8 +775,9 @@
 
 
 //------------------------------policy_unroll----------------------------------
-// Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if the
-// loop is a CountedLoop and the body is small enough.
+// Return TRUE or FALSE if the loop should be unrolled or not. Apply unroll if
+// the loop is  a counted loop and  the loop body is small  enough. When TRUE,
+// the estimated node budget is also requested.
 bool IdealLoopTree::policy_unroll(PhaseIdealLoop *phase) {
 
   CountedLoopNode *cl = _head->as_CountedLoop();
@@ -887,7 +921,7 @@
     LoopMaxUnroll = slp_max_unroll_factor;
   }
 
-  uint estimate = est_loop_clone_sz(2, body_size);
+  uint estimate = est_loop_clone_sz(2);
 
   if (cl->has_passed_slp()) {
     if (slp_max_unroll_factor >= future_unroll_cnt) {
@@ -958,8 +992,10 @@
 }
 
 //------------------------------policy_range_check-----------------------------
-// Return TRUE or FALSE if the loop should be range-check-eliminated.
-// Actually we do iteration-splitting, a more powerful form of RCE.
+// Return TRUE or FALSE if the loop should be range-check-eliminated or not.
+// When TRUE, the estimated node budget is also requested.
+//
+// We will actually perform iteration-splitting, a more powerful form of RCE.
 bool IdealLoopTree::policy_range_check(PhaseIdealLoop *phase) const {
   if (!RangeCheckElimination) return false;
 
@@ -967,9 +1003,9 @@
   assert(!phase->exceeding_node_budget(), "sanity");
 
   CountedLoopNode *cl = _head->as_CountedLoop();
-  // If we unrolled with no intention of doing RCE and we later
-  // changed our minds, we got no pre-loop.  Either we need to
-  // make a new pre-loop, or we gotta disallow RCE.
+  // If we unrolled  with no intention of doing RCE and we  later changed our
+  // minds, we got no pre-loop.  Either we need to make a new pre-loop, or we
+  // have to disallow RCE.
   if (cl->is_main_no_pre_loop()) return false; // Disallowed for now.
   Node *trip_counter = cl->phi();
 
@@ -1016,13 +1052,13 @@
       if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
         continue;
       }
-      // Found a test like 'trip+off vs  limit'.  Test is an IfNode, has two
-      // (2) projections.  If BOTH are in  the loop we need loop unswitching
-      // instead of iteration splitting.
+      // Found a test like 'trip+off vs limit'. Test is an IfNode, has two (2)
+      // projections. If BOTH are in the loop we need loop unswitching instead
+      // of iteration splitting.
       if (is_loop_exit(iff)) {
         // Found valid reason to split iterations (if there is room).
         // NOTE: Usually a gross overestimate.
-        return phase->may_require_nodes(est_loop_clone_sz(2, _body.size()));
+        return phase->may_require_nodes(est_loop_clone_sz(2));
       }
     } // End of is IF
   }
@@ -1521,19 +1557,20 @@
   // only process vectorized main loops
   if (!cl->is_vectorized_loop() || !cl->is_main_loop()) return;
 
-  if (!may_require_nodes(est_loop_clone_sz(2, loop->_body.size()))) {
+  int slp_max_unroll_factor = cl->slp_max_unroll();
+  int cur_unroll = cl->unrolled_count();
+
+  if (slp_max_unroll_factor == 0) return;
+
+  // only process atomic unroll vector loops (not super unrolled after vectorization)
+  if (cur_unroll != slp_max_unroll_factor) return;
+
+  // we only ever process this one time
+  if (cl->has_atomic_post_loop()) return;
+
+  if (!may_require_nodes(loop->est_loop_clone_sz(2))) {
     return;
   }
-  int slp_max_unroll_factor = cl->slp_max_unroll();
-  int cur_unroll = cl->unrolled_count();
-
-  if (slp_max_unroll_factor == 0) return;
-
-  // only process atomic unroll vector loops (not super unrolled after vectorization)
-  if (cur_unroll != slp_max_unroll_factor) return;
-
-  // we only ever process this one time
-  if (cl->has_atomic_post_loop()) return;
 
 #ifndef PRODUCT
   if (TraceLoopOpts) {
@@ -3178,9 +3215,6 @@
 
   AutoNodeBudget node_budget(phase);
 
-  bool should_peel     = policy_peeling(phase);
-  bool should_unswitch = policy_unswitching(phase);
-
   // Non-counted loops may be peeled; exactly 1 iteration is peeled.
   // This removes loop-invariant tests (usually null checks).
   if (!_head->is_CountedLoop()) { // Non-counted loop
@@ -3188,10 +3222,10 @@
       // Partial peel succeeded so terminate this round of loop opts
       return false;
     }
-    if (should_peel) {            // Should we peel?
+    if (policy_peeling(phase)) {    // Should we peel?
       if (PrintOpto) { tty->print_cr("should_peel"); }
-      phase->do_peeling(this,old_new);
-    } else if (should_unswitch) {
+      phase->do_peeling(this, old_new);
+    } else if (policy_unswitching(phase)) {
       phase->do_unswitching(this, old_new);
     }
     return true;
@@ -3209,12 +3243,11 @@
   // Before attempting fancy unrolling, RCE or alignment, see if we want
   // to completely unroll this loop or do loop unswitching.
   if (cl->is_normal_loop()) {
-    if (should_unswitch) {
+    if (policy_unswitching(phase)) {
       phase->do_unswitching(this, old_new);
       return true;
     }
-    bool should_maximally_unroll = policy_maximally_unroll(phase);
-    if (should_maximally_unroll) {
+    if (policy_maximally_unroll(phase)) {
       // Here we did some unrolling and peeling.  Eventually we will
       // completely unroll this loop and it will no longer be a loop.
       phase->do_maximally_unroll(this, old_new);
@@ -3222,6 +3255,9 @@
     }
   }
 
+  uint est_peeling = estimate_peeling(phase);
+  bool should_peel = 0 < est_peeling;
+
   // Counted loops may be peeled, may need some iterations run up
   // front for RCE, and may want to align loop refs to a cache
   // line.  Thus we clone a full loop up front whose trip count is
@@ -3252,14 +3288,15 @@
   // peeling.
   if (should_rce || should_align || should_unroll) {
     if (cl->is_normal_loop()) { // Convert to 'pre/main/post' loops
-      if (!phase->may_require_nodes(est_loop_clone_sz(3, _body.size()))) {
+      uint estimate = est_loop_clone_sz(3);
+      if (!phase->may_require_nodes(estimate)) {
         return false;
       }
-      phase->insert_pre_post_loops(this,old_new, !may_rce_align);
+      phase->insert_pre_post_loops(this, old_new, !may_rce_align);
     }
-    // Adjust the pre- and main-loop limits to let the pre and post loops run
-    // with full checks, but the main-loop with no checks.  Remove said
-    // checks from the main body.
+    // Adjust the pre- and main-loop limits to let the pre and  post loops run
+    // with full checks, but the main-loop with no checks.  Remove said checks
+    // from the main body.
     if (should_rce) {
       if (phase->do_range_check(this, old_new) != 0) {
         cl->mark_has_range_checks();
@@ -3293,7 +3330,9 @@
     }
   } else {                      // Else we have an unchanged counted loop
     if (should_peel) {          // Might want to peel but do nothing else
-      phase->do_peeling(this,old_new);
+      if (phase->may_require_nodes(est_peeling)) {
+        phase->do_peeling(this, old_new);
+      }
     }
   }
   return true;
--- a/src/hotspot/share/opto/loopUnswitch.cpp	Mon Jun 03 10:51:28 2019 +0200
+++ b/src/hotspot/share/opto/loopUnswitch.cpp	Tue May 28 14:56:58 2019 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -79,7 +79,7 @@
   }
 
   // Too speculative if running low on nodes.
-  return phase->may_require_nodes(est_loop_clone_sz(3, _body.size()));
+  return phase->may_require_nodes(est_loop_clone_sz(2));
 }
 
 //------------------------------find_unswitching_candidate-----------------------------
@@ -116,7 +116,7 @@
 // Clone loop with an invariant test (that does not exit) and
 // insert a clone of the test that selects which version to
 // execute.
-void PhaseIdealLoop::do_unswitching (IdealLoopTree *loop, Node_List &old_new) {
+void PhaseIdealLoop::do_unswitching(IdealLoopTree *loop, Node_List &old_new) {
 
   // Find first invariant test that doesn't exit the loop
   LoopNode *head = loop->_head->as_Loop();
--- a/src/hotspot/share/opto/loopnode.cpp	Mon Jun 03 10:51:28 2019 +0200
+++ b/src/hotspot/share/opto/loopnode.cpp	Tue May 28 14:56:58 2019 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1998, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -2439,12 +2439,63 @@
   if (loop->_next)  loop->_next ->counted_loop(phase);
 }
 
+
+// The Estimated Loop Clone Size:
+//   CloneFactor * (~112% * BodySize + BC) + CC + FanOutTerm,
+// where  BC and  CC are  totally ad-hoc/magic  "body" and "clone" constants,
+// respectively, used to ensure that the node usage estimates made are on the
+// safe side, for the most part. The FanOutTerm is an attempt to estimate the
+// possible additional/excessive nodes generated due to data and control flow
+// merging, for edges reaching outside the loop.
+uint IdealLoopTree::est_loop_clone_sz(uint factor) const {
+
+  precond(0 < factor && factor < 16);
+
+  uint const bc = 13;
+  uint const cc = 17;
+  uint const sz = _body.size() + (_body.size() + 7) / 8;
+  uint estimate = factor * (sz + bc) + cc;
+
+  assert((estimate - cc) / factor == sz + bc, "overflow");
+
+  uint ctrl_edge_out_cnt = 0;
+  uint data_edge_out_cnt = 0;
+
+  for (uint i = 0; i < _body.size(); i++) {
+    Node* node = _body.at(i);
+    uint outcnt = node->outcnt();
+
+    for (uint k = 0; k < outcnt; k++) {
+      Node* out = node->raw_out(k);
+
+      if (out->is_CFG()) {
+        if (!is_member(_phase->get_loop(out))) {
+          ctrl_edge_out_cnt++;
+        }
+      } else {
+        Node* ctrl = _phase->get_ctrl(out);
+        assert(ctrl->is_CFG(), "must be");
+        if (!is_member(_phase->get_loop(ctrl))) {
+          data_edge_out_cnt++;
+        }
+      }
+    }
+  }
+  // Add data (x1.5) and control (x1.0) count to estimate iff both are > 0.
+  if (ctrl_edge_out_cnt > 0 && data_edge_out_cnt > 0) {
+    estimate += ctrl_edge_out_cnt + data_edge_out_cnt + data_edge_out_cnt / 2;
+  }
+
+  return estimate;
+}
+
 #ifndef PRODUCT
 //------------------------------dump_head--------------------------------------
 // Dump 1 liner for loop header info
-void IdealLoopTree::dump_head( ) const {
-  for (uint i=0; i<_nest; i++)
+void IdealLoopTree::dump_head() const {
+  for (uint i = 0; i < _nest; i++) {
     tty->print("  ");
+  }
   tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx);
   if (_irreducible) tty->print(" IRREDUCIBLE");
   Node* entry = _head->is_Loop() ? _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl) : _head->in(LoopNode::EntryControl);
@@ -2513,7 +2564,7 @@
 
 //------------------------------dump-------------------------------------------
 // Dump loops by loop tree
-void IdealLoopTree::dump( ) const {
+void IdealLoopTree::dump() const {
   dump_head();
   if (_child) _child->dump();
   if (_next)  _next ->dump();
@@ -2908,8 +2959,8 @@
     assert(C->unique() == unique, "non-optimize mode made Nodes? ? ?");
     return;
   }
-  if(VerifyLoopOptimizations) verify();
-  if(TraceLoopOpts && C->has_loops()) {
+  if (VerifyLoopOptimizations) verify();
+  if (TraceLoopOpts && C->has_loops()) {
     _ltree_root->dump();
   }
 #endif
@@ -2938,7 +2989,6 @@
   }
 
   if (ReassociateInvariants) {
-    AutoNodeBudget node_budget(this, AutoNodeBudget::NO_BUDGET_CHECK);
     // Reassociate invariants and prep for split_thru_phi
     for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
       IdealLoopTree* lpt = iter.current();
@@ -2946,14 +2996,17 @@
       if (!is_counted || !lpt->is_innermost()) continue;
 
       // check for vectorized loops, any reassociation of invariants was already done
-      if (is_counted && lpt->_head->as_CountedLoop()->is_unroll_only()) continue;
-
-      lpt->reassociate_invariants(this);
-
+      if (is_counted && lpt->_head->as_CountedLoop()->is_unroll_only()) {
+        continue;
+      } else {
+        AutoNodeBudget node_budget(this);
+        lpt->reassociate_invariants(this);
+      }
       // Because RCE opportunities can be masked by split_thru_phi,
       // look for RCE candidates and inhibit split_thru_phi
       // on just their loop-phi's for this pass of loop opts
       if (SplitIfBlocks && do_split_ifs) {
+        AutoNodeBudget node_budget(this, AutoNodeBudget::NO_BUDGET_CHECK);
         if (lpt->policy_range_check(this)) {
           lpt->_rce_candidate = 1; // = true
         }
--- a/src/hotspot/share/opto/loopnode.hpp	Mon Jun 03 10:51:28 2019 +0200
+++ b/src/hotspot/share/opto/loopnode.hpp	Tue May 28 14:56:58 2019 +0200
@@ -589,17 +589,18 @@
   // Convert one iteration loop into normal code.
   bool do_one_iteration_loop( PhaseIdealLoop *phase );
 
-  // Return TRUE or FALSE if the loop should be peeled or not.  Peel if we can
-  // make some loop-invariant test (usually a null-check) happen before the
-  // loop.
-  bool policy_peeling( PhaseIdealLoop *phase ) const;
+  // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can
+  // move some loop-invariant test (usually a null-check) before the loop.
+  bool policy_peeling(PhaseIdealLoop *phase);
+
+  uint estimate_peeling(PhaseIdealLoop *phase);
 
   // Return TRUE or FALSE if the loop should be maximally unrolled. Stash any
   // known trip count in the counted loop node.
-  bool policy_maximally_unroll( PhaseIdealLoop *phase ) const;
+  bool policy_maximally_unroll(PhaseIdealLoop *phase) const;
 
-  // Return TRUE or FALSE if the loop should be unrolled or not.  Unroll if
-  // the loop is a CountedLoop and the body is small enough.
+  // Return TRUE or FALSE if the loop should be unrolled or not. Apply unroll
+  // if the loop is a counted loop and the loop body is small enough.
   bool policy_unroll(PhaseIdealLoop *phase);
 
   // Loop analyses to map to a maximal superword unrolling for vectorization.
@@ -620,6 +621,9 @@
   // Return TRUE if "iff" is a range check.
   bool is_range_check_if(IfNode *iff, PhaseIdealLoop *phase, Invariance& invar) const;
 
+  // Estimate the number of nodes required when cloning a loop (body).
+  uint est_loop_clone_sz(uint factor) const;
+
   // Compute loop trip count if possible
   void compute_trip_count(PhaseIdealLoop* phase);
 
@@ -1356,50 +1360,66 @@
   //   < UINT_MAX   Nodes currently requested (estimate).
   uint _nodes_required;
 
+  enum { REQUIRE_MIN = 70 };
+
+  uint nodes_required() const { return _nodes_required; }
+
+  // Given the _currently_  available number of nodes, check  whether there is
+  // "room" for an additional request or not, considering the already required
+  // number of  nodes.  Return TRUE if  the new request is  exceeding the node
+  // budget limit, otherwise return FALSE.  Note that this interpretation will
+  // act pessimistic on  additional requests when new nodes  have already been
+  // generated since the 'begin'.  This behaviour fits with the intention that
+  // node estimates/requests should be made upfront.
   bool exceeding_node_budget(uint required = 0) {
     assert(C->live_nodes() < C->max_node_limit(), "sanity");
     uint available = C->max_node_limit() - C->live_nodes();
     return available < required + _nodes_required;
   }
 
-  uint require_nodes(uint require) {
+  uint require_nodes(uint require, uint minreq = REQUIRE_MIN) {
     precond(require > 0);
-    _nodes_required += MAX2(100u, require); // Keep requests at minimum 100.
+    _nodes_required += MAX2(require, minreq);
     return _nodes_required;
   }
 
-  bool may_require_nodes(uint require) {
-    return !exceeding_node_budget(require) && require_nodes(require) > 0;
+  bool may_require_nodes(uint require, uint minreq = REQUIRE_MIN) {
+    return !exceeding_node_budget(require) && require_nodes(require, minreq) > 0;
   }
 
-  void require_nodes_begin() {
+  uint require_nodes_begin() {
     assert(_nodes_required == UINT_MAX, "Bad state (begin).");
     _nodes_required = 0;
+    return C->live_nodes();
   }
 
-  // Final check  that the requested nodes  did not exceed the  limit and that
-  // the request  was reasonably  correct with  respect to  the number  of new
-  // nodes introduced by any transform since the last 'begin'.
-  void require_nodes_final_check(uint live_at_begin) {
-    uint required = _nodes_required;
-    require_nodes_final();
-    uint delta = C->live_nodes() - live_at_begin;
-    // Assert is disabled, see JDK-8223911 and related issues.
-    assert(true || delta <= 2 * required, "Bad node estimate (actual: %d, request: %d)",
-           delta, required);
-  }
+  // When a node request is final,  optionally check that the requested number
+  // of nodes was  reasonably correct with respect to the  number of new nodes
+  // introduced since the last 'begin'. Always check that we have not exceeded
+  // the maximum node limit.
+  void require_nodes_final(uint live_at_begin, bool check_estimate) {
+    assert(_nodes_required < UINT_MAX, "Bad state (final).");
 
-  void require_nodes_final() {
-    assert(_nodes_required < UINT_MAX, "Bad state (final).");
-    assert(!exceeding_node_budget(), "Too many NODES required!");
+    if (check_estimate) {
+      // Assert that the node budget request was not off by too much (x2).
+      // Should this be the case we _surely_ need to improve the estimates
+      // used in our budget calculations.
+      assert(C->live_nodes() - live_at_begin <= 2 * _nodes_required,
+             "Bad node estimate: actual = %d >> request = %d",
+             C->live_nodes() - live_at_begin, _nodes_required);
+    }
+    // Assert that we have stayed within the node budget limit.
+    assert(C->live_nodes() < C->max_node_limit(),
+           "Exceeding node budget limit: %d + %d > %d (request = %d)",
+           C->live_nodes() - live_at_begin, live_at_begin,
+           C->max_node_limit(), _nodes_required);
+
     _nodes_required = UINT_MAX;
   }
 
   bool _created_loop_node;
 
 public:
-  uint nodes_required() const { return _nodes_required; }
-
   void set_created_loop_node() { _created_loop_node = true; }
   bool created_loop_node()     { return _created_loop_node; }
   void register_new_node( Node *n, Node *blk );
@@ -1438,29 +1458,30 @@
   {
     precond(_phase != NULL);
 
-    _nodes_at_begin = _phase->C->live_nodes();
-    _phase->require_nodes_begin();
+    _nodes_at_begin = _phase->require_nodes_begin();
   }
 
   ~AutoNodeBudget() {
-    if (_check_at_final) {
 #ifndef PRODUCT
-      if (TraceLoopOpts) {
-        uint request = _phase->nodes_required();
+    if (TraceLoopOpts) {
+      uint request = _phase->nodes_required();
+      uint delta   = _phase->C->live_nodes() - _nodes_at_begin;
 
-        if (request > 0) {
-          uint delta = _phase->C->live_nodes() - _nodes_at_begin;
-
-          if (request < delta) {
-            tty->print_cr("Exceeding node budget: %d < %d", request, delta);
+      if (request < delta) {
+        tty->print_cr("Exceeding node budget: %d < %d", request, delta);
+      } else {
+        uint const REQUIRE_MIN = PhaseIdealLoop::REQUIRE_MIN;
+        // Identify the worst estimates as "poor" ones.
+        if (request > REQUIRE_MIN && delta > 0) {
+          if ((delta >  REQUIRE_MIN && request >  3 * delta) ||
+              (delta <= REQUIRE_MIN && request > 10 * delta)) {
+            tty->print_cr("Poor node estimate: %d >> %d", request, delta);
           }
         }
       }
-#endif
-      _phase->require_nodes_final_check(_nodes_at_begin);
-    } else {
-      _phase->require_nodes_final();
     }
+#endif // PRODUCT
+    _phase->require_nodes_final(_nodes_at_begin, _check_at_final);
   }
 
 private:
@@ -1469,17 +1490,6 @@
   uint _nodes_at_begin;
 };
 
-// The Estimated Loop Clone Size: CloneFactor * (BodySize + BC) + CC, where BC
-// and CC are totally ad-hoc/magic "body" and "clone" constants, respectively,
-// used to ensure that node usage estimates made are on the safe side, for the
-// most part.
-static inline uint est_loop_clone_sz(uint fact, uint size) {
-  uint const bc = 31;
-  uint const cc = 41;
-  uint estimate = fact * (size + bc) + cc;
-  return (estimate - cc) / fact == size + bc ? estimate : UINT_MAX;
-}
-
 
 // This kit may be used for making of a reserved copy of a loop before this loop
 //  goes under non-reversible changes.
--- a/src/hotspot/share/opto/loopopts.cpp	Mon Jun 03 10:51:28 2019 +0200
+++ b/src/hotspot/share/opto/loopopts.cpp	Tue May 28 14:56:58 2019 +0200
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1999, 2019, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
@@ -3029,16 +3029,16 @@
 
   assert(!loop->_head->is_CountedLoop(), "Non-counted loop only");
   if (!loop->_head->is_Loop()) {
-    return false;  }
-
-  LoopNode *head  = loop->_head->as_Loop();
+    return false;
+  }
+  LoopNode *head = loop->_head->as_Loop();
 
   if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
     return false;
   }
 
   // Check for complex exit control
-  for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
+  for (uint ii = 0; ii < loop->_body.size(); ii++) {
     Node *n = loop->_body.at(ii);
     int opc = n->Opcode();
     if (n->is_Call()        ||
@@ -3065,12 +3065,12 @@
   IfNode *peel_if_cmpu = NULL;
 
   Node *iff = loop->tail();
-  while( iff != head ) {
-    if( iff->is_If() ) {
+  while (iff != head) {
+    if (iff->is_If()) {
       Node *ctrl = get_ctrl(iff->in(1));
       if (ctrl->is_top()) return false; // Dead test on live IF.
       // If loop-varying exit-test, check for induction variable
-      if( loop->is_member(get_loop(ctrl)) &&
+      if (loop->is_member(get_loop(ctrl)) &&
           loop->is_loop_exit(iff) &&
           is_possible_iv_test(iff)) {
         Node* cmp = iff->in(1)->in(1);
@@ -3084,6 +3084,7 @@
     }
     iff = idom(iff);
   }
+
   // Prefer signed compare over unsigned compare.
   IfNode* new_peel_if = NULL;
   if (peel_if == NULL) {
@@ -3131,7 +3132,7 @@
   Node_List worklist(area);
   Node_List sink_list(area);
 
-  if (!may_require_nodes(est_loop_clone_sz(2, loop->_body.size()))) {
+  if (!may_require_nodes(loop->est_loop_clone_sz(2))) {
     return false;
   }
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test/hotspot/jtreg/compiler/loopopts/LoopUnswitchingBadNodeBudget.java	Tue May 28 14:56:58 2019 +0200
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+/*
+ * @test
+ * @bug 8223502
+ * @summary Node estimate for loop unswitching is not correct:
+ *          assert(delta <= 2 * required) failed: Bad node estimate
+ *
+ * @requires !vm.graal.enabled
+ *
+ * @run main/othervm -XX:-TieredCompilation -XX:-BackgroundCompilation
+ *      -XX:-UseOnStackReplacement -XX:CompileOnly=LoopUnswitchingBadNodeBudget::test
+ *      -XX:CompileCommand=dontinline,LoopUnswitchingBadNodeBudget::helper
+ *      -XX:+UnlockExperimentalVMOptions -XX:-UseSwitchProfiling LoopUnswitchingBadNodeBudget
+ *
+ */
+
+public class LoopUnswitchingBadNodeBudget {
+
+    public static void main(String[] args) {
+        for (int i = 0; i < 20_000; i++) {
+            for (int j = 0; j < 100; j++) {
+                test(j, true, 0, 0, 0);
+                test(j, false, 0, 0, 0);
+            }
+        }
+    }
+
+    private static int test(int j, boolean flag, int k, int l, int m) {
+        int res = 0;
+        for (int i = 0; i < 24; i++) {
+            if (flag) {
+                k = k / 2;
+                l = l * 2;
+                m = m + 2;
+            }
+            switch (j) {
+                case  0: break;
+                case  1: return helper(j, k, l, m);
+                case  2: return helper(j, k, l, m);
+                case  3: return helper(j, k, l, m);
+                case  4: return helper(j, k, l, m);
+                case  5: return helper(j, k, l, m);
+                case  6: return helper(j, k, l, m);
+                case  7: return helper(j, k, l, m);
+                case  8: return helper(j, k, l, m);
+                case  9: return helper(j, k, l, m);
+                case 10: return helper(j, k, l, m);
+                case 11: return helper(j, k, l, m);
+                case 12: return helper(j, k, l, m);
+                case 13: return helper(j, k, l, m);
+                case 14: return helper(j, k, l, m);
+                case 15: return helper(j, k, l, m);
+                case 16: return helper(j, k, l, m);
+                case 17: return helper(j, k, l, m);
+                case 18: return helper(j, k, l, m);
+                case 19: return helper(j, k, l, m);
+                case 20: return helper(j, k, l, m);
+                case 21: return helper(j, k, l, m);
+                case 22: return helper(j, k, l, m);
+                case 23: return helper(j, k, l, m);
+                case 24: return helper(j, k, l, m);
+                case 25: return helper(j, k, l, m);
+                case 26: return helper(j, k, l, m);
+                case 27: return helper(j, k, l, m);
+                case 28: return helper(j, k, l, m);
+                case 29: return helper(j, k, l, m);
+                case 30: return helper(j, k, l, m);
+                case 31: return helper(j, k, l, m);
+                case 32: return helper(j, k, l, m);
+                case 33: return helper(j, k, l, m);
+                case 34: return helper(j, k, l, m);
+                case 35: return helper(j, k, l, m);
+                case 36: return helper(j, k, l, m);
+                case 37: return helper(j, k, l, m);
+                case 38: return helper(j, k, l, m);
+                case 39: return helper(j, k, l, m);
+                case 40: return helper(j, k, l, m);
+                case 41: return helper(j, k, l, m);
+                case 42: return helper(j, k, l, m);
+                case 43: return helper(j, k, l, m);
+                case 44: return helper(j, k, l, m);
+                case 45: return helper(j, k, l, m);
+                case 46: return helper(j, k, l, m);
+                case 47: return helper(j, k, l, m);
+                case 48: return helper(j, k, l, m);
+                case 49: return helper(j, k, l, m);
+                case 50: return helper(j, k, l, m);
+                case 51: return helper(j, k, l, m);
+                case 52: return helper(j, k, l, m);
+                case 53: return helper(j, k, l, m);
+                case 54: return helper(j, k, l, m);
+                case 55: return helper(j, k, l, m);
+                case 56: return helper(j, k, l, m);
+                case 57: return helper(j, k, l, m);
+                case 58: return helper(j, k, l, m);
+                case 59: return helper(j, k, l, m);
+                case 60: return helper(j, k, l, m);
+                case 61: return helper(j, k, l, m);
+                case 62: return helper(j, k, l, m);
+                case 63: return helper(j, k, l, m);
+                case 64: return helper(j, k, l, m);
+                case 65: return helper(j, k, l, m);
+                case 66: return helper(j, k, l, m);
+                case 67: return helper(j, k, l, m);
+                case 68: return helper(j, k, l, m);
+                case 69: return helper(j, k, l, m);
+                case 70: return helper(j, k, l, m);
+                case 71: return helper(j, k, l, m);
+                case 72: return helper(j, k, l, m);
+                case 73: return helper(j, k, l, m);
+                case 74: return helper(j, k, l, m);
+                case 75: return helper(j, k, l, m);
+                case 76: return helper(j, k, l, m);
+                case 77: return helper(j, k, l, m);
+                case 78: return helper(j, k, l, m);
+                case 79: return helper(j, k, l, m);
+                case 80: return helper(j, k, l, m);
+                case 81: return helper(j, k, l, m);
+                case 82: return helper(j, k, l, m);
+                case 83: return helper(j, k, l, m);
+                case 84: return helper(j, k, l, m);
+                case 85: return helper(j, k, l, m);
+                case 86: return helper(j, k, l, m);
+                case 87: return helper(j, k, l, m);
+                case 88: return helper(j, k, l, m);
+                case 89: return helper(j, k, l, m);
+                case 90: return helper(j, k, l, m);
+                case 91: return helper(j, k, l, m);
+                case 92: return helper(j, k, l, m);
+                case 93: return helper(j, k, l, m);
+                case 94: return helper(j, k, l, m);
+                case 95: return helper(j, k, l, m);
+                case 96: return helper(j, k, l, m);
+                case 97: return helper(j, k, l, m);
+                case 98: return helper(j, k, l, m);
+                case 99: return helper(j, k, l, m);
+            }
+            res += helper(j, k, l, m);
+        }
+        return res;
+    }
+
+    private static int helper(int j, int k, int l, int m) {
+        return j + k;
+    }
+}