changeset 48020:f913f6dba2d3

8186027: C2: loop strip mining Reviewed-by: kvn, neliasso
author roland
date Tue, 28 Nov 2017 11:59:16 +0100
parents 364207a23251
children 646ed97b7e0d
files src/hotspot/share/gc/g1/g1Arguments.cpp src/hotspot/share/opto/c2_globals.hpp src/hotspot/share/opto/cfgnode.cpp src/hotspot/share/opto/classes.hpp src/hotspot/share/opto/compile.cpp src/hotspot/share/opto/ifnode.cpp src/hotspot/share/opto/loopPredicate.cpp src/hotspot/share/opto/loopTransform.cpp src/hotspot/share/opto/loopUnswitch.cpp src/hotspot/share/opto/loopnode.cpp src/hotspot/share/opto/loopnode.hpp src/hotspot/share/opto/loopopts.cpp src/hotspot/share/opto/macro.cpp src/hotspot/share/opto/node.hpp src/hotspot/share/opto/superword.cpp src/hotspot/share/runtime/arguments.cpp test/hotspot/jtreg/compiler/loopopts/UseCountedLoopSafepointsTest.java
diffstat 17 files changed, 1152 insertions(+), 211 deletions(-) [+]
line wrap: on
line diff
--- a/src/hotspot/share/gc/g1/g1Arguments.cpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/gc/g1/g1Arguments.cpp	Tue Nov 28 11:59:16 2017 +0100
@@ -92,6 +92,16 @@
   }
 
   log_trace(gc)("MarkStackSize: %uk  MarkStackSizeMax: %uk", (unsigned int) (MarkStackSize / K), (uint) (MarkStackSizeMax / K));
+
+#ifdef COMPILER2
+  // Enable loop strip mining to offer better pause time guarantees
+  if (FLAG_IS_DEFAULT(UseCountedLoopSafepoints)) {
+    FLAG_SET_DEFAULT(UseCountedLoopSafepoints, true);
+  }
+  if (UseCountedLoopSafepoints && FLAG_IS_DEFAULT(LoopStripMiningIter)) {
+    FLAG_SET_DEFAULT(LoopStripMiningIter, 1000);
+  }
+#endif
 }
 
 CollectedHeap* G1Arguments::create_heap() {
--- a/src/hotspot/share/opto/c2_globals.hpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/opto/c2_globals.hpp	Tue Nov 28 11:59:16 2017 +0100
@@ -740,6 +740,14 @@
                                                                             \
   develop(bool, RenumberLiveNodes, true,                                    \
           "Renumber live nodes")                                            \
+                                                                            \
+  product(uintx, LoopStripMiningIter, 0,                                    \
+          "Number of iterations in strip mined loop")                       \
+          range(0, max_juint)                                               \
+                                                                            \
+  product(uintx, LoopStripMiningIterShortLoop, 0,                           \
+          "Loop with fewer iterations are not strip mined")                 \
+          range(0, max_juint)                                               \
 
 C2_FLAGS(DECLARE_DEVELOPER_FLAG, \
          DECLARE_PD_DEVELOPER_FLAG, \
--- a/src/hotspot/share/opto/cfgnode.cpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/opto/cfgnode.cpp	Tue Nov 28 11:59:16 2017 +0100
@@ -571,6 +571,18 @@
       return NULL;
     } else if (can_reshape) {   // Optimization phase - remove the node
       PhaseIterGVN *igvn = phase->is_IterGVN();
+      // Strip mined (inner) loop is going away, remove outer loop.
+      if (is_CountedLoop() &&
+          as_Loop()->is_strip_mined()) {
+        Node* outer_sfpt = as_CountedLoop()->outer_safepoint();
+        Node* outer_out = as_CountedLoop()->outer_loop_exit();
+        if (outer_sfpt != NULL && outer_out != NULL) {
+          Node* in = outer_sfpt->in(0);
+          igvn->replace_node(outer_out, in);
+          LoopNode* outer = as_CountedLoop()->outer_loop();
+          igvn->replace_input_of(outer, LoopNode::LoopBackControl, igvn->C->top());
+        }
+      }
       Node *parent_ctrl;
       if( cnt == 0 ) {
         assert( req() == 1, "no inputs expected" );
--- a/src/hotspot/share/opto/classes.hpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/opto/classes.hpp	Tue Nov 28 11:59:16 2017 +0100
@@ -133,6 +133,8 @@
 macro(ConvL2I)
 macro(CountedLoop)
 macro(CountedLoopEnd)
+macro(OuterStripMinedLoop)
+macro(OuterStripMinedLoopEnd)
 macro(CountLeadingZerosI)
 macro(CountLeadingZerosL)
 macro(CountTrailingZerosI)
--- a/src/hotspot/share/opto/compile.cpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/opto/compile.cpp	Tue Nov 28 11:59:16 2017 +0100
@@ -3244,9 +3244,11 @@
     break;
   case Op_Loop:
   case Op_CountedLoop:
+  case Op_OuterStripMinedLoop:
     if (n->as_Loop()->is_inner_loop()) {
       frc.inc_inner_loop_count();
     }
+    n->as_Loop()->verify_strip_mined(0);
     break;
   case Op_LShiftI:
   case Op_RShiftI:
@@ -3525,6 +3527,14 @@
         record_method_not_compilable("infinite loop");
         return true;            // Found unvisited kid; must be unreach
       }
+
+    // Here so verification code in final_graph_reshaping_walk()
+    // always see an OuterStripMinedLoopEnd
+    if (n->is_OuterStripMinedLoopEnd()) {
+      IfNode* init_iff = n->as_If();
+      Node* iff = new IfNode(init_iff->in(0), init_iff->in(1), init_iff->_prob, init_iff->_fcnt);
+      n->subsume_by(iff, this);
+    }
   }
 
   // If original bytecodes contained a mixture of floats and doubles
--- a/src/hotspot/share/opto/ifnode.cpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/opto/ifnode.cpp	Tue Nov 28 11:59:16 2017 +0100
@@ -117,6 +117,7 @@
   // No intervening control, like a simple Call
   Node *r = iff->in(0);
   if( !r->is_Region() ) return NULL;
+  if (r->is_Loop() && r->in(LoopNode::LoopBackControl)->is_top()) return NULL; // going away anyway
   if( phi->region() != r ) return NULL;
   // No other users of the cmp/bool
   if (b->outcnt() != 1 || cmp->outcnt() != 1) {
--- a/src/hotspot/share/opto/loopPredicate.cpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/opto/loopPredicate.cpp	Tue Nov 28 11:59:16 2017 +0100
@@ -515,8 +515,8 @@
     _visited(area), _invariant(area), _stack(area, 10 /* guess */),
     _clone_visited(area), _old_new(area)
   {
-    Node* head = _lpt->_head;
-    Node* entry = head->in(LoopNode::EntryControl);
+    LoopNode* head = _lpt->_head->as_Loop();
+    Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
     if (entry->outcnt() != 1) {
       // If a node is pinned between the predicates and the loop
       // entry, we won't be able to move any node in the loop that
@@ -801,6 +801,10 @@
     return false;
   }
 
+  if (head->is_OuterStripMinedLoop()) {
+    return false;
+  }
+
   CountedLoopNode *cl = NULL;
   if (head->is_valid_counted_loop()) {
     cl = head->as_CountedLoop();
@@ -812,7 +816,7 @@
       cl = NULL;
   }
 
-  Node* entry = head->in(LoopNode::EntryControl);
+  Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
   ProjNode *predicate_proj = NULL;
   // Loop limit check predicate should be near the loop.
   predicate_proj = find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
@@ -1007,6 +1011,8 @@
   }
 #endif
 
+  head->verify_strip_mined(1);
+
   return hoisted;
 }
 
--- a/src/hotspot/share/opto/loopTransform.cpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/opto/loopTransform.cpp	Tue Nov 28 11:59:16 2017 +0100
@@ -67,6 +67,16 @@
     Node *n = _body.at(i);
     _phase->_igvn._worklist.push(n);
   }
+  // put body of outer strip mined loop on igvn work list as well
+  if (_head->is_CountedLoop() && _head->as_Loop()->is_strip_mined()) {
+    CountedLoopNode* l = _head->as_CountedLoop();
+    _phase->_igvn._worklist.push(l->outer_loop());
+    _phase->_igvn._worklist.push(l->outer_loop_tail());
+    _phase->_igvn._worklist.push(l->outer_loop_end());
+    _phase->_igvn._worklist.push(l->outer_safepoint());
+    Node* cle_out = _head->as_CountedLoop()->loopexit()->proj_out(false);
+    _phase->_igvn._worklist.push(cle_out);
+  }
 }
 
 //------------------------------compute_exact_trip_count-----------------------
@@ -494,7 +504,7 @@
     loop->dump_head();
   }
 #endif
-  Node* head = loop->_head;
+  LoopNode* head = loop->_head->as_Loop();
   bool counted_loop = head->is_CountedLoop();
   if (counted_loop) {
     CountedLoopNode *cl = head->as_CountedLoop();
@@ -514,7 +524,7 @@
 
   // Step 1: Clone the loop body.  The clone becomes the peeled iteration.
   //         The pre-loop illegally has 2 control users (old & new loops).
-  clone_loop( loop, old_new, dom_depth(head) );
+  clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), ControlAroundStripMined);
 
   // Step 2: Make the old-loop fall-in edges point to the peeled iteration.
   //         Do this by making the old-loop fall-in edges act as if they came
@@ -523,8 +533,8 @@
   //         the pre-loop with only 1 user (the new peeled iteration), but the
   //         peeled-loop backedge has 2 users.
   Node* new_entry = old_new[head->in(LoopNode::LoopBackControl)->_idx];
-  _igvn.hash_delete(head);
-  head->set_req(LoopNode::EntryControl, new_entry);
+  _igvn.hash_delete(head->skip_strip_mined());
+  head->skip_strip_mined()->set_req(LoopNode::EntryControl, new_entry);
   for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
     Node* old = head->fast_out(j);
     if (old->in(0) == loop->_head && old->req() == 3 && old->is_Phi()) {
@@ -1009,8 +1019,6 @@
   CountedLoopEndNode *main_end = main_head->loopexit();
   guarantee(main_end != NULL, "no loop exit node");
   assert( main_end->outcnt() == 2, "1 true, 1 false path only" );
-  uint dd_main_head = dom_depth(main_head);
-  uint max = main_head->outcnt();
 
   Node *pre_header= main_head->in(LoopNode::EntryControl);
   Node *init      = main_head->init_trip();
@@ -1043,7 +1051,16 @@
 
   // Step B1: Clone the loop body.  The clone becomes the pre-loop.  The main
   // loop pre-header illegally has 2 control users (old & new loops).
-  clone_loop( loop, old_new, dd_main_head );
+  LoopNode* outer_main_head = main_head;
+  IdealLoopTree* outer_loop = loop;
+  if (main_head->is_strip_mined()) {
+    main_head->verify_strip_mined(1);
+    outer_main_head = main_head->outer_loop();
+    outer_loop = loop->_parent;
+    assert(outer_loop->_head == outer_main_head, "broken loop tree");
+  }
+  uint dd_main_head = dom_depth(outer_main_head);
+  clone_loop(loop, old_new, dd_main_head, ControlAroundStripMined);
   CountedLoopNode*    pre_head = old_new[main_head->_idx]->as_CountedLoop();
   CountedLoopEndNode* pre_end  = old_new[main_end ->_idx]->as_CountedLoopEnd();
   pre_head->set_pre_loop(main_head);
@@ -1058,7 +1075,7 @@
   IfFalseNode *new_pre_exit = new IfFalseNode(pre_end);
   _igvn.register_new_node_with_optimizer( new_pre_exit );
   set_idom(new_pre_exit, pre_end, dd_main_head);
-  set_loop(new_pre_exit, loop->_parent);
+  set_loop(new_pre_exit, outer_loop->_parent);
 
   // Step B2: Build a zero-trip guard for the main-loop.  After leaving the
   // pre-loop, the main-loop may not execute at all.  Later in life this
@@ -1075,22 +1092,22 @@
   IfNode *min_iff = new IfNode( new_pre_exit, min_bol, PROB_ALWAYS, COUNT_UNKNOWN );
   _igvn.register_new_node_with_optimizer( min_iff );
   set_idom(min_iff, new_pre_exit, dd_main_head);
-  set_loop(min_iff, loop->_parent);
+  set_loop(min_iff, outer_loop->_parent);
 
   // Plug in the false-path, taken if we need to skip main-loop
   _igvn.hash_delete( pre_exit );
   pre_exit->set_req(0, min_iff);
   set_idom(pre_exit, min_iff, dd_main_head);
-  set_idom(pre_exit->unique_out(), min_iff, dd_main_head);
+  set_idom(pre_exit->unique_ctrl_out(), min_iff, dd_main_head);
   // Make the true-path, must enter the main loop
   Node *min_taken = new IfTrueNode( min_iff );
   _igvn.register_new_node_with_optimizer( min_taken );
   set_idom(min_taken, min_iff, dd_main_head);
-  set_loop(min_taken, loop->_parent);
+  set_loop(min_taken, outer_loop->_parent);
   // Plug in the true path
-  _igvn.hash_delete( main_head );
-  main_head->set_req(LoopNode::EntryControl, min_taken);
-  set_idom(main_head, min_taken, dd_main_head);
+  _igvn.hash_delete(outer_main_head);
+  outer_main_head->set_req(LoopNode::EntryControl, min_taken);
+  set_idom(outer_main_head, min_taken, dd_main_head);
 
   Arena *a = Thread::current()->resource_area();
   VectorSet visited(a);
@@ -1102,7 +1119,7 @@
     if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) {
       Node *pre_phi = old_new[main_phi->_idx];
       Node *fallpre  = clone_up_backedge_goo(pre_head->back_control(),
-                                             main_head->init_control(),
+                                             main_head->skip_strip_mined()->in(LoopNode::EntryControl),
                                              pre_phi->in(LoopNode::LoopBackControl),
                                              visited, clones);
       _igvn.hash_delete(main_phi);
@@ -1305,16 +1322,24 @@
 Node *PhaseIdealLoop::insert_post_loop(IdealLoopTree *loop, Node_List &old_new,
                                        CountedLoopNode *main_head, CountedLoopEndNode *main_end,
                                        Node *incr, Node *limit, CountedLoopNode *&post_head) {
+  IfNode* outer_main_end = main_end;
+  IdealLoopTree* outer_loop = loop;
+  if (main_head->is_strip_mined()) {
+    main_head->verify_strip_mined(1);
+    outer_main_end = main_head->outer_loop_end();
+    outer_loop = loop->_parent;
+    assert(outer_loop->_head == main_head->in(LoopNode::EntryControl), "broken loop tree");
+  }
 
   //------------------------------
   // Step A: Create a new post-Loop.
-  Node* main_exit = main_end->proj_out(false);
+  Node* main_exit = outer_main_end->proj_out(false);
   assert(main_exit->Opcode() == Op_IfFalse, "");
   int dd_main_exit = dom_depth(main_exit);
 
   // Step A1: Clone the loop body of main. The clone becomes the post-loop.
   // The main loop pre-header illegally has 2 control users (old & new loops).
-  clone_loop(loop, old_new, dd_main_exit);
+  clone_loop(loop, old_new, dd_main_exit, ControlAroundStripMined);
   assert(old_new[main_end->_idx]->Opcode() == Op_CountedLoopEnd, "");
   post_head = old_new[main_head->_idx]->as_CountedLoop();
   post_head->set_normal_loop();
@@ -1325,10 +1350,10 @@
   post_end->_prob = PROB_FAIR;
 
   // Build the main-loop normal exit.
-  IfFalseNode *new_main_exit = new IfFalseNode(main_end);
+  IfFalseNode *new_main_exit = new IfFalseNode(outer_main_end);
   _igvn.register_new_node_with_optimizer(new_main_exit);
-  set_idom(new_main_exit, main_end, dd_main_exit);
-  set_loop(new_main_exit, loop->_parent);
+  set_idom(new_main_exit, outer_main_end, dd_main_exit);
+  set_loop(new_main_exit, outer_loop->_parent);
 
   // Step A2: Build a zero-trip guard for the post-loop.  After leaving the
   // main-loop, the post-loop may not execute at all.  We 'opaque' the incr
@@ -1346,7 +1371,7 @@
   IfNode *zer_iff = new IfNode(new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN);
   _igvn.register_new_node_with_optimizer(zer_iff);
   set_idom(zer_iff, new_main_exit, dd_main_exit);
-  set_loop(zer_iff, loop->_parent);
+  set_loop(zer_iff, outer_loop->_parent);
 
   // Plug in the false-path, taken if we need to skip this post-loop
   _igvn.replace_input_of(main_exit, 0, zer_iff);
@@ -1356,7 +1381,7 @@
   Node *zer_taken = new IfTrueNode(zer_iff);
   _igvn.register_new_node_with_optimizer(zer_taken);
   set_idom(zer_taken, zer_iff, dd_main_exit);
-  set_loop(zer_taken, loop->_parent);
+  set_loop(zer_taken, outer_loop->_parent);
   // Plug in the true path
   _igvn.hash_delete(post_head);
   post_head->set_req(LoopNode::EntryControl, zer_taken);
@@ -1431,7 +1456,7 @@
   // if rounds of unroll,optimize are making progress
   loop_head->set_node_count_before_unroll(loop->_body.size());
 
-  Node *ctrl  = loop_head->in(LoopNode::EntryControl);
+  Node *ctrl  = loop_head->skip_strip_mined()->in(LoopNode::EntryControl);
   Node *limit = loop_head->limit();
   Node *init  = loop_head->init_trip();
   Node *stride = loop_head->stride();
@@ -1610,7 +1635,7 @@
   // represents the odd iterations; since the loop trips an even number of
   // times its backedge is never taken.  Kill the backedge.
   uint dd = dom_depth(loop_head);
-  clone_loop( loop, old_new, dd );
+  clone_loop(loop, old_new, dd, IgnoreStripMined);
 
   // Make backedges of the clone equal to backedges of the original.
   // Make the fall-in from the original come from the fall-out of the clone.
@@ -1653,6 +1678,7 @@
   }
 
   loop->record_for_igvn();
+  loop_head->clear_strip_mined();
 
 #ifndef PRODUCT
   if (C->do_vector_loop() && (PrintOpto && (VerifyLoopOptimizations || TraceLoopOpts))) {
@@ -2047,7 +2073,7 @@
   }
 
   // Need to find the main-loop zero-trip guard
-  Node *ctrl  = cl->in(LoopNode::EntryControl);
+  Node *ctrl  = cl->skip_strip_mined()->in(LoopNode::EntryControl);
   Node *iffm = ctrl->in(0);
   Node *opqzm = iffm->in(1)->in(1)->in(2);
   assert(opqzm->in(1) == main_limit, "do not understand situation");
@@ -2413,7 +2439,6 @@
     _igvn.register_new_node_with_optimizer(cur_min);
     Node *cmp_node = rce_loop_end->cmp_node();
     _igvn.replace_input_of(cmp_node, 2, cur_min);
-    set_idom(cmp_node, cur_min, dom_depth(ctrl));
     set_ctrl(cur_min, ctrl);
     set_loop(cur_min, rce_loop->_parent);
 
@@ -2519,7 +2544,7 @@
 
 #ifdef ASSERT
 static CountedLoopNode* locate_pre_from_main(CountedLoopNode *cl) {
-  Node *ctrl  = cl->in(LoopNode::EntryControl);
+  Node *ctrl  = cl->skip_strip_mined()->in(LoopNode::EntryControl);
   assert(ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "");
   Node *iffm = ctrl->in(0);
   assert(iffm->Opcode() == Op_If, "");
@@ -2558,7 +2583,7 @@
   }
 
   assert(locate_pre_from_main(main_head) == cl, "bad main loop");
-  Node* main_iff = main_head->in(LoopNode::EntryControl)->in(0);
+  Node* main_iff = main_head->skip_strip_mined()->in(LoopNode::EntryControl)->in(0);
 
   // Remove the Opaque1Node of the pre loop and make it execute all iterations
   phase->_igvn.replace_input_of(pre_cmp, 2, pre_cmp->in(2)->in(2));
@@ -2619,7 +2644,7 @@
   }
   if (needs_guard) {
     // Check for an obvious zero trip guard.
-    Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->in(LoopNode::EntryControl));
+    Node* inctrl = PhaseIdealLoop::skip_loop_predicates(cl->skip_strip_mined()->in(LoopNode::EntryControl));
     if (inctrl->Opcode() == Op_IfTrue || inctrl->Opcode() == Op_IfFalse) {
       bool maybe_swapped = (inctrl->Opcode() == Op_IfFalse);
       // The test should look like just the backedge of a CountedLoop
@@ -3167,6 +3192,8 @@
     return false;
   }
 
+  head->verify_strip_mined(1);
+
   // Check that the body only contains a store of a loop invariant
   // value that is indexed by the loop phi.
   Node* store = NULL;
@@ -3288,6 +3315,16 @@
   }
 */
 
+  if (head->is_strip_mined()) {
+    // Inner strip mined loop goes away so get rid of outer strip
+    // mined loop
+    Node* outer_sfpt = head->outer_safepoint();
+    Node* in = outer_sfpt->in(0);
+    Node* outer_out = head->outer_loop_exit();
+    lazy_replace(outer_out, in);
+    _igvn.replace_input_of(outer_sfpt, 0, C->top());
+  }
+
   // Redirect the old control and memory edges that are outside the loop.
   // Sometimes the memory phi of the head is used as the outgoing
   // state of the loop.  It's safe in this case to replace it with the
--- a/src/hotspot/share/opto/loopUnswitch.cpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/opto/loopUnswitch.cpp	Tue Nov 28 11:59:16 2017 +0100
@@ -132,11 +132,11 @@
     head->as_CountedLoop()->set_normal_loop();
   }
 
-  ProjNode* proj_true = create_slow_version_of_loop(loop, old_new, unswitch_iff->Opcode());
+  ProjNode* proj_true = create_slow_version_of_loop(loop, old_new, unswitch_iff->Opcode(), CloneIncludesStripMined);
 
 #ifdef ASSERT
   Node* uniqc = proj_true->unique_ctrl_out();
-  Node* entry = head->in(LoopNode::EntryControl);
+  Node* entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
   Node* predicate = find_predicate(entry);
   if (predicate != NULL && UseLoopPredicate) {
     // We may have two predicates, find first.
@@ -145,7 +145,8 @@
   }
   if (predicate != NULL) predicate = predicate->in(0);
   assert(proj_true->is_IfTrue() &&
-         (predicate == NULL && uniqc == head ||
+         (predicate == NULL && uniqc == head && !head->is_strip_mined() ||
+          predicate == NULL && uniqc == head->in(LoopNode::EntryControl) && head->is_strip_mined() ||
           predicate != NULL && uniqc == predicate), "by construction");
 #endif
   // Increment unswitch count
@@ -223,13 +224,16 @@
 // Return control projection of the entry to the fast version.
 ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
                                                       Node_List &old_new,
-                                                      int opcode) {
+                                                      int opcode,
+                                                      CloneLoopMode mode) {
   LoopNode* head  = loop->_head->as_Loop();
   bool counted_loop = head->is_CountedLoop();
-  Node*     entry = head->in(LoopNode::EntryControl);
+  Node*     entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
   _igvn.rehash_node_delayed(entry);
   IdealLoopTree* outer_loop = loop->_parent;
 
+  head->verify_strip_mined(1);
+
   Node *cont      = _igvn.intcon(1);
   set_ctrl(cont, C->root());
   Node* opq       = new Opaque1Node(C, cont);
@@ -247,19 +251,21 @@
   // Clone the loop body.  The clone becomes the fast loop.  The
   // original pre-header will (illegally) have 3 control users
   // (old & new loops & new if).
-  clone_loop(loop, old_new, dom_depth(head), iff);
+  clone_loop(loop, old_new, dom_depth(head->skip_strip_mined()), mode, iff);
   assert(old_new[head->_idx]->is_Loop(), "" );
 
   // Fast (true) control
   Node* iffast_pred = clone_loop_predicates(entry, iffast, !counted_loop);
-  _igvn.replace_input_of(head, LoopNode::EntryControl, iffast_pred);
-  set_idom(head, iffast_pred, dom_depth(head));
 
   // Slow (false) control
   Node* ifslow_pred = clone_loop_predicates(entry, ifslow, !counted_loop);
-  LoopNode* slow_head = old_new[head->_idx]->as_Loop();
-  _igvn.replace_input_of(slow_head, LoopNode::EntryControl, ifslow_pred);
-  set_idom(slow_head, ifslow_pred, dom_depth(slow_head));
+
+  Node* l = head->skip_strip_mined();
+  _igvn.replace_input_of(l, LoopNode::EntryControl, iffast_pred);
+  set_idom(l, iffast_pred, dom_depth(l));
+  LoopNode* slow_l = old_new[head->_idx]->as_Loop()->skip_strip_mined();
+  _igvn.replace_input_of(slow_l, LoopNode::EntryControl, ifslow_pred);
+  set_idom(slow_l, ifslow_pred, dom_depth(l));
 
   recompute_dom_depth();
 
@@ -270,9 +276,9 @@
   Node_List old_new;
   LoopNode* head  = loop->_head->as_Loop();
   bool counted_loop = head->is_CountedLoop();
-  Node*     entry = head->in(LoopNode::EntryControl);
+  Node*     entry = head->skip_strip_mined()->in(LoopNode::EntryControl);
   _igvn.rehash_node_delayed(entry);
-  IdealLoopTree* outer_loop = loop->_parent;
+  IdealLoopTree* outer_loop = head->is_strip_mined() ? loop->_parent->_parent : loop->_parent;
 
   ConINode* const_1 = _igvn.intcon(1);
   set_ctrl(const_1, C->root());
@@ -286,7 +292,7 @@
   // Clone the loop body.  The clone becomes the fast loop.  The
   // original pre-header will (illegally) have 3 control users
   // (old & new loops & new if).
-  clone_loop(loop, old_new, dom_depth(head), iff);
+  clone_loop(loop, old_new, dom_depth(head), CloneIncludesStripMined, iff);
   assert(old_new[head->_idx]->is_Loop(), "" );
 
   LoopNode* slow_head = old_new[head->_idx]->as_Loop();
@@ -303,9 +309,9 @@
 #endif
 
   // Fast (true) control
-  _igvn.replace_input_of(head, LoopNode::EntryControl, iffast);
+  _igvn.replace_input_of(head->skip_strip_mined(), LoopNode::EntryControl, iffast);
   // Slow (false) control
-  _igvn.replace_input_of(slow_head, LoopNode::EntryControl, ifslow);
+  _igvn.replace_input_of(slow_head->skip_strip_mined(), LoopNode::EntryControl, ifslow);
 
   recompute_dom_depth();
 
@@ -394,7 +400,7 @@
     return false;
   }
 
-  Node* ifslow_pred = _lp_reserved->as_CountedLoop()->in(LoopNode::EntryControl);
+  Node* ifslow_pred = _lp_reserved->skip_strip_mined()->in(LoopNode::EntryControl);
 
   if (!ifslow_pred->is_IfFalse()) {
     return false;
--- a/src/hotspot/share/opto/loopnode.cpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/opto/loopnode.cpp	Tue Nov 28 11:59:16 2017 +0100
@@ -261,8 +261,68 @@
   set_early_ctrl( n );
 }
 
+// Create a skeleton strip mined outer loop: a Loop head before the
+// inner strip mined loop, a safepoint and an exit condition guarded
+// by an opaque node after the inner strip mined loop with a backedge
+// to the loop head. The inner strip mined loop is left as it is. Only
+// once loop optimizations are over, do we adjust the inner loop exit
+// condition to limit its number of iterations, set the outer loop
+// exit condition and add Phis to the outer loop head. Some loop
+// optimizations that operate on the inner strip mined loop need to be
+// aware of the outer strip mined loop: loop unswitching needs to
+// clone the outer loop as well as the inner, unrolling needs to only
+// clone the inner loop etc. No optimizations need to change the outer
+// strip mined loop as it is only a skeleton.
+IdealLoopTree* PhaseIdealLoop::create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
+                                                             IdealLoopTree* loop, float cl_prob, float le_fcnt,
+                                                             Node*& entry_control, Node*& iffalse) {
+  Node* outer_test = _igvn.intcon(0);
+  set_ctrl(outer_test, C->root());
+  Node *orig = iffalse;
+  iffalse = iffalse->clone();
+  _igvn.register_new_node_with_optimizer(iffalse);
+  set_idom(iffalse, idom(orig), dom_depth(orig));
+
+  IfNode *outer_le = new OuterStripMinedLoopEndNode(iffalse, outer_test, cl_prob, le_fcnt);
+  Node *outer_ift = new IfTrueNode (outer_le);
+  Node* outer_iff = orig;
+  _igvn.replace_input_of(outer_iff, 0, outer_le);
+
+  LoopNode *outer_l = new OuterStripMinedLoopNode(C, init_control, outer_ift);
+  entry_control = outer_l;
+
+  IdealLoopTree* outer_ilt = new IdealLoopTree(this, outer_l, outer_ift);
+  IdealLoopTree* parent = loop->_parent;
+  IdealLoopTree* sibling = parent->_child;
+  if (sibling == loop) {
+    parent->_child = outer_ilt;
+  } else {
+    while (sibling->_next != loop) {
+      sibling = sibling->_next;
+    }
+    sibling->_next = outer_ilt;
+  }
+  outer_ilt->_next = loop->_next;
+  outer_ilt->_parent = parent;
+  outer_ilt->_child = loop;
+  outer_ilt->_nest = loop->_nest;
+  loop->_parent = outer_ilt;
+  loop->_next = NULL;
+  loop->_nest++;
+
+  set_loop(iffalse, outer_ilt);
+  register_control(outer_le, outer_ilt, iffalse);
+  register_control(outer_ift, outer_ilt, outer_le);
+  set_idom(outer_iff, outer_le, dom_depth(outer_le));
+  _igvn.register_new_node_with_optimizer(outer_l);
+  set_loop(outer_l, outer_ilt);
+  set_idom(outer_l, init_control, dom_depth(init_control)+1);
+
+  return outer_ilt;
+}
+
 //------------------------------is_counted_loop--------------------------------
-bool PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
+bool PhaseIdealLoop::is_counted_loop(Node* x, IdealLoopTree*& loop) {
   PhaseGVN *gvn = &_igvn;
 
   // Counted loop head must be a good RegionNode with only 3 not NULL
@@ -280,7 +340,7 @@
 
   // Allow funny placement of Safepoint
   if (back_control->Opcode() == Op_SafePoint) {
-    if (UseCountedLoopSafepoints) {
+    if (LoopStripMiningIter != 0) {
       // Leaving the safepoint on the backedge and creating a
       // CountedLoop will confuse optimizations. We can't move the
       // safepoint around because its jvm state wouldn't match a new
@@ -600,7 +660,7 @@
   }
   set_subtree_ctrl( limit );
 
-  if (!UseCountedLoopSafepoints) {
+  if (LoopStripMiningIter == 0) {
     // Check for SafePoint on backedge and remove
     Node *sfpt = x->in(LoopNode::LoopBackControl);
     if (sfpt->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt)) {
@@ -683,8 +743,20 @@
   assert(iff->outcnt() == 0, "should be dead now");
   lazy_replace( iff, le ); // fix 'get_ctrl'
 
+  Node *sfpt2 = le->in(0);
+
+  Node* entry_control = init_control;
+  bool strip_mine_loop = LoopStripMiningIter > 1 && loop->_child == NULL &&
+    sfpt2->Opcode() == Op_SafePoint && !loop->_has_call;
+  IdealLoopTree* outer_ilt = NULL;
+  if (strip_mine_loop) {
+    outer_ilt = create_outer_strip_mined_loop(test, cmp, init_control, loop,
+                                              cl_prob, le->_fcnt, entry_control,
+                                              iffalse);
+  }
+
   // Now setup a new CountedLoopNode to replace the existing LoopNode
-  CountedLoopNode *l = new CountedLoopNode(init_control, back_control);
+  CountedLoopNode *l = new CountedLoopNode(entry_control, back_control);
   l->set_unswitch_count(x->as_Loop()->unswitch_count()); // Preserve
   // The following assert is approximately true, and defines the intention
   // of can_be_counted_loop.  It fails, however, because phase->type
@@ -696,12 +768,19 @@
   // Fix all data nodes placed at the old loop head.
   // Uses the lazy-update mechanism of 'get_ctrl'.
   lazy_replace( x, l );
-  set_idom(l, init_control, dom_depth(x));
-
-  if (!UseCountedLoopSafepoints) {
+  set_idom(l, entry_control, dom_depth(entry_control) + 1);
+
+  if (LoopStripMiningIter == 0 || strip_mine_loop) {
     // Check for immediately preceding SafePoint and remove
-    Node *sfpt2 = le->in(0);
-    if (sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2)) {
+    if (sfpt2->Opcode() == Op_SafePoint && (LoopStripMiningIter != 0 || is_deleteable_safept(sfpt2))) {
+      if (strip_mine_loop) {
+        Node* outer_le = outer_ilt->_tail->in(0);
+        Node* sfpt = sfpt2->clone();
+        sfpt->set_req(0, iffalse);
+        outer_le->set_req(0, sfpt);
+        register_control(sfpt, outer_ilt, iffalse);
+        set_idom(outer_le, sfpt, dom_depth(sfpt));
+      }
       lazy_replace( sfpt2, sfpt2->in(TypeFunc::Control));
       if (loop->_safepts != NULL) {
         loop->_safepts->yank(sfpt2);
@@ -730,6 +809,13 @@
   // bounds
   l->phi()->as_Phi()->set_type(l->phi()->Value(&_igvn));
 
+  if (strip_mine_loop) {
+    l->mark_strip_mined();
+    l->verify_strip_mined(1);
+    outer_ilt->_head->as_Loop()->verify_strip_mined(1);
+    loop = outer_ilt;
+  }
+
   return true;
 }
 
@@ -776,12 +862,93 @@
 // Return a node which is more "ideal" than the current node.
 // Attempt to convert into a counted-loop.
 Node *LoopNode::Ideal(PhaseGVN *phase, bool can_reshape) {
-  if (!can_be_counted_loop(phase)) {
+  if (!can_be_counted_loop(phase) && !is_OuterStripMinedLoop()) {
     phase->C->set_major_progress();
   }
   return RegionNode::Ideal(phase, can_reshape);
 }
 
+void LoopNode::verify_strip_mined(int expect_skeleton) const {
+#ifdef ASSERT
+  const OuterStripMinedLoopNode* outer = NULL;
+  const CountedLoopNode* inner = NULL;
+  if (is_strip_mined()) {
+    assert(is_CountedLoop(), "no Loop should be marked strip mined");
+    inner = as_CountedLoop();
+    outer = inner->in(LoopNode::EntryControl)->as_OuterStripMinedLoop();
+  } else if (is_OuterStripMinedLoop()) {
+    outer = this->as_OuterStripMinedLoop();
+    inner = outer->unique_ctrl_out()->as_CountedLoop();
+    assert(!is_strip_mined(), "outer loop shouldn't be marked strip mined");
+  }
+  if (inner != NULL || outer != NULL) {
+    assert(inner != NULL && outer != NULL, "missing loop in strip mined nest");
+    Node* outer_tail = outer->in(LoopNode::LoopBackControl);
+    Node* outer_le = outer_tail->in(0);
+    assert(outer_le->Opcode() == Op_OuterStripMinedLoopEnd, "tail of outer loop should be an If");
+    Node* sfpt = outer_le->in(0);
+    assert(sfpt->Opcode() == Op_SafePoint, "where's the safepoint?");
+    Node* inner_out = sfpt->in(0);
+    if (inner_out->outcnt() != 1) {
+      ResourceMark rm;
+      Unique_Node_List wq;
+
+      for (DUIterator_Fast imax, i = inner_out->fast_outs(imax); i < imax; i++) {
+        Node* u = inner_out->fast_out(i);
+        if (u == sfpt) {
+          continue;
+        }
+        wq.clear();
+        wq.push(u);
+        bool found_sfpt = false;
+        for (uint next = 0; next < wq.size() && !found_sfpt; next++) {
+          Node *n = wq.at(next);
+          for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax && !found_sfpt; i++) {
+            Node* u = n->fast_out(i);
+            if (u == sfpt) {
+              found_sfpt = true;
+            }
+            if (!u->is_CFG()) {
+              wq.push(u);
+            }
+          }
+        }
+        assert(found_sfpt, "no node in loop that's not input to safepoint");
+      }
+    }
+    CountedLoopEndNode* cle = inner_out->in(0)->as_CountedLoopEnd();
+    assert(cle == inner->loopexit(), "mismatch");
+    bool has_skeleton = outer_le->in(1)->bottom_type()->singleton() && outer_le->in(1)->bottom_type()->is_int()->get_con() == 0;
+    if (has_skeleton) {
+      assert(expect_skeleton == 1 || expect_skeleton == -1, "unexpected skeleton node");
+      assert(outer->outcnt() == 2, "only phis");
+    } else {
+      assert(expect_skeleton == 0 || expect_skeleton == -1, "no skeleton node?");
+      uint phis = 0;
+      for (DUIterator_Fast imax, i = inner->fast_outs(imax); i < imax; i++) {
+        Node* u = inner->fast_out(i);
+        if (u->is_Phi()) {
+          phis++;
+        }
+      }
+      for (DUIterator_Fast imax, i = outer->fast_outs(imax); i < imax; i++) {
+        Node* u = outer->fast_out(i);
+        assert(u == outer || u == inner || u->is_Phi(), "nothing between inner and outer loop");
+      }
+      uint stores = 0;
+      for (DUIterator_Fast imax, i = inner_out->fast_outs(imax); i < imax; i++) {
+        Node* u = inner_out->fast_out(i);
+        if (u->is_Store()) {
+          stores++;
+        }
+      }
+      assert(outer->outcnt() >= phis + 2 && outer->outcnt() <= phis + 2 + stores + 1, "only phis");
+    }
+    assert(sfpt->outcnt() == 1, "no data node");
+    assert(outer_tail->outcnt() == 1 || !has_skeleton, "no data node");
+  }
+#endif
+}
 
 //=============================================================================
 //------------------------------Ideal------------------------------------------
@@ -802,6 +969,7 @@
   if (is_pre_loop ()) st->print("pre of N%d" , _main_idx);
   if (is_main_loop()) st->print("main of N%d", _idx);
   if (is_post_loop()) st->print("post of N%d", _main_idx);
+  if (is_strip_mined()) st->print(" strip mined");
 }
 #endif
 
@@ -990,6 +1158,365 @@
   return NULL;
 }
 
+LoopNode* CountedLoopNode::skip_strip_mined(int expect_opaq) {
+  if (is_strip_mined()) {
+    verify_strip_mined(expect_opaq);
+    return in(EntryControl)->as_Loop();
+  }
+  return this;
+}
+
+OuterStripMinedLoopNode* CountedLoopNode::outer_loop() const {
+  assert(is_strip_mined(), "not a strip mined loop");
+  Node* c = in(EntryControl);
+  if (c == NULL || c->is_top() || !c->is_OuterStripMinedLoop()) {
+    return NULL;
+  }
+  return c->as_OuterStripMinedLoop();
+}
+
+IfTrueNode* OuterStripMinedLoopNode::outer_loop_tail() const {
+  Node* c = in(LoopBackControl);
+  if (c == NULL || c->is_top()) {
+    return NULL;
+  }
+  return c->as_IfTrue();
+}
+
+IfTrueNode* CountedLoopNode::outer_loop_tail() const {
+  LoopNode* l = outer_loop();
+  if (l == NULL) {
+    return NULL;
+  }
+  return l->outer_loop_tail();
+}
+
+OuterStripMinedLoopEndNode* OuterStripMinedLoopNode::outer_loop_end() const {
+  IfTrueNode* proj = outer_loop_tail();
+  if (proj == NULL) {
+    return NULL;
+  }
+  Node* c = proj->in(0);
+  if (c == NULL || c->is_top() || c->outcnt() != 2) {
+    return NULL;
+  }
+  return c->as_OuterStripMinedLoopEnd();
+}
+
+OuterStripMinedLoopEndNode* CountedLoopNode::outer_loop_end() const {
+  LoopNode* l = outer_loop();
+  if (l == NULL) {
+    return NULL;
+  }
+  return l->outer_loop_end();
+}
+
+IfFalseNode* OuterStripMinedLoopNode::outer_loop_exit() const {
+  IfNode* le = outer_loop_end();
+  if (le == NULL) {
+    return NULL;
+  }
+  Node* c = le->proj_out(false);
+  if (c == NULL) {
+    return NULL;
+  }
+  return c->as_IfFalse();
+}
+
+IfFalseNode* CountedLoopNode::outer_loop_exit() const {
+  LoopNode* l = outer_loop();
+  if (l == NULL) {
+    return NULL;
+  }
+  return l->outer_loop_exit();
+}
+
+SafePointNode* OuterStripMinedLoopNode::outer_safepoint() const {
+  IfNode* le = outer_loop_end();
+  if (le == NULL) {
+    return NULL;
+  }
+  Node* c = le->in(0);
+  if (c == NULL || c->is_top()) {
+    return NULL;
+  }
+  assert(c->Opcode() == Op_SafePoint, "broken outer loop");
+  return c->as_SafePoint();
+}
+
+SafePointNode* CountedLoopNode::outer_safepoint() const {
+  LoopNode* l = outer_loop();
+  if (l == NULL) {
+    return NULL;
+  }
+  return l->outer_safepoint();
+}
+
+void OuterStripMinedLoopNode::adjust_strip_mined_loop(PhaseIterGVN* igvn) {
+  // Look for the outer & inner strip mined loop, reduce number of
+  // iterations of the inner loop, set exit condition of outer loop,
+  // construct required phi nodes for outer loop.
+  CountedLoopNode* inner_cl = unique_ctrl_out()->as_CountedLoop();
+  assert(inner_cl->is_strip_mined(), "inner loop should be strip mined");
+  Node* inner_iv_phi = inner_cl->phi();
+  if (inner_iv_phi == NULL) {
+    return;
+  }
+  CountedLoopEndNode* inner_cle = inner_cl->loopexit();
+
+  int stride = inner_cl->stride_con();
+  jlong scaled_iters_long = ((jlong)LoopStripMiningIter) * ABS(stride);
+  int scaled_iters = (int)scaled_iters_long;
+  int short_scaled_iters = LoopStripMiningIterShortLoop* ABS(stride);
+  const TypeInt* inner_iv_t = igvn->type(inner_iv_phi)->is_int();
+  jlong iter_estimate = (jlong)inner_iv_t->_hi - (jlong)inner_iv_t->_lo;
+  assert(iter_estimate > 0, "broken");
+  if ((jlong)scaled_iters != scaled_iters_long || iter_estimate <= short_scaled_iters) {
+    // Remove outer loop and safepoint (too few iterations)
+    Node* outer_sfpt = outer_safepoint();
+    Node* outer_out = outer_loop_exit();
+    igvn->replace_node(outer_out, outer_sfpt->in(0));
+    igvn->replace_input_of(outer_sfpt, 0, igvn->C->top());
+    inner_cl->clear_strip_mined();
+    return;
+  }
+  if (iter_estimate <= scaled_iters_long) {
+    // We would only go through one iteration of
+    // the outer loop: drop the outer loop but
+    // keep the safepoint so we don't run for
+    // too long without a safepoint
+    IfNode* outer_le = outer_loop_end();
+    Node* iff = igvn->transform(new IfNode(outer_le->in(0), outer_le->in(1), outer_le->_prob, outer_le->_fcnt));
+    igvn->replace_node(outer_le, iff);
+    inner_cl->clear_strip_mined();
+    return;
+  }
+
+  Node* cle_tail = inner_cle->proj_out(true);
+  ResourceMark rm;
+  Node_List old_new;
+  if (cle_tail->outcnt() > 1) {
+    // Look for nodes on backedge of inner loop and clone them
+    Unique_Node_List backedge_nodes;
+    for (DUIterator_Fast imax, i = cle_tail->fast_outs(imax); i < imax; i++) {
+      Node* u = cle_tail->fast_out(i);
+      if (u != inner_cl) {
+        assert(!u->is_CFG(), "control flow on the backedge?");
+        backedge_nodes.push(u);
+      }
+    }
+    uint last = igvn->C->unique();
+    for (uint next = 0; next < backedge_nodes.size(); next++) {
+      Node* n = backedge_nodes.at(next);
+      old_new.map(n->_idx, n->clone());
+      for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+        Node* u = n->fast_out(i);
+        assert(!u->is_CFG(), "broken");
+        if (u->_idx >= last) {
+          continue;
+        }
+        if (!u->is_Phi()) {
+          backedge_nodes.push(u);
+        } else {
+          assert(u->in(0) == inner_cl, "strange phi on the backedge");
+        }
+      }
+    }
+    // Put the clones on the outer loop backedge
+    Node* le_tail = outer_loop_tail();
+    for (uint next = 0; next < backedge_nodes.size(); next++) {
+      Node *n = old_new[backedge_nodes.at(next)->_idx];
+      for (uint i = 1; i < n->req(); i++) {
+        if (n->in(i) != NULL && old_new[n->in(i)->_idx] != NULL) {
+          n->set_req(i, old_new[n->in(i)->_idx]);
+        }
+      }
+      if (n->in(0) != NULL) {
+        assert(n->in(0) == cle_tail, "node not on backedge?");
+        n->set_req(0, le_tail);
+      }
+      igvn->register_new_node_with_optimizer(n);
+    }
+  }
+
+  Node* iv_phi = NULL;
+  // Make a clone of each phi in the inner loop
+  // for the outer loop
+  for (uint i = 0; i < inner_cl->outcnt(); i++) {
+    Node* u = inner_cl->raw_out(i);
+    if (u->is_Phi()) {
+      assert(u->in(0) == inner_cl, "inconsistent");
+      Node* phi = u->clone();
+      phi->set_req(0, this);
+      Node* be = old_new[phi->in(LoopNode::LoopBackControl)->_idx];
+      if (be != NULL) {
+        phi->set_req(LoopNode::LoopBackControl, be);
+      }
+      phi = igvn->transform(phi);
+      igvn->replace_input_of(u, LoopNode::EntryControl, phi);
+      if (u == inner_iv_phi) {
+        iv_phi = phi;
+      }
+    }
+  }
+  Node* cle_out = inner_cle->proj_out(false);
+  if (cle_out->outcnt() > 1) {
+    // Look for chains of stores that were sunk
+    // out of the inner loop and are in the outer loop
+    for (DUIterator_Fast imax, i = cle_out->fast_outs(imax); i < imax; i++) {
+      Node* u = cle_out->fast_out(i);
+      if (u->is_Store()) {
+        Node* first = u;
+        for(;;) {
+          Node* next = first->in(MemNode::Memory);
+          if (!next->is_Store() || next->in(0) != cle_out) {
+            break;
+          }
+          first = next;
+        }
+        Node* last = u;
+        for(;;) {
+          Node* next = NULL;
+          for (DUIterator_Fast jmax, j = last->fast_outs(jmax); j < jmax; j++) {
+            Node* uu = last->fast_out(j);
+            if (uu->is_Store() && uu->in(0) == cle_out) {
+              assert(next == NULL, "only one in the outer loop");
+              next = uu;
+            }
+          }
+          if (next == NULL) {
+            break;
+          }
+          last = next;
+        }
+        Node* phi = NULL;
+        for (DUIterator_Fast jmax, j = fast_outs(jmax); j < jmax; j++) {
+          Node* uu = fast_out(j);
+          if (uu->is_Phi()) {
+            Node* be = uu->in(LoopNode::LoopBackControl);
+            while (be->is_Store() && old_new[be->_idx] != NULL) {
+              ShouldNotReachHere();
+              be = be->in(MemNode::Memory);
+            }
+            if (be == last || be == first->in(MemNode::Memory)) {
+              assert(phi == NULL, "only one phi");
+              phi = uu;
+            }
+          }
+        }
+#ifdef ASSERT
+        for (DUIterator_Fast jmax, j = fast_outs(jmax); j < jmax; j++) {
+          Node* uu = fast_out(j);
+          if (uu->is_Phi() && uu->bottom_type() == Type::MEMORY) {
+            if (uu->adr_type() == igvn->C->get_adr_type(igvn->C->get_alias_index(u->adr_type()))) {
+              assert(phi == uu, "what's that phi?");
+            } else if (uu->adr_type() == TypePtr::BOTTOM) {
+              Node* n = uu->in(LoopNode::LoopBackControl);
+              uint limit = igvn->C->live_nodes();
+              uint i = 0;
+              while (n != uu) {
+                i++;
+                assert(i < limit, "infinite loop");
+                if (n->is_Proj()) {
+                  n = n->in(0);
+                } else if (n->is_SafePoint() || n->is_MemBar()) {
+                  n = n->in(TypeFunc::Memory);
+                } else if (n->is_Phi()) {
+                  n = n->in(1);
+                } else if (n->is_MergeMem()) {
+                  n = n->as_MergeMem()->memory_at(igvn->C->get_alias_index(u->adr_type()));
+                } else if (n->is_Store() || n->is_LoadStore() || n->is_ClearArray()) {
+                  n = n->in(MemNode::Memory);
+                } else {
+                  n->dump();
+                  ShouldNotReachHere();
+                }
+              }
+            }
+          }
+        }
+#endif
+        if (phi == NULL) {
+          // If the an entire chains was sunk, the
+          // inner loop has no phi for that memory
+          // slice, create one for the outer loop
+          phi = PhiNode::make(this, first->in(MemNode::Memory), Type::MEMORY,
+                              igvn->C->get_adr_type(igvn->C->get_alias_index(u->adr_type())));
+          phi->set_req(LoopNode::LoopBackControl, last);
+          phi = igvn->transform(phi);
+          igvn->replace_input_of(first, MemNode::Memory, phi);
+        } else {
+          // Or fix the outer loop fix to include
+          // that chain of stores.
+          Node* be = phi->in(LoopNode::LoopBackControl);
+          while (be->is_Store() && old_new[be->_idx] != NULL) {
+            ShouldNotReachHere();
+            be = be->in(MemNode::Memory);
+          }
+          if (be == first->in(MemNode::Memory)) {
+            if (be == phi->in(LoopNode::LoopBackControl)) {
+              igvn->replace_input_of(phi, LoopNode::LoopBackControl, last);
+            } else {
+              igvn->replace_input_of(be, MemNode::Memory, last);
+            }
+          } else {
+#ifdef ASSERT
+            if (be == phi->in(LoopNode::LoopBackControl)) {
+              assert(phi->in(LoopNode::LoopBackControl) == last, "");
+            } else {
+              assert(be->in(MemNode::Memory) == last, "");
+            }
+#endif
+          }
+        }
+      }
+    }
+  }
+
+  if (iv_phi != NULL) {
+    // Now adjust the inner loop's exit condition
+    Node* limit = inner_cl->limit();
+    Node* sub = NULL;
+    if (stride > 0) {
+      sub = igvn->transform(new SubINode(limit, iv_phi));
+    } else {
+      sub = igvn->transform(new SubINode(iv_phi, limit));
+    }
+    Node* min = igvn->transform(new MinINode(sub, igvn->intcon(scaled_iters)));
+    Node* new_limit = NULL;
+    if (stride > 0) {
+      new_limit = igvn->transform(new AddINode(min, iv_phi));
+    } else {
+      new_limit = igvn->transform(new SubINode(iv_phi, min));
+    }
+    igvn->replace_input_of(inner_cle->cmp_node(), 2, new_limit);
+    Node* cmp = inner_cle->cmp_node()->clone();
+    Node* bol = inner_cle->in(CountedLoopEndNode::TestValue)->clone();
+    cmp->set_req(2, limit);
+    bol->set_req(1, igvn->transform(cmp));
+    igvn->replace_input_of(outer_loop_end(), 1, igvn->transform(bol));
+  } else {
+    assert(false, "should be able to adjust outer loop");
+    IfNode* outer_le = outer_loop_end();
+    Node* iff = igvn->transform(new IfNode(outer_le->in(0), outer_le->in(1), outer_le->_prob, outer_le->_fcnt));
+    igvn->replace_node(outer_le, iff);
+    inner_cl->clear_strip_mined();
+  }
+}
+
+const Type* OuterStripMinedLoopEndNode::Value(PhaseGVN* phase) const {
+  if (!in(0)) return Type::TOP;
+  if (phase->type(in(0)) == Type::TOP)
+    return Type::TOP;
+
+  return TypeTuple::IFBOTH;
+}
+
+Node *OuterStripMinedLoopEndNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+  if (remove_dead_region(phase, can_reshape))  return this;
+
+  return NULL;
+}
 
 //------------------------------filtered_type--------------------------------
 // Return a type based on condition control flow
@@ -1778,10 +2305,11 @@
     if (_head->is_Loop()) _head->as_Loop()->set_inner_loop();
   }
 
+  IdealLoopTree* loop = this;
   if (_head->is_CountedLoop() ||
-      phase->is_counted_loop(_head, this)) {
-
-    if (!UseCountedLoopSafepoints) {
+      phase->is_counted_loop(_head, loop)) {
+
+    if (LoopStripMiningIter == 0 || (LoopStripMiningIter > 1 && _child == NULL)) {
       // Indicate we do not need a safepoint here
       _has_sfpt = 1;
     }
@@ -1800,8 +2328,10 @@
   }
 
   // Recursively
-  if (_child) _child->counted_loop( phase );
-  if (_next)  _next ->counted_loop( phase );
+  assert(loop->_child != this || (loop->_head->as_Loop()->is_OuterStripMinedLoop() && _head->as_CountedLoop()->is_strip_mined()), "what kind of loop was added?");
+  assert(loop->_child != this || (loop->_child->_child == NULL && loop->_child->_next == NULL), "would miss some loops");
+  if (loop->_child && loop->_child != this) loop->_child->counted_loop(phase);
+  if (loop->_next)  loop->_next ->counted_loop(phase);
 }
 
 #ifndef PRODUCT
@@ -1812,7 +2342,7 @@
     tty->print("  ");
   tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx);
   if (_irreducible) tty->print(" IRREDUCIBLE");
-  Node* entry = _head->in(LoopNode::EntryControl);
+  Node* entry = _head->as_Loop()->skip_strip_mined(-1)->in(LoopNode::EntryControl);
   Node* predicate = PhaseIdealLoop::find_predicate_insertion_point(entry, Deoptimization::Reason_loop_limit_check);
   if (predicate != NULL ) {
     tty->print(" limit_check");
@@ -1863,6 +2393,9 @@
   if (Verbose) {
     tty->print(" body={"); _body.dump_simple(); tty->print(" }");
   }
+  if (_head->as_Loop()->is_strip_mined()) {
+    tty->print(" strip_mined");
+  }
   tty->cr();
 }
 
@@ -3232,7 +3765,7 @@
   if (!cl->is_main_loop() && !cl->is_post_loop()) {
     return false;
   }
-  Node* ctrl = cl->in(LoopNode::EntryControl);
+  Node* ctrl = cl->skip_strip_mined()->in(LoopNode::EntryControl);
   if (ctrl == NULL || (!ctrl->is_IfTrue() && !ctrl->is_IfFalse())) {
     return false;
   }
@@ -3292,7 +3825,7 @@
     }
     while(worklist.size() != 0 && LCA != early) {
       Node* s = worklist.pop();
-      if (s->is_Load()) {
+      if (s->is_Load() || s->Opcode() == Op_SafePoint) {
         continue;
       } else if (s->is_MergeMem()) {
         for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
@@ -3471,6 +4004,38 @@
   }
 }
 
+// Verify that no data node is schedules in the outer loop of a strip
+// mined loop.
+void PhaseIdealLoop::verify_strip_mined_scheduling(Node *n, Node* least) {
+#ifdef ASSERT
+  if (get_loop(least)->_nest == 0) {
+    return;
+  }
+  IdealLoopTree* loop = get_loop(least);
+  Node* head = loop->_head;
+  if (head->is_OuterStripMinedLoop()) {
+    Node* sfpt = head->as_Loop()->outer_safepoint();
+    ResourceMark rm;
+    Unique_Node_List wq;
+    wq.push(sfpt);
+    for (uint i = 0; i < wq.size(); i++) {
+      Node *m = wq.at(i);
+      for (uint i = 1; i < m->req(); i++) {
+        Node* nn = m->in(i);
+        if (nn == n) {
+          return;
+        }
+        if (nn != NULL && has_ctrl(nn) && get_loop(get_ctrl(nn)) == loop) {
+          wq.push(nn);
+        }
+      }
+    }
+    ShouldNotReachHere();
+  }
+#endif
+}
+
+
 //------------------------------build_loop_late_post---------------------------
 // Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
 // Second pass finds latest legal placement, and ideal loop placement.
@@ -3580,8 +4145,9 @@
   // which can inhibit range check elimination.
   if (least != early) {
     Node* ctrl_out = least->unique_ctrl_out();
-    if (ctrl_out && ctrl_out->is_CountedLoop() &&
-        least == ctrl_out->in(LoopNode::EntryControl)) {
+    if (ctrl_out && ctrl_out->is_Loop() &&
+        least == ctrl_out->in(LoopNode::EntryControl) &&
+        (ctrl_out->is_CountedLoop() || ctrl_out->is_OuterStripMinedLoop())) {
       Node* least_dom = idom(least);
       if (get_loop(least_dom)->is_member(get_loop(least))) {
         least = least_dom;
@@ -3606,6 +4172,7 @@
 
   // Assign discovered "here or above" point
   least = find_non_split_ctrl(least);
+  verify_strip_mined_scheduling(n, least);
   set_ctrl(n, least);
 
   // Collect inner loop bodies
--- a/src/hotspot/share/opto/loopnode.hpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/opto/loopnode.hpp	Tue Nov 28 11:59:16 2017 +0100
@@ -37,6 +37,7 @@
 class IdealLoopTree;
 class LoopNode;
 class Node;
+class OuterStripMinedLoopEndNode;
 class PhaseIdealLoop;
 class CountedLoopReserveKit;
 class VectorSet;
@@ -71,7 +72,8 @@
          VectorizedLoop=2048,
          HasAtomicPostLoop=4096,
          HasRangeChecks=8192,
-         IsMultiversioned=16384};
+         IsMultiversioned=16384,
+         StripMined=32768};
   char _unswitch_count;
   enum { _unswitch_max=3 };
   char _postloop_flags;
@@ -90,6 +92,7 @@
   int is_partial_peel_loop() const { return _loop_flags & PartialPeelLoop; }
   void set_partial_peel_loop() { _loop_flags |= PartialPeelLoop; }
   int partial_peel_has_failed() const { return _loop_flags & PartialPeelFailed; }
+  int is_strip_mined() const { return _loop_flags & StripMined; }
 
   void mark_partial_peel_failed() { _loop_flags |= PartialPeelFailed; }
   void mark_has_reductions() { _loop_flags |= HasReductions; }
@@ -100,6 +103,8 @@
   void mark_has_atomic_post_loop() { _loop_flags |= HasAtomicPostLoop; }
   void mark_has_range_checks() { _loop_flags |=  HasRangeChecks; }
   void mark_is_multiversioned() { _loop_flags |= IsMultiversioned; }
+  void mark_strip_mined() { _loop_flags |= StripMined; }
+  void clear_strip_mined() { _loop_flags &= ~StripMined; }
 
   int unswitch_max() { return _unswitch_max; }
   int unswitch_count() { return _unswitch_count; }
@@ -131,6 +136,13 @@
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const;
 #endif
+
+  void verify_strip_mined(int expect_skeleton) const;
+  virtual LoopNode* skip_strip_mined(int expect_opaq = 1) { return this; }
+  virtual IfTrueNode* outer_loop_tail() const { ShouldNotReachHere(); return NULL; }
+  virtual OuterStripMinedLoopEndNode* outer_loop_end() const { ShouldNotReachHere(); return NULL; }
+  virtual IfFalseNode* outer_loop_exit() const { ShouldNotReachHere(); return NULL; }
+  virtual SafePointNode* outer_safepoint() const { ShouldNotReachHere(); return NULL; }
 };
 
 //------------------------------Counted Loops----------------------------------
@@ -278,6 +290,13 @@
   void set_slp_max_unroll(int unroll_factor) { _slp_maximum_unroll_factor = unroll_factor; }
   int  slp_max_unroll() const                { return _slp_maximum_unroll_factor; }
 
+  virtual LoopNode* skip_strip_mined(int expect_opaq = 1);
+  OuterStripMinedLoopNode* outer_loop() const;
+  virtual IfTrueNode* outer_loop_tail() const;
+  virtual OuterStripMinedLoopEndNode* outer_loop_end() const;
+  virtual IfFalseNode* outer_loop_exit() const;
+  virtual SafePointNode* outer_safepoint() const;
+
 #ifndef PRODUCT
   virtual void dump_spec(outputStream *st) const;
 #endif
@@ -374,6 +393,40 @@
   virtual Node* Identity(PhaseGVN* phase);
 };
 
+// Support for strip mining
+class OuterStripMinedLoopNode : public LoopNode {
+private:
+  CountedLoopNode* inner_loop() const;
+public:
+  OuterStripMinedLoopNode(Compile* C, Node *entry, Node *backedge)
+    : LoopNode(entry, backedge) {
+    init_class_id(Class_OuterStripMinedLoop);
+    init_flags(Flag_is_macro);
+    C->add_macro_node(this);
+  }
+
+  virtual int Opcode() const;
+
+  virtual IfTrueNode* outer_loop_tail() const;
+  virtual OuterStripMinedLoopEndNode* outer_loop_end() const;
+  virtual IfFalseNode* outer_loop_exit() const;
+  virtual SafePointNode* outer_safepoint() const;
+  void adjust_strip_mined_loop(PhaseIterGVN* igvn);
+};
+
+class OuterStripMinedLoopEndNode : public IfNode {
+public:
+  OuterStripMinedLoopEndNode(Node *control, Node *test, float prob, float cnt)
+    : IfNode(control, test, prob, cnt) {
+    init_class_id(Class_OuterStripMinedLoopEnd);
+  }
+
+  virtual int Opcode() const;
+
+  virtual const Type* Value(PhaseGVN* phase) const;
+  virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+};
+
 // -----------------------------IdealLoopTree----------------------------------
 class IdealLoopTree : public ResourceObj {
 public:
@@ -780,6 +833,7 @@
   void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
   void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack );
   void build_loop_late_post ( Node* n );
+  void verify_strip_mined_scheduling(Node *n, Node* least);
 
   // Array of immediate dominance info for each CFG node indexed by node idx
 private:
@@ -877,7 +931,10 @@
   // Per-Node transform
   virtual Node *transform( Node *a_node ) { return 0; }
 
-  bool is_counted_loop( Node *x, IdealLoopTree *loop );
+  bool is_counted_loop(Node* x, IdealLoopTree*& loop);
+  IdealLoopTree* create_outer_strip_mined_loop(BoolNode *test, Node *cmp, Node *init_control,
+                                               IdealLoopTree* loop, float cl_prob, float le_fcnt,
+                                               Node*& entry_control, Node*& iffalse);
 
   Node* exact_limit( IdealLoopTree *loop );
 
@@ -908,8 +965,24 @@
   //   When nonnull, the clone and original are side-by-side, both are
   //      dominated by the passed in side_by_side_idom node.  Used in
   //      construction of unswitched loops.
+  enum CloneLoopMode {
+    IgnoreStripMined = 0,        // Only clone inner strip mined loop
+    CloneIncludesStripMined = 1, // clone both inner and outer strip mined loops
+    ControlAroundStripMined = 2  // Only clone inner strip mined loop,
+                                 // result control flow branches
+                                 // either to inner clone or outer
+                                 // strip mined loop.
+  };
   void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth,
-                   Node* side_by_side_idom = NULL);
+                  CloneLoopMode mode, Node* side_by_side_idom = NULL);
+  void clone_loop_handle_data_uses(Node* old, Node_List &old_new,
+                                   IdealLoopTree* loop, IdealLoopTree* companion_loop,
+                                   Node_List*& split_if_set, Node_List*& split_bool_set,
+                                   Node_List*& split_cex_set, Node_List& worklist,
+                                   uint new_counter, CloneLoopMode mode);
+  void clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealLoopTree *loop,
+                        IdealLoopTree* outer_loop, int dd, Node_List &old_new,
+                        Node_List& extra_data_nodes);
 
   // If we got the effect of peeling, either by actually peeling or by
   // making a pre-loop which must execute at least once, we can remove
@@ -1020,7 +1093,8 @@
   // and inserting an if to select fast-slow versions.
   ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
                                         Node_List &old_new,
-                                        int opcode);
+                                        int opcode,
+                                        CloneLoopMode mode);
 
   // Clone a loop and return the clone head (clone_loop_head).
   // Added nodes include int(1), int(0) - disconnected, If, IfTrue, IfFalse,
--- a/src/hotspot/share/opto/loopopts.cpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/opto/loopopts.cpp	Tue Nov 28 11:59:16 2017 +0100
@@ -26,6 +26,7 @@
 #include "memory/allocation.inline.hpp"
 #include "memory/resourceArea.hpp"
 #include "opto/addnode.hpp"
+#include "opto/callnode.hpp"
 #include "opto/castnode.hpp"
 #include "opto/connode.hpp"
 #include "opto/castnode.hpp"
@@ -306,7 +307,12 @@
           get_ctrl(m->in(2)) != n_ctrl &&
           get_ctrl(m->in(3)) != n_ctrl) {
         // Move the AddP up to dominating point
-        set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl)));
+        Node* c = find_non_split_ctrl(idom(n_ctrl));
+        if (c->is_OuterStripMinedLoop()) {
+          c->as_Loop()->verify_strip_mined(1);
+          c = c->in(LoopNode::EntryControl);
+        }
+        set_ctrl_and_loop(m, c);
         continue;
       }
       return NULL;
@@ -750,14 +756,13 @@
       if (ctrl_ok) {
         // move the Store
         _igvn.replace_input_of(mem, LoopNode::LoopBackControl, mem);
-        _igvn.replace_input_of(n, 0, n_loop->_head->in(LoopNode::EntryControl));
+        _igvn.replace_input_of(n, 0, n_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl));
         _igvn.replace_input_of(n, MemNode::Memory, mem->in(LoopNode::EntryControl));
         // Disconnect the phi now. An empty phi can confuse other
         // optimizations in this pass of loop opts.
         _igvn.replace_node(mem, mem->in(LoopNode::EntryControl));
         n_loop->_body.yank(mem);
 
-        IdealLoopTree* new_loop = get_loop(n->in(0));
         set_ctrl_and_loop(n, n->in(0));
 
         return n;
@@ -840,6 +845,16 @@
               _igvn.replace_node(hook, n);
               return;
             }
+#ifdef ASSERT
+            if (n_loop->_head->is_Loop() && n_loop->_head->as_Loop()->is_strip_mined()) {
+              assert(n_loop->_head->Opcode() == Op_CountedLoop, "outer loop is a strip mined");
+              n_loop->_head->as_Loop()->verify_strip_mined(1);
+              Node* outer = n_loop->_head->as_CountedLoop()->outer_loop();
+              IdealLoopTree* outer_loop = get_loop(outer);
+              assert(n_loop->_parent == outer_loop, "broken loop tree");
+              assert(get_loop(lca) == outer_loop, "safepoint in outer loop consume all memory state");
+            }
+#endif
 
             // Move store out of the loop
             _igvn.replace_node(hook, n->in(MemNode::Memory));
@@ -1016,7 +1031,7 @@
   IdealLoopTree *u_loop = get_loop( useblock );
   return (u_loop->_irreducible || u_loop->_child)
     ? useblock
-    : u_loop->_head->in(LoopNode::EntryControl);
+    : u_loop->_head->as_Loop()->skip_strip_mined()->in(LoopNode::EntryControl);
 }
 
 
@@ -1569,6 +1584,252 @@
   }
 }
 
+void PhaseIdealLoop::clone_loop_handle_data_uses(Node* old, Node_List &old_new,
+                                                 IdealLoopTree* loop, IdealLoopTree* outer_loop,
+                                                 Node_List*& split_if_set, Node_List*& split_bool_set,
+                                                 Node_List*& split_cex_set, Node_List& worklist,
+                                                 uint new_counter, CloneLoopMode mode) {
+  Node* nnn = old_new[old->_idx];
+  // Copy uses to a worklist, so I can munge the def-use info
+  // with impunity.
+  for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
+    worklist.push(old->fast_out(j));
+
+  while( worklist.size() ) {
+    Node *use = worklist.pop();
+    if (!has_node(use))  continue; // Ignore dead nodes
+    if (use->in(0) == C->top())  continue;
+    IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
+    // Check for data-use outside of loop - at least one of OLD or USE
+    // must not be a CFG node.
+#ifdef ASSERT
+    if (loop->_head->as_Loop()->is_strip_mined() && outer_loop->is_member(use_loop) && !loop->is_member(use_loop) && old_new[use->_idx] == NULL) {
+      Node* sfpt = loop->_head->as_CountedLoop()->outer_safepoint();
+      assert(mode == ControlAroundStripMined && use == sfpt, "missed a node");
+    }
+#endif
+    if (!loop->is_member(use_loop) && !outer_loop->is_member(use_loop) && (!old->is_CFG() || !use->is_CFG())) {
+
+      // If the Data use is an IF, that means we have an IF outside of the
+      // loop that is switching on a condition that is set inside of the
+      // loop.  Happens if people set a loop-exit flag; then test the flag
+      // in the loop to break the loop, then test is again outside of the
+      // loop to determine which way the loop exited.
+      // Loop predicate If node connects to Bool node through Opaque1 node.
+      if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use) || use->Opcode() == Op_Opaque4) {
+        // Since this code is highly unlikely, we lazily build the worklist
+        // of such Nodes to go split.
+        if (!split_if_set) {
+          ResourceArea *area = Thread::current()->resource_area();
+          split_if_set = new Node_List(area);
+        }
+        split_if_set->push(use);
+      }
+      if (use->is_Bool()) {
+        if (!split_bool_set) {
+          ResourceArea *area = Thread::current()->resource_area();
+          split_bool_set = new Node_List(area);
+        }
+        split_bool_set->push(use);
+      }
+      if (use->Opcode() == Op_CreateEx) {
+        if (!split_cex_set) {
+          ResourceArea *area = Thread::current()->resource_area();
+          split_cex_set = new Node_List(area);
+        }
+        split_cex_set->push(use);
+      }
+
+
+      // Get "block" use is in
+      uint idx = 0;
+      while( use->in(idx) != old ) idx++;
+      Node *prev = use->is_CFG() ? use : get_ctrl(use);
+      assert(!loop->is_member(get_loop(prev)) && !outer_loop->is_member(get_loop(prev)), "" );
+      Node *cfg = prev->_idx >= new_counter
+        ? prev->in(2)
+        : idom(prev);
+      if( use->is_Phi() )     // Phi use is in prior block
+        cfg = prev->in(idx);  // NOT in block of Phi itself
+      if (cfg->is_top()) {    // Use is dead?
+        _igvn.replace_input_of(use, idx, C->top());
+        continue;
+      }
+
+      while(!outer_loop->is_member(get_loop(cfg))) {
+        prev = cfg;
+        cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
+      }
+      // If the use occurs after merging several exits from the loop, then
+      // old value must have dominated all those exits.  Since the same old
+      // value was used on all those exits we did not need a Phi at this
+      // merge point.  NOW we do need a Phi here.  Each loop exit value
+      // is now merged with the peeled body exit; each exit gets its own
+      // private Phi and those Phis need to be merged here.
+      Node *phi;
+      if( prev->is_Region() ) {
+        if( idx == 0 ) {      // Updating control edge?
+          phi = prev;         // Just use existing control
+        } else {              // Else need a new Phi
+          phi = PhiNode::make( prev, old );
+          // Now recursively fix up the new uses of old!
+          for( uint i = 1; i < prev->req(); i++ ) {
+            worklist.push(phi); // Onto worklist once for each 'old' input
+          }
+        }
+      } else {
+        // Get new RegionNode merging old and new loop exits
+        prev = old_new[prev->_idx];
+        assert( prev, "just made this in step 7" );
+        if( idx == 0) {      // Updating control edge?
+          phi = prev;         // Just use existing control
+        } else {              // Else need a new Phi
+          // Make a new Phi merging data values properly
+          phi = PhiNode::make( prev, old );
+          phi->set_req( 1, nnn );
+        }
+      }
+      // If inserting a new Phi, check for prior hits
+      if( idx != 0 ) {
+        Node *hit = _igvn.hash_find_insert(phi);
+        if( hit == NULL ) {
+          _igvn.register_new_node_with_optimizer(phi); // Register new phi
+        } else {                                      // or
+          // Remove the new phi from the graph and use the hit
+          _igvn.remove_dead_node(phi);
+          phi = hit;                                  // Use existing phi
+        }
+        set_ctrl(phi, prev);
+      }
+      // Make 'use' use the Phi instead of the old loop body exit value
+      _igvn.replace_input_of(use, idx, phi);
+      if( use->_idx >= new_counter ) { // If updating new phis
+        // Not needed for correctness, but prevents a weak assert
+        // in AddPNode from tripping (when we end up with different
+        // base & derived Phis that will become the same after
+        // IGVN does CSE).
+        Node *hit = _igvn.hash_find_insert(use);
+        if( hit )             // Go ahead and re-hash for hits.
+          _igvn.replace_node( use, hit );
+      }
+
+      // If 'use' was in the loop-exit block, it now needs to be sunk
+      // below the post-loop merge point.
+      sink_use( use, prev );
+    }
+  }
+}
+
+void PhaseIdealLoop::clone_outer_loop(LoopNode* head, CloneLoopMode mode, IdealLoopTree *loop,
+                                      IdealLoopTree* outer_loop, int dd, Node_List &old_new,
+                                      Node_List& extra_data_nodes) {
+  if (head->is_strip_mined() && mode != IgnoreStripMined) {
+    CountedLoopNode* cl = head->as_CountedLoop();
+    Node* l = cl->outer_loop();
+    Node* tail = cl->outer_loop_tail();
+    IfNode* le = cl->outer_loop_end();
+    Node* sfpt = cl->outer_safepoint();
+    CountedLoopEndNode* cle = cl->loopexit();
+    CountedLoopNode* new_cl = old_new[cl->_idx]->as_CountedLoop();
+    CountedLoopEndNode* new_cle = new_cl->as_CountedLoop()->loopexit();
+    Node* cle_out = cle->proj_out(false);
+
+    Node* new_sfpt = NULL;
+    Node* new_cle_out = cle_out->clone();
+    old_new.map(cle_out->_idx, new_cle_out);
+    if (mode == CloneIncludesStripMined) {
+      // clone outer loop body
+      Node* new_l = l->clone();
+      Node* new_tail = tail->clone();
+      IfNode* new_le = le->clone()->as_If();
+      new_sfpt = sfpt->clone();
+
+      set_loop(new_l, outer_loop->_parent);
+      set_idom(new_l, new_l->in(LoopNode::EntryControl), dd);
+      set_loop(new_cle_out, outer_loop->_parent);
+      set_idom(new_cle_out, new_cle, dd);
+      set_loop(new_sfpt, outer_loop->_parent);
+      set_idom(new_sfpt, new_cle_out, dd);
+      set_loop(new_le, outer_loop->_parent);
+      set_idom(new_le, new_sfpt, dd);
+      set_loop(new_tail, outer_loop->_parent);
+      set_idom(new_tail, new_le, dd);
+      set_idom(new_cl, new_l, dd);
+
+      old_new.map(l->_idx, new_l);
+      old_new.map(tail->_idx, new_tail);
+      old_new.map(le->_idx, new_le);
+      old_new.map(sfpt->_idx, new_sfpt);
+
+      new_l->set_req(LoopNode::LoopBackControl, new_tail);
+      new_l->set_req(0, new_l);
+      new_tail->set_req(0, new_le);
+      new_le->set_req(0, new_sfpt);
+      new_sfpt->set_req(0, new_cle_out);
+      new_cle_out->set_req(0, new_cle);
+      new_cl->set_req(LoopNode::EntryControl, new_l);
+
+      _igvn.register_new_node_with_optimizer(new_l);
+      _igvn.register_new_node_with_optimizer(new_tail);
+      _igvn.register_new_node_with_optimizer(new_le);
+    } else {
+      Node *newhead = old_new[loop->_head->_idx];
+      newhead->as_Loop()->clear_strip_mined();
+      _igvn.replace_input_of(newhead, LoopNode::EntryControl, newhead->in(LoopNode::EntryControl)->in(LoopNode::EntryControl));
+      set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
+    }
+    // Look at data node that were assigned a control in the outer
+    // loop: they are kept in the outer loop by the safepoint so start
+    // from the safepoint node's inputs.
+    IdealLoopTree* outer_loop = get_loop(l);
+    Node_Stack stack(2);
+    stack.push(sfpt, 1);
+    uint new_counter = C->unique();
+    while (stack.size() > 0) {
+      Node* n = stack.node();
+      uint i = stack.index();
+      while (i < n->req() &&
+             (n->in(i) == NULL ||
+              !has_ctrl(n->in(i)) ||
+              get_loop(get_ctrl(n->in(i))) != outer_loop ||
+              (old_new[n->in(i)->_idx] != NULL && old_new[n->in(i)->_idx]->_idx >= new_counter))) {
+        i++;
+      }
+      if (i < n->req()) {
+        stack.set_index(i+1);
+        stack.push(n->in(i), 0);
+      } else {
+        assert(old_new[n->_idx] == NULL || n == sfpt || old_new[n->_idx]->_idx < new_counter, "no clone yet");
+        Node* m = n == sfpt ? new_sfpt : n->clone();
+        if (m != NULL) {
+          for (uint i = 0; i < n->req(); i++) {
+            if (m->in(i) != NULL && old_new[m->in(i)->_idx] != NULL) {
+              m->set_req(i, old_new[m->in(i)->_idx]);
+            }
+          }
+        } else {
+          assert(n == sfpt && mode != CloneIncludesStripMined, "where's the safepoint clone?");
+        }
+        if (n != sfpt) {
+          extra_data_nodes.push(n);
+          _igvn.register_new_node_with_optimizer(m);
+          assert(get_ctrl(n) == cle_out, "what other control?");
+          set_ctrl(m, new_cle_out);
+          old_new.map(n->_idx, m);
+        }
+        stack.pop();
+      }
+    }
+    if (mode == CloneIncludesStripMined) {
+      _igvn.register_new_node_with_optimizer(new_sfpt);
+      _igvn.register_new_node_with_optimizer(new_cle_out);
+    }
+  } else {
+    Node *newhead = old_new[loop->_head->_idx];
+    set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
+  }
+}
+
 //------------------------------clone_loop-------------------------------------
 //
 //                   C L O N E   A   L O O P   B O D Y
@@ -1597,7 +1858,10 @@
 //      dominated by the side_by_side_idom node.  Used in construction of
 //      unswitched loops.
 void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
-                                 Node* side_by_side_idom) {
+                                CloneLoopMode mode, Node* side_by_side_idom) {
+
+  LoopNode* head = loop->_head->as_Loop();
+  head->verify_strip_mined(1);
 
   if (C->do_vector_loop() && PrintOpto) {
     const char* mname = C->method()->name()->as_quoted_ascii();
@@ -1630,6 +1894,7 @@
     _igvn.register_new_node_with_optimizer(nnn);
   }
 
+  IdealLoopTree* outer_loop = (head->is_strip_mined() && mode != IgnoreStripMined) ? get_loop(head->as_CountedLoop()->outer_loop()) : loop;
 
   // Step 2: Fix the edges in the new body.  If the old input is outside the
   // loop use it.  If the old input is INside the loop, use the corresponding
@@ -1641,7 +1906,7 @@
     if (has_ctrl(old)) {
       set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
     } else {
-      set_loop(nnn, loop->_parent);
+      set_loop(nnn, outer_loop->_parent);
       if (old->outcnt() > 0) {
         set_idom( nnn, old_new[idom(old)->_idx], dd );
       }
@@ -1657,22 +1922,21 @@
     }
     _igvn.hash_find_insert(nnn);
   }
-  Node *newhead = old_new[loop->_head->_idx];
-  set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
 
+  ResourceArea *area = Thread::current()->resource_area();
+  Node_List extra_data_nodes(area);
+  clone_outer_loop(head, mode, loop, outer_loop, dd, old_new, extra_data_nodes);
 
   // Step 3: Now fix control uses.  Loop varying control uses have already
   // been fixed up (as part of all input edges in Step 2).  Loop invariant
   // control uses must be either an IfFalse or an IfTrue.  Make a merge
   // point to merge the old and new IfFalse/IfTrue nodes; make the use
   // refer to this.
-  ResourceArea *area = Thread::current()->resource_area();
   Node_List worklist(area);
   uint new_counter = C->unique();
   for( i = 0; i < loop->_body.size(); i++ ) {
     Node* old = loop->_body.at(i);
     if( !old->is_CFG() ) continue;
-    Node* nnn = old_new[old->_idx];
 
     // Copy uses to a worklist, so I can munge the def-use info
     // with impunity.
@@ -1686,9 +1950,29 @@
       if( !loop->is_member( use_loop ) && use->is_CFG() ) {
         // Both OLD and USE are CFG nodes here.
         assert( use->is_Proj(), "" );
+        Node* nnn = old_new[old->_idx];
+
+        Node* newuse = NULL;
+        if (head->is_strip_mined() && mode != IgnoreStripMined) {
+          CountedLoopNode* cl = head->as_CountedLoop();
+          CountedLoopEndNode* cle = cl->loopexit();
+          Node* cle_out = cle->proj_out(false);
+          if (use == cle_out) {
+            IfNode* le = cl->outer_loop_end();
+            use = le->proj_out(false);
+            use_loop = get_loop(use);
+            if (mode == CloneIncludesStripMined) {
+              nnn = old_new[le->_idx];
+            } else {
+              newuse = old_new[cle_out->_idx];
+            }
+          }
+        }
+        if (newuse == NULL) {
+          newuse = use->clone();
+        }
 
         // Clone the loop exit control projection
-        Node *newuse = use->clone();
         if (C->do_vector_loop()) {
           cm.verify_insert_and_clone(use, newuse, cm.clone_idx());
         }
@@ -1722,6 +2006,10 @@
             if( useuse->in(k) == use ) {
               useuse->set_req(k, r);
               uses_found++;
+              if (useuse->is_Loop() && k == LoopNode::EntryControl) {
+                assert(dom_depth(useuse) > dd_r , "");
+                set_idom(useuse, r, dom_depth(useuse));
+              }
             }
           }
           l -= uses_found;    // we deleted 1 or more copies of this edge
@@ -1745,126 +2033,16 @@
   Node_List *split_cex_set = NULL;
   for( i = 0; i < loop->_body.size(); i++ ) {
     Node* old = loop->_body.at(i);
-    Node* nnn = old_new[old->_idx];
-    // Copy uses to a worklist, so I can munge the def-use info
-    // with impunity.
-    for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
-      worklist.push(old->fast_out(j));
+    clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
+                                split_bool_set, split_cex_set, worklist, new_counter,
+                                mode);
+  }
 
-    while( worklist.size() ) {
-      Node *use = worklist.pop();
-      if (!has_node(use))  continue; // Ignore dead nodes
-      if (use->in(0) == C->top())  continue;
-      IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
-      // Check for data-use outside of loop - at least one of OLD or USE
-      // must not be a CFG node.
-      if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {
-
-        // If the Data use is an IF, that means we have an IF outside of the
-        // loop that is switching on a condition that is set inside of the
-        // loop.  Happens if people set a loop-exit flag; then test the flag
-        // in the loop to break the loop, then test is again outside of the
-        // loop to determine which way the loop exited.
-        // Loop predicate If node connects to Bool node through Opaque1 node.
-        if (use->is_If() || use->is_CMove() || C->is_predicate_opaq(use) || use->Opcode() == Op_Opaque4) {
-          // Since this code is highly unlikely, we lazily build the worklist
-          // of such Nodes to go split.
-          if (!split_if_set) {
-            split_if_set = new Node_List(area);
-          }
-          split_if_set->push(use);
-        }
-        if (use->is_Bool()) {
-          if (!split_bool_set) {
-            split_bool_set = new Node_List(area);
-          }
-          split_bool_set->push(use);
-        }
-        if (use->Opcode() == Op_CreateEx) {
-          if (!split_cex_set) {
-            split_cex_set = new Node_List(area);
-          }
-          split_cex_set->push(use);
-        }
-
-
-        // Get "block" use is in
-        uint idx = 0;
-        while( use->in(idx) != old ) idx++;
-        Node *prev = use->is_CFG() ? use : get_ctrl(use);
-        assert( !loop->is_member( get_loop( prev ) ), "" );
-        Node *cfg = prev->_idx >= new_counter
-          ? prev->in(2)
-          : idom(prev);
-        if( use->is_Phi() )     // Phi use is in prior block
-          cfg = prev->in(idx);  // NOT in block of Phi itself
-        if (cfg->is_top()) {    // Use is dead?
-          _igvn.replace_input_of(use, idx, C->top());
-          continue;
-        }
-
-        while( !loop->is_member( get_loop( cfg ) ) ) {
-          prev = cfg;
-          cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
-        }
-        // If the use occurs after merging several exits from the loop, then
-        // old value must have dominated all those exits.  Since the same old
-        // value was used on all those exits we did not need a Phi at this
-        // merge point.  NOW we do need a Phi here.  Each loop exit value
-        // is now merged with the peeled body exit; each exit gets its own
-        // private Phi and those Phis need to be merged here.
-        Node *phi;
-        if( prev->is_Region() ) {
-          if( idx == 0 ) {      // Updating control edge?
-            phi = prev;         // Just use existing control
-          } else {              // Else need a new Phi
-            phi = PhiNode::make( prev, old );
-            // Now recursively fix up the new uses of old!
-            for( uint i = 1; i < prev->req(); i++ ) {
-              worklist.push(phi); // Onto worklist once for each 'old' input
-            }
-          }
-        } else {
-          // Get new RegionNode merging old and new loop exits
-          prev = old_new[prev->_idx];
-          assert( prev, "just made this in step 7" );
-          if( idx == 0 ) {      // Updating control edge?
-            phi = prev;         // Just use existing control
-          } else {              // Else need a new Phi
-            // Make a new Phi merging data values properly
-            phi = PhiNode::make( prev, old );
-            phi->set_req( 1, nnn );
-          }
-        }
-        // If inserting a new Phi, check for prior hits
-        if( idx != 0 ) {
-          Node *hit = _igvn.hash_find_insert(phi);
-          if( hit == NULL ) {
-           _igvn.register_new_node_with_optimizer(phi); // Register new phi
-          } else {                                      // or
-            // Remove the new phi from the graph and use the hit
-            _igvn.remove_dead_node(phi);
-            phi = hit;                                  // Use existing phi
-          }
-          set_ctrl(phi, prev);
-        }
-        // Make 'use' use the Phi instead of the old loop body exit value
-        _igvn.replace_input_of(use, idx, phi);
-        if( use->_idx >= new_counter ) { // If updating new phis
-          // Not needed for correctness, but prevents a weak assert
-          // in AddPNode from tripping (when we end up with different
-          // base & derived Phis that will become the same after
-          // IGVN does CSE).
-          Node *hit = _igvn.hash_find_insert(use);
-          if( hit )             // Go ahead and re-hash for hits.
-            _igvn.replace_node( use, hit );
-        }
-
-        // If 'use' was in the loop-exit block, it now needs to be sunk
-        // below the post-loop merge point.
-        sink_use( use, prev );
-      }
-    }
+  for (i = 0; i < extra_data_nodes.size(); i++) {
+    Node* old = extra_data_nodes.at(i);
+    clone_loop_handle_data_uses(old, old_new, loop, outer_loop, split_if_set,
+                                split_bool_set, split_cex_set, worklist, new_counter,
+                                mode);
   }
 
   // Check for IFs that need splitting/cloning.  Happens if an IF outside of
@@ -2956,7 +3134,7 @@
 
   assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
 
-  clone_loop( loop, old_new, dd );
+  clone_loop(loop, old_new, dd, IgnoreStripMined);
 
   const uint clone_exit_idx = 1;
   const uint orig_exit_idx  = 2;
--- a/src/hotspot/share/opto/macro.cpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/opto/macro.cpp	Tue Nov 28 11:59:16 2017 +0100
@@ -282,7 +282,8 @@
         if (!this_region->in(ind)->is_IfFalse()) {
           ind = 2;
         }
-        if (this_region->in(ind)->is_IfFalse()) {
+        if (this_region->in(ind)->is_IfFalse() &&
+            this_region->in(ind)->in(0)->Opcode() == Op_If) {
           Node* bol = this_region->in(ind)->in(0)->in(1);
           assert(bol->is_Bool(), "");
           cmpx = bol->in(1);
@@ -2660,6 +2661,8 @@
         break;
       case Node::Class_ArrayCopy:
         break;
+      case Node::Class_OuterStripMinedLoop:
+        break;
       default:
         assert(n->Opcode() == Op_LoopLimit ||
                n->Opcode() == Op_Opaque1   ||
@@ -2733,6 +2736,10 @@
       } else if (n->Opcode() == Op_Opaque4) {
         _igvn.replace_node(n, n->in(2));
         success = true;
+      } else if (n->Opcode() == Op_OuterStripMinedLoop) {
+        n->as_OuterStripMinedLoop()->adjust_strip_mined_loop(&_igvn);
+        C->remove_macro_node(n);
+        success = true;
       }
       assert(success == (C->macro_count() < old_macro_count), "elimination reduces macro count");
       progress = progress || success;
--- a/src/hotspot/share/opto/node.hpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/opto/node.hpp	Tue Nov 28 11:59:16 2017 +0100
@@ -111,6 +111,8 @@
 class MultiNode;
 class MultiBranchNode;
 class NeverBranchNode;
+class OuterStripMinedLoopNode;
+class OuterStripMinedLoopEndNode;
 class Node;
 class Node_Array;
 class Node_List;
@@ -623,8 +625,9 @@
           DEFINE_CLASS_ID(Catch,       PCTable, 0)
           DEFINE_CLASS_ID(Jump,        PCTable, 1)
         DEFINE_CLASS_ID(If,          MultiBranch, 1)
-          DEFINE_CLASS_ID(CountedLoopEnd, If, 0)
-          DEFINE_CLASS_ID(RangeCheck, If, 1)
+          DEFINE_CLASS_ID(CountedLoopEnd,         If, 0)
+          DEFINE_CLASS_ID(RangeCheck,             If, 1)
+          DEFINE_CLASS_ID(OuterStripMinedLoopEnd, If, 2)
         DEFINE_CLASS_ID(NeverBranch, MultiBranch, 2)
       DEFINE_CLASS_ID(Start,       Multi, 2)
       DEFINE_CLASS_ID(MemBar,      Multi, 3)
@@ -684,8 +687,9 @@
 
     DEFINE_CLASS_ID(Region, Node, 5)
       DEFINE_CLASS_ID(Loop, Region, 0)
-        DEFINE_CLASS_ID(Root,        Loop, 0)
-        DEFINE_CLASS_ID(CountedLoop, Loop, 1)
+        DEFINE_CLASS_ID(Root,                Loop, 0)
+        DEFINE_CLASS_ID(CountedLoop,         Loop, 1)
+        DEFINE_CLASS_ID(OuterStripMinedLoop, Loop, 2)
 
     DEFINE_CLASS_ID(Sub,   Node, 6)
       DEFINE_CLASS_ID(Cmp,   Sub, 0)
@@ -841,6 +845,8 @@
   DEFINE_CLASS_QUERY(Mul)
   DEFINE_CLASS_QUERY(Multi)
   DEFINE_CLASS_QUERY(MultiBranch)
+  DEFINE_CLASS_QUERY(OuterStripMinedLoop)
+  DEFINE_CLASS_QUERY(OuterStripMinedLoopEnd)
   DEFINE_CLASS_QUERY(Parm)
   DEFINE_CLASS_QUERY(PCTable)
   DEFINE_CLASS_QUERY(Phi)
--- a/src/hotspot/share/opto/superword.cpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/opto/superword.cpp	Tue Nov 28 11:59:16 2017 +0100
@@ -1353,6 +1353,7 @@
     for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
       Node* t2 = s2->fast_out(j);
       if (!in_bb(t2)) continue;
+      if (t2->Opcode() == Op_AddI && t2 == _lp->as_CountedLoop()->incr()) continue; // don't mess with the iv
       if (!opnd_positions_match(s1, t1, s2, t2))
         continue;
       if (stmts_can_pack(t1, t2, align)) {
@@ -3313,7 +3314,7 @@
     return NULL;
   }
 
-  Node* p_f = cl->in(LoopNode::EntryControl)->in(0)->in(0);
+  Node* p_f = cl->skip_strip_mined()->in(LoopNode::EntryControl)->in(0)->in(0);
   if (!p_f->is_IfFalse()) return NULL;
   if (!p_f->in(0)->is_CountedLoopEnd()) return NULL;
   CountedLoopEndNode* pre_end = p_f->in(0)->as_CountedLoopEnd();
--- a/src/hotspot/share/runtime/arguments.cpp	Tue Nov 28 21:04:42 2017 +0530
+++ b/src/hotspot/share/runtime/arguments.cpp	Tue Nov 28 11:59:16 2017 +0100
@@ -2207,6 +2207,21 @@
     }
     FLAG_SET_CMDLINE(bool, PostLoopMultiversioning, false);
   }
+  if (UseCountedLoopSafepoints && LoopStripMiningIter == 0) {
+    if (!FLAG_IS_DEFAULT(UseCountedLoopSafepoints) || !FLAG_IS_DEFAULT(LoopStripMiningIter)) {
+      warning("When counted loop safepoints are enabled, LoopStripMiningIter must be at least 1 (a safepoint every 1 iteration): setting it to 1");
+    }
+    LoopStripMiningIter = 1;
+  } else if (!UseCountedLoopSafepoints && LoopStripMiningIter > 0) {
+    if (!FLAG_IS_DEFAULT(UseCountedLoopSafepoints) || !FLAG_IS_DEFAULT(LoopStripMiningIter)) {
+      warning("Disabling counted safepoints implies no loop strip mining: setting LoopStripMiningIter to 0");
+    }
+    LoopStripMiningIter = 0;
+  }
+  if (FLAG_IS_DEFAULT(LoopStripMiningIterShortLoop)) {
+    // blind guess
+    LoopStripMiningIterShortLoop = LoopStripMiningIter / 10;
+  }
 #endif
   return status;
 }
--- a/test/hotspot/jtreg/compiler/loopopts/UseCountedLoopSafepointsTest.java	Tue Nov 28 21:04:42 2017 +0530
+++ b/test/hotspot/jtreg/compiler/loopopts/UseCountedLoopSafepointsTest.java	Tue Nov 28 11:59:16 2017 +0100
@@ -61,7 +61,8 @@
         OutputAnalyzer oa;
         try {
             oa = ProcessTools.executeTestJvm("-XX:+UnlockDiagnosticVMOptions", "-Xbootclasspath/a:.",
-                    "-XX:" + (enabled ? "+" : "-") + "UseCountedLoopSafepoints", "-XX:+WhiteBoxAPI",
+                                             "-XX:" + (enabled ? "+" : "-") + "UseCountedLoopSafepoints",
+                                             "-XX:LoopStripMiningIter=" + (enabled ? "1" : "0"), "-XX:+WhiteBoxAPI",
                     "-XX:-Inline", "-Xbatch", "-XX:+PrintIdeal", "-XX:LoopUnrollLimit=0",
                     "-XX:CompileOnly=" + UseCountedLoopSafepoints.class.getName() + "::testMethod",
                     UseCountedLoopSafepoints.class.getName());