changeset 52875:38663fca7d7c fibers

merge
author rpressler
date Wed, 05 Dec 2018 16:44:54 +0000
parents bbef43f7dfd8 b4af06702fe3
children 679d52209695
files src/hotspot/share/classfile/javaClasses.cpp src/hotspot/share/classfile/javaClasses.hpp src/hotspot/share/classfile/vmSymbols.hpp src/hotspot/share/runtime/continuation.hpp src/java.base/share/classes/java/lang/Continuation.java
diffstat 14 files changed, 338 insertions(+), 55 deletions(-)
--- a/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp	Mon Dec 03 09:30:26 2018 +0000
+++ b/src/hotspot/cpu/x86/sharedRuntime_x86_64.cpp	Wed Dec 05 16:44:54 2018 +0000
@@ -3442,7 +3442,7 @@
   // address of the call in order to generate an oopmap. Hence, we do all the
   // work ourselves.
 
-  __ set_last_Java_frame(noreg, noreg, NULL);
+  __ set_last_Java_frame(noreg, noreg, NULL);  // JavaFrameAnchor::capture_last_Java_pc() will get the pc from the return address, which we store next:
 
   // The return address must always be correct so that frame constructor never
   // sees an invalid pc.
--- a/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Mon Dec 03 09:30:26 2018 +0000
+++ b/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp	Wed Dec 05 16:44:54 2018 +0000
@@ -5648,6 +5648,7 @@
     __ lea(fi, Address(rsp, wordSize)); // skip return address
     __ movptr(c_rarg3, rbp);
 
+    // __ stop("FFFFF");
     __ enter();
 
     // // return address and rbp are already in place
@@ -5661,8 +5662,12 @@
     __ post_call_nop(); // this must be exactly after the pc value that is pushed into the frame info, we use this nop for fast CodeBlob lookup
 
     if (ContPerfTest > 5) {
-    __ set_last_Java_frame(rsp, rbp, the_pc); // may be unnecessary. also, consider MacroAssembler::call_VM_leaf_base
-    __ call_VM(noreg, CAST_FROM_FN_PTR(address, Continuation::freeze), fi, false); // do NOT check exceptions; they'll get forwarded to the caller
+    // if (from_java) {
+      __ set_last_Java_frame(rsp, rbp, the_pc); // may be unnecessary. also, consider MacroAssembler::call_VM_leaf_base
+      __ call_VM(noreg, CAST_FROM_FN_PTR(address, Continuation::freeze), fi, false); // do NOT check exceptions; they'll get forwarded to the caller
+    // } else {
+    //   __ call_VM_leaf(CAST_FROM_FN_PTR(address, Continuation::freeze_C), fi);
+    // }
     }
 
     Label pinned;
@@ -5698,6 +5703,32 @@
     return stub;
   }
 
+  address generate_cont_jump_from_safepoint() {
+    StubCodeMark mark(this, "StubRoutines","Continuation jump from safepoint");
+
+    Register fi = rbx;
+
+    address start = __ pc();
+
+    __ get_thread(r15_thread);
+    __ reset_last_Java_frame(true); // false would be fine, too, I guess
+
+    __ lea(fi, Address(r15_thread, JavaThread::cont_frame_offset()));
+    __ movptr(rdx, Address(fi, wordSize*0)); // pc
+    __ movptr(rbp, Address(fi, wordSize*1)); // fp
+    __ movptr(rbp, Address(rbp, 0)); // fp is indirect. See Continuation::freeze for an explanation.
+    __ movptr(rsp, Address(fi, wordSize*2)); // sp
+
+    __ xorq(rax, rax);
+    __ movptr(Address(fi, wordSize*0), rax); // pc
+    __ movptr(Address(fi, wordSize*1), rax); // fp
+    __ movptr(Address(fi, wordSize*2), rax); // sp
+
+    __ jmp(rdx);
+
+    return start;
+  }
+
   // c_rarg1 - sp
   // c_rarg2 - fp
   // c_rarg3 - pc
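
The jump-from-safepoint stub above consumes a three-word FrameInfo record published on the JavaThread at cont_frame_offset(): word 0 is the pc to jump to, word 1 the fp (stored indirectly, per the comment), word 2 the sp; after loading them it zeroes all three words so the forced yield is not re-delivered. A minimal sketch of that consume-and-clear protocol, with stand-in types; the field order is inferred from the stub's comments, not from the FrameInfo declaration:

#include <cstdint>
#include <cstring>

// Stand-in for the record read by generate_cont_jump_from_safepoint.
struct FrameInfoSketch {
  void*      pc;      // target of the final jump (rdx in the stub)
  intptr_t** fp_slot; // address of the stack slot holding the saved fp;
                      // the stub dereferences it once ("fp is indirect")
  intptr_t*  sp;      // stack pointer to restore
};

// Read all three words, then zero the record so is_cont_force_yield()
// (which tests pc != NULL) stops reporting a pending forced yield.
inline FrameInfoSketch consume_cont_frame(FrameInfoSketch& fi) {
  FrameInfoSketch copy = fi;
  std::memset(&fi, 0, sizeof(fi));
  return copy; // the stub now sets rsp/rbp from the copy and jumps to copy.pc
}
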
@@ -5714,6 +5745,8 @@
   }
 
   address generate_cont_thaw(bool return_barrier, bool exception) {
+    assert (return_barrier || !exception, "must be");
+
     address start = __ pc();
 
     // TODO: Handle Valhalla return types. May require generating different return barriers.
@@ -5725,7 +5758,7 @@
       // __ lea(rsp, Address(rsp, wordSize)); // pop return address. if we don't do this, we get a drift, where the bottom-most frozen frame continuously grows
     }
 
-    Label thaw_fail;
+    Label thaw_success;
     __ movptr(fi, rsp);
     if (return_barrier) {
       __ push(rax); __ push_d(xmm0); // preserve possible return value from a method returning to the return barrier
@@ -5738,7 +5771,16 @@
       __ xorq(rax, rax);
     }
     __ testq(rax, rax);           // rax contains the size of the frames to thaw, 0 if overflow or no more frames
-    __ jcc(Assembler::zero, thaw_fail);
+    __ jcc(Assembler::notZero, thaw_success);
+
+    pop_FrameInfo(_masm, fi, rbp, rbx);
+    if (return_barrier) {
+      __ pop_d(xmm0); __ pop(rax); // restore return value (no safepoint in the call to thaw, so even an oop return value should be OK)
+    } 
+    __ movptr(rsp, fi); // we're now on the yield frame (which is at an address above us b/c rsp has been pushed down)
+    __ jmp(rbx); // a jump to StubRoutines::throw_StackOverflowError_entry
+
+    __ bind(thaw_success);
 
     pop_FrameInfo(_masm, fi, rbp, c_rarg3); // c_rarg3 would still be our return address
     if (return_barrier) {
@@ -5753,18 +5795,36 @@
     __ movl(c_rarg1, return_barrier);
     __ movl(c_rarg2, exception);
     __ call_VM_leaf(CAST_FROM_FN_PTR(address, Continuation::thaw), fi, c_rarg1, c_rarg2);
-    if (!return_barrier) {
-      __ movl(rax, 0); // return 0 (success) from doYield
-    } 
     if (exception) {
       __ movptr(rdx, rax); // rdx must contain the original pc in the case of exception
     }
-    __ bind(thaw_fail);
     pop_FrameInfo(_masm, fi, rbp, rbx);
     if (return_barrier) {
       __ pop_d(xmm0); __ pop(rax); // restore return value (no safepoint in the call to thaw, so even an oop return value should be OK)
+    } 
+
+    __ movptr(rsp, fi); // we're now on the yield frame (which is at an address above us b/c rsp has been pushed down)
+
+    if (!return_barrier) {
+      // This is necessary for forced yields: the return address (in rbx) was captured in a call_VM, so returning through it skips the restoration of rbcp and locals
+      // ... but it does no harm even for ordinary yields
+      // TODO: use InterpreterMacroAssembler
+      static const Register _locals_register = LP64_ONLY(r14) NOT_LP64(rdi);
+      static const Register _bcp_register    = LP64_ONLY(r13) NOT_LP64(rsi);
+
+      Label not_interpreter;
+      __ testq(rax, rax); // rax is nonzero iff we're jumping into the interpreter
+      __ jcc(Assembler::zero, not_interpreter);
+
+      // see InterpreterMacroAssembler::restore_bcp/restore_locals
+      __ movptr(_bcp_register,    Address(rbp, frame::interpreter_frame_bcp_offset    * wordSize));
+      __ movptr(_locals_register, Address(rbp, frame::interpreter_frame_locals_offset * wordSize));
+
+      __ bind(not_interpreter);
+
+      __ movl(rax, 0); // return 0 (success) from doYield
     }
-    __ movptr(rsp, fi); // we're now on the yield frame (which is above us b/c rsp has been pushed down)
+
     __ jmp(rbx);
 
     return start;
@@ -6071,6 +6131,7 @@
     StubRoutines::_cont_returnBarrierExc = generate_cont_returnBarrier_exception();
     StubRoutines::_cont_doYield_stub = generate_cont_doYield();
     StubRoutines::_cont_doYield    = StubRoutines::_cont_doYield_stub->entry_point();
+    StubRoutines::_cont_jump_from_sp = generate_cont_jump_from_safepoint();
     StubRoutines::_cont_jump       = generate_cont_jump();
     StubRoutines::_cont_getSP      = generate_cont_getSP();
     StubRoutines::_cont_getPC      = generate_cont_getPC();
--- a/src/hotspot/share/aot/aotLoader.cpp	Mon Dec 03 09:30:26 2018 +0000
+++ b/src/hotspot/share/aot/aotLoader.cpp	Wed Dec 05 16:44:54 2018 +0000
@@ -29,6 +29,7 @@
 #include "jvmci/jvmciRuntime.hpp"
 #include "memory/allocation.inline.hpp"
 #include "oops/method.hpp"
+#include "runtime/frame.inline.hpp"
 #include "runtime/handles.inline.hpp"
 #include "runtime/os.hpp"
 #include "runtime/timerTrace.hpp"
--- a/src/hotspot/share/classfile/javaClasses.cpp	Mon Dec 03 09:30:26 2018 +0000
+++ b/src/hotspot/share/classfile/javaClasses.cpp	Wed Dec 05 16:44:54 2018 +0000
@@ -4094,6 +4094,7 @@
 int java_lang_Continuation::_numInterpretedFrames_offset;
 int java_lang_Continuation::_refStack_offset;
 int java_lang_Continuation::_parent_offset;
+int java_lang_Continuation::_yieldInfo_offset;
 int java_lang_Continuation::_entrySP_offset;
 int java_lang_Continuation::_entryFP_offset;
 int java_lang_Continuation::_entryPC_offset;
@@ -4257,6 +4258,7 @@
   macro(_scope_offset,     k, vmSymbols::scope_name(),     continuationscope_signature, false); \
   macro(_target_offset,    k, vmSymbols::target_name(),    runnable_signature,          false); \
   macro(_parent_offset,    k, vmSymbols::parent_name(),    continuation_signature,      false); \
+  macro(_yieldInfo_offset, k, vmSymbols::yieldInfo_name(), object_signature,            false); \
   macro(_stack_offset,     k, vmSymbols::stack_name(),     int_array_signature,         false); \
   macro(_maxSize_offset,   k, vmSymbols::maxSize_name(),   int_signature,               false); \
   macro(_refStack_offset,  k, vmSymbols::refStack_name(),  object_array_signature,      false); \
--- a/src/hotspot/share/classfile/javaClasses.hpp	Mon Dec 03 09:30:26 2018 +0000
+++ b/src/hotspot/share/classfile/javaClasses.hpp	Wed Dec 05 16:44:54 2018 +0000
@@ -981,6 +981,7 @@
   static int _scope_offset;
   static int _target_offset;
   static int _parent_offset;
+  static int _yieldInfo_offset;
   static int _entrySP_offset;
   static int _entryFP_offset;
   static int _entryPC_offset;
@@ -1003,6 +1004,8 @@
   static inline oop scope(oop ref);
   static inline oop target(oop ref);
   static inline oop parent(oop ref);
+  static inline oop yieldInfo(oop ref);
+  static inline void set_yieldInfo(oop ref, oop value);
   static inline typeArrayOop stack(oop ref);
   static inline objArrayOop refStack(oop ref);
   static inline void set_stack(oop obj, oop value);
--- a/src/hotspot/share/classfile/javaClasses.inline.hpp	Mon Dec 03 09:30:26 2018 +0000
+++ b/src/hotspot/share/classfile/javaClasses.inline.hpp	Wed Dec 05 16:44:54 2018 +0000
@@ -144,6 +144,12 @@
 inline oop java_lang_Continuation::parent(oop ref) {
   return ref->obj_field(_parent_offset);
 }
+inline oop java_lang_Continuation::yieldInfo(oop ref) {
+  return ref->obj_field(_yieldInfo_offset);
+}
+inline void java_lang_Continuation::set_yieldInfo(oop ref, oop value) {
+  ref->obj_field_put(_yieldInfo_offset, value);
+}
 inline typeArrayOop java_lang_Continuation::stack(oop ref) {
   oop a = ref->obj_field(_stack_offset);
   return (typeArrayOop)a;
--- a/src/hotspot/share/classfile/vmSymbols.hpp	Mon Dec 03 09:30:26 2018 +0000
+++ b/src/hotspot/share/classfile/vmSymbols.hpp	Wed Dec 05 16:44:54 2018 +0000
@@ -383,6 +383,7 @@
   template(getStacks_name,                            "getStacks")                                \
   template(onPinned_name,                             "onPinned0")                                \
   template(scope_name,                                "scope")                                    \
+  template(yieldInfo_name,                            "yieldInfo")                                \
   template(entrySP_name,                              "entrySP")                                  \
   template(entryFP_name,                              "entryFP")                                  \
   template(entryPC_name,                              "entryPC")                                  \
--- a/src/hotspot/share/runtime/continuation.cpp	Mon Dec 03 09:30:26 2018 +0000
+++ b/src/hotspot/share/runtime/continuation.cpp	Wed Dec 05 16:44:54 2018 +0000
@@ -145,6 +145,7 @@
 #define METADATA_ELEMENTS (METADATA_SIZE / ELEM_SIZE)
 
 static const unsigned char FLAG_LAST_FRAME_INTERPRETED = 1;
+static const unsigned char FLAG_SAFEPOINT_YIELD = 1 << 1;
 
 
 // static inline HFrameMetadata* metadata(intptr_t* hsp) {
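
FLAG_SAFEPOINT_YIELD takes the next bit after FLAG_LAST_FRAME_INTERPRETED in the continuation's flag byte (the matching Java-side constant is added to Continuation.java at the end of this changeset); freeze_continuation sets it below via cont.set_flag(FLAG_SAFEPOINT_YIELD, safepoint_yield). A trivial sketch of how such flag bits compose, with a hypothetical set_flag helper standing in for the mirror's accessor:

#include <cassert>

static const unsigned char FLAG_LAST_FRAME_INTERPRETED = 1;
static const unsigned char FLAG_SAFEPOINT_YIELD        = 1 << 1;

// Hypothetical helper standing in for the continuation mirror's set_flag(flag, value).
static unsigned char set_flag(unsigned char flags, unsigned char flag, bool value) {
  return value ? (unsigned char)(flags | flag) : (unsigned char)(flags & ~flag);
}

int main() {
  unsigned char flags = 0;
  flags = set_flag(flags, FLAG_SAFEPOINT_YIELD, true);         // forced yield
  flags = set_flag(flags, FLAG_LAST_FRAME_INTERPRETED, false); // compiled top frame
  assert((flags & FLAG_SAFEPOINT_YIELD) != 0);
  assert((flags & FLAG_LAST_FRAME_INTERPRETED) == 0);
  return 0;
}
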
@@ -1334,6 +1335,11 @@
   log_trace(jvmcont)("patched return_pc: " INTPTR_FORMAT, p2i(pc));
 }
 
+static void patch_pc(frame& f, address pc) {
+  address* pc_addr = &(((address*) f.sp())[-1]); // x86-specific
+  *pc_addr = pc;
+}
+
 // static void patch_interpreted_bci(frame& f, int bci) {
 //   f.interpreter_frame_set_bcp(f.interpreter_frame_method()->bcp_from(bci));
 // }
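
patch_pc (used by try_force_yield further down) relies on the x86 layout in which the word immediately below a frame's sp holds the return address pushed by that frame's call into its callee, i.e. the pc at which the frame will resume. Overwriting that slot redirects the frame into the new jump-from-safepoint stub once control returns to it. A hedged, plain-C++ illustration of the slot being patched (HotSpot's frame class is not used here):

#include <cstdint>

// x86-specific sketch: when a frame calls into the VM, the call instruction
// pushes the frame's resume pc into the word just below the frame's sp.
// Overwriting that slot changes where execution continues on return.
static void patch_pc_sketch(intptr_t* frame_sp, void* new_resume_pc) {
  void** return_address_slot = reinterpret_cast<void**>(frame_sp) - 1;
  *return_address_slot = new_resume_pc;
}
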
@@ -1471,8 +1477,9 @@
 
 #ifdef ASSERT // this section adds substantial overhead
     VMReg reg;
-    assert(offset >= 0 || p == _fr->saved_link_address(_map),
-      "offset: %d reg: %s", offset, (reg = find_register_spilled_here(p, _map), reg != NULL ? reg->name() : "NONE")); // calle-saved register can only be rbp
+    // The following is not true for the sender of the safepoint stub
+    // assert(offset >= 0 || p == _fr->saved_link_address(_map),
+    //   "offset: %d reg: %s", offset, (reg = find_register_spilled_here(p, _map), reg != NULL ? reg->name() : "NONE")); // callee-saved register can only be rbp
     reg = find_register_spilled_here(p, _map); // expensive operation
     if (reg != NULL) log_trace(jvmcont)("reg: %s", reg->name());
     log_trace(jvmcont)("p: " INTPTR_FORMAT " offset: %d %s", p2i(p), offset, p == _fr->saved_link_address(_map) ? "(link)" : "");
@@ -1655,7 +1662,8 @@
       log_trace(jvmcont)("Setting reverse oop index at " INTPTR_FORMAT " (offset: %d) : %d (length: %d)", p2i(hloc), offset, oop_reverse_index, _refStack_length);
       assert(offset < 32768, "");
     } else {
-      assert (p == (T*)_fr->saved_link_address(_map), "");
+      // The following is not true for the sender of the safepoint stub
+      // assert (p == (T*)_fr->saved_link_address(_map), "");
       _fc->set_oop_fp_index(oop_reverse_index);
       log_trace(jvmcont)("Setting reverse oop index in callerinfo (offset: %d) : %d (length: %d)", offset, oop_reverse_index, _refStack_length);
     }
@@ -1902,7 +1910,15 @@
     hframe hf = _mirror.new_hframe<true>(hsp, (hsp + (long)(f.fp() - vsp)), f.pc(), NULL, true);
     save_bounding_hframe(hf, is_first);
 
-    relativize(vfp, hfp, frame::interpreter_frame_last_sp_offset);
+    assert ((*(vfp + frame::interpreter_frame_last_sp_offset) != 0) || (f.unextended_sp() == f.sp()), 
+      "*(vfp + frame::interpreter_frame_last_sp_offset): %p f.unextended_sp(): %p f.sp(): %p",
+      (void*)*(vfp + frame::interpreter_frame_last_sp_offset), f.unextended_sp(), f.sp());
+
+    if (*(vfp + frame::interpreter_frame_last_sp_offset) == 0) {
+      *(hfp + frame::interpreter_frame_last_sp_offset) = 0;
+    } else {
+      relativize(vfp, hfp, frame::interpreter_frame_last_sp_offset);
+    }
     relativize(vfp, hfp, frame::interpreter_frame_initial_sp_offset); // == block_top == block_bottom
     relativize(vfp, hfp, frame::interpreter_frame_locals_offset);
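
Freezing an interpreted frame stores several frame-local pointers relative to the frame pointer ("relativize") so they stay meaningful once the frame is copied to the continuation's heap-side stack. The new code above copies a raw 0 in the last_sp slot through unchanged; the thaw path (see the hunk around interpreted-frame thawing further down) recognizes that 0 as the marker of a frame interrupted at a safepoint rather than at a call. A hedged sketch of the special-casing; the exact offset encoding HotSpot uses may differ:

#include <cstdint>

// Plausible encoding only: store slot [index] as a frame-pointer-relative
// value in the heap-side copy so it survives being moved off the thread stack.
static void relativize_sketch(intptr_t* vfp, intptr_t* hfp, int index) {
  hfp[index] = vfp[index] - reinterpret_cast<intptr_t>(vfp);
}

// Mirror of the zero check above: a last_sp of 0 means "no saved last_sp"
// (the frame was stopped at a safepoint, not at a call site) and must be
// preserved verbatim; relativizing 0 would fabricate a bogus offset.
static void copy_last_sp_slot(intptr_t* vfp, intptr_t* hfp, int index) {
  if (vfp[index] == 0) {
    hfp[index] = 0;
  } else {
    relativize_sketch(vfp, hfp, index);
  }
}
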
 
@@ -1924,8 +1940,9 @@
     return freeze_ok;
   }
 
+  template <bool stub>
   res_freeze freeze_compiled_stackframe(frame& f, intptr_t** callee_link_address, hframe& caller, bool is_first) {
-    if (is_compiled_frame_owning_locks(_mirror.thread(), &_map, f)) {
+    if (!stub && is_compiled_frame_owning_locks(_mirror.thread(), &_map, f)) {
       return freeze_pinned_monitor;
     }
 
@@ -1952,7 +1969,9 @@
     assert (fsize1 == fsize, "");
 #endif
 
-    f.cb()->as_compiled_method()->inc_on_continuation_stack();
+    if (!stub) {
+      f.cb()->as_compiled_method()->inc_on_continuation_stack();
+    }
 
     intptr_t* hsp = freeze_raw_frame(vsp, fsize);
 
@@ -1973,7 +1992,11 @@
 
     update_map_with_saved_link(&_map, callee_link_address);
 
-    freeze_oops<FreezeCompiledOops>(f, vsp, hsp, oops); // must be called after patch, as patch uses the previous freeze_oop data
+    if (!stub) {
+      freeze_oops<FreezeCompiledOops>(f, vsp, hsp, oops); // must be called after patch, as patch uses the previous freeze_oop data
+    } else {
+      _has_fp_oop = false;
+    }
 
     caller = hf;
 
@@ -2030,18 +2053,18 @@
 
     _frames++;
     res_freeze result;
-    bool is_compiled = f.is_compiled_frame();
-    if (is_compiled) {
+    if (f.is_compiled_frame()) {
       if (f.oop_map() == NULL) {
         return freeze_pinned_native; // special native frame
       }
-      result = freeze_compiled_stackframe(f, callee_link_address, caller, is_first);
+      result = freeze_compiled_stackframe<false>(f, callee_link_address, caller, is_first);
+    } else if (f.is_interpreted_frame()) {
+      result = freeze_interpreted_stackframe(f, caller, is_first);
+    } else if (f.cb()->is_safepoint_stub()) {
+      assert (is_first, "");
+      result = freeze_compiled_stackframe<true>(f, callee_link_address, caller, is_first);
     } else {
-      bool is_interpreted = f.is_interpreted_frame();
-      if (!is_interpreted) {
-        return freeze_pinned_native;
-      }
-      result = freeze_interpreted_stackframe(f, caller, is_first);
+      return freeze_pinned_native;
     }
 
     if (_is_last) _is_last = false;
@@ -2094,7 +2117,7 @@
 };
 
 // freezes all frames of a single continuation
-static res_freeze freeze_continuation(JavaThread* thread, oop oopCont, frame& f, RegisterMap& map) {
+static res_freeze freeze_continuation(JavaThread* thread, oop oopCont, frame& f, RegisterMap& map, bool safepoint_yield) {
   assert (oopCont != NULL, "");
 
   log_trace(jvmcont)("Freeze ___ cont: " INTPTR_FORMAT, p2i((oopDesc*)oopCont));
@@ -2130,6 +2153,8 @@
 
   fc.finish(empty, f);
 
+  cont.set_flag(FLAG_SAFEPOINT_YIELD, safepoint_yield);
+
   cont.write();
 
   // notify JVMTI
@@ -2158,11 +2183,11 @@
 // it must set Continuation.stackSize
 // sets Continuation.fp/sp to relative indices
 //
-// In: fi->pc, fi->sp, fi->fp all point to the current (topmost) frame to freeze (the yield frame)
+// In: fi->pc, fi->sp, fi->fp all point to the current (topmost) frame to freeze (the yield frame); THESE VALUES ARE CURRENTLY UNUSED
 // Out: fi->pc, fi->sp, fi->fp all point to the run frame (entry's caller)
 //      unless freezing has failed, in which case fi->pc = 0
 //      However, fi->fp points to the _address_ on the stack of the entry frame's link to its caller (so *(fi->fp) is the fp)
-JRT_ENTRY(int, Continuation::freeze(JavaThread* thread, FrameInfo* fi))
+int Continuation::freeze0(JavaThread* thread, FrameInfo* fi, bool safepoint_yield) {
   callgrind();
   Continuation::PERFTEST_LEVEL = ContPerfTest;
 
@@ -2171,9 +2196,12 @@
     return freeze_ok;
   }
 
+  assert (!thread->_cont_yield, "");
+  thread->_cont_yield = true;
   log_debug(jvmcont)("~~~~~~~~~ freeze");
   log_trace(jvmcont)("fi->sp: " INTPTR_FORMAT " fi->fp: " INTPTR_FORMAT " fi->pc: " INTPTR_FORMAT, p2i(fi->sp), p2i(fi->fp), p2i(fi->pc));
-
+  assert (thread->thread_state() == _thread_in_vm, "");
+  
   // set_anchor(thread, fi); // DEBUG
   print_frames(thread);
 
@@ -2185,6 +2213,7 @@
   if (thread->has_pending_exception()) {
     fi->fp = NULL; fi->sp = NULL; fi->pc = NULL;
     log_trace(jvmcont)("=== end of freeze (fail 0)");
+    thread->_cont_yield = false;
     return freeze_exception;
   }
 
@@ -2201,20 +2230,34 @@
 
   // Note: if the doYield stub does not have its own frame, we may need to consider deopt here, especially if yield is inlinable
   frame f = thread->last_frame(); // this is the doYield stub frame. last_frame is set up by the call_VM infrastructure // <---- CodeCache::find_blob is expensive
-  // f.print_on(tty);
-
   frame::update_map_with_saved_link(&map, link_address(f));
-  f = f.frame_sender<ContinuationCodeBlobLookup>(&map); // LOOKUP // this is the yield frame
-
-  assert (f.pc() == fi->pc, "");
+
+  if (!safepoint_yield) {
+    assert (StubRoutines::cont_doYield_stub()->contains(f.pc()), "must be");
+    f = f.frame_sender<ContinuationCodeBlobLookup>(&map); // LOOKUP // this is the yield frame
+    assert (f.pc() == fi->pc, "");
+  } else { // safepoint yield
+    f.set_fp(f.real_fp()); // Instead of this, maybe in ContMirror::set_last_frame always use the real_fp?
+    if (Interpreter::contains(f.pc())) {
+      log_trace(jvmcont)("INTERPRETER SAFEPOINT");
+      // f.set_sp(f.sp() - 1); // state pushed to the stack
+    } else {
+      log_trace(jvmcont)("COMPILER SAFEPOINT");
+      assert (f.cb()->is_safepoint_stub(), "must be");
+      assert (f.oop_map() != NULL, "must be");
+      f.oop_map()->update_register_map(&f, &map); // we have callee-save registers in this case
+    }
+  }
+  
   // The following doesn't work because fi->fp can contain an oop, that a GC doesn't know about when walking.
   // frame::update_map_with_saved_link(&map, (intptr_t **)&fi->fp);
   // frame f(fi->sp, fi->fp, fi->pc); // the yield frame
 
-  res_freeze res = freeze_continuation(thread, cont, f, map); // changes f
+  res_freeze res = freeze_continuation(thread, cont, f, map, safepoint_yield); // changes f
   if (res != freeze_ok) {
     fi->fp = NULL; fi->sp = NULL; fi->pc = NULL;
     log_trace(jvmcont)("=== end of freeze (fail)");
+    thread->_cont_yield = false;
     return res;
   }
 
@@ -2248,9 +2291,61 @@
   log_debug(jvmcont)("ENTRY: sp: " INTPTR_FORMAT " fp: " INTPTR_FORMAT " pc: " INTPTR_FORMAT, p2i(fi->sp), p2i(fi->fp), p2i(fi->pc));
   log_debug(jvmcont)("=== End of freeze");
 
+  thread->_cont_yield = false;
   return 0;
+}
+
+JRT_ENTRY(int, Continuation::freeze(JavaThread* thread, FrameInfo* fi))
+  return freeze0(thread, fi, false);
 JRT_END
 
+typedef int (*DoYieldStub)(int scopes);
+
+int Continuation::try_force_yield(JavaThread* thread, oop cont) {
+
+  oop innermost = get_continuation(thread);
+  oop scope = NULL;
+  for (oop c = innermost; c != NULL; c = java_lang_Continuation::parent(c)) {
+    if (c == cont) {
+      scope = java_lang_Continuation::scope(c);
+      break;
+    }
+  }
+  if (scope == NULL) {
+    return -1; // no continuation
+  }
+  if (thread->_cont_yield) {
+    return -2; // during yield
+  }
+  if (innermost != cont) {
+    java_lang_Continuation::set_yieldInfo(cont, scope);
+  }
+  
+// #ifdef ASSERT
+//   tty->print_cr("FREEZING:");
+//   frame lf = thread->last_frame();
+//   lf.print_on(tty);
+//   tty->print_cr("");
+//   const ImmutableOopMap* oopmap = lf.oop_map();
+//   if (oopmap != NULL) {
+//     oopmap->print();
+//     tty->print_cr("");
+//   } else {
+//     tty->print_cr("oopmap: NULL");
+//   }
+//   tty->print_cr("*&^*&#^$*&&@(#*&@(#&*(*@#&*(&@#$^*(&#$(*&#@$(*&#($*&@#($*&$(#*$");
+// #endif
+  // TODO: save return value
+
+  FrameInfo fi;
+  int res = freeze0(thread, &fi, true); // CAST_TO_FN_PTR(DoYieldStub, StubRoutines::cont_doYield_C())(-1);
+  if (res == 0) { // success
+    thread->_cont_frame = fi;
+    frame last = thread->last_frame();
+    patch_pc(last, StubRoutines::cont_jump_from_sp()); // reinstates rbcp and rlocals for the sake of the interpreter
+  }
+  return res;
+}
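
try_force_yield only freezes if the target continuation is actually mounted on the victim thread and no yield is already in progress; the search walks the parent chain from the innermost continuation outwards and picks up the target's scope along the way. A simplified model of that walk, with stand-in types for the java_lang_Continuation oop accessors:

// Stand-in types; the real code walks oops via java_lang_Continuation::parent/scope.
struct ContinuationSketch {
  ContinuationSketch* parent; // enclosing (outer) continuation, or nullptr
  void*               scope;  // the scope this continuation belongs to
};

// Walk from the innermost mounted continuation outwards; return the target's
// scope if it is mounted on this thread, otherwise nullptr (the "-1: no
// continuation" failure path in try_force_yield).
static void* find_mounted_scope(ContinuationSketch* innermost,
                                ContinuationSketch* target) {
  for (ContinuationSketch* c = innermost; c != nullptr; c = c->parent) {
    if (c == target)
      return c->scope;
  }
  return nullptr;
}
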
 ///////////////
 
 class ThawOopFn: public ContOopBase {
@@ -2308,11 +2403,17 @@
   intptr_t* hfp = cont.stack_address(hf.fp());
   intptr_t* vfp = vsp + (hfp - hsp);
 
-  derelativize(vfp, frame::interpreter_frame_last_sp_offset);
+  bool safepoint_stub = false;
+  if (*(hfp + frame::interpreter_frame_last_sp_offset) == 0) {
+    *(vfp + frame::interpreter_frame_last_sp_offset) = 0;
+    safepoint_stub = true; // the last yield was forced, i.e. it happened at a safepoint
+  } else {
+    derelativize(vfp, frame::interpreter_frame_last_sp_offset);
+  }
   derelativize(vfp, frame::interpreter_frame_initial_sp_offset); // == block_top == block_bottom
   derelativize(vfp, frame::interpreter_frame_locals_offset);
 
-  intptr_t* unextended_sp = *(intptr_t**)(vfp + frame::interpreter_frame_last_sp_offset);
+  intptr_t* unextended_sp = safepoint_stub ? vsp : *(intptr_t**)(vfp + frame::interpreter_frame_last_sp_offset);
   frame f(vsp, unextended_sp, vfp, hf.pc());
 
   patch_sender_sp(f, sender.unextended_sp()); // derelativize(vfp, frame::interpreter_frame_sender_sp_offset);
@@ -2320,9 +2421,7 @@
   assert (*(intptr_t**)(vfp + frame::interpreter_frame_locals_offset) < frame_top(sender), "sender top: " INTPTR_FORMAT " locals: " INTPTR_FORMAT,
     p2i(frame_top(sender)), p2i(*(intptr_t**)(vfp + frame::interpreter_frame_locals_offset)));
 
-  if (!f.is_interpreted_frame_valid(cont.thread())) {
-    assert(f.is_interpreted_frame_valid(cont.thread()), "invalid thawed frame");
-  }
+  assert(f.is_interpreted_frame_valid(cont.thread()), "invalid thawed frame");
 
   cont.dec_num_frames();
   cont.dec_num_interpreted_frames();
@@ -2352,7 +2451,8 @@
   intptr_t* hsp = cont.stack_address(hf.sp());
   cont.copy_from_stack(hsp, vsp, hf.size(cont));
 
-  hf.cb()->as_compiled_method()->dec_on_continuation_stack();
+  if (!hf.cb()->is_safepoint_stub())
+    hf.cb()->as_compiled_method()->dec_on_continuation_stack();
 
   frame f(vsp, (intptr_t*)hf.fp(), hf.pc());
 
@@ -2415,7 +2515,7 @@
   log_trace(jvmcont)("Done walking oops");
 }
 
-static frame thaw_frame(ContMirror& cont, hframe& hf, int oop_index, frame& sender, bool &deoptimized) {
+static frame thaw_frame(ContMirror& cont, hframe& hf, int oop_index, frame& sender, bool &deoptimized, hframe* callee_safepoint_stub) {
   log_trace(jvmcont)("=============================");
 
   if (log_is_enabled(Trace, jvmcont)) hf.print(cont);
@@ -2493,10 +2593,11 @@
 
   RegisterMap map(cont.thread(), true, false, false);
   map.set_include_argument_oops(false);
+  // map is only passed to thaw_compiled_frame for use in deoptimize, which uses it only for biased locks; we may not need deoptimize there at all -- investigate
 
   bool is_interpreted = hf.is_interpreted_frame();
   frame f = is_interpreted ? thaw_interpreted_frame(cont, hf, vsp, sender)
-                                      :    thaw_compiled_frame(cont, hf, vsp, sender, map, deoptimized);
+                           : thaw_compiled_frame(cont, hf, vsp, sender, map, deoptimized);
 
   patch_link(f, sender.fp(), is_interpreted);
   patch_return_pc(f, ret_pc, is_interpreted);
@@ -2509,6 +2610,21 @@
 
   assert (!is_entry_frame(cont, sender) || sender.fp() == cont.entryFP(), "sender.fp: " INTPTR_FORMAT " entryFP: " INTPTR_FORMAT, p2i(sender.fp()), p2i(cont.entryFP()));
 
+  frame calleef;
+  if (callee_safepoint_stub != NULL) {
+    // A safepoint stub is the only case in which we encounter callee-saved registers (aside from rbp). We therefore thaw that frame
+    // before thawing the oops in its sender, as the oops will need to be written to that stub frame.
+    log_trace(jvmcont)("THAWING SAFEPOINT STUB");
+    hframe callee = *callee_safepoint_stub;
+    const int callee_fsize = callee.uncompressed_size(cont) != 0 ? callee.uncompressed_size(cont) : callee.size(cont);
+    const address callee_bottom = (address) f.sp();
+    intptr_t* callee_vsp = (intptr_t*)(callee_bottom - callee_fsize);
+    cont.sub_size(callee_fsize);
+
+    calleef = thaw_compiled_frame(cont, callee, callee_vsp, f, map, deoptimized);
+    calleef.oop_map()->update_register_map(&f, &map);
+    log_trace(jvmcont)("THAWING OOPS FOR SENDER OF SAFEPOINT STUB");
+  }
   // assert (oop_index == hf.ref_sp(), "");
   thaw_oops(cont, f, oop_index, hf.num_oops(cont), f.sp(), map, is_interpreted ? NULL : f.oop_map());
 
@@ -2517,10 +2633,14 @@
   print_vframe(f, &dmap);
 #endif
 
+  if (callee_safepoint_stub != NULL) {
+    return calleef;
+  }
+
   return f;
 }
 
-static frame thaw_frames(ContMirror& cont, hframe hf, int oop_index, int num_frames, int& count, int &last_oop_index, hframe& last_frame, bool& deoptimized) {
+static frame thaw_frames(ContMirror& cont, hframe hf, int oop_index, int num_frames, int& count, int &last_oop_index, hframe& last_frame, bool& deoptimized, hframe* callee_safepoint_stub) {
   if (num_frames == 0 || hf.is_empty()) {
     frame entry(cont.entrySP(), cont.entryFP(), cont.entryPC());
     log_trace(jvmcont)("Found entry:");
@@ -2539,10 +2659,18 @@
     return entry;
   }
 
+  bool is_safepoint_stub = false;
+  if (/*is_first &&*/ hf.cb() != NULL && hf.cb()->is_safepoint_stub()) {
+    log_trace(jvmcont)("Found safepoint stub");
+    is_safepoint_stub = true;
+    assert (hf.num_oops(cont) == 0, "must be");
+  }
+
   // assert (oop_index == hf.ref_sp(), "oop_index: %d hf.ref_sp(): %d", oop_index, hf.ref_sp());
   hframe hsender = hf.sender(cont);
-  frame sender = thaw_frames(cont, hsender, oop_index + hf.num_oops(cont), num_frames - 1, count, last_oop_index, last_frame, deoptimized);
-  frame f = thaw_frame(cont, hf, oop_index, sender, deoptimized);
+  frame sender = thaw_frames(cont, hsender, oop_index + hf.num_oops(cont), num_frames - 1, count, last_oop_index, last_frame, deoptimized, is_safepoint_stub ? &hf : NULL);
+  // In the case of a safepoint stub, the above line, called on the stub's sender, actually returns the safepoint stub after thawing it.
+  frame f = is_safepoint_stub ? sender : thaw_frame(cont, hf, oop_index, sender, deoptimized, callee_safepoint_stub);
 
   assert ((count == 0) == is_entry_frame(cont, sender), "");
   assert (hf.is_bottom(cont) <= last_frame.is_empty(), "hf.is_bottom(cont): %d last_frame.is_empty(): %d ", hf.is_bottom(cont), last_frame.is_empty());
@@ -2639,7 +2767,7 @@
   int last_oop_index = 0;
   hframe last_frame;
   bool deoptimized = false;
-  frame top = thaw_frames(cont, hf, cont.refSP(), num_frames, frame_count, last_oop_index, last_frame, deoptimized);
+  frame top = thaw_frames(cont, hf, cont.refSP(), num_frames, frame_count, last_oop_index, last_frame, deoptimized, NULL);
   cont.set_last_frame(last_frame);
   // assert (last_oop_index == cont.refSP(), "");
   cont.set_refSP(last_oop_index);
@@ -2811,7 +2939,7 @@
     fi->pc = SharedRuntime::raw_exception_handler_for_return_address(JavaThread::current(), fi->pc);
     return ret;
   } else
-    return NULL;
+    return reinterpret_cast<address>(Interpreter::contains(fi->pc)); // really only necessary in the case of continuing from a forced yield
 JRT_END
 
 bool Continuation::is_continuation_entry_frame(const frame& f, const RegisterMap* map) {
@@ -2834,6 +2962,10 @@
   return is_return_barrier_entry(return_pc(f, f.is_interpreted_frame()));
 }
 
+bool Continuation::is_return_barrier_entry(const address pc) { 
+  return pc == StubRoutines::cont_returnBarrier(); 
+}
+
 static oop find_continuation_for_frame(JavaThread* thread, intptr_t* const sp) {
   oop cont = get_continuation(thread);
   while (cont != NULL && java_lang_Continuation::entrySP(cont) < sp)
@@ -3046,6 +3178,9 @@
   return method->bcp_from(bcp);
 }
 
+oop Continuation::continuation_scope(oop cont) { 
+  return cont != NULL ? java_lang_Continuation::scope(cont) : (oop)NULL; 
+}
 ///// DEBUGGING
 
 static void print_oop(void *p, oop obj, outputStream* st) {
@@ -3141,7 +3276,7 @@
 }
 
 static void print_frames(JavaThread* thread, outputStream* st) {
-  if (st != NULL && !log_is_enabled(Trace, jvmcont) ) return;
+  if (st != NULL && !log_is_enabled(Trace, jvmcont)) return;
   if (st == NULL) st = tty;
 
   if (true) {
@@ -3229,11 +3364,53 @@
 }
 JVM_END
 
+JVM_ENTRY(jint, CONT_TryForceYield(JNIEnv* env, jobject jcont, jobject jthread)) {
+  JavaThread* thread = JavaThread::thread_from_jni_environment(env);
+
+  guarantee(ThreadLocalHandshakes, "ThreadLocalHandshakes disabled");
+  guarantee(SafepointMechanism::uses_thread_local_poll(), "ThreadLocalHandshakes disabled");
+
+  class ForceYieldClosure : public ThreadClosure {
+    jobject _jcont;
+    jint _result;
+
+    void do_thread(Thread* th) {
+      assert(th->is_Java_thread(), "sanity");
+      JavaThread* thread = (JavaThread*)th;
+      
+      oop oopCont = JNIHandles::resolve_non_null(_jcont);
+      _result = Continuation::try_force_yield(thread, oopCont);
+    }
+
+  public:
+    ForceYieldClosure(jobject jcont) : _jcont(jcont), _result(-1) {}
+    jint result() const { return _result; }
+  };
+  ForceYieldClosure fyc(jcont);
+
+  // tty->print_cr("TRY_FORCE_YIELD0");
+  // thread->print();
+  // tty->print_cr("");
+
+  if (true) {
+    oop thread_oop = JNIHandles::resolve(jthread);
+    if (thread_oop != NULL) {
+      JavaThread* target = java_lang_Thread::thread(thread_oop);
+      Handshake::execute(&fyc, target);
+    }
+  } else {
+    Handshake::execute(&fyc);
+  }
+  return fyc.result();
+}
+JVM_END
+
 #define CC (char*)  /*cast a literal from (const char*)*/
 #define FN_PTR(f) CAST_FROM_FN_PTR(void*, &f)
 
 static JNINativeMethod CONT_methods[] = {
-    {CC"clean0",           CC"()V",        FN_PTR(CONT_Clean)},
+    {CC"clean0",           CC"()V",                   FN_PTR(CONT_Clean)},
+    {CC"tryForceYield",    CC"(Ljava/lang/Thread;)I", FN_PTR(CONT_TryForceYield)},
 };
 
 void CONT_RegisterNativeMethods(JNIEnv *env, jclass cls) {
--- a/src/hotspot/share/runtime/continuation.hpp	Mon Dec 03 09:30:26 2018 +0000
+++ b/src/hotspot/share/runtime/continuation.hpp	Wed Dec 05 16:44:54 2018 +0000
@@ -25,8 +25,6 @@
 #ifndef SHARE_VM_RUNTIME_CONTINUATION_HPP
 #define SHARE_VM_RUNTIME_CONTINUATION_HPP
 
-#include "classfile/javaClasses.hpp"
-#include "classfile/javaClasses.inline.hpp"
 #include "runtime/globals.hpp"
 
 #define CONT_FULL_STACK (!UseNewCode)
@@ -55,13 +53,15 @@
 
 class Continuation : AllStatic {
 public:
+  static int freeze0(JavaThread* thread, FrameInfo* fi, bool safepoint_yield);
   static int freeze(JavaThread* thread, FrameInfo* fi);
   static int prepare_thaw(FrameInfo* fi, bool return_barrier);
   static address thaw(FrameInfo* fi, bool return_barrier, bool exception);
+  static int try_force_yield(JavaThread* thread, oop cont);
 
   static bool is_continuation_entry_frame(const frame& f, const RegisterMap* map);
   static bool is_cont_bottom_frame(const frame& f);
-  static bool is_return_barrier_entry(const address pc) { return pc == StubRoutines::cont_returnBarrier(); }
+  static bool is_return_barrier_entry(const address pc);
   static bool is_frame_in_continuation(JavaThread* thread, const frame& f);
   static address fix_continuation_bottom_sender(const frame* callee, RegisterMap* map, address pc);
 
@@ -77,7 +77,7 @@
   static Method* interpreter_frame_method(const frame& fr, const RegisterMap* map);
   static address interpreter_frame_bcp(const frame& fr, const RegisterMap* map);
 
-  static inline oop continuation_scope(oop cont) { return cont != NULL ? java_lang_Continuation::scope(cont) : (oop)NULL; }
+  static oop continuation_scope(oop cont);
   static bool is_scope_bottom(oop cont_scope, const frame& fr, const RegisterMap* map);
   
   static int PERFTEST_LEVEL;
--- a/src/hotspot/share/runtime/stubRoutines.cpp	Mon Dec 03 09:30:26 2018 +0000
+++ b/src/hotspot/share/runtime/stubRoutines.cpp	Wed Dec 05 16:44:54 2018 +0000
@@ -179,6 +179,7 @@
 
 RuntimeStub* StubRoutines::_cont_doYield_stub = NULL;
 address StubRoutines::_cont_doYield       = NULL;
+address StubRoutines::_cont_jump_from_sp  = NULL;
 address StubRoutines::_cont_jump          = NULL;
 address StubRoutines::_cont_thaw          = NULL;
 address StubRoutines::_cont_returnBarrier = NULL;
--- a/src/hotspot/share/runtime/stubRoutines.hpp	Mon Dec 03 09:30:26 2018 +0000
+++ b/src/hotspot/share/runtime/stubRoutines.hpp	Wed Dec 05 16:44:54 2018 +0000
@@ -210,6 +210,7 @@
 
   static RuntimeStub* _cont_doYield_stub;
   static address _cont_doYield;
+  static address _cont_jump_from_sp;
   static address _cont_jump;
   static address _cont_thaw;
   static address _cont_returnBarrier;
@@ -394,14 +395,18 @@
   static address dlibm_tan_cot_huge()  { return _dlibm_tan_cot_huge; }
   static address dtan()                { return _dtan; }
 
+  typedef void (*cont_jump_from_sp_t)();
+
   static RuntimeStub* cont_doYield_stub() { return _cont_doYield_stub; }
   static address cont_doYield()        { return _cont_doYield; }
+  static address cont_jump_from_sp()   { return _cont_jump_from_sp; }
   static address cont_jump()           { return _cont_jump; }
   static address cont_thaw()           { return _cont_thaw; }
   static address cont_returnBarrier()  { return _cont_returnBarrier; }
   static address cont_returnBarrierExc(){return _cont_returnBarrierExc; }
   static address cont_getSP()          { return _cont_getSP; }
   static address cont_getPC()          { return _cont_getPC; }
+  static cont_jump_from_sp_t  cont_jump_from_sp_C() { return CAST_TO_FN_PTR(cont_jump_from_sp_t, _cont_jump_from_sp); }
 
 
   static address select_fill_function(BasicType t, bool aligned, const char* &name);
--- a/src/hotspot/share/runtime/thread.cpp	Mon Dec 03 09:30:26 2018 +0000
+++ b/src/hotspot/share/runtime/thread.cpp	Wed Dec 05 16:44:54 2018 +0000
@@ -1597,6 +1597,9 @@
   _cached_monitor_info = NULL;
   _parker = Parker::Allocate(this);
 
+  _cont_yield = false;
+  _cont_frame = (FrameInfo){0, 0, 0};
+
 #ifndef PRODUCT
   _jmp_ring_index = 0;
   for (int ji = 0; ji < jump_ring_buffer_size; ji++) {
@@ -2239,6 +2242,10 @@
     check_and_handle_async_exceptions();
   }
 
+  if (is_cont_force_yield()) {
+    StubRoutines::cont_jump_from_sp_C()();
+  }
+
   JFR_ONLY(SUSPEND_THREAD_CONDITIONAL(this);)
 }
 
--- a/src/hotspot/share/runtime/thread.hpp	Mon Dec 03 09:30:26 2018 +0000
+++ b/src/hotspot/share/runtime/thread.hpp	Wed Dec 05 16:44:54 2018 +0000
@@ -32,6 +32,7 @@
 #include "memory/allocation.hpp"
 #include "oops/oop.hpp"
 #include "prims/jvmtiExport.hpp"
+#include "runtime/continuation.hpp"
 #include "runtime/frame.hpp"
 #include "runtime/globals.hpp"
 #include "runtime/handshake.hpp"
@@ -911,6 +912,7 @@
   friend class VMStructs;
   friend class JVMCIVMStructs;
   friend class WhiteBox;
+  friend class Continuation;
  private:
   JavaThread*    _next;                          // The next thread in the Threads list
   bool           _on_thread_list;                // Is set when this JavaThread is added to the Threads list
@@ -1125,6 +1127,9 @@
   // failed reallocations.
   int _frames_to_pop_failed_realloc;
 
+  bool _cont_yield; // a continuation yield is in progress
+  FrameInfo _cont_frame;
+
 #ifndef PRODUCT
   int _jmp_ring_index;
   struct {
@@ -1400,6 +1405,7 @@
     return x;
   }
 
+  bool is_cont_force_yield() { return _cont_frame.pc != NULL; }
   // Are any async conditions present?
   bool has_async_condition() { return (_special_runtime_exit_condition != _no_async_condition); }
 
@@ -1421,7 +1427,7 @@
     // we have checked is_external_suspend(), we will recheck its value
     // under SR_lock in java_suspend_self().
     return (_special_runtime_exit_condition != _no_async_condition) ||
-            is_external_suspend() || is_trace_suspend();
+            is_external_suspend() || is_trace_suspend() || is_cont_force_yield();
   }
 
   void set_pending_unsafe_access_error()          { _special_runtime_exit_condition = _async_unsafe_access_error; }
@@ -1736,6 +1742,8 @@
     return byte_offset_of(JavaThread, _should_post_on_exceptions_flag);
   }
 
+  static ByteSize cont_frame_offset()         { return byte_offset_of(JavaThread, _cont_frame); }
+
   // Returns the jni environment for this thread
   JNIEnv* jni_environment()                      { return &_jni_environment; }
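
Taken together with the thread.cpp hunk above, these declarations deliver a forced yield the same way other asynchronous conditions are delivered: the handshake's closure freezes the continuation and stores the resulting FrameInfo into _cont_frame, has_special_runtime_exit_condition() reports true while _cont_frame.pc is non-NULL, and the code added in thread.cpp then calls StubRoutines::cont_jump_from_sp_C()() once the condition is handled. A minimal model of that per-thread state (simplified types; FrameInfo field order inferred from the stub's comments):

#include <cstddef>

struct FrameInfoSketch { void* pc; void* fp; void* sp; };

struct JavaThreadSketch {
  bool            other_async_condition = false; // stands in for the pre-existing checks
  FrameInfoSketch cont_frame            = {nullptr, nullptr, nullptr};

  // A pending forced yield is simply a non-NULL pc in _cont_frame ...
  bool is_cont_force_yield() const { return cont_frame.pc != nullptr; }

  // ... and it is folded into the same predicate that triggers the other
  // special runtime exit conditions.
  bool has_special_runtime_exit_condition() const {
    return other_async_condition || is_cont_force_yield();
  }
};
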
 
--- a/src/java.base/share/classes/java/lang/Continuation.java	Mon Dec 03 09:30:26 2018 +0000
+++ b/src/java.base/share/classes/java/lang/Continuation.java	Wed Dec 05 16:44:54 2018 +0000
@@ -47,6 +47,7 @@
     private static final boolean DEBUG = TRACE | isEmptyOrTrue("java.lang.Continuation.debug");
 
     private static final byte FLAG_LAST_FRAME_INTERPRETED = 1;
+    private static final byte FLAG_SAFEPOINT_YIELD = 1 << 1;
     private static final int METADATA_SIZE = 2;
     private static final int WATERMARK_THRESHOLD = 10;
     private static final VarHandle MOUNTED;
@@ -664,6 +665,16 @@
 
     private native void clean0();
 
+    /**
+     * TBD
+     * @param thread TBD
+     * @return TBD
+     */
+    public int forceYield(Thread thread) {
+        return tryForceYield(thread);
+    }
+    private native int tryForceYield(Thread thread);
+
     // native methods
     private static native void registerNatives();